monitoring switch back to prometheus-operator
This commit is contained in:
		| @@ -37,7 +37,7 @@ datasources: | ||||
|     datasources: | ||||
|       - name: Prometheus | ||||
|         type: prometheus | ||||
|         url: http://prometheus-server.monitoring.svc:80 | ||||
|         url: http://prometheus.monitoring.svc:9090 | ||||
|         isDefault: true | ||||
|       - name: Thanos | ||||
|         type: prometheus | ||||
|   | ||||
| @@ -3,4 +3,6 @@ kind: ConfigMap | ||||
| metadata: | ||||
|   name: argocd-cmd-params-cm | ||||
| data: | ||||
|   server.insecure: "true" | ||||
|   # server.insecure: "true" | ||||
|   # DID NOT FIX RELOAD LOOPS | ||||
|   # application.namespaces: "*" | ||||
| @@ -7,3 +7,4 @@ data: | ||||
|   # switch to annotation based resource tracking as per | ||||
|   # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_tracking/ | ||||
|   application.resourceTrackingMethod: annotation+label | ||||
|   admin.enabled: "false" | ||||
|   | ||||
| @@ -9,16 +9,9 @@ spec: | ||||
|   routes: | ||||
|     - kind: Rule | ||||
|       match: Host(`argocd.kluster.moll.re`) | ||||
|       priority: 10 | ||||
|       services: | ||||
|         - name: argocd-server | ||||
|           port: 80 | ||||
|     - kind: Rule | ||||
|       match: Host(`argocd.kluster.moll.re`) && Header(`Content-Type`, `application/grpc`) | ||||
|       priority: 11 | ||||
|       services: | ||||
|         - name: argocd-server | ||||
|           port: 80 | ||||
|           scheme: h2c | ||||
|           port: 443 | ||||
|           scheme: https | ||||
|   tls: | ||||
|     certResolver: default-tls | ||||
| @@ -4,14 +4,15 @@ kind: Kustomization | ||||
| namespace: argocd | ||||
| resources: | ||||
|   - namespace.yaml | ||||
|   - https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.1/manifests/install.yaml | ||||
|   - https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.3/manifests/install.yaml | ||||
|   - ingress.yaml | ||||
|   - argo-apps.application.yaml | ||||
|   - bootstrap-repo.sealedsecret.yaml | ||||
|   - argocd-oauth.sealedsecret.yaml | ||||
|   - servicemonitor.yaml | ||||
|   # DID NOT FIX RELOAD LOOPS | ||||
|   # - github.com/argoproj/argo-cd/examples/k8s-rbac/argocd-server-applications?ref=master | ||||
|  | ||||
| components: | ||||
|   - https://github.com/argoproj-labs/argocd-extensions/manifests | ||||
|  | ||||
| patches: | ||||
|   - path: argocd.configmap.yaml | ||||
|   | ||||
							
								
								
									
										77
									
								
								infrastructure/argocd/servicemonitor.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								infrastructure/argocd/servicemonitor.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-metrics | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-metrics | ||||
|   endpoints: | ||||
|   - port: metrics | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-server-metrics | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-server-metrics | ||||
|   endpoints: | ||||
|   - port: metrics | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-repo-server-metrics | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-repo-server | ||||
|   endpoints: | ||||
|   - port: metrics | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-applicationset-controller-metrics | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-applicationset-controller | ||||
|   endpoints: | ||||
|   - port: metrics | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-dex-server | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-dex-server | ||||
|   endpoints: | ||||
|     - port: metrics | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: ServiceMonitor | ||||
| metadata: | ||||
|   name: argocd-redis-haproxy-metrics | ||||
|   labels: | ||||
|     release: prometheus-operator | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app.kubernetes.io/name: argocd-redis-ha-haproxy | ||||
|   endpoints: | ||||
|   - port: http-exporter-port | ||||
| @@ -6,8 +6,13 @@ namespace: monitoring | ||||
| resources:  | ||||
|   - namespace.yaml | ||||
|   # prometheus-operator crds | ||||
|   - https://github.com/prometheus-operator/prometheus-operator?ref=v0.79.2 | ||||
|   # single prometheus instance with a thanos sidecar | ||||
|   - prometheus.yaml | ||||
|   - thanos-store.statefulset.yaml | ||||
|   - thanos-query.deployment.yaml | ||||
|   - thanos-objstore-config.sealedsecret.yaml | ||||
|   # - loki-objstore-config.sealedsecret.yaml | ||||
|  | ||||
|  | ||||
| images: | ||||
|   - name: thanos | ||||
| @@ -21,8 +26,8 @@ helmCharts: | ||||
|     repo: https://grafana.github.io/helm-charts | ||||
|     version: 6.24.0 | ||||
|     valuesFile: loki.values.yaml | ||||
|   - name: prometheus | ||||
|     releaseName: prometheus | ||||
|   - name: prometheus-node-exporter | ||||
|     releaseName: prometheus-node-exporter | ||||
|     repo: https://prometheus-community.github.io/helm-charts | ||||
|     version: 26.0.1 | ||||
|     valuesFile: prometheus.values.yaml | ||||
|     version: 4.43.1 | ||||
|     valuesFile: prometheus-node-exporter.values.yaml | ||||
|   | ||||
| @@ -0,0 +1,14 @@ | ||||
| prometheus: | ||||
|   monitor: | ||||
|     enabled: true | ||||
|  | ||||
|     jobLabel: "node-exporter" | ||||
|  | ||||
|    | ||||
| resources: | ||||
|   limits: | ||||
|     cpu: 200m | ||||
|     memory: 50Mi | ||||
|   requests: | ||||
|     cpu: 100m | ||||
|     memory: 30Mi | ||||
| @@ -1,574 +0,0 @@ | ||||
| podSecurityPolicy: | ||||
|   enabled: true | ||||
|  | ||||
| server: | ||||
|   extraArgs: | ||||
|     log.level: debug | ||||
|     storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md | ||||
|     storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md | ||||
|   retention: 180d | ||||
|   service: | ||||
|     annotations: | ||||
|       prometheus.io/scrape: "true" | ||||
|       prometheus.io/port: "9090" | ||||
|   statefulSet: | ||||
|     enabled: true | ||||
|   podAnnotations: | ||||
|     prometheus.io/scrape: "true" | ||||
|     prometheus.io/port: "10902" | ||||
|   # sidecarContainers: | ||||
|   #   thanos-sidecar: | ||||
|   #     image: thanos | ||||
|   #     resources: | ||||
|   #       requests: | ||||
|   #         memory: "512Mi" | ||||
|   #     env: | ||||
|   #       - name: GOOGLE_APPLICATION_CREDENTIALS | ||||
|   #         value: /etc/secret/sa | ||||
|   #     args: | ||||
|   #       - "sidecar" | ||||
|   #       - "--log.level=debug" | ||||
|   #       - "--tsdb.path=/data/" | ||||
|   #       - "--prometheus.url=http://127.0.0.1:9090" | ||||
|   #       - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}" | ||||
|   #       - "--reloader.config-file=/etc/prometheus-config/prometheus.yml" | ||||
|   #       - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml" | ||||
|   #       - "--reloader.rule-dir=/etc/prometheus-config/rules" | ||||
|   #     ports: | ||||
|   #       - name: sidecar-http | ||||
|   #         containerPort: 10902 | ||||
|   #       - name: grpc | ||||
|   #         containerPort: 10901 | ||||
|   #       - name: cluster | ||||
|   #         containerPort: 10900 | ||||
|   #     volumeMounts: | ||||
|   #       - name: storage-volume | ||||
|   #         mountPath: /data | ||||
|   #       - name: thanos-storage-secret | ||||
|   #         mountPath: /etc/secret | ||||
|   #       - name: config-volume | ||||
|   #         mountPath: /etc/prometheus-config | ||||
|   #         readOnly: false | ||||
|   #       - name: prometheus-config-shared | ||||
|   #         mountPath: /etc/prometheus-shared/ | ||||
|   #         readOnly: false | ||||
|   # # configPath: /etc/prometheus-shared/prometheus.yml | ||||
|   # replicaCount: 1 | ||||
|   # persistentVolume: | ||||
|   #   size: 20Gi | ||||
|   #   storageClass: nfs-client | ||||
|   # extraVolumes: # spec.template.spec.volumes | ||||
|   #   - name: prometheus-config-shared | ||||
|   #     emptyDir: {} | ||||
|   # extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container | ||||
|   #   - name: prometheus-config-shared | ||||
|   #     mountPath: /etc/prometheus-shared/ | ||||
|   # resources: | ||||
|   #   requests: | ||||
|   #     memory: 1Gi | ||||
|   # global: | ||||
|   #   scrape_interval: 5s | ||||
|   #   scrape_timeout: 4s | ||||
|   #   external_labels: | ||||
|   #     prometheus_group: KLUSTER | ||||
|   #     prometheus_replica: '$(HOSTNAME)' | ||||
|   #   evaluation_interval: 5s | ||||
|   # extraSecretMounts: | ||||
|   #   - name: thanos-storage-secret | ||||
|   #     mountPath: /etc/secret/ | ||||
|   #     subPath: sa | ||||
|   #     readOnly: false | ||||
|   #     secretName: thanos-objstore-config | ||||
|  | ||||
| # as thanos sidecar is taking care of the config reload | ||||
| # we can disable the prometheus configmap reload | ||||
| configmapReload: | ||||
|   prometheus: | ||||
|     enabled: false | ||||
|  | ||||
| ## Prometheus server ConfigMap entries | ||||
| ## | ||||
| serverFiles: | ||||
|   ## Alerts configuration | ||||
|   ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ | ||||
|   alerting_rules.yml: {} | ||||
|   # groups: | ||||
|   #   - name: Instances | ||||
|   #     rules: | ||||
|   #       - alert: InstanceDown | ||||
|   #         expr: up == 0 | ||||
|   #         for: 5m | ||||
|   #         labels: | ||||
|   #           severity: page | ||||
|   #         annotations: | ||||
|   #           description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.' | ||||
|   #           summary: 'Instance {{ $labels.instance }} down' | ||||
|   ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml | ||||
|   alerts: {} | ||||
|  | ||||
|   ## Records configuration | ||||
|   ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ | ||||
|   recording_rules.yml: {} | ||||
|   ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml | ||||
|   rules: {} | ||||
|  | ||||
|   prometheus.yml: | ||||
|     rule_files: | ||||
|       - /etc/config/recording_rules.yml | ||||
|       - /etc/config/alerting_rules.yml | ||||
|     ## The two files below are DEPRECATED and will be removed from this default values file | ||||
|       - /etc/config/rules | ||||
|       - /etc/config/alerts | ||||
|  | ||||
|     scrape_configs: | ||||
|       - job_name: prometheus | ||||
|         static_configs: | ||||
|           - targets: | ||||
|             - localhost:9090 | ||||
|  | ||||
|       # A scrape configuration for running Prometheus on a Kubernetes cluster. | ||||
|       # This uses separate scrape configs for cluster components (i.e. API server, node) | ||||
|       # and services to allow each to use different authentication configs. | ||||
|       # | ||||
|       # Kubernetes labels will be added as Prometheus labels on metrics via the | ||||
|       # `labelmap` relabeling action. | ||||
|  | ||||
|       # Scrape config for API servers. | ||||
|       # | ||||
|       # Kubernetes exposes API servers as endpoints to the default/kubernetes | ||||
|       # service so this uses `endpoints` role and uses relabelling to only keep | ||||
|       # the endpoints associated with the default/kubernetes service using the | ||||
|       # default named port `https`. This works for single API server deployments as | ||||
|       # well as HA API server deployments. | ||||
|       - job_name: 'kubernetes-apiservers' | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: endpoints | ||||
|  | ||||
|         # Default to scraping over https. If required, just disable this or change to | ||||
|         # `http`. | ||||
|         scheme: https | ||||
|  | ||||
|         # This TLS & bearer token file config is used to connect to the actual scrape | ||||
|         # endpoints for cluster components. This is separate to discovery auth | ||||
|         # configuration because discovery & scraping are two separate concerns in | ||||
|         # Prometheus. The discovery auth config is automatic if Prometheus runs inside | ||||
|         # the cluster. Otherwise, more config options have to be provided within the | ||||
|         # <kubernetes_sd_config>. | ||||
|         tls_config: | ||||
|           ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||
|           # If your node certificates are self-signed or use a different CA to the | ||||
|           # master CA, then disable certificate verification below. Note that | ||||
|           # certificate verification is an integral part of a secure infrastructure | ||||
|           # so this should only be disabled in a controlled environment. You can | ||||
|           # disable certificate verification by uncommenting the line below. | ||||
|           # | ||||
|           insecure_skip_verify: true | ||||
|         bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||
|  | ||||
|         # Keep only the default/kubernetes service endpoints for the https port. This | ||||
|         # will add targets for each API server which Kubernetes adds an endpoint to | ||||
|         # the default/kubernetes service. | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] | ||||
|             action: keep | ||||
|             regex: default;kubernetes;https | ||||
|  | ||||
|       - job_name: 'kubernetes-nodes' | ||||
|  | ||||
|         # Default to scraping over https. If required, just disable this or change to | ||||
|         # `http`. | ||||
|         scheme: https | ||||
|  | ||||
|         # This TLS & bearer token file config is used to connect to the actual scrape | ||||
|         # endpoints for cluster components. This is separate to discovery auth | ||||
|         # configuration because discovery & scraping are two separate concerns in | ||||
|         # Prometheus. The discovery auth config is automatic if Prometheus runs inside | ||||
|         # the cluster. Otherwise, more config options have to be provided within the | ||||
|         # <kubernetes_sd_config>. | ||||
|         tls_config: | ||||
|           ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||
|           # If your node certificates are self-signed or use a different CA to the | ||||
|           # master CA, then disable certificate verification below. Note that | ||||
|           # certificate verification is an integral part of a secure infrastructure | ||||
|           # so this should only be disabled in a controlled environment. You can | ||||
|           # disable certificate verification by uncommenting the line below. | ||||
|           # | ||||
|           insecure_skip_verify: true | ||||
|         bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: node | ||||
|  | ||||
|         relabel_configs: | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_node_label_(.+) | ||||
|           - target_label: __address__ | ||||
|             replacement: kubernetes.default.svc:443 | ||||
|           - source_labels: [__meta_kubernetes_node_name] | ||||
|             regex: (.+) | ||||
|             target_label: __metrics_path__ | ||||
|             replacement: /api/v1/nodes/$1/proxy/metrics | ||||
|  | ||||
|  | ||||
|       - job_name: 'kubernetes-nodes-cadvisor' | ||||
|  | ||||
|         # Default to scraping over https. If required, just disable this or change to | ||||
|         # `http`. | ||||
|         scheme: https | ||||
|  | ||||
|         # This TLS & bearer token file config is used to connect to the actual scrape | ||||
|         # endpoints for cluster components. This is separate to discovery auth | ||||
|         # configuration because discovery & scraping are two separate concerns in | ||||
|         # Prometheus. The discovery auth config is automatic if Prometheus runs inside | ||||
|         # the cluster. Otherwise, more config options have to be provided within the | ||||
|         # <kubernetes_sd_config>. | ||||
|         tls_config: | ||||
|           ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||
|           # If your node certificates are self-signed or use a different CA to the | ||||
|           # master CA, then disable certificate verification below. Note that | ||||
|           # certificate verification is an integral part of a secure infrastructure | ||||
|           # so this should only be disabled in a controlled environment. You can | ||||
|           # disable certificate verification by uncommenting the line below. | ||||
|           # | ||||
|           insecure_skip_verify: true | ||||
|         bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: node | ||||
|  | ||||
|         # This configuration will work only on kubelet 1.7.3+ | ||||
|         # As the scrape endpoints for cAdvisor have changed | ||||
|         # if you are using older version you need to change the replacement to | ||||
|         # replacement: /api/v1/nodes/$1:4194/proxy/metrics | ||||
|         # more info here https://github.com/coreos/prometheus-operator/issues/633 | ||||
|         relabel_configs: | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_node_label_(.+) | ||||
|           - target_label: __address__ | ||||
|             replacement: kubernetes.default.svc:443 | ||||
|           - source_labels: [__meta_kubernetes_node_name] | ||||
|             regex: (.+) | ||||
|             target_label: __metrics_path__ | ||||
|             replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor | ||||
|  | ||||
|         # Metric relabel configs to apply to samples before ingestion. | ||||
|         # [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) | ||||
|         # metric_relabel_configs: | ||||
|         # - action: labeldrop | ||||
|         #   regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone) | ||||
|  | ||||
|       # Scrape config for service endpoints. | ||||
|       # | ||||
|       # The relabeling allows the actual service scrape endpoint to be configured | ||||
|       # via the following annotations: | ||||
|       # | ||||
|       # * `prometheus.io/scrape`: Only scrape services that have a value of | ||||
|       # `true`, except if `prometheus.io/scrape-slow` is set to `true` as well. | ||||
|       # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need | ||||
|       # to set this to `https` & most likely set the `tls_config` of the scrape config. | ||||
|       # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. | ||||
|       # * `prometheus.io/port`: If the metrics are exposed on a different port to the | ||||
|       # service then set this appropriately. | ||||
|       # * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters | ||||
|       # then you can set any parameter | ||||
|       - job_name: 'kubernetes-service-endpoints' | ||||
|         honor_labels: true | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: endpoints | ||||
|  | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] | ||||
|             action: keep | ||||
|             regex: true | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] | ||||
|             action: drop | ||||
|             regex: true | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] | ||||
|             action: replace | ||||
|             target_label: __scheme__ | ||||
|             regex: (https?) | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] | ||||
|             action: replace | ||||
|             target_label: __metrics_path__ | ||||
|             regex: (.+) | ||||
|           - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] | ||||
|             action: replace | ||||
|             target_label: __address__ | ||||
|             regex: (.+?)(?::\d+)?;(\d+) | ||||
|             replacement: $1:$2 | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) | ||||
|             replacement: __param_$1 | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_service_label_(.+) | ||||
|           - source_labels: [__meta_kubernetes_namespace] | ||||
|             action: replace | ||||
|             target_label: namespace | ||||
|           - source_labels: [__meta_kubernetes_service_name] | ||||
|             action: replace | ||||
|             target_label: service | ||||
|           - source_labels: [__meta_kubernetes_pod_node_name] | ||||
|             action: replace | ||||
|             target_label: node | ||||
|  | ||||
|       # Scrape config for slow service endpoints; same as above, but with a larger | ||||
|       # timeout and a larger interval | ||||
|       # | ||||
|       # The relabeling allows the actual service scrape endpoint to be configured | ||||
|       # via the following annotations: | ||||
|       # | ||||
|       # * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true` | ||||
|       # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need | ||||
|       # to set this to `https` & most likely set the `tls_config` of the scrape config. | ||||
|       # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. | ||||
|       # * `prometheus.io/port`: If the metrics are exposed on a different port to the | ||||
|       # service then set this appropriately. | ||||
|       # * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters | ||||
|       # then you can set any parameter | ||||
|       - job_name: 'kubernetes-service-endpoints-slow' | ||||
|         honor_labels: true | ||||
|  | ||||
|         scrape_interval: 5m | ||||
|         scrape_timeout: 30s | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: endpoints | ||||
|  | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] | ||||
|             action: keep | ||||
|             regex: true | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] | ||||
|             action: replace | ||||
|             target_label: __scheme__ | ||||
|             regex: (https?) | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] | ||||
|             action: replace | ||||
|             target_label: __metrics_path__ | ||||
|             regex: (.+) | ||||
|           - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] | ||||
|             action: replace | ||||
|             target_label: __address__ | ||||
|             regex: (.+?)(?::\d+)?;(\d+) | ||||
|             replacement: $1:$2 | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) | ||||
|             replacement: __param_$1 | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_service_label_(.+) | ||||
|           - source_labels: [__meta_kubernetes_namespace] | ||||
|             action: replace | ||||
|             target_label: namespace | ||||
|           - source_labels: [__meta_kubernetes_service_name] | ||||
|             action: replace | ||||
|             target_label: service | ||||
|           - source_labels: [__meta_kubernetes_pod_node_name] | ||||
|             action: replace | ||||
|             target_label: node | ||||
|  | ||||
|       - job_name: 'prometheus-pushgateway' | ||||
|         honor_labels: true | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: service | ||||
|  | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] | ||||
|             action: keep | ||||
|             regex: pushgateway | ||||
|  | ||||
|       # Example scrape config for probing services via the Blackbox Exporter. | ||||
|       # | ||||
|       # The relabeling allows the actual service scrape endpoint to be configured | ||||
|       # via the following annotations: | ||||
|       # | ||||
|       # * `prometheus.io/probe`: Only probe services that have a value of `true` | ||||
|       - job_name: 'kubernetes-services' | ||||
|         honor_labels: true | ||||
|  | ||||
|         metrics_path: /probe | ||||
|         params: | ||||
|           module: [http_2xx] | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: service | ||||
|  | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] | ||||
|             action: keep | ||||
|             regex: true | ||||
|           - source_labels: [__address__] | ||||
|             target_label: __param_target | ||||
|           - target_label: __address__ | ||||
|             replacement: blackbox | ||||
|           - source_labels: [__param_target] | ||||
|             target_label: instance | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_service_label_(.+) | ||||
|           - source_labels: [__meta_kubernetes_namespace] | ||||
|             target_label: namespace | ||||
|           - source_labels: [__meta_kubernetes_service_name] | ||||
|             target_label: service | ||||
|  | ||||
|       # Example scrape config for pods | ||||
|       # | ||||
|       # The relabeling allows the actual pod scrape endpoint to be configured via the | ||||
|       # following annotations: | ||||
|       # | ||||
|       # * `prometheus.io/scrape`: Only scrape pods that have a value of `true`, | ||||
|       # except if `prometheus.io/scrape-slow` is set to `true` as well. | ||||
|       # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need | ||||
|       # to set this to `https` & most likely set the `tls_config` of the scrape config. | ||||
|       # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. | ||||
|       # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. | ||||
|       - job_name: 'kubernetes-pods' | ||||
|         honor_labels: true | ||||
|  | ||||
|         kubernetes_sd_configs: | ||||
|           - role: pod | ||||
|  | ||||
|         relabel_configs: | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] | ||||
|             action: keep | ||||
|             regex: true | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] | ||||
|             action: drop | ||||
|             regex: true | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] | ||||
|             action: replace | ||||
|             regex: (https?) | ||||
|             target_label: __scheme__ | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] | ||||
|             action: replace | ||||
|             target_label: __metrics_path__ | ||||
|             regex: (.+) | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] | ||||
|             action: replace | ||||
|             regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) | ||||
|             replacement: '[$2]:$1' | ||||
|             target_label: __address__ | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] | ||||
|             action: replace | ||||
|             regex: (\d+);((([0-9]+?)(\.|$)){4}) | ||||
|             replacement: $2:$1 | ||||
|             target_label: __address__ | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) | ||||
|             replacement: __param_$1 | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_pod_label_(.+) | ||||
|           - source_labels: [__meta_kubernetes_namespace] | ||||
|             action: replace | ||||
|             target_label: namespace | ||||
|           - source_labels: [__meta_kubernetes_pod_name] | ||||
|             action: replace | ||||
|             target_label: pod | ||||
|           - source_labels: [__meta_kubernetes_pod_phase] | ||||
|             regex: Pending|Succeeded|Failed|Completed | ||||
|             action: drop | ||||
|           - source_labels: [__meta_kubernetes_pod_node_name] | ||||
|             action: replace | ||||
|             target_label: node | ||||
|  | ||||
|       # Example scrape config for pods which should be scraped slower. A useful example | ||||
|       # would be stackdriver-exporter, which queries an API on every scrape of the pod | ||||
|       # | ||||
|       # The relabeling allows the actual pod scrape endpoint to be configured via the | ||||
|       # following annotations: | ||||
|       # | ||||
|       # * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true` | ||||
|       # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need | ||||
|       # to set this to `https` & most likely set the `tls_config` of the scrape config. | ||||
|       # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. | ||||
|       # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. | ||||
|       # Low-frequency pod scrape job (every 5m, 30s timeout), opt-in via pod annotation. | ||||
|       - job_name: 'kubernetes-pods-slow' | ||||
|         # On label conflicts, keep the label values exposed by the scraped target. | ||||
|         honor_labels: true | ||||
|  | ||||
|         scrape_interval: 5m | ||||
|         scrape_timeout: 30s | ||||
|  | ||||
|         # Discover one candidate target per pod through the Kubernetes API. | ||||
|         kubernetes_sd_configs: | ||||
|           - role: pod | ||||
|  | ||||
|         # Rules are applied in order; do not reorder them. | ||||
|         relabel_configs: | ||||
|           # Keep only pods annotated with `prometheus.io/scrape-slow: "true"`. | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] | ||||
|             action: keep | ||||
|             regex: true | ||||
|           # Use the `prometheus.io/scheme` annotation (http or https) as the scrape scheme. | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] | ||||
|             action: replace | ||||
|             regex: (https?) | ||||
|             target_label: __scheme__ | ||||
|           # Override the metrics path when `prometheus.io/path` is set to a non-empty value. | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] | ||||
|             action: replace | ||||
|             target_label: __metrics_path__ | ||||
|             regex: (.+) | ||||
|           # IPv6-shaped pod IP: build the scrape address as `[ip]:port` from `prometheus.io/port`. | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] | ||||
|             action: replace | ||||
|             regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) | ||||
|             replacement: '[$2]:$1' | ||||
|             target_label: __address__ | ||||
|           # IPv4-shaped pod IP: build the scrape address as `ip:port` from `prometheus.io/port`. | ||||
|           - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] | ||||
|             action: replace | ||||
|             regex: (\d+);((([0-9]+?)(\.|$)){4}) | ||||
|             replacement: $2:$1 | ||||
|             target_label: __address__ | ||||
|           # Map `prometheus.io/param_<name>` annotations to `__param_<name>` labels. | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) | ||||
|             replacement: __param_$1 | ||||
|           # Copy every pod label onto the target under its own name. | ||||
|           - action: labelmap | ||||
|             regex: __meta_kubernetes_pod_label_(.+) | ||||
|           # Record where the target lives: `namespace` and `pod` labels. | ||||
|           - source_labels: [__meta_kubernetes_namespace] | ||||
|             action: replace | ||||
|             target_label: namespace | ||||
|           - source_labels: [__meta_kubernetes_pod_name] | ||||
|             action: replace | ||||
|             target_label: pod | ||||
|           # Drop targets whose pod phase is Pending, Succeeded, Failed or Completed. | ||||
|           - source_labels: [__meta_kubernetes_pod_phase] | ||||
|             regex: Pending|Succeeded|Failed|Completed | ||||
|             action: drop | ||||
|           # Record the name of the node the pod runs on as the `node` label. | ||||
|           - source_labels: [__meta_kubernetes_pod_node_name] | ||||
|             action: replace | ||||
|             target_label: node | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| # Configuration of subcharts defined in Chart.yaml | ||||
|  | ||||
| ## alertmanager sub-chart configurable values | ||||
| ## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager | ||||
| ## | ||||
| alertmanager: | ||||
|   ## If false, alertmanager sub-chart will not be installed | ||||
|   ## | ||||
|   enabled: false | ||||
|  | ||||
| ## kube-state-metrics sub-chart configurable values | ||||
| ## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics | ||||
| ## | ||||
| kube-state-metrics: | ||||
|   ## If false, kube-state-metrics sub-chart will not be installed | ||||
|   ## | ||||
|   enabled: true | ||||
|  | ||||
| ## prometheus-node-exporter sub-chart configurable values | ||||
| ## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter | ||||
| ## | ||||
| prometheus-node-exporter: | ||||
|   ## If false, node-exporter will not be installed | ||||
|   ## | ||||
|   enabled: true | ||||
|  | ||||
|   ## NOTE(review): presumably toggles PodSecurityPolicy-related RBAC in the sub-chart; confirm | ||||
|   ## against the prometheus-node-exporter chart values. | ||||
|   ## | ||||
|   rbac: | ||||
|     pspEnabled: false | ||||
|  | ||||
|   ## NOTE(review): presumably rendered as the node-exporter container's securityContext; | ||||
|   ## confirm against the prometheus-node-exporter chart values. | ||||
|   ## | ||||
|   containerSecurityContext: | ||||
|     allowPrivilegeEscalation: false | ||||
|  | ||||
| ## prometheus-pushgateway sub-chart configurable values | ||||
| ## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway | ||||
| ## | ||||
| prometheus-pushgateway: | ||||
|   ## If false, pushgateway will not be installed | ||||
|   ## | ||||
|   enabled: false | ||||
							
								
								
									
										78
									
								
								infrastructure/monitoring/prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								infrastructure/monitoring/prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| # Service account for Prometheus. It is referenced by `serviceAccountName` in the | ||||
| # Prometheus resource and is the subject of the `prometheus` ClusterRoleBinding. | ||||
| apiVersion: v1 | ||||
| kind: ServiceAccount | ||||
| metadata: | ||||
|   name: prometheus | ||||
| --- | ||||
| # Cluster-wide, read-only permissions granted to the `prometheus` service account. | ||||
| apiVersion: rbac.authorization.k8s.io/v1 | ||||
| kind: ClusterRole | ||||
| metadata: | ||||
|   name: prometheus | ||||
| rules: | ||||
| # Read access (get/list/watch) to core-group objects. | ||||
| - apiGroups: | ||||
|   - "" | ||||
|   resources: | ||||
|   - nodes | ||||
|   - nodes/metrics | ||||
|   - services | ||||
|   - endpoints | ||||
|   - pods | ||||
|   verbs: | ||||
|   - get | ||||
|   - list | ||||
|   - watch | ||||
| # ConfigMaps can only be fetched individually (no list/watch). | ||||
| - apiGroups: | ||||
|   - "" | ||||
|   resources: | ||||
|   - configmaps | ||||
|   verbs: | ||||
|   - get | ||||
| # Read access (get/list/watch) to Ingress objects. | ||||
| - apiGroups: | ||||
|   - networking.k8s.io | ||||
|   resources: | ||||
|   - ingresses | ||||
|   verbs: | ||||
|   - get | ||||
|   - list | ||||
|   - watch | ||||
| # Non-resource URL rule: allows GET on the `/metrics` path. | ||||
| - nonResourceURLs: | ||||
|   - "/metrics" | ||||
|   verbs: | ||||
|   - get | ||||
| --- | ||||
| # Grants the `prometheus` ClusterRole to the `prometheus` ServiceAccount. | ||||
| apiVersion: rbac.authorization.k8s.io/v1 | ||||
| kind: ClusterRoleBinding | ||||
| metadata: | ||||
|   name: prometheus | ||||
| roleRef: | ||||
|   apiGroup: rbac.authorization.k8s.io | ||||
|   kind: ClusterRole | ||||
|   name: prometheus | ||||
| subjects: | ||||
| # The subject namespace is hard-coded here, so it must be kept in sync manually. | ||||
| - kind: ServiceAccount | ||||
|   name: prometheus | ||||
|   namespace: monitoring # needs to be the same as in the kustomization.yaml | ||||
| --- | ||||
| # Prometheus instance declared through the prometheus-operator `Prometheus` custom resource. | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: Prometheus | ||||
| metadata: | ||||
|   name: prometheus | ||||
| spec: | ||||
|   securityContext: | ||||
|     runAsUser: 65534 # same as the thanos sidecar | ||||
|   # Only a memory request is set; no CPU request and no limits are declared. | ||||
|   resources: | ||||
|     requests: | ||||
|       memory: 400Mi | ||||
|   # Time-based and size-based retention are both configured. | ||||
|   retention: 730d | ||||
|   retentionSize: 3GiB | ||||
|   # Runs as the `prometheus` ServiceAccount defined in this file. | ||||
|   serviceAccountName: prometheus | ||||
|   enableAdminAPI: false | ||||
|   # Empty selectors: per the prometheus-operator API reference, `{}` matches every | ||||
|   # namespace and every ServiceMonitor (a null selector would behave differently). | ||||
|   serviceMonitorNamespaceSelector: {} | ||||
|   serviceMonitorSelector: {} | ||||
|   # Thanos sidecar settings. | ||||
|   thanos: | ||||
|     version: v0.34.1 | ||||
|     objectStorageConfig: | ||||
|       # loads the config from a secret named thanos-objstore-config in the same namespace | ||||
|       key: thanos.yaml | ||||
|       name: thanos-objstore-config | ||||
| --- | ||||
| # ClusterIP Service exposing TCP port 9090 of the pods labelled `prometheus: prometheus`. | ||||
| apiVersion: v1 | ||||
| kind: Service | ||||
| metadata: | ||||
|   name: prometheus | ||||
| spec: | ||||
|   type: ClusterIP | ||||
|   ports: | ||||
|   # Single, unnamed port; no gRPC port is published by this Service. | ||||
|   - port: 9090 | ||||
|     targetPort: 9090 | ||||
|     protocol: TCP | ||||
|   selector: | ||||
|     prometheus: prometheus | ||||
							
								
								
									
										55
									
								
								infrastructure/monitoring/thanos-query.deployment.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								infrastructure/monitoring/thanos-query.deployment.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| # Thanos querier: runs `thanos query` and fans queries out to the StoreAPI endpoints | ||||
| # listed in `args` (the store deployment and the Prometheus sidecar). | ||||
| apiVersion: apps/v1 | ||||
| kind: Deployment | ||||
| metadata: | ||||
|   name: thanos-querier | ||||
|   labels: | ||||
|     app: thanos-querier | ||||
| spec: | ||||
|   replicas: 1 | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app: thanos-querier | ||||
|   template: | ||||
|     metadata: | ||||
|       labels: | ||||
|         app: thanos-querier | ||||
|     spec: | ||||
|       containers: | ||||
|       - name: thanos | ||||
|         # NOTE(review): bare image name; presumably rewritten by a kustomize `images:` entry. | ||||
|         image: thanos | ||||
|         args: | ||||
|         - query | ||||
|         - --log.level=debug | ||||
|         # Series that differ only in the `replica` label are deduplicated. | ||||
|         - --query.replica-label=replica | ||||
|         # SRV lookup against the `grpc` port of the thanos-store Service. | ||||
|         - --endpoint=dnssrv+_grpc._tcp.thanos-store:10901 | ||||
|         # Sidecar StoreAPI: resolved through the operator-created `prometheus-operated` | ||||
|         # governing Service, which publishes a port named `grpc` (10901) when `spec.thanos` | ||||
|         # is set. The `prometheus` Service only exposes an unnamed HTTP port 9090, so it | ||||
|         # has no `_grpc._tcp` SRV record and 9090 is not a gRPC endpoint. | ||||
|         - --endpoint=dnssrv+_grpc._tcp.prometheus-operated:10901 | ||||
|         ports: | ||||
|         - name: http | ||||
|           containerPort: 10902 | ||||
|         - name: grpc | ||||
|           containerPort: 10901 | ||||
|         # Both probes hit the HTTP port by name. | ||||
|         livenessProbe: | ||||
|           httpGet: | ||||
|             port: http | ||||
|             path: /-/healthy | ||||
|         readinessProbe: | ||||
|           httpGet: | ||||
|             port: http | ||||
|             path: /-/ready | ||||
| --- | ||||
| # Service in front of the thanos-querier pods, publishing their HTTP and gRPC ports. | ||||
| apiVersion: v1 | ||||
| kind: Service | ||||
| metadata: | ||||
|   name: thanos-querier | ||||
| spec: | ||||
|   selector: | ||||
|     app: thanos-querier | ||||
|   ports: | ||||
|     # `targetPort` values refer to the named container ports of the Deployment. | ||||
|     - name: http | ||||
|       protocol: TCP | ||||
|       port: 10902 | ||||
|       targetPort: http | ||||
|     - name: grpc | ||||
|       protocol: TCP | ||||
|       port: 10901 | ||||
|       targetPort: grpc | ||||
							
								
								
									
										71
									
								
								infrastructure/monitoring/thanos-store.statefulset.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								infrastructure/monitoring/thanos-store.statefulset.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| # Runs `thanos store` against the object storage described in the mounted secret. | ||||
| # NOTE(review): the file is named `thanos-store.statefulset.yaml` but the kind is | ||||
| # Deployment and `/data` is an emptyDir, so nothing in /data is kept when the pod is replaced. | ||||
| apiVersion: apps/v1 | ||||
| kind: Deployment | ||||
| metadata: | ||||
|   name: thanos-store | ||||
|   labels: | ||||
|     app: thanos-store | ||||
| spec: | ||||
|   replicas: 1 | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       app: thanos-store | ||||
|   template: | ||||
|     metadata: | ||||
|       labels: | ||||
|         app: thanos-store | ||||
|         # NOTE(review): not matched by any selector visible in this change; confirm it is still needed. | ||||
|         thanos-store-api: "true" | ||||
|     spec: | ||||
|       containers: | ||||
|         - name: thanos | ||||
|           # NOTE(review): bare image name; presumably rewritten by a kustomize `images:` entry. | ||||
|           image: thanos | ||||
|           args: | ||||
|           - store | ||||
|           - --log.level=debug | ||||
|           # Backed by the `thanos-data` emptyDir volume mounted below. | ||||
|           - --data-dir=/data | ||||
|           - --grpc-address=0.0.0.0:10901 | ||||
|           - --http-address=0.0.0.0:10902 | ||||
|           # Read from the `thanos-objstore-config` secret mounted at /etc/secret. | ||||
|           - --objstore.config-file=/etc/secret/thanos.yaml | ||||
|           - --index-cache-size=500MB | ||||
|           - --chunk-pool-size=500MB | ||||
|           ports: | ||||
|           - name: http | ||||
|             containerPort: 10902 | ||||
|           - name: grpc | ||||
|             containerPort: 10901 | ||||
|           # Both probes use the HTTP port (10902) by number. | ||||
|           livenessProbe: | ||||
|             httpGet: | ||||
|               port: 10902 | ||||
|               path: /-/healthy | ||||
|           readinessProbe: | ||||
|             httpGet: | ||||
|               port: 10902 | ||||
|               path: /-/ready | ||||
|           volumeMounts: | ||||
|             - name: thanos-objstore-config | ||||
|               mountPath: /etc/secret | ||||
|               readOnly: true | ||||
|             - name: thanos-data | ||||
|               mountPath: /data | ||||
|       volumes: | ||||
|         # The secret must contain the `thanos.yaml` key referenced by --objstore.config-file. | ||||
|         - name: thanos-objstore-config | ||||
|           secret: | ||||
|             secretName: thanos-objstore-config | ||||
|         - name: thanos-data | ||||
|           emptyDir: {} | ||||
| --- | ||||
| # Service in front of the thanos-store pods. The port named `grpc` is what the | ||||
| # querier's `dnssrv+_grpc._tcp.thanos-store` endpoint resolves. | ||||
| apiVersion: v1 | ||||
| kind: Service | ||||
| metadata: | ||||
|   name: thanos-store | ||||
|   labels: | ||||
|     app.kubernetes.io/name: thanos-store | ||||
| spec: | ||||
|   selector: | ||||
|     app: thanos-store | ||||
|   ports: | ||||
|   # StoreAPI (gRPC) | ||||
|   - name: grpc | ||||
|     targetPort: 10901 | ||||
|     port: 10901 | ||||
|   # HTTP (probe endpoints) | ||||
|   - name: http | ||||
|     targetPort: 10902 | ||||
|     port: 10902 | ||||
| @@ -17,3 +17,6 @@ spec: | ||||
|     automated: | ||||
|       prune: true | ||||
|       selfHeal: true | ||||
|     syncOptions: | ||||
|       - Replace=true | ||||
|       # because the prometheus-operator CRDs are too large | ||||
|   | ||||
		Reference in New Issue
	
	Block a user