monitoring switch back to prometheus-operator
This commit is contained in:
parent
2a56392af0
commit
16161bafb7
@ -37,7 +37,7 @@ datasources:
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
url: http://prometheus-server.monitoring.svc:80
|
||||
url: http://prometheus.monitoring.svc:9090
|
||||
isDefault: true
|
||||
- name: Thanos
|
||||
type: prometheus
|
||||
|
@ -3,4 +3,6 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: argocd-cmd-params-cm
|
||||
data:
|
||||
server.insecure: "true"
|
||||
# server.insecure: "true"
|
||||
# DID NOT FIX RELOAD LOOPS
|
||||
# application.namespaces: "*"
|
@ -7,3 +7,4 @@ data:
|
||||
# switch to annotation based resource tracking as per
|
||||
# https://argo-cd.readthedocs.io/en/stable/user-guide/resource_tracking/
|
||||
application.resourceTrackingMethod: annotation+label
|
||||
admin.enabled: "false"
|
||||
|
@ -9,16 +9,9 @@ spec:
|
||||
routes:
|
||||
- kind: Rule
|
||||
match: Host(`argocd.kluster.moll.re`)
|
||||
priority: 10
|
||||
services:
|
||||
- name: argocd-server
|
||||
port: 80
|
||||
- kind: Rule
|
||||
match: Host(`argocd.kluster.moll.re`) && Header(`Content-Type`, `application/grpc`)
|
||||
priority: 11
|
||||
services:
|
||||
- name: argocd-server
|
||||
port: 80
|
||||
scheme: h2c
|
||||
port: 443
|
||||
scheme: https
|
||||
tls:
|
||||
certResolver: default-tls
|
@ -4,14 +4,15 @@ kind: Kustomization
|
||||
namespace: argocd
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.1/manifests/install.yaml
|
||||
- https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.3/manifests/install.yaml
|
||||
- ingress.yaml
|
||||
- argo-apps.application.yaml
|
||||
- bootstrap-repo.sealedsecret.yaml
|
||||
- argocd-oauth.sealedsecret.yaml
|
||||
- servicemonitor.yaml
|
||||
# DID NOT FIX RELOAD LOOPS
|
||||
# - github.com/argoproj/argo-cd/examples/k8s-rbac/argocd-server-applications?ref=master
|
||||
|
||||
components:
|
||||
- https://github.com/argoproj-labs/argocd-extensions/manifests
|
||||
|
||||
patches:
|
||||
- path: argocd.configmap.yaml
|
||||
|
77
infrastructure/argocd/servicemonitor.yaml
Normal file
77
infrastructure/argocd/servicemonitor.yaml
Normal file
@ -0,0 +1,77 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-metrics
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-metrics
|
||||
endpoints:
|
||||
- port: metrics
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-server-metrics
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-server-metrics
|
||||
endpoints:
|
||||
- port: metrics
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-repo-server-metrics
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-repo-server
|
||||
endpoints:
|
||||
- port: metrics
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-applicationset-controller-metrics
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-applicationset-controller
|
||||
endpoints:
|
||||
- port: metrics
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-dex-server
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-dex-server
|
||||
endpoints:
|
||||
- port: metrics
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: argocd-redis-haproxy-metrics
|
||||
labels:
|
||||
release: prometheus-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-redis-ha-haproxy
|
||||
endpoints:
|
||||
- port: http-exporter-port
|
@ -6,8 +6,13 @@ namespace: monitoring
|
||||
resources:
|
||||
- namespace.yaml
|
||||
# prometheus-operator crds
|
||||
- https://github.com/prometheus-operator/prometheus-operator?ref=v0.79.2
|
||||
# single prometheus instance with a thanos sidecar
|
||||
- prometheus.yaml
|
||||
- thanos-store.statefulset.yaml
|
||||
- thanos-query.deployment.yaml
|
||||
- thanos-objstore-config.sealedsecret.yaml
|
||||
# - loki-objstore-config.sealedsecret.yaml
|
||||
|
||||
|
||||
images:
|
||||
- name: thanos
|
||||
@ -21,8 +26,8 @@ helmCharts:
|
||||
repo: https://grafana.github.io/helm-charts
|
||||
version: 6.24.0
|
||||
valuesFile: loki.values.yaml
|
||||
- name: prometheus
|
||||
releaseName: prometheus
|
||||
- name: prometheus-node-exporter
|
||||
releaseName: prometheus-node-exporter
|
||||
repo: https://prometheus-community.github.io/helm-charts
|
||||
version: 26.0.1
|
||||
valuesFile: prometheus.values.yaml
|
||||
version: 4.43.1
|
||||
valuesFile: prometheus-node-exporter.values.yaml
|
||||
|
@ -0,0 +1,14 @@
|
||||
prometheus:
|
||||
monitor:
|
||||
enabled: true
|
||||
|
||||
jobLabel: "node-exporter"
|
||||
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 50Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 30Mi
|
@ -1,574 +0,0 @@
|
||||
podSecurityPolicy:
|
||||
enabled: true
|
||||
|
||||
server:
|
||||
extraArgs:
|
||||
log.level: debug
|
||||
storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
||||
storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
||||
retention: 180d
|
||||
service:
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9090"
|
||||
statefulSet:
|
||||
enabled: true
|
||||
podAnnotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "10902"
|
||||
# sidecarContainers:
|
||||
# thanos-sidecar:
|
||||
# image: thanos
|
||||
# resources:
|
||||
# requests:
|
||||
# memory: "512Mi"
|
||||
# env:
|
||||
# - name: GOOGLE_APPLICATION_CREDENTIALS
|
||||
# value: /etc/secret/sa
|
||||
# args:
|
||||
# - "sidecar"
|
||||
# - "--log.level=debug"
|
||||
# - "--tsdb.path=/data/"
|
||||
# - "--prometheus.url=http://127.0.0.1:9090"
|
||||
# - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}"
|
||||
# - "--reloader.config-file=/etc/prometheus-config/prometheus.yml"
|
||||
# - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml"
|
||||
# - "--reloader.rule-dir=/etc/prometheus-config/rules"
|
||||
# ports:
|
||||
# - name: sidecar-http
|
||||
# containerPort: 10902
|
||||
# - name: grpc
|
||||
# containerPort: 10901
|
||||
# - name: cluster
|
||||
# containerPort: 10900
|
||||
# volumeMounts:
|
||||
# - name: storage-volume
|
||||
# mountPath: /data
|
||||
# - name: thanos-storage-secret
|
||||
# mountPath: /etc/secret
|
||||
# - name: config-volume
|
||||
# mountPath: /etc/prometheus-config
|
||||
# readOnly: false
|
||||
# - name: prometheus-config-shared
|
||||
# mountPath: /etc/prometheus-shared/
|
||||
# readOnly: false
|
||||
# # configPath: /etc/prometheus-shared/prometheus.yml
|
||||
# replicaCount: 1
|
||||
# persistentVolume:
|
||||
# size: 20Gi
|
||||
# storageClass: nfs-client
|
||||
# extraVolumes: # spec.template.spec.volumes
|
||||
# - name: prometheus-config-shared
|
||||
# emptyDir: {}
|
||||
# extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container
|
||||
# - name: prometheus-config-shared
|
||||
# mountPath: /etc/prometheus-shared/
|
||||
# resources:
|
||||
# requests:
|
||||
# memory: 1Gi
|
||||
# global:
|
||||
# scrape_interval: 5s
|
||||
# scrape_timeout: 4s
|
||||
# external_labels:
|
||||
# prometheus_group: KLUSTER
|
||||
# prometheus_replica: '$(HOSTNAME)'
|
||||
# evaluation_interval: 5s
|
||||
# extraSecretMounts:
|
||||
# - name: thanos-storage-secret
|
||||
# mountPath: /etc/secret/
|
||||
# subPath: sa
|
||||
# readOnly: false
|
||||
# secretName: thanos-objstore-config
|
||||
|
||||
# as thanos sidecar is taking care of the config reload
|
||||
# we can disable the prometheus configmap reload
|
||||
configmapReload:
|
||||
prometheus:
|
||||
enabled: false
|
||||
|
||||
## Prometheus server ConfigMap entries
|
||||
##
|
||||
serverFiles:
|
||||
## Alerts configuration
|
||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
|
||||
alerting_rules.yml: {}
|
||||
# groups:
|
||||
# - name: Instances
|
||||
# rules:
|
||||
# - alert: InstanceDown
|
||||
# expr: up == 0
|
||||
# for: 5m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
|
||||
# summary: 'Instance {{ $labels.instance }} down'
|
||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
|
||||
alerts: {}
|
||||
|
||||
## Records configuration
|
||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
|
||||
recording_rules.yml: {}
|
||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
|
||||
rules: {}
|
||||
|
||||
prometheus.yml:
|
||||
rule_files:
|
||||
- /etc/config/recording_rules.yml
|
||||
- /etc/config/alerting_rules.yml
|
||||
## The two files below are DEPRECATED and will be removed from this default values file
|
||||
- /etc/config/rules
|
||||
- /etc/config/alerts
|
||||
|
||||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
# A scrape configuration for running Prometheus on a Kubernetes cluster.
|
||||
# This uses separate scrape configs for cluster components (i.e. API server, node)
|
||||
# and services to allow each to use different authentication configs.
|
||||
#
|
||||
# Kubernetes labels will be added as Prometheus labels on metrics via the
|
||||
# `labelmap` relabeling action.
|
||||
|
||||
# Scrape config for API servers.
|
||||
#
|
||||
# Kubernetes exposes API servers as endpoints to the default/kubernetes
|
||||
# service so this uses `endpoints` role and uses relabelling to only keep
|
||||
# the endpoints associated with the default/kubernetes service using the
|
||||
# default named port `https`. This works for single API server deployments as
|
||||
# well as HA API server deployments.
|
||||
- job_name: 'kubernetes-apiservers'
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification by uncommenting the line below.
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
# Keep only the default/kubernetes service endpoints for the https port. This
|
||||
# will add targets for each API server which Kubernetes adds an endpoint to
|
||||
# the default/kubernetes service.
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
|
||||
- job_name: 'kubernetes-nodes'
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification by uncommenting the line below.
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
|
||||
|
||||
- job_name: 'kubernetes-nodes-cadvisor'
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification by uncommenting the line below.
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
|
||||
# This configuration will work only on kubelet 1.7.3+
|
||||
# As the scrape endpoints for cAdvisor have changed
|
||||
# if you are using older version you need to change the replacement to
|
||||
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
|
||||
# more info here https://github.com/coreos/prometheus-operator/issues/633
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
|
||||
# Metric relabel configs to apply to samples before ingestion.
|
||||
# [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
|
||||
# metric_relabel_configs:
|
||||
# - action: labeldrop
|
||||
# regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
|
||||
|
||||
# Scrape config for service endpoints.
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape`: Only scrape services that have a value of
|
||||
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
||||
# service then set this appropriately.
|
||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
||||
# then you can set any parameter
|
||||
- job_name: 'kubernetes-service-endpoints'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: (.+?)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: service
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
# Scrape config for slow service endpoints; same as above, but with a larger
|
||||
# timeout and a larger interval
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
||||
# service then set this appropriately.
|
||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
||||
# then you can set any parameter
|
||||
- job_name: 'kubernetes-service-endpoints-slow'
|
||||
honor_labels: true
|
||||
|
||||
scrape_interval: 5m
|
||||
scrape_timeout: 30s
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: (.+?)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: service
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
- job_name: 'prometheus-pushgateway'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
||||
action: keep
|
||||
regex: pushgateway
|
||||
|
||||
# Example scrape config for probing services via the Blackbox Exporter.
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/probe`: Only probe services that have a value of `true`
|
||||
- job_name: 'kubernetes-services'
|
||||
honor_labels: true
|
||||
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- target_label: __address__
|
||||
replacement: blackbox
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: service
|
||||
|
||||
# Example scrape config for pods
|
||||
#
|
||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
||||
# following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
|
||||
# except if `prometheus.io/scrape-slow` is set to `true` as well.
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
||||
- job_name: 'kubernetes-pods'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
regex: (https?)
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
||||
replacement: '[$2]:$1'
|
||||
target_label: __address__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
||||
replacement: $2:$1
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod
|
||||
- source_labels: [__meta_kubernetes_pod_phase]
|
||||
regex: Pending|Succeeded|Failed|Completed
|
||||
action: drop
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
# Example Scrape config for pods which should be scraped slower. A useful example
|
||||
# would be stackdriver-exporter which queries an API on every scrape of the pod
|
||||
#
|
||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
||||
# following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
||||
- job_name: 'kubernetes-pods-slow'
|
||||
honor_labels: true
|
||||
|
||||
scrape_interval: 5m
|
||||
scrape_timeout: 30s
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
regex: (https?)
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
||||
replacement: '[$2]:$1'
|
||||
target_label: __address__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
||||
replacement: $2:$1
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod
|
||||
- source_labels: [__meta_kubernetes_pod_phase]
|
||||
regex: Pending|Succeeded|Failed|Completed
|
||||
action: drop
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
|
||||
|
||||
|
||||
# Configuration of subcharts defined in Chart.yaml
|
||||
|
||||
## alertmanager sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
|
||||
##
|
||||
alertmanager:
|
||||
enabled: false
|
||||
|
||||
## kube-state-metrics sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
|
||||
##
|
||||
kube-state-metrics:
|
||||
## If false, kube-state-metrics sub-chart will not be installed
|
||||
##
|
||||
enabled: true
|
||||
|
||||
## prometheus-node-exporter sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
|
||||
##
|
||||
prometheus-node-exporter:
|
||||
## If false, node-exporter will not be installed
|
||||
##
|
||||
enabled: true
|
||||
|
||||
rbac:
|
||||
pspEnabled: false
|
||||
|
||||
containerSecurityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
|
||||
## prometheus-pushgateway sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
|
||||
##
|
||||
prometheus-pushgateway:
|
||||
## If false, pushgateway will not be installed
|
||||
##
|
||||
enabled: false
|
78
infrastructure/monitoring/prometheus.yaml
Normal file
78
infrastructure/monitoring/prometheus.yaml
Normal file
@ -0,0 +1,78 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
verbs: ["get"]
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
- nonResourceURLs: ["/metrics"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus
|
||||
namespace: monitoring # needs to be the same as in the kustomization.yaml
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: prometheus
|
||||
spec:
|
||||
securityContext:
|
||||
runAsUser: 65534 # same as the thanos sidecar
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
|
||||
retention: 730d
|
||||
retentionSize: 3GiB
|
||||
serviceAccountName: prometheus
|
||||
enableAdminAPI: false
|
||||
serviceMonitorNamespaceSelector: {}
|
||||
serviceMonitorSelector: {}
|
||||
thanos:
|
||||
version: v0.34.1
|
||||
objectStorageConfig:
|
||||
# loads the config from a secret named thanos-objstore-config in the same namespace
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore-config
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9090
|
||||
targetPort: 9090
|
||||
protocol: TCP
|
||||
selector:
|
||||
prometheus: prometheus
|
55
infrastructure/monitoring/thanos-query.deployment.yaml
Normal file
55
infrastructure/monitoring/thanos-query.deployment.yaml
Normal file
@ -0,0 +1,55 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
labels:
|
||||
app: thanos-querier
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-querier
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-querier
|
||||
spec:
|
||||
containers:
|
||||
- name: thanos
|
||||
image: thanos
|
||||
args:
|
||||
- query
|
||||
- --log.level=debug
|
||||
- --query.replica-label=replica
|
||||
- --endpoint=dnssrv+_grpc._tcp.thanos-store:10901
|
||||
- --endpoint=dnssrv+_grpc._tcp.prometheus:9090
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
port: http
|
||||
path: /-/healthy
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
port: http
|
||||
path: /-/ready
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
spec:
|
||||
selector:
|
||||
app: thanos-querier
|
||||
ports:
|
||||
- name: http
|
||||
protocol: TCP
|
||||
port: 10902
|
||||
targetPort: http
|
||||
- name: grpc
|
||||
protocol: TCP
|
||||
port: 10901
|
||||
targetPort: grpc
|
71
infrastructure/monitoring/thanos-store.statefulset.yaml
Normal file
71
infrastructure/monitoring/thanos-store.statefulset.yaml
Normal file
@ -0,0 +1,71 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: thanos-store
|
||||
labels:
|
||||
app: thanos-store
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-store
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-store
|
||||
thanos-store-api: "true"
|
||||
spec:
|
||||
containers:
|
||||
- name: thanos
|
||||
image: thanos
|
||||
args:
|
||||
- store
|
||||
- --log.level=debug
|
||||
- --data-dir=/data
|
||||
- --grpc-address=0.0.0.0:10901
|
||||
- --http-address=0.0.0.0:10902
|
||||
- --objstore.config-file=/etc/secret/thanos.yaml
|
||||
- --index-cache-size=500MB
|
||||
- --chunk-pool-size=500MB
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
port: 10902
|
||||
path: /-/healthy
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
port: 10902
|
||||
path: /-/ready
|
||||
volumeMounts:
|
||||
- name: thanos-objstore-config
|
||||
mountPath: /etc/secret
|
||||
readOnly: true
|
||||
- name: thanos-data
|
||||
mountPath: /data
|
||||
volumes:
|
||||
- name: thanos-objstore-config
|
||||
secret:
|
||||
secretName: thanos-objstore-config
|
||||
- name: thanos-data
|
||||
emptyDir: {}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: thanos-store
|
||||
name: thanos-store
|
||||
spec:
|
||||
ports:
|
||||
- name: grpc
|
||||
port: 10901
|
||||
targetPort: 10901
|
||||
- name: http
|
||||
port: 10902
|
||||
targetPort: 10902
|
||||
selector:
|
||||
app: thanos-store
|
@ -17,3 +17,6 @@ spec:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- Replace=true
|
||||
# because the prometheus-operator CRDs are too large
|
||||
|
Loading…
x
Reference in New Issue
Block a user