monitoring switch back to prometheus-operator
This commit is contained in:
parent
2a56392af0
commit
16161bafb7
@ -37,7 +37,7 @@ datasources:
|
|||||||
datasources:
|
datasources:
|
||||||
- name: Prometheus
|
- name: Prometheus
|
||||||
type: prometheus
|
type: prometheus
|
||||||
url: http://prometheus-server.monitoring.svc:80
|
url: http://prometheus.monitoring.svc:9090
|
||||||
isDefault: true
|
isDefault: true
|
||||||
- name: Thanos
|
- name: Thanos
|
||||||
type: prometheus
|
type: prometheus
|
||||||
|
@ -3,4 +3,6 @@ kind: ConfigMap
|
|||||||
metadata:
|
metadata:
|
||||||
name: argocd-cmd-params-cm
|
name: argocd-cmd-params-cm
|
||||||
data:
|
data:
|
||||||
server.insecure: "true"
|
# server.insecure: "true"
|
||||||
|
# DID NOT FIX RELOAD LOOPS
|
||||||
|
# application.namespaces: "*"
|
@ -7,3 +7,4 @@ data:
|
|||||||
# switch to annotation based resource tracking as per
|
# switch to annotation based resource tracking as per
|
||||||
# https://argo-cd.readthedocs.io/en/stable/user-guide/resource_tracking/
|
# https://argo-cd.readthedocs.io/en/stable/user-guide/resource_tracking/
|
||||||
application.resourceTrackingMethod: annotation+label
|
application.resourceTrackingMethod: annotation+label
|
||||||
|
admin.enabled: "false"
|
||||||
|
@ -9,16 +9,9 @@ spec:
|
|||||||
routes:
|
routes:
|
||||||
- kind: Rule
|
- kind: Rule
|
||||||
match: Host(`argocd.kluster.moll.re`)
|
match: Host(`argocd.kluster.moll.re`)
|
||||||
priority: 10
|
|
||||||
services:
|
services:
|
||||||
- name: argocd-server
|
- name: argocd-server
|
||||||
port: 80
|
port: 443
|
||||||
- kind: Rule
|
scheme: https
|
||||||
match: Host(`argocd.kluster.moll.re`) && Header(`Content-Type`, `application/grpc`)
|
|
||||||
priority: 11
|
|
||||||
services:
|
|
||||||
- name: argocd-server
|
|
||||||
port: 80
|
|
||||||
scheme: h2c
|
|
||||||
tls:
|
tls:
|
||||||
certResolver: default-tls
|
certResolver: default-tls
|
@ -4,14 +4,15 @@ kind: Kustomization
|
|||||||
namespace: argocd
|
namespace: argocd
|
||||||
resources:
|
resources:
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
- https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.1/manifests/install.yaml
|
- https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.3/manifests/install.yaml
|
||||||
- ingress.yaml
|
- ingress.yaml
|
||||||
- argo-apps.application.yaml
|
- argo-apps.application.yaml
|
||||||
- bootstrap-repo.sealedsecret.yaml
|
- bootstrap-repo.sealedsecret.yaml
|
||||||
- argocd-oauth.sealedsecret.yaml
|
- argocd-oauth.sealedsecret.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
|
# DID NOT FIX RELOAD LOOPS
|
||||||
|
# - github.com/argoproj/argo-cd/examples/k8s-rbac/argocd-server-applications?ref=master
|
||||||
|
|
||||||
components:
|
|
||||||
- https://github.com/argoproj-labs/argocd-extensions/manifests
|
|
||||||
|
|
||||||
patches:
|
patches:
|
||||||
- path: argocd.configmap.yaml
|
- path: argocd.configmap.yaml
|
||||||
|
77
infrastructure/argocd/servicemonitor.yaml
Normal file
77
infrastructure/argocd/servicemonitor.yaml
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-metrics
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-metrics
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-server-metrics
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-server-metrics
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-repo-server-metrics
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-repo-server
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-applicationset-controller-metrics
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-applicationset-controller
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-dex-server
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-dex-server
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: argocd-redis-haproxy-metrics
|
||||||
|
labels:
|
||||||
|
release: prometheus-operator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: argocd-redis-ha-haproxy
|
||||||
|
endpoints:
|
||||||
|
- port: http-exporter-port
|
@ -6,8 +6,13 @@ namespace: monitoring
|
|||||||
resources:
|
resources:
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
# prometheus-operator crds
|
# prometheus-operator crds
|
||||||
|
- https://github.com/prometheus-operator/prometheus-operator?ref=v0.79.2
|
||||||
|
# single prometheus instance with a thanos sidecar
|
||||||
|
- prometheus.yaml
|
||||||
|
- thanos-store.statefulset.yaml
|
||||||
|
- thanos-query.deployment.yaml
|
||||||
- thanos-objstore-config.sealedsecret.yaml
|
- thanos-objstore-config.sealedsecret.yaml
|
||||||
# - loki-objstore-config.sealedsecret.yaml
|
|
||||||
|
|
||||||
images:
|
images:
|
||||||
- name: thanos
|
- name: thanos
|
||||||
@ -21,8 +26,8 @@ helmCharts:
|
|||||||
repo: https://grafana.github.io/helm-charts
|
repo: https://grafana.github.io/helm-charts
|
||||||
version: 6.24.0
|
version: 6.24.0
|
||||||
valuesFile: loki.values.yaml
|
valuesFile: loki.values.yaml
|
||||||
- name: prometheus
|
- name: prometheus-node-exporter
|
||||||
releaseName: prometheus
|
releaseName: prometheus-node-exporter
|
||||||
repo: https://prometheus-community.github.io/helm-charts
|
repo: https://prometheus-community.github.io/helm-charts
|
||||||
version: 26.0.1
|
version: 4.43.1
|
||||||
valuesFile: prometheus.values.yaml
|
valuesFile: prometheus-node-exporter.values.yaml
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
prometheus:
|
||||||
|
monitor:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
jobLabel: "node-exporter"
|
||||||
|
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 50Mi
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 30Mi
|
@ -1,574 +0,0 @@
|
|||||||
podSecurityPolicy:
|
|
||||||
enabled: true
|
|
||||||
|
|
||||||
server:
|
|
||||||
extraArgs:
|
|
||||||
log.level: debug
|
|
||||||
storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
|
||||||
storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
|
||||||
retention: 180d
|
|
||||||
service:
|
|
||||||
annotations:
|
|
||||||
prometheus.io/scrape: "true"
|
|
||||||
prometheus.io/port: "9090"
|
|
||||||
statefulSet:
|
|
||||||
enabled: true
|
|
||||||
podAnnotations:
|
|
||||||
prometheus.io/scrape: "true"
|
|
||||||
prometheus.io/port: "10902"
|
|
||||||
# sidecarContainers:
|
|
||||||
# thanos-sidecar:
|
|
||||||
# image: thanos
|
|
||||||
# resources:
|
|
||||||
# requests:
|
|
||||||
# memory: "512Mi"
|
|
||||||
# env:
|
|
||||||
# - name: GOOGLE_APPLICATION_CREDENTIALS
|
|
||||||
# value: /etc/secret/sa
|
|
||||||
# args:
|
|
||||||
# - "sidecar"
|
|
||||||
# - "--log.level=debug"
|
|
||||||
# - "--tsdb.path=/data/"
|
|
||||||
# - "--prometheus.url=http://127.0.0.1:9090"
|
|
||||||
# - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}"
|
|
||||||
# - "--reloader.config-file=/etc/prometheus-config/prometheus.yml"
|
|
||||||
# - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml"
|
|
||||||
# - "--reloader.rule-dir=/etc/prometheus-config/rules"
|
|
||||||
# ports:
|
|
||||||
# - name: sidecar-http
|
|
||||||
# containerPort: 10902
|
|
||||||
# - name: grpc
|
|
||||||
# containerPort: 10901
|
|
||||||
# - name: cluster
|
|
||||||
# containerPort: 10900
|
|
||||||
# volumeMounts:
|
|
||||||
# - name: storage-volume
|
|
||||||
# mountPath: /data
|
|
||||||
# - name: thanos-storage-secret
|
|
||||||
# mountPath: /etc/secret
|
|
||||||
# - name: config-volume
|
|
||||||
# mountPath: /etc/prometheus-config
|
|
||||||
# readOnly: false
|
|
||||||
# - name: prometheus-config-shared
|
|
||||||
# mountPath: /etc/prometheus-shared/
|
|
||||||
# readOnly: false
|
|
||||||
# # configPath: /etc/prometheus-shared/prometheus.yml
|
|
||||||
# replicaCount: 1
|
|
||||||
# persistentVolume:
|
|
||||||
# size: 20Gi
|
|
||||||
# storageClass: nfs-client
|
|
||||||
# extraVolumes: # spec.template.spec.volumes
|
|
||||||
# - name: prometheus-config-shared
|
|
||||||
# emptyDir: {}
|
|
||||||
# extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container
|
|
||||||
# - name: prometheus-config-shared
|
|
||||||
# mountPath: /etc/prometheus-shared/
|
|
||||||
# resources:
|
|
||||||
# requests:
|
|
||||||
# memory: 1Gi
|
|
||||||
# global:
|
|
||||||
# scrape_interval: 5s
|
|
||||||
# scrape_timeout: 4s
|
|
||||||
# external_labels:
|
|
||||||
# prometheus_group: KLUSTER
|
|
||||||
# prometheus_replica: '$(HOSTNAME)'
|
|
||||||
# evaluation_interval: 5s
|
|
||||||
# extraSecretMounts:
|
|
||||||
# - name: thanos-storage-secret
|
|
||||||
# mountPath: /etc/secret/
|
|
||||||
# subPath: sa
|
|
||||||
# readOnly: false
|
|
||||||
# secretName: thanos-objstore-config
|
|
||||||
|
|
||||||
# as thanos sidecar is taking care of the config reload
|
|
||||||
# we can disable the prometheus configmap reload
|
|
||||||
configmapReload:
|
|
||||||
prometheus:
|
|
||||||
enabled: false
|
|
||||||
|
|
||||||
## Prometheus server ConfigMap entries
|
|
||||||
##
|
|
||||||
serverFiles:
|
|
||||||
## Alerts configuration
|
|
||||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
|
|
||||||
alerting_rules.yml: {}
|
|
||||||
# groups:
|
|
||||||
# - name: Instances
|
|
||||||
# rules:
|
|
||||||
# - alert: InstanceDown
|
|
||||||
# expr: up == 0
|
|
||||||
# for: 5m
|
|
||||||
# labels:
|
|
||||||
# severity: page
|
|
||||||
# annotations:
|
|
||||||
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
|
|
||||||
# summary: 'Instance {{ $labels.instance }} down'
|
|
||||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
|
|
||||||
alerts: {}
|
|
||||||
|
|
||||||
## Records configuration
|
|
||||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
|
|
||||||
recording_rules.yml: {}
|
|
||||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
|
|
||||||
rules: {}
|
|
||||||
|
|
||||||
prometheus.yml:
|
|
||||||
rule_files:
|
|
||||||
- /etc/config/recording_rules.yml
|
|
||||||
- /etc/config/alerting_rules.yml
|
|
||||||
## Below two files are DEPRECATED will be removed from this default values file
|
|
||||||
- /etc/config/rules
|
|
||||||
- /etc/config/alerts
|
|
||||||
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: prometheus
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- localhost:9090
|
|
||||||
|
|
||||||
# A scrape configuration for running Prometheus on a Kubernetes cluster.
|
|
||||||
# This uses separate scrape configs for cluster components (i.e. API server, node)
|
|
||||||
# and services to allow each to use different authentication configs.
|
|
||||||
#
|
|
||||||
# Kubernetes labels will be added as Prometheus labels on metrics via the
|
|
||||||
# `labelmap` relabeling action.
|
|
||||||
|
|
||||||
# Scrape config for API servers.
|
|
||||||
#
|
|
||||||
# Kubernetes exposes API servers as endpoints to the default/kubernetes
|
|
||||||
# service so this uses `endpoints` role and uses relabelling to only keep
|
|
||||||
# the endpoints associated with the default/kubernetes service using the
|
|
||||||
# default named port `https`. This works for single API server deployments as
|
|
||||||
# well as HA API server deployments.
|
|
||||||
- job_name: 'kubernetes-apiservers'
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: endpoints
|
|
||||||
|
|
||||||
# Default to scraping over https. If required, just disable this or change to
|
|
||||||
# `http`.
|
|
||||||
scheme: https
|
|
||||||
|
|
||||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
||||||
# endpoints for cluster components. This is separate to discovery auth
|
|
||||||
# configuration because discovery & scraping are two separate concerns in
|
|
||||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
||||||
# the cluster. Otherwise, more config options have to be provided within the
|
|
||||||
# <kubernetes_sd_config>.
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
# If your node certificates are self-signed or use a different CA to the
|
|
||||||
# master CA, then disable certificate verification below. Note that
|
|
||||||
# certificate verification is an integral part of a secure infrastructure
|
|
||||||
# so this should only be disabled in a controlled environment. You can
|
|
||||||
# disable certificate verification by uncommenting the line below.
|
|
||||||
#
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
# Keep only the default/kubernetes service endpoints for the https port. This
|
|
||||||
# will add targets for each API server which Kubernetes adds an endpoint to
|
|
||||||
# the default/kubernetes service.
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
|
||||||
action: keep
|
|
||||||
regex: default;kubernetes;https
|
|
||||||
|
|
||||||
- job_name: 'kubernetes-nodes'
|
|
||||||
|
|
||||||
# Default to scraping over https. If required, just disable this or change to
|
|
||||||
# `http`.
|
|
||||||
scheme: https
|
|
||||||
|
|
||||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
||||||
# endpoints for cluster components. This is separate to discovery auth
|
|
||||||
# configuration because discovery & scraping are two separate concerns in
|
|
||||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
||||||
# the cluster. Otherwise, more config options have to be provided within the
|
|
||||||
# <kubernetes_sd_config>.
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
# If your node certificates are self-signed or use a different CA to the
|
|
||||||
# master CA, then disable certificate verification below. Note that
|
|
||||||
# certificate verification is an integral part of a secure infrastructure
|
|
||||||
# so this should only be disabled in a controlled environment. You can
|
|
||||||
# disable certificate verification by uncommenting the line below.
|
|
||||||
#
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: node
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: kubernetes.default.svc:443
|
|
||||||
- source_labels: [__meta_kubernetes_node_name]
|
|
||||||
regex: (.+)
|
|
||||||
target_label: __metrics_path__
|
|
||||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
|
||||||
|
|
||||||
|
|
||||||
- job_name: 'kubernetes-nodes-cadvisor'
|
|
||||||
|
|
||||||
# Default to scraping over https. If required, just disable this or change to
|
|
||||||
# `http`.
|
|
||||||
scheme: https
|
|
||||||
|
|
||||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
||||||
# endpoints for cluster components. This is separate to discovery auth
|
|
||||||
# configuration because discovery & scraping are two separate concerns in
|
|
||||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
||||||
# the cluster. Otherwise, more config options have to be provided within the
|
|
||||||
# <kubernetes_sd_config>.
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
# If your node certificates are self-signed or use a different CA to the
|
|
||||||
# master CA, then disable certificate verification below. Note that
|
|
||||||
# certificate verification is an integral part of a secure infrastructure
|
|
||||||
# so this should only be disabled in a controlled environment. You can
|
|
||||||
# disable certificate verification by uncommenting the line below.
|
|
||||||
#
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: node
|
|
||||||
|
|
||||||
# This configuration will work only on kubelet 1.7.3+
|
|
||||||
# As the scrape endpoints for cAdvisor have changed
|
|
||||||
# if you are using older version you need to change the replacement to
|
|
||||||
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
|
|
||||||
# more info here https://github.com/coreos/prometheus-operator/issues/633
|
|
||||||
relabel_configs:
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: kubernetes.default.svc:443
|
|
||||||
- source_labels: [__meta_kubernetes_node_name]
|
|
||||||
regex: (.+)
|
|
||||||
target_label: __metrics_path__
|
|
||||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
|
||||||
|
|
||||||
# Metric relabel configs to apply to samples before ingestion.
|
|
||||||
# [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
|
|
||||||
# metric_relabel_configs:
|
|
||||||
# - action: labeldrop
|
|
||||||
# regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
|
|
||||||
|
|
||||||
# Scrape config for service endpoints.
|
|
||||||
#
|
|
||||||
# The relabeling allows the actual service scrape endpoint to be configured
|
|
||||||
# via the following annotations:
|
|
||||||
#
|
|
||||||
# * `prometheus.io/scrape`: Only scrape services that have a value of
|
|
||||||
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
|
|
||||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
||||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
||||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
||||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
||||||
# service then set this appropriately.
|
|
||||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
|
||||||
# then you can set any parameter
|
|
||||||
- job_name: 'kubernetes-service-endpoints'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: endpoints
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
|
||||||
action: keep
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
|
||||||
action: drop
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
||||||
action: replace
|
|
||||||
target_label: __scheme__
|
|
||||||
regex: (https?)
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
||||||
action: replace
|
|
||||||
target_label: __metrics_path__
|
|
||||||
regex: (.+)
|
|
||||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
||||||
action: replace
|
|
||||||
target_label: __address__
|
|
||||||
regex: (.+?)(?::\d+)?;(\d+)
|
|
||||||
replacement: $1:$2
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
|
||||||
replacement: __param_$1
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_label_(.+)
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
action: replace
|
|
||||||
target_label: namespace
|
|
||||||
- source_labels: [__meta_kubernetes_service_name]
|
|
||||||
action: replace
|
|
||||||
target_label: service
|
|
||||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
|
||||||
action: replace
|
|
||||||
target_label: node
|
|
||||||
|
|
||||||
# Scrape config for slow service endpoints; same as above, but with a larger
|
|
||||||
# timeout and a larger interval
|
|
||||||
#
|
|
||||||
# The relabeling allows the actual service scrape endpoint to be configured
|
|
||||||
# via the following annotations:
|
|
||||||
#
|
|
||||||
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
|
|
||||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
||||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
||||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
||||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
||||||
# service then set this appropriately.
|
|
||||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
|
||||||
# then you can set any parameter
|
|
||||||
- job_name: 'kubernetes-service-endpoints-slow'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
scrape_interval: 5m
|
|
||||||
scrape_timeout: 30s
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: endpoints
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
|
||||||
action: keep
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
||||||
action: replace
|
|
||||||
target_label: __scheme__
|
|
||||||
regex: (https?)
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
||||||
action: replace
|
|
||||||
target_label: __metrics_path__
|
|
||||||
regex: (.+)
|
|
||||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
||||||
action: replace
|
|
||||||
target_label: __address__
|
|
||||||
regex: (.+?)(?::\d+)?;(\d+)
|
|
||||||
replacement: $1:$2
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
|
||||||
replacement: __param_$1
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_label_(.+)
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
action: replace
|
|
||||||
target_label: namespace
|
|
||||||
- source_labels: [__meta_kubernetes_service_name]
|
|
||||||
action: replace
|
|
||||||
target_label: service
|
|
||||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
|
||||||
action: replace
|
|
||||||
target_label: node
|
|
||||||
|
|
||||||
- job_name: 'prometheus-pushgateway'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: service
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
|
||||||
action: keep
|
|
||||||
regex: pushgateway
|
|
||||||
|
|
||||||
# Example scrape config for probing services via the Blackbox Exporter.
|
|
||||||
#
|
|
||||||
# The relabeling allows the actual service scrape endpoint to be configured
|
|
||||||
# via the following annotations:
|
|
||||||
#
|
|
||||||
# * `prometheus.io/probe`: Only probe services that have a value of `true`
|
|
||||||
- job_name: 'kubernetes-services'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
metrics_path: /probe
|
|
||||||
params:
|
|
||||||
module: [http_2xx]
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: service
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
|
||||||
action: keep
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__address__]
|
|
||||||
target_label: __param_target
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: blackbox
|
|
||||||
- source_labels: [__param_target]
|
|
||||||
target_label: instance
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_label_(.+)
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
target_label: namespace
|
|
||||||
- source_labels: [__meta_kubernetes_service_name]
|
|
||||||
target_label: service
|
|
||||||
|
|
||||||
# Example scrape config for pods
|
|
||||||
#
|
|
||||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
||||||
# following annotations:
|
|
||||||
#
|
|
||||||
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
|
|
||||||
# except if `prometheus.io/scrape-slow` is set to `true` as well.
|
|
||||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
||||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
||||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
||||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
|
||||||
- job_name: 'kubernetes-pods'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: pod
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
||||||
action: keep
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
|
||||||
action: drop
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
|
||||||
action: replace
|
|
||||||
regex: (https?)
|
|
||||||
target_label: __scheme__
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
||||||
action: replace
|
|
||||||
target_label: __metrics_path__
|
|
||||||
regex: (.+)
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
|
||||||
action: replace
|
|
||||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
|
||||||
replacement: '[$2]:$1'
|
|
||||||
target_label: __address__
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
|
||||||
action: replace
|
|
||||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
|
||||||
replacement: $2:$1
|
|
||||||
target_label: __address__
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
|
||||||
replacement: __param_$1
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_pod_label_(.+)
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
action: replace
|
|
||||||
target_label: namespace
|
|
||||||
- source_labels: [__meta_kubernetes_pod_name]
|
|
||||||
action: replace
|
|
||||||
target_label: pod
|
|
||||||
- source_labels: [__meta_kubernetes_pod_phase]
|
|
||||||
regex: Pending|Succeeded|Failed|Completed
|
|
||||||
action: drop
|
|
||||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
|
||||||
action: replace
|
|
||||||
target_label: node
|
|
||||||
|
|
||||||
# Example Scrape config for pods which should be scraped slower. A useful example
|
|
||||||
# would be stackdriver-exporter which queries an API on every scrape of the pod
|
|
||||||
#
|
|
||||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
||||||
# following annotations:
|
|
||||||
#
|
|
||||||
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
|
|
||||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
||||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
||||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
||||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
|
||||||
- job_name: 'kubernetes-pods-slow'
|
|
||||||
honor_labels: true
|
|
||||||
|
|
||||||
scrape_interval: 5m
|
|
||||||
scrape_timeout: 30s
|
|
||||||
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: pod
|
|
||||||
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
|
||||||
action: keep
|
|
||||||
regex: true
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
|
||||||
action: replace
|
|
||||||
regex: (https?)
|
|
||||||
target_label: __scheme__
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
||||||
action: replace
|
|
||||||
target_label: __metrics_path__
|
|
||||||
regex: (.+)
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
|
||||||
action: replace
|
|
||||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
|
||||||
replacement: '[$2]:$1'
|
|
||||||
target_label: __address__
|
|
||||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
|
||||||
action: replace
|
|
||||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
|
||||||
replacement: $2:$1
|
|
||||||
target_label: __address__
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
|
||||||
replacement: __param_$1
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_pod_label_(.+)
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
action: replace
|
|
||||||
target_label: namespace
|
|
||||||
- source_labels: [__meta_kubernetes_pod_name]
|
|
||||||
action: replace
|
|
||||||
target_label: pod
|
|
||||||
- source_labels: [__meta_kubernetes_pod_phase]
|
|
||||||
regex: Pending|Succeeded|Failed|Completed
|
|
||||||
action: drop
|
|
||||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
|
||||||
action: replace
|
|
||||||
target_label: node
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Configuration of subcharts defined in Chart.yaml
|
|
||||||
|
|
||||||
## alertmanager sub-chart configurable values
|
|
||||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
|
|
||||||
##
|
|
||||||
alertmanager:
|
|
||||||
enabled: false
|
|
||||||
|
|
||||||
## kube-state-metrics sub-chart configurable values
|
|
||||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
|
|
||||||
##
|
|
||||||
kube-state-metrics:
|
|
||||||
## If false, kube-state-metrics sub-chart will not be installed
|
|
||||||
##
|
|
||||||
enabled: true
|
|
||||||
|
|
||||||
## prometheus-node-exporter sub-chart configurable values
|
|
||||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
|
|
||||||
##
|
|
||||||
prometheus-node-exporter:
|
|
||||||
## If false, node-exporter will not be installed
|
|
||||||
##
|
|
||||||
enabled: true
|
|
||||||
|
|
||||||
rbac:
|
|
||||||
pspEnabled: false
|
|
||||||
|
|
||||||
containerSecurityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
|
|
||||||
## prometheus-pushgateway sub-chart configurable values
|
|
||||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
|
|
||||||
##
|
|
||||||
prometheus-pushgateway:
|
|
||||||
## If false, pushgateway will not be installed
|
|
||||||
##
|
|
||||||
enabled: false
|
|
78
infrastructure/monitoring/prometheus.yaml
Normal file
78
infrastructure/monitoring/prometheus.yaml
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
# ServiceAccount referenced by the Prometheus resource
# (`serviceAccountName: prometheus`) and bound to the `prometheus`
# ClusterRole by the ClusterRoleBinding in this file.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: prometheus
|
||||||
|
---
|
||||||
|
# Cluster-wide, read-only permissions granted to Prometheus.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus
rules:
  # Core API group: objects that may be read, listed and watched.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Core API group: ConfigMaps may only be fetched (no list/watch).
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  # Ingress objects from the networking.k8s.io group.
  - apiGroups:
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  # Non-resource URL: permits GET on the /metrics path.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
|
||||||
|
---
|
||||||
|
# Binds the `prometheus` ClusterRole to the `prometheus` ServiceAccount.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    # needs to be the same as in the kustomization.yaml
    namespace: monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
|
||||||
|
---
|
||||||
|
# Prometheus instance declared as a monitoring.coreos.com/v1 custom
# resource, with a Thanos section configured for object storage.
kind: Prometheus
apiVersion: monitoring.coreos.com/v1
metadata:
  name: prometheus
spec:
  serviceAccountName: prometheus
  enableAdminAPI: false
  securityContext:
    # same as the thanos sidecar
    runAsUser: 65534
  resources:
    requests:
      memory: 400Mi
  # Local data is bounded both by age and by size.
  retention: 730d
  retentionSize: 3GiB
  # Both selectors are left empty.
  # NOTE(review): presumably intended to pick up every ServiceMonitor in
  # every namespace - confirm against the prometheus-operator API docs.
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  thanos:
    version: v0.34.1
    objectStorageConfig:
      # loads the config from a secret named thanos-objstore-config in the same namespace
      name: thanos-objstore-config
      key: thanos.yaml
|
||||||
|
---
|
||||||
|
# ClusterIP Service `prometheus`: forwards TCP 9090 to port 9090 of the
# pods carrying the label `prometheus: prometheus`.
kind: Service
apiVersion: v1
metadata:
  name: prometheus
spec:
  type: ClusterIP
  selector:
    prometheus: prometheus
  ports:
    - protocol: TCP
      port: 9090
      targetPort: 9090
|
55
infrastructure/monitoring/thanos-query.deployment.yaml
Normal file
55
infrastructure/monitoring/thanos-query.deployment.yaml
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# Thanos Query: a single-replica Deployment that serves HTTP on 10902 and
# gRPC on 10901 and fans queries out to the endpoints listed in `args`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: thanos-querier
  labels:
    app: thanos-querier
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-querier
  template:
    metadata:
      labels:
        app: thanos-querier
    spec:
      containers:
        - name: thanos
          # NOTE(review): bare image name without registry or tag -
          # presumably rewritten by a kustomize `images:` entry; confirm.
          image: thanos
          args:
            - query
            - --log.level=debug
            - --query.replica-label=replica
            # Store gateway: resolved through the SRV record of the
            # `grpc` port on the thanos-store Service.
            - --endpoint=dnssrv+_grpc._tcp.thanos-store:10901
            # Thanos sidecar of the Prometheus instance. The previous value
            # (`_grpc._tcp.prometheus:9090`) could not work: the `prometheus`
            # Service only has one unnamed port, so no `_grpc._tcp` SRV
            # record exists for it, and 9090 is the Prometheus HTTP port,
            # not the sidecar's gRPC port.
            # NOTE(review): relies on the operator-managed
            # `prometheus-operated` Service exposing a port named `grpc`
            # (10901) when `spec.thanos` is set - verify with
            # `kubectl -n monitoring get svc prometheus-operated`.
            - --endpoint=dnssrv+_grpc._tcp.prometheus-operated:10901
          ports:
            - name: http
              containerPort: 10902
            - name: grpc
              containerPort: 10901
          # Both probes hit the HTTP port by name.
          livenessProbe:
            httpGet:
              port: http
              path: /-/healthy
          readinessProbe:
            httpGet:
              port: http
              path: /-/ready
|
||||||
|
---
|
||||||
|
# Service in front of the thanos-querier pods; exposes the container's
# named `http` (10902) and `grpc` (10901) ports under the same numbers.
kind: Service
apiVersion: v1
metadata:
  name: thanos-querier
spec:
  ports:
    - name: http
      port: 10902
      targetPort: http
      protocol: TCP
    - name: grpc
      port: 10901
      targetPort: grpc
      protocol: TCP
  selector:
    app: thanos-querier
|
71
infrastructure/monitoring/thanos-store.statefulset.yaml
Normal file
71
infrastructure/monitoring/thanos-store.statefulset.yaml
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# Thanos store gateway: a single-replica Deployment that reads its
# object-storage settings from a mounted secret and keeps its working
# data in an emptyDir volume.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: thanos-store
  labels:
    app: thanos-store
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-store
  template:
    metadata:
      labels:
        app: thanos-store
        thanos-store-api: "true"
    spec:
      volumes:
        # Secret that carries the object-storage configuration file.
        - name: thanos-objstore-config
          secret:
            secretName: thanos-objstore-config
        # Scratch space for --data-dir.
        - name: thanos-data
          emptyDir: {}
      containers:
        - name: thanos
          # NOTE(review): bare image name without registry or tag -
          # presumably rewritten by a kustomize `images:` entry; confirm.
          image: thanos
          args:
            - store
            - --log.level=debug
            - --data-dir=/data
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            # File provided by the thanos-objstore-config mount below.
            - --objstore.config-file=/etc/secret/thanos.yaml
            - --index-cache-size=500MB
            - --chunk-pool-size=500MB
          ports:
            - containerPort: 10902
              name: http
            - containerPort: 10901
              name: grpc
          volumeMounts:
            - mountPath: /etc/secret
              name: thanos-objstore-config
              readOnly: true
            - mountPath: /data
              name: thanos-data
          # Both probes target the HTTP listener on 10902.
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 10902
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 10902
|
||||||
|
---
|
||||||
|
# Service in front of the thanos-store pods; the port named `grpc` is
# what the `_grpc._tcp.thanos-store` lookup in the querier args refers to.
kind: Service
apiVersion: v1
metadata:
  name: thanos-store
  labels:
    app.kubernetes.io/name: thanos-store
spec:
  selector:
    app: thanos-store
  ports:
    - name: grpc
      port: 10901
      targetPort: 10901
    - name: http
      port: 10902
      targetPort: 10902
|
@ -17,3 +17,6 @@ spec:
|
|||||||
automated:
|
automated:
|
||||||
prune: true
|
prune: true
|
||||||
selfHeal: true
|
selfHeal: true
|
||||||
|
syncOptions:
|
||||||
|
- Replace=true
|
||||||
|
# because the prometheus-operator CRDs are too large
|
||||||
|
Loading…
x
Reference in New Issue
Block a user