monitoring: switch back to prometheus-operator

Remy Moll 2025-01-05 16:26:46 +01:00
parent 2a56392af0
commit 16161bafb7
13 changed files with 319 additions and 593 deletions

View File

@ -37,7 +37,7 @@ datasources:
 datasources:
   - name: Prometheus
     type: prometheus
-    url: http://prometheus-server.monitoring.svc:80
+    url: http://prometheus.monitoring.svc:9090
     isDefault: true
   - name: Thanos
     type: prometheus

View File

@ -3,4 +3,6 @@ kind: ConfigMap
 metadata:
   name: argocd-cmd-params-cm
 data:
-  server.insecure: "true"
+  # server.insecure: "true"
+  # DID NOT FIX RELOAD LOOPS
+  # application.namespaces: "*"

View File

@ -7,3 +7,4 @@ data:
   # switch to annotation based resource tracking as per
   # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_tracking/
   application.resourceTrackingMethod: annotation+label
+  admin.enabled: "false"

View File

@ -9,16 +9,9 @@ spec:
 routes:
   - kind: Rule
     match: Host(`argocd.kluster.moll.re`)
-    priority: 10
     services:
       - name: argocd-server
-        port: 80
-  - kind: Rule
-    match: Host(`argocd.kluster.moll.re`) && Header(`Content-Type`, `application/grpc`)
-    priority: 11
-    services:
-      - name: argocd-server
-        port: 80
-        scheme: h2c
+        port: 443
+        scheme: https
 tls:
   certResolver: default-tls
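
Context for this hunk, not part of the commit: with server.insecure commented out, argocd-server terminates TLS itself (by default with a self-signed certificate), which is why the single remaining route now targets port 443 with scheme https. If Traefik rejects that backend certificate, the usual fix is a ServersTransport that skips verification — a sketch, with an illustrative name:

apiVersion: traefik.io/v1alpha1
kind: ServersTransport
metadata:
  name: argocd-transport # illustrative name, not part of this commit
spec:
  insecureSkipVerify: true # accept argocd-server's self-signed certificate

The route's service entry would then reference it via serversTransport: argocd-transport.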

View File

@ -4,14 +4,15 @@ kind: Kustomization
 namespace: argocd
 resources:
   - namespace.yaml
-  - https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.1/manifests/install.yaml
+  - https://raw.githubusercontent.com/argoproj/argo-cd/v2.13.3/manifests/install.yaml
   - ingress.yaml
   - argo-apps.application.yaml
   - bootstrap-repo.sealedsecret.yaml
   - argocd-oauth.sealedsecret.yaml
+  - servicemonitor.yaml
   # DID NOT FIX RELOAD LOOPS
   # - github.com/argoproj/argo-cd/examples/k8s-rbac/argocd-server-applications?ref=master
 components:
   - https://github.com/argoproj-labs/argocd-extensions/manifests
 patches:
   - path: argocd.configmap.yaml

View File

@ -0,0 +1,77 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-metrics
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-metrics
  endpoints:
    - port: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-server-metrics
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-server-metrics
  endpoints:
    - port: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-repo-server-metrics
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-repo-server
  endpoints:
    - port: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-applicationset-controller-metrics
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-applicationset-controller
  endpoints:
    - port: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-dex-server
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-dex-server
  endpoints:
    - port: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-redis-haproxy-metrics
  labels:
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-redis-ha-haproxy
  endpoints:
    - port: http-exporter-port
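
For orientation (not part of the diff): each monitor selects a Service shipped by the Argo CD install manifest via its app.kubernetes.io/name label and scrapes the named port. Assuming the upstream manifest, the first monitor binds to a Service shaped roughly like this sketch (port 8082 is the controller's metrics port in upstream Argo CD):

apiVersion: v1
kind: Service
metadata:
  name: argocd-metrics
  labels:
    app.kubernetes.io/name: argocd-metrics
spec:
  selector:
    app.kubernetes.io/name: argocd-application-controller
  ports:
    - name: metrics # must match the ServiceMonitor endpoint's port name
      port: 8082
      targetPort: 8082

The release: prometheus-operator label is not strictly required here, since the Prometheus resource below selects monitors with an empty serviceMonitorSelector.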

View File

@ -6,8 +6,13 @@ namespace: monitoring
 resources:
   - namespace.yaml
+  # prometheus-operator crds
+  - https://github.com/prometheus-operator/prometheus-operator?ref=v0.79.2
+  # single prometheus instance with a thanos sidecar
+  - prometheus.yaml
   - thanos-store.statefulset.yaml
   - thanos-query.deployment.yaml
   - thanos-objstore-config.sealedsecret.yaml
   # - loki-objstore-config.sealedsecret.yaml
 images:
   - name: thanos
@ -21,8 +26,8 @@ helmCharts:
     repo: https://grafana.github.io/helm-charts
     version: 6.24.0
     valuesFile: loki.values.yaml
-  - name: prometheus
-    releaseName: prometheus
+  - name: prometheus-node-exporter
+    releaseName: prometheus-node-exporter
     repo: https://prometheus-community.github.io/helm-charts
-    version: 26.0.1
-    valuesFile: prometheus.values.yaml
+    version: 4.43.1
+    valuesFile: prometheus-node-exporter.values.yaml

View File

@ -0,0 +1,14 @@
prometheus:
  monitor:
    enabled: true
    jobLabel: "node-exporter"

resources:
  limits:
    cpu: 200m
    memory: 50Mi
  requests:
    cpu: 100m
    memory: 30Mi
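
With prometheus.monitor.enabled set, the node-exporter chart renders its own ServiceMonitor, so no hand-written one is needed for it. Roughly what the chart emits — a sketch of the rendered output, abbreviated:

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-node-exporter
spec:
  jobLabel: node-exporter
  selector:
    matchLabels:
      app.kubernetes.io/name: prometheus-node-exporter
  endpoints:
    - port: metrics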

View File

@ -1,574 +0,0 @@
podSecurityPolicy:
  enabled: true

server:
  extraArgs:
    log.level: debug
    storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md
    storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md
  retention: 180d
  service:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "9090"
  statefulSet:
    enabled: true
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "10902"
  # sidecarContainers:
  #   thanos-sidecar:
  #     image: thanos
  #     resources:
  #       requests:
  #         memory: "512Mi"
  #     env:
  #       - name: GOOGLE_APPLICATION_CREDENTIALS
  #         value: /etc/secret/sa
  #     args:
  #       - "sidecar"
  #       - "--log.level=debug"
  #       - "--tsdb.path=/data/"
  #       - "--prometheus.url=http://127.0.0.1:9090"
  #       - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}"
  #       - "--reloader.config-file=/etc/prometheus-config/prometheus.yml"
  #       - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml"
  #       - "--reloader.rule-dir=/etc/prometheus-config/rules"
  #     ports:
  #       - name: sidecar-http
  #         containerPort: 10902
  #       - name: grpc
  #         containerPort: 10901
  #       - name: cluster
  #         containerPort: 10900
  #     volumeMounts:
  #       - name: storage-volume
  #         mountPath: /data
  #       - name: thanos-storage-secret
  #         mountPath: /etc/secret
  #       - name: config-volume
  #         mountPath: /etc/prometheus-config
  #         readOnly: false
  #       - name: prometheus-config-shared
  #         mountPath: /etc/prometheus-shared/
  #         readOnly: false
  # # configPath: /etc/prometheus-shared/prometheus.yml
  # replicaCount: 1
  # persistentVolume:
  #   size: 20Gi
  #   storageClass: nfs-client
  # extraVolumes: # spec.template.spec.volumes
  #   - name: prometheus-config-shared
  #     emptyDir: {}
  # extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container
  #   - name: prometheus-config-shared
  #     mountPath: /etc/prometheus-shared/
  # resources:
  #   requests:
  #     memory: 1Gi
  # global:
  #   scrape_interval: 5s
  #   scrape_timeout: 4s
  #   external_labels:
  #     prometheus_group: KLUSTER
  #     prometheus_replica: '$(HOSTNAME)'
  #   evaluation_interval: 5s
  # extraSecretMounts:
  #   - name: thanos-storage-secret
  #     mountPath: /etc/secret/
  #     subPath: sa
  #     readOnly: false
  #     secretName: thanos-objstore-config

# as the thanos sidecar is taking care of the config reload
# we can disable the prometheus configmap reload
configmapReload:
  prometheus:
    enabled: false

## Prometheus server ConfigMap entries
##
serverFiles:
  ## Alerts configuration
  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
  alerting_rules.yml: {}
  # groups:
  #   - name: Instances
  #     rules:
  #       - alert: InstanceDown
  #         expr: up == 0
  #         for: 5m
  #         labels:
  #           severity: page
  #         annotations:
  #           description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
  #           summary: 'Instance {{ $labels.instance }} down'
  ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
  alerts: {}

  ## Records configuration
  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
  recording_rules.yml: {}
  ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
  rules: {}

  prometheus.yml:
    rule_files:
      - /etc/config/recording_rules.yml
      - /etc/config/alerting_rules.yml
      ## Below two files are DEPRECATED and will be removed from this default values file
      - /etc/config/rules
      - /etc/config/alerts

    scrape_configs:
      - job_name: prometheus
        static_configs:
          - targets:
              - localhost:9090

      # A scrape configuration for running Prometheus on a Kubernetes cluster.
      # This uses separate scrape configs for cluster components (i.e. API server, node)
      # and services to allow each to use different authentication configs.
      #
      # Kubernetes labels will be added as Prometheus labels on metrics via the
      # `labelmap` relabeling action.

      # Scrape config for API servers.
      #
      # Kubernetes exposes API servers as endpoints to the default/kubernetes
      # service so this uses `endpoints` role and uses relabelling to only keep
      # the endpoints associated with the default/kubernetes service using the
      # default named port `https`. This works for single API server deployments as
      # well as HA API server deployments.
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        # Default to scraping over https. If required, just disable this or change to
        # `http`.
        scheme: https
        # This TLS & bearer token file config is used to connect to the actual scrape
        # endpoints for cluster components. This is separate to discovery auth
        # configuration because discovery & scraping are two separate concerns in
        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
        # the cluster. Otherwise, more config options have to be provided within the
        # <kubernetes_sd_config>.
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # If your node certificates are self-signed or use a different CA to the
          # master CA, then disable certificate verification below. Note that
          # certificate verification is an integral part of a secure infrastructure
          # so this should only be disabled in a controlled environment. You can
          # disable certificate verification by uncommenting the line below.
          #
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        # Keep only the default/kubernetes service endpoints for the https port. This
        # will add targets for each API server which Kubernetes adds an endpoint to
        # the default/kubernetes service.
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      - job_name: 'kubernetes-nodes'
        # Default to scraping over https. If required, just disable this or change to
        # `http`.
        scheme: https
        # This TLS & bearer token file config is used to connect to the actual scrape
        # endpoints for cluster components. This is separate to discovery auth
        # configuration because discovery & scraping are two separate concerns in
        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
        # the cluster. Otherwise, more config options have to be provided within the
        # <kubernetes_sd_config>.
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # If your node certificates are self-signed or use a different CA to the
          # master CA, then disable certificate verification below. Note that
          # certificate verification is an integral part of a secure infrastructure
          # so this should only be disabled in a controlled environment. You can
          # disable certificate verification by uncommenting the line below.
          #
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/$1/proxy/metrics

      - job_name: 'kubernetes-nodes-cadvisor'
        # Default to scraping over https. If required, just disable this or change to
        # `http`.
        scheme: https
        # This TLS & bearer token file config is used to connect to the actual scrape
        # endpoints for cluster components. This is separate to discovery auth
        # configuration because discovery & scraping are two separate concerns in
        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
        # the cluster. Otherwise, more config options have to be provided within the
        # <kubernetes_sd_config>.
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # If your node certificates are self-signed or use a different CA to the
          # master CA, then disable certificate verification below. Note that
          # certificate verification is an integral part of a secure infrastructure
          # so this should only be disabled in a controlled environment. You can
          # disable certificate verification by uncommenting the line below.
          #
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        # This configuration will work only on kubelet 1.7.3+,
        # as the scrape endpoints for cAdvisor have changed.
        # If you are using an older version you need to change the replacement to
        # replacement: /api/v1/nodes/$1:4194/proxy/metrics
        # more info here https://github.com/coreos/prometheus-operator/issues/633
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
        # Metric relabel configs to apply to samples before ingestion.
        # [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
        # metric_relabel_configs:
        #   - action: labeldrop
        #     regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)

      # Scrape config for service endpoints.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape services that have a value of
      #   `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      #   service then set this appropriately.
      # * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
      #   then you can set any parameter
      - job_name: 'kubernetes-service-endpoints'
        honor_labels: true
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
            action: drop
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+?)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
            replacement: __param_$1
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: service
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: node

      # Scrape config for slow service endpoints; same as above, but with a larger
      # timeout and a larger interval
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      #   service then set this appropriately.
      # * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
      #   then you can set any parameter
      - job_name: 'kubernetes-service-endpoints-slow'
        honor_labels: true
        scrape_interval: 5m
        scrape_timeout: 30s
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+?)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
            replacement: __param_$1
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: service
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: node

      - job_name: 'prometheus-pushgateway'
        honor_labels: true
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: pushgateway

      # Example scrape config for probing services via the Blackbox Exporter.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/probe`: Only probe services that have a value of `true`
      - job_name: 'kubernetes-services'
        honor_labels: true
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: service

      # Example scrape config for pods
      #
      # The relabeling allows the actual pod scrape endpoint to be configured via the
      # following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
      #   except if `prometheus.io/scrape-slow` is set to `true` as well.
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
      - job_name: 'kubernetes-pods'
        honor_labels: true
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
            action: drop
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
            action: replace
            regex: (https?)
            target_label: __scheme__
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
            action: replace
            regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
            replacement: '[$2]:$1'
            target_label: __address__
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
            action: replace
            regex: (\d+);((([0-9]+?)(\.|$)){4})
            replacement: $2:$1
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
            replacement: __param_$1
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod
          - source_labels: [__meta_kubernetes_pod_phase]
            regex: Pending|Succeeded|Failed|Completed
            action: drop
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: node

      # Example scrape config for pods which should be scraped slower. A useful example
      # would be stackdriver-exporter which queries an API on every scrape of the pod
      #
      # The relabeling allows the actual pod scrape endpoint to be configured via the
      # following annotations:
      #
      # * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
      - job_name: 'kubernetes-pods-slow'
        honor_labels: true
        scrape_interval: 5m
        scrape_timeout: 30s
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
            action: replace
            regex: (https?)
            target_label: __scheme__
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
            action: replace
            regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
            replacement: '[$2]:$1'
            target_label: __address__
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
            action: replace
            regex: (\d+);((([0-9]+?)(\.|$)){4})
            replacement: $2:$1
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
            replacement: __param_$1
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod
          - source_labels: [__meta_kubernetes_pod_phase]
            regex: Pending|Succeeded|Failed|Completed
            action: drop
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: node

# Configuration of subcharts defined in Chart.yaml

## alertmanager sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
##
alertmanager:
  enabled: false

## kube-state-metrics sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
##
kube-state-metrics:
  ## If false, kube-state-metrics sub-chart will not be installed
  ##
  enabled: true

## prometheus-node-exporter sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
##
prometheus-node-exporter:
  ## If false, node-exporter will not be installed
  ##
  enabled: true
  rbac:
    pspEnabled: false
  containerSecurityContext:
    allowPrivilegeEscalation: false

## prometheus-pushgateway sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
##
prometheus-pushgateway:
  ## If false, pushgateway will not be installed
  ##
  enabled: false
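
One consequence of dropping this file (noted here for context, not part of the commit): the annotation-driven jobs above (prometheus.io/scrape and friends) have no one-to-one ServiceMonitor equivalent. If any workloads still rely on those annotations, the Prometheus resource can carry raw scrape configs via its additionalScrapeConfigs field — a sketch, with an illustrative secret name:

spec:
  additionalScrapeConfigs:
    name: additional-scrape-configs # hypothetical secret holding raw scrape_configs
    key: scrape-configs.yaml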

View File

@ -0,0 +1,78 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  - apiGroups:
      - networking.k8s.io
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring # needs to be the same as in the kustomization.yaml
---
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: prometheus
spec:
  securityContext:
    runAsUser: 65534 # same as the thanos sidecar
  resources:
    requests:
      memory: 400Mi
  retention: 730d
  retentionSize: 3GiB
  serviceAccountName: prometheus
  enableAdminAPI: false
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  thanos:
    version: v0.34.1
    objectStorageConfig:
      # loads the config from a secret named thanos-objstore-config in the same namespace
      key: thanos.yaml
      name: thanos-objstore-config
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
      protocol: TCP
  selector:
    prometheus: prometheus
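
The referenced secret (sealed in thanos-objstore-config.sealedsecret.yaml) must carry a thanos.yaml entry in the Thanos objstore config format. An S3-backed example with placeholder values — the actual backend and credentials live in the sealed secret:

type: S3
config:
  bucket: <bucket-name>
  endpoint: <s3-endpoint>
  access_key: <access-key>
  secret_key: <secret-key>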

View File

@ -0,0 +1,55 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: thanos-querier
  labels:
    app: thanos-querier
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-querier
  template:
    metadata:
      labels:
        app: thanos-querier
    spec:
      containers:
        - name: thanos
          image: thanos
          args:
            - query
            - --log.level=debug
            - --query.replica-label=replica
            - --endpoint=dnssrv+_grpc._tcp.thanos-store:10901
            # NOTE: with dnssrv+ the port comes from the SRV record itself; the
            # thanos sidecar serves gRPC on 10901, not 9090, so this endpoint
            # presumably only resolves if the prometheus Service exposes a grpc port
            - --endpoint=dnssrv+_grpc._tcp.prometheus:9090
          ports:
            - name: http
              containerPort: 10902
            - name: grpc
              containerPort: 10901
          livenessProbe:
            httpGet:
              port: http
              path: /-/healthy
          readinessProbe:
            httpGet:
              port: http
              path: /-/ready
---
apiVersion: v1
kind: Service
metadata:
  name: thanos-querier
spec:
  selector:
    app: thanos-querier
  ports:
    - name: http
      protocol: TCP
      port: 10902
      targetPort: http
    - name: grpc
      protocol: TCP
      port: 10901
      targetPort: grpc

View File

@ -0,0 +1,71 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: thanos-store
  labels:
    app: thanos-store
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-store
  template:
    metadata:
      labels:
        app: thanos-store
        thanos-store-api: "true"
    spec:
      containers:
        - name: thanos
          image: thanos
          args:
            - store
            - --log.level=debug
            - --data-dir=/data
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            - --objstore.config-file=/etc/secret/thanos.yaml
            - --index-cache-size=500MB
            - --chunk-pool-size=500MB
          ports:
            - name: http
              containerPort: 10902
            - name: grpc
              containerPort: 10901
          livenessProbe:
            httpGet:
              port: 10902
              path: /-/healthy
          readinessProbe:
            httpGet:
              port: 10902
              path: /-/ready
          volumeMounts:
            - name: thanos-objstore-config
              mountPath: /etc/secret
              readOnly: true
            - name: thanos-data
              mountPath: /data
      volumes:
        - name: thanos-objstore-config
          secret:
            secretName: thanos-objstore-config
        - name: thanos-data
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/name: thanos-store
  name: thanos-store
spec:
  ports:
    - name: grpc
      port: 10901
      targetPort: 10901
    - name: http
      port: 10902
      targetPort: 10902
  selector:
    app: thanos-store
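
The bare image: thanos in the deployments above is resolved by the images: transformer declared in the monitoring kustomization.yaml. A sketch of what such an entry looks like — newName and newTag here are placeholders, the real values live in that file:

images:
  - name: thanos
    newName: quay.io/thanos/thanos # placeholder
    newTag: v0.34.1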

View File

@ -17,3 +17,6 @@ spec:
     automated:
       prune: true
       selfHeal: true
+    syncOptions:
+      - Replace=true
+      # because the prometheus-operator CRDs are too large
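
Context for the flag above, not part of the commit: Replace=true sidesteps the ~256 KiB annotation size limit that the kubectl.kubernetes.io/last-applied-configuration annotation hits on the bundled prometheus-operator CRDs. Argo CD's server-side apply option is a common alternative that avoids the annotation entirely — a sketch:

  syncPolicy:
    syncOptions:
      - ServerSideApply=true # alternative to Replace=true for oversized CRDs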