monitoring cleanup
This commit is contained in:
parent
d6faeb3e4c
commit
ee20223507
apps/grafana
grafana-admin.sealedsecret.yamlgrafana-auth.sealedsecret.yamlgrafana.ingress.yamlgrafana.values.yamlkustomization.yamlnamespace.yaml
infrastructure
monitoring
kustomization.yamlloki.values.yamlnamespace.yamlprometheus.values.yamlthanos-objstore-config.sealedsecret.yaml
prometheus
kluster-deployments
@ -37,11 +37,11 @@ datasources:
|
||||
datasources:
|
||||
- name: Thanos
|
||||
type: prometheus
|
||||
url: http://thanos-querier.prometheus.svc:10902
|
||||
url: http://thanos-querier.monitoring.svc:10902
|
||||
isDefault: true
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
url: http://prometheus.prometheus.svc:9090
|
||||
url: http://prometheus.monitoring.svc:9090
|
||||
isDefault: false
|
||||
|
||||
dashboardProviders:
|
28
infrastructure/monitoring/kustomization.yaml
Normal file
28
infrastructure/monitoring/kustomization.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: monitoring
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
# sealed secret holding the Thanos object-store configuration
|
||||
- thanos-objstore-config.sealedsecret.yaml
|
||||
# - loki-objstore-config.sealedsecret.yaml
|
||||
|
||||
images:
|
||||
- name: thanos
|
||||
newName: quay.io/thanos/thanos
|
||||
newTag: v0.37.2
|
||||
|
||||
|
||||
helmCharts:
|
||||
- name: loki
|
||||
releaseName: loki
|
||||
repo: https://grafana.github.io/helm-charts
|
||||
version: 6.24.0
|
||||
valuesFile: loki.values.yaml
|
||||
- name: prometheus
|
||||
releaseName: prometheus
|
||||
repo: https://prometheus-community.github.io/helm-charts
|
||||
version: 26.0.1
|
||||
valuesFile: prometheus.values.yaml
|
73
infrastructure/monitoring/loki.values.yaml
Normal file
73
infrastructure/monitoring/loki.values.yaml
Normal file
@ -0,0 +1,73 @@
|
||||
loki:
|
||||
commonConfig:
|
||||
replication_factor: 1
|
||||
schemaConfig:
|
||||
configs:
|
||||
- from: "2024-04-01"
|
||||
store: tsdb
|
||||
object_store: s3
|
||||
schema: v13
|
||||
index:
|
||||
prefix: loki_index_
|
||||
period: 24h
|
||||
pattern_ingester:
|
||||
enabled: true
|
||||
limits_config:
|
||||
allow_structured_metadata: true
|
||||
volume_enabled: true
|
||||
retention_period: 672h # 28 days retention
|
||||
ruler:
|
||||
enable_api: true
|
||||
storage:
|
||||
type: filesystem
|
||||
filesystem:
|
||||
chunks_directory: /var/loki/chunks
|
||||
rules_directory: /var/loki/rules
|
||||
admin_api_directory: /var/loki/admin
|
||||
|
||||
minio:
|
||||
enabled: false
|
||||
|
||||
deploymentMode: SingleBinary
|
||||
|
||||
singleBinary:
|
||||
replicas: 1
|
||||
persistence:
|
||||
# -- Enable StatefulSetAutoDeletePVC feature
|
||||
enableStatefulSetAutoDeletePVC: true
|
||||
# -- Enable persistent disk
|
||||
enabled: true
|
||||
# -- Size of persistent disk
|
||||
size: 10Gi
|
||||
# -- Storage class to be used.
|
||||
# If defined, storageClassName: <storageClass>.
|
||||
# If set to "-", storageClassName: "", which disables dynamic provisioning.
|
||||
# If empty or set to null, no storageClassName spec is
|
||||
# set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack).
|
||||
storageClass: nfs-client
|
||||
|
||||
# Zero out replica counts of other deployment modes
|
||||
backend:
|
||||
replicas: 0
|
||||
read:
|
||||
replicas: 0
|
||||
write:
|
||||
replicas: 0
|
||||
ingester:
|
||||
replicas: 0
|
||||
querier:
|
||||
replicas: 0
|
||||
queryFrontend:
|
||||
replicas: 0
|
||||
queryScheduler:
|
||||
replicas: 0
|
||||
distributor:
|
||||
replicas: 0
|
||||
compactor:
|
||||
replicas: 0
|
||||
indexGateway:
|
||||
replicas: 0
|
||||
bloomCompactor:
|
||||
replicas: 0
|
||||
bloomGateway:
|
||||
replicas: 0
|
573
infrastructure/monitoring/prometheus.values.yaml
Normal file
573
infrastructure/monitoring/prometheus.values.yaml
Normal file
@ -0,0 +1,573 @@
|
||||
podSecurityPolicy:
|
||||
enabled: true
|
||||
|
||||
server:
|
||||
extraArgs:
|
||||
log.level: debug
|
||||
storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
||||
storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md
|
||||
retention: 4h
|
||||
service:
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9090"
|
||||
statefulSet:
|
||||
enabled: true
|
||||
podAnnotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "10902"
|
||||
sidecarContainers:
|
||||
thanos-sidecar:
|
||||
image: thanos
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
env:
|
||||
- name: GOOGLE_APPLICATION_CREDENTIALS
|
||||
value: /etc/secret/sa
|
||||
args:
|
||||
- "sidecar"
|
||||
- "--log.level=debug"
|
||||
- "--tsdb.path=/data/"
|
||||
- "--prometheus.url=http://127.0.0.1:9090"
|
||||
- "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}"
|
||||
- "--reloader.config-file=/etc/prometheus-config/prometheus.yml"
|
||||
- "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml"
|
||||
- "--reloader.rule-dir=/etc/prometheus-config/rules"
|
||||
ports:
|
||||
- name: sidecar-http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
- name: cluster
|
||||
containerPort: 10900
|
||||
volumeMounts:
|
||||
- name: storage-volume
|
||||
mountPath: /data
|
||||
- name: thanos-storage-secret
|
||||
mountPath: /etc/secret
|
||||
- name: config-volume
|
||||
mountPath: /etc/prometheus-config
|
||||
readOnly: false
|
||||
- name: prometheus-config-shared
|
||||
mountPath: /etc/prometheus-shared/
|
||||
readOnly: false
|
||||
configPath: /etc/prometheus-shared/prometheus.yml
|
||||
replicaCount: 1
|
||||
persistentVolume:
|
||||
size: 20Gi
|
||||
extraVolumes: # spec.template.spec.volumes
|
||||
- name: prometheus-config-shared
|
||||
emptyDir: {}
|
||||
extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container
|
||||
- name: prometheus-config-shared
|
||||
mountPath: /etc/prometheus-shared/
|
||||
resources:
|
||||
requests:
|
||||
memory: 1Gi
|
||||
global:
|
||||
scrape_interval: 5s
|
||||
scrape_timeout: 4s
|
||||
external_labels:
|
||||
prometheus_group: KLUSTER
|
||||
prometheus_replica: '$(HOSTNAME)'
|
||||
evaluation_interval: 5s
|
||||
extraSecretMounts:
|
||||
- name: thanos-objstore-config
|
||||
mountPath: /etc/secret/
|
||||
subPath: sa
|
||||
readOnly: false
|
||||
secretName: thanos-storage-secret
|
||||
|
||||
# as thanos sidecar is taking care of the config reload
|
||||
# we can disable the prometheus configmap reload
|
||||
configmapReload:
|
||||
prometheus:
|
||||
enabled: false
|
||||
|
||||
## Prometheus server ConfigMap entries
|
||||
##
|
||||
serverFiles:
|
||||
## Alerts configuration
|
||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
|
||||
alerting_rules.yml: {}
|
||||
# groups:
|
||||
# - name: Instances
|
||||
# rules:
|
||||
# - alert: InstanceDown
|
||||
# expr: up == 0
|
||||
# for: 5m
|
||||
# labels:
|
||||
# severity: page
|
||||
# annotations:
|
||||
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
|
||||
# summary: 'Instance {{ $labels.instance }} down'
|
||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
|
||||
alerts: {}
|
||||
|
||||
## Records configuration
|
||||
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
|
||||
recording_rules.yml: {}
|
||||
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
|
||||
rules: {}
|
||||
|
||||
prometheus.yml:
|
||||
rule_files:
|
||||
- /etc/config/recording_rules.yml
|
||||
- /etc/config/alerting_rules.yml
|
||||
## The two files below are DEPRECATED and will be removed from this default values file
|
||||
- /etc/config/rules
|
||||
- /etc/config/alerts
|
||||
|
||||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
# A scrape configuration for running Prometheus on a Kubernetes cluster.
|
||||
# This uses separate scrape configs for cluster components (i.e. API server, node)
|
||||
# and services to allow each to use different authentication configs.
|
||||
#
|
||||
# Kubernetes labels will be added as Prometheus labels on metrics via the
|
||||
# `labelmap` relabeling action.
|
||||
|
||||
# Scrape config for API servers.
|
||||
#
|
||||
# Kubernetes exposes API servers as endpoints to the default/kubernetes
|
||||
# service so this uses `endpoints` role and uses relabelling to only keep
|
||||
# the endpoints associated with the default/kubernetes service using the
|
||||
# default named port `https`. This works for single API server deployments as
|
||||
# well as HA API server deployments.
|
||||
- job_name: 'kubernetes-apiservers'
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification with the setting below (already active here).
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
# Keep only the default/kubernetes service endpoints for the https port. This
|
||||
# will add targets for each API server which Kubernetes adds an endpoint to
|
||||
# the default/kubernetes service.
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
|
||||
- job_name: 'kubernetes-nodes'
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification with the setting below (already active here).
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
|
||||
|
||||
- job_name: 'kubernetes-nodes-cadvisor'
|
||||
|
||||
# Default to scraping over https. If required, just disable this or change to
|
||||
# `http`.
|
||||
scheme: https
|
||||
|
||||
# This TLS & bearer token file config is used to connect to the actual scrape
|
||||
# endpoints for cluster components. This is separate to discovery auth
|
||||
# configuration because discovery & scraping are two separate concerns in
|
||||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
||||
# the cluster. Otherwise, more config options have to be provided within the
|
||||
# <kubernetes_sd_config>.
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# If your node certificates are self-signed or use a different CA to the
|
||||
# master CA, then disable certificate verification below. Note that
|
||||
# certificate verification is an integral part of a secure infrastructure
|
||||
# so this should only be disabled in a controlled environment. You can
|
||||
# disable certificate verification with the setting below (already active here).
|
||||
#
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
|
||||
# This configuration will work only on kubelet 1.7.3+
|
||||
# As the scrape endpoints for cAdvisor have changed
|
||||
# if you are using older version you need to change the replacement to
|
||||
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
|
||||
# more info here https://github.com/coreos/prometheus-operator/issues/633
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
|
||||
# Metric relabel configs to apply to samples before ingestion.
|
||||
# [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
|
||||
# metric_relabel_configs:
|
||||
# - action: labeldrop
|
||||
# regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
|
||||
|
||||
# Scrape config for service endpoints.
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape`: Only scrape services that have a value of
|
||||
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
||||
# service then set this appropriately.
|
||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
||||
# then you can set any parameter
|
||||
- job_name: 'kubernetes-service-endpoints'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: (.+?)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: service
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
# Scrape config for slow service endpoints; same as above, but with a larger
|
||||
# timeout and a larger interval
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
||||
# service then set this appropriately.
|
||||
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
|
||||
# then you can set any parameter
|
||||
- job_name: 'kubernetes-service-endpoints-slow'
|
||||
honor_labels: true
|
||||
|
||||
scrape_interval: 5m
|
||||
scrape_timeout: 30s
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: (.+?)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: service
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
- job_name: 'prometheus-pushgateway'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
||||
action: keep
|
||||
regex: pushgateway
|
||||
|
||||
# Example scrape config for probing services via the Blackbox Exporter.
|
||||
#
|
||||
# The relabeling allows the actual service scrape endpoint to be configured
|
||||
# via the following annotations:
|
||||
#
|
||||
# * `prometheus.io/probe`: Only probe services that have a value of `true`
|
||||
- job_name: 'kubernetes-services'
|
||||
honor_labels: true
|
||||
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- target_label: __address__
|
||||
replacement: blackbox
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: service
|
||||
|
||||
# Example scrape config for pods
|
||||
#
|
||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
||||
# following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
|
||||
# except if `prometheus.io/scrape-slow` is set to `true` as well.
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
||||
- job_name: 'kubernetes-pods'
|
||||
honor_labels: true
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
regex: (https?)
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
||||
replacement: '[$2]:$1'
|
||||
target_label: __address__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
||||
replacement: $2:$1
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod
|
||||
- source_labels: [__meta_kubernetes_pod_phase]
|
||||
regex: Pending|Succeeded|Failed|Completed
|
||||
action: drop
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
# Example scrape config for pods which should be scraped slower. A useful example
|
||||
# would be stackdriver-exporter which queries an API on every scrape of the pod
|
||||
#
|
||||
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
||||
# following annotations:
|
||||
#
|
||||
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
|
||||
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
||||
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
||||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
||||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
|
||||
- job_name: 'kubernetes-pods-slow'
|
||||
honor_labels: true
|
||||
|
||||
scrape_interval: 5m
|
||||
scrape_timeout: 30s
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
regex: (https?)
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
|
||||
replacement: '[$2]:$1'
|
||||
target_label: __address__
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
|
||||
action: replace
|
||||
regex: (\d+);((([0-9]+?)(\.|$)){4})
|
||||
replacement: $2:$1
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
|
||||
replacement: __param_$1
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod
|
||||
- source_labels: [__meta_kubernetes_pod_phase]
|
||||
regex: Pending|Succeeded|Failed|Completed
|
||||
action: drop
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: node
|
||||
|
||||
|
||||
|
||||
|
||||
# Configuration of subcharts defined in Chart.yaml
|
||||
|
||||
## alertmanager sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
|
||||
##
|
||||
alertmanager:
|
||||
enabled: false
|
||||
|
||||
## kube-state-metrics sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
|
||||
##
|
||||
kube-state-metrics:
|
||||
## If false, kube-state-metrics sub-chart will not be installed
|
||||
##
|
||||
enabled: true
|
||||
|
||||
## prometheus-node-exporter sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
|
||||
##
|
||||
prometheus-node-exporter:
|
||||
## If false, node-exporter will not be installed
|
||||
##
|
||||
enabled: true
|
||||
|
||||
rbac:
|
||||
pspEnabled: false
|
||||
|
||||
containerSecurityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
|
||||
## prometheus-pushgateway sub-chart configurable values
|
||||
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
|
||||
##
|
||||
prometheus-pushgateway:
|
||||
## If false, pushgateway will not be installed
|
||||
##
|
||||
enabled: false
|
@ -0,0 +1,16 @@
|
||||
---
|
||||
apiVersion: bitnami.com/v1alpha1
|
||||
kind: SealedSecret
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: thanos-objstore-config
|
||||
namespace: monitoring
|
||||
spec:
|
||||
encryptedData:
|
||||
thanos.yaml: AgAqlul2V1idfgbWvq/0ljSFlxOOsQmwlGd+jRvDDyi1nlR8woHrp7lW6AxJ/8mBtb5htCuJzLgx+HVrN/EN+fRn5xG3D5+8xs4jWBOQ49MgLSAjJavFPcVY5xiBpGaw/N8aotlbfv6Wa2/+cmiAzVDPwnOj5zCS/EU58Tu2YFeVSbMUlu0NFAeyBW0DVT2enuVLToP4Ge4T0U9F99NHOh2zlVG82iI+4RxCu/WBkOU/urVleGwCYkcr/ItmXiwRXbwnWUtEUf28Q4ArpuZXFkKZUMoIwOjkXgOn/ySBLVvf0yy1+WOcYAIX9ouxu6i4T1GAZO9RnKeMJOIyebI3EOMA2dxQFpQg2/XhhHz2Ds2oDX/yr7vXbZJGyiCvTnnFUvFALKWIjRXXWphdqHDk6iP8tFIKVFsn7UxgMVFRcs6DmcMpBgFOcjpHr4HFZap5G9hI3cscmkNfwU+JOXkDEGRpZkkECza4wlQln8Wptq1qa+I+DSclqLOcvoEvNCJCIIgh5tINJ0KiZcrBvymUZZ9VduH4TFHR/UQK7M7It892TDNUlIp2UDWiuQ2DJysOJXmvSiNo8PGWSyDJwKJPhaWqXz9RUsb4D8gq/a+0qC7DOICrJEUj7WL8dwaKoQa32Cf+wopwrjFWSE7pAfiBJo+Dqa9jHIDv2hVsdU8NXqiFK35XHyUT4i0KWc+UZg4ObotGxYMvRtJuc3S7ZGTJ4YKDP5iThuNSuNd1pd1YjirpvVtL2o5BYh2i55F3DfVREofYpBCjK1e43mHOwEUYZ7Ff6p1+S0PXZnkL53xHMiiW3yr0v1g2ZYk7vzkENb9epzm24fNX/4ZiJdb0glEJmB674bgDSeh9PA5q8nJIKk6vsbrzfaAYWIn5Ai9MPbAVfg9pPkMyy9ydd+SqecujkWm++4dHqB1WJUg=
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: thanos-objstore-config
|
||||
namespace: monitoring
|
||||
type: Opaque
|
@ -1,20 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: prometheus
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
# prometheus-operator crds
|
||||
- https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.70.0/bundle.yaml
|
||||
- prometheus.yaml
|
||||
- thanos-objstore-config.sealedsecret.yaml
|
||||
# thanos deployment from kube-thanos project
|
||||
- thanos-store.statefulset.yaml
|
||||
- thanos-query.deployment.yaml
|
||||
|
||||
|
||||
images:
|
||||
- name: thanos
|
||||
newName: quay.io/thanos/thanos
|
||||
newTag: v0.37.2
|
@ -1,78 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
verbs: ["get"]
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
- nonResourceURLs: ["/metrics"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus
|
||||
namespace: prometheus # needs to be the same as in the kustomization.yaml
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: prometheus
|
||||
spec:
|
||||
securityContext:
|
||||
runAsUser: 65534 # same as the thanos sidecar
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
|
||||
retention: 730d
|
||||
retentionSize: 3GiB
|
||||
serviceAccountName: prometheus
|
||||
enableAdminAPI: false
|
||||
serviceMonitorNamespaceSelector: {}
|
||||
serviceMonitorSelector: {}
|
||||
thanos:
|
||||
version: v0.34.1
|
||||
objectStorageConfig:
|
||||
# loads the config from a secret named thanos-objstore-config in the same namespace
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore-config
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9090
|
||||
targetPort: 9090
|
||||
protocol: TCP
|
||||
selector:
|
||||
prometheus: prometheus
|
@ -1,16 +0,0 @@
|
||||
---
|
||||
apiVersion: bitnami.com/v1alpha1
|
||||
kind: SealedSecret
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: thanos-objstore-config
|
||||
namespace: prometheus
|
||||
spec:
|
||||
encryptedData:
|
||||
thanos.yaml: AgByW/LKzPh0QeNsHR8Us4bJ/0chIQErhfh5plY1tjqiZyNLlxZ+NygYYzVggW02k4gAsKs68trbLBbeTTEhpKYP8hUphNb13lrgp07wYpOQjUF57i6RjPM2QNJpO0qLSk/nOPIOtR3XKn+nXxdJDmh3j5y0zxVz5O7MLh7adwOaHlyWTLMJjI1cda8YljDp2FYs24lHHMw4gXAYUecGDJNQqw5Xy9IiGh8kBbcKe3j6bVCj1yxPbHszmvZ2s+Q+mnndXnoeLMhwjZhMF8/PETxmSZ2bs41k3lHm/2rcPQCJsl9CuJEGKhu6ndKrVhtury4/US/FheEOoGF0YZk/AQMHII/mxy8haPNxtQTDs4rfYz/BA8cMMZll44wxOY9gAOmhm3sG6GI9wcB1Z65p98xSuDaInknO80l07vwMAAvmrZbT53Fmefrxl+jE1pImcGEsL0MfP621nTXlOBW9keF+6aUOubrwjPKKSXdqZU21acNbaIeRQSJyaOBStAKLfnPFmaryGisgNu0hCk/WmszZ0/s/ilvdMdAD6kKoiKL/NWfXtHATh/fnd76bKfSzNQk6e+WWfomToYVU0HRgAaWnIzjB9Q4tjxkbRwteEodU+K1BvD4xQ0sfQB2vHlDjQGC3pjIUFCWG0SzQGb7oe6+X2CJpcNIBHwF661iELJpJkg8dLsPtwb+8Rj6BL+ZtyVKYv18nDNON0WVpwJb/IHHSmxfYD5b/q6fATCFj55IXK5Nr4VO65a2Sv5Iv0/TTUVkwb8dkMmwfs5qcQiZ4oKWx8Ol6GkjDZrFARUtHQ/9KiZ9xDj3tPic2TeQfKr27sgc4lEL8RSxaRKHkkxIAioea3YgFfBm7ZfoxMlzJnQ1vI2vDvJcRXhWKSGdXiKOddwLSVMZFsSRRi9AxH87Sjt7j1wvsA7xgBqc=
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: thanos-objstore-config
|
||||
namespace: prometheus
|
||||
type: Opaque
|
@ -1,55 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
labels:
|
||||
app: thanos-querier
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-querier
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-querier
|
||||
spec:
|
||||
containers:
|
||||
- name: thanos
|
||||
image: thanos
|
||||
args:
|
||||
- query
|
||||
- --log.level=debug
|
||||
- --query.replica-label=replica
|
||||
- --endpoint=dnssrv+_grpc._tcp.thanos-store:10901
|
||||
- --endpoint=dnssrv+_grpc._tcp.prometheus:9090
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
port: http
|
||||
path: /-/healthy
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
port: http
|
||||
path: /-/ready
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
spec:
|
||||
selector:
|
||||
app: thanos-querier
|
||||
ports:
|
||||
- name: http
|
||||
protocol: TCP
|
||||
port: 10902
|
||||
targetPort: http
|
||||
- name: grpc
|
||||
protocol: TCP
|
||||
port: 10901
|
||||
targetPort: grpc
|
@ -1,71 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: thanos-store
|
||||
labels:
|
||||
app: thanos-store
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-store
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-store
|
||||
thanos-store-api: "true"
|
||||
spec:
|
||||
containers:
|
||||
- name: thanos
|
||||
image: thanos
|
||||
args:
|
||||
- store
|
||||
- --log.level=debug
|
||||
- --data-dir=/data
|
||||
- --grpc-address=0.0.0.0:10901
|
||||
- --http-address=0.0.0.0:10902
|
||||
- --objstore.config-file=/etc/secret/thanos.yaml
|
||||
- --index-cache-size=500MB
|
||||
- --chunk-pool-size=500MB
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
port: 10902
|
||||
path: /-/healthy
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
port: 10902
|
||||
path: /-/ready
|
||||
volumeMounts:
|
||||
- name: thanos-objstore-config
|
||||
mountPath: /etc/secret
|
||||
readOnly: true
|
||||
- name: thanos-data
|
||||
mountPath: /data
|
||||
volumes:
|
||||
- name: thanos-objstore-config
|
||||
secret:
|
||||
secretName: thanos-objstore-config
|
||||
- name: thanos-data
|
||||
emptyDir: {}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: thanos-store
|
||||
name: thanos-store
|
||||
spec:
|
||||
ports:
|
||||
- name: grpc
|
||||
port: 10901
|
||||
targetPort: 10901
|
||||
- name: http
|
||||
port: 10902
|
||||
targetPort: 10902
|
||||
selector:
|
||||
app: thanos-store
|
18
kluster-deployments/grafana/application.yaml
Normal file
18
kluster-deployments/grafana/application.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: grafana-application
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: apps
|
||||
source:
|
||||
repoURL: ssh://git@git.kluster.moll.re:2222/remoll/k3s-infra.git
|
||||
targetRevision: main
|
||||
path: apps/grafana
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: grafana
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
0
kluster-deployments/monitoring/kustomization.yaml → kluster-deployments/grafana/kustomization.yaml
0
kluster-deployments/monitoring/kustomization.yaml → kluster-deployments/grafana/kustomization.yaml
@ -20,7 +20,7 @@ resources:
|
||||
- traefik/
|
||||
- external-dns/
|
||||
- external-services/
|
||||
- prometheus/application.yaml
|
||||
- monitoring/application.yaml
|
||||
- authelia/
|
||||
|
||||
# simple apps
|
||||
@ -35,7 +35,7 @@ resources:
|
||||
- linkding/
|
||||
- media/
|
||||
- minecraft/application.yaml
|
||||
- monitoring/
|
||||
- grafana/
|
||||
- ntfy/
|
||||
- paperless/
|
||||
- recipes/
|
||||
|
@ -1,14 +1,15 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: monitoring-application
|
||||
name: prometheus-application
|
||||
namespace: argocd
|
||||
|
||||
spec:
|
||||
project: apps
|
||||
project: infrastructure
|
||||
source:
|
||||
repoURL: ssh://git@git.kluster.moll.re:2222/remoll/k3s-infra.git
|
||||
repoURL: git@github.com:moll-re/bootstrap-k3s-infra.git
|
||||
targetRevision: main
|
||||
path: apps/monitoring
|
||||
path: infrastructure/prometheus
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: monitoring
|
||||
|
@ -1,22 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: prometheus-application
|
||||
namespace: argocd
|
||||
|
||||
spec:
|
||||
project: infrastructure
|
||||
source:
|
||||
repoURL: git@github.com:moll-re/bootstrap-k3s-infra.git
|
||||
targetRevision: main
|
||||
path: infrastructure/prometheus
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: monitoring
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- Replace=true
|
||||
# because the prom crds exceed the default 256Ki limit
|
Loading…
x
Reference in New Issue
Block a user