monitoring cleanup

This commit is contained in:
Remy Moll 2024-12-23 22:40:35 +01:00
parent d6faeb3e4c
commit ee20223507
21 changed files with 717 additions and 270 deletions

@ -37,11 +37,11 @@ datasources:
datasources:
- name: Thanos
type: prometheus
url: http://thanos-querier.prometheus.svc:10902
url: http://thanos-querier.monitoring.svc:10902
isDefault: true
- name: Prometheus
type: prometheus
url: http://prometheus.prometheus.svc:9090
url: http://prometheus.monitoring.svc:9090
isDefault: false
dashboardProviders:

@ -0,0 +1,28 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- namespace.yaml
# prometheus-operator crds
- thanos-objstore-config.sealedsecret.yaml
# - loki-objstore-config.sealedsecret.yaml
images:
- name: thanos
newName: quay.io/thanos/thanos
newTag: v0.37.2
helmCharts:
- name: loki
releaseName: loki
repo: https://grafana.github.io/helm-charts
version: 6.24.0
valuesFile: loki.values.yaml
- name: prometheus
releaseName: prometheus
repo: https://prometheus-community.github.io/helm-charts
version: 26.0.1
valuesFile: prometheus.values.yaml

@ -0,0 +1,73 @@
loki:
commonConfig:
replication_factor: 1
schemaConfig:
configs:
- from: "2024-04-01"
store: tsdb
object_store: s3
schema: v13
index:
prefix: loki_index_
period: 24h
pattern_ingester:
enabled: true
limits_config:
allow_structured_metadata: true
volume_enabled: true
retention_period: 672h # 28 days retention
ruler:
enable_api: true
storage:
type: filesystem
filesystem:
chunks_directory: /var/loki/chunks
rules_directory: /var/loki/rules
admin_api_directory: /var/loki/admin
minio:
enabled: false
deploymentMode: SingleBinary
singleBinary:
replicas: 1
persistence:
# -- Enable StatefulSetAutoDeletePVC feature
enableStatefulSetAutoDeletePVC: true
# -- Enable persistent disk
enabled: true
# -- Size of persistent disk
size: 10Gi
# -- Storage class to be used.
# If defined, storageClassName: <storageClass>.
# If set to "-", storageClassName: "", which disables dynamic provisioning.
# If empty or set to null, no storageClassName spec is
# set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack).
storageClass: nfs-client
# Zero out replica counts of other deployment modes
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
ingester:
replicas: 0
querier:
replicas: 0
queryFrontend:
replicas: 0
queryScheduler:
replicas: 0
distributor:
replicas: 0
compactor:
replicas: 0
indexGateway:
replicas: 0
bloomCompactor:
replicas: 0
bloomGateway:
replicas: 0

@ -0,0 +1,573 @@
podSecurityPolicy:
enabled: true
server:
extraArgs:
log.level: debug
storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md
storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md
retention: 4h
service:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9090"
statefulSet:
enabled: true
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "10902"
sidecarContainers:
thanos-sidecar:
image: thanos
resources:
requests:
memory: "512Mi"
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /etc/secret/sa
args:
- "sidecar"
- "--log.level=debug"
- "--tsdb.path=/data/"
- "--prometheus.url=http://127.0.0.1:9090"
- "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}"
- "--reloader.config-file=/etc/prometheus-config/prometheus.yml"
- "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml"
- "--reloader.rule-dir=/etc/prometheus-config/rules"
ports:
- name: sidecar-http
containerPort: 10902
- name: grpc
containerPort: 10901
- name: cluster
containerPort: 10900
volumeMounts:
- name: storage-volume
mountPath: /data
- name: thanos-storage-secret
mountPath: /etc/secret
- name: config-volume
mountPath: /etc/prometheus-config
readOnly: false
- name: prometheus-config-shared
mountPath: /etc/prometheus-shared/
readOnly: false
configPath: /etc/prometheus-shared/prometheus.yml
replicaCount: 1
persistentVolume:
size: 20Gi
extraVolumes: # spec.template.spec.volumes
- name: prometheus-config-shared
emptyDir: {}
extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container
- name: prometheus-config-shared
mountPath: /etc/prometheus-shared/
resources:
requests:
memory: 1Gi
global:
scrape_interval: 5s
scrape_timeout: 4s
external_labels:
prometheus_group: KLUSTER
prometheus_replica: '$(HOSTNAME)'
evaluation_interval: 5s
extraSecretMounts:
- name: thanos-objstore-config
mountPath: /etc/secret/
subPath: sa
readOnly: false
secretName: thanos-storage-secret
# as thanos sidecar is taking care of the config reload
# we can disable the prometheus configmap reload
configmapReload:
prometheus:
enabled: false
## Prometheus server ConfigMap entries
##
serverFiles:
## Alerts configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
alerting_rules.yml: {}
# groups:
# - name: Instances
# rules:
# - alert: InstanceDown
# expr: up == 0
# for: 5m
# labels:
# severity: page
# annotations:
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
# summary: 'Instance {{ $labels.instance }} down'
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
alerts: {}
## Records configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
recording_rules.yml: {}
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
rules: {}
prometheus.yml:
rule_files:
- /etc/config/recording_rules.yml
- /etc/config/alerting_rules.yml
## Below two files are DEPRECATED will be removed from this default values file
- /etc/config/rules
- /etc/config/alerts
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-nodes'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics
- job_name: 'kubernetes-nodes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# This configuration will work only on kubelet 1.7.3+
# As the scrape endpoints for cAdvisor have changed
# if you are using older version you need to change the replacement to
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
# more info here https://github.com/coreos/prometheus-operator/issues/633
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
# Metric relabel configs to apply to samples before ingestion.
# [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
# metric_relabel_configs:
# - action: labeldrop
# regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: 'kubernetes-service-endpoints'
honor_labels: true
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
action: drop
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: (.+?)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
# Scrape config for slow service endpoints; same as above, but with a larger
# timeout and a larger interval
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: 'kubernetes-service-endpoints-slow'
honor_labels: true
scrape_interval: 5m
scrape_timeout: 30s
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: (.+?)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
- job_name: 'prometheus-pushgateway'
honor_labels: true
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: pushgateway
# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/probe`: Only probe services that have a value of `true`
- job_name: 'kubernetes-services'
honor_labels: true
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: service
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
# except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods'
honor_labels: true
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: drop
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
# Example Scrape config for pods which should be scraped slower. An useful example
# would be stackriver-exporter which queries an API on every scrape of the pod
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods-slow'
honor_labels: true
scrape_interval: 5m
scrape_timeout: 30s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
# Configuration of subcharts defined in Chart.yaml
## alertmanager sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
##
alertmanager:
enabled: false
## kube-state-metrics sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
##
kube-state-metrics:
## If false, kube-state-metrics sub-chart will not be installed
##
enabled: true
## prometheus-node-exporter sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
##
prometheus-node-exporter:
## If false, node-exporter will not be installed
##
enabled: true
rbac:
pspEnabled: false
containerSecurityContext:
allowPrivilegeEscalation: false
## prometheus-pushgateway sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
##
prometheus-pushgateway:
## If false, pushgateway will not be installed
##
enabled: false

@ -0,0 +1,16 @@
---
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: thanos-objstore-config
namespace: monitoring
spec:
encryptedData:
thanos.yaml: AgAqlul2V1idfgbWvq/0ljSFlxOOsQmwlGd+jRvDDyi1nlR8woHrp7lW6AxJ/8mBtb5htCuJzLgx+HVrN/EN+fRn5xG3D5+8xs4jWBOQ49MgLSAjJavFPcVY5xiBpGaw/N8aotlbfv6Wa2/+cmiAzVDPwnOj5zCS/EU58Tu2YFeVSbMUlu0NFAeyBW0DVT2enuVLToP4Ge4T0U9F99NHOh2zlVG82iI+4RxCu/WBkOU/urVleGwCYkcr/ItmXiwRXbwnWUtEUf28Q4ArpuZXFkKZUMoIwOjkXgOn/ySBLVvf0yy1+WOcYAIX9ouxu6i4T1GAZO9RnKeMJOIyebI3EOMA2dxQFpQg2/XhhHz2Ds2oDX/yr7vXbZJGyiCvTnnFUvFALKWIjRXXWphdqHDk6iP8tFIKVFsn7UxgMVFRcs6DmcMpBgFOcjpHr4HFZap5G9hI3cscmkNfwU+JOXkDEGRpZkkECza4wlQln8Wptq1qa+I+DSclqLOcvoEvNCJCIIgh5tINJ0KiZcrBvymUZZ9VduH4TFHR/UQK7M7It892TDNUlIp2UDWiuQ2DJysOJXmvSiNo8PGWSyDJwKJPhaWqXz9RUsb4D8gq/a+0qC7DOICrJEUj7WL8dwaKoQa32Cf+wopwrjFWSE7pAfiBJo+Dqa9jHIDv2hVsdU8NXqiFK35XHyUT4i0KWc+UZg4ObotGxYMvRtJuc3S7ZGTJ4YKDP5iThuNSuNd1pd1YjirpvVtL2o5BYh2i55F3DfVREofYpBCjK1e43mHOwEUYZ7Ff6p1+S0PXZnkL53xHMiiW3yr0v1g2ZYk7vzkENb9epzm24fNX/4ZiJdb0glEJmB674bgDSeh9PA5q8nJIKk6vsbrzfaAYWIn5Ai9MPbAVfg9pPkMyy9ydd+SqecujkWm++4dHqB1WJUg=
template:
metadata:
creationTimestamp: null
name: thanos-objstore-config
namespace: monitoring
type: Opaque

@ -1,20 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: prometheus
resources:
- namespace.yaml
# prometheus-operator crds
- https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.70.0/bundle.yaml
- prometheus.yaml
- thanos-objstore-config.sealedsecret.yaml
# thanos deployment from kube-thanos project
- thanos-store.statefulset.yaml
- thanos-query.deployment.yaml
images:
- name: thanos
newName: quay.io/thanos/thanos
newTag: v0.37.2

@ -1,78 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/metrics
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: prometheus # needs to be the same as in the kustomization.yaml
---
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: prometheus
spec:
securityContext:
runAsUser: 65534 # same as the thanos sidecar
resources:
requests:
memory: 400Mi
retention: 730d
retentionSize: 3GiB
serviceAccountName: prometheus
enableAdminAPI: false
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
thanos:
version: v0.34.1
objectStorageConfig:
# loads the config from a secret named thanos-objstore-config in the same namespace
key: thanos.yaml
name: thanos-objstore-config
---
apiVersion: v1
kind: Service
metadata:
name: prometheus
spec:
type: ClusterIP
ports:
- port: 9090
targetPort: 9090
protocol: TCP
selector:
prometheus: prometheus

@ -1,16 +0,0 @@
---
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: thanos-objstore-config
namespace: prometheus
spec:
encryptedData:
thanos.yaml: AgByW/LKzPh0QeNsHR8Us4bJ/0chIQErhfh5plY1tjqiZyNLlxZ+NygYYzVggW02k4gAsKs68trbLBbeTTEhpKYP8hUphNb13lrgp07wYpOQjUF57i6RjPM2QNJpO0qLSk/nOPIOtR3XKn+nXxdJDmh3j5y0zxVz5O7MLh7adwOaHlyWTLMJjI1cda8YljDp2FYs24lHHMw4gXAYUecGDJNQqw5Xy9IiGh8kBbcKe3j6bVCj1yxPbHszmvZ2s+Q+mnndXnoeLMhwjZhMF8/PETxmSZ2bs41k3lHm/2rcPQCJsl9CuJEGKhu6ndKrVhtury4/US/FheEOoGF0YZk/AQMHII/mxy8haPNxtQTDs4rfYz/BA8cMMZll44wxOY9gAOmhm3sG6GI9wcB1Z65p98xSuDaInknO80l07vwMAAvmrZbT53Fmefrxl+jE1pImcGEsL0MfP621nTXlOBW9keF+6aUOubrwjPKKSXdqZU21acNbaIeRQSJyaOBStAKLfnPFmaryGisgNu0hCk/WmszZ0/s/ilvdMdAD6kKoiKL/NWfXtHATh/fnd76bKfSzNQk6e+WWfomToYVU0HRgAaWnIzjB9Q4tjxkbRwteEodU+K1BvD4xQ0sfQB2vHlDjQGC3pjIUFCWG0SzQGb7oe6+X2CJpcNIBHwF661iELJpJkg8dLsPtwb+8Rj6BL+ZtyVKYv18nDNON0WVpwJb/IHHSmxfYD5b/q6fATCFj55IXK5Nr4VO65a2Sv5Iv0/TTUVkwb8dkMmwfs5qcQiZ4oKWx8Ol6GkjDZrFARUtHQ/9KiZ9xDj3tPic2TeQfKr27sgc4lEL8RSxaRKHkkxIAioea3YgFfBm7ZfoxMlzJnQ1vI2vDvJcRXhWKSGdXiKOddwLSVMZFsSRRi9AxH87Sjt7j1wvsA7xgBqc=
template:
metadata:
creationTimestamp: null
name: thanos-objstore-config
namespace: prometheus
type: Opaque

@ -1,55 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: thanos-querier
labels:
app: thanos-querier
spec:
replicas: 1
selector:
matchLabels:
app: thanos-querier
template:
metadata:
labels:
app: thanos-querier
spec:
containers:
- name: thanos
image: thanos
args:
- query
- --log.level=debug
- --query.replica-label=replica
- --endpoint=dnssrv+_grpc._tcp.thanos-store:10901
- --endpoint=dnssrv+_grpc._tcp.prometheus:9090
ports:
- name: http
containerPort: 10902
- name: grpc
containerPort: 10901
livenessProbe:
httpGet:
port: http
path: /-/healthy
readinessProbe:
httpGet:
port: http
path: /-/ready
---
apiVersion: v1
kind: Service
metadata:
name: thanos-querier
spec:
selector:
app: thanos-querier
ports:
- name: http
protocol: TCP
port: 10902
targetPort: http
- name: grpc
protocol: TCP
port: 10901
targetPort: grpc

@ -1,71 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: thanos-store
labels:
app: thanos-store
spec:
replicas: 1
selector:
matchLabels:
app: thanos-store
template:
metadata:
labels:
app: thanos-store
thanos-store-api: "true"
spec:
containers:
- name: thanos
image: thanos
args:
- store
- --log.level=debug
- --data-dir=/data
- --grpc-address=0.0.0.0:10901
- --http-address=0.0.0.0:10902
- --objstore.config-file=/etc/secret/thanos.yaml
- --index-cache-size=500MB
- --chunk-pool-size=500MB
ports:
- name: http
containerPort: 10902
- name: grpc
containerPort: 10901
livenessProbe:
httpGet:
port: 10902
path: /-/healthy
readinessProbe:
httpGet:
port: 10902
path: /-/ready
volumeMounts:
- name: thanos-objstore-config
mountPath: /etc/secret
readOnly: true
- name: thanos-data
mountPath: /data
volumes:
- name: thanos-objstore-config
secret:
secretName: thanos-objstore-config
- name: thanos-data
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/name: thanos-store
name: thanos-store
spec:
ports:
- name: grpc
port: 10901
targetPort: 10901
- name: http
port: 10902
targetPort: 10902
selector:
app: thanos-store

@ -0,0 +1,18 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: grafana-application
namespace: argocd
spec:
project: apps
source:
repoURL: ssh://git@git.kluster.moll.re:2222/remoll/k3s-infra.git
targetRevision: main
path: apps/grafana
destination:
server: https://kubernetes.default.svc
namespace: grafana
syncPolicy:
automated:
prune: true
selfHeal: true

@ -20,7 +20,7 @@ resources:
- traefik/
- external-dns/
- external-services/
- prometheus/application.yaml
- monitoring/application.yaml
- authelia/
# simple apps
@ -35,7 +35,7 @@ resources:
- linkding/
- media/
- minecraft/application.yaml
- monitoring/
- grafana/
- ntfy/
- paperless/
- recipes/

@ -1,14 +1,15 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: monitoring-application
name: prometheus-application
namespace: argocd
spec:
project: apps
project: infrastructure
source:
repoURL: ssh://git@git.kluster.moll.re:2222/remoll/k3s-infra.git
repoURL: git@github.com:moll-re/bootstrap-k3s-infra.git
targetRevision: main
path: apps/monitoring
path: infrastructure/prometheus
destination:
server: https://kubernetes.default.svc
namespace: monitoring

@ -1,22 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: prometheus-application
namespace: argocd
spec:
project: infrastructure
source:
repoURL: git@github.com:moll-re/bootstrap-k3s-infra.git
targetRevision: main
path: infrastructure/prometheus
destination:
server: https://kubernetes.default.svc
namespace: monitoring
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- Replace=true
# because the prom crds exceed the default 256Ki limit