From f8e902881043182dfdce3f384f6bb594f884b7ad Mon Sep 17 00:00:00 2001 From: Remy Moll Date: Mon, 23 Dec 2024 22:42:35 +0100 Subject: [PATCH] monitoring fixes --- apps/grafana/grafana-admin.sealedsecret.yaml | 8 +- apps/grafana/grafana-auth.sealedsecret.yaml | 6 +- apps/grafana/grafana.values.yaml | 15 ++- apps/grafana/kustomization.yaml | 2 +- infrastructure/monitoring/loki.values.yaml | 17 ++- infrastructure/monitoring/namespace.yaml | 2 + .../monitoring/prometheus.values.yaml | 127 +++++++++--------- kluster-deployments/grafana/application.yaml | 2 + .../monitoring/application.yaml | 4 +- 9 files changed, 103 insertions(+), 80 deletions(-) diff --git a/apps/grafana/grafana-admin.sealedsecret.yaml b/apps/grafana/grafana-admin.sealedsecret.yaml index c436b34..ec7d040 100644 --- a/apps/grafana/grafana-admin.sealedsecret.yaml +++ b/apps/grafana/grafana-admin.sealedsecret.yaml @@ -4,14 +4,14 @@ kind: SealedSecret metadata: creationTimestamp: null name: grafana-admin-secret - namespace: monitoring + namespace: grafana spec: encryptedData: - password: AgAwMLnsYN1y8JQSqgGQbNG/8jKensTDsEw6ogITdkhDRlJcg8HQ5t7a6xLzNCrLHLJiQW8YOoyLT4lvFkBRMOa2EYcrDvBiRD0PjygWLIscKa7dA+jpAUf/icD9zsiDnTym2yf+VUANcmEgE6DiNvlcsrcmYqiR4pKVUTDlKPNOjOpTJ3nXETb3/sbt69E0JSGwtkvusYQSXKLU9KLbciihv+ycdkdlC9xy9myd4+vYZYXSh/eAvyZeb/hsmdSX7yaASmupMvet6Qsdt99PNzFQxtbQH+LQvYalVZ8bjWZQvCN/p0bA4H15otKBfe8rtEwVthgvyEvo6TK0Mg0pFY/b3AOGFmImnT3rDmgG6S8KTZH0Jce17ksFqvELQmHjqHuYpQsPDl44glM8kWRJ9Mf/Z424LRwZlJNVcOkuVl4qFqPUjzd2rWIyF0RaD0BE012C0ThJxKn2l17lVJbNtdUiR3qNpW01ot2m0CgKd2kXbjDmgRgAll4WgrukfCIn9ZnE0gVCFLJuK3MOQAaipFYy/bDO0izwl9T8nldgcI8OfiC3NTk2O+Es5jJRXu0oJGaC3HrTB7wXiwOoELvAsxLTPxKBiN9mCHCMtZX0PEtrio0dFRQ6Pi5xPng0KVT0I9dvGNsPdhPETNOB913WEvbgP8Gt3cj016nCzk51eUsYbXPpNL2B4kmbIhecqW/8kwKQPwYjVlBSXj3NxjzwMY6PvOl1 - user: AgBqmjCYGMqy5zBE+vhtsynOvhWdHWDJDyl1D+laBtLjXTJwzRbNTdunHYo1ekwyqQ6Cr5pi4YMiLxAl1LIHF+Lfsp2QlY+ResAGzp9WgSBtNQDX3EmLDQofeWxMUDdMtMsE9wiKLCfNGDkRDsGquXTz+YFq03m1vH9cB8Bp+1ClWOTui+/Ce0MZlWsJZX1W8WXH7XTirtwUo0s53pc4AplUUH97ZEK3KSIxWa3gLCn0sAPDDLPX+JVA2xtpMq1XuVFiFifjzEtG2h0dejiF35FtSAR+rR4YmEfimk3QpRDfOqV5QUxvjCG+dTV49upSevF2mvbHW+o+lB6vEc6l9cZXvlbnMdaep3NmOsJcJ8wQIdFpFK4iVzFOTKSEbzLPlZ/J+sjS5vDXsfthorIO2faMA1iIf+I663zNxQU5btaK4TNYOZQlrFVjAmioRLkDhGZ6tDUPX/zMv+Crt+0HCwyEyhmvFZckDvezTZrxARSXXMKBVcvjHCyUNkz7ubZRiMU0PGM7fYuHr659e+XMRvj+LFA68ZaEIzCQpCFJenWWYAXgUdRG4LQ1LP2MwvRHpkOYSoRkHIpX7jOfhX82A60h/ta/CdbWifqNyL9OecvE3FKsZu/Kr0taw9W6nm6FBhQLgFkOnFrqp9dWnxfHruXuDBgcn0iE8nR7Ht2zS7hfQPeR4a3Y0xK3Plqbzdrb9HKnWQQhf14= + password: AgAU6g/CwKj+1gPpt4DLvLsS0YCvJdVHWw4W4bRhibE9brVvcJtGB3D9MTJrSLVVwusaE6OR59og7oW5ge3yTd/9bbclXYLrxEi7OwvkQjCvo8MfD8yhJO9nV4Xs9Mjk2Z4SHGYuq6wvcssuJrpz5f0XEC7ocTRA+u0UaE+/b4FrYF71uyKGvj8GSXgLZUjGPFsGfPzwJn7cLBmlclVHx1xGbFpUc042m5Mulpn0QolFQnOwZiW4PL8pQyz1MXVRwCsz0RJd5apZL3XJ4X7BLMoAp+diHQ2xi3zoU9VScp+J2QgvFdRKgDa6v7Jz1f+HCwq5W/DoegwFXBrcMIfF2YrnvTnc1PCVwD9IHOeylO7J2hfi8teQiqTvvRlVgdBTLqoqlVovemf5k6ke6JfjTwnsJjTNnL7MKN5Qt0o7N2XRZ3ba9jp8cKbI7fyFQKaU2QEf2PIkp82kEnixmpA1aATgeA3W4E5Km7sKHUEB81+pwnOe54tzD2ShgQX/+UiswhWYTT+gdZKL1udBBemUDC0z9PSJNTPTy+hq+G4CIzVQUYxlioM3c+3geF7YLU8yXisj84pk44GN9KX3z5x+M2+LZL7agAWPUjxtrP2V+id7dNJQfCm0aSMeo57dVfb4zlBUAAgKIKjX+j1KqCVqE9zEO2F/QX7mY6MJTP2me3wmY7JAVRJ7d6bbkyyoDhs8JErLYLp0A+Eh+qx8nWgM9ErPVSA0 + user: AgB8ZLG2EuERjg1nKdH/xadbUuIR2c8a9gF5fE8ctrp4DNDLLuuqmjyoHRiWpkrtfnE1yKg1rPP+asV9Lj5iVmE9J+OB3QUOeFS4MHciBNj7pa68zfFgnHP4kxMX6aXyKRQrYruYjHwfzCpOM1zyTEphuGlnokjQXxjF/mZsoM2NWn7WGReqfxqH95tJXfs9AUC5vVv/PHqd+KKRZH7+G1AnWVJ7RFQHedR7wyftO4/rkm8deMuZWtOLl25fAOyOr7+hSqT69s9/uTKSLJXjobSqtulqsR+v5lkwx2ThNKzmcEcuoenKG6lk8XLRSIscccZH3JTPh6IknQWUOC4nmYj+XUxE8Go0RX/4eL+D/6FrYrtp0gr3HOCLAGU4vAHMeKfJoyqykJVnvY6QY6bFgaziyOlWaoEHpg6g0vHHDwyX7HIDcQfJZGOLH9dhrWJ2sOkzyuuxfqWEgz/M2eBW4EUAudHwfTLPocSMUI+D6fjeciMojet5uxWMP7ZHh/E061f5+Vfk6CKYd9Kpi69Xah8KEyyHYP5NImkdIwjgllaEAd/FBE2+QJyTVZlUQC7y9ObagDMCUFaFbTS5QOLh5BOJDL5buEYFWG0IhoH47SC/pKeEOQH//uvoo27K9zvxTOQN1YOTrxCozmexMOsTIdhvU0dOnJDBrThSHKYLCeIokDOgUUT52FqDH51RoLoK3UkyGbMoq+M= template: metadata: creationTimestamp: null name: grafana-admin-secret - namespace: monitoring + namespace: grafana type: Opaque diff --git a/apps/grafana/grafana-auth.sealedsecret.yaml b/apps/grafana/grafana-auth.sealedsecret.yaml index 400a548..576e201 100644 --- a/apps/grafana/grafana-auth.sealedsecret.yaml +++ b/apps/grafana/grafana-auth.sealedsecret.yaml @@ -4,13 +4,13 @@ kind: SealedSecret metadata: creationTimestamp: null name: grafana-auth - namespace: monitoring + namespace: grafana spec: encryptedData: - client_secret: AgCcKsnS3u2eI+fNVC9hAZ3QRFOHFErAzs5aQgX51CSdJwM03SZUoTyrDi5JPcHUVyS3MbevFH5piMhDTARMI3bLOjYlcwMbpf77JCPa7o95Y9asA/FW3lXicYt3biN9xBXJBz7Ws3fVRtEzyf6DmbGedT9gaX8aPwrUVbP19RdyJiuu76oB1A/jdUkX4K+X6kVvmoP/BWdypk/kdQJrzBNt00DIXF4NHfYey36AuhpBtqYZs4faA/tBXMXLE4RxPNtcHwNfVjnRj3v3qzNufD1fnweJvLq2UfLMrQjoR9XDVnM0zkpautylkI7yrvcoEH7ljnf6b1FMogOEZUfH1BIdqTd/WwrrlCqE58OPfJWthIfN+pQ8LvdHsGo3jc9gXvfXS2cStyhP06eTZ4D79kG+RtDQGOsD/Wpx7EcM6hbB3+dIjcs3wEAIGjpIVtY9JayW8YeRnFApMuhDST1+hscm+LdoGvaSTlAuGzv9BbVrPX/Fo9XKeYHlbG/x71Er+vF8WbW0wUa46MHLvbEy376XIdJDYi+vjl4eqznZ6YhvPbawhoKXT8ZcKUcUAjVcMue/O/jCSPZplbn3vdSCeqPTiqVqDw9PTMIeWFUepgPMxiGpFRAqdwIecFBnYItq0dXoGlFrZpo0S6AECgZjxzUR5EgdkdPlDDs2CN+d9yP7f2S+gmL7AIlQr74NW1GrTGw2x/rD4IJhunh7 + client_secret: AgCEdC1/ERlPQyQP+bd9gcW33Yrvl4uRbx+RF5AY4vYAquOzxmLTygMl/WZlB5wlCE5idIHgto6/fUWVZrQbmfClRqsW2pFoddKQAtS9cQNXwMjLCm7e0lXk9GM9O3ZwktmklFbCu8XewHmefGHhoJ28vPxPMaINv1fM4zYKvNz5RHf0dJfTHgxb68wRYjAbE/eJpRcVE3a29Yw6Gfa8Mb+cFI7RTHvjuv9LBgWqM6b3qvvJ4wYR2WKuiQrnJ5xAtHpMAI/2R80qq151wlaZueDZ1PwjRBHURkmPTmwZnrMrmIugNge7Tpww+ArZlG9kDfSu1aTJidbXbcpN6fyt1qARTCYrBlbn60PTYLnPL/NObvMCpjS6DsYsYz7MJ7WoOupu46Ib5paZHmak+CilC6lb9LjJj4EKfRsagZmWT07JavhHBW/tqjB3GToccIz4fOAOdA9aU51J4wCL2ctp2SgzCEKe2EaBK/f9nDd9ASmmon9PDwRDVtG8yTukrNcZHNzodi09Af81DB0RNa36Z3Sjt5xu94paN+mjiOWGf2JduVEq+60NbPvDbPE9e1aVH3DdQcij2WGZaTE8dAGLSsLoOkIq3m2E+Mbk1Re1gI9H18xJM72ivb5uDe7pzReyvO5DY4Pfq8JgQhPxWcDq9ScmWS6Bb+jdCKytFq5NafSAl+akPbbwN+1GFu33if/P5D9I2TwOA8V1wyVU template: metadata: creationTimestamp: null name: grafana-auth - namespace: monitoring + namespace: grafana type: Opaque diff --git a/apps/grafana/grafana.values.yaml b/apps/grafana/grafana.values.yaml index 7148172..78144d4 100644 --- a/apps/grafana/grafana.values.yaml +++ b/apps/grafana/grafana.values.yaml @@ -35,13 +35,17 @@ datasources: datasources.yaml: apiVersion: 1 datasources: + - name: Prometheus + type: prometheus + url: http://prometheus-server.monitoring.svc:80 + isDefault: true - name: Thanos type: prometheus url: http://thanos-querier.monitoring.svc:10902 - isDefault: true - - name: Prometheus - type: prometheus - url: http://prometheus.monitoring.svc:9090 + isDefault: false + - name: Loki + type: loki + url: http://loki.monitoring.svc:3100 isDefault: false dashboardProviders: @@ -90,4 +94,5 @@ grafana.ini: api_url: https://auth.kluster.moll.re/api/oidc/authorization/userinfo tls_skip_verify_insecure: true auto_login: true - use_pkce: true \ No newline at end of file + use_pkce: true + role_attribute_path: contains(groups[*], 'apps_admin') && 'Admin' || 'Editor' diff --git a/apps/grafana/kustomization.yaml b/apps/grafana/kustomization.yaml index c1dd517..234a1ea 100644 --- a/apps/grafana/kustomization.yaml +++ b/apps/grafana/kustomization.yaml @@ -1,7 +1,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namespace: monitoring +namespace: grafana resources: - namespace.yaml diff --git a/infrastructure/monitoring/loki.values.yaml b/infrastructure/monitoring/loki.values.yaml index 9a5e487..6e1df67 100644 --- a/infrastructure/monitoring/loki.values.yaml +++ b/infrastructure/monitoring/loki.values.yaml @@ -5,13 +5,14 @@ loki: configs: - from: "2024-04-01" store: tsdb - object_store: s3 + object_store: filesystem schema: v13 index: prefix: loki_index_ period: 24h + auth_enabled: false pattern_ingester: - enabled: true + enabled: true limits_config: allow_structured_metadata: true volume_enabled: true @@ -19,6 +20,12 @@ loki: ruler: enable_api: true storage: + bucketNames: + # don't care since we use the filesystem + chunks: NOTUSED + ruler: NOTUSED + admin: NOTUSED + type: filesystem filesystem: chunks_directory: /var/loki/chunks @@ -46,6 +53,12 @@ singleBinary: # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). storageClass: nfs-client + +# -- Section for configuring optional Helm test +helm: + enabled: false + + # Zero out replica counts of other deployment modes backend: replicas: 0 diff --git a/infrastructure/monitoring/namespace.yaml b/infrastructure/monitoring/namespace.yaml index 0a074bd..1178cee 100644 --- a/infrastructure/monitoring/namespace.yaml +++ b/infrastructure/monitoring/namespace.yaml @@ -2,3 +2,5 @@ apiVersion: v1 kind: Namespace metadata: name: placeholder + labels: + pod-security.kubernetes.io/enforce: privileged diff --git a/infrastructure/monitoring/prometheus.values.yaml b/infrastructure/monitoring/prometheus.values.yaml index 1447d69..0d55740 100644 --- a/infrastructure/monitoring/prometheus.values.yaml +++ b/infrastructure/monitoring/prometheus.values.yaml @@ -6,7 +6,7 @@ server: log.level: debug storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md - retention: 4h + retention: 180d service: annotations: prometheus.io/scrape: "true" @@ -16,68 +16,69 @@ server: podAnnotations: prometheus.io/scrape: "true" prometheus.io/port: "10902" - sidecarContainers: - thanos-sidecar: - image: thanos - resources: - requests: - memory: "512Mi" - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/secret/sa - args: - - "sidecar" - - "--log.level=debug" - - "--tsdb.path=/data/" - - "--prometheus.url=http://127.0.0.1:9090" - - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}" - - "--reloader.config-file=/etc/prometheus-config/prometheus.yml" - - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml" - - "--reloader.rule-dir=/etc/prometheus-config/rules" - ports: - - name: sidecar-http - containerPort: 10902 - - name: grpc - containerPort: 10901 - - name: cluster - containerPort: 10900 - volumeMounts: - - name: storage-volume - mountPath: /data - - name: thanos-storage-secret - mountPath: /etc/secret - - name: config-volume - mountPath: /etc/prometheus-config - readOnly: false - - name: prometheus-config-shared - mountPath: /etc/prometheus-shared/ - readOnly: false - configPath: /etc/prometheus-shared/prometheus.yml - replicaCount: 1 - persistentVolume: - size: 20Gi - extraVolumes: # spec.template.spec.volumes - - name: prometheus-config-shared - emptyDir: {} - extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container - - name: prometheus-config-shared - mountPath: /etc/prometheus-shared/ - resources: - requests: - memory: 1Gi - global: - scrape_interval: 5s - scrape_timeout: 4s - external_labels: - prometheus_group: KLUSTER - prometheus_replica: '$(HOSTNAME)' - evaluation_interval: 5s - extraSecretMounts: - - name: thanos-objstore-config - mountPath: /etc/secret/ - subPath: sa - readOnly: false - secretName: thanos-storage-secret + # sidecarContainers: + # thanos-sidecar: + # image: thanos + # resources: + # requests: + # memory: "512Mi" + # env: + # - name: GOOGLE_APPLICATION_CREDENTIALS + # value: /etc/secret/sa + # args: + # - "sidecar" + # - "--log.level=debug" + # - "--tsdb.path=/data/" + # - "--prometheus.url=http://127.0.0.1:9090" + # - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}" + # - "--reloader.config-file=/etc/prometheus-config/prometheus.yml" + # - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml" + # - "--reloader.rule-dir=/etc/prometheus-config/rules" + # ports: + # - name: sidecar-http + # containerPort: 10902 + # - name: grpc + # containerPort: 10901 + # - name: cluster + # containerPort: 10900 + # volumeMounts: + # - name: storage-volume + # mountPath: /data + # - name: thanos-storage-secret + # mountPath: /etc/secret + # - name: config-volume + # mountPath: /etc/prometheus-config + # readOnly: false + # - name: prometheus-config-shared + # mountPath: /etc/prometheus-shared/ + # readOnly: false + # # configPath: /etc/prometheus-shared/prometheus.yml + # replicaCount: 1 + # persistentVolume: + # size: 20Gi + # storageClass: nfs-client + # extraVolumes: # spec.template.spec.volumes + # - name: prometheus-config-shared + # emptyDir: {} + # extraVolumeMounts: # spec.template.spec.containers.volumeMounts for prometheus container + # - name: prometheus-config-shared + # mountPath: /etc/prometheus-shared/ + # resources: + # requests: + # memory: 1Gi + # global: + # scrape_interval: 5s + # scrape_timeout: 4s + # external_labels: + # prometheus_group: KLUSTER + # prometheus_replica: '$(HOSTNAME)' + # evaluation_interval: 5s + # extraSecretMounts: + # - name: thanos-storage-secret + # mountPath: /etc/secret/ + # subPath: sa + # readOnly: false + # secretName: thanos-objstore-config # as thanos sidecar is taking care of the config reload # we can disable the prometheus configmap reload diff --git a/kluster-deployments/grafana/application.yaml b/kluster-deployments/grafana/application.yaml index d6f93b4..cb56de5 100644 --- a/kluster-deployments/grafana/application.yaml +++ b/kluster-deployments/grafana/application.yaml @@ -16,3 +16,5 @@ spec: automated: prune: true selfHeal: true + syncOptions: + - Replace=true \ No newline at end of file diff --git a/kluster-deployments/monitoring/application.yaml b/kluster-deployments/monitoring/application.yaml index f6ee898..8e79792 100644 --- a/kluster-deployments/monitoring/application.yaml +++ b/kluster-deployments/monitoring/application.yaml @@ -1,7 +1,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: prometheus-application + name: monitoring-application namespace: argocd spec: @@ -9,7 +9,7 @@ spec: source: repoURL: git@github.com:moll-re/bootstrap-k3s-infra.git targetRevision: main - path: infrastructure/prometheus + path: infrastructure/monitoring destination: server: https://kubernetes.default.svc namespace: monitoring