From 2dcce7895ce2c40f40606e6ee8ffb40f0f48a315 Mon Sep 17 00:00:00 2001 From: genofire Date: Wed, 29 Nov 2023 09:29:13 +0100 Subject: [PATCH 1/2] fix: improve prometheusrules (to show more labels and fix messages) Signed-off-by: genofire --- charts/authentik/templates/prometheusrule.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/charts/authentik/templates/prometheusrule.yaml b/charts/authentik/templates/prometheusrule.yaml index 678349a..5d9bb1e 100644 --- a/charts/authentik/templates/prometheusrule.yaml +++ b/charts/authentik/templates/prometheusrule.yaml @@ -127,24 +127,24 @@ spec: - alert: NoWorkersConnected labels: severity: critical - expr: max without (pid) (authentik_admin_workers) < 1 + expr: max without (endpoint,instance,container,job,service) (authentik_admin_workers) < 1 for: 10m annotations: {{` summary: No workers connected - message: authentik instance {{ $labels.instance }}'s worker are either not running or not connected. + message: authentik instance {{ $labels.pod }}'s worker are either not running or not connected. `}} - alert: PendingMigrations labels: severity: critical - expr: max without (pid) (django_migrations_unapplied_total) > 0 + expr: max without (endpoint,instance,container,job,service) (django_migrations_unapplied_total) > 0 for: 10m annotations: {{` summary: Pending database migrations - message: authentik instance {{ $labels.instance }} has pending database migrations + message: authentik instance {{ $labels.pod }} has pending database migrations `}} - alert: FailedSystemTasks @@ -161,7 +161,7 @@ spec: - alert: DisconnectedOutposts labels: severity: critical - expr: sum by (outpost) (max without (pid) (authentik_outposts_connected{uid!~"specific.*"})) < 1 + expr: max without (endpoint,instance,container,pod,job,service) (authentik_outposts_connected{uid!~"specific.*"}) < 1 for: 30m annotations: {{` From 0aa7be50559b0e9c3e08166909b29c7ee80c9173 Mon Sep 17 00:00:00 2001 From: Jens Langhammer Date: Thu, 13 Feb 2025 23:01:32 +0100 Subject: [PATCH 2/2] rework to use by() instead of without() (and filter metrics based on namespace and service) --- charts/authentik/ci/ct-values-metrics.yaml | 46 +++++++++++++++++++ .../authentik/templates/prometheusrule.yaml | 9 ++-- 2 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 charts/authentik/ci/ct-values-metrics.yaml diff --git a/charts/authentik/ci/ct-values-metrics.yaml b/charts/authentik/ci/ct-values-metrics.yaml new file mode 100644 index 0000000..9b8c8f9 --- /dev/null +++ b/charts/authentik/ci/ct-values-metrics.yaml @@ -0,0 +1,46 @@ +replicas: 1 + +worker: + replicas: 1 + +image: + repository: ghcr.io/goauthentik/server + tag: 2023.10.4 + pullPolicy: IfNotPresent + +ingress: + enabled: true + hosts: + - host: authentik.domain.tld + paths: + - path: "/" + pathType: Prefix + +authentik: + log_level: debug + secret_key: 5up3r53cr37K3y + postgresql: + password: au7h3n71k + redis: + password: au7h3n71k + +postgresql: + enabled: false + postgresqlPassword: au7h3n71k + persistence: + enabled: false + +redis: + enabled: false + auth: + enabled: true + password: au7h3n71k + +blueprints: + - authentik-ci-blueprint + +prometheus: + serviceMonitor: + create: true + rules: + create: true diff --git a/charts/authentik/templates/prometheusrule.yaml b/charts/authentik/templates/prometheusrule.yaml index 5d9bb1e..dbaf594 100644 --- a/charts/authentik/templates/prometheusrule.yaml +++ b/charts/authentik/templates/prometheusrule.yaml @@ -127,7 +127,7 @@ spec: - alert: NoWorkersConnected labels: severity: critical - expr: max without (endpoint,instance,container,job,service) (authentik_admin_workers) < 1 + expr: max by (pod) (authentik_admin_workers{namespace="{{ $.Release.Namespace }}", service="{{ include "authentik.names.fullname" $ }}-metrics"}) < 1 for: 10m annotations: {{` @@ -135,11 +135,10 @@ spec: message: authentik instance {{ $labels.pod }}'s worker are either not running or not connected. `}} - - alert: PendingMigrations labels: severity: critical - expr: max without (endpoint,instance,container,job,service) (django_migrations_unapplied_total) > 0 + expr: max by (pod) (django_migrations_unapplied_total{namespace="{{ $.Release.Namespace }}", service="{{ include "authentik.names.fullname" $ }}-metrics"}) > 0 for: 10m annotations: {{` @@ -150,7 +149,7 @@ spec: - alert: FailedSystemTasks labels: severity: critical - expr: sum(increase(authentik_system_tasks{status="error"}[2h])) by (task_name, task_uid) > 0 + expr: sum(increase(authentik_system_tasks{status="error", namespace="{{ $.Release.Namespace }}", service="{{ include "authentik.names.fullname" $ }}-metrics"}[2h])) by (task_name, task_uid) > 0 for: 2h annotations: {{` @@ -161,7 +160,7 @@ spec: - alert: DisconnectedOutposts labels: severity: critical - expr: max without (endpoint,instance,container,pod,job,service) (authentik_outposts_connected{uid!~"specific.*"}) < 1 + expr: max by (outpost) (authentik_outposts_connected{namespace="{{ $.Release.Namespace }}", service="{{ include "authentik.names.fullname" $ }}-metrics", uid!~"specific.*"}) < 1 for: 30m annotations: {{`