From 66be8786390420c10eb805af4b824556266204ec Mon Sep 17 00:00:00 2001 From: upodroid Date: Sun, 1 Sep 2024 21:10:44 +0100 Subject: [PATCH 1/7] deploy monitoring stack --- kubernetes/apps/monitoring.yaml | 39 +++++++++++++++++++ kubernetes/gke-utility/helm/cert-manager.yaml | 3 +- kubernetes/gke-utility/helm/monitoring.yaml | 2 + .../gke-utility/monitoring/ingress.yaml | 23 +++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 kubernetes/apps/monitoring.yaml create mode 100644 kubernetes/gke-utility/helm/monitoring.yaml create mode 100644 kubernetes/gke-utility/monitoring/ingress.yaml diff --git a/kubernetes/apps/monitoring.yaml b/kubernetes/apps/monitoring.yaml new file mode 100644 index 00000000000..efa9993c489 --- /dev/null +++ b/kubernetes/apps/monitoring.yaml @@ -0,0 +1,39 @@ +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: monitoring +spec: + goTemplate: true + generators: + - clusters: + selector: + matchLabels: + clusterType: 'utility' + template: + metadata: + name: 'monitoring-{{ .name }}' + spec: + destination: + namespace: monitoring + server: "{{ .server }}" + project: default + sources: + - chart: kube-prometheus-stack + repoURL: https://prometheus-community.github.io/helm-charts + targetRevision: 62.3.1 + helm: + releaseName: monitoring + valueFiles: + - $values/kubernetes/{{ .name }}/helm/monitoring.yaml + - repoURL: https://github.com/borg-land/k8s.io + targetRevision: observability-stack + ref: values + - path: kubernetes/{{ .name }}/monitoring/ + repoURL: https://github.com/borg-land/k8s.io + targetRevision: observability-stack + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/kubernetes/gke-utility/helm/cert-manager.yaml b/kubernetes/gke-utility/helm/cert-manager.yaml index 259909be8f5..340a25d65c4 100644 --- a/kubernetes/gke-utility/helm/cert-manager.yaml +++ b/kubernetes/gke-utility/helm/cert-manager.yaml @@ -1,8 +1,7 @@ crds: 
enabled: true extraObjects: - - | - apiVersion: cert-manager.io/v1 + - apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: name: letsencrypt-prod diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml new file mode 100644 index 00000000000..4b0b000fc04 --- /dev/null +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -0,0 +1,2 @@ +grafana: + enabled: true diff --git a/kubernetes/gke-utility/monitoring/ingress.yaml b/kubernetes/gke-utility/monitoring/ingress.yaml new file mode 100644 index 00000000000..ddf587ba798 --- /dev/null +++ b/kubernetes/gke-utility/monitoring/ingress.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + # annotations: + # cert-manager.io/issuer: letsencrypt-prod +spec: + ingressClassName: nginx + # tls: + # - hosts: + # - monitoring.prow.k8s.io + # secretName: monitoring-prow-tls + rules: + - host: monitoring.prow.k8s.io + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: grafana + port: + number: 80 From 13624bbeb5b319b1adce5a522cd4a1c857c715c4 Mon Sep 17 00:00:00 2001 From: upodroid Date: Sun, 1 Sep 2024 21:59:19 +0100 Subject: [PATCH 2/7] add thanos --- kubernetes/gke-utility/helm/monitoring.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml index 4b0b000fc04..8d46ac774d9 100644 --- a/kubernetes/gke-utility/helm/monitoring.yaml +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -1,2 +1,18 @@ grafana: enabled: true +prometheus: + thanosService: + enabled: true + prometheusSpec: + thanos: + image: quay.io/thanos/thanos:v0.36.1 + listenLocal: false + objectStorageConfig: + secret: + type: GCS + config: + bucket: k8s-io-thanos + serviceMonitorSelectorNilUsesHelmValues: false + podMonitorSelectorNilUsesHelmValues: false + ruleSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false From 
184fb514a183921ed2a5217030154c3a1eaff9bf Mon Sep 17 00:00:00 2001 From: upodroid Date: Sun, 1 Sep 2024 22:39:02 +0100 Subject: [PATCH 3/7] debug thanos --- kubernetes/apps/monitoring.yaml | 1 + kubernetes/gke-utility/helm/monitoring.yaml | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/kubernetes/apps/monitoring.yaml b/kubernetes/apps/monitoring.yaml index efa9993c489..3f4f34a6e87 100644 --- a/kubernetes/apps/monitoring.yaml +++ b/kubernetes/apps/monitoring.yaml @@ -37,3 +37,4 @@ spec: selfHeal: true syncOptions: - CreateNamespace=true + - ServerSideApply=true diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml index 8d46ac774d9..e2be1206eda 100644 --- a/kubernetes/gke-utility/helm/monitoring.yaml +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -7,6 +7,7 @@ prometheus: thanos: image: quay.io/thanos/thanos:v0.36.1 listenLocal: false + logLevel: debug objectStorageConfig: secret: type: GCS @@ -16,3 +17,11 @@ prometheus: podMonitorSelectorNilUsesHelmValues: false ruleSelectorNilUsesHelmValues: false probeSelectorNilUsesHelmValues: false + +# These endpoints aren't scrapable on GKE +coreDns: + enabled: false +kubeControllerManager: + enabled: false +kubeScheduler: + enabled: false From dc4cbbbceebf992f364de2b88c5ec03e25e0a774 Mon Sep 17 00:00:00 2001 From: upodroid Date: Sat, 5 Oct 2024 15:17:45 +0100 Subject: [PATCH 4/7] use auth proxy --- kubernetes/apps/monitoring.yaml | 3 +++ kubernetes/gke-utility/helm/monitoring.yaml | 20 ++++++++++++++++ .../gke-utility/monitoring/httproute.yaml | 15 ++++++++++++ .../gke-utility/monitoring/ingress.yaml | 23 ------------------- .../gke-utility/monitoring/kustomization.yaml | 5 ++++ 5 files changed, 43 insertions(+), 23 deletions(-) create mode 100644 kubernetes/gke-utility/monitoring/httproute.yaml delete mode 100644 kubernetes/gke-utility/monitoring/ingress.yaml create mode 100644 kubernetes/gke-utility/monitoring/kustomization.yaml diff --git 
a/kubernetes/apps/monitoring.yaml b/kubernetes/apps/monitoring.yaml index 3f4f34a6e87..62d5933a27a 100644 --- a/kubernetes/apps/monitoring.yaml +++ b/kubernetes/apps/monitoring.yaml @@ -38,3 +38,6 @@ spec: syncOptions: - CreateNamespace=true - ServerSideApply=true + managedNamespaceMetadata: + labels: + istio-injection: enabled diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml index e2be1206eda..20a61aaca93 100644 --- a/kubernetes/gke-utility/helm/monitoring.yaml +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -1,5 +1,25 @@ grafana: enabled: true + grafana.ini: + analytics: + reporting_enabled: false + check_for_updates: true + server: + root_url: https://monitoring.prow.k8s.io + auth: + oauth_auto_login: true + signout_redirect_url: https://oauth2-proxy.k8s.io/oauth2/sign_out + auth.proxy: + enabled: true + header_name: X-Auth-Request-Email + header_property: email + auto_sign_up: true + headers: Name:X-Auth-Request-User Email:X-Auth-Request-Email Groups:X-Auth-Request-Groups + users: + allow_sign_up: false + auto_assign_org: true + auto_assign_org_role: Editor + prometheus: thanosService: enabled: true diff --git a/kubernetes/gke-utility/monitoring/httproute.yaml b/kubernetes/gke-utility/monitoring/httproute.yaml new file mode 100644 index 00000000000..2ed411553d8 --- /dev/null +++ b/kubernetes/gke-utility/monitoring/httproute.yaml @@ -0,0 +1,15 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: grafana +spec: + hostnames: + - monitoring.prow.k8s.io + parentRefs: + - name: istio-ingressgateway + namespace: istio-system + sectionName: https + rules: + - backendRefs: + - name: monitoring-grafana + port: 80 diff --git a/kubernetes/gke-utility/monitoring/ingress.yaml b/kubernetes/gke-utility/monitoring/ingress.yaml deleted file mode 100644 index ddf587ba798..00000000000 --- a/kubernetes/gke-utility/monitoring/ingress.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: 
networking.k8s.io/v1 -kind: Ingress -metadata: - name: grafana - # annotations: - # cert-manager.io/issuer: letsencrypt-prod -spec: - ingressClassName: nginx - # tls: - # - hosts: - # - monitoring.prow.k8s.io - # secretName: monitoring-prow-tls - rules: - - host: monitoring.prow.k8s.io - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: grafana - port: - number: 80 diff --git a/kubernetes/gke-utility/monitoring/kustomization.yaml b/kubernetes/gke-utility/monitoring/kustomization.yaml new file mode 100644 index 00000000000..449338c0b6c --- /dev/null +++ b/kubernetes/gke-utility/monitoring/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: monitoring +resources: + - httproute.yaml From 999fcab31ff7e683cbd04847b4ea72b1570f7d69 Mon Sep 17 00:00:00 2001 From: upodroid Date: Sat, 5 Oct 2024 17:40:55 +0100 Subject: [PATCH 5/7] deploy thanos --- kubernetes/apps/monitoring.yaml | 23 ++++++++++++++++++++ kubernetes/gke-utility/helm/thanos.yaml | 28 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 kubernetes/gke-utility/helm/thanos.yaml diff --git a/kubernetes/apps/monitoring.yaml b/kubernetes/apps/monitoring.yaml index 62d5933a27a..85041a7106f 100644 --- a/kubernetes/apps/monitoring.yaml +++ b/kubernetes/apps/monitoring.yaml @@ -2,6 +2,8 @@ apiVersion: argoproj.io/v1alpha1 kind: ApplicationSet metadata: name: monitoring + annotations: + argocd.argoproj.io/sync-wave: "-3" spec: goTemplate: true generators: @@ -41,3 +43,24 @@ spec: managedNamespaceMetadata: labels: istio-injection: enabled +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: thanos +spec: + destination: + name: gke-utility + namespace: monitoring + project: default + sources: + - chart: thanos + repoURL: registry-1.docker.io/bitnamicharts + targetRevision: 15.7.27 + helm: + releaseName: thanos + valueFiles: + - $values/kubernetes/gke-utility//helm/thanos.yaml + - 
repoURL: https://github.com/borg-land/k8s.io + targetRevision: observability-stack + ref: values diff --git a/kubernetes/gke-utility/helm/thanos.yaml b/kubernetes/gke-utility/helm/thanos.yaml new file mode 100644 index 00000000000..75d2d0b3c77 --- /dev/null +++ b/kubernetes/gke-utility/helm/thanos.yaml @@ -0,0 +1,28 @@ +objstoreConfig: |- + type: GCS + config: + bucket: k8s-io-thanos +storegateway: + enabled: true +query: + dnsDiscovery: + sidecarsService: monitoring-kube-prometheus-thanos-discovery + sidecarsNamespace: monitoring +bucketweb: + enabled: true +compactor: + enabled: true +ruler: + enabled: true + alertmanagers: + - http://monitoring-kube-prometheus-alertmanager.monitoring.svc.cluster.local:9093 + config: |- + groups: + - name: "metamonitoring" + rules: + - alert: "PrometheusDown" + expr: absent(up{prometheus="monitoring/kube-prometheus"}) +metrics: + enabled: true + serviceMonitor: + enabled: true From f3219511451252ab94eaaefaf77d63789b484754 Mon Sep 17 00:00:00 2001 From: upodroid Date: Sat, 5 Oct 2024 17:58:41 +0100 Subject: [PATCH 6/7] point grafana to thanos --- kubernetes/gke-utility/helm/monitoring.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml index 20a61aaca93..5d2fb18d00e 100644 --- a/kubernetes/gke-utility/helm/monitoring.yaml +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -1,5 +1,9 @@ grafana: enabled: true + sidecar: + datasources: + # point the default Prometheus Datasource to Thanos + url: http://thanos-query-frontend.monitoring:9090/ grafana.ini: analytics: reporting_enabled: false @@ -19,6 +23,7 @@ grafana: allow_sign_up: false auto_assign_org: true auto_assign_org_role: Editor + prometheus: thanosService: From 77e2118779515effbff291acb80ad7a36f66ada0 Mon Sep 17 00:00:00 2001 From: upodroid Date: Sat, 5 Oct 2024 20:13:32 +0100 Subject: [PATCH 7/7] add cluster labels --- kubernetes/apps/monitoring.yaml | 2 ++ 
kubernetes/gke-utility/helm/monitoring.yaml | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kubernetes/apps/monitoring.yaml b/kubernetes/apps/monitoring.yaml index 85041a7106f..d3ec539d4f2 100644 --- a/kubernetes/apps/monitoring.yaml +++ b/kubernetes/apps/monitoring.yaml @@ -14,6 +14,8 @@ spec: template: metadata: name: 'monitoring-{{ .name }}' + annotations: + argocd.argoproj.io/compare-options: ServerSideDiff=true,IncludeMutationWebhook=true spec: destination: namespace: monitoring diff --git a/kubernetes/gke-utility/helm/monitoring.yaml b/kubernetes/gke-utility/helm/monitoring.yaml index 5d2fb18d00e..80ec1e76e25 100644 --- a/kubernetes/gke-utility/helm/monitoring.yaml +++ b/kubernetes/gke-utility/helm/monitoring.yaml @@ -24,11 +24,13 @@ grafana: auto_assign_org: true auto_assign_org_role: Editor - prometheus: thanosService: enabled: true prometheusSpec: + externalLabels: + cluster: gke-utility + cloud: gke thanos: image: quay.io/thanos/thanos:v0.36.1 listenLocal: false