From c7deff82e128a7e6a5daca2a57810c21dad7999f Mon Sep 17 00:00:00 2001 From: mereta Date: Mon, 2 Dec 2024 16:42:57 +0000 Subject: [PATCH 01/12] Adding Example Health Checks --- cmd/hubble/daemon_linux.go | 12 ++++ cmd/legacy/daemon.go | 39 ++++++++++--- .../retina/templates/agent/daemonset.yaml | 20 +++++-- .../controller/helm/retina/values.yaml | 27 +++++++++ .../helm/retina/templates/daemonset.yaml | 8 +-- .../controller/helm/retina/values.yaml | 1 + pkg/enricher/enricher.go | 55 +++++++++++++++++++ pkg/enricher/types.go | 1 + pkg/module/metrics/metrics_module.go | 4 ++ 9 files changed, 151 insertions(+), 16 deletions(-) diff --git a/cmd/hubble/daemon_linux.go b/cmd/hubble/daemon_linux.go index 2c4c82a617..1e0a714c0f 100644 --- a/cmd/hubble/daemon_linux.go +++ b/cmd/hubble/daemon_linux.go @@ -7,6 +7,7 @@ package hubble import ( "context" "fmt" + "os" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -32,6 +33,7 @@ import ( "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/healthz" logf "sigs.k8s.io/controller-runtime/pkg/log" zapf "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -66,6 +68,16 @@ var ( return nil, nil, fmt.Errorf("creating new controller-runtime manager: %w", err) } + if err := ctrlManager.AddHealthzCheck("healthz", healthz.Ping); err != nil { + logger.Error("unable to set up healthz check", err) + os.Exit(1) + } + + if err := ctrlManager.AddReadyzCheck("readyz", healthz.Ping); err != nil { + logger.Error("unable to set up readyz check", err) + os.Exit(1) + } + return ctrlManager, ctrlManager.GetClient(), nil }), diff --git a/cmd/legacy/daemon.go b/cmd/legacy/daemon.go index 6e06ee73cc..87815b13b6 100644 --- a/cmd/legacy/daemon.go +++ b/cmd/legacy/daemon.go @@ -4,6 +4,7 @@ package legacy import ( "fmt" + "net/http" "os" "strings" "time" @@ -55,6 +56,10 @@ const ( nodeIPEnvKey = 
"NODE_IP" ) +var ( + healthzChecker healthz.Checker +) + var scheme = k8sruntime.NewScheme() func init() { @@ -215,14 +220,6 @@ func (d *Daemon) Start() error { } //+kubebuilder:scaffold:builder - - if healthCheckErr := mgr.AddHealthzCheck("healthz", healthz.Ping); healthCheckErr != nil { - mainLogger.Fatal("Unable to set up health check", zap.Error(healthCheckErr)) - } - if addReadyCheckErr := mgr.AddReadyzCheck("readyz", healthz.Ping); addReadyCheckErr != nil { - mainLogger.Fatal("Unable to set up ready check", zap.Error(addReadyCheckErr)) - } - // k8s Client used for informers cl := kubernetes.NewForConfigOrDie(mgr.GetConfig()) @@ -294,6 +291,21 @@ func (d *Daemon) Start() error { mainLogger.Fatal("unable to create metricsConfigController", zap.Error(err)) } } + + // Define a custom health check for advanced metrics + healthzChecker = healthz.CheckHandler{ + Checker: healthz.Checker(func(req *http.Request) error { + _, err := metricsModule.Status() + if err != nil { + mainLogger.Error("failed to get metrics module status fr advanced metrics", zap.Error(err)) + return err + } + return nil + }), + }.Checker + } else { + // Advanced Metric not enabled, Ping healthcheck + healthzChecker = healthz.Ping } controllerMgr, err := cm.NewControllerManager(daemonConfig, cl, tel) @@ -315,6 +327,17 @@ func (d *Daemon) Start() error { go controllerMgr.Start(ctx) mainLogger.Info("Started controller manager") + //Set health checks according to retina confiuration + if err := mgr.AddHealthzCheck("healthz", healthzChecker); err != nil { + mainLogger.Error("unable to set up custom health check", zap.Error(err)) + os.Exit(1) + } + + if err := mgr.AddReadyzCheck("readyz", healthzChecker); err != nil { + mainLogger.Error("unable to set up custom ready check", zap.Error(err)) + os.Exit(1) + } + // Start all registered controllers. This will block until container receives SIGTERM. 
if err := mgr.Start(ctx); err != nil { mainLogger.Fatal("unable to start manager", zap.Error(err)) diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml index af17a8cce0..3ef448dfb5 100644 --- a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml @@ -77,10 +77,22 @@ spec: cpu: {{ .Values.resources.limits.cpu | quote }} readinessProbe: httpGet: - path: /metrics - port: {{ .Values.agent.container.retina.ports.containerPort }} - initialDelaySeconds: 10 - periodSeconds: 30 + path: /readyz + port: {{ .Values.agent.container.retina.ports.healthPort }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold | default "3" }} + successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} + livenessProbe: + httpGet: + path: /healthz + port: {{ .Values.agent.container.retina.ports.healthPort }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }} + successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }} env: - name: POD_NAME valueFrom: diff --git a/deploy/hubble/manifests/controller/helm/retina/values.yaml b/deploy/hubble/manifests/controller/helm/retina/values.yaml index e287335d3e..fd00be9a8c 100644 --- a/deploy/hubble/manifests/controller/helm/retina/values.yaml +++ 
b/deploy/hubble/manifests/controller/helm/retina/values.yaml @@ -55,6 +55,7 @@ agent: metricsBindAddress: ":18080" ports: containerPort: 10093 + healthPort: 18081 enablePodLevel: true remoteContext: false @@ -933,3 +934,29 @@ tls: # -----BEGIN CERTIFICATE----- # ... # -----END CERTIFICATE----- +## @param readinessProbe.initialDelaySeconds [array] Initial delay seconds for readinessProbe +## @param readinessProbe.periodSeconds [array] Period seconds for readinessProbe +## @param readinessProbe.timeoutSeconds [array] Timeout seconds for readinessProbe +## @param readinessProbe.failureThreshold [array] Failure threshold for readinessProbe +## @param readinessProbe.successThreshold [array] Success threshold for readinessProbe +## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +## +readinessProbe: {} + # initialDelaySeconds: 30 + # periodSeconds: 10 + # timeoutSeconds: 15 + # failureThreshold: 5 + # successThreshold: 1 +## @param livenessProbe.initialDelaySeconds [array] Initial delay seconds for livenessProbe +## @param livenessProbe.periodSeconds [array] Period seconds for livenessProbe +## @param livenessProbe.timeoutSeconds [array] Timeout seconds for livenessProbe +## @param livenessProbe.failureThreshold [array] Failure threshold for livenessProbe +## @param livenessProbe.successThreshold [array] Success threshold for livenessProbe +## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +## +livenessProbe: {} + # initialDelaySeconds: 30 + # periodSeconds: 10 + # timeoutSeconds: 15 + # failureThreshold: 5 + # successThreshold: 1 diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml index f4e44332e0..e6a699a239 100644 --- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml +++ 
b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml @@ -49,8 +49,8 @@ spec: - name: {{ include "retina.name" . }} readinessProbe: httpGet: - path: /metrics - port: {{ .Values.retinaPort }} + path: /readyz + port: {{ .Values.daemonset.container.retina.ports.healthPort }} initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} @@ -58,8 +58,8 @@ spec: successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} livenessProbe: httpGet: - path: /metrics - port: {{ .Values.retinaPort }} + path: /healthz + port: {{ .Values.daemonset.container.retina.ports.healthPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} diff --git a/deploy/legacy/manifests/controller/helm/retina/values.yaml b/deploy/legacy/manifests/controller/helm/retina/values.yaml index f95da03629..b8fa3ec02a 100644 --- a/deploy/legacy/manifests/controller/helm/retina/values.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/values.yaml @@ -86,6 +86,7 @@ daemonset: metricsBindAddress: ":18080" ports: containerPort: 10093 + healthPort: 18081 # volume mounts with name and mountPath volumeMounts: diff --git a/pkg/enricher/enricher.go b/pkg/enricher/enricher.go index 98013cd2ca..10dec2a8c5 100644 --- a/pkg/enricher/enricher.go +++ b/pkg/enricher/enricher.go @@ -5,9 +5,13 @@ package enricher import ( "context" + "errors" + "io" "reflect" "sync" + "time" + "github.com/cilium/cilium/api/v1/flow" v1 "github.com/cilium/cilium/pkg/hubble/api/v1" "github.com/cilium/cilium/pkg/hubble/container" @@ -189,3 +193,54 @@ func (e *Enricher) Write(ev *v1.Event) { func (e *Enricher) ExportReader() *container.RingReader { return 
container.NewRingReader(e.outputRing, e.outputRing.OldestWrite()) } + +func (e *Enricher) Status() (float64, error) { + rate, err := e.getFlowRate(e.outputRing, time.Now()) + if err != nil { + e.l.Error("failed to get flow rate %w", zap.Error(err)) + return 0, err + } + return rate, nil +} + +// ref: "getFlowRate" "github.com/cilium/cilium/pkg/hubble/observer/local_observer.go" +func (en *Enricher) getFlowRate(ring *container.Ring, at time.Time) (float64, error) { + reader := container.NewRingReader(ring, ring.LastWriteParallel()) + count := 0 + since := at.Add(-1 * time.Minute) + var lastSeenEvent *v1.Event + for { + e, err := reader.Previous() + lost := e.GetLostEvent() + if lost != nil { + // a lost event means we read the complete ring buffer + // if we read at least one flow, update `since` to calculate the rate over the available time range + if lastSeenEvent != nil { + since = lastSeenEvent.Timestamp.AsTime() + } + break + } else if errors.Is(err, io.EOF) { + // an EOF error means the ring buffer is empty, ignore error and continue + break + } else if err != nil { + // unexpected error + return 0, err + } + if _, isFlowEvent := e.Event.(*flow.Flow); !isFlowEvent { + // ignore non flow events + continue + } + if err := e.Timestamp.CheckValid(); err != nil { + return 0, err + } + ts := e.Timestamp.AsTime() + if ts.Before(since) { + // scanned the last minute, exit loop + break + } + lastSeenEvent = e + count++ + } + fl := float64(count) / at.Sub(since).Seconds() + return fl, nil +} diff --git a/pkg/enricher/types.go b/pkg/enricher/types.go index ed76c7e5dd..b6f74e8238 100644 --- a/pkg/enricher/types.go +++ b/pkg/enricher/types.go @@ -13,4 +13,5 @@ type EnricherInterface interface { Run() Write(ev *v1.Event) ExportReader() *container.RingReader + Status() (float64, error) } diff --git a/pkg/module/metrics/metrics_module.go b/pkg/module/metrics/metrics_module.go index 0d38c06522..bc835b191b 100644 --- a/pkg/module/metrics/metrics_module.go +++ 
b/pkg/module/metrics/metrics_module.go @@ -139,6 +139,10 @@ func InitModule(ctx context.Context, return m } +func (m *Module) Status() (float64, error) { + return m.enricher.Status() +} + func (m *Module) Reconcile(spec *api.MetricsSpec) error { // If the new spec has not changed, then do nothing. if m.currentSpec != nil && m.currentSpec.Equals(spec) { From a1501ceb4c227bf2df1a7f338bc3baca3cb264cf Mon Sep 17 00:00:00 2001 From: mereta Date: Mon, 2 Dec 2024 16:56:54 +0000 Subject: [PATCH 02/12] Basic health checks --- pkg/enricher/enricher.go | 55 ---------------------------------------- pkg/enricher/types.go | 1 - 2 files changed, 56 deletions(-) diff --git a/pkg/enricher/enricher.go b/pkg/enricher/enricher.go index 10dec2a8c5..98013cd2ca 100644 --- a/pkg/enricher/enricher.go +++ b/pkg/enricher/enricher.go @@ -5,13 +5,9 @@ package enricher import ( "context" - "errors" - "io" "reflect" "sync" - "time" - "github.com/cilium/cilium/api/v1/flow" v1 "github.com/cilium/cilium/pkg/hubble/api/v1" "github.com/cilium/cilium/pkg/hubble/container" @@ -193,54 +189,3 @@ func (e *Enricher) Write(ev *v1.Event) { func (e *Enricher) ExportReader() *container.RingReader { return container.NewRingReader(e.outputRing, e.outputRing.OldestWrite()) } - -func (e *Enricher) Status() (float64, error) { - rate, err := e.getFlowRate(e.outputRing, time.Now()) - if err != nil { - e.l.Error("failed to get flow rate %w", zap.Error(err)) - return 0, err - } - return rate, nil -} - -// ref: "getFlowRate" "github.com/cilium/cilium/pkg/hubble/observer/local_observer.go" -func (en *Enricher) getFlowRate(ring *container.Ring, at time.Time) (float64, error) { - reader := container.NewRingReader(ring, ring.LastWriteParallel()) - count := 0 - since := at.Add(-1 * time.Minute) - var lastSeenEvent *v1.Event - for { - e, err := reader.Previous() - lost := e.GetLostEvent() - if lost != nil { - // a lost event means we read the complete ring buffer - // if we read at least one flow, update `since` to calculate 
the rate over the available time range - if lastSeenEvent != nil { - since = lastSeenEvent.Timestamp.AsTime() - } - break - } else if errors.Is(err, io.EOF) { - // an EOF error means the ring buffer is empty, ignore error and continue - break - } else if err != nil { - // unexpected error - return 0, err - } - if _, isFlowEvent := e.Event.(*flow.Flow); !isFlowEvent { - // ignore non flow events - continue - } - if err := e.Timestamp.CheckValid(); err != nil { - return 0, err - } - ts := e.Timestamp.AsTime() - if ts.Before(since) { - // scanned the last minute, exit loop - break - } - lastSeenEvent = e - count++ - } - fl := float64(count) / at.Sub(since).Seconds() - return fl, nil -} diff --git a/pkg/enricher/types.go b/pkg/enricher/types.go index b6f74e8238..ed76c7e5dd 100644 --- a/pkg/enricher/types.go +++ b/pkg/enricher/types.go @@ -13,5 +13,4 @@ type EnricherInterface interface { Run() Write(ev *v1.Event) ExportReader() *container.RingReader - Status() (float64, error) } From 9c6f39b81e8fb225f52579e1475248def330fc9c Mon Sep 17 00:00:00 2001 From: mereta Date: Mon, 2 Dec 2024 16:58:01 +0000 Subject: [PATCH 03/12] Basic health checks --- cmd/legacy/daemon.go | 30 +++++----------------------- pkg/module/metrics/metrics_module.go | 4 ---- 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/cmd/legacy/daemon.go b/cmd/legacy/daemon.go index 87815b13b6..8a0f822916 100644 --- a/cmd/legacy/daemon.go +++ b/cmd/legacy/daemon.go @@ -4,7 +4,6 @@ package legacy import ( "fmt" - "net/http" "os" "strings" "time" @@ -56,10 +55,6 @@ const ( nodeIPEnvKey = "NODE_IP" ) -var ( - healthzChecker healthz.Checker -) - var scheme = k8sruntime.NewScheme() func init() { @@ -291,21 +286,6 @@ func (d *Daemon) Start() error { mainLogger.Fatal("unable to create metricsConfigController", zap.Error(err)) } } - - // Define a custom health check for advanced metrics - healthzChecker = healthz.CheckHandler{ - Checker: healthz.Checker(func(req *http.Request) error { - _, err := 
metricsModule.Status() - if err != nil { - mainLogger.Error("failed to get metrics module status fr advanced metrics", zap.Error(err)) - return err - } - return nil - }), - }.Checker - } else { - // Advanced Metric not enabled, Ping healthcheck - healthzChecker = healthz.Ping } controllerMgr, err := cm.NewControllerManager(daemonConfig, cl, tel) @@ -327,14 +307,14 @@ func (d *Daemon) Start() error { go controllerMgr.Start(ctx) mainLogger.Info("Started controller manager") - //Set health checks according to retina confiuration - if err := mgr.AddHealthzCheck("healthz", healthzChecker); err != nil { - mainLogger.Error("unable to set up custom health check", zap.Error(err)) + //Set health checks + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + mainLogger.Error("unable to set up health check", zap.Error(err)) os.Exit(1) } - if err := mgr.AddReadyzCheck("readyz", healthzChecker); err != nil { - mainLogger.Error("unable to set up custom ready check", zap.Error(err)) + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + mainLogger.Error("unable to set up ready check", zap.Error(err)) os.Exit(1) } diff --git a/pkg/module/metrics/metrics_module.go b/pkg/module/metrics/metrics_module.go index bc835b191b..0d38c06522 100644 --- a/pkg/module/metrics/metrics_module.go +++ b/pkg/module/metrics/metrics_module.go @@ -139,10 +139,6 @@ func InitModule(ctx context.Context, return m } -func (m *Module) Status() (float64, error) { - return m.enricher.Status() -} - func (m *Module) Reconcile(spec *api.MetricsSpec) error { // If the new spec has not changed, then do nothing. 
if m.currentSpec != nil && m.currentSpec.Equals(spec) { From df90b99316baf690bd09504517875b8a31bd2328 Mon Sep 17 00:00:00 2001 From: mereta Date: Mon, 2 Dec 2024 17:00:50 +0000 Subject: [PATCH 04/12] Windows images --- .../controller/helm/retina/templates/agent/daemonset.yaml | 4 ++-- .../manifests/controller/helm/retina/templates/daemonset.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml index 3ef448dfb5..794daba79d 100644 --- a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml @@ -208,8 +208,8 @@ spec: - .\setkubeconfigpath.ps1; ./controller.exe --config ./retina/config.yaml --kubeconfig ./kubeconfig readinessProbe: httpGet: - path: /metrics - port: {{ .Values.agent.container.retina.ports.containerPort }} + path: /healthz + port: {{ .Values.agent.container.retina.ports.healthPort }} initialDelaySeconds: 15 periodSeconds: 10 env: diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml index e6a699a239..712a46d2c4 100644 --- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml @@ -222,8 +222,8 @@ spec: fieldPath: status.hostIP livenessProbe: httpGet: - path: /metrics - port: {{ .Values.retinaPort }} + path: /healthz + port: {{ .Values.daemonset.container.retina.ports.healthPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} From a200860d63a1bb65c815f0707e9b3e8ba65ecdfc Mon Sep 17 00:00:00 2001 
From: Mereta Date: Wed, 4 Dec 2024 12:19:03 +0000 Subject: [PATCH 05/12] Update cmd/legacy/daemon.go Co-authored-by: Timothy J. Raymond Signed-off-by: Mereta --- cmd/legacy/daemon.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/legacy/daemon.go b/cmd/legacy/daemon.go index 8a0f822916..4b1a6f8426 100644 --- a/cmd/legacy/daemon.go +++ b/cmd/legacy/daemon.go @@ -307,7 +307,7 @@ func (d *Daemon) Start() error { go controllerMgr.Start(ctx) mainLogger.Info("Started controller manager") - //Set health checks + // Set health checks if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { mainLogger.Error("unable to set up health check", zap.Error(err)) os.Exit(1) From e4df97416aed4191d36aa6e194666eb511bf01b9 Mon Sep 17 00:00:00 2001 From: mereta Date: Wed, 4 Dec 2024 13:52:52 +0000 Subject: [PATCH 06/12] Applying PR suggestions --- cmd/hubble/daemon_linux.go | 13 +++++-------- cmd/legacy/daemon.go | 12 +++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/cmd/hubble/daemon_linux.go b/cmd/hubble/daemon_linux.go index 1e0a714c0f..4b5509daa5 100644 --- a/cmd/hubble/daemon_linux.go +++ b/cmd/hubble/daemon_linux.go @@ -7,7 +7,7 @@ package hubble import ( "context" "fmt" - "os" + "net/http" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -33,7 +33,6 @@ import ( "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/healthz" logf "sigs.k8s.io/controller-runtime/pkg/log" zapf "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -68,14 +67,12 @@ var ( return nil, nil, fmt.Errorf("creating new controller-runtime manager: %w", err) } - if err := ctrlManager.AddHealthzCheck("healthz", healthz.Ping); err != nil { - logger.Error("unable to set up healthz check", err) - os.Exit(1) + if err := ctrlManager.AddHealthzCheck("healthz", func(_ *http.Request) error { return nil 
}); err != nil { + logger.Error("unable to set up agent healthz check", err) } - if err := ctrlManager.AddReadyzCheck("readyz", healthz.Ping); err != nil { - logger.Error("unable to set up readyz check", err) - os.Exit(1) + if err := ctrlManager.AddReadyzCheck("readyz", func(_ *http.Request) error { return nil }); err != nil { + logger.Error("unable to set up agent readyz check", err) } return ctrlManager, ctrlManager.GetClient(), nil diff --git a/cmd/legacy/daemon.go b/cmd/legacy/daemon.go index 4b1a6f8426..d044a90478 100644 --- a/cmd/legacy/daemon.go +++ b/cmd/legacy/daemon.go @@ -4,6 +4,7 @@ package legacy import ( "fmt" + "net/http" "os" "strings" "time" @@ -21,7 +22,6 @@ import ( crcache "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" kcfg "sigs.k8s.io/controller-runtime/pkg/client/config" - "sigs.k8s.io/controller-runtime/pkg/healthz" crmgr "sigs.k8s.io/controller-runtime/pkg/manager" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -308,14 +308,12 @@ func (d *Daemon) Start() error { mainLogger.Info("Started controller manager") // Set health checks - if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { - mainLogger.Error("unable to set up health check", zap.Error(err)) - os.Exit(1) + if err := mgr.AddHealthzCheck("healthz", func(_ *http.Request) error { return nil }); err != nil { + mainLogger.Error("unable to set up agent health check", zap.Error(err)) } - if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { - mainLogger.Error("unable to set up ready check", zap.Error(err)) - os.Exit(1) + if err := mgr.AddReadyzCheck("readyz", func(_ *http.Request) error { return nil }); err != nil { + mainLogger.Error("unable to set up agent ready check", zap.Error(err)) } // Start all registered controllers. This will block until container receives SIGTERM. 
From cf30c6cc174874f27f4f631641195fc2e7a15e69 Mon Sep 17 00:00:00 2001 From: mereta Date: Wed, 4 Dec 2024 14:00:09 +0000 Subject: [PATCH 07/12] Windows readiness probe --- .../controller/helm/retina/templates/daemonset.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml index 712a46d2c4..ce96d05020 100644 --- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml @@ -220,6 +220,15 @@ spec: fieldRef: apiVersion: v1 fieldPath: status.hostIP + readinessProbe: + httpGet: + path: /readyz + port: {{ .Values.daemonset.container.retina.ports.healthPort }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold | default "3" }} + successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} livenessProbe: httpGet: path: /healthz From 9004b865125235e46cfcefb3017293268a165ef3 Mon Sep 17 00:00:00 2001 From: mereta Date: Thu, 23 Jan 2025 11:21:03 +0000 Subject: [PATCH 08/12] Extend retina port to serve /health --- cmd/hubble/daemon_linux.go | 9 --------- cmd/legacy/daemon.go | 19 +++++++++---------- .../retina/templates/agent/daemonset.yaml | 12 ++++++------ .../controller/helm/retina/values.yaml | 3 +-- .../crds/retina.sh_tracesconfigurations.yaml | 2 +- .../helm/retina/templates/daemonset.yaml | 16 ++++++++-------- .../controller/helm/retina/values.yaml | 1 - pkg/server/server.go | 15 +++++++++++++++ 8 files changed, 40 insertions(+), 37 deletions(-) diff --git a/cmd/hubble/daemon_linux.go b/cmd/hubble/daemon_linux.go index 4b5509daa5..2c4c82a617 100644 --- 
a/cmd/hubble/daemon_linux.go +++ b/cmd/hubble/daemon_linux.go @@ -7,7 +7,6 @@ package hubble import ( "context" "fmt" - "net/http" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -67,14 +66,6 @@ var ( return nil, nil, fmt.Errorf("creating new controller-runtime manager: %w", err) } - if err := ctrlManager.AddHealthzCheck("healthz", func(_ *http.Request) error { return nil }); err != nil { - logger.Error("unable to set up agent healthz check", err) - } - - if err := ctrlManager.AddReadyzCheck("readyz", func(_ *http.Request) error { return nil }); err != nil { - logger.Error("unable to set up agent readyz check", err) - } - return ctrlManager, ctrlManager.GetClient(), nil }), diff --git a/cmd/legacy/daemon.go b/cmd/legacy/daemon.go index d044a90478..6e06ee73cc 100644 --- a/cmd/legacy/daemon.go +++ b/cmd/legacy/daemon.go @@ -4,7 +4,6 @@ package legacy import ( "fmt" - "net/http" "os" "strings" "time" @@ -22,6 +21,7 @@ import ( crcache "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" kcfg "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/controller-runtime/pkg/healthz" crmgr "sigs.k8s.io/controller-runtime/pkg/manager" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -215,6 +215,14 @@ func (d *Daemon) Start() error { } //+kubebuilder:scaffold:builder + + if healthCheckErr := mgr.AddHealthzCheck("healthz", healthz.Ping); healthCheckErr != nil { + mainLogger.Fatal("Unable to set up health check", zap.Error(healthCheckErr)) + } + if addReadyCheckErr := mgr.AddReadyzCheck("readyz", healthz.Ping); addReadyCheckErr != nil { + mainLogger.Fatal("Unable to set up ready check", zap.Error(addReadyCheckErr)) + } + // k8s Client used for informers cl := kubernetes.NewForConfigOrDie(mgr.GetConfig()) @@ -307,15 +315,6 @@ func (d *Daemon) Start() error { go controllerMgr.Start(ctx) mainLogger.Info("Started controller manager") - // Set health checks - if err := mgr.AddHealthzCheck("healthz", func(_ 
*http.Request) error { return nil }); err != nil { - mainLogger.Error("unable to set up agent health check", zap.Error(err)) - } - - if err := mgr.AddReadyzCheck("readyz", func(_ *http.Request) error { return nil }); err != nil { - mainLogger.Error("unable to set up agent ready check", zap.Error(err)) - } - // Start all registered controllers. This will block until container receives SIGTERM. if err := mgr.Start(ctx); err != nil { mainLogger.Fatal("unable to start manager", zap.Error(err)) diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml index 27a708bb0e..e1e10275dc 100644 --- a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml @@ -80,8 +80,8 @@ spec: cpu: {{ .Values.resources.limits.cpu | quote }} readinessProbe: httpGet: - path: /readyz - port: {{ .Values.agent.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.agent.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} @@ -89,8 +89,8 @@ spec: successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} livenessProbe: httpGet: - path: /healthz - port: {{ .Values.agent.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.agent.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} @@ -215,8 +215,8 @@ spec: {{- end }} readinessProbe: httpGet: - path: /healthz - port: {{ 
.Values.agent.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.agent.container.retina.ports.containerPort }} initialDelaySeconds: 15 periodSeconds: 10 env: diff --git a/deploy/hubble/manifests/controller/helm/retina/values.yaml b/deploy/hubble/manifests/controller/helm/retina/values.yaml index d74f68bdb8..80c4a24c0a 100644 --- a/deploy/hubble/manifests/controller/helm/retina/values.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/values.yaml @@ -42,7 +42,7 @@ agent: init: enabled: true name: retina-agent-init - repository: ghcr.io/microsoft/retina/retina-initS + repository: ghcr.io/microsoft/retina/retina-init tag: "latest" pullPolicy: Always @@ -58,7 +58,6 @@ agent: metricsBindAddress: ":18080" ports: containerPort: 10093 - healthPort: 18081 # -- Node tolerations for pod assignment on nodes with taints # ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ tolerations: [] diff --git a/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml b/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml index a14f00316e..0f7217a55f 100644 --- a/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.16.3 name: tracesconfigurations.retina.sh spec: group: retina.sh diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml index 13db185078..7651d249f6 100644 --- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml @@ -49,8 +49,8 @@ 
spec: - name: {{ include "retina.name" . }} readinessProbe: httpGet: - path: /readyz - port: {{ .Values.daemonset.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.daemonset.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} @@ -58,8 +58,8 @@ spec: successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} livenessProbe: httpGet: - path: /healthz - port: {{ .Values.daemonset.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.daemonset.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} @@ -228,8 +228,8 @@ spec: fieldPath: status.hostIP readinessProbe: httpGet: - path: /readyz - port: {{ .Values.daemonset.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.daemonset.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} @@ -237,8 +237,8 @@ spec: successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} livenessProbe: httpGet: - path: /healthz - port: {{ .Values.daemonset.container.retina.ports.healthPort }} + path: /health + port: {{ .Values.daemonset.container.retina.ports.containerPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} diff 
--git a/deploy/legacy/manifests/controller/helm/retina/values.yaml b/deploy/legacy/manifests/controller/helm/retina/values.yaml index 060f6019f9..7dee8c519e 100644 --- a/deploy/legacy/manifests/controller/helm/retina/values.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/values.yaml @@ -87,7 +87,6 @@ daemonset: metricsBindAddress: ":18080" ports: containerPort: 10093 - healthPort: 18081 # volume mounts with name and mountPath volumeMounts: diff --git a/pkg/server/server.go b/pkg/server/server.go index d61e322eaa..733f331c1d 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -16,6 +16,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" "golang.org/x/sync/errgroup" + "sigs.k8s.io/controller-runtime/pkg/healthz" ) type Server struct { @@ -44,6 +45,8 @@ func (rt *Server) SetupHandlers() { exporter.RegisterMetricsServeCallback(func() { rt.servePrometheusMetrics() }) + rt.serveHealth() + rt.serveHealth2() rt.mux.HandleFunc("/debug/pprof/", pprof.Index) rt.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) rt.mux.HandleFunc("/debug/pprof/profile", pprof.Profile) @@ -61,6 +64,18 @@ func (rt *Server) servePrometheusMetrics() { rt.mux.Get("/metrics", promhttp.HandlerFor(exporter.CombinedGatherer, promhttp.HandlerOpts{}).ServeHTTP) } +func (rt *Server) serveHealth() { + rt.mux.Get("/health", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + rt.l.Error("serving health writing 20Ok") + }) +} + +func (rt *Server) serveHealth2() { + rt.mux.Get("/health2", healthz.CheckHandler{Checker: healthz.Ping}.ServeHTTP) + rt.l.Error("serving health2 with Ping") +} + func (rt *Server) Start(ctx context.Context, addr string) error { srv := &http.Server{Addr: addr, Handler: rt.mux} g, gctx := errgroup.WithContext(context.Background()) From cbcc57dfe1c66c4b4e4baed757e2803c29012c33 Mon Sep 17 00:00:00 2001 From: mereta Date: Thu, 23 Jan 2025 11:23:33 +0000 Subject: [PATCH 09/12] Undo Version ---
.../helm/retina/crds/retina.sh_tracesconfigurations.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml b/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml index 0f7217a55f..a14f00316e 100644 --- a/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.3 + controller-gen.kubebuilder.io/version: v0.14.0 name: tracesconfigurations.retina.sh spec: group: retina.sh From 8e06fd15ccfa3b1338b224a73f11809265d18850 Mon Sep 17 00:00:00 2001 From: mereta Date: Thu, 23 Jan 2025 11:24:27 +0000 Subject: [PATCH 10/12] Extend retina port to serve /health --- pkg/server/server.go | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pkg/server/server.go b/pkg/server/server.go index 733f331c1d..18aa5760db 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -46,7 +46,6 @@ func (rt *Server) SetupHandlers() { rt.servePrometheusMetrics() }) rt.serveHealth() - rt.serveHealth2() rt.mux.HandleFunc("/debug/pprof/", pprof.Index) rt.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) rt.mux.HandleFunc("/debug/pprof/profile", pprof.Profile) @@ -65,15 +64,7 @@ func (rt *Server) servePrometheusMetrics() { } func (rt *Server) serveHealth() { - rt.mux.Get("/health", func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - rt.l.Error("serving health writing 20Ok") - }) -} - -func (rt *Server) serveHealth2() { - rt.mux.Get("/health2", healthz.CheckHandler{Checker: healthz.Ping}.ServeHTTP) - rt.l.Error("serving health2 with Ping") + rt.mux.Get("/health", healthz.CheckHandler{Checker: healthz.Ping}.ServeHTTP) } func (rt 
*Server) Start(ctx context.Context, addr string) error { From 201e8c9da5fccbf730d0f7475ecf28adbc4eeeee Mon Sep 17 00:00:00 2001 From: mereta Date: Thu, 23 Jan 2025 14:21:46 +0000 Subject: [PATCH 11/12] Remove defaults --- .../retina/templates/agent/daemonset.yaml | 38 ++++++++++----- .../controller/helm/retina/values.yaml | 24 +++++----- .../helm/retina/templates/daemonset.yaml | 48 +++++++++---------- .../controller/helm/retina/values.yaml | 24 +++++----- 4 files changed, 73 insertions(+), 61 deletions(-) diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml index e1e10275dc..bca4bac8c4 100644 --- a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml @@ -82,20 +82,20 @@ spec: httpGet: path: /health port: {{ .Values.agent.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} - timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.readinessProbe.failureThreshold | default "3" }} - successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} livenessProbe: httpGet: path: /health port: {{ .Values.agent.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} - 
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }} - successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} env: - name: POD_NAME valueFrom: @@ -216,9 +216,21 @@ spec: readinessProbe: httpGet: path: /health - port: {{ .Values.agent.container.retina.ports.containerPort }} - initialDelaySeconds: 15 - periodSeconds: 10 + port: {{ .Values.retinaPort }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + livenessProbe: + httpGet: + path: /health + port: {{ .Values.retinaPort }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} env: - name: POD_NAME valueFrom: diff --git a/deploy/hubble/manifests/controller/helm/retina/values.yaml b/deploy/hubble/manifests/controller/helm/retina/values.yaml index 80c4a24c0a..58b58e07ef 100644 --- a/deploy/hubble/manifests/controller/helm/retina/values.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/values.yaml @@ -946,12 +946,12 @@ tls: ## @param readinessProbe.successThreshold [array] Success threshold for readinessProbe ## Ref: 
https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes ## -readinessProbe: {} - # initialDelaySeconds: 30 - # periodSeconds: 10 - # timeoutSeconds: 15 - # failureThreshold: 5 - # successThreshold: 1 +readinessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 15 + failureThreshold: 3 + successThreshold: 1 ## @param livenessProbe.initialDelaySeconds [array] Initial delay seconds for livenessProbe ## @param livenessProbe.periodSeconds [array] Period seconds for livenessProbe ## @param livenessProbe.timeoutSeconds [array] Timeout seconds for livenessProbe @@ -959,9 +959,9 @@ readinessProbe: {} ## @param livenessProbe.successThreshold [array] Success threshold for livenessProbe ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes ## -livenessProbe: {} - # initialDelaySeconds: 30 - # periodSeconds: 10 - # timeoutSeconds: 15 - # failureThreshold: 5 - # successThreshold: 1 +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 15 + failureThreshold: 3 + successThreshold: 1 diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml index 7651d249f6..1eddc9ba29 100644 --- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml @@ -50,21 +50,21 @@ spec: readinessProbe: httpGet: path: /health - port: {{ .Values.daemonset.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} - timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.readinessProbe.failureThreshold | default "3" }} - successThreshold: {{ 
.Values.readinessProbe.successThreshold | default "1" }} + port: {{ .Values.retinaPort }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} livenessProbe: httpGet: path: /health - port: {{ .Values.daemonset.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} - timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }} - successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }} + port: {{ .Values.retinaPort }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} image: {{ .Values.image.repository }}:{{ .Values.image.tag }} imagePullPolicy: {{ .Values.image.pullPolicy }} {{- if .Values.daemonset.container.retina.command }} @@ -229,21 +229,21 @@ spec: readinessProbe: httpGet: path: /health - port: {{ .Values.daemonset.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.readinessProbe.periodSeconds | default "30" }} - timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.readinessProbe.failureThreshold | default "3" }} - successThreshold: {{ .Values.readinessProbe.successThreshold | default "1" }} + port: {{ .Values.retinaPort }} 
+ initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} livenessProbe: httpGet: path: /health - port: {{ .Values.daemonset.container.retina.ports.containerPort }} - initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }} - periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }} - timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }} - failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }} - successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }} + port: {{ .Values.retinaPort }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} securityContext: capabilities: add: diff --git a/deploy/legacy/manifests/controller/helm/retina/values.yaml b/deploy/legacy/manifests/controller/helm/retina/values.yaml index 7dee8c519e..658d6b80b7 100644 --- a/deploy/legacy/manifests/controller/helm/retina/values.yaml +++ b/deploy/legacy/manifests/controller/helm/retina/values.yaml @@ -173,12 +173,12 @@ affinity: [] ## @param readinessProbe.successThreshold [array] Success threshold for readinessProbe ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes ## -readinessProbe: {} - # initialDelaySeconds: 30 - # periodSeconds: 10 - # timeoutSeconds: 15 - # failureThreshold: 5 - # successThreshold: 1 +readinessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 15 + 
failureThreshold: 3 + successThreshold: 1 ## @param livenessProbe.initialDelaySeconds [array] Initial delay seconds for livenessProbe ## @param livenessProbe.periodSeconds [array] Period seconds for livenessProbe ## @param livenessProbe.timeoutSeconds [array] Timeout seconds for livenessProbe @@ -186,12 +186,12 @@ readinessProbe: {} ## @param livenessProbe.successThreshold [array] Success threshold for livenessProbe ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes ## -livenessProbe: {} - # initialDelaySeconds: 30 - # periodSeconds: 10 - # timeoutSeconds: 15 - # failureThreshold: 5 - # successThreshold: 1 +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 15 + failureThreshold: 3 + successThreshold: 1 ## @param priorityClassName [string] Indicates the pod's priority ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ From 6abe4025957dd65431ec477e2792b05895fec058 Mon Sep 17 00:00:00 2001 From: mereta Date: Wed, 29 Jan 2025 16:30:51 +0000 Subject: [PATCH 12/12] replace port --- .../controller/helm/retina/templates/agent/daemonset.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml index bca4bac8c4..e01d9bfdf8 100644 --- a/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml +++ b/deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml @@ -81,7 +81,7 @@ spec: readinessProbe: httpGet: path: /health - port: {{ .Values.agent.container.retina.ports.containerPort }} + port: {{ .Values.retinaPort }} initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} @@ -90,7 +90,7 @@ spec: 
livenessProbe: httpGet: path: /health - port: {{ .Values.agent.container.retina.ports.containerPort }} + port: {{ .Values.retinaPort }} initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}