From fd1f6a8689072b2d93067ab578e133bccd1675b3 Mon Sep 17 00:00:00 2001 From: Alex Castilio dos Santos Date: Fri, 24 Jan 2025 12:42:42 +0000 Subject: [PATCH] chore: make heartbeat interval configurable Signed-off-by: Alex Castilio dos Santos --- cmd/standard/daemon.go | 6 ++-- .../helm/retina/templates/configmap.yaml | 2 ++ .../helm/retina/templates/operator.yaml | 1 + .../controller/helm/retina/values.yaml | 2 ++ operator/cmd/standard/deployment.go | 5 +-- operator/config/config.go | 6 ++-- operator/config/config_test.go | 36 +++++++++++++++++++ operator/config/testwith/config.yaml | 9 +++++ pkg/config/config.go | 1 + pkg/config/config_test.go | 1 + pkg/config/testwith/config.yaml | 1 + 11 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 operator/config/config_test.go create mode 100644 operator/config/testwith/config.yaml diff --git a/cmd/standard/daemon.go b/cmd/standard/daemon.go index 00655444bc..121bf70645 100644 --- a/cmd/standard/daemon.go +++ b/cmd/standard/daemon.go @@ -6,7 +6,6 @@ import ( "fmt" "os" "strings" - "time" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" @@ -48,8 +47,7 @@ import ( ) const ( - logFileName = "retina.log" - heartbeatInterval = 15 * time.Minute + logFileName = "retina.log" nodeNameEnvKey = "NODE_NAME" nodeIPEnvKey = "NODE_IP" @@ -309,7 +307,7 @@ func (d *Daemon) Start() error { defer controllerMgr.Stop(ctx) // start heartbeat goroutine for application insights - go tel.Heartbeat(ctx, heartbeatInterval) + go tel.Heartbeat(ctx, daemonConfig.TelemetryInterval) // Start controller manager, which will start http server and plugin manager. go controllerMgr.Start(ctx) diff --git a/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml b/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml index a53b8537a6..65075bc424 100644 --- a/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml +++ b/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml @@ -24,6 +24,7 @@ data: enableAnnotations: {{ .Values.enableAnnotations }} bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }} dataAggregationLevel: {{ .Values.dataAggregationLevel }} + telemetryInterval: {{ .Values.daemonset.telemetryInterval }} {{- end}} --- {{- if .Values.os.windows}} @@ -48,6 +49,7 @@ data: enableTelemetry: {{ .Values.enableTelemetry }} enablePodLevel: {{ .Values.enablePodLevel }} remoteContext: {{ .Values.remoteContext }} + telemetryInterval: {{ .Values.daemonset.telemetryInterval }} {{- end}} diff --git a/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml b/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml index 60cce2e458..61712b09a8 100644 --- a/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml +++ b/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml @@ -307,6 +307,7 @@ data: captureDebug: {{ .Values.capture.debug }} captureJobNumLimit: {{ .Values.capture.jobNumLimit }} enableManagedStorageAccount: {{ .Values.capture.enableManagedStorageAccount }} + telemetryInterval: {{ .Values.operator.telemetryInterval }} {{- if .Values.capture.enableManagedStorageAccount }} azureCredentialConfig: /etc/cloud-config/azure.json {{- end }} diff --git a/deploy/standard/manifests/controller/helm/retina/values.yaml b/deploy/standard/manifests/controller/helm/retina/values.yaml index 7dee8c519e..1f0298a6f7 100644 --- a/deploy/standard/manifests/controller/helm/retina/values.yaml +++ b/deploy/standard/manifests/controller/helm/retina/values.yaml @@ -27,6 +27,7 @@ operator: args: - "--config" - "/retina/operator-config.yaml" + telemetryInterval: "5m" image: repository: ghcr.io/microsoft/retina/retina-agent @@ -87,6 +88,7 @@ daemonset: metricsBindAddress: ":18080" ports: containerPort: 10093 + telemetryInterval: "15m" # volume mounts with name and mountPath volumeMounts: diff --git a/operator/cmd/standard/deployment.go b/operator/cmd/standard/deployment.go index 1371643d4e..f297aa622f 100644 --- a/operator/cmd/standard/deployment.go +++ b/operator/cmd/standard/deployment.go @@ -9,7 +9,6 @@ import ( "net/http" "net/http/pprof" "os" - "time" "go.uber.org/zap/zapcore" @@ -55,8 +54,6 @@ var ( MaxFileSizeMB = 100 MaxBackups = 3 MaxAgeDays = 30 - - HeartbeatFrequency = 5 * time.Minute ) func init() { @@ -255,7 +252,7 @@ func (o *Operator) Start() { } // start heartbeat goroutine for application insights - go tel.Heartbeat(ctx, HeartbeatFrequency) + go tel.Heartbeat(ctx, oconfig.TelemetryInterval) } func EnablePProf() { diff --git a/operator/config/config.go b/operator/config/config.go index ade4c9ab2f..c90c1bd832 100644 --- a/operator/config/config.go +++ b/operator/config/config.go @@ -2,6 +2,7 @@ package config import ( "fmt" + "time" "github.com/microsoft/retina/pkg/config" "github.com/spf13/viper" @@ -14,8 +15,9 @@ type OperatorConfig struct { EnableTelemetry bool `yaml:"enableTelemetry"` LogLevel string `yaml:"logLevel"` // EnableRetinaEndpoint indicates whether to enable RetinaEndpoint - EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"` - RemoteContext bool `yaml:"remoteContext"` + EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"` + RemoteContext bool `yaml:"remoteContext"` + TelemetryInterval time.Duration `yaml:"telemetryInterval"` } func GetConfig(cfgFileName string) (*OperatorConfig, error) { diff --git a/operator/config/config_test.go b/operator/config/config_test.go new file mode 100644 index 0000000000..31c847b293 --- /dev/null +++ b/operator/config/config_test.go @@ -0,0 +1,36 @@ +package config_test + +import ( + "testing" + "time" + + "github.com/microsoft/retina/operator/config" +) + +// type OperatorConfig struct { +// config.CaptureConfig `mapstructure:",squash"` +// +// InstallCRDs bool `yaml:"installCRDs"` +// EnableTelemetry bool `yaml:"enableTelemetry"` +// LogLevel string `yaml:"logLevel"` +// // EnableRetinaEndpoint indicates whether to enable RetinaEndpoint +// EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"` +// RemoteContext bool `yaml:"remoteContext"` +// TelemetryInterval time.Duration `yaml:"telemetryInterval"` +// } +// +func TestGetConfig(t *testing.T) { + c, err := config.GetConfig("./testwith/config.yaml") + if err != nil { + t.Errorf("Expected no error, instead got %+v", err) + } + if !c.InstallCRDs || + !c.EnableTelemetry || + c.LogLevel != "info" || + !c.EnableRetinaEndpoint || + !c.RemoteContext || + c.TelemetryInterval != 15*time.Minute { + t.Errorf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c) + } + +} diff --git a/operator/config/testwith/config.yaml b/operator/config/testwith/config.yaml new file mode 100644 index 0000000000..d3ac432670 --- /dev/null +++ b/operator/config/testwith/config.yaml @@ -0,0 +1,9 @@ +apiServer: + host: "0.0.0.0" + port: 10093 +installCRDs: true +enableTelemetry: true +logLevel: info +enableRetinaEndpoint: true +remoteContext: true +telemetryInterval: "15m" diff --git a/pkg/config/config.go b/pkg/config/config.go index 5a5098456e..b75cbdd700 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -67,6 +67,7 @@ type Config struct { BypassLookupIPOfInterest bool `yaml:"bypassLookupIPOfInterest"` DataAggregationLevel Level `yaml:"dataAggregationLevel"` MonitorSockPath string `yaml:"monitorSockPath"` + TelemetryInterval time.Duration `yaml:"telemetryInterval"` } func GetConfig(cfgFilename string) (*Config, error) { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 203be5912b..0cea1aa48e 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -26,6 +26,7 @@ func TestGetConfig(t *testing.T) { !c.EnableRetinaEndpoint || c.RemoteContext || c.EnableAnnotations || + c.TelemetryInterval != 15*time.Minute || c.DataAggregationLevel != Low { t.Fatalf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c) } diff --git a/pkg/config/testwith/config.yaml b/pkg/config/testwith/config.yaml index 08b1d0f0f9..edcb5d685a 100644 --- a/pkg/config/testwith/config.yaml +++ b/pkg/config/testwith/config.yaml @@ -9,3 +9,4 @@ metricsIntervalDuration: "10s" # used to export telemetry to AppInsights telemetryEnabled: true dataAggregationLevel: "low" +telemetryInterval: "15m"