From b20e6231a5a0807429336be0f4ba365fbf73e59a Mon Sep 17 00:00:00 2001 From: Alex Castilio dos Santos Date: Fri, 24 Jan 2025 10:09:51 +0000 Subject: [PATCH 1/2] chore: add APP_INSIGHTS_ID to image build Signed-off-by: Alex Castilio dos Santos --- .github/workflows/images.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/images.yaml b/.github/workflows/images.yaml index 915cc09698..3fa861bf49 100644 --- a/.github/workflows/images.yaml +++ b/.github/workflows/images.yaml @@ -52,6 +52,7 @@ jobs: IMAGE_NAMESPACE=${{ github.repository }} \ PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \ IMAGE_REGISTRY=${{ vars.ACR_NAME }} \ + APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \ BUILDX_ACTION=--push else make retina-image \ @@ -102,6 +103,7 @@ jobs: IMAGE_NAMESPACE=${{ github.repository }} \ PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \ IMAGE_REGISTRY=${{ vars.ACR_NAME }} \ + APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \ WINDOWS_YEARS=${{ matrix.year }} \ BUILDX_ACTION=--push else @@ -153,6 +155,7 @@ jobs: IMAGE_NAMESPACE=${{ github.repository }} \ PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \ IMAGE_REGISTRY=${{ vars.ACR_NAME }} \ + APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \ BUILDX_ACTION=--push else make retina-operator-image \ From b82c68c51717d9e71afd5115484c9b22510f9d38 Mon Sep 17 00:00:00 2001 From: Alex Castilio dos Santos Date: Fri, 24 Jan 2025 12:42:42 +0000 Subject: [PATCH 2/2] chore: make heartbeat interval configurable Signed-off-by: Alex Castilio dos Santos --- cmd/standard/daemon.go | 6 ++--- .../helm/retina/templates/configmap.yaml | 2 ++ .../helm/retina/templates/operator.yaml | 1 + .../controller/helm/retina/values.yaml | 2 ++ operator/cmd/standard/deployment.go | 5 +--- operator/config/config.go | 11 +++++++-- operator/config/config_test.go | 24 +++++++++++++++++++ operator/config/testwith/config.yaml | 9 +++++++ pkg/config/config.go | 6 +++++ pkg/config/config_test.go | 1 + 
pkg/config/testwith/config.yaml | 1 + 11 files changed, 58 insertions(+), 10 deletions(-) create mode 100644 operator/config/config_test.go create mode 100644 operator/config/testwith/config.yaml diff --git a/cmd/standard/daemon.go b/cmd/standard/daemon.go index 00655444bc..121bf70645 100644 --- a/cmd/standard/daemon.go +++ b/cmd/standard/daemon.go @@ -6,7 +6,6 @@ import ( "fmt" "os" "strings" - "time" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" @@ -48,8 +47,7 @@ import ( ) const ( - logFileName = "retina.log" - heartbeatInterval = 15 * time.Minute + logFileName = "retina.log" nodeNameEnvKey = "NODE_NAME" nodeIPEnvKey = "NODE_IP" @@ -309,7 +307,7 @@ func (d *Daemon) Start() error { defer controllerMgr.Stop(ctx) // start heartbeat goroutine for application insights - go tel.Heartbeat(ctx, heartbeatInterval) + go tel.Heartbeat(ctx, daemonConfig.TelemetryInterval) // Start controller manager, which will start http server and plugin manager. go controllerMgr.Start(ctx) diff --git a/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml b/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml index a53b8537a6..65075bc424 100644 --- a/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml +++ b/deploy/standard/manifests/controller/helm/retina/templates/configmap.yaml @@ -24,6 +24,7 @@ data: enableAnnotations: {{ .Values.enableAnnotations }} bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }} dataAggregationLevel: {{ .Values.dataAggregationLevel }} + telemetryInterval: {{ .Values.daemonset.telemetryInterval }} {{- end}} --- {{- if .Values.os.windows}} @@ -48,6 +49,7 @@ data: enableTelemetry: {{ .Values.enableTelemetry }} enablePodLevel: {{ .Values.enablePodLevel }} remoteContext: {{ .Values.remoteContext }} + telemetryInterval: {{ .Values.daemonset.telemetryInterval }} {{- end}} diff --git a/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml 
b/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml index 60cce2e458..61712b09a8 100644 --- a/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml +++ b/deploy/standard/manifests/controller/helm/retina/templates/operator.yaml @@ -307,6 +307,7 @@ data: captureDebug: {{ .Values.capture.debug }} captureJobNumLimit: {{ .Values.capture.jobNumLimit }} enableManagedStorageAccount: {{ .Values.capture.enableManagedStorageAccount }} + telemetryInterval: {{ .Values.operator.telemetryInterval }} {{- if .Values.capture.enableManagedStorageAccount }} azureCredentialConfig: /etc/cloud-config/azure.json {{- end }} diff --git a/deploy/standard/manifests/controller/helm/retina/values.yaml b/deploy/standard/manifests/controller/helm/retina/values.yaml index 7dee8c519e..1f0298a6f7 100644 --- a/deploy/standard/manifests/controller/helm/retina/values.yaml +++ b/deploy/standard/manifests/controller/helm/retina/values.yaml @@ -27,6 +27,7 @@ operator: args: - "--config" - "/retina/operator-config.yaml" + telemetryInterval: "5m" image: repository: ghcr.io/microsoft/retina/retina-agent @@ -87,6 +88,7 @@ daemonset: metricsBindAddress: ":18080" ports: containerPort: 10093 + telemetryInterval: "15m" # volume mounts with name and mountPath volumeMounts: diff --git a/operator/cmd/standard/deployment.go b/operator/cmd/standard/deployment.go index 1371643d4e..f297aa622f 100644 --- a/operator/cmd/standard/deployment.go +++ b/operator/cmd/standard/deployment.go @@ -9,7 +9,6 @@ import ( "net/http" "net/http/pprof" "os" - "time" "go.uber.org/zap/zapcore" @@ -55,8 +54,6 @@ var ( MaxFileSizeMB = 100 MaxBackups = 3 MaxAgeDays = 30 - - HeartbeatFrequency = 5 * time.Minute ) func init() { @@ -255,7 +252,7 @@ func (o *Operator) Start() { } // start heartbeat goroutine for application insights - go tel.Heartbeat(ctx, HeartbeatFrequency) + go tel.Heartbeat(ctx, oconfig.TelemetryInterval) } func EnablePProf() { diff --git a/operator/config/config.go 
b/operator/config/config.go index ade4c9ab2f..d46d2dab23 100644 --- a/operator/config/config.go +++ b/operator/config/config.go @@ -2,6 +2,7 @@ package config import ( "fmt" + "time" "github.com/microsoft/retina/pkg/config" "github.com/spf13/viper" @@ -14,8 +15,9 @@ type OperatorConfig struct { EnableTelemetry bool `yaml:"enableTelemetry"` LogLevel string `yaml:"logLevel"` // EnableRetinaEndpoint indicates whether to enable RetinaEndpoint - EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"` - RemoteContext bool `yaml:"remoteContext"` + EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"` + RemoteContext bool `yaml:"remoteContext"` + TelemetryInterval time.Duration `yaml:"telemetryInterval"` } func GetConfig(cfgFileName string) (*OperatorConfig, error) { @@ -35,5 +37,10 @@ func GetConfig(cfgFileName string) (*OperatorConfig, error) { return nil, fmt.Errorf("error unmarshalling config: %w", err) } + // If unset, default telemetry interval to 5 minutes. + if cfg.TelemetryInterval == 0 { + cfg.TelemetryInterval = 5 * time.Minute + } + return &cfg, nil } diff --git a/operator/config/config_test.go b/operator/config/config_test.go new file mode 100644 index 0000000000..24a2b0f7ff --- /dev/null +++ b/operator/config/config_test.go @@ -0,0 +1,24 @@ +package config_test + +import ( + "testing" + "time" + + "github.com/microsoft/retina/operator/config" +) + +func TestGetConfig(t *testing.T) { + c, err := config.GetConfig("./testwith/config.yaml") + if err != nil { + t.Fatalf("Expected no error, instead got %+v", err) + } + + if !c.InstallCRDs || + !c.EnableTelemetry || + c.LogLevel != "info" || + !c.EnableRetinaEndpoint || + !c.RemoteContext || + c.TelemetryInterval != 15*time.Minute { + t.Errorf("Expected config should be same as ./testwith/config.yaml; instead got %+v", c) + } +} diff --git a/operator/config/testwith/config.yaml b/operator/config/testwith/config.yaml new file mode 100644 index 0000000000..d3ac432670 --- /dev/null +++ 
b/operator/config/testwith/config.yaml @@ -0,0 +1,9 @@ +apiServer: + host: "0.0.0.0" + port: 10093 +installCRDs: true +enableTelemetry: true +logLevel: info +enableRetinaEndpoint: true +remoteContext: true +telemetryInterval: "15m" diff --git a/pkg/config/config.go b/pkg/config/config.go index 5a5098456e..9c2c252d0d 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -67,6 +67,7 @@ type Config struct { BypassLookupIPOfInterest bool `yaml:"bypassLookupIPOfInterest"` DataAggregationLevel Level `yaml:"dataAggregationLevel"` MonitorSockPath string `yaml:"monitorSockPath"` + TelemetryInterval time.Duration `yaml:"telemetryInterval"` } func GetConfig(cfgFilename string) (*Config, error) { @@ -107,6 +108,11 @@ func GetConfig(cfgFilename string) (*Config, error) { log.Print("metricsInterval is deprecated, please use metricsIntervalDuration instead") } + // If unset, default telemetry interval to 15 minutes. + if config.TelemetryInterval == 0 { + config.TelemetryInterval = 15 * time.Minute + } + return &config, nil } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 203be5912b..0cea1aa48e 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -26,6 +26,7 @@ func TestGetConfig(t *testing.T) { !c.EnableRetinaEndpoint || c.RemoteContext || c.EnableAnnotations || + c.TelemetryInterval != 15*time.Minute || c.DataAggregationLevel != Low { t.Fatalf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c) } diff --git a/pkg/config/testwith/config.yaml b/pkg/config/testwith/config.yaml index 08b1d0f0f9..edcb5d685a 100644 --- a/pkg/config/testwith/config.yaml +++ b/pkg/config/testwith/config.yaml @@ -9,3 +9,4 @@ metricsIntervalDuration: "10s" # used to export telemetry to AppInsights telemetryEnabled: true dataAggregationLevel: "low" +telemetryInterval: "15m"