Skip to content

Commit

Permalink
chore: make heartbeat interval configurable
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Castilio dos Santos <[email protected]>
  • Loading branch information
alexcastilio committed Jan 24, 2025
1 parent b20e623 commit b82c68c
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 10 deletions.
6 changes: 2 additions & 4 deletions cmd/standard/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"os"
"strings"
"time"

"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -48,8 +47,7 @@ import (
)

const (
logFileName = "retina.log"
heartbeatInterval = 15 * time.Minute
logFileName = "retina.log"

nodeNameEnvKey = "NODE_NAME"
nodeIPEnvKey = "NODE_IP"
Expand Down Expand Up @@ -309,7 +307,7 @@ func (d *Daemon) Start() error {
defer controllerMgr.Stop(ctx)

// start heartbeat goroutine for application insights
go tel.Heartbeat(ctx, heartbeatInterval)
go tel.Heartbeat(ctx, daemonConfig.TelemetryInterval)

// Start controller manager, which will start http server and plugin manager.
go controllerMgr.Start(ctx)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ data:
enableAnnotations: {{ .Values.enableAnnotations }}
bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }}
dataAggregationLevel: {{ .Values.dataAggregationLevel }}
telemetryInterval: {{ .Values.daemonset.telemetryInterval }}
{{- end}}
---
{{- if .Values.os.windows}}
Expand All @@ -48,6 +49,7 @@ data:
enableTelemetry: {{ .Values.enableTelemetry }}
enablePodLevel: {{ .Values.enablePodLevel }}
remoteContext: {{ .Values.remoteContext }}
telemetryInterval: {{ .Values.daemonset.telemetryInterval }}
{{- end}}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ data:
captureDebug: {{ .Values.capture.debug }}
captureJobNumLimit: {{ .Values.capture.jobNumLimit }}
enableManagedStorageAccount: {{ .Values.capture.enableManagedStorageAccount }}
telemetryInterval: {{ .Values.operator.telemetryInterval }}
{{- if .Values.capture.enableManagedStorageAccount }}
azureCredentialConfig: /etc/cloud-config/azure.json
{{- end }}
Expand Down
2 changes: 2 additions & 0 deletions deploy/standard/manifests/controller/helm/retina/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ operator:
args:
- "--config"
- "/retina/operator-config.yaml"
telemetryInterval: "5m"

image:
repository: ghcr.io/microsoft/retina/retina-agent
Expand Down Expand Up @@ -87,6 +88,7 @@ daemonset:
metricsBindAddress: ":18080"
ports:
containerPort: 10093
telemetryInterval: "15m"

# volume mounts with name and mountPath
volumeMounts:
Expand Down
5 changes: 1 addition & 4 deletions operator/cmd/standard/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"net/http"
"net/http/pprof"
"os"
"time"

"go.uber.org/zap/zapcore"

Expand Down Expand Up @@ -55,8 +54,6 @@ var (
MaxFileSizeMB = 100
MaxBackups = 3
MaxAgeDays = 30

HeartbeatFrequency = 5 * time.Minute
)

func init() {
Expand Down Expand Up @@ -255,7 +252,7 @@ func (o *Operator) Start() {
}

// start heartbeat goroutine for application insights
go tel.Heartbeat(ctx, HeartbeatFrequency)
go tel.Heartbeat(ctx, oconfig.TelemetryInterval)
}

func EnablePProf() {
Expand Down
11 changes: 9 additions & 2 deletions operator/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package config

import (
"fmt"
"time"

"github.com/microsoft/retina/pkg/config"
"github.com/spf13/viper"
Expand All @@ -14,8 +15,9 @@ type OperatorConfig struct {
EnableTelemetry bool `yaml:"enableTelemetry"`
LogLevel string `yaml:"logLevel"`
// EnableRetinaEndpoint indicates whether to enable RetinaEndpoint
EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"`
RemoteContext bool `yaml:"remoteContext"`
EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"`
RemoteContext bool `yaml:"remoteContext"`
TelemetryInterval time.Duration `yaml:"telemetryInterval"`
}

func GetConfig(cfgFileName string) (*OperatorConfig, error) {
Expand All @@ -35,5 +37,10 @@ func GetConfig(cfgFileName string) (*OperatorConfig, error) {
return nil, fmt.Errorf("error unmarshalling config: %w", err)
}

// If unset, default telemetry interval to 5 minutes.
if cfg.TelemetryInterval == 0 {
cfg.TelemetryInterval = 5 * time.Minute
}

return &cfg, nil
}
24 changes: 24 additions & 0 deletions operator/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package config_test

import (
"testing"
"time"

"github.com/microsoft/retina/operator/config"
)

func TestGetConfig(t *testing.T) {
c, err := config.GetConfig("./testwith/config.yaml")
if err != nil {
t.Errorf("Expected no error, instead got %+v", err)
}

if !c.InstallCRDs ||
!c.EnableTelemetry ||
c.LogLevel != "info" ||
!c.EnableRetinaEndpoint ||
!c.RemoteContext ||
c.TelemetryInterval != 15*time.Minute {
t.Errorf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c)
}
}
9 changes: 9 additions & 0 deletions operator/config/testwith/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
apiServer:
host: "0.0.0.0"
port: 10093
installCRDs: true
enableTelemetry: true
logLevel: info
enableRetinaEndpoint: true
remoteContext: true
telemetryInterval: "15m"
6 changes: 6 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type Config struct {
BypassLookupIPOfInterest bool `yaml:"bypassLookupIPOfInterest"`
DataAggregationLevel Level `yaml:"dataAggregationLevel"`
MonitorSockPath string `yaml:"monitorSockPath"`
TelemetryInterval time.Duration `yaml:"telemetryInterval"`
}

func GetConfig(cfgFilename string) (*Config, error) {
Expand Down Expand Up @@ -107,6 +108,11 @@ func GetConfig(cfgFilename string) (*Config, error) {
log.Print("metricsInterval is deprecated, please use metricsIntervalDuration instead")
}

// If unset, default telemetry interval to 15 minutes.
if config.TelemetryInterval == 0 {
config.TelemetryInterval = 15 * time.Minute
}

return &config, nil
}

Expand Down
1 change: 1 addition & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func TestGetConfig(t *testing.T) {
!c.EnableRetinaEndpoint ||
c.RemoteContext ||
c.EnableAnnotations ||
c.TelemetryInterval != 15*time.Minute ||
c.DataAggregationLevel != Low {
t.Fatalf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/config/testwith/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ metricsIntervalDuration: "10s"
# used to export telemetry to AppInsights
telemetryEnabled: true
dataAggregationLevel: "low"
telemetryInterval: "15m"

0 comments on commit b82c68c

Please sign in to comment.