From 5ced912fa438436b1dc844c53be81c35d75f2ec0 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 20 May 2024 11:32:04 -0700 Subject: [PATCH] sync yamls, agent log level to info and remove redundant test http server (#1256) * updates * update trivy ignore * update the addon-token-adapter image --- build/linux/installer/conf/container.conf | 26 ++++++------ build/linux/installer/conf/kube.conf | 38 ++++++++--------- .../conf/windows_rs_containerinventory.conf | 2 +- .../linux/installer/conf/windows_rs_perf.conf | 2 +- build/windows/installer/conf/fluent.conf | 6 +-- charts/azuremonitor-containers/values.yaml | 7 +++- kubernetes/ama-logs.yaml | 41 +++++++++++++++++-- source/plugins/go/src/oms.go | 9 ---- 8 files changed, 79 insertions(+), 52 deletions(-) diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 438c0891d..5f987b0a5 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -7,7 +7,7 @@ port 25225 bind 127.0.0.1 - + # MDM metrics from telegraf @type tcp @@ -22,7 +22,7 @@ @type containerinventory tag oneagent.containerInsights.CONTAINER_INVENTORY_BLOB run_interval 60 - @log_level debug + @log_level info #cadvisor perf @@ -30,7 +30,7 @@ @type cadvisor_perf tag oneagent.containerInsights.LINUX_PERF_BLOB run_interval 60 - @log_level debug + @log_level info #custom_metrics_mdm filter plugin @@ -42,13 +42,13 @@ @type telegraf2mdm - @log_level debug + @log_level info #containerinventory @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -66,15 +66,15 @@ retry_max_times 10 retry_wait 5s retry_max_interval 5m - flush_thread_count 5 + flush_thread_count 5 - keepalive true + keepalive true #cadvisorperf @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -92,14 +92,14 @@ retry_max_times 10 retry_wait 5s retry_max_interval 5m - flush_thread_count 5 + flush_thread_count 5 keepalive true @type mdm - @log_level debug + @log_level info @type file path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer @@ -109,7 +109,7 @@ retry_max_times 10 retry_wait 5s retry_max_interval 5m - flush_thread_count 5 + flush_thread_count 5 retry_mdm_post_wait_minutes 30 @@ -117,7 +117,7 @@ #InsightsMetrics @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -135,7 +135,7 @@ retry_max_times 10 retry_wait 5s retry_max_interval 5m - flush_thread_count 5 + flush_thread_count 5 keepalive true diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 9841072fa..5fbf1a378 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -10,7 +10,7 @@ @type forward @id out_perf_fwd - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -36,7 +36,7 @@ @type forward @id out_insights_metrics_fwd - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -73,13 +73,13 @@ @type kube_podinventory tag oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB run_interval 60 - @log_level debug + @log_level info #kubepodinventory @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -105,7 +105,7 @@ #kubeservices @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -134,13 +134,13 @@ @type kube_nodes tag oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB run_interval 60 - @log_level debug + @log_level info #containernodeinventory @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -171,7 +171,7 @@ #kubenodeinventory @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -197,7 +197,7 @@ @type mdm @id out_mdm_nodeinventory - @log_level debug + @log_level info @type file path /var/opt/microsoft/docker-cimprov/state/out_mdm_nodeinventory*.buffer @@ -219,13 +219,13 @@ @type kube_events tag oneagent.containerInsights.KUBE_EVENTS_BLOB run_interval 60 - @log_level debug + @log_level info #kubeevents @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -253,13 +253,13 @@ @type kube_podmdminventory run_interval 60 - @log_level debug + @log_level info @type mdm @id out_mdm_podinventory - @log_level debug + @log_level info @type file path /var/opt/microsoft/docker-cimprov/state/out_mdm_podinventory*.buffer @@ -282,7 +282,7 @@ @type kube_perfinventory tag oneagent.containerInsights.LINUX_PERF_BLOB run_interval 60 - @log_level debug + @log_level info #Kubernetes Persistent Volume inventory @@ -290,7 +290,7 @@ @type kube_pvinventory tag oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB run_interval 60 - @log_level debug + @log_level info #@include windows_rs_perf.conf @@ -300,7 +300,7 @@ @type kubestate_deployments tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB run_interval 60 - @log_level debug + @log_level info #Kubernetes object state - HPA @@ -308,13 +308,13 @@ @type kubestate_hpa tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB run_interval 60 - @log_level debug + @log_level info #kubepvinventory @type forward - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none @@ -340,7 +340,7 @@ @type mdm @id out_mdm_perf - @log_level debug + @log_level info @type file path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer diff --git a/build/linux/installer/conf/windows_rs_containerinventory.conf b/build/linux/installer/conf/windows_rs_containerinventory.conf index 7d8e95570..1fe721f67 100644 --- a/build/linux/installer/conf/windows_rs_containerinventory.conf +++ b/build/linux/installer/conf/windows_rs_containerinventory.conf @@ -2,7 +2,7 @@ @type forward @id out_ci_fwd - @log_level debug + @log_level info send_timeout 30 connect_timeout 30 heartbeat_type none diff --git a/build/linux/installer/conf/windows_rs_perf.conf b/build/linux/installer/conf/windows_rs_perf.conf index 9605b90f0..4a22967bb 100644 --- a/build/linux/installer/conf/windows_rs_perf.conf +++ b/build/linux/installer/conf/windows_rs_perf.conf @@ -3,5 +3,5 @@ @type win_cadvisor_perf tag oneagent.containerInsights.LINUX_PERF_BLOB run_interval 60 - @log_level debug + @log_level info \ No newline at end of file diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf index 5751dc225..b3e423265 100644 --- a/build/windows/installer/conf/fluent.conf +++ b/build/windows/installer/conf/fluent.conf @@ -8,7 +8,7 @@ @type cadvisor_perf tag oms.api.cadvisorperf run_interval 60 - @log_level debug + @log_level info #custom_metrics_mdm filter plugin @@ -21,7 +21,7 @@ @type mdm - @log_level debug + @log_level info @type file path /etc/amalogswindows/out_mdm_cdvisorperf.buffer @@ -31,7 +31,7 @@ retry_max_times 10 retry_wait 5s retry_max_interval 5m - flush_thread_count 5 + flush_thread_count 5 retry_mdm_post_wait_minutes 30 diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml index 47fa81174..05340b3af 100644 --- a/charts/azuremonitor-containers/values.yaml +++ b/charts/azuremonitor-containers/values.yaml @@ -182,9 +182,12 @@ amalogs: cpu: 150m memory: 750Mi daemonsetwindows: - limits: + requests: cpu: 500m - memory: 600Mi + memory: 700Mi + limits: + cpu: 2 + memory: 2Gi deployment: requests: cpu: 150m diff --git a/kubernetes/ama-logs.yaml b/kubernetes/ama-logs.yaml index 7bef3df24..f6491ded9 100644 --- a/kubernetes/ama-logs.yaml +++ b/kubernetes/ama-logs.yaml @@ -363,8 +363,9 @@ spec: # - --secret-name=aad-msi-auth-token # - --token-server-listening-port=8888 # - --health-server-listening-port=9999 + # - --restart-pod-waiting-minutes-on-broken-connection=240 # # Make sure this matching with version in AKS RP side - # image: mcr.microsoft.com/aks/msi/addon-token-adapter:master.221118.2 + # image: mcr.microsoft.com/aks/msi/addon-token-adapter:master.230804.1 # imagePullPolicy: IfNotPresent # env: # - name: AZMON_COLLECT_ENV @@ -446,6 +447,16 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName + - name: AZMON_RESOURCE_OPTIMIZATION_ENABLED + value: "false" + - name: APPMONITORING_AUTOINSTRUMENTATION_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_PORT + value: "28331" + - name: CUSTOM_METRICS_DISABLED + value: "false" securityContext: privileged: true ports: @@ -739,8 +750,9 @@ spec: # - --secret-name=aad-msi-auth-token # - --token-server-listening-port=8888 # - --health-server-listening-port=9999 + # - --restart-pod-waiting-minutes-on-broken-connection=240 # # Make sure this matching with version in AKS RP side - # image: mcr.microsoft.com/aks/msi/addon-token-adapter:master.221118.2 + # image: mcr.microsoft.com/aks/msi/addon-token-adapter:master.230804.1 # imagePullPolicy: IfNotPresent # env: # - name: AZMON_COLLECT_ENV @@ -821,6 +833,14 @@ spec: # Uncomment below lines when the Addon-resizer VPA enabled # - name: RS_ADDON-RESIZER_VPA_ENABLED # value: "true" + - name: APPMONITORING_AUTOINSTRUMENTATION_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_PORT + value: "28331" + - name: CUSTOM_METRICS_DISABLED + value: "false" securityContext: privileged: true ports: @@ -990,7 +1010,7 @@ spec: # - --secret-name=aad-msi-auth-token # - --token-server-listening-port=7777 # - --health-server-listening-port=9999 - # image: "mcr.microsoft.com/aks/hcp/addon-token-adapter:20230120winbeta" + # image: "mcr.microsoft.com/aks/msi/addon-token-adapter:master.240102.1" # imagePullPolicy: Always # livenessProbe: # httpGet: @@ -1012,9 +1032,12 @@ spec: image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-3.1.20" imagePullPolicy: IfNotPresent resources: - limits: + requests: cpu: 500m memory: 700Mi + limits: + cpu: 2 + memory: 2Gi env: - name: CONTAINER_MEMORY_LIMIT_IN_BYTES valueFrom: @@ -1059,6 +1082,16 @@ spec: # Uncomment below lines for MSI Auth Mode testing # - name: USING_AAD_MSI_AUTH # value: "true" + - name: AZMON_WINDOWS_FLUENT_BIT_DISABLED + value: "true" + - name: APPMONITORING_AUTOINSTRUMENTATION_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_ENABLED + value: "false" + - name: APPMONITORING_OPENTELEMETRYLOGS_PORT + value: "28331" + - name: CUSTOM_METRICS_DISABLED + value: "false" volumeMounts: # Uncomment below lines when telegraf upgraded to 1.28.5 or higher # - name: kube-api-access diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index 244e5b44f..8268cc596 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -2069,15 +2069,6 @@ func GetControllerNameFromK8sPodName(podName string) (string, string) { // InitializePlugin reads and populates plugin configuration func InitializePlugin(pluginConfPath string, agentVersion string) { - go func() { - isTest := os.Getenv("ISTEST") - if strings.Compare(strings.ToLower(strings.TrimSpace(isTest)), "true") == 0 { - e1 := http.ListenAndServe("localhost:6060", nil) - if e1 != nil { - Log("HTTP Listen Error: %s \n", e1.Error()) - } - } - }() StdoutIgnoreNsSet = make(map[string]bool) StdoutIncludeSystemResourceSet = make(map[string]bool) StdoutIncludeSystemNamespaceSet = make(map[string]bool)