Skip to content

Commit 06be4b3

Browse files
BaarsgaardtheSuess
andauthored
Fix: Do not cache native resources created without CommonLabels (#1818)
* fix: Limit cache for k8s native resources * feat: Disable caching of ConfigMaps and Secrets feat: Toggle caching of ConfigMaps and Secrets with CommonLabels * chore: Move sharding log out of main (cyclomatic lint) * chore: Update comments * Refactor: Re-use new util func to create cacheLabelConfig for default setups * fix: Avoid labels.Everything returned from getLabelSelectors * feat: Add a way to disable the cache limitations for backwards compatibility * chore: Make cache improvements opt-in * chore: Update env var name and "watchLabeledReferencesOnly" description in Helm values * refactor: switch to `ENFORCE_CACHE_LABELS` env var --------- Co-authored-by: Dominik Süß <[email protected]>
1 parent 24bc80a commit 06be4b3

File tree

4 files changed

+80
-20
lines changed

4 files changed

+80
-20
lines changed

deploy/helm/grafana-operator/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ It's easier to just manage this configuration outside of the operator.
6868
| dashboard.annotations | object | `{}` | Annotations to add to the Grafana dashboard ConfigMap |
6969
| dashboard.enabled | bool | `false` | Whether to create a ConfigMap containing a dashboard monitoring the operator metrics. Consider enabling this if you are enabling the ServiceMonitor. Optionally, a GrafanaDashboard CR can be manually created pointing to the Grafana.com dashboard ID 22785 https://grafana.com/grafana/dashboards/22785-grafana-operator/ The Grafana.com dashboard is maintained by the community and does not necessarily match the JSON definition in this repository. |
7070
| dashboard.labels | object | `{}` | Labels to add to the Grafana dashboard ConfigMap |
71+
| enforceCacheLabels | string | `"off"` | Sets the `ENFORCE_CACHE_LABELS` environment variable. Enables the enforcement of cache labels, reducing memory usage significantly. Valid values are "off", "safe" and "all". When using `safe` mode, ConfigMaps and Secrets are excluded from caching. When using `all` mode, ConfigMaps and Secrets are cached and require the `app.kubernetes.io/managed-by: grafana-operator` label. |
7172
| env | list | `[]` | Additional environment variables |
7273
| extraObjects | list | `[]` | Array of extra K8s objects to deploy |
7374
| fullnameOverride | string | `""` | Overrides the fully qualified app name. |

deploy/helm/grafana-operator/templates/deployment.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ spec:
6060
{{ else }}
6161
value: {{quote .Values.watchLabelSelectors }}
6262
{{- end }}
63+
- name: ENFORCE_CACHE_LABELS
64+
{{- if .Values.enforceCacheLabels }}
65+
value: {{quote .Values.enforceCacheLabels }}
66+
{{ else }}
67+
value: "off"
68+
{{- end }}
6369
- name: CLUSTER_DOMAIN
6470
{{- if and .Values.clusterDomain (eq .Values.clusterDomain "") }}
6571
value: ""

deploy/helm/grafana-operator/values.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ watchNamespaceSelector: ""
2424
watchLabelSelectors: ""
2525
# watchLabelSelectors: "partition in (customerA, customerB),environment!=qa"
2626

27+
# -- Sets the `ENFORCE_CACHE_LABELS` environment variable,
28+
# Enables the enforcement of cache labels, reducing memory usage significantly.
29+
# Valid values are "off","safe" and "all".
30+
# When using `safe` mode, ConfigMaps and Secrets are excluded from caching.
31+
# When using `all` mode, ConfigMaps and Secrets are cached and require the `app.kubernetes.io/managed-by: grafana-operator` label.
32+
enforceCacheLabels: "off"
33+
2734
# -- Sets the `CLUSTER_DOMAIN` environment variable,
2835
# it defines how internal Kubernetes services managed by the operator are addressed.
2936
# By default, this is empty, and internal services are addressed without a cluster domain specified, i.e., a

main.go

+66-20
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,29 @@ import (
2727
"strings"
2828
"syscall"
2929

30+
"github.com/KimMachineGun/automemlimit/memlimit"
3031
"go.uber.org/automaxprocs/maxprocs"
3132
uberzap "go.uber.org/zap"
32-
"go.uber.org/zap/zapcore"
33-
"sigs.k8s.io/controller-runtime/pkg/client"
34-
"sigs.k8s.io/controller-runtime/pkg/webhook"
35-
36-
"k8s.io/apimachinery/pkg/fields"
37-
"k8s.io/apimachinery/pkg/labels"
38-
39-
corev1 "k8s.io/api/core/v1"
40-
"sigs.k8s.io/controller-runtime/pkg/cache"
4133

42-
"github.com/KimMachineGun/automemlimit/memlimit"
4334
"github.com/go-logr/logr"
35+
"go.uber.org/zap/zapcore"
36+
4437
routev1 "github.com/openshift/api/route/v1"
38+
v1 "k8s.io/api/apps/v1"
39+
corev1 "k8s.io/api/core/v1"
40+
networkingv1 "k8s.io/api/networking/v1"
4541
discovery2 "k8s.io/client-go/discovery"
4642
"k8s.io/client-go/rest"
43+
"sigs.k8s.io/controller-runtime/pkg/cache"
44+
"sigs.k8s.io/controller-runtime/pkg/client"
45+
"sigs.k8s.io/controller-runtime/pkg/webhook"
4746

4847
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
4948
// to ensure that exec-entrypoint and run can make use of them.
5049
_ "k8s.io/client-go/plugin/pkg/client/auth"
5150

52-
"github.com/grafana/grafana-operator/v5/controllers/model"
51+
"k8s.io/apimachinery/pkg/fields"
52+
"k8s.io/apimachinery/pkg/labels"
5353
"k8s.io/apimachinery/pkg/runtime"
5454
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
5555
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
@@ -61,6 +61,7 @@ import (
6161
grafanav1beta1 "github.com/grafana/grafana-operator/v5/api/v1beta1"
6262
"github.com/grafana/grafana-operator/v5/controllers"
6363
"github.com/grafana/grafana-operator/v5/controllers/autodetect"
64+
"github.com/grafana/grafana-operator/v5/controllers/model"
6465
"github.com/grafana/grafana-operator/v5/embeds"
6566
//+kubebuilder:scaffold:imports
6667
)
@@ -77,6 +78,8 @@ const (
7778
// eg: 'partition in (customerA, customerB),environment!=qa'
7879
// If empty or undefined, the operator will watch all CRs.
7980
watchLabelSelectorsEnvVar = "WATCH_LABEL_SELECTORS"
81+
// Opt in to enable new experimental cache limits by setting this to `safe` or `all`. Valid values are `off`, `safe` and `all`
82+
enforceCacheLabelsEnvVar = "ENFORCE_CACHE_LABELS"
8083
// clusterDomainEnvVar is the constant for env variable CLUSTER_DOMAIN, which specifies the cluster domain to use for addressing.
8184
// By default, this is empty, and internal services are addressed without a cluster domain specified, i.e., a
8285
// relative domain name that will resolve regardless of if a custom domain is configured for the cluster. If you
@@ -99,7 +102,7 @@ func init() {
99102
//+kubebuilder:scaffold:scheme
100103
}
101104

102-
func main() {
105+
func main() { // nolint:gocyclo
103106
var metricsAddr string
104107
var enableLeaderElection bool
105108
var probeAddr string
@@ -135,12 +138,25 @@ func main() {
135138
setupLog.Error(err, "failed to adjust GOMAXPROCS")
136139
}
137140

141+
// Detect environment variables
138142
watchNamespace, _ := os.LookupEnv(watchNamespaceEnvVar)
139143
watchNamespaceSelector, _ := os.LookupEnv(watchNamespaceEnvSelector)
140144
watchLabelSelectors, _ := os.LookupEnv(watchLabelSelectorsEnvVar)
141145
if watchLabelSelectors != "" {
142146
setupLog.Info(fmt.Sprintf("sharding is enabled via %s=%s. Beware: Always label Grafana CRs before enabling to ensure labels are inherited. Existing Secrets/ConfigMaps referenced in CRs also need to be labeled to continue working.", watchLabelSelectorsEnvVar, watchLabelSelectors))
143147
}
148+
149+
enforceCacheLabelsLevel, _ := os.LookupEnv(enforceCacheLabelsEnvVar)
150+
enforceCacheLabels := false
151+
switch enforceCacheLabelsLevel {
152+
case "safe", "all":
153+
enforceCacheLabels = true
154+
setupLog.Info("label restrictions for cached resources are active", "level", enforceCacheLabelsLevel)
155+
case "off", "":
156+
default:
157+
setupLog.Error(fmt.Errorf("invalid value %s for %s", enforceCacheLabelsLevel, enforceCacheLabelsEnvVar), "falling back to disabling cache enforcement")
158+
}
159+
144160
clusterDomain, _ := os.LookupEnv(clusterDomainEnvVar)
145161

146162
// Fetch k8s api credentials and detect platform
@@ -157,13 +173,9 @@ func main() {
157173
}
158174

159175
controllerOptions := ctrl.Options{
160-
Scheme: scheme,
161-
Metrics: metricsserver.Options{
162-
BindAddress: metricsAddr,
163-
},
164-
WebhookServer: webhook.NewServer(webhook.Options{
165-
Port: 9443,
166-
}),
176+
Scheme: scheme,
177+
Metrics: metricsserver.Options{BindAddress: metricsAddr},
178+
WebhookServer: webhook.NewServer(webhook.Options{Port: 9443}),
167179
HealthProbeBindAddress: probeAddr,
168180
LeaderElection: enableLeaderElection,
169181
LeaderElectionID: "f75f3bba.integreatly.org",
@@ -175,6 +187,39 @@ func main() {
175187
setupLog.Error(err, fmt.Sprintf("unable to parse %s", watchLabelSelectorsEnvVar))
176188
os.Exit(1) //nolint
177189
}
190+
191+
if enforceCacheLabels {
192+
var cacheLabelConfig cache.ByObject
193+
if watchLabelSelectors != "" {
194+
// When sharding, limit cache according to shard labels
195+
cacheLabelConfig = cache.ByObject{Label: labelSelectors}
196+
setupLog.Info(fmt.Sprintf("sharding is enabled via %s=%s. Beware: Always label Grafana CRs before enabling to ensure labels are inherited. Existing Secrets/ConfigMaps referenced in CRs also need to be labeled to continue working.", watchLabelSelectorsEnvVar, watchLabelSelectors))
197+
} else {
198+
// Otherwise limit it to managed-by label
199+
cacheLabelConfig = cache.ByObject{Label: labels.SelectorFromSet(model.CommonLabels)}
200+
}
201+
202+
// ConfigMaps and Secrets are label-restricted in the cache like the other resources; in `safe` mode the client cache is additionally disabled for them (see below)
203+
controllerOptions.Cache.ByObject = map[client.Object]cache.ByObject{
204+
&v1.Deployment{}: cacheLabelConfig,
205+
&corev1.Service{}: cacheLabelConfig,
206+
&corev1.ServiceAccount{}: cacheLabelConfig,
207+
&networkingv1.Ingress{}: cacheLabelConfig,
208+
&corev1.PersistentVolumeClaim{}: cacheLabelConfig,
209+
&corev1.ConfigMap{}: cacheLabelConfig, // Matching just labeled ConfigMaps and Secrets greatly reduces cache size
210+
&corev1.Secret{}: cacheLabelConfig, // Omitting labels or supporting custom labels would require changes in Grafana Reconciler
211+
}
212+
if isOpenShift {
213+
controllerOptions.Cache.ByObject[&routev1.Route{}] = cacheLabelConfig
214+
}
215+
if enforceCacheLabelsLevel == "safe" {
216+
controllerOptions.Client.Cache = &client.CacheOptions{
217+
DisableFor: []client.Object{&corev1.ConfigMap{}, &corev1.Secret{}},
218+
}
219+
}
220+
}
221+
222+
// Determine Operator scope
178223
switch {
179224
case strings.Contains(watchNamespace, ","):
180225
// multi namespace scoped
@@ -185,7 +230,7 @@ func main() {
185230
controllerOptions.Cache.DefaultNamespaces = getNamespaceConfig(watchNamespace, labelSelectors)
186231
setupLog.Info("operator running in namespace scoped mode", "namespace", watchNamespace)
187232
case strings.Contains(watchNamespaceSelector, ":"):
188-
// namespace scoped
233+
// multi namespace scoped
189234
controllerOptions.Cache.DefaultNamespaces = getNamespaceConfigSelector(restConfig, watchNamespaceSelector, labelSelectors)
190235
setupLog.Info("operator running in namespace scoped mode using namespace selector", "namespace", watchNamespace)
191236

@@ -204,6 +249,7 @@ func main() {
204249
os.Exit(1) //nolint
205250
}
206251

252+
// Register controllers
207253
if err = (&controllers.GrafanaReconciler{
208254
Client: mgr.GetClient(),
209255
Scheme: mgr.GetScheme(),

0 commit comments

Comments
 (0)