Skip to content

Commit 37cade0

Browse files
authored
fix(log): add custom error handler for Kubernetes API errors (#1024)
# Description This pull request includes changes to improve error handling and logging in the Kubernetes watcher and to simplify error checking in the endpoint reconciler. The most important changes include adding a custom error handler for the Kubernetes watcher, importing necessary packages, and simplifying error handling logic. Improvements to error handling and logging: * [`pkg/k8s/watcher_linux.go`](diffhunk://#diff-1769e0320129167654a2a0d5f382b63fb459aadf221d3ba04df1f1a56188f6d2R105-R123): Added a custom error handler `k8sWatcherErrorHandler` to log specific Kubernetes API server errors and tag them for easier identification. * [`pkg/k8s/watcher_linux.go`](diffhunk://#diff-1769e0320129167654a2a0d5f382b63fb459aadf221d3ba04df1f1a56188f6d2R23-R29): Registered the custom error handler in the `init` function to ensure it is used by the watcher. Code simplification: * [`pkg/controllers/operator/cilium-crds/endpoint/endpoint_controller.go`](diffhunk://#diff-0a6e7a396be9617c3c31afb9cf9f740b75e645a533833d049726db8321d13df9L536-R536): Simplified the error checking logic in `handlePodUpsert` by removing redundant error check. ## Checklist - [X] I have read the [contributing documentation](https://retina.sh/docs/contributing). - [X] I signed and signed-off the commits (`git commit -S -s ...`). See [this documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) on signing commits. - [X] I have correctly attributed the author(s) of the code. - [X] I have tested the changes locally. - [X] I have followed the project's style guidelines. - [X] I have updated the documentation, if necessary. - [X] I have added tests, if applicable. ## Testing I removed the retina agent's permission to read nodes and services. I can see the complete error as our custom message coming from retina. 
``` time="2024-11-26T16:05:33Z" level=error msg="Potentially Network Error coming from K8s API Server failing to watch Services" actualError="pkg/mod/k8s.io/[email protected]/tools/cache/reflector.go:232: Failed to watch *v1.Service: failed to list *v1.Service: services is forbidden: User \"system:serviceaccount:kube-system:retina-agent\" cannot list resource \"services\" in API group \"\" at the cluster scope" subsys=k8s-watcher ``` --- Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information on how to contribute to this project.
1 parent 31bf97f commit 37cade0

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

pkg/controllers/operator/cilium-crds/endpoint/endpoint_controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ func (r *endpointReconciler) handlePodUpsert(ctx context.Context, newPEP *PodEnd
533533
// May end up getting another endpoint ID below if we try to create the CEP below.
534534
// No downside to this.
535535

536-
if !k8serrors.IsNotFound(err) && err != nil {
536+
if !k8serrors.IsNotFound(err) {
537537
r.l.WithError(err).WithFields(logrus.Fields{
538538
"podKey": newPEP.key.String(),
539539
"pep": newPEP,

pkg/k8s/watcher_linux.go

+37
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@ package k8s
22

33
import (
44
"context"
5+
"strings"
56
"sync"
67
"time"
78

9+
"k8s.io/apimachinery/pkg/util/runtime"
10+
811
agentK8s "github.com/cilium/cilium/daemon/k8s"
912
"github.com/cilium/cilium/pkg/hive/cell"
1013
"github.com/cilium/cilium/pkg/ipcache"
@@ -15,8 +18,17 @@ import (
1518
"github.com/cilium/cilium/pkg/logging"
1619
"github.com/cilium/cilium/pkg/logging/logfields"
1720
"github.com/cilium/cilium/pkg/option"
21+
"github.com/sirupsen/logrus"
1822
)
1923

24+
func init() {
25+
// Register custom error handler for the watcher
26+
// nolint:reassign // this is the only way to set the error handler
27+
runtime.ErrorHandlers = []func(error){
28+
k8sWatcherErrorHandler,
29+
}
30+
}
31+
2032
const (
2133
K8sAPIGroupCiliumEndpointV2 = "cilium/v2::CiliumEndpoint"
2234
K8sAPIGroupServiceV1Core = "core/v1::Service"
@@ -92,3 +104,28 @@ func Start(ctx context.Context, k *watchers.K8sWatcher) {
92104
<-syncdCache
93105
logger.Info("Kubernetes watcher synced")
94106
}
107+
108+
// retinaK8sErrorHandler is a custom error handler for the watcher
109+
// that logs the error and tags the error to easily identify
110+
func k8sWatcherErrorHandler(e error) {
111+
errStr := e.Error()
112+
logError := func(er, r string) {
113+
logger.WithFields(logrus.Fields{
114+
"underlyingError": er,
115+
"resource": r,
116+
}).Error("Error watching k8s resource")
117+
}
118+
119+
switch {
120+
case strings.Contains(errStr, "Failed to watch *v1.Node"):
121+
logError(errStr, "v1.Node")
122+
case strings.Contains(errStr, "Failed to watch *v2.CiliumEndpoint"):
123+
logError(errStr, "v2.CiliumEndpoint")
124+
case strings.Contains(errStr, "Failed to watch *v1.Service"):
125+
logError(errStr, "v1.Service")
126+
case strings.Contains(errStr, "Failed to watch *v2.CiliumNode"):
127+
logError(errStr, "v2.CiliumNode")
128+
default:
129+
k8s.K8sErrorHandler(e)
130+
}
131+
}

0 commit comments

Comments
 (0)