diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 0e0cfdfd6bc..e7a395b868c 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -1,7 +1,7 @@ # yaml-language-server: $schema=https://goreleaser.com/static/schema.json # vim: set ts=2 sw=2 tw=0 fo=cnqoj -version: 1 +version: 2 before: hooks: @@ -22,7 +22,7 @@ builds: - windows - darwin ldflags: - - -X github.com/microsoft/retina/cli/cmd.Version=v{{.Version}} + - -X github.com/microsoft/retina/internal/buildinfo.Version=v{{.Version}} main: cli/main.go archives: diff --git a/cli/cmd/shell.go b/cli/cmd/shell.go new file mode 100644 index 00000000000..b388c720eb7 --- /dev/null +++ b/cli/cmd/shell.go @@ -0,0 +1,173 @@ +package cmd + +import ( + "errors" + "fmt" + "os" + "time" + + "github.com/microsoft/retina/internal/buildinfo" + "github.com/microsoft/retina/shell" + "github.com/spf13/cobra" + v1 "k8s.io/api/core/v1" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/cli-runtime/pkg/resource" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/scheme" + "k8s.io/kubectl/pkg/util/templates" +) + +var ( + configFlags *genericclioptions.ConfigFlags + matchVersionFlags *cmdutil.MatchVersionFlags + retinaShellImageRepo string + retinaShellImageVersion string + mountHostFilesystem bool + allowHostFilesystemWrite bool + hostPID bool + capabilities []string + timeout time.Duration +) + +var ( + // AKS requires clusters to allow access to MCR, so use this repository by default. + defaultRetinaShellImageRepo = "mcr.microsoft.com/containernetworking/retina-shell" + + // Default version is the same as CLI version, set at link time. + defaultRetinaShellImageVersion = buildinfo.Version + + defaultTimeout = 30 * time.Second + + errMissingRequiredRetinaShellImageVersionArg = errors.New("missing required --retina-shell-image-version") + errUnsupportedResourceType = errors.New("unsupported resource type") +) + +var shellCmd = &cobra.Command{ + Use: "shell (NODE | TYPE[[.VERSION].GROUP]/NAME)", + Short: "[EXPERIMENTAL] Interactively debug a node or pod", + Long: templates.LongDesc(` + [EXPERIMENTAL] This is an experimental command. The flags and behavior may change in the future. + + Start a shell with networking tools in a node or pod for adhoc debugging. + + * For nodes, this creates a pod on the node in the root network namespace. + * For pods, this creates an ephemeral container inside the pod's network namespace. + + You can override the default image used for the shell container with either + CLI flags (--retina-shell-image-repo and --retina-shell-image-version) or + environment variables (RETINA_SHELL_IMAGE_REPO and RETINA_SHELL_IMAGE_VERSION). + CLI flags take precedence over env vars. +`), + + Example: templates.Examples(` + # start a shell in a node + kubectl retina shell node0001 + + # start a shell in a node, with debug pod in kube-system namespace + kubectl retina shell -n kube-system node0001 + + # start a shell as an ephemeral container inside an existing pod + kubectl retina shell -n kube-system pod/coredns-d459997b4-7cpzx + + # start a shell in a node, mounting the host filesystem to /host with ability to chroot + kubectl retina shell node001 --mount-host-filesystem --capabilities SYS_CHROOT + + # start a shell in a node, with NET_RAW and NET_ADMIN capabilities + # (required for iptables and tcpdump) + kubectl retina shell node001 --capabilities NET_RAW,NET_ADMIN +`), + Args: cobra.ExactArgs(1), + RunE: func(_ *cobra.Command, args []string) error { + // retinaShellImageVersion defaults to the CLI version, but that might not be set if the CLI is built without -ldflags. + if retinaShellImageVersion == "" { + return errMissingRequiredRetinaShellImageVersionArg + } + + namespace, explicitNamespace, err := matchVersionFlags.ToRawKubeConfigLoader().Namespace() + if err != nil { + return fmt.Errorf("error retrieving namespace arg: %w", err) + } + + // This interprets the first arg as either a node or pod (same as kubectl): + // "node001" -> node + // "node/node001" -> node + // "pod/example-7cpzx" -> pod + r := resource.NewBuilder(configFlags). + WithScheme(scheme.Scheme, scheme.Scheme.PrioritizedVersionsAllGroups()...). + FilenameParam(explicitNamespace, &resource.FilenameOptions{}). + NamespaceParam(namespace).DefaultNamespace().ResourceNames("nodes", args[0]). + Do() + if rerr := r.Err(); rerr != nil { + return fmt.Errorf("error constructing resource builder: %w", rerr) + } + + restConfig, err := matchVersionFlags.ToRESTConfig() + if err != nil { + return fmt.Errorf("error constructing REST config: %w", err) + } + + config := shell.Config{ + RestConfig: restConfig, + RetinaShellImage: fmt.Sprintf("%s:%s", retinaShellImageRepo, retinaShellImageVersion), + MountHostFilesystem: mountHostFilesystem, + AllowHostFilesystemWrite: allowHostFilesystemWrite, + HostPID: hostPID, + Capabilities: capabilities, + Timeout: timeout, + } + + return r.Visit(func(info *resource.Info, err error) error { + if err != nil { + return err + } + + switch obj := info.Object.(type) { + case *v1.Node: + podDebugNamespace := namespace + nodeName := obj.Name + return shell.RunInNode(config, nodeName, podDebugNamespace) + case *v1.Pod: + return shell.RunInPod(config, obj.Namespace, obj.Name) + default: + gvk := obj.GetObjectKind().GroupVersionKind() + return fmt.Errorf("unsupported resource %s/%s: %w", gvk.GroupVersion(), gvk.Kind, errUnsupportedResourceType) + } + }) + }, +} + +func init() { + Retina.AddCommand(shellCmd) + shellCmd.PersistentPreRun = func(cmd *cobra.Command, _ []string) { + // Avoid printing full usage message if the command exits with an error. + cmd.SilenceUsage = true + cmd.SilenceErrors = true + + // Allow setting image repo and version via environment variables (CLI flags still take precedence). + if !cmd.Flags().Changed("retina-shell-image-repo") { + if envRepo := os.Getenv("RETINA_SHELL_IMAGE_REPO"); envRepo != "" { + retinaShellImageRepo = envRepo + } + } + if !cmd.Flags().Changed("retina-shell-image-version") { + if envVersion := os.Getenv("RETINA_SHELL_IMAGE_VERSION"); envVersion != "" { + retinaShellImageVersion = envVersion + } + } + } + shellCmd.Flags().StringVar(&retinaShellImageRepo, "retina-shell-image-repo", defaultRetinaShellImageRepo, "The container registry repository for the image to use for the shell container") + shellCmd.Flags().StringVar(&retinaShellImageVersion, "retina-shell-image-version", defaultRetinaShellImageVersion, "The version (tag) of the image to use for the shell container") + shellCmd.Flags().BoolVarP(&mountHostFilesystem, "mount-host-filesystem", "m", false, "Mount the host filesystem to /host. Applies only to nodes, not pods.") + shellCmd.Flags().BoolVarP(&allowHostFilesystemWrite, "allow-host-filesystem-write", "w", false, + "Allow write access to the host filesystem. Implies --mount-host-filesystem. Applies only to nodes, not pods.") + shellCmd.Flags().BoolVar(&hostPID, "host-pid", false, "Set HostPID on the shell container. Applies only to nodes, not pods.") + shellCmd.Flags().StringSliceVarP(&capabilities, "capabilities", "c", []string{}, "Add capabilities to the shell container") + shellCmd.Flags().DurationVar(&timeout, "timeout", defaultTimeout, "The maximum time to wait for the shell container to start") + + // configFlags and matchVersion flags are used to load kubeconfig. + // This uses the same mechanism as `kubectl debug` to connect to apiserver and attach to containers. + configFlags = genericclioptions.NewConfigFlags(true) + configFlags.AddFlags(shellCmd.PersistentFlags()) + matchVersionFlags = cmdutil.NewMatchVersionFlags(configFlags) + matchVersionFlags.AddFlags(shellCmd.PersistentFlags()) +} diff --git a/docs/06-Troubleshooting/shell.md b/docs/06-Troubleshooting/shell.md new file mode 100644 index 00000000000..11a32e08680 --- /dev/null +++ b/docs/06-Troubleshooting/shell.md @@ -0,0 +1,185 @@ +# Shell TSG + +**EXPERIMENTAL: `retina shell` is an experimental feature, so the flags and behavior may change in future versions.** + +The `retina shell` command allows you to start an interactive shell on a Kubernetes node or pod. This runs a container image with many common networking tools installed (`ping`, `curl`, etc.). + +## Testing connectivity + +Start a shell on a node or inside a pod + +```bash +# To start a shell in a node (root network namespace): +kubectl retina shell aks-nodepool1-15232018-vmss000001 + +# To start a shell inside a pod (pod network namespace): +kubectl retina shell -n kube-system pods/coredns-d459997b4-7cpzx +``` + +Check connectivity using `ping`: + +```text +root [ / ]# ping 10.224.0.4 +PING 10.224.0.4 (10.224.0.4) 56(84) bytes of data. +64 bytes from 10.224.0.4: icmp_seq=1 ttl=64 time=0.964 ms +64 bytes from 10.224.0.4: icmp_seq=2 ttl=64 time=1.13 ms +64 bytes from 10.224.0.4: icmp_seq=3 ttl=64 time=0.908 ms +64 bytes from 10.224.0.4: icmp_seq=4 ttl=64 time=1.07 ms +64 bytes from 10.224.0.4: icmp_seq=5 ttl=64 time=1.01 ms + +--- 10.224.0.4 ping statistics --- +5 packets transmitted, 5 received, 0% packet loss, time 4022ms +rtt min/avg/max/mdev = 0.908/1.015/1.128/0.077 ms +``` + +Check DNS resolution using `dig`: + +```text +root [ / ]# dig example.com +short +93.184.215.14 +``` + +The tools `nslookup` and `drill` are also available if you prefer those. + +Check connectivity to apiserver using `nc` and `curl`: + +```text +root [ / ]# nc -zv 10.0.0.1 443 +Ncat: Version 7.95 ( https://nmap.org/ncat ) +Ncat: Connected to 10.0.0.1:443. +Ncat: 0 bytes sent, 0 bytes received in 0.06 seconds. + +root [ / ]# curl -k https://10.0.0.1 +{ + "kind": "Status", + "apiVersion": "v1", + "metadata": {}, + "status": "Failure", + "message": "Unauthorized", + "reason": "Unauthorized", + "code": 401 +} +``` + +### nftables and iptables + +Accessing nftables and iptables rules requires `NET_RAW` and `NET_ADMIN` capabilities. + +```bash +kubectl retina shell aks-nodepool1-15232018-vmss000002 --capabilities NET_ADMIN,NET_RAW +``` + +Then you can run `iptables` and `nft`: + +```text +root [ / ]# iptables -nvL | head -n 2 +Chain INPUT (policy ACCEPT 1191K packets, 346M bytes) + pkts bytes target prot opt in out source destination +root [ / ]# nft list ruleset | head -n 2 +# Warning: table ip filter is managed by iptables-nft, do not touch! +table ip filter { +``` + +**If you see the error "Operation not permitted (you must be root)", check that your `kubectl retina shell` command sets `--capabilities NET_RAW,NET_ADMIN`.** + +`iptables` in the shell image uses `iptables-legacy`, which may or may not match the configuration on the node. For example, Ubuntu maps `iptables` to `iptables-nft`. To use the exact same `iptables` binary as installed on the node, you will need to `chroot` into the host filesystem (see below). + +## Accessing the host filesystem + +On nodes, you can mount the host filesystem to `/host`: + +```bash +kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem +``` + +This mounts the host filesystem (`/`) to `/host` in the debug pod: + +```text +root [ / ]# ls /host +NOTICE.txt bin boot dev etc home lib lib64 libx32 lost+found media mnt opt proc root run sbin srv sys tmp usr var +``` + +The host filesystem is mounted read-only by default. If you need write access, use the `--allow-host-filesystem-write` flag. + +Symlinks between files on the host filesystem may not resolve correctly. If you see "No such file or directory" errors for symlinks, try following the instructions below to `chroot` to the host filesystem. + +## Chroot to the host filesystem + +`chroot` requires the `SYS_CHROOT` capability: + +```bash +kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT +``` + +Then you can use `chroot` to switch to start a shell inside the host filesystem: + +```text +root [ / ]# chroot /host bash +root@aks-nodepool1-15232018-vmss000002:/# cat /etc/resolv.conf | tail -n 2 +nameserver 168.63.129.16 +search shncgv2kgepuhm1ls1dwgholsd.cx.internal.cloudapp.net +``` + +`chroot` allows you to: + +* Execute binaries installed on the node. +* Resolve symlinks that point to files in the host filesystem (such as /etc/resolv.conf -> /run/systemd/resolve/resolv.conf) +* Use `sysctl` to view or modify kernel parameters. +* Use `journalctl` to view systemd unit and kernel logs. +* Use `ip netns` to view network namespaces. (However, `ip netns exec` does not work.) + +## Systemctl + +`systemctl` commands require both `chroot` to the host filesystem and host PID: + +```bash +kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT --host-pid +``` + +Then `chroot` to the host filesystem and run `systemctl status`: + +```text +root [ / ]# chroot /host systemctl status | head -n 2 +● aks-nodepool1-15232018-vmss000002 + State: running +``` + +**If `systemctl` shows an error "Failed to connect to bus: No data available", check that the `retina shell` command has `--host-pid` set and that you have chroot'd to /host.** + +## Troubleshooting + +### Timeouts + +If `kubectl retina shell` fails with a timeout error, then: + +1. Increase the timeout by setting `--timeout` flag. +2. Check the pod using `kubectl describe pod` to determine why retina shell is failing to start. + +Example: + +```bash +kubectl retina shell --timeout 10m node001 # increase timeout to 10 minutes +``` + +### Firewalls and ImagePullBackoff + +Some clusters are behind a firewall that blocks pulling the retina-shell image. To workaround this: + +1. Replicate the retina-shell images to a container registry accessible from within the cluster. +2. Override the image used by Retina CLI with the environment variable `RETINA_SHELL_IMAGE_REPO`. + +Example: + +```bash +export RETINA_SHELL_IMAGE_REPO="example.azurecr.io/retina/retina-shell" +export RETINA_SHELL_IMAGE_VERSION=v0.0.1 # optional, if not set defaults to the Retina CLI version. +kubectl retina shell node0001 # this will use the image "example.azurecr.io/retina/retina-shell:v0.0.1" +``` + +## Limitations + +* Windows nodes and pods are not yet supported. +* `bpftool` and `bpftrace` are not supported. +* The shell image link `iptables` commands to `iptables-legacy`, even if the node itself links to `iptables-nft`. +* `nsenter` is not supported. +* `ip netns` will not work without `chroot` to the host filesystem. diff --git a/go.mod b/go.mod index 3ca7fb8f643..738c8962194 100644 --- a/go.mod +++ b/go.mod @@ -100,9 +100,11 @@ require ( github.com/evanphx/json-patch v5.9.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect + github.com/fatih/camelcase v1.0.0 // indirect github.com/fatih/color v1.16.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fvbommel/sortorder v1.1.0 // indirect github.com/go-errors/errors v1.4.2 // indirect github.com/go-gorp/gorp/v3 v3.1.0 // indirect github.com/go-jose/go-jose/v3 v3.0.3 // indirect @@ -171,7 +173,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/locker v1.0.1 // indirect - github.com/moby/moby v26.0.0+incompatible // indirect + github.com/moby/moby v26.1.0+incompatible // indirect github.com/moby/spdystream v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.1 // indirect github.com/moby/sys/sequential v0.5.0 // indirect @@ -226,9 +228,9 @@ require ( go.starlark.net v0.0.0-20230814145427-12f4cb8177e4 // indirect go.uber.org/dig v1.17.1 // indirect go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect - golang.org/x/crypto v0.28.0 // indirect + golang.org/x/crypto v0.30.0 // indirect golang.org/x/mod v0.21.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.26.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect @@ -259,11 +261,11 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.10.0 go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.9.0 - golang.org/x/sys v0.27.0 - golang.org/x/term v0.25.0 // indirect + golang.org/x/net v0.32.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sync v0.10.0 + golang.org/x/sys v0.28.0 + golang.org/x/term v0.27.0 // indirect google.golang.org/protobuf v1.35.2 gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect @@ -294,7 +296,7 @@ require ( github.com/aws/aws-sdk-go-v2 v1.32.6 github.com/aws/aws-sdk-go-v2/config v1.28.6 github.com/aws/aws-sdk-go-v2/credentials v1.17.47 - github.com/aws/aws-sdk-go-v2/service/s3 v1.70.0 + github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 github.com/cilium/cilium v1.16.0-pre.1.0.20240403152809-b9853ecbcaeb github.com/cilium/ebpf v0.16.0 @@ -315,7 +317,7 @@ require ( github.com/onsi/gomega v1.36.0 github.com/pkg/errors v0.9.1 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.60.1 + github.com/prometheus/common v0.61.0 github.com/safchain/ethtool v0.5.9 github.com/sirupsen/logrus v1.9.3 github.com/spf13/viper v1.19.0 diff --git a/go.sum b/go.sum index 0d1c352416e..10026d81a3c 100644 --- a/go.sum +++ b/go.sum @@ -149,8 +149,8 @@ github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 h1:50+XsN70R github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6/go.mod h1:WqgLmwY7so32kG01zD8CPTJWVWM+TzJoOVHwTg4aPug= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6 h1:BbGDtTi0T1DYlmjBiCr/le3wzhA37O8QTC5/Ab8+EXk= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6/go.mod h1:hLMJt7Q8ePgViKupeymbqI0la+t9/iYFBjxQCFwuAwI= -github.com/aws/aws-sdk-go-v2/service/s3 v1.70.0 h1:HrHFR8RoS4l4EvodRMFcJMYQ8o3UhmALn2nbInXaxZA= -github.com/aws/aws-sdk-go-v2/service/s3 v1.70.0/go.mod h1:sT/iQz8JK3u/5gZkT+Hmr7GzVZehUMkRZpOaAwYXeGY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 h1:nyuzXooUNJexRT0Oy0UQY6AhOzxPxhtt4DcBIHyCnmw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0/go.mod h1:sT/iQz8JK3u/5gZkT+Hmr7GzVZehUMkRZpOaAwYXeGY= github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 h1:rLnYAfXQ3YAccocshIH5mzNNwZBkBo+bP6EhIxak6Hw= github.com/aws/aws-sdk-go-v2/service/sso v1.24.7/go.mod h1:ZHtuQJ6t9A/+YDuxOLnbryAmITtr8UysSny3qcyvJTc= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 h1:JnhTZR3PiYDNKlXy50/pNeix9aGMo6lLpXwJ1mw8MD4= @@ -295,6 +295,8 @@ github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0 github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM= github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= +github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8= +github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= @@ -314,6 +316,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fvbommel/sortorder v1.1.0 h1:fUmoe+HLsBTctBDoaBwpQo5N+nrCp8g/BjKb/6ZQmYw= +github.com/fvbommel/sortorder v1.1.0/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-chi/chi v4.1.2+incompatible h1:fGFk2Gmi/YKXk0OmGfBh0WgmN3XB8lVnEyNz34tQRec= github.com/go-chi/chi v4.1.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ= @@ -664,8 +668,8 @@ github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3N github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= -github.com/moby/moby v26.0.0+incompatible h1:2n9/cIWkxiEI1VsWgTGgXhxIWUbv42PyxEP9L+RReC0= -github.com/moby/moby v26.0.0+incompatible/go.mod h1:fDXVQ6+S340veQPv35CzDahGBmHsiclFwfEygB/TWMc= +github.com/moby/moby v26.1.0+incompatible h1:mjepCwMH0KpCgPvrXjqqyCeTCHgzO7p9TwZ2nQMI2qU= +github.com/moby/moby v26.1.0+incompatible/go.mod h1:fDXVQ6+S340veQPv35CzDahGBmHsiclFwfEygB/TWMc= github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= @@ -761,8 +765,8 @@ github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQy github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= -github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= +github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= @@ -930,8 +934,8 @@ golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.30.0 h1:RwoQn3GkWiMkzlX562cLB7OxWvjH1L8xutO2WoJcRoY= +golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= @@ -980,12 +984,12 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -995,8 +999,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1060,8 +1064,9 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1069,8 +1074,8 @@ golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -1080,8 +1085,8 @@ golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/pkg/managers/controllermanager/controllermanager.go b/pkg/managers/controllermanager/controllermanager.go index 8ca2ddf17f3..0ee6b46479b 100644 --- a/pkg/managers/controllermanager/controllermanager.go +++ b/pkg/managers/controllermanager/controllermanager.go @@ -12,7 +12,6 @@ import ( "github.com/microsoft/retina/pkg/log" pm "github.com/microsoft/retina/pkg/managers/pluginmanager" sm "github.com/microsoft/retina/pkg/managers/servermanager" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/pubsub" "github.com/microsoft/retina/pkg/telemetry" "go.uber.org/zap" @@ -46,15 +45,9 @@ func NewControllerManager(conf *kcfg.Config, kubeclient kubernetes.Interface, te factory.WaitForCacheSync(wait.NeverStop) } - // enabledPlugins := {api.PluginName(conf.EnabledPlugin[])} - enabledPlugins := []api.PluginName{} - for _, pluginName := range conf.EnabledPlugin { - enabledPlugins = append(enabledPlugins, api.PluginName(pluginName)) - } pMgr, err := pm.NewPluginManager( conf, tel, - enabledPlugins..., ) if err != nil { return nil, err diff --git a/pkg/managers/controllermanager/controllermanager_test.go b/pkg/managers/controllermanager/controllermanager_test.go index 465ff087ad6..841dda9dd66 100644 --- a/pkg/managers/controllermanager/controllermanager_test.go +++ b/pkg/managers/controllermanager/controllermanager_test.go @@ -11,8 +11,7 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" pm "github.com/microsoft/retina/pkg/managers/pluginmanager" - "github.com/microsoft/retina/pkg/plugin/api" - "github.com/microsoft/retina/pkg/plugin/api/mock" + plugin "github.com/microsoft/retina/pkg/plugin/mock" "github.com/microsoft/retina/pkg/telemetry" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -82,15 +81,15 @@ func TestControllerPluginManagerStartFail(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) pluginName := "mockplugin" - cfg := &kcfg.Config{ MetricsInterval: timeInter, EnablePodLevel: true, + EnabledPlugin: []string{pluginName}, } - mgr, err := pm.NewPluginManager(cfg, telemetry.NewNoopTelemetry(), api.PluginName(pluginName)) + mgr, err := pm.NewPluginManager(cfg, telemetry.NewNoopTelemetry()) require.NoError(t, err, "Expected no error, instead got %+v", err) - mockPlugin := mock.NewMockPlugin(ctl) + mockPlugin := plugin.NewMockPlugin(ctl) mockPlugin.EXPECT().Generate(gomock.Any()).Return(nil).AnyTimes() mockPlugin.EXPECT().Compile(gomock.Any()).Return(nil).AnyTimes() mockPlugin.EXPECT().Stop().Return(nil).AnyTimes() @@ -98,7 +97,7 @@ func TestControllerPluginManagerStartFail(t *testing.T) { mockPlugin.EXPECT().Name().Return(pluginName).AnyTimes() mockPlugin.EXPECT().Start(gomock.Any()).Return(errors.New("test error")).AnyTimes() - mgr.SetPlugin(api.PluginName(pluginName), mockPlugin) + mgr.SetPlugin(pluginName, mockPlugin) cm.pluginManager = mgr err = cm.Init(context.Background()) diff --git a/pkg/managers/filtermanager/cache.go b/pkg/managers/filtermanager/cache.go index 0063d2affd9..8ce5751214b 100644 --- a/pkg/managers/filtermanager/cache.go +++ b/pkg/managers/filtermanager/cache.go @@ -8,7 +8,7 @@ import ( "sync" ) -var fc *filterCache +var fc = &filterCache{data: make(map[string]requests)} // requests maps a requestor to a list of request metadata. // Nested maps @@ -24,10 +24,7 @@ type filterCache struct { data map[string]requests } -func newCache() *filterCache { - if fc == nil { - fc = &filterCache{data: make(map[string]requests)} - } +func getCache() *filterCache { return fc } diff --git a/pkg/managers/filtermanager/cache_test.go b/pkg/managers/filtermanager/cache_test.go index 14f12bff439..87e7d8d512d 100644 --- a/pkg/managers/filtermanager/cache_test.go +++ b/pkg/managers/filtermanager/cache_test.go @@ -6,6 +6,7 @@ package filtermanager import ( "fmt" "net" + "sync" "testing" "time" @@ -13,7 +14,7 @@ import ( ) func Test_newCache(t *testing.T) { - f := newCache() + f := getCache() assert.NotNil(t, f) f.data["1.1.1.1"] = requests{ @@ -22,12 +23,12 @@ func Test_newCache(t *testing.T) { }, } - f2 := newCache() + f2 := getCache() assert.Equal(t, f, f2) } func Test_IPs(t *testing.T) { - f := newCache() + f := getCache() f.reset() ips := f.ips() @@ -45,7 +46,7 @@ func Test_IPs(t *testing.T) { } func Test_reset(t *testing.T) { - f := newCache() + f := getCache() assert.NotNil(t, f) f.data["1.1.1.1"] = requests{} @@ -54,30 +55,45 @@ func Test_reset(t *testing.T) { } func Test_hasKey(t *testing.T) { - f := newCache() + f := getCache() f.data["1.1.1.1"] = requests{} assert.True(t, f.hasKey(net.ParseIP("1.1.1.1"))) assert.False(t, f.hasKey(net.ParseIP("2.2.2.2"))) } func addIPsHelper() { - f := newCache() + f := getCache() ip1 := net.ParseIP("1.1.1.1") ip2 := net.ParseIP("2.2.2.2") - go f.addIP(ip1, "trace1", RequestMetadata{RuleID: "task1"}) - go f.addIP(ip1, "trace1", RequestMetadata{RuleID: "task2"}) - go f.addIP(ip1, "trace2", RequestMetadata{RuleID: "task3"}) - go f.addIP(ip2, "trace1", RequestMetadata{RuleID: "task1"}) + wg := sync.WaitGroup{} + wg.Add(4) + + go func() { + f.addIP(ip1, "trace1", RequestMetadata{RuleID: "task1"}) + wg.Done() + }() + go func() { + f.addIP(ip1, "trace1", RequestMetadata{RuleID: "task2"}) + wg.Done() + }() + go func() { + f.addIP(ip1, "trace2", RequestMetadata{RuleID: "task3"}) + wg.Done() + }() + go func() { + f.addIP(ip2, "trace1", RequestMetadata{RuleID: "task1"}) + wg.Done() + }() // Wait for goroutines to finish. - time.Sleep(1 * time.Second) + wg.Wait() } func Test_addIP(t *testing.T) { addIPsHelper() - f := newCache() + f := getCache() assert.Equal(t, 2, len(f.data)) expectedData := map[string]requests{ @@ -106,7 +122,7 @@ func Test_addIP(t *testing.T) { func Test_deleteIP(t *testing.T) { addIPsHelper() - f := newCache() + f := getCache() assert.Equal(t, 2, len(f.data)) ip1 := net.ParseIP("1.1.1.1") @@ -202,13 +218,18 @@ func Test_deleteIP(t *testing.T) { } func Test_multiOp(t *testing.T) { - f := newCache() + f := getCache() + wg := sync.WaitGroup{} + wg.Add(100) for i := 0; i < 100; i++ { ip := net.ParseIP(fmt.Sprintf("%d.%d.%d.%d", i, i, i, i)) - go f.addIP(ip, Requestor(fmt.Sprintf("trace-%d", i)), RequestMetadata{RuleID: "task1"}) + go func() { + f.addIP(ip, Requestor(fmt.Sprintf("trace-%d", i)), RequestMetadata{RuleID: "task1"}) + wg.Done() + }() } - time.Sleep(1 * time.Second) + wg.Wait() assert.Equal(t, 100, len(f.data)) fn := func(i int) { @@ -216,9 +237,14 @@ func Test_multiOp(t *testing.T) { res := f.deleteIP(ip, Requestor(fmt.Sprintf("trace-%d", i)), RequestMetadata{RuleID: "task1"}) assert.True(t, res) } + wg.Add(100) for i := 0; i < 100; i++ { - go fn(i) + go func() { + i := i + fn(i) + wg.Done() + }() } - time.Sleep(1 * time.Second) + wg.Wait() assert.Equal(t, 0, len(f.data)) } diff --git a/pkg/managers/filtermanager/manager_linux.go b/pkg/managers/filtermanager/manager_linux.go index 8bd94a91744..a275fc2fedb 100644 --- a/pkg/managers/filtermanager/manager_linux.go +++ b/pkg/managers/filtermanager/manager_linux.go @@ -53,7 +53,7 @@ func Init(retry int) (*FilterManager, error) { f.l = log.Logger().Named("filter-manager") } if f.c == nil { - f.c = newCache() + f.c = getCache() } f.fm, err = filter.Init() return f, errors.Wrapf(err, "failed to initialize filter map") diff --git a/pkg/managers/pluginmanager/cells_linux.go b/pkg/managers/pluginmanager/cells_linux.go index 3008f2cd928..d1d0d0fa240 100644 --- a/pkg/managers/pluginmanager/cells_linux.go +++ b/pkg/managers/pluginmanager/cells_linux.go @@ -8,7 +8,6 @@ import ( v1 "github.com/cilium/cilium/pkg/hubble/api/v1" "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/telemetry" "github.com/sirupsen/logrus" ) @@ -44,11 +43,7 @@ func newPluginManager(params pluginManagerParams) (*PluginManager, error) { // Enable Metrics in retina metrics.InitializeMetrics() - enabledPlugins := []api.PluginName{} - for _, pluginName := range params.Config.EnabledPlugin { - enabledPlugins = append(enabledPlugins, api.PluginName(pluginName)) - } - pluginMgr, err := NewPluginManager(¶ms.Config, params.Telemetry, enabledPlugins...) + pluginMgr, err := NewPluginManager(¶ms.Config, params.Telemetry) if err != nil { return &PluginManager{}, err } diff --git a/pkg/managers/pluginmanager/pluginmanager.go b/pkg/managers/pluginmanager/pluginmanager.go index 7e882ee4c82..15d133382f8 100644 --- a/pkg/managers/pluginmanager/pluginmanager.go +++ b/pkg/managers/pluginmanager/pluginmanager.go @@ -13,9 +13,8 @@ import ( "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/managers/watchermanager" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin" "github.com/microsoft/retina/pkg/plugin/conntrack" - "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/telemetry" "github.com/pkg/errors" "go.uber.org/zap" @@ -37,27 +36,19 @@ var ( type PluginManager struct { cfg *kcfg.Config l *log.ZapLogger - plugins map[api.PluginName]api.Plugin + plugins map[string]plugin.Plugin tel telemetry.Telemetry watcherManager watchermanager.IWatcherManager } -func init() { - registry.RegisterPlugins() -} - -func NewPluginManager( - cfg *kcfg.Config, - tel telemetry.Telemetry, - pluginNames ...api.PluginName, -) (*PluginManager, error) { +func NewPluginManager(cfg *kcfg.Config, tel telemetry.Telemetry) (*PluginManager, error) { logger := log.Logger().Named("plugin-manager") mgr := &PluginManager{ cfg: cfg, l: logger, tel: tel, - plugins: map[api.PluginName]api.Plugin{}, + plugins: map[string]plugin.Plugin{}, } if mgr.cfg.EnablePodLevel { @@ -67,8 +58,8 @@ func NewPluginManager( mgr.l.Info("plugin manager has pod level disabled") } - for _, name := range pluginNames { - newPluginFn, ok := registry.PluginHandler[name] + for _, name := range cfg.EnabledPlugin { + newPluginFn, ok := plugin.Get(name) if !ok { return nil, fmt.Errorf("plugin %s not found in registry", name) } @@ -80,9 +71,9 @@ func NewPluginManager( func (p *PluginManager) Stop() { var wg sync.WaitGroup - for _, plugin := range p.plugins { + for _, pl := range p.plugins { wg.Add(1) - go func(plugin api.Plugin) { + go func(plugin plugin.Plugin) { defer wg.Done() if err := plugin.Stop(); err != nil { p.l.Error("failed to stop plugin", zap.Error(err)) @@ -91,32 +82,32 @@ func (p *PluginManager) Stop() { // even if some plugins fail to stop. } p.l.Info("Cleaned up resource for plugin", zap.String("name", plugin.Name())) - }(plugin) + }(pl) } wg.Wait() } // Reconcile reconciles a particular plugin. -func (p *PluginManager) Reconcile(ctx context.Context, plugin api.Plugin) error { - defer p.tel.StopPerf(p.tel.StartPerf(fmt.Sprintf("reconcile-%s", plugin.Name()))) +func (p *PluginManager) Reconcile(ctx context.Context, pl plugin.Plugin) error { + defer p.tel.StopPerf(p.tel.StartPerf("reconcile-" + pl.Name())) // Regenerate eBPF code and bpf object. // This maybe no-op for plugins that don't use eBPF. - if err := plugin.Generate(ctx); err != nil { + if err := pl.Generate(ctx); err != nil { return errors.Wrap(err, "failed to generate plugin") } - if err := plugin.Compile(ctx); err != nil { + if err := pl.Compile(ctx); err != nil { return errors.Wrap(err, "failed to compile plugin") } // Re-start plugin. - if err := plugin.Stop(); err != nil { + if err := pl.Stop(); err != nil { return errors.Wrap(err, "failed to stop plugin") } - if err := plugin.Init(); err != nil { + if err := pl.Init(); err != nil { return errors.Wrap(err, "failed to init plugin") } - p.l.Info("Reconciled plugin", zap.String("name", plugin.Name())) + p.l.Info("Reconciled plugin", zap.String("name", pl.Name())) return nil } @@ -196,22 +187,22 @@ func (p *PluginManager) Start(ctx context.Context) error { return nil } -func (p *PluginManager) SetPlugin(name api.PluginName, plugin api.Plugin) { +func (p *PluginManager) SetPlugin(name string, pl plugin.Plugin) { if p == nil { return } if p.plugins == nil { - p.plugins = map[api.PluginName]api.Plugin{} + p.plugins = map[string]plugin.Plugin{} } - p.plugins[name] = plugin + p.plugins[name] = pl } func (p *PluginManager) SetupChannel(c chan *v1.Event) { for name, plugin := range p.plugins { err := plugin.SetupChannel(c) if err != nil { - p.l.Error("failed to setup channel for plugin", zap.String("plugin name", string(name)), zap.Error(err)) + p.l.Error("failed to setup channel for plugin", zap.String("plugin name", name), zap.Error(err)) } } } diff --git a/pkg/managers/pluginmanager/pluginmanager_test.go b/pkg/managers/pluginmanager/pluginmanager_test.go index 0c25079dfda..2203468b5b4 100644 --- a/pkg/managers/pluginmanager/pluginmanager_test.go +++ b/pkg/managers/pluginmanager/pluginmanager_test.go @@ -13,8 +13,8 @@ import ( "github.com/microsoft/retina/pkg/log" watchermock "github.com/microsoft/retina/pkg/managers/watchermanager/mocks" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" - pluginmock "github.com/microsoft/retina/pkg/plugin/api/mock" + "github.com/microsoft/retina/pkg/plugin" + pluginmock "github.com/microsoft/retina/pkg/plugin/mock" "github.com/microsoft/retina/pkg/telemetry" dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" @@ -28,11 +28,11 @@ const ( ) var ( - cfgPodLevelEnabled = &kcfg.Config{ + cfgPodLevelEnabled = kcfg.Config{ MetricsInterval: timeInter, EnablePodLevel: true, } - cfgPodLevelDisabled = &kcfg.Config{ + cfgPodLevelDisabled = kcfg.Config{ MetricsInterval: timeInter, EnablePodLevel: false, } @@ -50,7 +50,7 @@ func TestNewManager(t *testing.T) { tel := telemetry.NewNoopTelemetry() tests := []struct { name string - cfg *kcfg.Config + cfg kcfg.Config pluginName string wantErr bool }{ @@ -81,7 +81,8 @@ func TestNewManager(t *testing.T) { } for _, tt := range tests { - mgr, err := NewPluginManager(tt.cfg, tel, api.PluginName(tt.pluginName)) + tt.cfg.EnabledPlugin = append(tt.cfg.EnabledPlugin, tt.pluginName) + mgr, err := NewPluginManager(&tt.cfg, tel) if tt.wantErr { require.NotNil(t, err, "Expected error but got nil") require.Nil(t, mgr, "Expected mgr to be nil but it isn't") @@ -89,7 +90,7 @@ func TestNewManager(t *testing.T) { require.Nil(t, err, "Expected nil but got error:%w", err) require.NotNil(t, mgr, "Expected mgr to be intialized but found nil") require.Condition(t, assert.Comparison(func() bool { - _, ok := mgr.plugins[api.PluginName(tt.pluginName)] + _, ok := mgr.plugins[tt.pluginName] return ok }), "plugin not found in mgr map") } @@ -101,7 +102,7 @@ func TestNewManagerStart(t *testing.T) { tel := telemetry.NewNoopTelemetry() tests := []struct { name string - cfg *kcfg.Config + cfg kcfg.Config pluginName string wantErr bool }{ @@ -120,12 +121,14 @@ func TestNewManagerStart(t *testing.T) { } for _, tt := range tests { - mgr, err := NewPluginManager(tt.cfg, tel, api.PluginName(tt.pluginName)) + tt.cfg.EnabledPlugin = append(tt.cfg.EnabledPlugin, tt.pluginName) + mgr, err := NewPluginManager(&tt.cfg, tel) + require.NoError(t, err) mgr.watcherManager = setupWatcherManagerMock(gomock.NewController(t)) require.Nil(t, err, "Expected nil but got error:%w", err) require.NotNil(t, mgr, "Expected mgr to be intialized but found nil") require.Condition(t, assert.Comparison(func() bool { - _, ok := mgr.plugins[api.PluginName(tt.pluginName)] + _, ok := mgr.plugins[tt.pluginName] return ok }), "plugin not found in mgr map") @@ -155,10 +158,11 @@ func TestNewManagerWithPluginStartFailure(t *testing.T) { pluginName := "mockplugin" + cfg := cfgPodLevelEnabled mgr := &PluginManager{ - cfg: cfgPodLevelEnabled, + cfg: &cfg, l: log.Logger().Named("plugin-manager"), - plugins: make(map[api.PluginName]api.Plugin), + plugins: make(map[string]plugin.Plugin), tel: telemetry.NewNoopTelemetry(), watcherManager: setupWatcherManagerMock(ctl), } @@ -171,7 +175,7 @@ func TestNewManagerWithPluginStartFailure(t *testing.T) { mockPlugin.EXPECT().Start(gomock.Any()).Return(errors.New("Plugin failed to start")).AnyTimes() mockPlugin.EXPECT().Name().Return(pluginName).AnyTimes() - mgr.plugins[api.PluginName(pluginName)] = mockPlugin + mgr.plugins[pluginName] = mockPlugin ctx, cancel := context.WithCancel(context.Background()) @@ -193,10 +197,11 @@ func TestNewManagerWithPluginReconcileFailure(t *testing.T) { pluginName := "mockplugin" + cfg := cfgPodLevelEnabled mgr := &PluginManager{ - cfg: cfgPodLevelEnabled, + cfg: &cfg, l: log.Logger().Named("plugin-manager"), - plugins: make(map[api.PluginName]api.Plugin), + plugins: make(map[string]plugin.Plugin), tel: telemetry.NewNoopTelemetry(), watcherManager: setupWatcherManagerMock(ctl), } @@ -209,7 +214,7 @@ func TestNewManagerWithPluginReconcileFailure(t *testing.T) { mockPlugin.EXPECT().Start(gomock.Any()).Return(nil).AnyTimes() mockPlugin.EXPECT().Name().Return(pluginName).AnyTimes() - mgr.plugins[api.PluginName(pluginName)] = mockPlugin + mgr.plugins[pluginName] = mockPlugin ctx, cancel := context.WithCancel(context.Background()) @@ -233,7 +238,7 @@ func TestPluginInit(t *testing.T) { tel := telemetry.NewNoopTelemetry() tests := []struct { name string - cfg *kcfg.Config + cfg kcfg.Config pluginName string wantErr bool }{ @@ -251,7 +256,8 @@ func TestPluginInit(t *testing.T) { }, } for _, tt := range tests { - mgr, err := NewPluginManager(tt.cfg, tel, api.PluginName(tt.pluginName)) + tt.cfg.EnabledPlugin = append(tt.cfg.EnabledPlugin, tt.pluginName) + mgr, err := NewPluginManager(&tt.cfg, tel) require.Nil(t, err, "Expected nil but got error:%w", err) for _, plugin := range mgr.plugins { if tt.wantErr { @@ -271,7 +277,7 @@ func TestPluginStartWithoutInit(t *testing.T) { tel := telemetry.NewNoopTelemetry() tests := []struct { name string - cfg *kcfg.Config + cfg kcfg.Config pluginName string wantErr bool initPlugin bool @@ -292,7 +298,8 @@ func TestPluginStartWithoutInit(t *testing.T) { }, } for _, tt := range tests { - mgr, err := NewPluginManager(tt.cfg, tel, api.PluginName(tt.pluginName)) + tt.cfg.EnabledPlugin = append(tt.cfg.EnabledPlugin, tt.pluginName) + mgr, err := NewPluginManager(&tt.cfg, tel) require.Nil(t, err, "Expected nil but got error:%w", err) for _, plugin := range mgr.plugins { if tt.initPlugin { @@ -316,7 +323,7 @@ func TestPluginStop(t *testing.T) { tel := telemetry.NewNoopTelemetry() tests := []struct { name string - cfg *kcfg.Config + cfg kcfg.Config pluginName string wantStartErr bool wantStopErr bool @@ -379,7 +386,8 @@ func TestPluginStop(t *testing.T) { }, } for _, tt := range tests { - mgr, err := NewPluginManager(tt.cfg, tel, api.PluginName(tt.pluginName)) + tt.cfg.EnabledPlugin = append(tt.cfg.EnabledPlugin, tt.pluginName) + mgr, err := NewPluginManager(&tt.cfg, tel) require.Nil(t, err, "Expected nil but got error:%w", err) for _, plugin := range mgr.plugins { if tt.initPlugin { @@ -411,10 +419,11 @@ func TestStopPluginManagerGracefully(t *testing.T) { pluginName := "mockplugin" + cfg := cfgPodLevelEnabled mgr := &PluginManager{ - cfg: cfgPodLevelEnabled, + cfg: &cfg, l: log.Logger().Named("plugin-manager"), - plugins: make(map[api.PluginName]api.Plugin), + plugins: make(map[string]plugin.Plugin), tel: telemetry.NewNoopTelemetry(), watcherManager: setupWatcherManagerMock(ctl), } @@ -427,7 +436,7 @@ func TestStopPluginManagerGracefully(t *testing.T) { mockPlugin.EXPECT().Start(gomock.Any()).Return(nil).AnyTimes() mockPlugin.EXPECT().Name().Return(pluginName).AnyTimes() - mgr.plugins[api.PluginName(pluginName)] = mockPlugin + mgr.plugins[pluginName] = mockPlugin ctx, cancel := context.WithCancel(context.Background()) g, errctx := errgroup.WithContext(ctx) @@ -450,10 +459,11 @@ func TestWatcherManagerFailure(t *testing.T) { m := watchermock.NewMockIWatcherManager(ctl) m.EXPECT().Start(gomock.Any()).Return(errors.New("error")).AnyTimes() + cfg := cfgPodLevelEnabled mgr := &PluginManager{ - cfg: cfgPodLevelEnabled, + cfg: &cfg, l: log.Logger().Named("plugin-manager"), - plugins: make(map[api.PluginName]api.Plugin), + plugins: make(map[string]plugin.Plugin), tel: telemetry.NewNoopTelemetry(), watcherManager: m, } diff --git a/pkg/plugin/ciliumeventobserver/ciliumeventobserver_linux.go b/pkg/plugin/ciliumeventobserver/ciliumeventobserver_linux.go index 0ec01cc3575..5373e5c4fdf 100644 --- a/pkg/plugin/ciliumeventobserver/ciliumeventobserver_linux.go +++ b/pkg/plugin/ciliumeventobserver/ciliumeventobserver_linux.go @@ -1,6 +1,3 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - // Package ciliumeventobserver contains the Retina CiliumEventObserver plugin. It uses unix socket to get events from cilium and decode them to flow objects. package ciliumeventobserver @@ -18,7 +15,7 @@ import ( "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "go.uber.org/zap" ) @@ -37,10 +34,14 @@ var ( errPodLevelDisabled = errors.New("pod level enricher is not initialized") ) -func New(cfg *kcfg.Config) api.Plugin { +func init() { + registry.Add(name, New) +} + +func New(cfg *kcfg.Config) registry.Plugin { return &ciliumeventobserver{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), retryDelay: defaultRetryDelay, maxAttempts: defaultAttempts, sockPath: cfg.MonitorSockPath, @@ -50,7 +51,7 @@ func New(cfg *kcfg.Config) api.Plugin { } func (c *ciliumeventobserver) Name() string { - return string(Name) + return name } func (c *ciliumeventobserver) Generate(_ context.Context) error { @@ -165,13 +166,13 @@ func (c *ciliumeventobserver) monitorLoop(ctx context.Context) error { return err //nolint:wrapcheck // Error is handled by the caller } c.l.Warn("Failed to decode payload from cilium", zap.Error(err)) - metrics.LostEventsCounter.WithLabelValues(parserMetric, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(parserMetric, name).Inc() continue } select { case c.payloadEvents <- &pl: default: - metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, name).Inc() } } } @@ -192,7 +193,7 @@ func (c *ciliumeventobserver) parserLoop(ctx context.Context) { select { case c.externalChannel <- ev: default: - metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, name).Inc() } } } diff --git a/pkg/plugin/ciliumeventobserver/types_linux.go b/pkg/plugin/ciliumeventobserver/types_linux.go index b347c1f33e6..3f9fad500c3 100644 --- a/pkg/plugin/ciliumeventobserver/types_linux.go +++ b/pkg/plugin/ciliumeventobserver/types_linux.go @@ -12,12 +12,9 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) -const ( - Name api.PluginName = "ciliumeventobserver" -) +const name = "ciliumeventobserver" type ciliumeventobserver struct { cfg *kcfg.Config diff --git a/pkg/plugin/dns/dns_linux.go b/pkg/plugin/dns/dns_linux.go index bba17d04e86..0e41bb7ca91 100644 --- a/pkg/plugin/dns/dns_linux.go +++ b/pkg/plugin/dns/dns_linux.go @@ -17,21 +17,25 @@ import ( "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/plugin/common" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "go.uber.org/zap" ) -func New(cfg *kcfg.Config) api.Plugin { +func init() { + registry.Add(name, New) +} + +func New(cfg *kcfg.Config) registry.Plugin { return &dns{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } func (d *dns) Name() string { - return string(Name) + return name } func (d *dns) Generate(ctx context.Context) error { @@ -152,7 +156,7 @@ func (d *dns) eventHandler(event *types.Event) { select { case d.externalChannel <- ev: default: - metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, name).Inc() } } } diff --git a/pkg/plugin/dns/dns_linux_test.go b/pkg/plugin/dns/dns_linux_test.go index a93fef65177..397c10dc557 100644 --- a/pkg/plugin/dns/dns_linux_test.go +++ b/pkg/plugin/dns/dns_linux_test.go @@ -31,7 +31,7 @@ import ( func TestStop(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) d := &dns{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), pid: 1234, } // Check nil tracer. @@ -59,7 +59,7 @@ func TestStart(t *testing.T) { defer e.Reader.Close() d := &dns{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), pid: 1234, cfg: &config.Config{ EnablePodLevel: true, @@ -90,7 +90,7 @@ func TestStart(t *testing.T) { func TestMalformedEventHandler(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) d := &dns{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } // Test nil event. @@ -112,7 +112,7 @@ func TestRequestEventHandler(t *testing.T) { metrics.InitializeMetrics() d := &dns{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), cfg: &config.Config{ EnablePodLevel: true, }, @@ -162,7 +162,7 @@ func TestResponseEventHandler(t *testing.T) { metrics.InitializeMetrics() d := &dns{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), cfg: &config.Config{ EnablePodLevel: true, }, diff --git a/pkg/plugin/dns/types_linux.go b/pkg/plugin/dns/types_linux.go index dd5a05077cf..9517cc9b2f8 100644 --- a/pkg/plugin/dns/types_linux.go +++ b/pkg/plugin/dns/types_linux.go @@ -8,13 +8,10 @@ import ( "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/plugin/common" ) -const ( - Name api.PluginName = "dns" -) +const name = "dns" var m metrics.CounterVec diff --git a/pkg/plugin/dropreason/dropreason_linux.go b/pkg/plugin/dropreason/dropreason_linux.go index 6d6a69af263..04096c87bd8 100644 --- a/pkg/plugin/dropreason/dropreason_linux.go +++ b/pkg/plugin/dropreason/dropreason_linux.go @@ -25,9 +25,9 @@ import ( "github.com/microsoft/retina/pkg/loader" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" plugincommon "github.com/microsoft/retina/pkg/plugin/common" _ "github.com/microsoft/retina/pkg/plugin/dropreason/_cprog" // nolint + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "github.com/pkg/errors" "go.uber.org/zap" @@ -42,20 +42,24 @@ const ( nfConntrackConfirmFn = "__nf_conntrack_confirm" ) +func init() { + registry.Add(name, New) +} + // New creates a new dropreason plugin. // When opts.EnablePodLevel=false, the enricher will not be used. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &dropReason{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } // Plugin API implementation for packet forward. -// Ref: github.com/microsoft/retina/pkg/plugin/api +// Ref: github.com/microsoft/retina/pkg/plugin func (dr *dropReason) Name() string { - return string(Name) + return name } func (dr *dropReason) Generate(ctx context.Context) error { @@ -399,7 +403,7 @@ func (dr *dropReason) processRecord(ctx context.Context, id int) { select { case dr.externalChannel <- ev: default: - metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, name).Inc() } } } @@ -421,7 +425,7 @@ func (dr *dropReason) readEventArrayData() error { if record.LostSamples > 0 { // dr.l.Warn("Lost samples in drop reason plugin", zap.Uint64("lost samples", record.LostSamples)) - metrics.LostEventsCounter.WithLabelValues(utils.Kernel, string(Name)).Add(float64(record.LostSamples)) + metrics.LostEventsCounter.WithLabelValues(utils.Kernel, name).Add(float64(record.LostSamples)) return nil } @@ -430,7 +434,7 @@ func (dr *dropReason) readEventArrayData() error { dr.l.Debug("Record sent to channel", zap.Any("record", record)) default: // dr.l.Warn("Channel is full, dropping record", zap.Any("record", record)) - metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, name).Inc() } return nil diff --git a/pkg/plugin/dropreason/dropreason_linux_test.go b/pkg/plugin/dropreason/dropreason_linux_test.go index d2a1bf0423d..e494a03f023 100644 --- a/pkg/plugin/dropreason/dropreason_linux_test.go +++ b/pkg/plugin/dropreason/dropreason_linux_test.go @@ -43,7 +43,7 @@ func TestStop(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } err := p.Stop() if err != nil { @@ -70,7 +70,7 @@ func TestShutdown(t *testing.T) { MetricsInterval: 100 * time.Second, EnablePodLevel: false, }, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx, cancel := context.WithCancel(context.Background()) @@ -93,7 +93,7 @@ func TestCompile(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } dir, _ := absPath() expectedOutputFile := fmt.Sprintf("%s/%s", dir, bpfObjectFileName) @@ -122,7 +122,7 @@ func TestProcessMapValue(t *testing.T) { metrics.InitializeMetrics() dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } testMetricKey := dropMetricKey{DropType: 1, ReturnVal: 2} @@ -167,7 +167,7 @@ func TestDropReasonRun_Error(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelDisabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, } @@ -225,7 +225,7 @@ func TestDropReasonRun(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, reader: mockedPerfReader, enricher: menricher, @@ -280,7 +280,7 @@ func TestDropReasonReadDataPodLevelEnabled(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, reader: mockedPerfReader, enricher: menricher, @@ -325,7 +325,7 @@ func TestDropReasonReadData_WithEmptyPerfArray(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, reader: mockedPerfReader, } @@ -367,7 +367,7 @@ func TestDropReasonReadData_WithPerfArrayLostSamples(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, reader: mockedPerfReader, } @@ -409,7 +409,7 @@ func TestDropReasonReadData_WithUnknownError(t *testing.T) { // Create drop reason instance dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), metricsMapData: mockedMap, reader: mockedPerfReader, } @@ -443,7 +443,7 @@ func TestDropReasonGenerate(t *testing.T) { // Instantiate the dropReason struct with a mocked logger and context. dr := &dropReason{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx := context.Background() diff --git a/pkg/plugin/dropreason/types_linux.go b/pkg/plugin/dropreason/types_linux.go index 3efb899317d..756f524af75 100644 --- a/pkg/plugin/dropreason/types_linux.go +++ b/pkg/plugin/dropreason/types_linux.go @@ -12,18 +12,17 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/utils" ) const ( - Name api.PluginName = "dropreason" - bpfSourceDir string = "_cprog" - bpfSourceFileName string = "drop_reason.c" - bpfObjectFileName string = "kprobe_bpf.o" - dynamicHeaderFileName string = "dynamic.h" - buffer int = 10000 - workers int = 2 + name string = "dropreason" + bpfSourceDir string = "_cprog" + bpfSourceFileName string = "drop_reason.c" + bpfObjectFileName string = "kprobe_bpf.o" + dynamicHeaderFileName string = "dynamic.h" + buffer int = 10000 + workers int = 2 ) // Determined via testing on a large cluster. diff --git a/pkg/plugin/windows/hnsstats/hnsstats_windows.go b/pkg/plugin/hnsstats/hnsstats_windows.go similarity index 95% rename from pkg/plugin/windows/hnsstats/hnsstats_windows.go rename to pkg/plugin/hnsstats/hnsstats_windows.go index e309bfc7063..a668dce4f9d 100644 --- a/pkg/plugin/windows/hnsstats/hnsstats_windows.go +++ b/pkg/plugin/hnsstats/hnsstats_windows.go @@ -15,7 +15,7 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "go.uber.org/zap" ) @@ -43,15 +43,26 @@ const ( zapPortField = "port" ) +func init() { + registry.Add(name, New) +} + +func New(cfg *kcfg.Config) registry.Plugin { + return &hnsstats{ + cfg: cfg, + l: log.Logger().Named(name), + } +} + func (h *hnsstats) Name() string { - return string(Name) + return name } -func (h *hnsstats) Generate(ctx context.Context) error { +func (h *hnsstats) Generate(context.Context) error { return nil } -func (h *hnsstats) Compile(ctx context.Context) error { +func (h *hnsstats) Compile(context.Context) error { return nil } @@ -78,8 +89,8 @@ func (h *hnsstats) Init() error { return nil } -func (h *hnsstats) SetupChannel(ch chan *v1.Event) error { - h.l.Warn("Plugin does not support SetupChannel", zap.String(zapPluginField, string(Name))) +func (h *hnsstats) SetupChannel(chan *v1.Event) error { + h.l.Warn("Plugin does not support SetupChannel", zap.String(zapPluginField, name)) return nil } @@ -214,12 +225,3 @@ func (d *hnsstats) Stop() error { d.l.Info("Exiting hnsstats Stop...") return nil } - -// New creates an hnsstats plugin. -func New(cfg *kcfg.Config) api.Plugin { - // Init logger - return &hnsstats{ - cfg: cfg, - l: log.Logger().Named(string(Name)), - } -} diff --git a/pkg/plugin/windows/hnsstats/hnsstats_windows_test.go b/pkg/plugin/hnsstats/hnsstats_windows_test.go similarity index 94% rename from pkg/plugin/windows/hnsstats/hnsstats_windows_test.go rename to pkg/plugin/hnsstats/hnsstats_windows_test.go index d46b2c1d468..fefea6d8373 100644 --- a/pkg/plugin/windows/hnsstats/hnsstats_windows_test.go +++ b/pkg/plugin/hnsstats/hnsstats_windows_test.go @@ -20,7 +20,7 @@ func TestShutdown(t *testing.T) { MetricsInterval: 100 * time.Second, EnablePodLevel: true, }, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } p.Init() ctx, cancel := context.WithCancel(context.Background()) diff --git a/pkg/plugin/windows/hnsstats/types_windows.go b/pkg/plugin/hnsstats/types_windows.go similarity index 98% rename from pkg/plugin/windows/hnsstats/types_windows.go rename to pkg/plugin/hnsstats/types_windows.go index f32dbf529f7..ab8511d5d35 100644 --- a/pkg/plugin/windows/hnsstats/types_windows.go +++ b/pkg/plugin/hnsstats/types_windows.go @@ -11,7 +11,6 @@ import ( "github.com/Microsoft/hcsshim/hcn" kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -19,8 +18,8 @@ import ( ) const ( - Name api.PluginName = "hnsstats" - HnsStatsEvent string = "hnsstatscount" + name string = "hnsstats" + HnsStatsEvent string = "hnsstatscount" // From HNSStats API PacketsReceived string = "win_packets_recv_count" PacketsSent string = "win_packets_sent_count" diff --git a/pkg/plugin/windows/hnsstats/vfp_counters_windows.go b/pkg/plugin/hnsstats/vfp_counters_windows.go similarity index 100% rename from pkg/plugin/windows/hnsstats/vfp_counters_windows.go rename to pkg/plugin/hnsstats/vfp_counters_windows.go diff --git a/pkg/plugin/include_linux.go b/pkg/plugin/include_linux.go new file mode 100644 index 00000000000..b3448be2993 --- /dev/null +++ b/pkg/plugin/include_linux.go @@ -0,0 +1,15 @@ +// nolint // don't complain about this file +package plugin + +// Plugins self-register via their init() funcs as long as they are imported. +import ( + _ "github.com/microsoft/retina/pkg/plugin/ciliumeventobserver" + _ "github.com/microsoft/retina/pkg/plugin/dns" + _ "github.com/microsoft/retina/pkg/plugin/dropreason" + _ "github.com/microsoft/retina/pkg/plugin/infiniband" + _ "github.com/microsoft/retina/pkg/plugin/linuxutil" + _ "github.com/microsoft/retina/pkg/plugin/mockplugin" + _ "github.com/microsoft/retina/pkg/plugin/packetforward" + _ "github.com/microsoft/retina/pkg/plugin/packetparser" + _ "github.com/microsoft/retina/pkg/plugin/tcpretrans" +) diff --git a/pkg/plugin/include_windows.go b/pkg/plugin/include_windows.go new file mode 100644 index 00000000000..1cbb240ecae --- /dev/null +++ b/pkg/plugin/include_windows.go @@ -0,0 +1,8 @@ +// nolint // don't complain about this file +package plugin + +// Plugins self-register via their init() funcs as long as they are imported. +import ( + _ "github.com/microsoft/retina/pkg/plugin/hnsstats" + _ "github.com/microsoft/retina/pkg/plugin/pktmon" +) diff --git a/pkg/plugin/infiniband/infiniband_linux.go b/pkg/plugin/infiniband/infiniband_linux.go index c40bc2a692a..c49b5acd231 100644 --- a/pkg/plugin/infiniband/infiniband_linux.go +++ b/pkg/plugin/infiniband/infiniband_linux.go @@ -12,29 +12,33 @@ import ( hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1" kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "go.uber.org/zap" ) var ErrAlreadyRunning = errors.New("infiniband plugin is already running") +func init() { + registry.Add(name, New) +} + // New creates a infiniband plugin. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &infiniband{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } func (ib *infiniband) Name() string { - return string(Name) + return name } -func (ib *infiniband) Generate(ctx context.Context) error { //nolint //implementing iface +func (ib *infiniband) Generate(context.Context) error { return nil } -func (ib *infiniband) Compile(ctx context.Context) error { //nolint // implementing iface +func (ib *infiniband) Compile(context.Context) error { return nil } @@ -53,8 +57,8 @@ func (ib *infiniband) Start(ctx context.Context) error { return ib.run(ctx) } -func (ib *infiniband) SetupChannel(ch chan *hubblev1.Event) error { // nolint // impl. iface - ib.l.Warn("Plugin does not support SetupChannel", zap.String("plugin", string(Name))) +func (ib *infiniband) SetupChannel(chan *hubblev1.Event) error { + ib.l.Warn("Plugin does not support SetupChannel", zap.String("plugin", name)) return nil } diff --git a/pkg/plugin/infiniband/infiniband_linux_test.go b/pkg/plugin/infiniband/infiniband_linux_test.go index e22b94b29c3..41d3be7435d 100644 --- a/pkg/plugin/infiniband/infiniband_linux_test.go +++ b/pkg/plugin/infiniband/infiniband_linux_test.go @@ -33,7 +33,7 @@ func TestStop(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &infiniband{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } err := p.Stop() if err != nil { @@ -60,7 +60,7 @@ func TestShutdown(t *testing.T) { MetricsInterval: 100 * time.Second, EnablePodLevel: true, }, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx, cancel := context.WithCancel(context.Background()) @@ -69,8 +69,6 @@ func TestShutdown(t *testing.T) { g.Go(func() error { return p.Start(errctx) }) - - time.Sleep(1 * time.Second) cancel() err := g.Wait() require.NoError(t, err) diff --git a/pkg/plugin/infiniband/types_linux.go b/pkg/plugin/infiniband/types_linux.go index aba0d53e02d..5aa613ac9bc 100644 --- a/pkg/plugin/infiniband/types_linux.go +++ b/pkg/plugin/infiniband/types_linux.go @@ -7,12 +7,9 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) -const ( - Name api.PluginName = "infiniband" -) +const name = "infiniband" //go:generate go run go.uber.org/mock/mockgen@v0.4.0 -source=types_linux.go -destination=infiniband_mock_generated.go -package=infiniband type infiniband struct { diff --git a/pkg/plugin/linuxutil/linuxutil_linux.go b/pkg/plugin/linuxutil/linuxutil_linux.go index 2733a02117e..715acc358a1 100644 --- a/pkg/plugin/linuxutil/linuxutil_linux.go +++ b/pkg/plugin/linuxutil/linuxutil_linux.go @@ -13,30 +13,34 @@ import ( hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1" kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/safchain/ethtool" "go.uber.org/zap" ) const defaultLimit = 2000 +func init() { + registry.Add(name, New) +} + // New creates a linuxutil plugin. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &linuxUtil{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } func (lu *linuxUtil) Name() string { - return string(Name) + return name } -func (lu *linuxUtil) Generate(ctx context.Context) error { +func (lu *linuxUtil) Generate(context.Context) error { return nil } -func (lu *linuxUtil) Compile(ctx context.Context) error { +func (lu *linuxUtil) Compile(context.Context) error { return nil } @@ -50,8 +54,8 @@ func (lu *linuxUtil) Start(ctx context.Context) error { return lu.run(ctx) } -func (lu *linuxUtil) SetupChannel(ch chan *hubblev1.Event) error { - lu.l.Debug("Plugin does not support SetupChannel", zap.String("plugin", string(Name))) +func (lu *linuxUtil) SetupChannel(chan *hubblev1.Event) error { + lu.l.Debug("Plugin does not support SetupChannel", zap.String("plugin", name)) return nil } diff --git a/pkg/plugin/linuxutil/linuxutil_linux_test.go b/pkg/plugin/linuxutil/linuxutil_linux_test.go index 50a2dc6b818..afa32d55720 100644 --- a/pkg/plugin/linuxutil/linuxutil_linux_test.go +++ b/pkg/plugin/linuxutil/linuxutil_linux_test.go @@ -33,7 +33,7 @@ func TestStop(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &linuxUtil{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } err := p.Stop() if err != nil { @@ -60,7 +60,7 @@ func TestShutdown(t *testing.T) { MetricsInterval: 100 * time.Second, EnablePodLevel: true, }, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx, cancel := context.WithCancel(context.Background()) diff --git a/pkg/plugin/linuxutil/types_linux.go b/pkg/plugin/linuxutil/types_linux.go index acd1b55b14e..a5263c3e256 100644 --- a/pkg/plugin/linuxutil/types_linux.go +++ b/pkg/plugin/linuxutil/types_linux.go @@ -6,12 +6,9 @@ import ( "github.com/cakturk/go-netstat/netstat" kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) -const ( - Name api.PluginName = "linuxutil" -) +const name = "linuxutil" //go:generate go run go.uber.org/mock/mockgen@v0.4.0 -source=types_linux.go -destination=linuxutil_mock_generated_linux.go -package=linuxutil type linuxUtil struct { diff --git a/pkg/plugin/api/mock/mock_plugin.go b/pkg/plugin/mock/plugin.go similarity index 77% rename from pkg/plugin/api/mock/mock_plugin.go rename to pkg/plugin/mock/plugin.go index bae2e3c848d..888d9267dd2 100644 --- a/pkg/plugin/api/mock/mock_plugin.go +++ b/pkg/plugin/mock/plugin.go @@ -5,15 +5,15 @@ // // Code generated by MockGen. DO NOT EDIT. -// Source: github.com/microsoft/retina/pkg/plugin/api (interfaces: Plugin) +// Source: github.com/microsoft/retina/pkg/plugin/ (interfaces: Plugin) // // Generated by this command: // -// mockgen -destination=mock/mock_plugin.go -copyright_file=../../lib/ignore_headers.txt -package=mock github.com/microsoft/retina/pkg/plugin/api Plugin +// mockgen -destination=mock/plugin.go -copyright_file=../lib/ignore_headers.txt -package=plugin github.com/microsoft/retina/pkg/plugin/ Plugin // -// Package mock is a generated GoMock package. -package mock +// Package plugin is a generated GoMock package. +package plugin import ( context "context" @@ -27,6 +27,7 @@ import ( type MockPlugin struct { ctrl *gomock.Controller recorder *MockPluginMockRecorder + isgomock struct{} } // MockPluginMockRecorder is the mock recorder for MockPlugin. @@ -47,31 +48,31 @@ func (m *MockPlugin) EXPECT() *MockPluginMockRecorder { } // Compile mocks base method. -func (m *MockPlugin) Compile(arg0 context.Context) error { +func (m *MockPlugin) Compile(ctx context.Context) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Compile", arg0) + ret := m.ctrl.Call(m, "Compile", ctx) ret0, _ := ret[0].(error) return ret0 } // Compile indicates an expected call of Compile. -func (mr *MockPluginMockRecorder) Compile(arg0 any) *gomock.Call { +func (mr *MockPluginMockRecorder) Compile(ctx any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Compile", reflect.TypeOf((*MockPlugin)(nil).Compile), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Compile", reflect.TypeOf((*MockPlugin)(nil).Compile), ctx) } // Generate mocks base method. -func (m *MockPlugin) Generate(arg0 context.Context) error { +func (m *MockPlugin) Generate(ctx context.Context) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Generate", arg0) + ret := m.ctrl.Call(m, "Generate", ctx) ret0, _ := ret[0].(error) return ret0 } // Generate indicates an expected call of Generate. -func (mr *MockPluginMockRecorder) Generate(arg0 any) *gomock.Call { +func (mr *MockPluginMockRecorder) Generate(ctx any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Generate", reflect.TypeOf((*MockPlugin)(nil).Generate), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Generate", reflect.TypeOf((*MockPlugin)(nil).Generate), ctx) } // Init mocks base method. @@ -117,17 +118,17 @@ func (mr *MockPluginMockRecorder) SetupChannel(arg0 any) *gomock.Call { } // Start mocks base method. -func (m *MockPlugin) Start(arg0 context.Context) error { +func (m *MockPlugin) Start(ctx context.Context) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Start", arg0) + ret := m.ctrl.Call(m, "Start", ctx) ret0, _ := ret[0].(error) return ret0 } // Start indicates an expected call of Start. -func (mr *MockPluginMockRecorder) Start(arg0 any) *gomock.Call { +func (mr *MockPluginMockRecorder) Start(ctx any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockPlugin)(nil).Start), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockPlugin)(nil).Start), ctx) } // Stop mocks base method. diff --git a/pkg/plugin/mockplugin/mockplugin.go b/pkg/plugin/mockplugin/mockplugin.go index ec06725dbe7..80249d19f27 100644 --- a/pkg/plugin/mockplugin/mockplugin.go +++ b/pkg/plugin/mockplugin/mockplugin.go @@ -9,12 +9,10 @@ import ( hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1" kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" ) -const ( - Name api.PluginName = "mockplugin" -) +const name = "mockplugin" const ( initialize = iota + 1 @@ -28,8 +26,12 @@ type MockPlugin struct { l *log.ZapLogger } +func init() { + registry.Add(name, New) +} + // New creates a mock plugin. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &MockPlugin{ cfg: cfg, } @@ -39,11 +41,11 @@ func (mp *MockPlugin) Name() string { return "mockplugin" } -func (mp *MockPlugin) Generate(ctx context.Context) error { +func (mp *MockPlugin) Generate(context.Context) error { return nil } -func (mp *MockPlugin) Compile(ctx context.Context) error { +func (mp *MockPlugin) Compile(context.Context) error { return nil } @@ -52,7 +54,7 @@ func (mp *MockPlugin) Init() error { return nil } -func (mp *MockPlugin) Start(ctx context.Context) error { +func (mp *MockPlugin) Start(context.Context) error { if mp.state != initialize { return fmt.Errorf("plugin not initialized") } @@ -68,10 +70,6 @@ func (mp *MockPlugin) Stop() error { return nil } -func (mp *MockPlugin) SetupChannel(ch chan *hubblev1.Event) error { +func (mp *MockPlugin) SetupChannel(chan *hubblev1.Event) error { return nil } - -func NewPluginFn(l *log.ZapLogger) api.Plugin { - return &MockPlugin{l: l} -} diff --git a/pkg/plugin/packetforward/packetforward_linux.go b/pkg/plugin/packetforward/packetforward_linux.go index e4682a1244b..3e14aee5415 100644 --- a/pkg/plugin/packetforward/packetforward_linux.go +++ b/pkg/plugin/packetforward/packetforward_linux.go @@ -13,28 +13,31 @@ import ( "syscall" "time" - kcfg "github.com/microsoft/retina/pkg/config" - "github.com/pkg/errors" - hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1" "github.com/cilium/ebpf" + kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/loader" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" "github.com/microsoft/retina/pkg/utils" + "github.com/pkg/errors" "go.uber.org/zap" _ "github.com/microsoft/retina/pkg/plugin/packetforward/_cprog" // nolint + "github.com/microsoft/retina/pkg/plugin/registry" ) //go:generate go run github.com/cilium/ebpf/cmd/bpf2go@master -cflags "-g -O2 -Wall -D__TARGET_ARCH_${GOARCH} -Wall" -target ${GOARCH} -type metric packetforward ./_cprog/packetforward.c -- -I../lib/_${GOARCH} -I../lib/common/libbpf/_src +func init() { + registry.Add(name, New) +} + // New creates a new packetforward plugin. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &packetForward{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } @@ -89,13 +92,13 @@ func updateMetrics(data *PacketForwardData) { } // Plugin API implementation for packet forward. -// Ref: github.com/microsoft/retina/pkg/plugin/api +// Ref: github.com/microsoft/retina/pkg/plugin func (p *packetForward) Name() string { - return string(Name) + return name } -func (p *packetForward) Generate(ctx context.Context) error { +func (p *packetForward) Generate(context.Context) error { // Use this function to parse p and generate header files under cprog. // Example: https://github.com/anubhabMajumdar/Retina/blob/c4bc06e7f922124f92536ffb5312bada5c2dfe99/pkg/plugin/custom/packetforward/packetforward.go#L77 p.l.Info("Packet forwarding metric header generated") @@ -192,8 +195,8 @@ func (p *packetForward) Stop() error { return nil } -func (p *packetForward) SetupChannel(ch chan *hubblev1.Event) error { - p.l.Debug("SetupChannel is not supported by plugin", zap.String("plugin", string(Name))) +func (p *packetForward) SetupChannel(chan *hubblev1.Event) error { + p.l.Debug("SetupChannel is not supported by plugin", zap.String("plugin", name)) return nil } diff --git a/pkg/plugin/packetforward/packetforward_linux_test.go b/pkg/plugin/packetforward/packetforward_linux_test.go index 7a64ff8381f..2393889ee4c 100644 --- a/pkg/plugin/packetforward/packetforward_linux_test.go +++ b/pkg/plugin/packetforward/packetforward_linux_test.go @@ -84,7 +84,7 @@ func TestStop(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &packetForward{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } err := p.Stop() if err != nil { @@ -112,7 +112,7 @@ func TestStop_NonNilMap(t *testing.T) { mockedMap := mocks.NewMockIMap(ctrl) mockedMap.EXPECT().Close().Return(errors.New("Error")).MinTimes(1) p := &packetForward{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), hashmapData: mockedMap, } err := p.Stop() @@ -136,7 +136,7 @@ func TestStop_NonNilMap(t *testing.T) { func TestCompile(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &packetForward{ - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } dir, _ := absPath() expectedOutputFile := fmt.Sprintf("%s/%s", dir, bpfObjectFileName) @@ -163,7 +163,7 @@ func TestShutdown(t *testing.T) { MetricsInterval: 100 * time.Second, EnablePodLevel: true, }, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx, cancel := context.WithCancel(context.Background()) @@ -190,7 +190,7 @@ func TestRun(t *testing.T) { p := &packetForward{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), hashmapData: mockedMap, } @@ -223,7 +223,7 @@ func TestRun_ReturnError_Ingress(t *testing.T) { p := &packetForward{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), hashmapData: mockedMap, } @@ -257,7 +257,7 @@ func TestRun_ReturnError_Egress(t *testing.T) { p := &packetForward{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), hashmapData: mockedMap, } diff --git a/pkg/plugin/packetforward/types_linux.go b/pkg/plugin/packetforward/types_linux.go index de83631e496..ac73e4f2e42 100644 --- a/pkg/plugin/packetforward/types_linux.go +++ b/pkg/plugin/packetforward/types_linux.go @@ -8,21 +8,20 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) const ( - PacketForwardSocketAttach int = 50 - Name api.PluginName = "packetforward" - socketIndex int = 0 - ingressKey uint32 = 0 - egressKey uint32 = 1 - ingressLabel string = "ingress" - egressLabel string = "egress" - bpfObjectFileName string = "packetforward_bpf.o" - bpfSourceDir string = "_cprog" - bpfSourceFileName string = "packetforward.c" - dynamicHeaderFileName string = "dynamic.h" + PacketForwardSocketAttach int = 50 + name string = "packetforward" + socketIndex int = 0 + ingressKey uint32 = 0 + egressKey uint32 = 1 + ingressLabel string = "ingress" + egressLabel string = "egress" + bpfObjectFileName string = "packetforward_bpf.o" + bpfSourceDir string = "_cprog" + bpfSourceFileName string = "packetforward.c" + dynamicHeaderFileName string = "dynamic.h" ) // Interface to https://pkg.go.dev/github.com/cilium/ebpf#Map. diff --git a/pkg/plugin/packetparser/packetparser_linux.go b/pkg/plugin/packetparser/packetparser_linux.go index 5e63a2324ed..5a8dd690ddf 100644 --- a/pkg/plugin/packetparser/packetparser_linux.go +++ b/pkg/plugin/packetparser/packetparser_linux.go @@ -31,7 +31,6 @@ import ( "github.com/microsoft/retina/pkg/loader" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" plugincommon "github.com/microsoft/retina/pkg/plugin/common" _ "github.com/microsoft/retina/pkg/plugin/lib/_amd64" // nolint _ "github.com/microsoft/retina/pkg/plugin/lib/_arm64" // nolint @@ -40,6 +39,7 @@ import ( _ "github.com/microsoft/retina/pkg/plugin/lib/common/libbpf/_include/uapi/linux" // nolint _ "github.com/microsoft/retina/pkg/plugin/lib/common/libbpf/_src" // nolint _ "github.com/microsoft/retina/pkg/plugin/packetparser/_cprog" // nolint + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/pubsub" "github.com/microsoft/retina/pkg/utils" "github.com/microsoft/retina/pkg/watchers/endpoint" @@ -52,16 +52,20 @@ import ( //go:generate go run github.com/cilium/ebpf/cmd/bpf2go@master -cflags "-g -O2 -Wall -D__TARGET_ARCH_${GOARCH} -Wall" -target ${GOARCH} -type packet packetparser ./_cprog/packetparser.c -- -I../lib/_${GOARCH} -I../lib/common/libbpf/_src -I../lib/common/libbpf/_include/linux -I../lib/common/libbpf/_include/uapi/linux -I../lib/common/libbpf/_include/asm -I../filter/_cprog/ -I../conntrack/_cprog/ var errNoOutgoingLinks = errors.New("could not determine any outgoing links") +func init() { + registry.Add(name, New) +} + // New creates a packetparser plugin. -func New(cfg *kcfg.Config) api.Plugin { +func New(cfg *kcfg.Config) registry.Plugin { return &packetParser{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } func (p *packetParser) Name() string { - return string(Name) + return name } func (p *packetParser) Generate(ctx context.Context) error { @@ -606,7 +610,7 @@ func (p *packetParser) processRecord(ctx context.Context, id int) { default: // Channel is full, drop the event. // We shouldn't slow down the reader. - metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, name).Inc() } } } @@ -642,7 +646,7 @@ func (p *packetParser) readData() { if record.LostSamples > 0 { // p.l.Warn("Lostsamples", zap.Uint64("lost samples", record.LostSamples)) - metrics.LostEventsCounter.WithLabelValues(utils.Kernel, string(Name)).Add(float64(record.LostSamples)) + metrics.LostEventsCounter.WithLabelValues(utils.Kernel, name).Add(float64(record.LostSamples)) return } @@ -651,7 +655,7 @@ func (p *packetParser) readData() { default: // Channel is full, drop the record. // We shouldn't slow down the perf array reader. - metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.BufferedChannel, name).Inc() } } diff --git a/pkg/plugin/packetparser/packetparser_linux_test.go b/pkg/plugin/packetparser/packetparser_linux_test.go index 80c010da56a..1a7a1bc3503 100644 --- a/pkg/plugin/packetparser/packetparser_linux_test.go +++ b/pkg/plugin/packetparser/packetparser_linux_test.go @@ -536,7 +536,7 @@ func TestPacketParseGenerate(t *testing.T) { // Instantiate the packetParser struct with a mocked logger and context. p := &packetParser{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } ctx := context.Background() @@ -567,7 +567,7 @@ func TestCompile(t *testing.T) { log.SetupZapLogger(log.GetDefaultLogOpts()) p := &packetParser{ cfg: cfgPodLevelEnabled, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } dir, _ := absPath() expectedOutputFile := fmt.Sprintf("%s/%s", dir, bpfObjectFileName) diff --git a/pkg/plugin/packetparser/types_linux.go b/pkg/plugin/packetparser/types_linux.go index 30c0a2b43de..4fc06f35c37 100644 --- a/pkg/plugin/packetparser/types_linux.go +++ b/pkg/plugin/packetparser/types_linux.go @@ -17,7 +17,6 @@ import ( "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) const ( @@ -32,16 +31,16 @@ const ( ) const ( - Name api.PluginName = "packetparser" - toEndpoint string = "toEndpoint" - fromEndpoint string = "fromEndpoint" - workers int = 2 - buffer int = 10000 - bpfSourceDir string = "_cprog" - bpfSourceFileName string = "packetparser.c" - bpfObjectFileName string = "packetparser_bpf.o" - dynamicHeaderFileName string = "dynamic.h" - tcFilterPriority uint16 = 0x1 + name string = "packetparser" + toEndpoint string = "toEndpoint" + fromEndpoint string = "fromEndpoint" + workers int = 2 + buffer int = 10000 + bpfSourceDir string = "_cprog" + bpfSourceFileName string = "packetparser.c" + bpfObjectFileName string = "packetparser_bpf.o" + dynamicHeaderFileName string = "dynamic.h" + tcFilterPriority uint16 = 0x1 ) type interfaceType string diff --git a/pkg/plugin/windows/pktmon/pktmon_plugin_windows.go b/pkg/plugin/pktmon/pktmon_windows.go similarity index 95% rename from pkg/plugin/windows/pktmon/pktmon_plugin_windows.go rename to pkg/plugin/pktmon/pktmon_windows.go index 21e9b6f3df6..4b0759f292c 100644 --- a/pkg/plugin/windows/pktmon/pktmon_plugin_windows.go +++ b/pkg/plugin/pktmon/pktmon_windows.go @@ -15,7 +15,7 @@ import ( "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" "github.com/microsoft/retina/pkg/metrics" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "go.uber.org/zap" "go.uber.org/zap/zapio" @@ -34,7 +34,7 @@ var ( ) const ( - Name = "pktmon" + name = "pktmon" connectionRetryAttempts = 5 eventChannelSize = 1000 ) @@ -51,6 +51,16 @@ type Plugin struct { stream observerv1.Observer_GetFlowsClient } +func init() { + registry.Add(name, New) +} + +func New(*kcfg.Config) registry.Plugin { + return &Plugin{ + l: log.Logger().Named(name), + } +} + func (p *Plugin) Init() error { return nil } @@ -245,7 +255,7 @@ func (p *Plugin) GetFlow(ctx context.Context) error { default: // Channel is full, drop the event. // We shouldn't slow down the reader. - metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, string(Name)).Inc() + metrics.LostEventsCounter.WithLabelValues(utils.ExternalChannel, name).Inc() } } } @@ -257,12 +267,6 @@ func (p *Plugin) SetupChannel(ch chan *v1.Event) error { return nil } -func New(_ *kcfg.Config) api.Plugin { - return &Plugin{ - l: log.Logger().Named(Name), - } -} - func (p *Plugin) Stop() error { if p.pktmonCmd != nil { err := p.pktmonCmd.Process.Kill() @@ -274,10 +278,10 @@ func (p *Plugin) Stop() error { return nil } -func (p *Plugin) Compile(_ context.Context) error { +func (p *Plugin) Compile(context.Context) error { return nil } -func (p *Plugin) Generate(_ context.Context) error { +func (p *Plugin) Generate(context.Context) error { return nil } diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go new file mode 100644 index 00000000000..fdb5537a447 --- /dev/null +++ b/pkg/plugin/plugin.go @@ -0,0 +1,15 @@ +// package plugin aliases types from plugin/registry to prevent import cycles. +package plugin + +import "github.com/microsoft/retina/pkg/plugin/registry" + +//go:generate go run go.uber.org/mock/mockgen@v0.4.0 -destination=mock/plugin.go -copyright_file=../lib/ignore_headers.txt -package=plugin github.com/microsoft/retina/pkg/plugin Plugin + +type ( + Plugin = registry.Plugin + Func = registry.PluginFunc +) + +func Get(name string) (Func, bool) { + return registry.Get(name) +} diff --git a/pkg/plugin/api/types.go b/pkg/plugin/registry/registry.go similarity index 55% rename from pkg/plugin/api/types.go rename to pkg/plugin/registry/registry.go index 59908da356a..48662bc7f8d 100644 --- a/pkg/plugin/api/types.go +++ b/pkg/plugin/registry/registry.go @@ -1,34 +1,26 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -// Package api provides the api for all Retina eBPF plugins. -package api +package registry import ( "context" v1 "github.com/cilium/cilium/pkg/hubble/api/v1" + kcfg "github.com/microsoft/retina/pkg/config" ) -//go:generate go run go.uber.org/mock/mockgen@v0.4.0 -destination=mock/mock_plugin.go -copyright_file=../../lib/ignore_headers.txt -package=mock github.com/microsoft/retina/pkg/plugin/api Plugin - const ( Meter string = "retina-meter" ServiceName string = "retina" ) -// PluginName provides the type for the name of the plugin. -type PluginName string - -// Plugin provides the interface that all Retina eBPF plugins must implement. +// Plugin provides the interface that all Retina plugins must implement. type Plugin interface { - // Name returns the name of the plugin + // Name returns the name of the plugin. Name() string // Generate generates the plugin specific header files. - // This maybe no-op for plugins that don't use eBPF. + // This may be no-op for plugins that don't use eBPF. Generate(ctx context.Context) error // Compile compiles the eBPF code to generate bpf object. - // This maybe no-op for plugins that don't use eBPF. + // This may be no-op for plugins that don't use eBPF. Compile(ctx context.Context) error // Init initializes plugin specific objects. Depend on a given configuration, it may initialize eBPF maps, etc. Init() error @@ -40,3 +32,21 @@ type Plugin interface { // This can be useful for plugins that need to send data to other components for post-processing. SetupChannel(chan *v1.Event) error } + +// PluginFunc is the Constructor func that all PLugins must provide to Register. +type PluginFunc func(*kcfg.Config) Plugin + +// plugins is the centralized list of Retina plugins their New functions to create them. +var plugins = map[string]PluginFunc{} + +func Add(name string, f PluginFunc) { + if _, ok := plugins[name]; ok { + panic("duplicate plugin registration for " + name) + } + plugins[name] = f +} + +func Get(name string) (PluginFunc, bool) { + f, ok := plugins[name] + return f, ok +} diff --git a/pkg/plugin/registry/registry_linux.go b/pkg/plugin/registry/registry_linux.go deleted file mode 100644 index f43b0218f1a..00000000000 --- a/pkg/plugin/registry/registry_linux.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -// Package registry contains the plugin registry for Retina. It is used for plugin registration and instantiation. -package registry - -import ( - kcfg "github.com/microsoft/retina/pkg/config" - - "github.com/microsoft/retina/pkg/plugin/api" - "github.com/microsoft/retina/pkg/plugin/ciliumeventobserver" - "github.com/microsoft/retina/pkg/plugin/dns" - "github.com/microsoft/retina/pkg/plugin/dropreason" - "github.com/microsoft/retina/pkg/plugin/infiniband" - "github.com/microsoft/retina/pkg/plugin/linuxutil" - "github.com/microsoft/retina/pkg/plugin/mockplugin" - "github.com/microsoft/retina/pkg/plugin/packetforward" - "github.com/microsoft/retina/pkg/plugin/packetparser" - "github.com/microsoft/retina/pkg/plugin/tcpretrans" -) - -type NewPluginFn func(*kcfg.Config) api.Plugin - -var PluginHandler map[api.PluginName]NewPluginFn - -func RegisterPlugins() { - PluginHandler = make(map[api.PluginName]NewPluginFn, 500) - PluginHandler[dropreason.Name] = dropreason.New - PluginHandler[packetforward.Name] = packetforward.New - PluginHandler[linuxutil.Name] = linuxutil.New - PluginHandler[infiniband.Name] = infiniband.New - PluginHandler[packetparser.Name] = packetparser.New - PluginHandler[dns.Name] = dns.New - PluginHandler[tcpretrans.Name] = tcpretrans.New - PluginHandler[mockplugin.Name] = mockplugin.New - PluginHandler[ciliumeventobserver.Name] = ciliumeventobserver.New -} diff --git a/pkg/plugin/registry/registry_windows.go b/pkg/plugin/registry/registry_windows.go deleted file mode 100644 index a8e3d648424..00000000000 --- a/pkg/plugin/registry/registry_windows.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. -package registry - -import ( - kcfg "github.com/microsoft/retina/pkg/config" - - "github.com/microsoft/retina/pkg/plugin/api" - "github.com/microsoft/retina/pkg/plugin/windows/hnsstats" - "github.com/microsoft/retina/pkg/plugin/windows/pktmon" -) - -type NewPluginFn func(*kcfg.Config) api.Plugin - -var PluginHandler map[api.PluginName]NewPluginFn - -func RegisterPlugins() { - PluginHandler = make(map[api.PluginName]NewPluginFn, 500) - PluginHandler[hnsstats.Name] = hnsstats.New - PluginHandler[pktmon.Name] = pktmon.New -} diff --git a/pkg/plugin/tcpretrans/tcpretrans_linux.go b/pkg/plugin/tcpretrans/tcpretrans_linux.go index 837d3f0db3f..9895dc78a98 100644 --- a/pkg/plugin/tcpretrans/tcpretrans_linux.go +++ b/pkg/plugin/tcpretrans/tcpretrans_linux.go @@ -19,21 +19,25 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" + "github.com/microsoft/retina/pkg/plugin/registry" "github.com/microsoft/retina/pkg/utils" "go.uber.org/zap" "golang.org/x/sys/unix" ) -func New(cfg *kcfg.Config) api.Plugin { +func init() { + registry.Add(name, New) +} + +func New(cfg *kcfg.Config) registry.Plugin { return &tcpretrans{ cfg: cfg, - l: log.Logger().Named(string(Name)), + l: log.Logger().Named(name), } } func (t *tcpretrans) Name() string { - return string(Name) + return name } func (t *tcpretrans) Generate(ctx context.Context) error { @@ -103,7 +107,7 @@ func (t *tcpretrans) Stop() error { } func (t *tcpretrans) SetupChannel(ch chan *v1.Event) error { - t.l.Warn("SetupChannel is not supported by plugin", zap.String("plugin", string(Name))) + t.l.Warn("SetupChannel is not supported by plugin", zap.String("plugin", name)) return nil } diff --git a/pkg/plugin/tcpretrans/types_linux.go b/pkg/plugin/tcpretrans/types_linux.go index e54a1db0015..b3856bbe3e1 100644 --- a/pkg/plugin/tcpretrans/types_linux.go +++ b/pkg/plugin/tcpretrans/types_linux.go @@ -10,12 +10,9 @@ import ( kcfg "github.com/microsoft/retina/pkg/config" "github.com/microsoft/retina/pkg/enricher" "github.com/microsoft/retina/pkg/log" - "github.com/microsoft/retina/pkg/plugin/api" ) -const ( - Name api.PluginName = "tcpretrans" -) +const name = "tcpretrans" type tcpretrans struct { cfg *kcfg.Config diff --git a/shell/README.md b/shell/README.md new file mode 100644 index 00000000000..1f3033814ca --- /dev/null +++ b/shell/README.md @@ -0,0 +1,14 @@ +# retina-shell + +Retina CLI provides a command to launch an interactive shell in a node or pod for adhoc debugging. + +* The CLI command `kubectl retina shell` creates a pod with `HostNetwork=true` (for node debugging) or an ephemeral container in an existing pod (for pod debugging). +* The container runs an image built from the Dockerfile in this directory. The image is based on Azure Linux and includes commonly-used networking tools. + +For testing, you can override the image used by `retina shell` either with CLI arguments +(`--retina-shell-image-repo` and `--retina-shell-image-version`) or environment variables +(`RETINA_SHELL_IMAGE_REPO` and `RETINA_SHELL_IMAGE_VERSION`). + +Run `kubectl retina shell -h` for full documentation and examples. + +Currently only Linux is supported; Windows support will be added in the future. diff --git a/shell/attach.go b/shell/attach.go new file mode 100644 index 00000000000..ea98d2536eb --- /dev/null +++ b/shell/attach.go @@ -0,0 +1,84 @@ +package shell + +import ( + "context" + "errors" + "fmt" + "os" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/cli-runtime/pkg/genericiooptions" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/kubectl/pkg/cmd/attach" + "k8s.io/kubectl/pkg/cmd/exec" +) + +func attachToShell(restConfig *rest.Config, namespace, podName, containerName string, pod *v1.Pod) error { + attachOpts := &attach.AttachOptions{ + Config: restConfig, + StreamOptions: exec.StreamOptions{ + Namespace: namespace, + PodName: podName, + ContainerName: containerName, + IOStreams: genericiooptions.IOStreams{ + In: os.Stdin, + Out: os.Stdout, + ErrOut: os.Stderr, + }, + Stdin: true, + TTY: true, + Quiet: true, + }, + Attach: &attach.DefaultRemoteAttach{}, + AttachFunc: attach.DefaultAttachFunc, + Pod: pod, + } + + if err := attachOpts.Run(); err != nil { + return fmt.Errorf("error attaching to shell container: %w", err) + } + + return nil +} + +func waitForContainerRunning(ctx context.Context, timeout time.Duration, clientset *kubernetes.Clientset, namespace, podName, containerName string) error { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + for { + pod, err := clientset.CoreV1(). + Pods(namespace). + Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return waitTimeoutError(err, timeout, containerName) + } + return fmt.Errorf("error retrieving pod %s in namespace %s: %w", podName, namespace, err) + } + + for i := range pod.Status.ContainerStatuses { + status := pod.Status.ContainerStatuses[i] + if status.Name == containerName && status.State.Running != nil { + return nil + } + } + for i := range pod.Status.EphemeralContainerStatuses { + status := pod.Status.EphemeralContainerStatuses[i] + if status.Name == containerName && status.State.Running != nil { + return nil + } + } + + select { + case <-ctx.Done(): + return waitTimeoutError(context.DeadlineExceeded, timeout, containerName) + case <-time.After(1 * time.Second): + } + } +} + +func waitTimeoutError(err error, timeout time.Duration, containerName string) error { + return fmt.Errorf("timed out after %s waiting for container %s to start. The timeout can be increased by setting --timeout. Err: %w", timeout, containerName, err) +} diff --git a/shell/manifests.go b/shell/manifests.go new file mode 100644 index 00000000000..72a5df0989c --- /dev/null +++ b/shell/manifests.go @@ -0,0 +1,86 @@ +package shell + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilrand "k8s.io/apimachinery/pkg/util/rand" +) + +// convertToCapabilities converts a slice of strings to a slice of v1.Capability +func ephemeralContainerForPodDebug(config Config) v1.EphemeralContainer { + return v1.EphemeralContainer{ + EphemeralContainerCommon: v1.EphemeralContainerCommon{ + Name: randomRetinaShellContainerName(), + Image: config.RetinaShellImage, + Stdin: true, + TTY: true, + SecurityContext: &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Drop: []v1.Capability{"ALL"}, + Add: stringSliceToCapabilities(config.Capabilities), + }, + }, + }, + } +} + +func hostNetworkPodForNodeDebug(config Config, debugPodNamespace, nodeName string) *v1.Pod { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: randomRetinaShellContainerName(), + Namespace: debugPodNamespace, + }, + Spec: v1.PodSpec{ + NodeName: nodeName, + RestartPolicy: v1.RestartPolicyNever, + Tolerations: []v1.Toleration{{Operator: v1.TolerationOpExists}}, + HostNetwork: true, + HostPID: config.HostPID, + Containers: []v1.Container{ + { + Name: "retina-shell", + Image: config.RetinaShellImage, + Stdin: true, + TTY: true, + SecurityContext: &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Drop: []v1.Capability{"ALL"}, + Add: stringSliceToCapabilities(config.Capabilities), + }, + }, + }, + }, + }, + } + + if config.MountHostFilesystem || config.AllowHostFilesystemWrite { + pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ + Name: "host-filesystem", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/", + }, + }, + }) + pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, v1.VolumeMount{ + Name: "host-filesystem", + MountPath: "/host", + ReadOnly: !config.AllowHostFilesystemWrite, + }) + } + + return pod +} + +func randomRetinaShellContainerName() string { + const retinaShellContainerNameRandLen = 5 + return "retina-shell-" + utilrand.String(retinaShellContainerNameRandLen) +} + +func stringSliceToCapabilities(ss []string) []v1.Capability { + caps := make([]v1.Capability, 0, len(ss)) + for _, s := range ss { + caps = append(caps, v1.Capability(s)) + } + return caps +} diff --git a/shell/manifests_test.go b/shell/manifests_test.go new file mode 100644 index 00000000000..840d7d2e493 --- /dev/null +++ b/shell/manifests_test.go @@ -0,0 +1,91 @@ +package shell + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" +) + +const testRetinaImage = "retina-shell:v0.0.1" + +func TestEphemeralContainerForPodDebug(t *testing.T) { + ec := ephemeralContainerForPodDebug(Config{RetinaShellImage: testRetinaImage}) + assert.True(t, strings.HasPrefix(ec.Name, "retina-shell-"), "Ephemeral container name does not start with the expected prefix") + assert.Equal(t, testRetinaImage, ec.Image) + assert.Equal(t, []v1.Capability{"ALL"}, ec.SecurityContext.Capabilities.Drop) + assert.Empty(t, ec.SecurityContext.Capabilities.Add) +} + +func TestEphemeralContainerForPodDebugWithCapabilities(t *testing.T) { + ec := ephemeralContainerForPodDebug(Config{ + RetinaShellImage: testRetinaImage, + Capabilities: []string{"NET_RAW", "NET_ADMIN"}, + }) + assert.Equal(t, []v1.Capability{"NET_RAW", "NET_ADMIN"}, ec.SecurityContext.Capabilities.Add) +} + +func TestHostNetworkPodForNodeDebug(t *testing.T) { + config := Config{RetinaShellImage: testRetinaImage} + pod := hostNetworkPodForNodeDebug(config, "kube-system", "node0001") + assert.True(t, strings.HasPrefix(pod.Name, "retina-shell-"), "Pod name does not start with the expected prefix") + assert.Equal(t, "kube-system", pod.Namespace) + assert.Equal(t, "node0001", pod.Spec.NodeName) + assert.Equal(t, v1.RestartPolicyNever, pod.Spec.RestartPolicy) + assert.Equal(t, []v1.Toleration{{Operator: v1.TolerationOpExists}}, pod.Spec.Tolerations) + assert.True(t, pod.Spec.HostNetwork, "Pod does not have host network enabled") + assert.False(t, pod.Spec.HostPID) + assert.Len(t, pod.Spec.Containers, 1) + assert.Equal(t, testRetinaImage, pod.Spec.Containers[0].Image) + assert.Equal(t, []v1.Capability{"ALL"}, pod.Spec.Containers[0].SecurityContext.Capabilities.Drop) + assert.Empty(t, pod.Spec.Containers[0].SecurityContext.Capabilities.Add) + assert.Empty(t, pod.Spec.Volumes) + assert.Empty(t, pod.Spec.Containers[0].VolumeMounts) +} + +func TestHostNetworkPodForNodeDebugWithHostPID(t *testing.T) { + config := Config{ + RetinaShellImage: testRetinaImage, + HostPID: true, + } + pod := hostNetworkPodForNodeDebug(config, "kube-system", "node0001") + assert.True(t, pod.Spec.HostPID, "Pod does not have host PID enabled") +} + +func TestHostNetworkPodForNodeDebugWithCapabilities(t *testing.T) { + config := Config{ + RetinaShellImage: testRetinaImage, + Capabilities: []string{"NET_RAW", "NET_ADMIN"}, + } + pod := hostNetworkPodForNodeDebug(config, "kube-system", "node0001") + assert.Equal(t, []v1.Capability{"NET_RAW", "NET_ADMIN"}, pod.Spec.Containers[0].SecurityContext.Capabilities.Add) +} + +func TestHostNetworkPodForNodeDebugWithMountHostFilesystem(t *testing.T) { + config := Config{ + RetinaShellImage: testRetinaImage, + MountHostFilesystem: true, + } + pod := hostNetworkPodForNodeDebug(config, "kube-system", "node0001") + assert.Len(t, pod.Spec.Volumes, 1) + assert.Equal(t, "host-filesystem", pod.Spec.Volumes[0].Name) + assert.Len(t, pod.Spec.Containers[0].VolumeMounts, 1) + assert.Equal(t, "host-filesystem", pod.Spec.Containers[0].VolumeMounts[0].Name) + assert.Equal(t, "/host", pod.Spec.Containers[0].VolumeMounts[0].MountPath) + assert.True(t, pod.Spec.Containers[0].VolumeMounts[0].ReadOnly) +} + +func TestHostNetworkPodForNodeDebugWithMountHostFilesystemWithWriteAccess(t *testing.T) { + config := Config{ + RetinaShellImage: testRetinaImage, + AllowHostFilesystemWrite: true, + } + pod := hostNetworkPodForNodeDebug(config, "kube-system", "node0001") + assert.Len(t, pod.Spec.Volumes, 1) + assert.Equal(t, "host-filesystem", pod.Spec.Volumes[0].Name) + assert.Len(t, pod.Spec.Containers[0].VolumeMounts, 1) + assert.Equal(t, "host-filesystem", pod.Spec.Containers[0].VolumeMounts[0].Name) + assert.Equal(t, "/host", pod.Spec.Containers[0].VolumeMounts[0].MountPath) + assert.False(t, pod.Spec.Containers[0].VolumeMounts[0].ReadOnly) +} diff --git a/shell/shell.go b/shell/shell.go new file mode 100644 index 00000000000..c85a07338c2 --- /dev/null +++ b/shell/shell.go @@ -0,0 +1,106 @@ +package shell + +import ( + "context" + "fmt" + "os" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// Config is the configuration for starting a shell in a node or pod. +type Config struct { + RestConfig *rest.Config + RetinaShellImage string + HostPID bool + Capabilities []string + Timeout time.Duration + + // Host filesystem access applies only to nodes, not pods. + MountHostFilesystem bool + AllowHostFilesystemWrite bool +} + +// RunInPod starts an interactive shell in a pod by creating and attaching to an ephemeral container. +func RunInPod(config Config, podNamespace, podName string) error { + ctx := context.Background() + + clientset, err := kubernetes.NewForConfig(config.RestConfig) + if err != nil { + return fmt.Errorf("error constructing kube clientset: %w", err) + } + + pod, err := clientset.CoreV1(). + Pods(podNamespace). + Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error retrieving pod %s from namespace %s: %w", podName, podNamespace, err) + } + + err = validateOperatingSystemSupportedForNode(ctx, clientset, pod.Spec.NodeName) + if err != nil { + return fmt.Errorf("error validating operating system for node %s: %w", pod.Spec.NodeName, err) + } + + fmt.Printf("Starting ephemeral container in pod %s/%s\n", podNamespace, podName) + ephemeralContainer := ephemeralContainerForPodDebug(config) + pod.Spec.EphemeralContainers = append(pod.Spec.EphemeralContainers, ephemeralContainer) + + _, err = clientset.CoreV1(). + Pods(podNamespace). + UpdateEphemeralContainers(ctx, podName, pod, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error updating ephemeral containers: %w", err) + } + + if err := waitForContainerRunning(ctx, config.Timeout, clientset, podNamespace, podName, ephemeralContainer.Name); err != nil { + return fmt.Errorf("error waiting for containers running: %w", err) + } + + return attachToShell(config.RestConfig, podNamespace, podName, ephemeralContainer.Name, pod) +} + +// RunInNode starts an interactive shell on a node by creating a HostNetwork pod and attaching to it. +func RunInNode(config Config, nodeName, debugPodNamespace string) error { + ctx := context.Background() + + clientset, err := kubernetes.NewForConfig(config.RestConfig) + if err != nil { + return fmt.Errorf("error constructing kube clientset: %w", err) + } + + err = validateOperatingSystemSupportedForNode(ctx, clientset, nodeName) + if err != nil { + return fmt.Errorf("error validating operating system for node %s: %w", nodeName, err) + } + + pod := hostNetworkPodForNodeDebug(config, debugPodNamespace, nodeName) + + fmt.Printf("Starting host networking pod %s/%s on node %s\n", debugPodNamespace, pod.Name, nodeName) + _, err = clientset.CoreV1(). + Pods(debugPodNamespace). + Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("err creating pod %s in namespace %s: %w", pod.Name, debugPodNamespace, err) + } + + defer func() { + // Best-effort cleanup. + err = clientset.CoreV1(). + Pods(debugPodNamespace). + Delete(ctx, pod.Name, metav1.DeleteOptions{}) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to delete pod %s: %v\n", pod.Name, err) + } + }() + + err = waitForContainerRunning(ctx, config.Timeout, clientset, debugPodNamespace, pod.Name, pod.Spec.Containers[0].Name) + if err != nil { + return err + } + + return attachToShell(config.RestConfig, debugPodNamespace, pod.Name, pod.Spec.Containers[0].Name, pod) +} diff --git a/shell/validation.go b/shell/validation.go new file mode 100644 index 00000000000..fa97f8b15e0 --- /dev/null +++ b/shell/validation.go @@ -0,0 +1,28 @@ +package shell + +import ( + "context" + "errors" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +var errUnsupportedOperatingSystem = errors.New("unsupported OS (retina-shell requires Linux)") + +func validateOperatingSystemSupportedForNode(ctx context.Context, clientset *kubernetes.Clientset, nodeName string) error { + node, err := clientset.CoreV1(). + Nodes(). + Get(ctx, nodeName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error retrieving node %s: %w", nodeName, err) + } + + osLabel := node.Labels["kubernetes.io/os"] + if osLabel != "linux" { // Only Linux supported for now. + return errUnsupportedOperatingSystem + } + + return nil +}