Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: conntrack for data aggregation #283

Closed
wants to merge 46 commits into from
Closed
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
3f41d61
feat: conntrack POC
nddq Jul 29, 2024
56a6438
add stubs obj files
nddq Jul 29, 2024
f73dad8
update
nddq Jul 30, 2024
45e09ad
Merge branch 'main' into feat/conntrack
nddq Jul 30, 2024
fe5c440
save progress
nddq Aug 1, 2024
df5822d
Merge branch 'main' into feat/conntrack
nddq Aug 1, 2024
a4bccdd
save progress
nddq Aug 1, 2024
bd97f1f
save progress
nddq Aug 2, 2024
a69ab6f
update
nddq Aug 2, 2024
09afda7
remove conntrack_windows
nddq Aug 2, 2024
101de97
add lru cache for testing
nddq Aug 2, 2024
982a514
test isReply
nddq Aug 4, 2024
17436c1
linter
nddq Aug 4, 2024
67583e3
Merge branch 'main' into feat/conntrack
nddq Aug 5, 2024
31e312d
update bpf code
nddq Aug 5, 2024
204e700
o files for testig
nddq Aug 6, 2024
f3729e1
Merge branch 'main' into feat/conntrack
nddq Aug 6, 2024
5f66bce
formatting
nddq Aug 6, 2024
d416c81
add UTs
nddq Aug 7, 2024
b5dc3d6
revert
nddq Aug 8, 2024
11d4dc7
Merge branch 'main' into feat/conntrack
nddq Aug 8, 2024
c4f8161
remove lru cache
nddq Aug 8, 2024
460419b
Merge branch 'main' into feat/conntrack
nddq Aug 9, 2024
b69c78a
Merge branch 'main' into feat/conntrack
nddq Aug 12, 2024
fbac73e
add dataAggregationLevel toggle for conntrack
nddq Aug 12, 2024
e8c044e
init container gets retina config
nddq Aug 12, 2024
b54150f
Merge branch 'main' into feat/conntrack
nddq Aug 12, 2024
d88b4fd
fix ut
nddq Aug 13, 2024
83d1e3c
fix ut fr
nddq Aug 13, 2024
a07f630
Merge branch 'main' into feat/conntrack
nddq Aug 13, 2024
ac4bc5c
Update daemonset.yaml
nddq Aug 13, 2024
be93dbd
conntrack no longer a plugin
nddq Aug 14, 2024
e79189e
Merge branch 'main' into feat/conntrack
nddq Aug 14, 2024
8f5a7a4
conntrack keep track of traffic direction
nddq Aug 15, 2024
606190f
Merge branch 'main' into feat/conntrack
nddq Aug 15, 2024
fc46905
windows linter
nddq Aug 15, 2024
149a912
formatting
nddq Aug 15, 2024
95eda10
Merge branch 'main' into feat/conntrack
nddq Aug 15, 2024
c0024f9
Merge branch 'main' into feat/conntrack
nddq Aug 16, 2024
987571a
streamline conntrack
nddq Aug 19, 2024
563f467
Merge branch 'main' into feat/conntrack
nddq Aug 19, 2024
86b3f94
add libbpf components
nddq Aug 19, 2024
eee2e03
fix compile
nddq Aug 19, 2024
28012f7
last report for forward and reply direction individually
nddq Aug 19, 2024
ae1182b
conntrack keeps track of packets and bytes count per direction per flow
nddq Aug 19, 2024
532173d
Merge branch 'main' into feat/conntrack
nddq Aug 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cmd/hubble/daemon_linux.go
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ import (
"github.com/microsoft/retina/pkg/config"
"github.com/microsoft/retina/pkg/managers/pluginmanager"
"github.com/microsoft/retina/pkg/managers/servermanager"
"github.com/microsoft/retina/pkg/plugin/conntrack"

retinak8s "github.com/microsoft/retina/pkg/k8s"

@@ -91,6 +92,31 @@ var (
},
)
}),
// Start the conntrack if high data aggregation level
cell.Invoke(func(l logrus.FieldLogger, lifecycle cell.Lifecycle, cfg config.Config) {
if cfg.DataAggregationLevel == config.High {
ct := conntrack.New(&cfg)
var wp *workerpool.WorkerPool
lifecycle.Append(
cell.Hook{
OnStart: func(cell.HookContext) error {
wp = workerpool.New(1)
l.Info("starting conntrack")
if err := wp.Submit("conntrack", ct.Run); err != nil {
return errors.Wrap(err, "failed to submit conntrack to workerpool")
}
return nil
},
OnStop: func(cell.HookContext) error {
if err := wp.Close(); err != nil {
return errors.Wrap(err, "failed to close conntrack workerpool")
}
return nil
},
},
)
}
}),
cell.Invoke(newDaemonPromise),
)
)
12 changes: 12 additions & 0 deletions cmd/legacy/daemon.go
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ import (
pc "github.com/microsoft/retina/pkg/controllers/daemon/pod"
kec "github.com/microsoft/retina/pkg/controllers/daemon/retinaendpoint"
sc "github.com/microsoft/retina/pkg/controllers/daemon/service"
"github.com/microsoft/retina/pkg/plugin/conntrack"

"github.com/microsoft/retina/pkg/enricher"
"github.com/microsoft/retina/pkg/log"
@@ -270,6 +271,17 @@ func (d *Daemon) Start() error {
mainLogger.Fatal("unable to create metricsConfigController", zap.Error(err))
}
}

// If data aggregation level is high, run conntrack.
if daemonConfig.DataAggregationLevel == config.High {
ct := conntrack.New(daemonConfig)
go func() {
if err := ct.Run(ctx); err != nil {
mainLogger.Error("failed to run conntrack", zap.Error(err))
}
}()
}

}

controllerMgr, err := cm.NewControllerManager(daemonConfig, cl, tel)
Original file line number Diff line number Diff line change
@@ -33,13 +33,18 @@ spec:
- name: retina-agent-init
image: {{ .Values.agent.init.repository }}:{{ .Values.agent.init.tag }}
imagePullPolicy: {{ .Values.agent.pullPolicy }}
args:
- --config
- "/retina/config/config.yaml"
terminationMessagePolicy: FallbackToLogsOnError
securityContext:
privileged: true
volumeMounts:
- name: bpf
mountPath: /sys/fs/bpf
mountPropagation: Bidirectional
- name: config
mountPath: /retina/config
- name: varrun
mountPath: /var/run
mountPropagation: Bidirectional
Original file line number Diff line number Diff line change
@@ -30,13 +30,18 @@ spec:
- name: init-retina
image: {{ .Values.image.initRepository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
args:
- --config
- "/retina/config/config.yaml"
terminationMessagePolicy: FallbackToLogsOnError
securityContext:
privileged: true
volumeMounts:
- name: bpf
mountPath: /sys/fs/bpf
mountPropagation: Bidirectional
- name: config
mountPath: /retina/config
containers:
- name: {{ include "retina.name" . }}
livenessProbe:
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@ image:
enablePodLevel: false
remoteContext: false
enableAnnotations: false
bypassLookupIPOfInterest: false
bypassLookupIPOfInterest: true
dataAggregationLevel: "low"

imagePullSecrets: []
4 changes: 2 additions & 2 deletions docs/concepts/dataAggregation.md
Original file line number Diff line number Diff line change
@@ -3,5 +3,5 @@
Under Retina's hood, data are communicate between plugins and the control plane via [`Flow` objects](https://github.com/cilium/cilium/tree/main/api/v1/flow). Retina's data aggregation settings are designed to manage the amount of data that can be potentially generate by the agent .i.e the number of `flows` being generated. At a higher aggregation level, fewer `flows` objects are produced, which ensures resource efficiency in large clusters. Conversely, a lower level of aggregation results in more `flow` objects being generated, offering more detailed information regarding packets being observed at different points in the Linux kernel.The operational behaviors of Retina at each aggregation level are detailed in the table below:
| Level | Description|
|--- |--- |
| `low` | `packetparser` will attach a bpf program to the node's default interface in the node namespace, which will help capture metrics for `TO_NETWORK` and `FROM_NETWORK` packets. This will give users a more granular view of packet flows and offers more reliable apiserver latency metrics. |
| `high` | `packetparser` will not attach a bpf program to the node's default interface in the node namespace. As a result, packet observation at this location will be disabled, leading to a reduction in metrics being generated. This configuration is recommended when scalability is the primary concern. However, it is important to note that, due to the absence of packet observation at the default interface, the apiserver latency metrics may not be as reliable. |
| `low` | - `packetparser` will attach a bpf program to the node's default interface in the node namespace, which will help capture metrics for `TO_NETWORK` and `FROM_NETWORK` packets. This will give users a more granular view of packet flows and offers more reliable apiserver latency metrics. `conntrack` will not be enabled.|
| `high` | `packetparser` will not attach a bpf program to the node's default interface in the node namespace. As a result, packet observation at this location will be disabled, leading to a reduction in metrics being generated. This configuration is recommended when scalability is the primary concern. However, it is important to note that, due to the absence of packet observation at the default interface, the apiserver latency metrics may not be as reliable. `conntrack` will be enabled. |
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -125,7 +125,7 @@ require (
github.com/hashicorp/go-immutable-radix/v2 v2.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/hashicorp/hcl v1.0.1-vault-5 // indirect
github.com/hashicorp/serf v0.10.1 // indirect
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -478,8 +478,8 @@ github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hcl v1.0.1-vault-5 h1:kI3hhbbyzr4dldA8UdTb7ZlVVlI2DACdCfz31RPDgJM=
24 changes: 16 additions & 8 deletions init/retina/main_linux.go
Original file line number Diff line number Diff line change
@@ -4,9 +4,12 @@
package main

import (
"flag"

"github.com/microsoft/retina/internal/buildinfo"
"github.com/microsoft/retina/pkg/bpf"
"github.com/microsoft/retina/pkg/ciliumfs"
"github.com/microsoft/retina/pkg/config"
"github.com/microsoft/retina/pkg/log"
"github.com/microsoft/retina/pkg/telemetry"
"go.uber.org/zap"
@@ -15,9 +18,20 @@ import (
func main() {
// Initialize logger
opts := log.GetDefaultLogOpts()
zl, err := log.SetupZapLogger(opts)
if err != nil {
panic(err)
}
l := zl.Named("init-retina").With(zap.String("version", buildinfo.Version))

// Enable telemetry if applicationInsightsID is provided
if buildinfo.ApplicationInsightsID != "" {
configPath := flag.String("config", "/retina/config/config.yaml", "path to the config file")
cfg, err := config.GetConfig(*configPath)
if err != nil {
l.Fatal("failed to get config", zap.Error(err))
}

// Enable telemetry if applicationInsightsID is provided and telemetry is enabled
if buildinfo.ApplicationInsightsID != "" && cfg.EnableTelemetry {
opts.EnableTelemetry = true
opts.ApplicationInsightsID = buildinfo.ApplicationInsightsID
// Initialize application insights
@@ -26,12 +40,6 @@ func main() {
defer telemetry.TrackPanic()
}

zl, err := log.SetupZapLogger(opts)
if err != nil {
panic(err)
}
l := zl.Named("init-retina").With(zap.String("version", buildinfo.Version))

// Setup BPF
bpf.Setup(l)

39 changes: 23 additions & 16 deletions pkg/bpf/setup_linux.go
Original file line number Diff line number Diff line change
@@ -4,51 +4,51 @@
package bpf

import (
"fmt"
"os"

"github.com/cilium/cilium/pkg/mountinfo"
plugincommon "github.com/microsoft/retina/pkg/plugin/common"
"github.com/microsoft/retina/pkg/plugin/filter"
"github.com/pkg/errors"
"go.uber.org/zap"
"golang.org/x/sys/unix"
)

func __mount() error {
func mount() error {
// Check if the path exists.
_, err := os.Stat(plugincommon.FilterMapPath)
_, err := os.Stat(plugincommon.MapPath)
if err != nil {
if os.IsNotExist(err) {
// Path does not exist. Create it.
err = os.MkdirAll(plugincommon.FilterMapPath, 0o755)
err = os.MkdirAll(plugincommon.MapPath, 0o755) //nolint:gomnd // 0o755 is the permission.
if err != nil {
return err
return errors.Wrap(err, "failed to create bpf filesystem path")
}
} else {
// Some other error. Return.
return err
return errors.Wrap(err, "failed to stat bpf filesystem path")
}
}
err = unix.Mount(plugincommon.FilterMapPath, plugincommon.FilterMapPath, "bpf", 0, "")
return err
err = unix.Mount(plugincommon.MapPath, plugincommon.MapPath, "bpf", 0, "")
return errors.Wrap(err, "failed to mount bpf filesystem")
}

func mountBpfFs() error {
// Check if /sys/fs/bpf is already mounted.
mounted, bpfMount, err := mountinfo.IsMountFS(mountinfo.FilesystemTypeBPFFS, plugincommon.FilterMapPath)
mounted, bpfMount, err := mountinfo.IsMountFS(mountinfo.FilesystemTypeBPFFS, plugincommon.MapPath)
if err != nil {
return err
return errors.Wrap(err, "failed to check if bpf filesystem is mounted")
}
if !mounted {
if err := __mount(); err != nil {
if err := mount(); err != nil {
return err
}
return nil
}
// Else mounted. Check the type of mount.
if !bpfMount {
// Custom mount of /sys/fs/bpf. Unknown setup. Exit.
return fmt.Errorf("%+s is already mounted but not as bpf. Not supported", plugincommon.FilterMapPath)
return errors.New("bpf filesystem is mounted but not as bpf type")
}
return nil
}
@@ -58,20 +58,27 @@ func Setup(l *zap.Logger) {
if err != nil {
l.Panic("Failed to mount bpf filesystem", zap.Error(err))
}
l.Info("BPF filesystem mounted successfully", zap.String("path", plugincommon.FilterMapPath))
l.Info("BPF filesystem mounted successfully", zap.String("path", plugincommon.MapPath))

// Delete existing filter map file.
err = os.Remove(plugincommon.FilterMapPath + "/" + plugincommon.FilterMapName)
err = os.Remove(plugincommon.MapPath + "/" + plugincommon.FilterMapName)
if err != nil && !os.IsNotExist(err) {
l.Panic("Failed to delete existing filter map file", zap.Error(err))
}
l.Info("Deleted existing filter map file", zap.String("path", plugincommon.FilterMapPath), zap.String("Map name", plugincommon.FilterMapName))
l.Info("Deleted existing filter map file", zap.String("path", plugincommon.MapPath), zap.String("Map name", plugincommon.FilterMapName))

// Delete existing conntrack map file.
err = os.Remove(plugincommon.MapPath + "/" + plugincommon.ConntrackMapName)
if err != nil && !os.IsNotExist(err) {
l.Panic("Failed to delete existing conntrack map file", zap.Error(err))
}
l.Info("Deleted existing conntrack map file", zap.String("path", plugincommon.MapPath), zap.String("Map name", plugincommon.ConntrackMapName))

// Initialize the filter map.
// This will create the filter map in kernel and pin it to /sys/fs/bpf.
_, err = filter.Init()
if err != nil {
l.Panic("Failed to initialize filter map", zap.Error(err))
}
l.Info("Filter map initialized successfully", zap.String("path", plugincommon.FilterMapPath), zap.String("Map name", plugincommon.FilterMapName))
l.Info("Filter map initialized successfully", zap.String("path", plugincommon.MapPath), zap.String("Map name", plugincommon.FilterMapName))
}
3 changes: 0 additions & 3 deletions pkg/hubble/parser/layer34/parser_linux.go
Original file line number Diff line number Diff line change
@@ -51,9 +51,6 @@ func (p *Parser) Decode(f *flow.Flow) *flow.Flow {
f.Source = p.ep.Decode(sourceIP)
f.Destination = p.ep.Decode(destIP)

// Add IsReply to flow.
p.decodeIsReply(f)

// Add L34 Summary to flow.
p.decodeSummary(f)

28 changes: 28 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -165,6 +165,34 @@ func InitializeMetrics() {
utils.InterfaceName,
)

FlowPacketsForward = exporter.CreatePrometheusGaugeVecForMetric(
exporter.DefaultRegistry,
utils.FlowPacketsForwardGaugeName,
flowPacketForwardDescription,
utils.FlowsGaugeLabels...,
)

FlowPacketsReply = exporter.CreatePrometheusGaugeVecForMetric(
exporter.DefaultRegistry,
utils.FlowPacketsReplyGaugeName,
flowPacketReplyDescription,
utils.FlowsGaugeLabels...,
)

FlowBytesForward = exporter.CreatePrometheusGaugeVecForMetric(
exporter.DefaultRegistry,
utils.FlowBytesForwardGaugeName,
flowBytesForwardDescription,
utils.FlowsGaugeLabels...,
)

FlowBytesReply = exporter.CreatePrometheusGaugeVecForMetric(
exporter.DefaultRegistry,
utils.FlowBytesReplyGaugeName,
flowBytesReplyDescription,
utils.FlowsGaugeLabels...,
)

isInitialized = true
metricsLogger.Info("Metrics initialized")
}
10 changes: 10 additions & 0 deletions pkg/metrics/types.go
Original file line number Diff line number Diff line change
@@ -41,6 +41,11 @@ const (
infinibandCounterStatsDescription = "InfiniBand Counter Statistics"
infinibandStatusParamsDescription = "InfiniBand Status Parameters"

flowPacketForwardDescription = "Number of forward packets per flow"
flowPacketReplyDescription = "Number of reply packets per flow"
flowBytesForwardDescription = "Number of forward bytes per flow"
flowBytesReplyDescription = "Number of reply bytes per flow"

// Control plane metrics
pluginManagerFailedToReconcileCounterDescription = "Number of times the plugin manager failed to reconcile the plugins"
lostEventsCounterDescription = "Number of events lost in control plane"
@@ -91,6 +96,11 @@ var (

InfinibandCounterStats IGaugeVec
InfinibandStatusParams IGaugeVec

FlowPacketsForward IGaugeVec
FlowPacketsReply IGaugeVec
FlowBytesForward IGaugeVec
FlowBytesReply IGaugeVec
)

func ToPrometheusType(metric interface{}) prometheus.Collector {
6 changes: 4 additions & 2 deletions pkg/plugin/common/constants.go
Original file line number Diff line number Diff line change
@@ -3,8 +3,10 @@
package common

const (
// FilterMapPath is the path to the BPF filter map.
FilterMapPath = "/sys/fs/bpf"
// MapPath is the path to the BPF filter map.
MapPath = "/sys/fs/bpf"
// FilterMapName is the name of the BPF filter map
FilterMapName = "retina_filter_map"
// ConntrackMapName is the name of the BPF conntrack map
ConntrackMapName = "retina_conntrack_map"
)
Loading