Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(conntrack): bpf code for conntrack + conntrack GC #610

Merged
merged 30 commits into from
Aug 29, 2024
Merged
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f68e7b5
feat(conntrack): bpf code for conntrack + conntrack GC
nddq Aug 15, 2024
2b868f3
linter
nddq Aug 15, 2024
0a8280b
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 15, 2024
4e025ab
obj files
nddq Aug 15, 2024
24eb6b4
windows linter
nddq Aug 15, 2024
44ff381
Update conntrack.c
nddq Aug 15, 2024
b3a6e86
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 16, 2024
6c80ec8
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 19, 2024
f891f1d
refactor
nddq Aug 20, 2024
90e22e2
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 20, 2024
59cf80d
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 20, 2024
93d2904
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 21, 2024
b7d4957
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 21, 2024
ed59d6a
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 21, 2024
1b69a2f
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 22, 2024
62fd2f6
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 26, 2024
cfa52da
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 27, 2024
bfbd17b
refactor
nddq Aug 27, 2024
0d14904
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 27, 2024
4b58c6c
merge conflict
nddq Aug 28, 2024
f83d313
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 28, 2024
fe48c1d
address comments
nddq Aug 28, 2024
89bcf08
correct is reply logic
nddq Aug 28, 2024
b7815ae
address Igor's comments
nddq Aug 28, 2024
9d6d3cb
change packet direction to not be enum
nddq Aug 28, 2024
e5df79d
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 29, 2024
0d208bf
addressed feedbacks
nddq Aug 29, 2024
3ac565d
addressed feedbacks
nddq Aug 29, 2024
ec836ef
go generate
nddq Aug 29, 2024
7be54ce
Merge branch 'main' into nddq/feat/conntrackbpf
nddq Aug 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions pkg/bpf/setup_linux.go
Original file line number Diff line number Diff line change
@@ -17,11 +17,11 @@ import (

func __mount() error {
// Check if the path exists.
_, err := os.Stat(plugincommon.FilterMapPath)
_, err := os.Stat(plugincommon.MapPath)
if err != nil {
if os.IsNotExist(err) {
// Path does not exist. Create it.
err = os.MkdirAll(plugincommon.FilterMapPath, 0o755)
err = os.MkdirAll(plugincommon.MapPath, 0o755) // nolint:gomnd // directory permissions
if err != nil {
return err
}
@@ -30,13 +30,13 @@ func __mount() error {
return err
}
}
err = unix.Mount(plugincommon.FilterMapPath, plugincommon.FilterMapPath, "bpf", 0, "")
err = unix.Mount(plugincommon.MapPath, plugincommon.MapPath, "bpf", 0, "")
return err
}

func mountBpfFs() error {
// Check if /sys/fs/bpf is already mounted.
mounted, bpfMount, err := mountinfo.IsMountFS(mountinfo.FilesystemTypeBPFFS, plugincommon.FilterMapPath)
mounted, bpfMount, err := mountinfo.IsMountFS(mountinfo.FilesystemTypeBPFFS, plugincommon.MapPath)
if err != nil {
return err
}
@@ -49,7 +49,7 @@ func mountBpfFs() error {
// Else mounted. Check the type of mount.
if !bpfMount {
// Custom mount of /sys/fs/bpf. Unknown setup. Exit.
return fmt.Errorf("%+s is already mounted but not as bpf. Not supported", plugincommon.FilterMapPath)
return fmt.Errorf("%+s is already mounted but not as bpf. Not supported", plugincommon.MapPath) // nolint:goerr113 // don't have to define a new error for this
}
return nil
}
@@ -59,21 +59,21 @@ func Setup(l *zap.Logger) error {
if err != nil {
return errors.Wrap(err, "failed to mount BPF filesystem")
}
l.Info("BPF filesystem mounted successfully", zap.String("path", plugincommon.FilterMapPath))
l.Info("BPF filesystem mounted successfully", zap.String("path", plugincommon.MapPath))

// Delete existing filter map file.
err = os.Remove(plugincommon.FilterMapPath + "/" + plugincommon.FilterMapName)
err = os.Remove(plugincommon.MapPath + "/" + plugincommon.FilterMapName)
if err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to delete existing filter map file")
}
l.Info("Deleted existing filter map file", zap.String("path", plugincommon.FilterMapPath), zap.String("Map name", plugincommon.FilterMapName))
l.Info("Deleted existing filter map file", zap.String("path", plugincommon.MapPath), zap.String("Map name", plugincommon.FilterMapName))

// Initialize the filter map.
// This will create the filter map in kernel and pin it to /sys/fs/bpf.
_, err = filter.Init()
if err != nil {
return errors.Wrap(err, "failed to initialize filter map")
}
l.Info("Filter map initialized successfully", zap.String("path", plugincommon.FilterMapPath), zap.String("Map name", plugincommon.FilterMapName))
l.Info("Filter map initialized successfully", zap.String("path", plugincommon.MapPath), zap.String("Map name", plugincommon.FilterMapName))
return nil
}
4 changes: 2 additions & 2 deletions pkg/plugin/common/constants.go
Original file line number Diff line number Diff line change
@@ -3,8 +3,8 @@
package common

const (
// FilterMapPath is the path to the BPF filter map.
FilterMapPath = "/sys/fs/bpf"
// MapPath is the path to pinned BPF maps
MapPath = "/sys/fs/bpf"
// FilterMapName is the name of the BPF filter map
FilterMapName = "retina_filter_map"
)
317 changes: 317 additions & 0 deletions pkg/plugin/conntrack/_cprog/conntrack.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include "vmlinux.h"
#include "compiler.h"
#include "bpf_helpers.h"
#include "conntrack.h"


/**
* The structure representing an ipv4 5-tuple key in the connection tracking map.
*/
struct ct_v4_key {
__u32 src_ip;
__u32 dst_ip;
__u16 src_port;
__u16 dst_port;
__u8 proto;
};
/**
* The structure representing a connection in the connection tracking map.
*/
struct ct_entry {
__u32 eviction_time; // eviction_time stores the time when the connection should be evicted from the map.
/**
* last_report_*_dir stores the time when the last packet event was reported in the send and reply direction respectively.
*/
__u32 last_report_tx_dir;
__u32 last_report_rx_dir;
/**
* traffic_direction indicates the direction of the connection in relation to the host.
* If the connection is initiated from within the host, the traffic_direction is egress. Otherwise, the traffic_direction is ingress.
*/
__u8 traffic_direction;
/**
* flags_seen_*_dir stores the flags seen in the send and reply direction respectively.
*/
__u8 flags_seen_tx_dir;
__u8 flags_seen_rx_dir;
bool is_closing; // is_closing indicates if the connection is closing.
};

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, struct ct_v4_key);
__type(value, struct ct_entry);
__uint(max_entries, CT_MAP_SIZE);
__uint(pinning, LIBBPF_PIN_BY_NAME); // needs pinning so this can be access from other processes .i.e debug cli
} retina_conntrack_map SEC(".maps");


/**
* Helper function to reverse a key.
* @arg reverse_key The key to store the reversed key.
* @arg key The key to be reversed.
*/
static inline void _ct_reverse_key(struct ct_v4_key *reverse_key, const struct ct_v4_key *key) {
if (!reverse_key || !key) {
return;
}
reverse_key->src_ip = key->dst_ip;
reverse_key->dst_ip = key->src_ip;
reverse_key->src_port = key->dst_port;
reverse_key->dst_port = key->src_port;
reverse_key->proto = key->proto;
}

/**
* Returns the traffic direction based on the observation point.
* @arg observation_point The point in the network stack where the packet is observed.
*/
static __always_inline __u8 _ct_get_traffic_direction(__u8 observation_point) {
if (observation_point & (OBSERVATION_POINT_FROM_ENDPOINT | OBSERVATION_POINT_TO_NETWORK)) {
return TRAFFIC_DIRECTION_EGRESS;
} else if (observation_point & (OBSERVATION_POINT_TO_ENDPOINT | OBSERVATION_POINT_FROM_NETWORK)) {
return TRAFFIC_DIRECTION_INGRESS;
} else {
return TRAFFIC_DIRECTION_UNKNOWN;
}
}

/**
* Create a new TCP connection.
* @arg key The key to be used to create the new connection.
* @arg flags The flags of the packet.
* @arg observation_point The point in the network stack where the packet is observed.
* @arg timeout The timeout for the connection.
*/
static __always_inline bool _ct_create_new_tcp_connection(struct ct_v4_key key, __u8 flags, __u8 observation_point, __u64 timeout) {
struct ct_entry new_value;
__builtin_memset(&new_value, 0, sizeof(struct ct_entry));
__u64 now = bpf_mono_now();
// Check for overflow
if (timeout > UINT64_MAX - now) {
return false;
}
new_value.eviction_time = now + timeout;
new_value.flags_seen_tx_dir = flags;
new_value.traffic_direction = _ct_get_traffic_direction(observation_point);
bpf_map_update_elem(&retina_conntrack_map, &key, &new_value, BPF_ANY);
return true;
}

/**
* Create a new UDP connection.
* @arg key The key to be used to create the new connection.
* @arg flags The flags of the packet.
* @arg observation_point The point in the network stack where the packet is observed.
*/
static __always_inline bool _ct_handle_udp_connection(struct ct_v4_key key, __u8 flags, __u8 observation_point) {
struct ct_entry new_value;
__builtin_memset(&new_value, 0, sizeof(struct ct_entry));
__u64 now = bpf_mono_now();
// Check for overflow
if (CT_CONNECTION_LIFETIME_NONTCP > UINT64_MAX - now) {
return false;
}
new_value.eviction_time = now + CT_CONNECTION_LIFETIME_NONTCP;
new_value.flags_seen_tx_dir = flags;
new_value.last_report_tx_dir = now;
new_value.traffic_direction = _ct_get_traffic_direction(observation_point);
bpf_map_update_elem(&retina_conntrack_map, &key, &new_value, BPF_ANY);
return true;
}

/**
* Handle a TCP connection.
* @arg key The key to be used to handle the connection.
* @arg reverse_key The reverse key to be used to handle the connection.
* @arg flags The flags of the packet.
* @arg observation_point The point in the network stack where the packet is observed.
*/
static __always_inline bool _ct_handle_tcp_connection(struct ct_v4_key key, struct ct_v4_key reverse_key, __u8 flags, __u8 observation_point) {
// Check if the packet is a SYN packet.
if (flags & TCP_SYN) {
// Create a new connection with a timeout of CT_SYN_TIMEOUT.
return _ct_create_new_tcp_connection(key, flags, observation_point, CT_SYN_TIMEOUT);
}

// The packet is not a SYN packet and the connection corresponding to this packet is not found.
// This might be because of an ongoing connection that started before Retina started tracking connections.
// Therefore we would have missed the SYN packet. A conntrack entry will be created with best effort.
struct ct_entry new_value;
__builtin_memset(&new_value, 0, sizeof(struct ct_entry));
__u64 now = bpf_mono_now();
// Check for overflow
if (CT_CONNECTION_LIFETIME_TCP > UINT64_MAX - now) {
return false;
}
new_value.eviction_time = now + CT_CONNECTION_LIFETIME_TCP;
new_value.is_closing = (flags & (TCP_FIN | TCP_RST)) != 0x0;
new_value.traffic_direction = _ct_get_traffic_direction(observation_point);

// Check for ACK flag. If the ACK flag is set, the packet is considered as a packet in the reply direction of the connection.
if (flags & TCP_ACK) {
new_value.flags_seen_rx_dir = flags;
new_value.last_report_rx_dir = now;
bpf_map_update_elem(&retina_conntrack_map, &reverse_key, &new_value, BPF_ANY);
} else { // Otherwise, the packet is considered as a packet in the send direction.
new_value.flags_seen_tx_dir = flags;
new_value.last_report_tx_dir = now;
bpf_map_update_elem(&retina_conntrack_map, &key, &new_value, BPF_ANY);
}
return true;
}

/**
* Handle a new connection.
* @arg key The key to be used to handle the connection.
* @arg reverse_key The reverse key to be used to handle the connection.
* @arg flags The flags of the packet.
* @arg observation_point The point in the network stack where the packet is observed.
*/
static __always_inline bool _ct_handle_new_connection(struct ct_v4_key key, struct ct_v4_key reverse_key, __u8 flags, __u8 observation_point) {
if (key.proto & IPPROTO_TCP) {
return _ct_handle_tcp_connection(key, reverse_key, flags, observation_point);
} else if (key.proto & IPPROTO_UDP) {
return _ct_handle_udp_connection(key, flags, observation_point);
} else {
return false; // We are not interested in other protocols.
}
}

/**
* Check if a packet should be reported to userspace. Update the corresponding conntrack entry.
* @arg flags The flags of the packet.
* @arg entry The entry of the connection in Retina's conntrack map.
* @arg direction The direction of the packet in relation to the connection.
* @arg protocol The protocol of the packet (TCP or UDP).
* Returns true if the packet should be reported to userspace. False otherwise.
*/
static __always_inline bool _ct_should_report_packet(struct ct_entry *entry, __u8 flags, __u8 direction, __u8 protocol) {
// Check for null parameters.
if (!entry) {
return false;
}

__u64 now = bpf_mono_now();
__u32 eviction_time = READ_ONCE(entry->eviction_time);
__u8 seen_flags;
__u32 last_report;
if (direction == CT_PACKET_DIR_TX) {
seen_flags = READ_ONCE(entry->flags_seen_tx_dir);
last_report = READ_ONCE(entry->last_report_tx_dir);
} else {
seen_flags = READ_ONCE(entry->flags_seen_rx_dir);
last_report = READ_ONCE(entry->last_report_rx_dir);
}
// OR the seen flags with the new flags.
flags |= seen_flags;

// Check if the connection timed out or if it is a TCP connection and FIN or RST flags are set.
if (now >= eviction_time || (protocol == IPPROTO_TCP && flags & (TCP_FIN | TCP_RST))) {
// The connection is closing or closed. Mark the connection as closing. Update the flags seen and last report time.
WRITE_ONCE(entry->is_closing, true);
if (direction == CT_PACKET_DIR_TX) {
WRITE_ONCE(entry->flags_seen_tx_dir, flags);
WRITE_ONCE(entry->last_report_tx_dir, now);
} else {
WRITE_ONCE(entry->flags_seen_rx_dir, flags);
WRITE_ONCE(entry->last_report_rx_dir, now);
}
return true; // Report the last packet received.
}
// Update the eviction time of the connection.
if (protocol == IPPROTO_TCP) {
// Check for overflow, only update the eviction time if there is no overflow.
if (CT_CONNECTION_LIFETIME_TCP > UINT64_MAX - now) {
return false;
}
WRITE_ONCE(entry->eviction_time, now + CT_CONNECTION_LIFETIME_TCP);
} else {
if (CT_CONNECTION_LIFETIME_NONTCP > UINT64_MAX - now) {
return false;
}
WRITE_ONCE(entry->eviction_time, now + CT_CONNECTION_LIFETIME_NONTCP);
}
// We will only report this packet iff a new flag is seen for the given direction or the report interval has passed.
if (flags != seen_flags || now - last_report >= CT_REPORT_INTERVAL) {
if (direction == CT_PACKET_DIR_TX) {
WRITE_ONCE(entry->flags_seen_tx_dir, flags);
WRITE_ONCE(entry->last_report_tx_dir, now);
} else {
WRITE_ONCE(entry->flags_seen_rx_dir, flags);
WRITE_ONCE(entry->last_report_rx_dir, now);
}
return true;
}
return false;
}

/**
* Process a packet and update the connection tracking map.
* @arg key The key to be used to lookup the connection in the map.
* @arg flags The flags of the packet.
* @arg observation_point The point in the network stack where the packet is observed.
* Returns true if the packet should be report to userspace. False otherwise.
*/
static __always_inline __attribute__((unused)) bool ct_process_packet(struct ct_v4_key key, __u8 flags, __u8 observation_point) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

key can be passed as a pointer like you do in other functions to avoid copying

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, my initial plan was to pass in the pointer to the packet into conntrack and then construct the key within ct_process_packet. That way we don't have to do the subsequent function call to get the traffic_direction and is_reply
image
and instead, we can do it within ct_process_packet. However, we will need to relocate the packet struct to conntrack since cycle dependency is not allowed, i think. Curious about your opinion

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't understand - why cannot we pass a pointer to they input key here - this function on;y seems to be accessing its values.

Where would we have a cyclical dependency? (Do you mean the eBPF verifier fails on that?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, and i could be wrong, there was some issue with the verifier when passing the key pointer from packetparser to conntrack, i can test it again to verify.
The cyclical dependency is applied to the statement where i wanted to pass the packet pointer from packetparser to conntrack. Since packetparser is already importing conntrack, conntrack can't import packetparser back to get the packet struct def. The reason why I think we should be passing the packet pointer to conntrack is because so that we don't have to do subsequent map look up for is_reply and traffic_direction, but instead set in immediately in ct_process_packet

// Lookup the connection in the map.
struct ct_entry *entry = bpf_map_lookup_elem(&retina_conntrack_map, &key);

// If the connection is found in the send direction, update the connection.
if (entry) {
return _ct_should_report_packet(entry, flags, CT_PACKET_DIR_TX, key.proto);
}

// The connection is not found in the send direction. Check the reply direction by reversing the key.
struct ct_v4_key reverse_key;
__builtin_memset(&reverse_key, 0, sizeof(struct ct_v4_key));
_ct_reverse_key(&reverse_key, &key);

// Lookup the connection in the map based on the reverse key.
entry = bpf_map_lookup_elem(&retina_conntrack_map, &reverse_key);

// If the connection is found based on the reverse key, meaning that the packet is a reply packet to an existing connection.
if (entry) {
return _ct_should_report_packet(entry, flags, CT_PACKET_DIR_RX, key.proto);
}

// If the connection is still not found, the connection is new.
return _ct_handle_new_connection(key, reverse_key, flags, observation_point);
}

/**
* Check if a packet is a reply packet to a connection.
* @arg key The key to be used to check if the packet is a reply packet.
*/
static __always_inline __attribute__((unused)) bool ct_is_reply_packet(struct ct_v4_key key) {
// Lookup the connection in the map.
struct ct_entry *entry = bpf_map_lookup_elem(&retina_conntrack_map, &key);
// We return false here because we found the connection in the send direction
// meaning that the packet is coming from the initiator of the connection and therefore not a reply packet.
return entry == NULL;
}

/**
* Get the traffic direction of a connection.
* @arg key The key to be used to get the traffic direction of the connection.
*/
static __always_inline __attribute__((unused)) __u8 ct_get_traffic_direction(struct ct_v4_key key) {
// Lookup the connection in the map.
struct ct_entry *entry = bpf_map_lookup_elem(&retina_conntrack_map, &key);
if (entry) {
return entry->traffic_direction;
}
// Construct the reverse key.
struct ct_v4_key reverse_key;
__builtin_memset(&reverse_key, 0, sizeof(struct ct_v4_key));
_ct_reverse_key(&reverse_key, &key);
// Lookup the connection in the map based on the reverse key.
entry = bpf_map_lookup_elem(&retina_conntrack_map, &reverse_key);
if (entry) {
return entry->traffic_direction;
}
return TRAFFIC_DIRECTION_UNKNOWN;
}
Loading