// Patch summary: adds getsockopt(SOL_PACKET, PACKET_STATISTICS) for packet_mmap sockets.
// - packetmmap.Endpoint replaces the shared TransportEndpointStats pointer with its own
//   atomic received/dropped counters; Stats() reads and zeroes both via Swap(0) and
//   reports Packets as received + dropped.
// - The packet endpoint's GetSockOpt fills *tcpip.TpacketStats from the mmap endpoint
//   (the option is left untouched when no mmap endpoint is set) and returns
//   ErrUnknownProtocolOption for any other option type.
// - setsockopt(PACKET_RESERVE) now rejects an optVal shorter than sizeOfInt32.
// NOTE(review): this copy has its newlines collapsed and angle-bracketed text stripped
// (e.g. the reinterpret_cast template arguments in the test) -- confirm against the
// upstream change before applying.
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index ccf557b9c8..2da5453643 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -148,6 +148,7 @@ const ( const ( PACKET_ADD_MEMBERSHIP = 1 PACKET_RX_RING = 5 + PACKET_STATISTICS = 6 PACKET_AUXDATA = 8 PACKET_VERSION = 10 PACKET_HDRLEN = 11 @@ -211,6 +212,15 @@ type Tpacket2Hdr struct { _ [4]uint8 } +// TpacketStats is the statistics for a packet_mmap ring buffer from +// <linux/if_packet.h>. +// +// +marshal +type TpacketStats struct { + Packets uint32 + Dropped uint32 +} + // TpacketAlignment is the alignment of a frame in a packet_mmap ring buffer // from <linux/if_packet.h>. const ( diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 4e9e4454c6..e14f6fcf0e 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1869,6 +1869,16 @@ func getSockOptPacket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i default: return nil, syserr.ErrInvalidArgument } + case linux.PACKET_STATISTICS: + var tps tcpip.TpacketStats + if err := ep.GetSockOpt(&tps); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + v := linux.TpacketStats{ + Packets: tps.Packets, + Dropped: tps.Dropped, + } + return &v, nil } return nil, syserr.ErrProtocolNotAvailable } @@ -2785,6 +2795,9 @@ func setSockOptPacket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.PacketMMapVersionOption, int(v))) case linux.PACKET_RESERVE: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.PacketMMapReserveOption, int(v))) case linux.PACKET_ADD_MEMBERSHIP, linux.PACKET_AUXDATA: diff --git a/pkg/sentry/socket/netstack/packetmmap/endpoint.go b/pkg/sentry/socket/netstack/packetmmap/endpoint.go index 04f5804fea..a2c30d3029 100644 --- 
a/pkg/sentry/socket/netstack/packetmmap/endpoint.go +++ b/pkg/sentry/socket/netstack/packetmmap/endpoint.go @@ -77,8 +77,10 @@ type Endpoint struct { version int headerLen uint32 + received atomicbitops.Uint32 + dropped atomicbitops.Uint32 + stack *stack.Stack - stats *tcpip.TransportEndpointStats wq *waiter.Queue mappingsMu sync.Mutex `state:"nosave"` @@ -96,7 +98,6 @@ func (m *Endpoint) Init(ctx context.Context, opts stack.PacketMMapOpts) error { m.wq = opts.Wq m.cooked = opts.Cooked m.packetEP = opts.PacketEndpoint - m.stats = opts.Stats m.nicID = opts.NICID m.netProto = opts.NetProto m.version = opts.Version @@ -194,6 +195,7 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco if !m.rxRingBuffer.hasRoom() { m.mu.Unlock() m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } m.mu.Unlock() @@ -226,6 +228,7 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco } if netOffset > uint32(^uint16(0)) { m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } dataLength = uint32(pktBuf.Size()) @@ -246,6 +249,7 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco if err != nil || tpStatus != linux.TP_STATUS_KERNEL { m.mu.Unlock() m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } @@ -253,6 +257,7 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco if !ok { m.mu.Unlock() m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } m.rxRingBuffer.incHead() @@ -272,6 +277,7 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco if err := m.rxRingBuffer.writeFrame(slot, hdrView, pktBuf); err != nil { m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } @@ -279,9 +285,10 @@ func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoco defer m.mu.Unlock() if err := m.rxRingBuffer.writeStatus(slot, status); err != nil { 
m.stack.Stats().DroppedPackets.Increment() + m.dropped.Add(1) return } - m.stats.PacketsReceived.Increment() + m.received.Add(1) m.wq.Notify(waiter.ReadableEvents) } @@ -375,6 +382,16 @@ func (m *Endpoint) Mapped() bool { return m.mapped.Load() != 0 } +// Stats implements stack.PacketMMapEndpoint.Stats. +func (m *Endpoint) Stats() tcpip.TpacketStats { + rcv := m.received.Swap(0) + drop := m.dropped.Swap(0) + return tcpip.TpacketStats{ + Packets: uint32(rcv + drop), + Dropped: uint32(drop), + } +} + func toLinuxPacketType(pktType tcpip.PacketType) uint8 { switch pktType { case tcpip.PacketHost: diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index 77566dd0ae..fd6f501a60 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -203,7 +203,6 @@ type PacketMMapOpts struct { IsRx bool Cooked bool Stack *Stack - Stats *tcpip.TransportEndpointStats Wq *waiter.Queue NICID tcpip.NICID NetProto tcpip.NetworkProtocolNumber @@ -232,6 +231,10 @@ type PacketMMapEndpoint interface { // Readiness returns the events that the endpoint is ready for. Readiness(mask waiter.EventMask) waiter.EventMask + + // Stats returns the statistics for the endpoint that can be used for + // getsockopt(PACKET_STATISTICS). + Stats() tcpip.TpacketStats } // UnknownDestinationPacketDisposition enumerates the possible return values from diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 6d516d3ab7..d43fcba1f0 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1205,6 +1205,17 @@ type TpacketReq struct { func (*TpacketReq) isSettableSocketOption() {} +// TpacketStats is the statistics for a packet_mmap ring buffer from +// <linux/if_packet.h>. +// +// +stateify savable +type TpacketStats struct { + Packets uint32 + Dropped uint32 +} + +func (*TpacketStats) isGettableSocketOption() {} + // EndpointState represents the state of an endpoint. 
type EndpointState uint8 diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index f8cfdc2c7e..6dc21a80bb 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -448,8 +448,19 @@ func (ep *endpoint) UpdateLastError(err tcpip.Error) { } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error { - return &tcpip.ErrNotSupported{} +func (ep *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { + ep.mu.RLock() + defer ep.mu.RUnlock() + switch opt.(type) { + case *tcpip.TpacketStats: + if ep.packetMMapEp == nil { + return nil + } + *(opt.(*tcpip.TpacketStats)) = ep.packetMMapEp.Stats() + return nil + default: + return &tcpip.ErrUnknownProtocolOption{} + } } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. @@ -577,7 +588,6 @@ func (ep *endpoint) GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) stack.Pa IsRx: isRx, Cooked: ep.cooked, Stack: ep.stack, - Stats: &ep.stats, Wq: ep.waiterQueue, NICID: ep.boundNIC, NetProto: ep.boundNetProto, diff --git a/test/syscalls/linux/packet_mmap.cc b/test/syscalls/linux/packet_mmap.cc index 716e5878cf..d0c9e2e8db 100644 --- a/test/syscalls/linux/packet_mmap.cc +++ b/test/syscalls/linux/packet_mmap.cc @@ -654,6 +654,65 @@ TEST(PacketMmapTest, PacketReserve) { TPACKET_ALIGN(sizeof(tpacket2_hdr) + sizeof(sockaddr_ll) + 16) + reserve); } +TEST(PacketMmapTest, PacketStatistics) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, 0), SyscallFailsWithErrno(EPERM)); + GTEST_SKIP() << "Missing packet socket capability"; + } + sockaddr_ll bind_addr = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_IP), + .sll_ifindex = ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()), + .sll_halen = ETH_ALEN, + }; + FileDescriptor mmap_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_DGRAM, 0)); + + uint32_t 
tp_frame_size = 65536 + 128; + uint32_t tp_block_size = tp_frame_size * 32; + uint32_t tp_block_nr = 2; + uint32_t tp_frame_nr = (tp_block_size * tp_block_nr) / tp_frame_size; + tpacket_req req = { + .tp_block_size = tp_block_size, + .tp_block_nr = tp_block_nr, + .tp_frame_size = tp_frame_size, + .tp_frame_nr = tp_frame_nr, + }; + void* ring = ASSERT_NO_ERRNO_AND_VALUE(MakePacketMmapRing( + mmap_sock.get(), reinterpret_cast(&bind_addr), + sizeof(bind_addr), &req, TPACKET_V2)); + auto ring_cleanup = Cleanup([ring, tp_block_size, tp_block_nr] { + ASSERT_THAT(munmap(ring, tp_block_size * tp_block_nr), SyscallSucceeds()); + }); + + std::string kMessage = "123abc"; + for (uint32_t i = 0; i < tp_frame_nr; i++) { + ASSERT_THAT( + sendto(mmap_sock.get(), kMessage.c_str(), kMessage.size(), + 0 /* flags */, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + } + // After sending tp_frame_nr packets the buffer is full and all future sent + // packets will be dropped. + int expected_dropped = 20; + for (int i = 0; i < expected_dropped; i++) { + ASSERT_THAT( + sendto(mmap_sock.get(), kMessage.c_str(), kMessage.size(), + 0 /* flags */, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + } + + struct tpacket_stats stats; + socklen_t stats_len = sizeof(stats); + EXPECT_THAT(getsockopt(mmap_sock.get(), SOL_PACKET, PACKET_STATISTICS, &stats, + &stats_len), + SyscallSucceeds()); + EXPECT_EQ(stats.tp_drops, expected_dropped); + EXPECT_EQ(stats.tp_packets, tp_frame_nr + expected_dropped); +} + } // namespace } // namespace testing } // namespace gvisor