Skip to content

Commit

Permalink
Merge pull request #401 from cjdoucette/remove_l3_cksum
Browse files Browse the repository at this point in the history
Remove dependencies on IPv4 hardware checksums
  • Loading branch information
AltraMayor authored Mar 31, 2020
2 parents 7118d27 + 9a4c15c commit b524b06
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 41 deletions.
8 changes: 2 additions & 6 deletions gk/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1050,14 +1050,10 @@ xmit_icmp(struct gatekeeper_if *iface, struct ipacket *packet,
icmp_ipv4->dst_addr = packet->flow.f.v4.src.s_addr;
icmp_ipv4->total_length = rte_cpu_to_be_16(pkt->data_len -
iface->l2_len_out);
/*
* The IP header checksum field must be set to 0
* in order to offload the checksum calculation.
*/
icmp_ipv4->hdr_checksum = 0;

pkt->l2_len = iface->l2_len_out;
pkt->l3_len = sizeof(struct rte_ipv4_hdr);
pkt->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
set_ipv4_checksum(iface, pkt, icmp_ipv4);

icmph = (struct rte_icmp_hdr *)&icmp_ipv4[1];
icmph->icmp_type = ICMP_TIME_EXCEEDED;
Expand Down
20 changes: 10 additions & 10 deletions gt/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,9 +639,8 @@ decap_and_fill_eth(struct rte_mbuf *m, struct gt_config *gt_conf,
IPTOS_ECN_CE) &&
(pkt_info->outer_ecn == IPTOS_ECN_CE)) {
inner_ipv4_hdr->type_of_service |= IPTOS_ECN_CE;
inner_ipv4_hdr->hdr_checksum = 0;
m->l3_len = ipv4_hdr_len(inner_ipv4_hdr);
m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
set_ipv4_checksum(&gt_conf->net->front, m, inner_ipv4_hdr);
}

neigh = &instance->neigh;
Expand Down Expand Up @@ -776,15 +775,13 @@ fill_notify_pkt_hdr(struct rte_mbuf *notify_pkt,
*/
notify_ipv4->dst_addr = ipv4_hdr->src_addr;

/*
* The IP header checksum field must be set to 0
* in order to offload the checksum calculation.
*/
notify_ipv4->hdr_checksum = 0;
notify_pkt->ol_flags |= (PKT_TX_IPV4 | PKT_TX_IP_CKSUM);
notify_pkt->l3_len = sizeof(struct rte_ipv4_hdr);
notify_pkt->ol_flags |= PKT_TX_IPV4;

/* IPv4 checksum set in prep_notify_pkt(). */

if (likely(iface->ipv4_hw_udp_cksum))
notify_pkt->ol_flags |= PKT_TX_UDP_CKSUM;
notify_pkt->l3_len = sizeof(struct rte_ipv4_hdr);
} else if (likely(ethertype == RTE_ETHER_TYPE_IPV6)) {
struct rte_ipv6_hdr *ipv6_hdr =
(struct rte_ipv6_hdr *)pkt_info->outer_l3_hdr;
Expand All @@ -799,10 +796,11 @@ fill_notify_pkt_hdr(struct rte_mbuf *notify_pkt,
rte_memcpy(notify_ipv6->dst_addr, ipv6_hdr->src_addr,
sizeof(notify_ipv6->dst_addr));

notify_pkt->l3_len = sizeof(struct rte_ipv6_hdr);
notify_pkt->ol_flags |= PKT_TX_IPV6;

if (likely(iface->ipv6_hw_udp_cksum))
notify_pkt->ol_flags |= PKT_TX_UDP_CKSUM;
notify_pkt->l3_len = sizeof(struct rte_ipv6_hdr);
}

/* Fill up the UDP header. */
Expand Down Expand Up @@ -945,6 +943,8 @@ prep_notify_pkt(struct ggu_notify_pkt *ggu_pkt, struct gatekeeper_if *iface)
notify_ipv4->total_length = rte_cpu_to_be_16(
ggu_pkt->buf->data_len - ggu_pkt->buf->l2_len);

set_ipv4_checksum(iface, ggu_pkt->buf, notify_ipv4);

notify_udp = (struct rte_udp_hdr *)&notify_ipv4[1];
notify_udp->dgram_len = dgram_len_be;
if (likely(iface->ipv4_hw_udp_cksum)) {
Expand Down
26 changes: 26 additions & 0 deletions include/gatekeeper_net.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ struct gatekeeper_if {
*/
bool ipv6_hw_udp_cksum;

/* Whether IPv4 (L3) checksums should be enabled in hardware. */
bool ipv4_hw_cksum;

/*
* The fields below are for internal use.
* Configuration files should not refer to them.
Expand Down Expand Up @@ -497,6 +500,29 @@ hw_filter_eth_available(const struct gatekeeper_if *iface)
return iface->hw_filter_eth && iface->rss;
}

/*
 * Fill in the checksum of the IPv4 header @ipv4 carried by @pkt.
 *
 * When @iface->ipv4_hw_cksum is set, the computation is left to the NIC
 * by flagging the packet for IPv4 checksum offload; otherwise the
 * checksum is computed right here in software.
 *
 * Callers must have set pkt->l2_len and pkt->l3_len beforehand; strictly
 * speaking, only the hardware path depends on those fields.
 */
static inline void
set_ipv4_checksum(struct gatekeeper_if *iface, struct rte_mbuf *pkt,
	struct rte_ipv4_hdr *ipv4)
{
	/*
	 * Both the hardware and the software paths require the checksum
	 * field to be zero before the checksum is computed.
	 */
	ipv4->hdr_checksum = 0;

	if (likely(iface->ipv4_hw_cksum)) {
		/* Hardware path: mark the packet and let the NIC do it. */
		pkt->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
		return;
	}

	/* Software path: still mark the packet as IPv4, compute inline. */
	pkt->ol_flags |= PKT_TX_IPV4;
	ipv4->hdr_checksum = rte_ipv4_cksum(ipv4);
}

static inline int
max_prefix_len(int ip_type)
{
Expand Down
9 changes: 1 addition & 8 deletions lib/ipip.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,8 @@ encapsulate(struct rte_mbuf *pkt, uint8_t priority,
outer_ip4hdr->total_length =
rte_cpu_to_be_16(pkt->pkt_len - iface->l2_len_out);

/*
* The IP header checksum field must be set to 0
* in order to offload the checksum calculation.
*/
outer_ip4hdr->hdr_checksum = 0;

pkt->l3_len = sizeof(struct rte_ipv4_hdr);
/* Offload checksum computation for the outer IPv4 header. */
pkt->ol_flags |= (PKT_TX_IPV4 | PKT_TX_IP_CKSUM);
set_ipv4_checksum(iface, pkt, outer_ip4hdr);
} else if (likely(gt_addr->proto == RTE_ETHER_TYPE_IPV6)) {
struct rte_ipv6_hdr *inner_ip6hdr;

Expand Down
25 changes: 18 additions & 7 deletions lib/net.c
Original file line number Diff line number Diff line change
Expand Up @@ -870,11 +870,11 @@ check_port_cksum(struct gatekeeper_if *iface, unsigned int port_idx,
if ((port_conf->txmode.offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) &&
!(dev_info->tx_offload_capa &
DEV_TX_OFFLOAD_IPV4_CKSUM)) {
G_LOG(NOTICE, "net: port %hu (%s) on the %s interface doesn't support offloading IPv4 checksumming\n",
G_LOG(NOTICE, "net: port %hu (%s) on the %s interface doesn't support offloading IPv4 checksumming; will use software IPv4 checksums\n",
iface->ports[port_idx], iface->pci_addrs[port_idx],
iface->name);
port_conf->txmode.offloads &= ~DEV_TX_OFFLOAD_IPV4_CKSUM;
return -1;
iface->ipv4_hw_cksum = false;
}

if ((port_conf->txmode.offloads & DEV_TX_OFFLOAD_UDP_CKSUM) &&
Expand Down Expand Up @@ -941,13 +941,24 @@ check_port_offloads(struct gatekeeper_if *iface,
/*
* Set up checksumming.
*
* If IPv4 is supported, both Grantor and Gatekeeper
* need to support IPv4 checksumming in hardware.
* Gatekeeper and Grantor do IPv4 checksumming in hardware,
* if available.
*
* Grantor also does UDP checksumming in hardware, if available.
*
* In both cases, we set up the devices to assume that
* IPv4 and UDP checksumming are supported unless querying
* the device in check_port_cksum() shows otherwise.
*
* Grantor uses UDP checksumming if available. Assume
* UDP checksumming is supported until shown otherwise.
* Note that the IPv4 checksum field is only computed over
* the IPv4 header and the UDP checksum is computed over an IPv4
* pseudoheader (i.e. not the direct bytes of the IPv4 header).
* Therefore, even though offloading checksum computations can cause
* checksum fields to be set to 0 or an intermediate value during
* processing, the IPv4 and UDP checksum operations do not overlap,
* and can be configured as hardware or software independently.
*/
if (ipv4_if_configured(iface))
if (ipv4_if_configured(iface) && iface->ipv4_hw_cksum)
port_conf->txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
if (!config.back_iface_enabled &&
(iface->ipv4_hw_udp_cksum || iface->ipv6_hw_udp_cksum))
Expand Down
7 changes: 1 addition & 6 deletions lls/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,14 +371,9 @@ xmit_icmp_reply(struct gatekeeper_if *iface, struct rte_mbuf *pkt)
icmp_ipv4->src_addr = icmp_ipv4->dst_addr;
icmp_ipv4->dst_addr = ip_addr_tmp;

/*
* The IP header checksum field must be set to 0
* in order to offload the checksum calculation.
*/
icmp_ipv4->hdr_checksum = 0;
pkt->l2_len = iface->l2_len_out;
pkt->l3_len = ipv4_hdr_len(icmp_ipv4);
pkt->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
set_ipv4_checksum(iface, pkt, icmp_ipv4);

if (icmp_ipv4->next_proto_id != IPPROTO_ICMP)
return -1;
Expand Down
1 change: 1 addition & 0 deletions lua/gatekeeper/staticlib.lua
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ struct gatekeeper_if {
uint16_t num_tx_desc;
bool ipv4_hw_udp_cksum;
bool ipv6_hw_udp_cksum;
bool ipv4_hw_cksum;
/* This struct has hidden fields. */
};

Expand Down
12 changes: 8 additions & 4 deletions lua/net.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,13 @@ return function (gatekeeper_server)
local front_vlan_tag = 0x123
local front_vlan_insert = true
local front_mtu = 1500
local front_ipv4_hw_udp_cksum = true
local front_ipv6_hw_udp_cksum = true

local back_ports = {"enp133s0f1"}
local back_ips = {"10.0.2.1/24", "2001:db8:2::1/48"}
local back_bonding_mode = staticlib.c.BONDING_MODE_ROUND_ROBIN
local back_vlan_tag = 0x456
local back_vlan_insert = true
local back_mtu = 2048
local back_ipv4_hw_udp_cksum = true
local back_ipv6_hw_udp_cksum = true

-- XXX #155 These parameters should only be changed for performance reasons.
local front_arp_cache_timeout_sec = 7200 -- (2 hours)
Expand All @@ -44,6 +40,12 @@ return function (gatekeeper_server)
local front_ipv6_default_hop_limits = 255
local back_ipv6_default_hop_limits = 255
local rotate_log_interval_sec = 60 * 60 -- (1 hour)
local front_ipv4_hw_udp_cksum = true
local front_ipv6_hw_udp_cksum = true
local back_ipv4_hw_udp_cksum = true
local back_ipv6_hw_udp_cksum = true
local front_ipv4_hw_cksum = true
local back_ipv4_hw_cksum = true

--
-- End configuration of the network.
Expand Down Expand Up @@ -72,6 +74,7 @@ return function (gatekeeper_server)
front_iface.num_tx_desc = front_num_tx_desc
front_iface.ipv4_hw_udp_cksum = front_ipv4_hw_udp_cksum
front_iface.ipv6_hw_udp_cksum = front_ipv6_hw_udp_cksum
front_iface.ipv4_hw_cksum = front_ipv4_hw_cksum
local ret = staticlib.init_iface(front_iface, "front",
front_ports, front_ips, front_vlan_tag)
if ret < 0 then
Expand All @@ -92,6 +95,7 @@ return function (gatekeeper_server)
back_iface.num_tx_desc = back_num_tx_desc
back_iface.ipv4_hw_udp_cksum = back_ipv4_hw_udp_cksum
back_iface.ipv6_hw_udp_cksum = back_ipv6_hw_udp_cksum
back_iface.ipv4_hw_cksum = back_ipv4_hw_cksum
ret = staticlib.init_iface(back_iface, "back",
back_ports, back_ips, back_vlan_tag)
if ret < 0 then
Expand Down

0 comments on commit b524b06

Please sign in to comment.