diff --git a/gk/main.c b/gk/main.c index 3d55a022..2c821ac8 100644 --- a/gk/main.c +++ b/gk/main.c @@ -90,31 +90,55 @@ priority_from_delta_time(uint64_t present, uint64_t past) return integer_log_base_2(delta_time); } +static inline bool +is_addr6_mc(const struct in6_addr *addr6) +{ + /* + * True when the first byte of @addr6 is 0xFF, i.e. @addr6 is multicast. + * See RFC 4291 section "2.7. Multicast Addresses". + */ + return addr6->s6_addr[0] == 0xFF; +} + +static inline bool +is_addr6_ll(const struct in6_addr *addr6) +{ + const uint8_t ll_prefix[] = {0xFE, 0x80, 0, 0, 0, 0, 0, 0}; + const uint64_t *pa64 = (const uint64_t *)addr6->s6_addr; + const uint64_t *pb64 = (const uint64_t *)ll_prefix; + + RTE_BUILD_BUG_ON(sizeof(ll_prefix) != sizeof(uint64_t)); + + /* + * True when the first 64 bits of @addr6 equal FE80:0:0:0, i.e. @addr6 is link-local. + * See RFC 4291 section "2.5.6. Link-Local IPv6 Unicast + * Addresses". + */ + return *pa64 == *pb64; +} + static int -extract_packet_info(struct rte_mbuf *pkt, struct ipacket *packet) +extract_packet_info(struct rte_mbuf *pkt, struct ipacket *packet, + struct rte_mbuf **arp_bufs, uint16_t *num_arp, struct acl_search *acl6) { - int ret = 0; uint16_t ether_type; size_t ether_len; - struct rte_ether_hdr *eth_hdr; - struct rte_ipv4_hdr *ip4_hdr; - struct rte_ipv6_hdr *ip6_hdr; - uint16_t pkt_len = rte_pktmbuf_data_len(pkt); - eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + uint16_t pkt_len = rte_pktmbuf_data_len(pkt); + struct rte_ether_hdr *eth_hdr = + rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); ether_type = rte_be_to_cpu_16(pkt_in_skip_l2(pkt, eth_hdr, &packet->l3_hdr)); ether_len = pkt_in_l2_hdr_len(pkt); switch (ether_type) { - case RTE_ETHER_TYPE_IPV4: - if (pkt_len < ether_len + sizeof(*ip4_hdr)) { - packet->flow.proto = 0; - G_LOG(NOTICE, - "Packet is too short to be IPv4 (%" PRIu16 ")\n", - pkt_len); - ret = -1; - goto out; + case RTE_ETHER_TYPE_IPV4: { + struct rte_ipv4_hdr *ip4_hdr; + + if (unlikely(pkt_len < ether_len + sizeof(*ip4_hdr))) { + G_LOG(DEBUG, "%s(): packet is too short to be IPv4 (%i)\n", 
__func__, pkt_len); + return -EINVAL; } ip4_hdr = packet->l3_hdr; @@ -122,15 +146,15 @@ extract_packet_info(struct rte_mbuf *pkt, struct ipacket *packet) packet->flow.f.v4.src.s_addr = ip4_hdr->src_addr; packet->flow.f.v4.dst.s_addr = ip4_hdr->dst_addr; break; + } - case RTE_ETHER_TYPE_IPV6: - if (pkt_len < ether_len + sizeof(*ip6_hdr)) { - packet->flow.proto = 0; - G_LOG(NOTICE, - "Packet is too short to be IPv6 (%" PRIu16 ")\n", - pkt_len); - ret = -1; - goto out; + case RTE_ETHER_TYPE_IPV6: { + struct rte_ipv6_hdr *ip6_hdr; + + if (unlikely(pkt_len < ether_len + sizeof(*ip6_hdr))) { + G_LOG(DEBUG, "%s(): packet is too short to be IPv6 (%i)\n", + __func__, pkt_len); + return -EINVAL; } ip6_hdr = packet->l3_hdr; @@ -139,22 +163,26 @@ extract_packet_info(struct rte_mbuf *pkt, struct ipacket *packet) sizeof(packet->flow.f.v6.src.s6_addr)); rte_memcpy(packet->flow.f.v6.dst.s6_addr, ip6_hdr->dst_addr, sizeof(packet->flow.f.v6.dst.s6_addr)); + + if (unlikely(is_addr6_mc(&packet->flow.f.v6.dst) || + is_addr6_ll(&packet->flow.f.v6.dst))) { + add_pkt_acl(acl6, pkt); + return -ENOENT; + } break; + } case RTE_ETHER_TYPE_ARP: - packet->flow.proto = RTE_ETHER_TYPE_ARP; - ret = -1; - break; + arp_bufs[(*num_arp)++] = pkt; + return -ENOENT; default: - packet->flow.proto = 0; + /* Drop non-IP and non-ARP packets. 
*/ log_unknown_l2("gk", ether_type); - ret = -1; - break; + return -EINVAL; } -out: packet->pkt = pkt; - return ret; + return 0; } static inline uint64_t @@ -1804,7 +1832,7 @@ prefetch_flow_entry(struct flow_entry *fe) static void parse_packet(struct ipacket *packet, struct rte_mbuf *pkt, - struct rte_mbuf **arp_bufs, uint16_t *num_arp, + struct rte_mbuf **arp_bufs, uint16_t *num_arp, struct acl_search *acl6, bool ipv4_configured_front, bool ipv6_configured_front, struct ip_flow **flow_arr, uint32_t *flow_hash_val_arr, int *num_ip_flows, struct gatekeeper_if *front, @@ -1815,19 +1843,12 @@ parse_packet(struct ipacket *packet, struct rte_mbuf *pkt, stats->tot_pkts_size += rte_pktmbuf_pkt_len(pkt); - ret = extract_packet_info(pkt, packet); - if (ret < 0) { - if (likely(packet->flow.proto == RTE_ETHER_TYPE_ARP)) { - stats->tot_pkts_num_distributed++; - stats->tot_pkts_size_distributed += - rte_pktmbuf_pkt_len(pkt); - - arp_bufs[(*num_arp)++] = pkt; - return; - } - - /* Drop non-IP and non-ARP packets. */ - drop_packet_front(pkt, instance); + ret = extract_packet_info(pkt, packet, arp_bufs, num_arp, acl6); + if (unlikely(ret < 0)) { + stats->tot_pkts_num_distributed++; + stats->tot_pkts_size_distributed += rte_pktmbuf_pkt_len(pkt); + if (unlikely(ret == -EINVAL)) + drop_packet_front(pkt, instance); return; } @@ -1912,16 +1933,18 @@ process_pkts_front(uint16_t port_front, uint16_t rx_queue_front, rte_prefetch0(rte_pktmbuf_mtod_offset( rx_bufs[i + PREFETCH_OFFSET], void *, 0)); - parse_packet(&pkt_arr[num_ip_flows], rx_bufs[i], arp_bufs, - &num_arp, ipv4_configured_front, ipv6_configured_front, + parse_packet(&pkt_arr[num_ip_flows], rx_bufs[i], + arp_bufs, &num_arp, &acl6, + ipv4_configured_front, ipv6_configured_front, flow_arr, flow_hash_val_arr, &num_ip_flows, front, instance); } /* Extract the rest packet and flow information. 
*/ for (; i < num_rx; i++) { - parse_packet(&pkt_arr[num_ip_flows], rx_bufs[i], arp_bufs, - &num_arp, ipv4_configured_front, ipv6_configured_front, + parse_packet(&pkt_arr[num_ip_flows], rx_bufs[i], + arp_bufs, &num_arp, &acl6, + ipv4_configured_front, ipv6_configured_front, flow_arr, flow_hash_val_arr, &num_ip_flows, front, instance); } @@ -2238,15 +2261,11 @@ process_pkts_back(uint16_t port_back, uint16_t rx_queue_back, struct ipacket *packet = &pkt_arr[num_ip_flows]; struct rte_mbuf *pkt = rx_bufs[i]; - ret = extract_packet_info(pkt, packet); - if (ret < 0) { - if (likely(packet->flow.proto == RTE_ETHER_TYPE_ARP)) { - arp_bufs[num_arp++] = pkt; - continue; - } - - /* Drop non-IP and non-ARP packets. */ - drop_packet(pkt); + ret = extract_packet_info(pkt, packet, + arp_bufs, &num_arp, &acl6); + if (unlikely(ret < 0)) { + if (unlikely(ret == -EINVAL)) + drop_packet(pkt); continue; } diff --git a/gk/rt.c b/gk/rt.c index 68ad63bf..335160c3 100644 --- a/gk/rt.c +++ b/gk/rt.c @@ -1087,6 +1087,13 @@ init_gateway_fib_locked(const struct ip_prefix *ip_prefix, struct neighbor_hash_table *neigh_ht; struct gatekeeper_if *iface; + if (unlikely(ip_prefix->addr.proto != gw_addr->proto)) { + G_LOG(ERR, "%s(%s): IP prefix protocol (%hu) does not match the gateway address protocol (%hu)\n", + __func__, ip_prefix->str, ip_prefix->addr.proto, + gw_addr->proto); + return -EINVAL; + } + if (action == GK_FWD_GATEWAY_FRONT_NET) iface = &gk_conf->net->front; else if (likely(action == GK_FWD_GATEWAY_BACK_NET)) @@ -1138,6 +1145,56 @@ init_gateway_fib_locked(const struct ip_prefix *ip_prefix, return ret; } +/* + * Return 0 when @gw_addr is not included in @prefix. + * If it is included, or if there is an error, return a negative number. 
+ */ +static int +check_gateway_prefix(const struct ip_prefix *prefix, struct ipaddr *gw_addr) +{ + if (unlikely(prefix->addr.proto != gw_addr->proto)) { + G_LOG(ERR, "%s(%s): IP prefix protocol (%hu) does not match the gateway address protocol (%hu)\n", + __func__, prefix->str, prefix->addr.proto, + gw_addr->proto); + return -EINVAL; + } + + if (gw_addr->proto == RTE_ETHER_TYPE_IPV4) { + uint32_t ip4_mask = + rte_cpu_to_be_32(~0ULL << (32 - prefix->len)); + if ((prefix->addr.ip.v4.s_addr ^ + gw_addr->ip.v4.s_addr) & ip4_mask) + return 0; + } else if (likely(gw_addr->proto == RTE_ETHER_TYPE_IPV6)) { + uint64_t ip6_mask; + uint64_t *pf = (uint64_t *)prefix->addr.ip.v6.s6_addr; + uint64_t *gw = (uint64_t *)gw_addr->ip.v6.s6_addr; + + if (prefix->len == 0) { + /* Do nothing. */ + } else if (prefix->len <= 64) { + ip6_mask = rte_cpu_to_be_64( + ~0ULL << (64 - prefix->len)); + if ((pf[0] ^ gw[0]) & ip6_mask) + return 0; + } else { + ip6_mask = rte_cpu_to_be_64( + ~0ULL << (128 - prefix->len)); + if ((pf[0] != gw[0]) || + ((pf[1] ^ gw[1]) & ip6_mask)) + return 0; + } + } else { + G_LOG(CRIT, "%s(%s): bug: unknown IP type %hu\n", + __func__, prefix->str, gw_addr->proto); + return -EINVAL; + } + + G_LOG(ERR, "%s(%s): the gateway address is included in the prefix, but gateways of Grantor entries cannot be neighbors of Gatekeeper servers (see issue #267 for details)\n", + __func__, prefix->str); + return -EPERM; +} + #define MAX_NUM_GRANTORS_PER_ENTRY \ ((1 << (RTE_SIZEOF_FIELD(struct gk_fib, u.grantor.set->num_entries) * 8)) - 1) @@ -1176,13 +1233,33 @@ init_grantor_fib_locked(const struct ip_prefix *ip_prefix, for (i = 0; i < num_addrs; i++) { struct neighbor_hash_table *neigh_ht; - if (unlikely(gt_addrs[i].proto != ip_prefix->addr.proto)) { - G_LOG(ERR, "%s(%s): failed to initialize a Grantor FIB entry, since the Grantor IP and the given IP prefix have different IP versions\n", - __func__, ip_prefix->str); + if (unlikely(ip_prefix->addr.proto != gt_addrs[i].proto)) { + 
G_LOG(ERR, "%s(%s): IP prefix protocol (%hu) does not match the Grantor address protocol (%hu)\n", + __func__, ip_prefix->str, + ip_prefix->addr.proto, gt_addrs[i].proto); ret = -EINVAL; goto put_ether_cache; } + /* + * Verify that the gateway IP address @gw_addrs[i] is NOT + * included in the prefix. + * + * This verification is needed because when a Gatekeeper + * server forwards a packet directly to a protected + * destination, it always forwards the packet to + * the gateway of the associated Grantor server + * (see gk_process_bpf() for details). Thus, the gateway + * cannot be a neighbor, otherwise the packets are not sent + * directly to the protected destination. + * + * Issue #267 discusses the assumptions behind this + * verification. + */ + ret = check_gateway_prefix(ip_prefix, &gw_addrs[i]); + if (unlikely(ret < 0)) + goto put_ether_cache; + /* Find the neighbor FIB entry for this gateway. */ neigh_fibs[i] = find_fib_entry_for_neighbor_locked( &gw_addrs[i], GK_FWD_GATEWAY_BACK_NET, gk_conf); @@ -1350,77 +1427,6 @@ add_fib_entry_locked(const struct ip_prefix *prefix, return 0; } -/* - * Return 0 when @gw_addr is not included in @prefix. - * If not, or if there is an error, return a negative number. - * - * Issue #267 discusses the assumptions behind this verification. 
- */ -static int -check_gateway_prefix(const struct ip_prefix *prefix, struct ipaddr *gw_addr) -{ - if (unlikely(prefix->addr.proto != gw_addr->proto)) { - G_LOG(ERR, "%s(%s): IP prefix protocol (%hu) does not match the gateway address protocol (%hu)\n", - __func__, prefix->str, prefix->addr.proto, - gw_addr->proto); - return -EINVAL; - } - - if (gw_addr->proto == RTE_ETHER_TYPE_IPV4) { - uint32_t ip4_mask = - rte_cpu_to_be_32(~0ULL << (32 - prefix->len)); - if ((prefix->addr.ip.v4.s_addr ^ - gw_addr->ip.v4.s_addr) & ip4_mask) - return 0; - } else if (likely(gw_addr->proto == RTE_ETHER_TYPE_IPV6)) { - uint64_t ip6_mask; - uint64_t *pf = (uint64_t *)prefix->addr.ip.v6.s6_addr; - uint64_t *gw = (uint64_t *)gw_addr->ip.v6.s6_addr; - - if (prefix->len == 0) { - /* Do nothing. */ - } else if (prefix->len <= 64) { - ip6_mask = rte_cpu_to_be_64( - ~0ULL << (64 - prefix->len)); - if ((pf[0] ^ gw[0]) & ip6_mask) - return 0; - } else { - ip6_mask = rte_cpu_to_be_64( - ~0ULL << (128 - prefix->len)); - if ((pf[0] != gw[0]) || - ((pf[1] ^ gw[1]) & ip6_mask)) - return 0; - } - } else { - G_LOG(CRIT, "%s(%s): bug: unknown IP type %hu\n", - __func__, prefix->str, gw_addr->proto); - return -EINVAL; - } - - G_LOG(ERR, "%s(%s): gateway address is in prefix, so gateway is not a neighbor\n", - __func__, prefix->str); - return -EPERM; -} - -/* - * Verify that the IP addresses of gateway FIB entries are not included in - * the prefix. 
- */ -static int -check_gateway_prefixes(const struct ip_prefix *prefix_info, - struct ipaddr *gw_addrs, unsigned int num_addrs) -{ - unsigned int i; - - for (i = 0; i < num_addrs; i++) { - int ret = check_gateway_prefix(prefix_info, &gw_addrs[i]); - if (unlikely(ret < 0)) - return ret; - } - - return 0; -} - static int check_longer_prefixes(const char *context, const struct rib_head *rib, const void *ip, uint8_t depth, const struct gk_fib *fib_table, @@ -1625,10 +1631,6 @@ add_fib_entry_numerical_locked(const struct ip_prefix *prefix_info, __func__, prefix_info->str); } - ret = check_gateway_prefixes(prefix_info, gw_addrs, num_addrs); - if (unlikely(ret < 0)) - return ret; - ret = check_prefix_exists_locked(prefix_info, gk_conf, NULL); if (ret != -ENOENT) { G_LOG(ERR, "%s(%s): prefix already exists or error occurred\n", @@ -1670,12 +1672,8 @@ update_fib_entry_numerical(const struct ip_prefix *prefix_info, { int fib_id; struct gk_fib *cur_fib; - int ret = check_prefix(prefix_info); - - if (unlikely(ret < 0)) - return ret; - ret = check_gateway_prefixes(prefix_info, gw_addrs, num_addrs); + int ret = check_prefix(prefix_info); if (unlikely(ret < 0)) return ret; @@ -1691,7 +1689,6 @@ update_fib_entry_numerical(const struct ip_prefix *prefix_info, ret = add_fib_entry_locked(prefix_info, gt_addrs, gw_addrs, num_addrs, action, props, gk_conf, cur_fib); rte_spinlock_unlock_tm(&gk_conf->lpm_tbl.lock); - return ret; }