Commit b210de4f authored by Aya Levin's avatar Aya Levin Committed by Jakub Kicinski
Browse files

net: ipv6: Validate GSO SKB before finish IPv6 processing



There are cases where GSO segment's length exceeds the egress MTU:
 - Forwarding of a TCP GRO skb, when DF flag is not set.
 - Forwarding of an skb that arrived on a virtualisation interface
   (virtio-net/vhost/tap) with TSO/GSO size set by other network
   stack.
 - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
   interface with a smaller MTU.
 - Arriving GRO skb (or GSO skb in a virtualised environment) that is
   bridged to a NETIF_F_TSO tunnel stacked over an interface with an
   insufficient MTU.

If so:
 - Consume the SKB and its segments.
 - Issue an ICMP packet with 'Packet Too Big' message containing the
   MTU, allowing the source host to reduce its Path MTU appropriately.

Note: The IPv4 output finish path already handles these cases in the same
manner. This patch aligns the IPv6 behavior with that of IPv4.

Fixes: 9e508490 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent a2bc221b
Loading
Loading
Loading
Loading
+40 −1
Original line number Diff line number Diff line
@@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
	return -EINVAL;
}

/* Slow path for a GSO skb whose segments do not pass network-length
 * validation against the egress MTU: segment it with the GSO feature bits
 * masked off and send every resulting segment on its own.
 *
 * @net, @sk: handed through unchanged to the output helpers.
 * @skb: the GSO skb; it is always released here (kfree_skb() when
 *       segmentation fails, consume_skb() once the segments exist).
 * @mtu: egress MTU; only segments longer than this are fragmented.
 *
 * Returns 0 on success, -ENOMEM when skb_gso_segment() yields an error or
 * no segments, otherwise the first non-zero error seen while sending.
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* The original skb is no longer needed once the segment list exists. */
	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		/* Detach the segment so it is sent as a standalone skb. */
		skb_mark_not_on_list(segs);

		/* The last GSO segment can be smaller than gso_size (and the
		 * MTU). Pushing it through ip6_fragment() anyway would add a
		 * fragment header to a packet that is its own only fragment,
		 * i.e. an "atomic fragment", which RFC 8021 considers
		 * harmful. Only fragment segments that really exceed the MTU.
		 */
		if (segs->len > mtu)
			err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		else
			err = ip6_finish_output2(net, sk, segs);

		/* Keep sending the remaining segments; report the first error. */
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
@@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);