Commit 93c60b59 authored by David S. Miller
Browse files

Merge branch 'tun-vnet-uso'

Andrew Melnychenko says:

====================
TUN/VirtioNet USO features support.

Added new offloads for TUN devices TUN_F_USO4 and TUN_F_USO6.
Technically they enable NETIF_F_GSO_UDP_L4
(and only if USO4 & USO6 are set simultaneously).
It allows the transmission of large UDP packets.

UDP Segmentation Offload (USO/GSO_UDP_L4) is the ability to split UDP packets
into several segments. It's similar to UFO, except it doesn't use IP
fragmentation. The drivers may push big packets and the NIC will split
them (or assemble them in the case of receive), but in the case of VirtioNet
we just pass the big UDP packet to the host. So we are freeing the driver from
doing the unnecessary job of splitting. The same applies to several guests
on one host: we can pass big packets between guests.

Separate features USO4 and USO6 are required for QEMU, where Windows
guests can enable or disable USO receive for IPv4 and IPv6 separately.
On the other side, Linux can't really differentiate USO4 and USO6 for now,
so enabling USO for TUN requires enabling USO4 and USO6 together.
In the future, there would be a mechanism to control UDP_L4 GSO separately.

The new types for virtio-net are already in the virtio-net specification:
https://github.com/oasis-tcs/virtio-spec/issues/120

Test it with the WIP QEMU branch: https://github.com/daynix/qemu/tree/USOv3



Changes since v4 & RFC:
 * Fixed typo and refactored.
 * Tun USO offload refactored.
 * Add support for guest-to-guest segmentation offload (thx Jason).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents dd8b3a80 418044e1
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -957,6 +957,10 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
			if (arg & TUN_F_TSO6)
				feature_mask |= NETIF_F_TSO6;
		}

		/* TODO: for now USO4 and USO6 should work simultaneously */
		if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6))
			features |= NETIF_F_GSO_UDP_L4;
	}

	/* tun/tap driver inverts the usage for TSO offloads, where
@@ -967,7 +971,8 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
	 * When user space turns off TSO, we turn off GSO/LRO so that
	 * user-space will not receive TSO frames.
	 */
	if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6))
	if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) ||
	    (feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6))
		features |= RX_OFFLOADS;
	else
		features &= ~RX_OFFLOADS;
@@ -1091,7 +1096,8 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
	case TUNSETOFFLOAD:
		/* let the user check for future flags */
		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
			    TUN_F_TSO_ECN | TUN_F_UFO))
			    TUN_F_TSO_ECN | TUN_F_UFO |
			    TUN_F_USO4 | TUN_F_USO6))
			return -EINVAL;

		rtnl_lock();
+7 −1
Original line number Diff line number Diff line
@@ -185,7 +185,7 @@ struct tun_struct {
	struct net_device	*dev;
	netdev_features_t	set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
			  NETIF_F_TSO6)
			  NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)

	int			align;
	int			vnet_hdr_sz;
@@ -2878,6 +2878,12 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
		}

		arg &= ~TUN_F_UFO;

		/* TODO: for now USO4 and USO6 should work simultaneously */
		if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
			features |= NETIF_F_GSO_UDP_L4;
			arg &= ~(TUN_F_USO4 | TUN_F_USO6);
		}
	}

	/* This gives the user a way to test for new features in future by
+15 −4
Original line number Diff line number Diff line
@@ -60,13 +60,17 @@ static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				(1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				(1ULL << VIRTIO_NET_F_GUEST_UFO))
				(1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				(1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
@@ -3085,7 +3089,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
		return -EOPNOTSUPP;
	}
@@ -3690,7 +3696,9 @@ static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO);
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
}

static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
@@ -3759,6 +3767,8 @@ static int virtnet_probe(struct virtio_device *vdev)
			dev->hw_features |= NETIF_F_TSO6;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->hw_features |= NETIF_F_TSO_ECN;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
			dev->hw_features |= NETIF_F_GSO_UDP_L4;

		dev->features |= NETIF_F_GSO_ROBUST;

@@ -4036,6 +4046,7 @@ static struct virtio_device_id id_table[] = {
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
+9 −0
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
	case VIRTIO_NET_HDR_GSO_TCPV6:
		return protocol == cpu_to_be16(ETH_P_IPV6);
	case VIRTIO_NET_HDR_GSO_UDP:
	case VIRTIO_NET_HDR_GSO_UDP_L4:
		return protocol == cpu_to_be16(ETH_P_IP) ||
		       protocol == cpu_to_be16(ETH_P_IPV6);
	default:
@@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
	switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
	case VIRTIO_NET_HDR_GSO_TCPV4:
	case VIRTIO_NET_HDR_GSO_UDP:
	case VIRTIO_NET_HDR_GSO_UDP_L4:
		skb->protocol = cpu_to_be16(ETH_P_IP);
		break;
	case VIRTIO_NET_HDR_GSO_TCPV6:
@@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
			ip_proto = IPPROTO_UDP;
			thlen = sizeof(struct udphdr);
			break;
		case VIRTIO_NET_HDR_GSO_UDP_L4:
			gso_type = SKB_GSO_UDP_L4;
			ip_proto = IPPROTO_UDP;
			thlen = sizeof(struct udphdr);
			break;
		default:
			return -EINVAL;
		}
@@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (sinfo->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (sinfo->gso_type & SKB_GSO_UDP_L4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
		else
			return -EINVAL;
		if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+2 −0
Original line number Diff line number Diff line
@@ -90,6 +90,8 @@
#define TUN_F_TSO6	0x04	/* I can handle TSO for IPv6 packets */
#define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
#define TUN_F_UFO	0x10	/* I can handle UFO packets */
#define TUN_F_USO4	0x20	/* I can handle USO for IPv4 packets */
#define TUN_F_USO6	0x40	/* I can handle USO for IPv6 packets */

/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
#define TUN_PKT_STRIP	0x0001
Loading