Commit 8aeab132 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull virtio updates from Michael Tsirkin:

 - 9k mtu perf improvements

 - vdpa feature provisioning

 - virtio blk SECURE ERASE support

 - fixes and cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio_pci: don't try to use intx if pin is zero
  vDPA: conditionally read MTU and MAC in dev cfg space
  vDPA: fix sparse cast warning in vdpa_dev_net_mq_config_fill
  vDPA: check virtio device features to detect MQ
  vDPA: check VIRTIO_NET_F_RSS for max_virtqueue_pairs's presence
  vDPA: only report driver features if FEATURES_OK is set
  vDPA: allow userspace to query features of a vDPA device
  virtio_blk: add SECURE ERASE command support
  vp_vdpa: support feature provisioning
  vdpa_sim_net: support feature provisioning
  vdpa: device feature provisioning
  virtio-net: use mtu size as buffer length for big packets
  virtio-net: introduce and use helper function for guest gso support checks
  virtio: drop vp_legacy_set_queue_size
  virtio_ring: make vring_alloc_queue_packed prettier
  virtio_ring: split: Operators use unified style
  vhost: add __init/__exit annotations to module init/exit funcs
parents 0e0073eb 71491c54
Loading
Loading
Loading
Loading
+92 −18
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
@@ -240,6 +240,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_SECURE_ERASE:
		type = VIRTIO_BLK_T_SECURE_ERASE;
		break;
	case REQ_OP_DRV_IN:
		type = VIRTIO_BLK_T_GET_ID;
		break;
@@ -251,8 +254,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
	vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
		if (virtblk_setup_discard_write_zeroes(req, unmap))
	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
	    type == VIRTIO_BLK_T_SECURE_ERASE) {
		if (virtblk_setup_discard_write_zeroes_erase(req, unmap))
			return BLK_STS_RESOURCE;
	}

@@ -886,6 +890,8 @@ static int virtblk_probe(struct virtio_device *vdev)
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u32 max_discard_segs = 0;
	u32 discard_granularity = 0;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;
	unsigned int queue_depth;
@@ -1043,27 +1049,14 @@ static int virtblk_probe(struct virtio_device *vdev)

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &v);
		if (v)
			q->limits.discard_granularity = v << SECTOR_SHIFT;
		else
			q->limits.discard_granularity = blk_size;
			     discard_sector_alignment, &discard_granularity);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &v);

		/*
		 * max_discard_seg == 0 is out of spec but we always
		 * handled it.
		 */
		if (!v)
			v = sg_elems;
		blk_queue_max_discard_segments(q,
					       min(v, MAX_DISCARD_SEGMENTS));
			     &max_discard_segs);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
@@ -1072,6 +1065,85 @@ static int virtblk_probe(struct virtio_device *vdev)
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	/* The discard and secure erase limits are combined since the Linux
	 * block layer uses the same limit for both commands.
	 *
	 * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
	 * are negotiated, we will use the minimum between the limits.
	 *
	 * discard sector alignment is set to the minimum between discard_sector_alignment
	 * and secure_erase_sector_alignment.
	 *
	 * max discard segments is set to the minimum between max_discard_seg and
	 * max_secure_erase_seg.
	 */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {

		virtio_cread(vdev, struct virtio_blk_config,
			     secure_erase_sector_alignment, &v);

		/* secure_erase_sector_alignment should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: secure_erase_sector_alignment can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		discard_granularity = min_not_zero(discard_granularity, v);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_sectors, &v);

		/* max_secure_erase_sectors should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_sectors can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		blk_queue_max_secure_erase_sectors(q, v);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_seg, &v);

		/* max_secure_erase_seg should not be zero, the device should set a
		 * valid number of segments
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_seg can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		max_discard_segs = min_not_zero(max_discard_segs, v);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
	    virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
		/* max_discard_segs and discard_granularity will be 0 only
		 * if the max_discard_seg and discard_sector_alignment fields in the
		 * virtio config are 0 and the VIRTIO_BLK_F_SECURE_ERASE feature is
		 * not negotiated. In this case, we use default values.
		 */
		 */
		if (!max_discard_segs)
			max_discard_segs = sg_elems;

		blk_queue_max_discard_segments(q,
					       min(max_discard_segs, MAX_DISCARD_SEGMENTS));

		if (discard_granularity)
			q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT;
		else
			q->limits.discard_granularity = blk_size;
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

@@ -1167,6 +1239,7 @@ static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE,
}
;
static unsigned int features[] = {
@@ -1174,6 +1247,7 @@ static unsigned int features[] = {
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE,
};

static struct virtio_driver virtio_blk = {
+32 −16
Original line number Diff line number Diff line
@@ -225,6 +225,9 @@ struct virtnet_info {
	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

@@ -1331,10 +1334,10 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
	char *p;
	int i, err, offset;

	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
	sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);

	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
	/* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
	for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
@@ -1365,7 +1368,7 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
	err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);
@@ -3682,13 +3685,35 @@ static int virtnet_validate(struct virtio_device *vdev)
	return 0;
}

static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO);
}

/* Decide whether receive buffers have to be "big packets" and, if so, how
 * many page fragments each buffer is made of.
 *
 * @vi:  device state; big_packets and big_packets_num_skbfrags are only
 *       written when big packets turn out to be required.
 * @mtu: MTU to size the buffers for; a value of at most ETH_DATA_LEN does
 *       not enable big packets on its own.
 */
static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
{
	const bool gso = virtnet_check_guest_gso(vi);

	/* Neither guest GSO nor a larger-than-Ethernet MTU: leave vi alone. */
	if (!gso && mtu <= ETH_DATA_LEN)
		return;

	vi->big_packets = true;

	/* With any guest GSO feature the buffer gets the maximum number of
	 * fragments regardless of mtu; otherwise it only needs enough pages
	 * to hold mtu bytes.
	 */
	if (gso)
		vi->big_packets_num_skbfrags = MAX_SKB_FRAGS;
	else
		vi->big_packets_num_skbfrags = DIV_ROUND_UP(mtu, PAGE_SIZE);
}

static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err = -ENOMEM;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;
	int mtu;
	int mtu = 0;

	/* Find if host supports multiqueue/rss virtio_net device */
	max_queue_pairs = 1;
@@ -3776,13 +3801,6 @@ static int virtnet_probe(struct virtio_device *vdev)
	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
	spin_lock_init(&vi->refill_lock);

	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
		vi->big_packets = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		vi->mergeable_rx_bufs = true;

@@ -3848,12 +3866,10 @@ static int virtnet_probe(struct virtio_device *vdev)

		dev->mtu = mtu;
		dev->max_mtu = mtu;

		/* TODO: size buffers correctly in this case. */
		if (dev->mtu > ETH_DATA_LEN)
			vi->big_packets = true;
	}

	virtnet_set_big_packets(vi, mtu);

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

+57 −16
Original line number Diff line number Diff line
@@ -600,6 +600,11 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
		}
		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
	}
	if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) {
		config.device_features =
			nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]);
		config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	}

	/* Skip checking capability if user didn't prefer to configure any
	 * device networking attributes. It is likely that user might have used
@@ -799,51 +804,76 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba
	return msg->len;
}

static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
				       struct sk_buff *msg, u64 features,
static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features,
				       const struct virtio_net_config *config)
{
	u16 val_u16;

	if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0)
	if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0 &&
	    (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0)
		return 0;

	val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
	val_u16 = __virtio16_to_cpu(true, config->max_virtqueue_pairs);

	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
}

/* Add the VDPA_ATTR_DEV_NET_CFG_MTU attribute to @msg, but only when
 * @features has the VIRTIO_NET_F_MTU bit set.
 *
 * @msg:      netlink message being built.
 * @features: feature bits used to decide whether config->mtu is reported.
 * @config:   virtio-net config contents to take the mtu field from.
 *
 * Returns 0 without touching @msg when the feature bit is clear, otherwise
 * the result of nla_put_u16().
 */
static int vdpa_dev_net_mtu_config_fill(struct sk_buff *msg, u64 features,
					const struct virtio_net_config *config)
{
	if (!(features & BIT_ULL(VIRTIO_NET_F_MTU)))
		return 0;

	/* NOTE(review): the 'true' argument presumably selects little-endian
	 * decoding of the field - confirm against __virtio16_to_cpu().
	 */
	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU,
			   __virtio16_to_cpu(true, config->mtu));
}

/* Add the VDPA_ATTR_DEV_NET_CFG_MACADDR attribute to @msg, but only when
 * @features has the VIRTIO_NET_F_MAC bit set.
 *
 * @msg:      netlink message being built.
 * @features: feature bits used to decide whether config->mac is reported.
 * @config:   virtio-net config contents to take the mac field from.
 *
 * Returns 0 without touching @msg when the feature bit is clear, otherwise
 * the result of nla_put().
 */
static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features,
					const struct virtio_net_config *config)
{
	const bool has_mac = features & BIT_ULL(VIRTIO_NET_F_MAC);

	if (!has_mac)
		return 0;

	/* The whole mac array is copied out as the attribute payload. */
	return nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR,
		       sizeof(config->mac), config->mac);
}

static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
{
	struct virtio_net_config config = {};
	u64 features;
	u64 features_device;
	u16 val_u16;

	vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));

	if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
		    config.mac))
		return -EMSGSIZE;
	vdev->config->get_config(vdev, 0, &config, sizeof(config));

	val_u16 = __virtio16_to_cpu(true, config.status);
	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
		return -EMSGSIZE;

	val_u16 = __virtio16_to_cpu(true, config.mtu);
	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
		return -EMSGSIZE;
	features_device = vdev->config->get_device_features(vdev);

	features = vdev->config->get_driver_features(vdev);
	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features,
	if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device,
			      VDPA_ATTR_PAD))
		return -EMSGSIZE;

	return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
	if (vdpa_dev_net_mtu_config_fill(msg, features_device, &config))
		return -EMSGSIZE;

	if (vdpa_dev_net_mac_config_fill(msg, features_device, &config))
		return -EMSGSIZE;

	return vdpa_dev_net_mq_config_fill(msg, features_device, &config);
}

static int
vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
		     int flags, struct netlink_ext_ack *extack)
{
	u64 features_driver;
	u8 status = 0;
	u32 device_id;
	void *hdr;
	int err;
@@ -867,6 +897,17 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
		goto msg_err;
	}

	/* only read driver features after the feature negotiation is done */
	status = vdev->config->get_status(vdev);
	if (status & VIRTIO_CONFIG_S_FEATURES_OK) {
		features_driver = vdev->config->get_driver_features(vdev);
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver,
				      VDPA_ATTR_PAD)) {
			err = -EMSGSIZE;
			goto msg_err;
		}
	}

	switch (device_id) {
	case VIRTIO_ID_NET:
		err = vdpa_dev_net_config_fill(vdev, msg);
+11 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
#include <linux/iova.h>
#include <uapi/linux/vdpa.h>

#include "vdpa_sim.h"

@@ -245,13 +246,22 @@ static const struct dma_map_ops vdpasim_dma_ops = {
static const struct vdpa_config_ops vdpasim_config_ops;
static const struct vdpa_config_ops vdpasim_batch_config_ops;

struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
			       const struct vdpa_dev_set_config *config)
{
	const struct vdpa_config_ops *ops;
	struct vdpasim *vdpasim;
	struct device *dev;
	int i, ret = -ENOMEM;

	if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
		if (config->device_features &
		    ~dev_attr->supported_features)
			return ERR_PTR(-EINVAL);
		dev_attr->supported_features =
			config->device_features;
	}

	if (batch_mapping)
		ops = &vdpasim_batch_config_ops;
	else
+2 −1
Original line number Diff line number Diff line
@@ -71,7 +71,8 @@ struct vdpasim {
	spinlock_t iommu_lock;
};

struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr);
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
			       const struct vdpa_dev_set_config *config);

/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
Loading