Commit 8ccd54fe authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull virtio updates from Michael Tsirkin:
 "virtio,vhost,vdpa: features, fixes, and cleanups:

   - reduction in interrupt rate in virtio

   - perf improvement for VDUSE

   - scalability for vhost-scsi

   - non power of 2 ring support for packed rings

   - better management for mlx5 vdpa

   - suspend for snet

   - VIRTIO_F_NOTIFICATION_DATA

   - shared backend with vdpa-sim-blk

   - user VA support in vdpa-sim

   - better struct packing for virtio

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (52 commits)
  vhost_vdpa: fix unmap process in no-batch mode
  MAINTAINERS: make me a reviewer of VIRTIO CORE AND NET DRIVERS
  tools/virtio: fix build caused by virtio_ring changes
  virtio_ring: add a struct device forward declaration
  vdpa_sim_blk: support shared backend
  vdpa_sim: move buffer allocation in the devices
  vdpa/snet: use likely/unlikely macros in hot functions
  vdpa/snet: implement kick_vq_with_data callback
  virtio-vdpa: add VIRTIO_F_NOTIFICATION_DATA feature support
  virtio: add VIRTIO_F_NOTIFICATION_DATA feature support
  vdpa/snet: support the suspend vDPA callback
  vdpa/snet: support getting and setting VQ state
  MAINTAINERS: add vringh.h to Virtio Core and Net Drivers
  vringh: address kdoc warnings
  vdpa: address kdoc warnings
  virtio_ring: don't update event idx on get_buf
  vdpa_sim: add support for user VA
  vdpa_sim: replace the spinlock with a mutex to protect the state
  vdpa_sim: use kthread worker
  vdpa_sim: make devices agnostic for work management
  ...
parents 0835b5ee c82729e0
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -22212,6 +22212,7 @@ F: include/uapi/linux/virtio_console.h
VIRTIO CORE AND NET DRIVERS
M:	"Michael S. Tsirkin" <mst@redhat.com>
M:	Jason Wang <jasowang@redhat.com>
R:	Xuan Zhuo <xuanzhuo@linux.alibaba.com>
L:	virtualization@lists.linux-foundation.org
S:	Maintained
F:	Documentation/ABI/testing/sysfs-bus-vdpa
@@ -22225,6 +22226,7 @@ F: drivers/vdpa/
F:	drivers/virtio/
F:	include/linux/vdpa.h
F:	include/linux/virtio*.h
F:	include/linux/vringh.h
F:	include/uapi/linux/virtio_*.h
F:	tools/virtio/
+19 −3
Original line number Diff line number Diff line
@@ -391,7 +391,7 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
	ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
}

static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
static inline bool virtio_ccw_do_kvm_notify(struct virtqueue *vq, u32 data)
{
	struct virtio_ccw_vq_info *info = vq->priv;
	struct virtio_ccw_device *vcdev;
@@ -402,12 +402,22 @@ static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
	BUILD_BUG_ON(sizeof(struct subchannel_id) != sizeof(unsigned int));
	info->cookie = kvm_hypercall3(KVM_S390_VIRTIO_CCW_NOTIFY,
				      *((unsigned int *)&schid),
				      vq->index, info->cookie);
				      data, info->cookie);
	if (info->cookie < 0)
		return false;
	return true;
}

static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
{
	return virtio_ccw_do_kvm_notify(vq, vq->index);
}

static bool virtio_ccw_kvm_notify_with_data(struct virtqueue *vq)
{
	return virtio_ccw_do_kvm_notify(vq, vring_notification_data(vq));
}

static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
				   struct ccw1 *ccw, int index)
{
@@ -495,6 +505,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
					     struct ccw1 *ccw)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	bool (*notify)(struct virtqueue *vq);
	int err;
	struct virtqueue *vq = NULL;
	struct virtio_ccw_vq_info *info;
@@ -502,6 +513,11 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
	unsigned long flags;
	bool may_reduce;

	if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA))
		notify = virtio_ccw_kvm_notify_with_data;
	else
		notify = virtio_ccw_kvm_notify;

	/* Allocate queue. */
	info = kzalloc(sizeof(struct virtio_ccw_vq_info), GFP_KERNEL);
	if (!info) {
@@ -524,7 +540,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
	may_reduce = vcdev->revision > 0;
	vq = vring_create_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN,
				    vdev, true, may_reduce, ctx,
				    virtio_ccw_kvm_notify, callback, name);
				    notify, callback, name);

	if (!vq) {
		/* For now, we fail if we can't get the requested size. */
+156 −105
Original line number Diff line number Diff line
@@ -778,12 +778,28 @@ static bool vq_is_tx(u16 idx)
	return idx % 2;
}

static u16 get_features_12_3(u64 features)
enum {
	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
	MLX5_VIRTIO_NET_F_CSUM = 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};

static u16 get_features(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
@@ -797,6 +813,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	u16 mlx_features;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
@@ -812,6 +829,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
		goto err_alloc;
	}

	mlx_features = get_features(ndev->mvdev.actual_features);
	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
@@ -822,7 +840,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
		 mlx_features >> 3);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
		 mlx_features & 7);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

@@ -2171,23 +2191,27 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
	return MLX5_VDPA_DATAVQ_GROUP;
}

enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};

static u64 mlx_to_vritio_features(u16 dev_features)
{
	u64 result = 0;

	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

	return result;
@@ -2298,6 +2322,113 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
	}
}

static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}

static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
	    VPORT_STATE_UP)
		return true;

	return false;
}

static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	if (ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}

static int queue_link_work(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_wq_ent *wqent;

	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
	if (!wqent)
		return -ENOMEM;

	wqent->mvdev = &ndev->mvdev;
	INIT_WORK(&wqent->work, update_carrier);
	queue_work(ndev->mvdev.wq, &wqent->work);
	return 0;
}

static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			if (queue_link_work(ndev))
				return NOTIFY_DONE;

			ret = NOTIFY_OK;
			break;
		default:
			return NOTIFY_DONE;
		}
		return ret;
	}
	return ret;
}

static void register_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
		return;

	ndev->nb.notifier_call = event_handler;
	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
	ndev->nb_registered = true;
	queue_link_work(ndev);
}

static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->nb_registered)
		return;

	ndev->nb_registered = false;
	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
	if (ndev->mvdev.wq)
		flush_workqueue(ndev->mvdev.wq);
}

static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2567,10 +2698,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
				goto err_setup;
			}
			register_link_notifier(ndev);
			err = setup_driver(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
				goto err_driver;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -2582,6 +2714,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
	up_write(&ndev->reslock);
	return;

err_driver:
	unregister_link_notifier(ndev);
err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
@@ -2607,6 +2741,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
	mlx5_vdpa_info(mvdev, "performing device reset\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	teardown_driver(ndev);
	clear_vqs_ready(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
@@ -2861,9 +2996,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
	mlx5_vdpa_info(mvdev, "suspending device\n");

	down_write(&ndev->reslock);
	ndev->nb_registered = false;
	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	flush_workqueue(ndev->mvdev.wq);
	unregister_link_notifier(ndev);
	for (i = 0; i < ndev->cur_num_vqs; i++) {
		mvq = &ndev->vqs[i];
		suspend_vq(ndev, mvq);
@@ -3000,84 +3133,6 @@ struct mlx5_vdpa_mgmtdev {
	struct mlx5_vdpa_net *ndev;
};

static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}

static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
	    VPORT_STATE_UP)
		return true;

	return false;
}

static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	if (ndev->nb_registered && ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}

static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;
	struct mlx5_vdpa_wq_ent *wqent;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
			return NOTIFY_DONE;
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
			if (!wqent)
				return NOTIFY_DONE;

			wqent->mvdev = &ndev->mvdev;
			INIT_WORK(&wqent->work, update_carrier);
			queue_work(ndev->mvdev.wq, &wqent->work);
			ret = NOTIFY_OK;
			break;
		default:
			return NOTIFY_DONE;
		}
		return ret;
	}
	return ret;
}

static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -3127,6 +3182,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			return -EINVAL;
		}
		device_features &= add_config->device_features;
	} else {
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	}
	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
@@ -3258,9 +3315,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
		goto err_res2;
	}

	ndev->nb.notifier_call = event_handler;
	mlx5_notifier_register(mdev, &ndev->nb);
	ndev->nb_registered = true;
	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
	if (err)
@@ -3294,10 +3348,7 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *

	mlx5_vdpa_remove_debugfs(ndev->debugfs);
	ndev->debugfs = NULL;
	if (ndev->nb_registered) {
		ndev->nb_registered = false;
		mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	}
	unregister_link_notifier(ndev);
	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
+1 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_ctrl.o
# snet_hwmon.o calls into the hwmon core, so only build it when the
# kernel provides HWMON support.
ifdef CONFIG_HWMON
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_hwmon.o
endif
+330 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * SolidRun DPU driver for control plane
 *
 * Copyright (C) 2022-2023 SolidRun
 *
 * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
 *
 */

#include <linux/iopoll.h>

#include "snet_vdpa.h"

/* Opcodes written to the DPU's opcode register */
enum snet_ctrl_opcodes {
	SNET_CTRL_OP_DESTROY = 1,
	SNET_CTRL_OP_READ_VQ_STATE,
	SNET_CTRL_OP_SUSPEND,
};

/* Polling timeout passed to readx_poll_timeout(), in microseconds */
#define SNET_CTRL_TIMEOUT	        2000000

/* Bit masks over the raw 32-bit control register; the fields match
 * struct snet_ctrl_reg_ctrl below.
 */
#define SNET_CTRL_DATA_SIZE_MASK	0x0000FFFF
#define SNET_CTRL_IN_PROCESS_MASK	0x00010000
#define SNET_CTRL_CHUNK_RDY_MASK	0x00020000
#define SNET_CTRL_ERROR_MASK		0x0FFC0000

/* Extract the 10-bit error field and negate it into an errno-style value */
#define SNET_VAL_TO_ERR(val)		(-(((val) & SNET_CTRL_ERROR_MASK) >> 18))
/* "Empty" control: an error is reported, or no command is in process */
#define SNET_EMPTY_CTRL(val)		(((val) & SNET_CTRL_ERROR_MASK) || \
						!((val) & SNET_CTRL_IN_PROCESS_MASK))
/* Data (or an error) can be consumed by the driver */
#define SNET_DATA_READY(val)		((val) & (SNET_CTRL_ERROR_MASK | SNET_CTRL_CHUNK_RDY_MASK))

/* Control register used to read data from the DPU */
struct snet_ctrl_reg_ctrl {
	/* Chunk size in 4B words */
	u16 data_size;
	/* We are in the middle of a command */
	u16 in_process:1;
	/* A data chunk is ready and can be consumed */
	u16 chunk_ready:1;
	/* Error code */
	u16 error:10;
	/* Saved for future usage */
	u16 rsvd:4;
};

/* Opcode register */
struct snet_ctrl_reg_op {
	u16 opcode;
	/* Only if VQ index is relevant for the command */
	u16 vq_idx;
};

/* Register window exposed by the DPU inside the device BAR
 * (located at cfg.ctrl_off, see snet_get_ctrl()).
 */
struct snet_ctrl_regs {
	struct snet_ctrl_reg_op op;
	struct snet_ctrl_reg_ctrl ctrl;
	u32 rsvd;
	u32 data[];
};

/* Locate the DPU's control register window inside the device BAR. */
static struct snet_ctrl_regs __iomem *snet_get_ctrl(struct snet *snet)
{
	void __iomem *bar = snet->bar;

	return bar + snet->psnet->cfg.ctrl_off;
}

/* Poll the control register until no command is in process
 * (or until the DPU reports an error). Returns 0 or -ETIMEDOUT.
 */
static int snet_wait_for_empty_ctrl(struct snet_ctrl_regs __iomem *regs)
{
	u32 val;

	/* Poll every 10us, give up after SNET_CTRL_TIMEOUT us */
	return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_EMPTY_CTRL(val), 10,
				  SNET_CTRL_TIMEOUT);
}

/* Poll the opcode register until the DPU clears it — clearing the
 * opcode register is how the DPU ACKs/completes a command.
 */
static int snet_wait_for_empty_op(struct snet_ctrl_regs __iomem *regs)
{
	u32 val;

	return readx_poll_timeout(ioread32, &regs->op, val, !val, 10, SNET_CTRL_TIMEOUT);
}

/* Poll the control register until a data chunk is ready to be consumed
 * or the DPU reports an error. Returns 0 or -ETIMEDOUT.
 */
static int snet_wait_for_data(struct snet_ctrl_regs __iomem *regs)
{
	u32 val;

	return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_DATA_READY(val), 10,
				  SNET_CTRL_TIMEOUT);
}

/* Read the word_idx'th 32-bit word of the current data chunk */
static u32 snet_read32_word(struct snet_ctrl_regs __iomem *ctrl_regs, u16 word_idx)
{
	return ioread32(&ctrl_regs->data[word_idx]);
}

/* Read the raw 32-bit control register */
static u32 snet_read_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs)
{
	return ioread32(&ctrl_regs->ctrl);
}

/* Write the raw 32-bit control register */
static void snet_write_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
{
	iowrite32(val, &ctrl_regs->ctrl);
}

/* Write the raw 32-bit opcode register */
static void snet_write_op(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
{
	iowrite32(val, &ctrl_regs->op);
}

/* Returns 0 once the DPU has fully completed the command, -ETIMEDOUT otherwise */
static int snet_wait_for_dpu_completion(struct snet_ctrl_regs __iomem *ctrl_regs)
{
	/* Wait until the DPU finishes completely.
	 * It will clear the opcode register.
	 */
	return snet_wait_for_empty_op(ctrl_regs);
}

/* Reading ctrl from the DPU:
 * buf_size must be 4B aligned
 *
 * Steps:
 *
 * (1) Verify that the DPU is not in the middle of another operation by
 *     reading the in_process and error bits in the control register.
 * (2) Write the request opcode and the VQ idx in the opcode register
 *     and write the buffer size in the control register.
 * (3) Start reading chunks of data, chunk_ready bit indicates that a
 *     data chunk is available, we signal that we read the data by clearing the bit.
 * (4) Detect that the transfer is completed when the in_process bit
 *     in the control register is cleared or when an error appears.
 *
 * Returns 0 on success, a negative errno on timeout/DPU error.
 * May sleep (takes snet->ctrl_lock); serialized against other control ops.
 */
static int snet_ctrl_read_from_dpu(struct snet *snet, u16 opcode, u16 vq_idx, void *buffer,
				   u32 buf_size)
{
	struct pci_dev *pdev = snet->pdev;
	struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
	u32 *bfr_ptr = (u32 *)buffer;
	u32 val;
	u16 buf_words;
	int ret;
	u16 words, i, tot_words = 0;

	/* Supported for config 2+ */
	if (!SNET_CFG_VER(snet, 2))
		return -EOPNOTSUPP;

	/* The chunk loop below copies whole 4B words only */
	if (!IS_ALIGNED(buf_size, 4))
		return -EINVAL;

	mutex_lock(&snet->ctrl_lock);

	buf_words = buf_size / 4;

	/* Make sure control register is empty */
	ret = snet_wait_for_empty_ctrl(regs);
	if (ret) {
		SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
		goto exit;
	}

	/* We need to write the buffer size in the control register, and the opcode + vq index in
	 * the opcode register.
	 * We use a spinlock to serialize the writes.
	 */
	spin_lock(&snet->ctrl_spinlock);

	snet_write_ctrl(regs, buf_words);
	snet_write_op(regs, opcode | (vq_idx << 16));

	spin_unlock(&snet->ctrl_spinlock);

	/* Consume chunks until the caller's buffer is full or the DPU is done */
	while (buf_words != tot_words) {
		ret = snet_wait_for_data(regs);
		if (ret) {
			SNET_WARN(pdev, "Timeout waiting for control data\n");
			goto exit;
		}

		val = snet_read_ctrl(regs);

		/* Error? */
		if (val & SNET_CTRL_ERROR_MASK) {
			ret = SNET_VAL_TO_ERR(val);
			SNET_WARN(pdev, "Error while reading control data from DPU, err %d\n", ret);
			goto exit;
		}

		/* Copy at most the remaining room in the caller's buffer */
		words = min_t(u16, val & SNET_CTRL_DATA_SIZE_MASK, buf_words - tot_words);

		for (i = 0; i < words; i++) {
			*bfr_ptr = snet_read32_word(regs, i);
			bfr_ptr++;
		}

		tot_words += words;

		/* Is the job completed? */
		if (!(val & SNET_CTRL_IN_PROCESS_MASK))
			break;

		/* Clear the chunk ready bit and continue */
		val &= ~SNET_CTRL_CHUNK_RDY_MASK;
		snet_write_ctrl(regs, val);
	}

	ret = snet_wait_for_dpu_completion(regs);
	if (ret)
		SNET_WARN(pdev, "Timeout waiting for the DPU to complete a control command\n");

exit:
	mutex_unlock(&snet->ctrl_lock);
	return ret;
}

/* Send a control message to the DPU using the old mechanism
 * used with config version 1.
 *
 * Returns 0 on success, -ETIMEDOUT if the DPU never ACKs.
 * May sleep (takes snet->ctrl_lock).
 */
static int snet_send_ctrl_msg_old(struct snet *snet, u32 opcode)
{
	struct pci_dev *pdev = snet->pdev;
	struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
	int ret;

	mutex_lock(&snet->ctrl_lock);

	/* Old mechanism uses just 1 register, the opcode register.
	 * Make sure that the opcode register is empty, and that the DPU isn't
	 * processing an old message.
	 */
	ret = snet_wait_for_empty_op(regs);
	if (ret) {
		SNET_WARN(pdev, "Timeout waiting for previous control message to be ACKed\n");
		goto exit;
	}

	/* Write the message */
	snet_write_op(regs, opcode);

	/* DPU ACKs the message by clearing the opcode register */
	ret = snet_wait_for_empty_op(regs);
	if (ret)
		SNET_WARN(pdev, "Timeout waiting for a control message to be ACKed\n");

exit:
	mutex_unlock(&snet->ctrl_lock);
	return ret;
}

/* Send a control message to the DPU.
 * A control message is a message without payload.
 *
 * Returns 0 on success, a negative errno on timeout or on an error
 * reported by the DPU in the control register.
 * May sleep (takes snet->ctrl_lock).
 *
 * Fix: the DPU-reported error (SNET_VAL_TO_ERR) was previously
 * overwritten by the completion-wait result, so a message the DPU
 * NAK'd could still return 0. The DPU error now takes precedence;
 * a completion timeout is reported only when the DPU did not
 * already report an error.
 */
static int snet_send_ctrl_msg(struct snet *snet, u16 opcode, u16 vq_idx)
{
	struct pci_dev *pdev = snet->pdev;
	struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
	u32 val;
	int ret, err;

	/* If config version is not 2+, use the old mechanism */
	if (!SNET_CFG_VER(snet, 2))
		return snet_send_ctrl_msg_old(snet, opcode);

	mutex_lock(&snet->ctrl_lock);

	/* Make sure control register is empty */
	ret = snet_wait_for_empty_ctrl(regs);
	if (ret) {
		SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
		goto exit;
	}

	/* We need to clear the control register and write the opcode + vq index in the opcode
	 * register.
	 * We use a spinlock to serialize the writes.
	 */
	spin_lock(&snet->ctrl_spinlock);

	snet_write_ctrl(regs, 0);
	snet_write_op(regs, opcode | (vq_idx << 16));

	spin_unlock(&snet->ctrl_spinlock);

	/* The DPU ACKs control messages by setting the chunk ready bit
	 * without data.
	 */
	ret = snet_wait_for_data(regs);
	if (ret) {
		SNET_WARN(pdev, "Timeout waiting for control message to be ACKed\n");
		goto exit;
	}

	/* Check for errors reported by the DPU */
	val = snet_read_ctrl(regs);
	ret = SNET_VAL_TO_ERR(val);

	/* Clear the chunk ready bit */
	val &= ~SNET_CTRL_CHUNK_RDY_MASK;
	snet_write_ctrl(regs, val);

	err = snet_wait_for_dpu_completion(regs);
	if (err) {
		SNET_WARN(pdev, "Timeout waiting for DPU to complete a control command, err %d\n",
			  err);
		/* Don't mask a DPU-reported error with the timeout */
		if (!ret)
			ret = err;
	}

exit:
	mutex_unlock(&snet->ctrl_lock);
	return ret;
}

/* Clear any pending opcode so the control interface starts from a clean state. */
void snet_ctrl_clear(struct snet *snet)
{
	snet_write_op(snet_get_ctrl(snet), 0);
}

/* Ask the DPU to destroy the device (control message, no payload) */
int snet_destroy_dev(struct snet *snet)
{
	return snet_send_ctrl_msg(snet, SNET_CTRL_OP_DESTROY, 0);
}

/* Read the state of VQ @idx from the DPU into @state */
int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state)
{
	return snet_ctrl_read_from_dpu(snet, SNET_CTRL_OP_READ_VQ_STATE, idx, state,
				       sizeof(*state));
}

/* Ask the DPU to suspend the device (control message, no payload) */
int snet_suspend_dev(struct snet *snet)
{
	return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0);
}
Loading