Commit 785d21ba authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Replace deprecated git://github.com link in MAINTAINERS (Palmer
   Dabbelt)

 - Simplify vfio/mlx5 with module_pci_driver() helper (Shang XiaoJing)

 - Drop unnecessary buffer from ACPI call (Rafael Mendonca)

 - Correct latent missing include issue in iova-bitmap and fix support
   for unaligned bitmaps. Follow-up with better fix through refactor
   (Joao Martins)

 - Rework ccw mdev driver to split private data from parent structure,
   better aligning with the mdev lifecycle and allowing us to remove a
   temporary workaround (Eric Farman)

 - Add an interface to get an estimated migration data size for a
   device, allowing userspace to make informed decisions, ex. more
   accurately predicting VM downtime (Yishai Hadas)

 - Fix minor typo in vfio/mlx5 array declaration (Yishai Hadas)

 - Simplify module and Kconfig through consolidating SPAPR/EEH code and
   config options and folding virqfd module into main vfio module (Jason
   Gunthorpe)

 - Fix error path from device_register() across all vfio mdev and sample
   drivers (Alex Williamson)

 - Define migration pre-copy interface and implement for vfio/mlx5
   devices, allowing portions of the device state to be saved while the
   device continues operation, towards reducing the stop-copy state size
   (Jason Gunthorpe, Yishai Hadas, Shay Drory)

 - Implement pre-copy for hisi_acc devices (Shameer Kolothum)

 - Fixes to mdpy mdev driver remove path and error path on probe (Shang
   XiaoJing)

 - vfio/mlx5 fixes for incorrect return after copy_to_user() fault and
   incorrect buffer freeing (Dan Carpenter)

* tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio: (42 commits)
  vfio/mlx5: error pointer dereference in error handling
  vfio/mlx5: fix error code in mlx5vf_precopy_ioctl()
  samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe()
  hisi_acc_vfio_pci: Enable PRE_COPY flag
  hisi_acc_vfio_pci: Move the dev compatibility tests for early check
  hisi_acc_vfio_pci: Introduce support for PRE_COPY state transitions
  hisi_acc_vfio_pci: Add support for precopy IOCTL
  vfio/mlx5: Enable MIGRATION_PRE_COPY flag
  vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error
  vfio/mlx5: Introduce multiple loads
  vfio/mlx5: Consider temporary end of stream as part of PRE_COPY
  vfio/mlx5: Introduce vfio precopy ioctl implementation
  vfio/mlx5: Introduce SW headers for migration states
  vfio/mlx5: Introduce device transitions of PRE_COPY
  vfio/mlx5: Refactor to use queue based data chunks
  vfio/mlx5: Refactor migration file state
  vfio/mlx5: Refactor MKEY usage
  vfio/mlx5: Refactor PD usage
  vfio/mlx5: Enforce a single SAVE command at a time
  vfio: Extend the device migration protocol with PRE_COPY
  ...
parents 8fa590bf 70be6f32
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -21781,7 +21781,7 @@ M: Alex Williamson <alex.williamson@redhat.com>
R:	Cornelia Huck <cohuck@redhat.com>
L:	kvm@vger.kernel.org
S:	Maintained
T:	git git://github.com/awilliam/linux-vfio.git
T:	git https://github.com/awilliam/linux-vfio.git
F:	Documentation/ABI/testing/sysfs-devices-vfio-dev
F:	Documentation/driver-api/vfio.rst
F:	drivers/vfio/
+0 −1
Original line number Diff line number Diff line
@@ -1465,7 +1465,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);

	intel_gvt_destroy_vgpu(vgpu);
	vfio_free_device(vfio_dev);
}

static const struct vfio_device_ops intel_vgpu_dev_ops = {
+3 −2
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private,
					  char __user *buf, size_t count,
					  loff_t *ppos)
{
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
	struct ccw_schib_region *region;
@@ -27,12 +28,12 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private,
	mutex_lock(&private->io_mutex);
	region = private->region[i].data;

	if (cio_update_schib(private->sch)) {
	if (cio_update_schib(sch)) {
		ret = -ENODEV;
		goto out;
	}

	memcpy(region, &private->sch->schib, sizeof(*region));
	memcpy(region, &sch->schib, sizeof(*region));

	if (copy_to_user(buf, (void *)region + pos, count)) {
		ret = -EFAULT;
+74 −100
Original line number Diff line number Diff line
@@ -23,10 +23,10 @@
#include "vfio_ccw_private.h"

struct workqueue_struct *vfio_ccw_work_q;
static struct kmem_cache *vfio_ccw_io_region;
static struct kmem_cache *vfio_ccw_cmd_region;
static struct kmem_cache *vfio_ccw_schib_region;
static struct kmem_cache *vfio_ccw_crw_region;
struct kmem_cache *vfio_ccw_io_region;
struct kmem_cache *vfio_ccw_cmd_region;
struct kmem_cache *vfio_ccw_schib_region;
struct kmem_cache *vfio_ccw_crw_region;

debug_info_t *vfio_ccw_debug_msg_id;
debug_info_t *vfio_ccw_debug_trace_id;
@@ -36,10 +36,19 @@ debug_info_t *vfio_ccw_debug_trace_id;
 */
int vfio_ccw_sch_quiesce(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
	DECLARE_COMPLETION_ONSTACK(completion);
	int iretry, ret = 0;

	/*
	 * Probably an impossible situation, after being called through
	 * FSM callbacks. But in the event it did, register a warning
	 * and return as if things were fine.
	 */
	if (WARN_ON(!private))
		return 0;

	iretry = 255;
	do {

@@ -70,7 +79,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch)
	return ret;
}

static void vfio_ccw_sch_io_todo(struct work_struct *work)
void vfio_ccw_sch_io_todo(struct work_struct *work)
{
	struct vfio_ccw_private *private;
	struct irb *irb;
@@ -106,7 +115,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
		eventfd_signal(private->io_trigger, 1);
}

static void vfio_ccw_crw_todo(struct work_struct *work)
void vfio_ccw_crw_todo(struct work_struct *work)
{
	struct vfio_ccw_private *private;

@@ -121,90 +130,39 @@ static void vfio_ccw_crw_todo(struct work_struct *work)
 */
static void vfio_ccw_sch_irq(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);

	inc_irq_stat(IRQIO_CIO);
	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
}
	/*
	 * The subchannel should still be disabled at this point,
	 * so an interrupt would be quite surprising. As with an
	 * interrupt while the FSM is closed, let's attempt to
	 * disable the subchannel again.
	 */
	if (!private) {
		VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: unexpected interrupt\n",
				   sch->schid.cssid, sch->schid.ssid,
				   sch->schid.sch_no);

static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch)
{
	struct vfio_ccw_private *private;
		cio_disable_subchannel(sch);
		return;
	}

	private = kzalloc(sizeof(*private), GFP_KERNEL);
	if (!private)
		return ERR_PTR(-ENOMEM);

	private->sch = sch;
	mutex_init(&private->io_mutex);
	private->state = VFIO_CCW_STATE_STANDBY;
	INIT_LIST_HEAD(&private->crw);
	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);

	private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
				       GFP_KERNEL);
	if (!private->cp.guest_cp)
		goto out_free_private;

	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
					       GFP_KERNEL | GFP_DMA);
	if (!private->io_region)
		goto out_free_cp;

	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
						GFP_KERNEL | GFP_DMA);
	if (!private->cmd_region)
		goto out_free_io;

	private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region,
						  GFP_KERNEL | GFP_DMA);

	if (!private->schib_region)
		goto out_free_cmd;

	private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region,
						GFP_KERNEL | GFP_DMA);

	if (!private->crw_region)
		goto out_free_schib;
	return private;

out_free_schib:
	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
out_free_cmd:
	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
out_free_io:
	kmem_cache_free(vfio_ccw_io_region, private->io_region);
out_free_cp:
	kfree(private->cp.guest_cp);
out_free_private:
	mutex_destroy(&private->io_mutex);
	kfree(private);
	return ERR_PTR(-ENOMEM);
	inc_irq_stat(IRQIO_CIO);
	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
}

static void vfio_ccw_free_private(struct vfio_ccw_private *private)
static void vfio_ccw_free_parent(struct device *dev)
{
	struct vfio_ccw_crw *crw, *temp;
	struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev);

	list_for_each_entry_safe(crw, temp, &private->crw, next) {
		list_del(&crw->next);
		kfree(crw);
	kfree(parent);
}

	kmem_cache_free(vfio_ccw_crw_region, private->crw_region);
	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
	kmem_cache_free(vfio_ccw_io_region, private->io_region);
	kfree(private->cp.guest_cp);
	mutex_destroy(&private->io_mutex);
	kfree(private);
}
static int vfio_ccw_sch_probe(struct subchannel *sch)
{
	struct pmcw *pmcw = &sch->schib.pmcw;
	struct vfio_ccw_private *private;
	struct vfio_ccw_parent *parent;
	int ret = -ENOMEM;

	if (pmcw->qf) {
@@ -213,42 +171,50 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
		return -ENODEV;
	}

	private = vfio_ccw_alloc_private(sch);
	if (IS_ERR(private))
		return PTR_ERR(private);
	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
	if (!parent)
		return -ENOMEM;

	dev_set_name(&parent->dev, "parent");
	parent->dev.parent = &sch->dev;
	parent->dev.release = &vfio_ccw_free_parent;
	ret = device_register(&parent->dev);
	if (ret)
		goto out_free;

	dev_set_drvdata(&sch->dev, private);
	dev_set_drvdata(&sch->dev, parent);

	private->mdev_type.sysfs_name = "io";
	private->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
	private->mdev_types[0] = &private->mdev_type;
	ret = mdev_register_parent(&private->parent, &sch->dev,
	parent->mdev_type.sysfs_name = "io";
	parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
	parent->mdev_types[0] = &parent->mdev_type;
	ret = mdev_register_parent(&parent->parent, &sch->dev,
				   &vfio_ccw_mdev_driver,
				   private->mdev_types, 1);
				   parent->mdev_types, 1);
	if (ret)
		goto out_free;
		goto out_unreg;

	VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n",
			   sch->schid.cssid, sch->schid.ssid,
			   sch->schid.sch_no);
	return 0;

out_unreg:
	device_del(&parent->dev);
out_free:
	put_device(&parent->dev);
	dev_set_drvdata(&sch->dev, NULL);
	vfio_ccw_free_private(private);
	return ret;
}

static void vfio_ccw_sch_remove(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);

	mdev_unregister_parent(&private->parent);
	mdev_unregister_parent(&parent->parent);

	device_unregister(&parent->dev);
	dev_set_drvdata(&sch->dev, NULL);

	vfio_ccw_free_private(private);

	VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n",
			   sch->schid.cssid, sch->schid.ssid,
			   sch->schid.sch_no);
@@ -256,7 +222,11 @@ static void vfio_ccw_sch_remove(struct subchannel *sch)

static void vfio_ccw_sch_shutdown(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);

	if (WARN_ON(!private))
		return;

	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE);
	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
@@ -274,7 +244,8 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch)
 */
static int vfio_ccw_sch_event(struct subchannel *sch, int process)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
	unsigned long flags;
	int rc = -EAGAIN;

@@ -287,8 +258,10 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)

	rc = 0;

	if (cio_update_schib(sch))
	if (cio_update_schib(sch)) {
		if (private)
			vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
	}

out_unlock:
	spin_unlock_irqrestore(sch->lock, flags);
@@ -326,14 +299,15 @@ static void vfio_ccw_queue_crw(struct vfio_ccw_private *private,
static int vfio_ccw_chp_event(struct subchannel *sch,
			      struct chp_link *link, int event)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
	int mask = chp_ssd_get_mask(&sch->ssd_info, link);
	int retry = 255;

	if (!private || !mask)
		return 0;

	trace_vfio_ccw_chp_event(private->sch->schid, mask, event);
	trace_vfio_ccw_chp_event(sch->schid, mask, event);
	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n",
			   sch->schid.cssid,
			   sch->schid.ssid, sch->schid.sch_no,
+12 −15
Original line number Diff line number Diff line
@@ -18,15 +18,13 @@

static int fsm_io_helper(struct vfio_ccw_private *private)
{
	struct subchannel *sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	union orb *orb;
	int ccode;
	__u8 lpm;
	unsigned long flags;
	int ret;

	sch = private->sch;

	spin_lock_irqsave(sch->lock, flags);

	orb = cp_get_orb(&private->cp, (u32)virt_to_phys(sch), sch->lpm);
@@ -80,13 +78,11 @@ static int fsm_io_helper(struct vfio_ccw_private *private)

static int fsm_do_halt(struct vfio_ccw_private *private)
{
	struct subchannel *sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	unsigned long flags;
	int ccode;
	int ret;

	sch = private->sch;

	spin_lock_irqsave(sch->lock, flags);

	VFIO_CCW_TRACE_EVENT(2, "haltIO");
@@ -121,13 +117,11 @@ static int fsm_do_halt(struct vfio_ccw_private *private)

static int fsm_do_clear(struct vfio_ccw_private *private)
{
	struct subchannel *sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	unsigned long flags;
	int ccode;
	int ret;

	sch = private->sch;

	spin_lock_irqsave(sch->lock, flags);

	VFIO_CCW_TRACE_EVENT(2, "clearIO");
@@ -160,7 +154,7 @@ static int fsm_do_clear(struct vfio_ccw_private *private)
static void fsm_notoper(struct vfio_ccw_private *private,
			enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);

	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n",
			   sch->schid.cssid,
@@ -228,7 +222,7 @@ static void fsm_async_retry(struct vfio_ccw_private *private,
static void fsm_disabled_irq(struct vfio_ccw_private *private,
			     enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);

	/*
	 * An interrupt in a disabled state means a previous disable was not
@@ -238,7 +232,9 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private,
}
inline struct subchannel_id get_schid(struct vfio_ccw_private *p)
{
	return p->sch->schid;
	struct subchannel *sch = to_subchannel(p->vdev.dev->parent);

	return sch->schid;
}

/*
@@ -360,10 +356,11 @@ static void fsm_async_request(struct vfio_ccw_private *private,
static void fsm_irq(struct vfio_ccw_private *private,
		    enum vfio_ccw_event event)
{
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	struct irb *irb = this_cpu_ptr(&cio_irb);

	VFIO_CCW_TRACE_EVENT(6, "IRQ");
	VFIO_CCW_TRACE_EVENT(6, dev_name(&private->sch->dev));
	VFIO_CCW_TRACE_EVENT(6, dev_name(&sch->dev));

	memcpy(&private->irb, irb, sizeof(*irb));

@@ -376,7 +373,7 @@ static void fsm_irq(struct vfio_ccw_private *private,
static void fsm_open(struct vfio_ccw_private *private,
		     enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	int ret;

	spin_lock_irq(sch->lock);
@@ -397,7 +394,7 @@ static void fsm_open(struct vfio_ccw_private *private,
static void fsm_close(struct vfio_ccw_private *private,
		      enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;
	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
	int ret;

	spin_lock_irq(sch->lock);
Loading