Commit 17688215 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio

Pull vfio updates from Alex Williamson:

 - Improvements to mlx5 vfio-pci variant driver, including support for
   parallel migration per PF (Yishai Hadas)

 - Remove redundant iommu_present() check (Robin Murphy)

 - Ongoing refactoring to consolidate the VFIO driver facing API to use
   vfio_device (Jason Gunthorpe)

 - Use drvdata to store vfio_device among all vfio-pci and variant
   drivers (Jason Gunthorpe)

 - Remove redundant code now that IOMMU core manages group DMA ownership
   (Jason Gunthorpe)

 - Remove vfio_group from external API handling struct file ownership
   (Jason Gunthorpe)

 - Correct typo in uapi comments (Thomas Huth)

 - Fix coccicheck detected deadlock (Wan Jiabing)

 - Use rwsem to remove races and simplify code around container and kvm
   association to groups (Jason Gunthorpe)

 - Harden access to devices in low power states and use runtime PM to
   enable d3cold support for unused devices (Abhishek Sahu)

 - Fix dma_owner handling of fake IOMMU groups (Jason Gunthorpe)

 - Set driver_managed_dma on vfio-pci variant drivers (Jason Gunthorpe)

 - Pass KVM pointer directly rather than via notifier (Matthew Rosato)

* tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio: (38 commits)
  vfio: remove VFIO_GROUP_NOTIFY_SET_KVM
  vfio/pci: Add driver_managed_dma to the new vfio_pci drivers
  vfio: Do not manipulate iommu dma_owner for fake iommu groups
  vfio/pci: Move the unused device into low power state with runtime PM
  vfio/pci: Virtualize PME related registers bits and initialize to zero
  vfio/pci: Change the PF power state to D0 before enabling VFs
  vfio/pci: Invalidate mmaps and block the access in D3hot power state
  vfio: Change struct vfio_group::container_users to a non-atomic int
  vfio: Simplify the life cycle of the group FD
  vfio: Fully lock struct vfio_group::container
  vfio: Split up vfio_group_get_device_fd()
  vfio: Change struct vfio_group::opened from an atomic to bool
  vfio: Add missing locking for struct vfio_group::kvm
  kvm/vfio: Fix potential deadlock problem in vfio
  include/uapi/linux/vfio.h: Fix trivial typo - _IORW should be _IOWR instead
  vfio/pci: Use the struct file as the handle not the vfio_group
  kvm/vfio: Remove vfio_group from kvm
  vfio: Change vfio_group_set_kvm() to vfio_file_set_kvm()
  vfio: Change vfio_external_check_extension() to vfio_file_enforced_coherent()
  vfio: Remove vfio_external_group_match_file()
  ...
parents 8171acb8 421cfe65
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -262,10 +262,10 @@ Translation APIs for Mediated Devices
The following APIs are provided for translating user pfn to host pfn in a VFIO
driver::

	extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
	int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
				  int npage, int prot, unsigned long *phys_pfn);

	extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
	int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
				    int npage);

These functions call back into the back-end IOMMU module by using the pin_pages
+2 −2
Original line number Diff line number Diff line
@@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;

static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct kvm *kvm = vgpu->kvm;
	struct kvm *kvm = vgpu->vfio_device.kvm;
	int idx;
	bool ret;

@@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,

	if (!vgpu->attached)
		return -EINVAL;
	pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
	pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
	if (is_error_noslot_pfn(pfn))
		return -EINVAL;
	return PageTransHuge(pfn_to_page(pfn));
+2 −6
Original line number Diff line number Diff line
@@ -227,11 +227,7 @@ struct intel_vgpu {
	struct mutex cache_lock;

	struct notifier_block iommu_notifier;
	struct notifier_block group_notifier;
	struct kvm *kvm;
	struct work_struct release_work;
	atomic_t released;
	struct vfio_group *vfio_group;

	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
@@ -732,7 +728,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
{
	if (!vgpu->attached)
		return -ESRCH;
	return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, false);
	return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false);
}

/**
@@ -750,7 +746,7 @@ static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu,
{
	if (!vgpu->attached)
		return -ESRCH;
	return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, true);
	return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true);
}

void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu);
+27 −88
Original line number Diff line number Diff line
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
	}
}

static void intel_vgpu_release_work(struct work_struct *work);

static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size)
{
@@ -243,7 +241,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;

		ret = vfio_group_unpin_pages(vgpu->vfio_group, &cur_gfn, 1);
		ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1);
		drm_WARN_ON(&i915->drm, ret != 1);
	}
}
@@ -266,7 +264,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long cur_gfn = gfn + npage;
		unsigned long pfn;

		ret = vfio_group_pin_pages(vgpu->vfio_group, &cur_gfn, 1,
		ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1,
				     IOMMU_READ | IOMMU_WRITE, &pfn);
		if (ret != 1) {
			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
@@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu =
		container_of(nb, struct intel_vgpu, group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->kvm = data;

		if (!data)
			schedule_work(&vgpu->release_work);
	}

	return NOTIFY_OK;
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
{
	struct intel_vgpu *itr;
@@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
		if (!itr->attached)
			continue;

		if (vgpu->kvm == itr->kvm) {
		if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
			ret = true;
			goto out;
		}
@@ -804,13 +785,11 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned long events;
	int ret;
	struct vfio_group *vfio_group;

	vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, &events,
	ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
				     &vgpu->iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
@@ -818,47 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, &events,
				&vgpu->group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	vfio_group =
		vfio_group_get_external_user_from_dev(vgpu->vfio_device.dev);
	if (IS_ERR_OR_NULL(vfio_group)) {
		ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
		gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
		goto undo_register;
	}
	vgpu->vfio_group = vfio_group;

	ret = -EEXIST;
	if (vgpu->attached)
		goto undo_group;
		goto undo_iommu;

	ret = -ESRCH;
	if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
	if (!vgpu->vfio_device.kvm ||
	    vgpu->vfio_device.kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		goto undo_group;
		goto undo_iommu;
	}

	kvm_get_kvm(vgpu->vfio_device.kvm);

	ret = -EEXIST;
	if (__kvmgt_vgpu_exist(vgpu))
		goto undo_group;
		goto undo_iommu;

	vgpu->attached = true;
	kvm_get_kvm(vgpu->kvm);

	kvmgt_protect_table_init(vgpu);
	gvt_cache_init(vgpu);

	vgpu->track_node.track_write = kvmgt_page_track_write;
	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
	kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
					 &vgpu->track_node);

	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
			     &vgpu->nr_cache_entries);
@@ -868,16 +832,8 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
	atomic_set(&vgpu->released, 0);
	return 0;

undo_group:
	vfio_group_put_external_user(vgpu->vfio_group);
	vgpu->vfio_group = NULL;

undo_register:
	vfio_unregister_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY,
					&vgpu->group_notifier);

undo_iommu:
	vfio_unregister_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY,
	vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
				 &vgpu->iommu_notifier);
out:
	return ret;
@@ -894,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
	}
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	int ret;

@@ -907,41 +864,24 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)

	intel_gvt_release_vgpu(vgpu);

	ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_IOMMU_NOTIFY,
	ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
				       &vgpu->iommu_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_GROUP_NOTIFY,
					&vgpu->group_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for group failed: %d\n", ret);

	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));

	kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
	kvm_put_kvm(vgpu->kvm);
	kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
					   &vgpu->track_node);
	kvmgt_protect_table_destroy(vgpu);
	gvt_cache_destroy(vgpu);

	intel_vgpu_release_msi_eventfd_ctx(vgpu);
	vfio_group_put_external_user(vgpu->vfio_group);

	vgpu->kvm = NULL;
	vgpu->attached = false;
}

static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
	__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu =
		container_of(work, struct intel_vgpu, release_work);

	__intel_vgpu_release(vgpu);
	if (vgpu->vfio_device.kvm)
		kvm_put_kvm(vgpu->vfio_device.kvm);
}

static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1690,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
		return PTR_ERR(vgpu);
	}

	INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
	vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
			    &intel_vgpu_dev_ops);

@@ -1728,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {

int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->kvm;
	struct kvm *kvm = info->vfio_device.kvm;
	struct kvm_memory_slot *slot;
	int idx;

@@ -1758,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)

int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->kvm;
	struct kvm *kvm = info->vfio_device.kvm;
	struct kvm_memory_slot *slot;
	int idx;

+64 −1
Original line number Diff line number Diff line
@@ -87,6 +87,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
enable_vfs_hca:
	num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
	for (vf = 0; vf < num_vfs; vf++) {
		/* Notify the VF before its enablement to let it set
		 * some stuff.
		 */
		blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
					     MLX5_PF_NOTIFY_ENABLE_VF, dev);
		err = mlx5_core_enable_hca(dev, vf + 1);
		if (err) {
			mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
@@ -127,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
	for (vf = num_vfs - 1; vf >= 0; vf--) {
		if (!sriov->vfs_ctx[vf].enabled)
			continue;
		/* Notify the VF before its disablement to let it clean
		 * some resources.
		 */
		blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
					     MLX5_PF_NOTIFY_DISABLE_VF, dev);
		err = mlx5_core_disable_hca(dev, vf + 1);
		if (err) {
			mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
@@ -257,7 +267,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
	struct pci_dev *pdev = dev->pdev;
	int total_vfs;
	int total_vfs, i;

	if (!mlx5_core_is_pf(dev))
		return 0;
@@ -269,6 +279,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
	if (!sriov->vfs_ctx)
		return -ENOMEM;

	for (i = 0; i < total_vfs; i++)
		BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);

	return 0;
}

@@ -281,3 +294,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)

	kfree(sriov->vfs_ctx);
}

/**
 * mlx5_sriov_blocking_notifier_unregister - Remove a notifier block from
 * a VF's notification chain.
 *
 * @mdev: The mlx5 core device.
 * @vf_id: The VF id.
 * @nb: The notifier block previously registered on this VF.
 *
 * Silently returns (with a WARN) if @vf_id is outside the range of
 * currently enabled VFs.
 */
void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
					     int vf_id,
					     struct notifier_block *nb)
{
	struct mlx5_core_sriov *sriov = &mdev->priv.sriov;

	if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
		return;

	blocking_notifier_chain_unregister(&sriov->vfs_ctx[vf_id].notifier, nb);
}
EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);

/**
 * mlx5_sriov_blocking_notifier_register - Attach a notifier block to a
 * VF's notification chain.
 *
 * @mdev: The mlx5 core device.
 * @vf_id: The VF id.
 * @nb: The notifier block invoked on events for this VF.
 *
 * Returns 0 on success, -EINVAL when @vf_id is outside the range of
 * currently enabled VFs, or the error from the notifier chain core.
 */
int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
					  int vf_id,
					  struct notifier_block *nb)
{
	struct mlx5_core_sriov *sriov = &mdev->priv.sriov;

	if (vf_id < 0 || vf_id >= sriov->num_vfs)
		return -EINVAL;

	return blocking_notifier_chain_register(&sriov->vfs_ctx[vf_id].notifier, nb);
}
EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);
Loading