Commit 20b23b13 authored by Eric Auger's avatar Eric Auger Committed by Zheng Zengkai
Browse files

vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I401IF


CVE: NA

------------------------------

Add a new, dedicated DMA_FAULT region that exposes nested-mode
translation faults. This region is only exposed if the device
is attached to a nested domain.

The region has a ring buffer that contains the actual fault
records, plus a header that allows userspace to manage it (tail/head
indices, max capacity, entry size). At the moment the region is
dimensioned for 512 fault records.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parent ac16d334
Loading
Loading
Loading
Loading
+79 −0
Original line number Diff line number Diff line
@@ -309,6 +309,81 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
	return ret;
}

static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
				       struct vfio_pci_region *region)
{
	kfree(vdev->fault_pages);
}

static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
					     struct vfio_pci_region *region,
					     struct vfio_info_cap *caps)
{
	struct vfio_region_info_cap_fault cap = {
		.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
		.header.version = 1,
		.version = 1,
	};
	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
}

/*
 * Callbacks of the DMA fault region: read/write access, release of the
 * backing buffer, and the capability advertised in the region info.
 */
static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
	.rw		= vfio_pci_dma_fault_rw,
	.release	= vfio_pci_dma_fault_release,
	.add_capability = vfio_pci_dma_fault_add_capability,
};

/* Number of fault records the DMA fault ring buffer is provisioned for. */
#define DMA_FAULT_RING_LENGTH 512

/*
 * Set up the DMA fault region of a device attached to a nested domain.
 *
 * Nothing is done (and 0 is returned) when the device has no IOMMU domain
 * or when the domain reports DOMAIN_ATTR_NESTING as off. Otherwise a zeroed
 * buffer is allocated into vdev->fault_pages, registered as a read/write
 * VFIO_REGION_TYPE_NESTED / VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT region,
 * and its header is filled in.
 *
 * Returns 0 on success or when the region is not applicable, -ENOMEM if
 * the buffer cannot be allocated, or the error reported by
 * iommu_domain_get_attr() / vfio_pci_register_dev_region().
 */
static int vfio_pci_dma_fault_init(struct vfio_pci_device *vdev)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(&vdev->pdev->dev);
	struct vfio_region_dma_fault *hdr;
	size_t ring_bytes;
	size_t region_bytes;
	int is_nested;
	int err;

	if (!dom)
		return 0;

	err = iommu_domain_get_attr(dom, DOMAIN_ATTR_NESTING, &is_nested);
	if (err)
		return err;
	if (!is_nested)
		return 0;

	mutex_init(&vdev->fault_queue_lock);

	/*
	 * The region is one page (accounted for the header) plus the
	 * page-aligned space needed for DMA_FAULT_RING_LENGTH fault records.
	 */
	ring_bytes = ALIGN(sizeof(struct iommu_fault) * DMA_FAULT_RING_LENGTH,
			   PAGE_SIZE);
	region_bytes = ring_bytes + PAGE_SIZE;

	vdev->fault_pages = kzalloc(region_bytes, GFP_KERNEL);
	if (!vdev->fault_pages)
		return -ENOMEM;

	err = vfio_pci_register_dev_region(vdev,
					   VFIO_REGION_TYPE_NESTED,
					   VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
					   &vfio_pci_dma_fault_regops,
					   region_bytes,
					   VFIO_REGION_INFO_FLAG_READ |
					   VFIO_REGION_INFO_FLAG_WRITE,
					   vdev->fault_pages);
	if (err) {
		/* Registration failed: the buffer is still ours to free. */
		kfree(vdev->fault_pages);
		vdev->fault_pages = NULL;
		return err;
	}

	/* The header sits at the start of the buffer; records follow it. */
	hdr = (struct vfio_region_dma_fault *)vdev->fault_pages;
	hdr->entry_size = sizeof(struct iommu_fault);
	hdr->nb_entries = DMA_FAULT_RING_LENGTH;
	hdr->offset = sizeof(struct vfio_region_dma_fault);

	return 0;
}

static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
@@ -407,6 +482,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
		}
	}

	ret = vfio_pci_dma_fault_init(vdev);
	if (ret)
		goto disable_exit;

	vfio_pci_probe_mmaps(vdev);

	return 0;
+6 −0
Original line number Diff line number Diff line
@@ -134,6 +134,8 @@ struct vfio_pci_device {
	int			ioeventfds_nr;
	struct eventfd_ctx	*err_trigger;
	struct eventfd_ctx	*req_trigger;
	u8			*fault_pages;
	struct mutex		fault_queue_lock;
	struct list_head	dummy_resources_list;
	struct mutex		ioeventfds_lock;
	struct list_head	ioeventfds_list;
@@ -170,6 +172,10 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
			       uint64_t data, int count, int fd);

/*
 * Read/write handler of the DMA fault region.
 * Returns the number of bytes transferred; failures are reported as a
 * negative errno value carried in the size_t return type.
 */
extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev,
				    char __user *buf, size_t count,
				    loff_t *ppos, bool iswrite);

extern int vfio_pci_init_perm_bits(void);
extern void vfio_pci_uninit_perm_bits(void);

+44 −0
Original line number Diff line number Diff line
@@ -356,6 +356,50 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
	return done;
}

/*
 * Read/write handler of the DMA fault region.
 *
 * Reads copy up to @count bytes of the region, starting at the in-region
 * offset encoded in *@ppos, to @buf; the length is clamped to the end of
 * the region. Writes are accepted only for the 4-byte tail index at
 * offset 0, and the new tail must be lower than the header's nb_entries.
 * The access is performed under vdev->fault_queue_lock.
 *
 * @vdev:    device owning the region
 * @buf:     userspace buffer to read into or write from
 * @count:   requested length in bytes
 * @ppos:    file position; advanced by the transferred length on success
 * @iswrite: true for a write, false for a read
 *
 * Returns the number of bytes transferred, -EINVAL for an out-of-range
 * offset, a malformed write or an invalid tail value, or -EFAULT when the
 * user copy fails (negative values are carried in the size_t return type).
 */
size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev, char __user *buf,
			     size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int idx = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			   VFIO_PCI_NUM_REGIONS;
	loff_t off = *ppos & VFIO_PCI_OFFSET_MASK;
	void *region_base = vdev->region[idx].data;
	size_t region_size = vdev->region[idx].size;
	int ret;

	if (off >= region_size)
		return -EINVAL;

	/* Never transfer past the end of the region. */
	count = min(count, (size_t)(region_size - off));

	mutex_lock(&vdev->fault_queue_lock);

	if (iswrite) {
		struct vfio_region_dma_fault *hdr = region_base;
		u32 tail;

		/* Only the tail index, at the very start, is writable. */
		ret = -EINVAL;
		if (off != 0 || count != sizeof(tail))
			goto unlock;

		ret = -EFAULT;
		if (copy_from_user(&tail, buf, count))
			goto unlock;

		ret = -EINVAL;
		if (tail >= hdr->nb_entries)
			goto unlock;

		hdr->tail = tail;
	} else if (copy_to_user(buf, region_base + off, count)) {
		ret = -EFAULT;
		goto unlock;
	}

	*ppos += count;
	ret = count;
unlock:
	mutex_unlock(&vdev->fault_queue_lock);
	return ret;
}

static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
+35 −0
Original line number Diff line number Diff line
@@ -329,6 +329,7 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_GFX                    (1)
#define VFIO_REGION_TYPE_CCW			(2)
#define VFIO_REGION_TYPE_MIGRATION              (3)
#define VFIO_REGION_TYPE_NESTED			(4)

/* sub-types for VFIO_REGION_TYPE_PCI_* */

@@ -353,6 +354,9 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID            (1)

/* sub-types for VFIO_REGION_TYPE_NESTED */
#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT	(1)

/**
 * struct vfio_region_gfx_edid - EDID region layout.
 *
@@ -998,6 +1002,37 @@ struct vfio_device_feature {
 */
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)

/*
 * Capability exposed by the DMA fault region
 * @header: capability chain header, with id VFIO_REGION_INFO_CAP_DMA_FAULT
 * @version: ABI version
 */
#define VFIO_REGION_INFO_CAP_DMA_FAULT	6

struct vfio_region_info_cap_fault {
	struct vfio_info_cap_header header;
	__u32 version;
};

/*
 * DMA Fault Region Layout
 *
 * Header located at the start of the DMA fault region; the ring buffer of
 * fault records is found @offset bytes into the region.
 *
 * @tail: index relative to the start of the ring buffer at which the
 *        consumer finds the next item in the buffer
 * @entry_size: fault ring buffer entry size in bytes
 * @nb_entries: max capacity of the fault ring buffer
 * @offset: ring buffer offset relative to the start of the region
 * @head: index relative to the start of the ring buffer at which the
 *        producer (kernel) inserts items into the buffers
 */
struct vfio_region_dma_fault {
	/* Write-Only: the only field userspace updates */
	__u32   tail;
	/* Read-Only: filled in by the kernel */
	__u32   entry_size;
	__u32	nb_entries;
	__u32	offset;
	__u32   head;
};

/* -------- API for Type1 VFIO IOMMU -------- */

/**