drivers/vfio/vfio.c  +5 −0

@@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
 static int vfio_fops_release(struct inode *inode, struct file *filep)
 {
 	struct vfio_container *container = filep->private_data;
+	struct vfio_iommu_driver *driver = container->iommu_driver;
+
+	if (driver && driver->ops->notify)
+		driver->ops->notify(container->iommu_data,
+				    VFIO_IOMMU_CONTAINER_CLOSE);
 
 	filep->private_data = NULL;
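The NULL checks above matter: a container fd can be released before userspace ever attaches an IOMMU backend, in which case container->iommu_driver is still NULL. A minimal userspace sequence that exercises exactly that path (a sketch; error handling omitted):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int container = open("/dev/vfio/vfio", O_RDWR);

		/* No VFIO_SET_IOMMU was issued, so no iommu_driver is set;
		 * vfio_fops_release() must skip the ->notify call here.
		 */
		return close(container);
	}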
drivers/vfio/vfio_iommu_type1.c  +217 −32

@@ -31,6 +31,7 @@
 #include <linux/rbtree.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
+#include <linux/kthread.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>

@@ -69,11 +70,15 @@ struct vfio_iommu {
 	struct rb_root		dma_list;
 	struct blocking_notifier_head notifier;
 	unsigned int		dma_avail;
+	unsigned int		vaddr_invalid_count;
 	uint64_t		pgsize_bitmap;
 	uint64_t		num_non_pinned_groups;
+	wait_queue_head_t	vaddr_wait;
 	bool			v2;
 	bool			nesting;
 	bool			dirty_page_tracking;
 	bool			pinned_page_dirty_scope;
+	bool			container_open;
 };
 
 struct vfio_domain {

@@ -92,6 +97,7 @@ struct vfio_dma {
 	int			prot;		/* IOMMU_READ/WRITE */
 	bool			iommu_mapped;
 	bool			lock_cap;	/* capable(CAP_IPC_LOCK) */
+	bool			vaddr_invalid;
 	struct task_struct	*task;
 	struct rb_root		pfn_list;	/* Ex-user pinned pfn list */
 	unsigned long		*bitmap;

@@ -143,6 +149,8 @@ struct vfio_regions {
 #define DIRTY_BITMAP_PAGES_MAX	((u64)INT_MAX)
 #define DIRTY_BITMAP_SIZE_MAX	DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
 
+#define WAITED 1
+
 static int put_pfn(unsigned long pfn, int prot);
 
 static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,

@@ -172,6 +180,31 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
 	return NULL;
 }
 
+static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
+						dma_addr_t start, size_t size)
+{
+	struct rb_node *res = NULL;
+	struct rb_node *node = iommu->dma_list.rb_node;
+	struct vfio_dma *dma_res = NULL;
+
+	while (node) {
+		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
+
+		if (start < dma->iova + dma->size) {
+			res = node;
+			dma_res = dma;
+			if (start >= dma->iova)
+				break;
+			node = node->rb_left;
+		} else {
+			node = node->rb_right;
+		}
+	}
+	if (res && size && dma_res->iova >= start + size)
+		res = NULL;
+	return res;
+}
+
 static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 {
 	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;

@@ -490,6 +523,61 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 	return ret;
 }
 
+static int vfio_wait(struct vfio_iommu *iommu)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE);
+	mutex_unlock(&iommu->lock);
+	schedule();
+	mutex_lock(&iommu->lock);
+	finish_wait(&iommu->vaddr_wait, &wait);
+	if (kthread_should_stop() || !iommu->container_open ||
+	    fatal_signal_pending(current)) {
+		return -EFAULT;
+	}
+	return WAITED;
+}
+
+/*
+ * Find dma struct and wait for its vaddr to be valid.  iommu lock is dropped
+ * if the task waits, but is re-locked on return.  Return result in *dma_p.
+ * Return 0 on success with no waiting, WAITED on success if waited, and -errno
+ * on error.
+ */
+static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,
+			       size_t size, struct vfio_dma **dma_p)
+{
+	int ret;
+
+	do {
+		*dma_p = vfio_find_dma(iommu, start, size);
+		if (!*dma_p)
+			ret = -EINVAL;
+		else if (!(*dma_p)->vaddr_invalid)
+			ret = 0;
+		else
+			ret = vfio_wait(iommu);
+	} while (ret > 0);
+
+	return ret;
+}
+
+/*
+ * Wait for all vaddr in the dma_list to become valid.  iommu lock is dropped
+ * if the task waits, but is re-locked on return.  Return 0 on success with no
+ * waiting, WAITED on success if waited, and -errno on error.
+ */
+static int vfio_wait_all_valid(struct vfio_iommu *iommu)
+{
+	int ret = 0;
+
+	while (iommu->vaddr_invalid_count && ret >= 0)
+		ret = vfio_wait(iommu);
+
+	return ret;
+}
+
 /*
  * Attempt to pin pages.  We really don't want to track all the pfns and
  * the iommu can only map chunks of consecutive pfns anyway, so get the

@@ -651,6 +739,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 	unsigned long remote_vaddr;
 	struct vfio_dma *dma;
 	bool do_accounting;
+	dma_addr_t iova;
 
 	if (!iommu || !user_pfn || !phys_pfn)
 		return -EINVAL;

@@ -661,6 +750,22 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 
 	mutex_lock(&iommu->lock);
 
+	/*
+	 * Wait for all necessary vaddr's to be valid so they can be used in
+	 * the main loop without dropping the lock, to avoid racing vs unmap.
+	 */
+again:
+	if (iommu->vaddr_invalid_count) {
+		for (i = 0; i < npage; i++) {
+			iova = user_pfn[i] << PAGE_SHIFT;
+			ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
+			if (ret < 0)
+				goto pin_done;
+			if (ret == WAITED)
+				goto again;
+		}
+	}
+
 	/* Fail if notifier list is empty */
 	if (!iommu->notifier.head) {
 		ret = -EINVAL;

@@ -675,7 +780,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 	do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
 
 	for (i = 0; i < npage; i++) {
-		dma_addr_t iova;
 		struct vfio_pfn *vpfn;
 
 		iova = user_pfn[i] << PAGE_SHIFT;

@@ -961,6 +1065,10 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 	vfio_unlink_dma(iommu, dma);
 	put_task_struct(dma->task);
 	vfio_dma_bitmap_free(dma);
+	if (dma->vaddr_invalid) {
+		iommu->vaddr_invalid_count--;
+		wake_up_all(&iommu->vaddr_wait);
+	}
 	kfree(dma);
 	iommu->dma_avail++;
 }

@@ -1086,34 +1194,36 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 {
 	struct vfio_dma *dma, *dma_last = NULL;
 	size_t unmapped = 0, pgsize;
-	int ret = 0, retries = 0;
+	int ret = -EINVAL, retries = 0;
 	unsigned long pgshift;
+	dma_addr_t iova = unmap->iova;
+	unsigned long size = unmap->size;
+	bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
+	bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
+	struct rb_node *n, *first_n;
 
 	mutex_lock(&iommu->lock);
 
 	pgshift = __ffs(iommu->pgsize_bitmap);
 	pgsize = (size_t)1 << pgshift;
 
-	if (unmap->iova & (pgsize - 1)) {
-		ret = -EINVAL;
+	if (iova & (pgsize - 1))
 		goto unlock;
-	}
 
-	if (!unmap->size || unmap->size & (pgsize - 1)) {
-		ret = -EINVAL;
+	if (unmap_all) {
+		if (iova || size)
+			goto unlock;
+		size = SIZE_MAX;
+	} else if (!size || size & (pgsize - 1)) {
 		goto unlock;
 	}
 
-	if (unmap->iova + unmap->size - 1 < unmap->iova ||
-	    unmap->size > SIZE_MAX) {
-		ret = -EINVAL;
+	if (iova + size - 1 < iova || size > SIZE_MAX)
 		goto unlock;
-	}
 
 	/* When dirty tracking is enabled, allow only min supported pgsize */
 	if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
 	    (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
-		ret = -EINVAL;
 		goto unlock;
 	}

@@ -1150,21 +1260,25 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	 * will only return success and a size of zero if there were no
 	 * mappings within the range.
 	 */
-	if (iommu->v2) {
-		dma = vfio_find_dma(iommu, unmap->iova, 1);
-		if (dma && dma->iova != unmap->iova) {
-			ret = -EINVAL;
+	if (iommu->v2 && !unmap_all) {
+		dma = vfio_find_dma(iommu, iova, 1);
+		if (dma && dma->iova != iova)
 			goto unlock;
-		}
-		dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
-		if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
-			ret = -EINVAL;
+
+		dma = vfio_find_dma(iommu, iova + size - 1, 0);
+		if (dma && dma->iova + dma->size != iova + size)
 			goto unlock;
-		}
 	}
 
-	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-		if (!iommu->v2 && unmap->iova > dma->iova)
+	ret = 0;
+	n = first_n = vfio_find_dma_first_node(iommu, iova, size);
+
+	while (n) {
+		dma = rb_entry(n, struct vfio_dma, node);
+		if (dma->iova >= iova + size)
+			break;
+
+		if (!iommu->v2 && iova > dma->iova)
 			break;
 		/*
		 * Task with same address space who mapped this iova range is

@@ -1173,6 +1287,27 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		if (dma->task->mm != current->mm)
 			break;
 
+		if (invalidate_vaddr) {
+			if (dma->vaddr_invalid) {
+				struct rb_node *last_n = n;
+
+				for (n = first_n; n != last_n; n = rb_next(n)) {
+					dma = rb_entry(n,
+						       struct vfio_dma, node);
+					dma->vaddr_invalid = false;
+					iommu->vaddr_invalid_count--;
+				}
+				ret = -EINVAL;
+				unmapped = 0;
+				break;
+			}
+			dma->vaddr_invalid = true;
+			iommu->vaddr_invalid_count++;
+			unmapped += dma->size;
+			n = rb_next(n);
+			continue;
+		}
+
 		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
 			struct vfio_iommu_type1_dma_unmap nb_unmap;

@@ -1202,12 +1337,13 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
 			ret = update_user_bitmap(bitmap->data, iommu, dma,
-						 unmap->iova, pgsize);
+						 iova, pgsize);
 			if (ret)
 				break;
 		}
 
 		unmapped += dma->size;
+		n = rb_next(n);
 		vfio_remove_dma(iommu, dma);
 	}

@@ -1311,6 +1447,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
+	bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR;
 	dma_addr_t iova = map->iova;
 	unsigned long vaddr = map->vaddr;
 	size_t size = map->size;

@@ -1328,13 +1465,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
 		prot |= IOMMU_READ;
 
+	if ((prot && set_vaddr) || (!prot && !set_vaddr))
+		return -EINVAL;
+
 	mutex_lock(&iommu->lock);
 
 	pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
 
 	WARN_ON((pgsize - 1) & PAGE_MASK);
 
-	if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) {
+	if (!size || (size | iova | vaddr) & (pgsize - 1)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}

@@ -1345,7 +1485,21 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 		goto out_unlock;
 	}
 
-	if (vfio_find_dma(iommu, iova, size)) {
+	dma = vfio_find_dma(iommu, iova, size);
+	if (set_vaddr) {
+		if (!dma) {
+			ret = -ENOENT;
+		} else if (!dma->vaddr_invalid || dma->iova != iova ||
+			   dma->size != size) {
+			ret = -EINVAL;
+		} else {
+			dma->vaddr = vaddr;
+			dma->vaddr_invalid = false;
+			iommu->vaddr_invalid_count--;
+			wake_up_all(&iommu->vaddr_wait);
+		}
+		goto out_unlock;
+	} else if (dma) {
 		ret = -EEXIST;
 		goto out_unlock;
 	}

@@ -1442,6 +1596,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret;
 
+	ret = vfio_wait_all_valid(iommu);
+	if (ret < 0)
+		return ret;
+
 	/* Arbitrarily pick the first domain in the list for lookups */
 	if (!list_empty(&iommu->domain_list))
 		d = list_first_entry(&iommu->domain_list,

@@ -2417,8 +2575,10 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 	INIT_LIST_HEAD(&iommu->iova_list);
 	iommu->dma_list = RB_ROOT;
 	iommu->dma_avail = dma_entry_limit;
+	iommu->container_open = true;
 	mutex_init(&iommu->lock);
 	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+	init_waitqueue_head(&iommu->vaddr_wait);
 
 	return iommu;
 }

@@ -2487,6 +2647,8 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
 	case VFIO_TYPE1_IOMMU:
 	case VFIO_TYPE1v2_IOMMU:
 	case VFIO_TYPE1_NESTING_IOMMU:
+	case VFIO_UNMAP_ALL:
+	case VFIO_UPDATE_VADDR:
 		return 1;
 	case VFIO_DMA_CC_IOMMU:
 		if (!iommu)

@@ -2658,7 +2820,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
 {
 	struct vfio_iommu_type1_dma_map map;
 	unsigned long minsz;
-	uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+	uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
+			VFIO_DMA_MAP_FLAG_VADDR;
 
 	minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

@@ -2676,6 +2839,9 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
 {
 	struct vfio_iommu_type1_dma_unmap unmap;
 	struct vfio_bitmap bitmap = { 0 };
+	uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP |
+			VFIO_DMA_UNMAP_FLAG_VADDR |
+			VFIO_DMA_UNMAP_FLAG_ALL;
 	unsigned long minsz;
 	int ret;

@@ -2684,8 +2850,12 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
 	if (copy_from_user(&unmap, (void __user *)arg, minsz))
 		return -EFAULT;
 
-	if (unmap.argsz < minsz ||
-	    unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)
+	if (unmap.argsz < minsz || unmap.flags & ~mask)
+		return -EINVAL;
+
+	if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+	    (unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL |
+			    VFIO_DMA_UNMAP_FLAG_VADDR)))
 		return -EINVAL;
 
 	if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {

@@ -2876,12 +3046,13 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
 	struct vfio_dma *dma;
 	bool kthread = current->mm == NULL;
 	size_t offset;
+	int ret;
 
 	*copied = 0;
 
-	dma = vfio_find_dma(iommu, user_iova, 1);
-	if (!dma)
-		return -EINVAL;
+	ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma);
+	if (ret < 0)
+		return ret;
 
 	if ((write && !(dma->prot & IOMMU_WRITE)) ||
 	     !(dma->prot & IOMMU_READ))

@@ -2973,6 +3144,19 @@ vfio_iommu_type1_group_iommu_domain(void *iommu_data,
 	return domain;
 }
 
+static void vfio_iommu_type1_notify(void *iommu_data,
+				    enum vfio_iommu_notify_type event)
+{
+	struct vfio_iommu *iommu = iommu_data;
+
+	if (event != VFIO_IOMMU_CONTAINER_CLOSE)
+		return;
+	mutex_lock(&iommu->lock);
+	iommu->container_open = false;
+	mutex_unlock(&iommu->lock);
+	wake_up_all(&iommu->vaddr_wait);
+}
+
 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.name			= "vfio-iommu-type1",
 	.owner			= THIS_MODULE,

@@ -2987,6 +3171,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
 	.dma_rw			= vfio_iommu_type1_dma_rw,
 	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
+	.notify			= vfio_iommu_type1_notify,
 };
 
 static int __init vfio_iommu_type1_init(void)
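With the driver-side checks above in place, userspace can drop every mapping in a single call. A hedged sketch of the new unmap-all path, assuming `container` is an already configured container fd; per vfio_dma_do_unmap(), iova and size must be 0 and the get-dirty-bitmap flag must be clear:

	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.flags = VFIO_DMA_UNMAP_FLAG_ALL,
		.iova  = 0,	/* must be 0 with FLAG_ALL */
		.size  = 0,	/* must be 0 with FLAG_ALL */
	};

	if (ioctl(container, VFIO_IOMMU_DMA_UNMAP, &unmap) < 0)
		perror("VFIO_IOMMU_DMA_UNMAP");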
include/linux/vfio.h  +7 −0

@@ -57,6 +57,11 @@
 extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
 extern void vfio_device_put(struct vfio_device *device);
 extern void *vfio_device_data(struct vfio_device *device);
 
+/* events for the backend driver notify callback */
+enum vfio_iommu_notify_type {
+	VFIO_IOMMU_CONTAINER_CLOSE = 0,
+};
+
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
  */

@@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops {
 				  void *data, size_t count, bool write);
 	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
 						   struct iommu_group *group);
+	void		(*notify)(void *iommu_data,
+				  enum vfio_iommu_notify_type event);
 };
 
 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
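A backend other than type1 that opts into the new callback would follow the same shape as vfio_iommu_type1_notify() above; a sketch with hypothetical names (struct my_iommu and its fields are illustrative, not part of this patch):

	static void my_iommu_notify(void *iommu_data,
				    enum vfio_iommu_notify_type event)
	{
		struct my_iommu *iommu = iommu_data;	/* hypothetical state */

		if (event != VFIO_IOMMU_CONTAINER_CLOSE)
			return;		/* tolerate future event types */

		/* Flag the container closed under the lock, then wake any
		 * task sleeping on backend state so it can observe the flag
		 * and return instead of blocking forever.
		 */
		mutex_lock(&iommu->lock);
		iommu->container_open = false;
		mutex_unlock(&iommu->lock);
		wake_up_all(&iommu->wait);
	}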
include/uapi/linux/vfio.h  +27 −0

@@ -46,6 +46,12 @@
  */
 #define VFIO_NOIOMMU_IOMMU		8
 
+/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
+#define VFIO_UNMAP_ALL			9
+
+/* Supports the vaddr flag for DMA map and unmap */
+#define VFIO_UPDATE_VADDR		10
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between

@@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail {
  *
  * Map process virtual addresses to IO virtual addresses using the
  * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
+ * unblock translation of host virtual addresses in the iova range.  The vaddr
+ * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR.  To
+ * maintain memory consistency within the user application, the updated vaddr
+ * must address the same memory object as originally mapped.  Failure to do so
+ * will result in user memory corruption and/or device misbehavior.  iova and
+ * size must match those in the original MAP_DMA call.  Protection is not
+ * changed, and the READ & WRITE flags must be 0.
  */
 struct vfio_iommu_type1_dma_map {
 	__u32	argsz;
 	__u32	flags;
 #define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
 #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
+#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
 	__u64	vaddr;				/* Process virtual address */
 	__u64	iova;				/* IO virtual address */
 	__u64	size;				/* Size of mapping (bytes) */

@@ -1102,6 +1118,7 @@ struct vfio_bitmap {
  * field.  No guarantee is made to the user that arbitrary unmaps of iova
  * or size different from those used in the original mapping call will
  * succeed.
+ *
 * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
 * before unmapping IO virtual addresses. When this flag is set, the user must
 * provide a struct vfio_bitmap in data[]. User must provide zero-allocated

@@ -1111,11 +1128,21 @@ struct vfio_bitmap {
  * indicates that the page at that offset from iova is dirty. A Bitmap of the
  * pages in the range of unmapped size is returned in the user-provided
  * vfio_bitmap.data.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses.  iova and size
+ * must be 0.  This cannot be combined with the get-dirty-bitmap flag.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
+ * virtual addresses in the iova range.  Tasks that attempt to translate an
+ * iova's vaddr will block.  DMA to already-mapped pages continues.  This
+ * cannot be combined with the get-dirty-bitmap flag.
  */
 struct vfio_iommu_type1_dma_unmap {
 	__u32	argsz;
 	__u32	flags;
 #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
+#define VFIO_DMA_UNMAP_FLAG_ALL		     (1 << 1)
+#define VFIO_DMA_UNMAP_FLAG_VADDR	     (1 << 2)
 	__u64	iova;				/* IO virtual address */
 	__u64	size;				/* Size of mapping (bytes) */
 	__u8    data[];
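Putting the uAPI together: for a live update, userspace invalidates translation over each mapped range, hands the container fd to the new process, then re-registers each range's (possibly different) vaddr. A sketch under stated assumptions: `container` is an open, configured container fd inherited across exec, and `iova`, `size`, and `new_vaddr` describe one range previously mapped with MAP_DMA onto the same memory object:

	if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR) != 1)
		return -1;	/* kernel too old for the vaddr flags */

	/* Step 1: invalidate the vaddr.  DMA to already-mapped pages keeps
	 * running; kernel paths that need to translate this iova block.
	 */
	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.flags = VFIO_DMA_UNMAP_FLAG_VADDR,
		.iova  = iova,
		.size  = size,
	};
	if (ioctl(container, VFIO_IOMMU_DMA_UNMAP, &unmap) < 0)
		return -1;

	/* ... exec() the new binary; it maps the same memory object (e.g. a
	 * shared memfd) at new_vaddr and inherits the container fd ...
	 */

	/* Step 2: update the vaddr and unblock waiters.  iova and size must
	 * match the original MAP_DMA, and READ/WRITE must be 0.
	 */
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_VADDR,
		.vaddr = new_vaddr,
		.iova  = iova,
		.size  = size,
	};
	if (ioctl(container, VFIO_IOMMU_DMA_MAP, &map) < 0)
		return -1;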