drivers/vfio/pci/vfio_pci.c  +327 −26

@@ -26,6 +26,7 @@
 #include <linux/vfio.h>
 #include <linux/vgaarb.h>
 #include <linux/nospec.h>
+#include <linux/sched/mm.h>

 #include "vfio_pci_private.h"

@@ -184,6 +185,7 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);

 /*
  * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND

@@ -519,6 +521,10 @@ static void vfio_pci_release(void *device_data)
 		vfio_pci_vf_token_user_add(vdev, -1);
 		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
+		if (vdev->err_trigger)
+			eventfd_ctx_put(vdev->err_trigger);
+		if (vdev->req_trigger)
+			eventfd_ctx_put(vdev->req_trigger);
 	}

 	mutex_unlock(&vdev->reflck->lock);

@@ -736,6 +742,12 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 	return 0;
 }

+struct vfio_devices {
+	struct vfio_device **devices;
+	int cur_index;
+	int max_index;
+};
+
 static long vfio_pci_ioctl(void *device_data,
			    unsigned int cmd, unsigned long arg)
 {

@@ -809,7 +821,7 @@ static long vfio_pci_ioctl(void *device_data,
 		{
 			void __iomem *io;
 			size_t size;
-			u16 orig_cmd;
+			u16 cmd;

 			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
 			info.flags = 0;

@@ -829,10 +841,7 @@ static long vfio_pci_ioctl(void *device_data,
 			 * Is it really there?  Enable memory decode for
 			 * implicit access in pci_map_rom().
 			 */
-			pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd);
-			pci_write_config_word(pdev, PCI_COMMAND,
-					      orig_cmd | PCI_COMMAND_MEMORY);
-
+			cmd = vfio_pci_memory_lock_and_enable(vdev);
 			io = pci_map_rom(pdev, &size);
 			if (io) {
 				info.flags = VFIO_REGION_INFO_FLAG_READ;

@@ -840,8 +849,8 @@ static long vfio_pci_ioctl(void *device_data,
 			} else {
 				info.size = 0;
 			}
+			vfio_pci_memory_unlock_and_restore(vdev, cmd);

-			pci_write_config_word(pdev, PCI_COMMAND, orig_cmd);
 			break;
 		}
 		case VFIO_PCI_VGA_REGION_INDEX:

@@ -984,8 +993,16 @@ static long vfio_pci_ioctl(void *device_data,
 		return ret;

 	} else if (cmd == VFIO_DEVICE_RESET) {
-		return vdev->reset_works ?
-			pci_try_reset_function(vdev->pdev) : -EINVAL;
+		int ret;
+
+		if (!vdev->reset_works)
+			return -EINVAL;
+
+		vfio_pci_zap_and_down_write_memory_lock(vdev);
+		ret = pci_try_reset_function(vdev->pdev);
+		up_write(&vdev->memory_lock);
+
+		return ret;

 	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
 		struct vfio_pci_hot_reset_info hdr;

@@ -1065,8 +1082,9 @@ static long vfio_pci_ioctl(void *device_data,
 		int32_t *group_fds;
 		struct vfio_pci_group_entry *groups;
 		struct vfio_pci_group_info info;
+		struct vfio_devices devs = { .cur_index = 0 };
 		bool slot = false;
-		int i, count = 0, ret = 0;
+		int i, group_idx, mem_idx = 0, count = 0, ret = 0;

 		minsz = offsetofend(struct vfio_pci_hot_reset, count);

@@ -1118,9 +1136,9 @@ static long vfio_pci_ioctl(void *device_data,
 		 * user interface and store the group and iommu ID.  This
 		 * ensures the group is held across the reset.
 		 */
-		for (i = 0; i < hdr.count; i++) {
+		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
 			struct vfio_group *group;
-			struct fd f = fdget(group_fds[i]);
+			struct fd f = fdget(group_fds[group_idx]);
 			if (!f.file) {
 				ret = -EBADF;
 				break;

@@ -1133,8 +1151,9 @@ static long vfio_pci_ioctl(void *device_data,
 				break;
 			}

-			groups[i].group = group;
-			groups[i].id = vfio_external_user_iommu_id(group);
+			groups[group_idx].group = group;
+			groups[group_idx].id =
+					vfio_external_user_iommu_id(group);
 		}

 		kfree(group_fds);

@@ -1153,13 +1172,63 @@ static long vfio_pci_ioctl(void *device_data,
 		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
 						    vfio_pci_validate_devs,
 						    &info, slot);
-		if (!ret)
-			/* User has access, do the reset */
-			ret = pci_reset_bus(vdev->pdev);
+		if (ret)
+			goto hot_reset_release;
+
+		devs.max_index = count;
+		devs.devices = kcalloc(count, sizeof(struct vfio_device *),
+				       GFP_KERNEL);
+		if (!devs.devices) {
+			ret = -ENOMEM;
+			goto hot_reset_release;
+		}
+
+		/*
+		 * We need to get memory_lock for each device, but devices
+		 * can share mmap_sem, therefore we need to zap and hold
+		 * the vma_lock for each device, and only then get each
+		 * memory_lock.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+					    vfio_pci_try_zap_and_vma_lock_cb,
+					    &devs, slot);
+		if (ret)
+			goto hot_reset_release;
+
+		for (; mem_idx < devs.cur_index; mem_idx++) {
+			struct vfio_pci_device *tmp;
+
+			tmp = vfio_device_data(devs.devices[mem_idx]);
+
+			ret = down_write_trylock(&tmp->memory_lock);
+			if (!ret) {
+				ret = -EBUSY;
+				goto hot_reset_release;
+			}
+			mutex_unlock(&tmp->vma_lock);
+		}
+
+		/* User has access, do the reset */
+		ret = pci_reset_bus(vdev->pdev);

 hot_reset_release:
-		for (i--; i >= 0; i--)
-			vfio_group_put_external_user(groups[i].group);
+		for (i = 0; i < devs.cur_index; i++) {
+			struct vfio_device *device;
+			struct vfio_pci_device *tmp;
+
+			device = devs.devices[i];
+			tmp = vfio_device_data(device);
+
+			if (i < mem_idx)
+				up_write(&tmp->memory_lock);
+			else
+				mutex_unlock(&tmp->vma_lock);
+			vfio_device_put(device);
+		}
+		kfree(devs.devices);
+
+		for (group_idx--; group_idx >= 0; group_idx--)
+			vfio_group_put_external_user(groups[group_idx].group);

 		kfree(groups);
 		return ret;
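With the two hunks above, both VFIO_DEVICE_RESET and VFIO_DEVICE_PCI_HOT_RESET now invalidate user mappings first and hold each affected device's memory_lock across the reset. The ioctl interface itself is unchanged; a minimal userspace sketch, assuming `device` is a hypothetical, already-open VFIO device fd obtained through the usual container/group setup:

#include <sys/ioctl.h>
#include <linux/vfio.h>

static int reset_device(int device)
{
	/*
	 * Same call as before this series; the difference is that a
	 * concurrent MMIO access through an mmap now faults and waits
	 * on memory_lock instead of racing the reset.
	 */
	return ioctl(device, VFIO_DEVICE_RESET);
}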
@@ -1299,6 +1368,202 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
 	return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
 }

+/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
+static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
+{
+	struct vfio_pci_mmap_vma *mmap_vma, *tmp;
+
+	/*
+	 * Lock ordering:
+	 * vma_lock is nested under mmap_sem for vm_ops callback paths.
+	 * The memory_lock semaphore is used by both code paths calling
+	 * into this function to zap vmas and the vm_ops.fault callback
+	 * to protect the memory enable state of the device.
+	 *
+	 * When zapping vmas we need to maintain the mmap_sem => vma_lock
+	 * ordering, which requires using vma_lock to walk vma_list to
+	 * acquire an mm, then dropping vma_lock to get the mmap_sem and
+	 * reacquiring vma_lock.  This logic is derived from similar
+	 * requirements in uverbs_user_mmap_disassociate().
+	 *
+	 * mmap_sem must always be the top-level lock when it is taken.
+	 * Therefore we can only hold the memory_lock write lock when
+	 * vma_list is empty, as we'd need to take mmap_sem to clear
+	 * entries.  vma_list can only be guaranteed empty when holding
+	 * vma_lock, thus memory_lock is nested under vma_lock.
+	 *
+	 * This enables the vm_ops.fault callback to acquire vma_lock,
+	 * followed by memory_lock read lock, while already holding
+	 * mmap_sem without risk of deadlock.
+	 */
+	while (1) {
+		struct mm_struct *mm = NULL;
+
+		if (try) {
+			if (!mutex_trylock(&vdev->vma_lock))
+				return 0;
+		} else {
+			mutex_lock(&vdev->vma_lock);
+		}
+		while (!list_empty(&vdev->vma_list)) {
+			mmap_vma = list_first_entry(&vdev->vma_list,
+						    struct vfio_pci_mmap_vma,
+						    vma_next);
+			mm = mmap_vma->vma->vm_mm;
+			if (mmget_not_zero(mm))
+				break;
+
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
+			mm = NULL;
+		}
+		if (!mm)
+			return 1;
+		mutex_unlock(&vdev->vma_lock);
+
+		if (try) {
+			if (!down_read_trylock(&mm->mmap_sem)) {
+				mmput(mm);
+				return 0;
+			}
+		} else {
+			down_read(&mm->mmap_sem);
+		}
+		if (mmget_still_valid(mm)) {
+			if (try) {
+				if (!mutex_trylock(&vdev->vma_lock)) {
+					up_read(&mm->mmap_sem);
+					mmput(mm);
+					return 0;
+				}
+			} else {
+				mutex_lock(&vdev->vma_lock);
+			}
+			list_for_each_entry_safe(mmap_vma, tmp,
+						 &vdev->vma_list, vma_next) {
+				struct vm_area_struct *vma = mmap_vma->vma;
+
+				if (vma->vm_mm != mm)
+					continue;
+
+				list_del(&mmap_vma->vma_next);
+				kfree(mmap_vma);
+
+				zap_vma_ptes(vma, vma->vm_start,
+					     vma->vm_end - vma->vm_start);
+			}
+			mutex_unlock(&vdev->vma_lock);
+		}
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
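A condensed view of the two lock orders the comment above sets up (annotation only, not part of the patch):

/*
 * fault path - vfio_pci_mmap_fault() runs with mmap_sem already held
 * by the fault core:
 *
 *	mmap_sem -> vma_lock -> memory_lock (read)
 *
 * invalidation path - vfio_pci_zap_and_vma_lock():
 *
 *	vma_lock -> (drop, pin an mm) -> mmap_sem -> vma_lock -> zap,
 *	repeated until vma_list is empty; it returns with vma_lock held
 *	and no tracked vmas, so the caller can then take memory_lock
 *	(write) without ever nesting it under mmap_sem.
 */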
+void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
+{
+	vfio_pci_zap_and_vma_lock(vdev, false);
+	down_write(&vdev->memory_lock);
+	mutex_unlock(&vdev->vma_lock);
+}
+
+u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
+{
+	u16 cmd;
+
+	down_write(&vdev->memory_lock);
+	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
+	if (!(cmd & PCI_COMMAND_MEMORY))
+		pci_write_config_word(vdev->pdev, PCI_COMMAND,
+				      cmd | PCI_COMMAND_MEMORY);
+
+	return cmd;
+}
+
+void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
+{
+	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
+	up_write(&vdev->memory_lock);
+}
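The pair above brackets any host-side access that needs memory decode while preserving the user's view of the command register. A usage sketch (hypothetical caller, mirroring the ROM-probe hunk earlier in this file):

static void example_rom_probe(struct vfio_pci_device *vdev)
{
	size_t size;
	void __iomem *io;
	u16 cmd;

	cmd = vfio_pci_memory_lock_and_enable(vdev);	/* decode forced on */
	io = pci_map_rom(vdev->pdev, &size);		/* needs decode */
	if (io)
		pci_unmap_rom(vdev->pdev, io);
	vfio_pci_memory_unlock_and_restore(vdev, cmd);	/* user state back */
}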
+/* Caller holds vma_lock */
+static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
+			      struct vm_area_struct *vma)
+{
+	struct vfio_pci_mmap_vma *mmap_vma;
+
+	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+	if (!mmap_vma)
+		return -ENOMEM;
+
+	mmap_vma->vma = vma;
+	list_add(&mmap_vma->vma_next, &vdev->vma_list);
+
+	return 0;
+}
+
+/*
+ * Zap mmaps on open so that we can fault them in on access and therefore
+ * our vma_list only tracks mappings accessed since last zap.
+ */
+static void vfio_pci_mmap_open(struct vm_area_struct *vma)
+{
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+{
+	struct vfio_pci_device *vdev = vma->vm_private_data;
+	struct vfio_pci_mmap_vma *mmap_vma;
+
+	mutex_lock(&vdev->vma_lock);
+	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+		if (mmap_vma->vma == vma) {
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
+			break;
+		}
+	}
+	mutex_unlock(&vdev->vma_lock);
+}
+
+static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vfio_pci_device *vdev = vma->vm_private_data;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+
+	mutex_lock(&vdev->vma_lock);
+	down_read(&vdev->memory_lock);
+
+	if (!__vfio_pci_memory_enabled(vdev)) {
+		ret = VM_FAULT_SIGBUS;
+		mutex_unlock(&vdev->vma_lock);
+		goto up_out;
+	}
+
+	if (__vfio_pci_add_vma(vdev, vma)) {
+		ret = VM_FAULT_OOM;
+		mutex_unlock(&vdev->vma_lock);
+		goto up_out;
+	}
+
+	mutex_unlock(&vdev->vma_lock);
+
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			    vma->vm_end - vma->vm_start, vma->vm_page_prot))
+		ret = VM_FAULT_SIGBUS;
+
+up_out:
+	up_read(&vdev->memory_lock);
+	return ret;
+}
+
+static const struct vm_operations_struct vfio_pci_mmap_ops = {
+	.open = vfio_pci_mmap_open,
+	.close = vfio_pci_mmap_close,
+	.fault = vfio_pci_mmap_fault,
+};
+
 static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 {
 	struct vfio_pci_device *vdev = device_data;

@@ -1357,8 +1622,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;

-	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       req_len, vma->vm_page_prot);
+	/*
+	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
+	 * change vm_flags within the fault handler.  Set them now.
+	 */
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &vfio_pci_mmap_ops;
+
+	return 0;
 }

 static void vfio_pci_request(void *device_data, unsigned int count)

@@ -1608,6 +1879,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init(&vdev->irqlock);
 	mutex_init(&vdev->ioeventfds_lock);
 	INIT_LIST_HEAD(&vdev->ioeventfds_list);
+	mutex_init(&vdev->vma_lock);
+	INIT_LIST_HEAD(&vdev->vma_list);
+	init_rwsem(&vdev->memory_lock);

 	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
 	if (ret)

@@ -1861,12 +2135,6 @@ static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
 	kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
 }

-struct vfio_devices {
-	struct vfio_device **devices;
-	int cur_index;
-	int max_index;
-};
-
 static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 {
 	struct vfio_devices *devs = data;

@@ -1897,6 +2165,39 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 	return 0;
 }

+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+{
+	struct vfio_devices *devs = data;
+	struct vfio_device *device;
+	struct vfio_pci_device *vdev;
+
+	if (devs->cur_index == devs->max_index)
+		return -ENOSPC;
+
+	device = vfio_device_get_from_dev(&pdev->dev);
+	if (!device)
+		return -EINVAL;
+
+	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
+		vfio_device_put(device);
+		return -EBUSY;
+	}
+
+	vdev = vfio_device_data(device);
+
+	/*
+	 * Locking multiple devices is prone to deadlock, runaway and
+	 * unwind if we hit contention.
+	 */
+	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
+		vfio_device_put(device);
+		return -EBUSY;
+	}
+
+	devs->devices[devs->cur_index++] = device;
+
+	return 0;
+}
+
 /*
  * If a bus or slot reset is available for the provided device and:
  *  - All of the devices affected by that bus or slot reset are unused
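End to end, the fault handler above changes what a user observes: mappings populate lazily on first access, and an access while memory decode is disabled raises SIGBUS instead of reaching the device. A userspace sketch (hypothetical; error handling and the container/group setup that produces `device` are omitted):

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/vfio.h>
#include <linux/pci_regs.h>

static void demo(int device)
{
	struct vfio_region_info bar = { .argsz = sizeof(bar),
					.index = VFIO_PCI_BAR0_REGION_INDEX };
	struct vfio_region_info cfg = { .argsz = sizeof(cfg),
					.index = VFIO_PCI_CONFIG_REGION_INDEX };
	volatile uint32_t *mmio;
	uint16_t cmd;

	ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &bar);
	ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &cfg);

	/* mmap() still succeeds; pages are now faulted in on first access. */
	mmio = mmap(NULL, bar.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    device, (off_t)bar.offset);

	(void)mmio[0];	/* fault: decode on, PTEs installed, vma tracked */

	/* Clearing PCI_COMMAND_MEMORY makes vfio-pci zap the mapping... */
	pread(device, &cmd, sizeof(cmd), cfg.offset + PCI_COMMAND);
	cmd &= ~PCI_COMMAND_MEMORY;
	pwrite(device, &cmd, sizeof(cmd), cfg.offset + PCI_COMMAND);

	(void)mmio[0];	/* ...so this re-faults and raises SIGBUS */
}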
drivers/vfio/pci/vfio_pci_config.c  +41 −9

@@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write)
 	*(__le32 *)(&p->write[off]) = cpu_to_le32(write);
 }

+/* Caller should hold memory_lock semaphore */
+bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
+{
+	u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
+
+	return cmd & PCI_COMMAND_MEMORY;
+}
+
 /*
  * Restore the *real* BARs after we detect a FLR or backdoor reset.
  * (backdoor = some device specific technique that we didn't catch)

@@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 		new_cmd = le32_to_cpu(val);

-		phys_io = !!(phys_cmd & PCI_COMMAND_IO);
-		virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
-		new_io = !!(new_cmd & PCI_COMMAND_IO);
-
 		phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY);
 		virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
 		new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);

+		phys_io = !!(phys_cmd & PCI_COMMAND_IO);
+		virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
+		new_io = !!(new_cmd & PCI_COMMAND_IO);
+
+		if (!new_mem)
+			vfio_pci_zap_and_down_write_memory_lock(vdev);
+		else
+			down_write(&vdev->memory_lock);
+
 		/*
 		 * If the user is writing mem/io enable (new_mem/io) and we

@@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 	}

 	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
-	if (count < 0)
+	if (count < 0) {
+		if (offset == PCI_COMMAND)
+			up_write(&vdev->memory_lock);
 		return count;
+	}

 	/*
 	 * Save current memory/io enable bits in vconfig to allow for

@@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 		*virt_cmd &= cpu_to_le16(~mask);
 		*virt_cmd |= cpu_to_le16(new_cmd & mask);
+
+		up_write(&vdev->memory_lock);
 	}

 	/* Emulate INTx disable */

@@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
 					 pos - offset + PCI_EXP_DEVCAP,
 					 &cap);

-		if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
+		if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
+			vfio_pci_zap_and_down_write_memory_lock(vdev);
 			pci_try_reset_function(vdev->pdev);
+			up_write(&vdev->memory_lock);
+		}
 	}

 	/*

@@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
 					 pos - offset + PCI_AF_CAP,
 					 &cap);

-		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
+		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
+			vfio_pci_zap_and_down_write_memory_lock(vdev);
 			pci_try_reset_function(vdev->pdev);
+			up_write(&vdev->memory_lock);
+		}
 	}

 	return count;

@@ -1462,7 +1486,12 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
 		if (ret)
 			return ret;

-		if (cap <= PCI_CAP_ID_MAX) {
+		/*
+		 * ID 0 is a NULL capability, conflicting with our fake
+		 * PCI_CAP_ID_BASIC.  As it has no content, consider it
+		 * hidden for now.
+		 */
+		if (cap && cap <= PCI_CAP_ID_MAX) {
 			len = pci_cap_length[cap];
 			if (len == 0xFF) { /* Variable length */
 				len = vfio_cap_len(vdev, cap, pos);

@@ -1728,9 +1757,12 @@ void vfio_config_free(struct vfio_pci_device *vdev)
 	vdev->vconfig = NULL;
 	kfree(vdev->pci_config_map);
 	vdev->pci_config_map = NULL;
-	free_perm_bits(vdev->msi_perm);
-	kfree(vdev->msi_perm);
-	vdev->msi_perm = NULL;
+
+	if (vdev->msi_perm) {
+		free_perm_bits(vdev->msi_perm);
+		kfree(vdev->msi_perm);
+		vdev->msi_perm = NULL;
+	}
 }

 /*
  * Find the remaining number of bytes in a dword that match the given
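The two FLR hunks above apply the same discipline as VFIO_DEVICE_RESET to resets reachable through emulated config space. The pattern, as a sketch for any similar backdoor-reset path (hypothetical caller, not part of the patch):

static void example_backdoor_reset(struct vfio_pci_device *vdev)
{
	vfio_pci_zap_and_down_write_memory_lock(vdev);	/* zap user mmaps */
	pci_try_reset_function(vdev->pdev);
	up_write(&vdev->memory_lock);	/* later faults re-check decode */
}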
drivers/vfio/pci/vfio_pci_intrs.c  +14 −0

@@ -249,6 +249,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
 	int ret;
+	u16 cmd;

 	if (!is_irq_none(vdev))
 		return -EINVAL;

@@ -258,13 +259,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
 		return -ENOMEM;

 	/* return the number of supported vectors if we can't get all: */
+	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
 	if (ret < nvec) {
 		if (ret > 0)
 			pci_free_irq_vectors(pdev);
+		vfio_pci_memory_unlock_and_restore(vdev, cmd);
 		kfree(vdev->ctx);
 		return ret;
 	}
+	vfio_pci_memory_unlock_and_restore(vdev, cmd);

 	vdev->num_ctx = nvec;
 	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :

@@ -287,6 +291,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	struct pci_dev *pdev = vdev->pdev;
 	struct eventfd_ctx *trigger;
 	int irq, ret;
+	u16 cmd;

 	if (vector < 0 || vector >= vdev->num_ctx)
 		return -EINVAL;

@@ -295,7 +300,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	if (vdev->ctx[vector].trigger) {
 		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+
+		cmd = vfio_pci_memory_lock_and_enable(vdev);
 		free_irq(irq, vdev->ctx[vector].trigger);
+		vfio_pci_memory_unlock_and_restore(vdev, cmd);
+
 		kfree(vdev->ctx[vector].name);
 		eventfd_ctx_put(vdev->ctx[vector].trigger);
 		vdev->ctx[vector].trigger = NULL;

@@ -323,6 +332,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	 * such a reset it would be unsuccessful. To avoid this, restore the
 	 * cached value of the message prior to enabling.
 	 */
+	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	if (msix) {
 		struct msi_msg msg;

@@ -332,6 +342,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	ret = request_irq(irq, vfio_msihandler, 0,
 			  vdev->ctx[vector].name, trigger);
+	vfio_pci_memory_unlock_and_restore(vdev, cmd);
 	if (ret) {
 		kfree(vdev->ctx[vector].name);
 		eventfd_ctx_put(trigger);

@@ -376,6 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int i;
+	u16 cmd;

 	for (i = 0; i < vdev->num_ctx; i++) {
 		vfio_virqfd_disable(&vdev->ctx[i].unmask);

@@ -384,7 +396,9 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
 	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);

+	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	pci_free_irq_vectors(pdev);
+	vfio_pci_memory_unlock_and_restore(vdev, cmd);

 	/*
 	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
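Why the MSI/MSI-X paths need the bracket (annotation, not part of the patch): for MSI-X the vector table lives in device memory, so vector allocation, free_irq() and request_irq() can all touch the device internally; wrapping each call keeps a user who cleared PCI_COMMAND_MEMORY from turning those host-side accesses into errors. The generic shape used in every hunk above:

static void example_msi_bracket(struct vfio_pci_device *vdev)
{
	u16 cmd = vfio_pci_memory_lock_and_enable(vdev);

	/*
	 * ...PCI core call that may touch device MMIO, e.g. the MSI-X
	 * vector table: pci_alloc_irq_vectors(), free_irq()...
	 */

	vfio_pci_memory_unlock_and_restore(vdev, cmd);
}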
drivers/vfio/pci/vfio_pci_private.h  +15 −0

@@ -92,6 +92,11 @@
 	int			users;
 };

+struct vfio_pci_mmap_vma {
+	struct vm_area_struct	*vma;
+	struct list_head	vma_next;
+};
+
 struct vfio_pci_device {
 	struct pci_dev		*pdev;
 	void __iomem		*barmap[PCI_STD_NUM_BARS];

@@ -132,6 +137,9 @@
 	struct list_head	ioeventfds_list;
 	struct vfio_pci_vf_token	*vf_token;
 	struct notifier_block	nb;
+	struct mutex		vma_lock;
+	struct list_head	vma_list;
+	struct rw_semaphore	memory_lock;
 };

 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)

@@ -174,6 +182,13 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
 				    pci_power_t state);

+extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
+extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
+						    *vdev);
+extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
+extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
+					       u16 cmd);
+
 #ifdef CONFIG_VFIO_PCI_IGD
 extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
 #else
drivers/vfio/pci/vfio_pci_rdwr.c  +20 −4

@@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 	size_t x_start = 0, x_end = 0;
 	resource_size_t end;
 	void __iomem *io;
+	struct resource *res = &vdev->pdev->resource[bar];
 	ssize_t done;

 	if (pci_resource_start(pdev, bar))

@@ -177,6 +178,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 	count = min(count, (size_t)(end - pos));

+	if (res->flags & IORESOURCE_MEM) {
+		down_read(&vdev->memory_lock);
+		if (!__vfio_pci_memory_enabled(vdev)) {
+			up_read(&vdev->memory_lock);
+			return -EIO;
+		}
+	}
+
 	if (bar == PCI_ROM_RESOURCE) {
 		/*
 		 * The ROM can fill less space than the BAR, so we start the

@@ -184,13 +193,17 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 		 * filling large ROM BARs much faster.
 		 */
 		io = pci_map_rom(pdev, &x_start);
-		if (!io)
-			return -ENOMEM;
+		if (!io) {
+			done = -ENOMEM;
+			goto out;
+		}
 		x_end = end;
 	} else {
 		int ret = vfio_pci_setup_barmap(vdev, bar);
-		if (ret)
-			return ret;
+		if (ret) {
+			done = ret;
+			goto out;
+		}

 		io = vdev->barmap[bar];
 	}

@@ -207,6 +220,9 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 	if (bar == PCI_ROM_RESOURCE)
 		pci_unmap_rom(pdev, io);

+out:
+	if (res->flags & IORESOURCE_MEM)
+		up_read(&vdev->memory_lock);

 	return done;
 }
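Userspace view of the vfio_pci_bar_rw() check above (sketch, reusing the hypothetical `device` and BAR region offset from the earlier mmap example):

#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

static void demo_blocked_read(int device, uint64_t bar_offset)
{
	uint32_t val;

	/*
	 * With PCI_COMMAND_MEMORY clear, a region read() on a memory BAR
	 * no longer reaches the device; it fails with EIO instead.
	 */
	if (pread(device, &val, sizeof(val), bar_offset) < 0 &&
	    errno == EIO)
		fprintf(stderr, "BAR read blocked while decode disabled\n");
}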