Commit 53a259da authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-pci-for-qemu-20140630.0' into staging



VFIO patches: MSI-X masking performance fix, Endian fixes, fix runstate on device error

# gpg: Signature made Mon 30 Jun 2014 18:13:40 BST using RSA key ID 3BB08B22
# gpg: Can't check signature: public key not found

* remotes/awilliam/tags/vfio-pci-for-qemu-20140630.0:
  vfio: use correct runstate
  vfio: Make BARs native endian
  vfio-pci: Fix MSI-X masking performance
  vfio-pci: Fix MSI/X debug code

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 8954000b ba29776f
Loading
Loading
Loading
Loading
+165 −115
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ typedef struct VFIOINTx {

typedef struct VFIOMSIVector {
    EventNotifier interrupt; /* eventfd triggered on interrupt */
    EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */
    struct VFIODevice *vdev; /* back pointer to device */
    MSIMessage msg; /* cache the MSI message so we know when it changes */
    int virq; /* KVM irqchip route for QEMU bypass */
@@ -642,9 +643,9 @@ static void vfio_msi_interrupt(void *opaque)
    MSIMessage msg;

    if (vdev->interrupt == VFIO_INT_MSIX) {
        msg = msi_get_message(&vdev->pdev, nr);
    } else if (vdev->interrupt == VFIO_INT_MSI) {
        msg = msix_get_message(&vdev->pdev, nr);
    } else if (vdev->interrupt == VFIO_INT_MSI) {
        msg = msi_get_message(&vdev->pdev, nr);
    } else {
        abort();
    }
@@ -682,11 +683,12 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
    for (i = 0; i < vdev->nr_vectors; i++) {
        if (!vdev->msi_vectors[i].use) {
            fds[i] = -1;
            continue;
        }

        } else if (vdev->msi_vectors[i].virq >= 0) {
            fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
        } else {
            fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
        }
    }

    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);

@@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
    return ret;
}

/*
 * Try to set up the KVM irqfd bypass for one MSI/MSI-X vector.
 *
 * @vector: vector whose kvm_interrupt notifier, msg and virq are set up
 * @msg:    MSI message to route; when NULL nothing is done
 * @msix:   true for MSI-X (gated by VFIO_ALLOW_KVM_MSIX), false for MSI
 *          (gated by VFIO_ALLOW_KVM_MSI)
 *
 * On success vector->msg holds a copy of *msg and vector->virq holds the
 * route returned by kvm_irqchip_add_msi_route().  On any failure everything
 * acquired so far is released again and neither vector->msg nor
 * vector->virq is written; no error is reported to the caller.
 */
static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
                                  bool msix)
{
    int route;

    /* The bypass must be permitted for this interrupt type... */
    if (msix ? !VFIO_ALLOW_KVM_MSIX : !VFIO_ALLOW_KVM_MSI) {
        return;
    }

    /* ...and there must be a message to build the route from. */
    if (!msg) {
        return;
    }

    if (event_notifier_init(&vector->kvm_interrupt, 0) != 0) {
        return;
    }

    route = kvm_irqchip_add_msi_route(kvm_state, *msg);
    if (route < 0) {
        goto undo_notifier;
    }

    if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
                                       NULL, route) < 0) {
        goto undo_route;
    }

    /* Only publish the route once every step has succeeded. */
    vector->msg = *msg;
    vector->virq = route;
    return;

undo_route:
    kvm_irqchip_release_virq(kvm_state, route);
undo_notifier:
    event_notifier_cleanup(&vector->kvm_interrupt);
}

/*
 * Undo the KVM irqfd bypass of a vector: detach vector->kvm_interrupt from
 * the route in vector->virq, release that route, reset vector->virq to -1
 * and clean up the kvm_interrupt notifier.
 *
 * The function does not itself check that a route exists.
 * NOTE(review): callers appear to guard this with vector->virq >= 0 -
 * confirm for any new call site.
 */
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
{
    int route = vector->virq;

    kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
                                      route);
    kvm_irqchip_release_virq(kvm_state, route);

    /* Mark the vector as having no KVM route before dropping the eventfd. */
    vector->virq = -1;
    event_notifier_cleanup(&vector->kvm_interrupt);
}

/*
 * Point the vector's existing KVM MSI route (vector->virq) at a new message
 * and remember that message in vector->msg.
 *
 * The result of kvm_irqchip_update_msi_route() is not examined, so the
 * cached message is replaced unconditionally.
 */
static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg)
{
    int route = vector->virq;

    kvm_irqchip_update_msi_route(kvm_state, route, msg);
    vector->msg = msg;
}

static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
                                   MSIMessage *msg, IOHandler *handler)
{
@@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
            vdev->host.function, nr);

    vector = &vdev->msi_vectors[nr];
    vector->vdev = vdev;
    vector->use = true;

    msix_vector_use(pdev, nr);

    if (!vector->use) {
        vector->vdev = vdev;
        vector->virq = -1;
        if (event_notifier_init(&vector->interrupt, 0)) {
            error_report("vfio: Error: event_notifier_init failed");
        }
        vector->use = true;
        msix_vector_use(pdev, nr);
    }

    qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                        handler, NULL, vector);

    /*
     * Attempt to enable route through KVM irqchip,
     * default to userspace handling if unavailable.
     */
    vector->virq = msg && VFIO_ALLOW_KVM_MSIX ?
                   kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
    if (vector->virq < 0 ||
        kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                       NULL, vector->virq) < 0) {
    if (vector->virq >= 0) {
            kvm_irqchip_release_virq(kvm_state, vector->virq);
            vector->virq = -1;
        if (!msg) {
            vfio_remove_kvm_msi_virq(vector);
        } else {
            vfio_update_kvm_msi_virq(vector, *msg);
        }
        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            handler, NULL, vector);
    } else {
        vfio_add_kvm_msi_virq(vector, msg, true);
    }

    /*
@@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
        irq_set->count = 1;
        pfd = (int32_t *)&irq_set->data;

        if (vector->virq >= 0) {
            *pfd = event_notifier_get_fd(&vector->kvm_interrupt);
        } else {
            *pfd = event_notifier_get_fd(&vector->interrupt);
        }

        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
        g_free(irq_set);
@@ -783,20 +837,23 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOMSIVector *vector = &vdev->msi_vectors[nr];
    int argsz;
    struct vfio_irq_set *irq_set;
    int32_t *pfd;

    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, nr);

    /*
     * XXX What's the right thing to do here?  This turns off the interrupt
     * completely, but do we really just want to switch the interrupt to
     * bouncing through userspace and let msix.c drop it?  Not sure.
     * There are still old guests that mask and unmask vectors on every
     * interrupt.  If we're using QEMU bypass with a KVM irqfd, leave all of
     * the KVM setup in place, simply switch VFIO to use the non-bypass
     * eventfd.  We'll then fire the interrupt through QEMU and the MSI-X
     * core will mask the interrupt and set pending bits, allowing it to
     * be re-asserted on unmask.  Nothing to do if already using QEMU mode.
     */
    msix_vector_unuse(pdev, nr);
    if (vector->virq >= 0) {
        int argsz;
        struct vfio_irq_set *irq_set;
        int32_t *pfd;

        argsz = sizeof(*irq_set) + sizeof(*pfd);

@@ -809,24 +866,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
        irq_set->count = 1;
        pfd = (int32_t *)&irq_set->data;

    *pfd = -1;
        *pfd = event_notifier_get_fd(&vector->interrupt);

        ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);

        g_free(irq_set);

    if (vector->virq < 0) {
        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            NULL, NULL, NULL);
    } else {
        kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
                                          vector->virq);
        kvm_irqchip_release_virq(kvm_state, vector->virq);
        vector->virq = -1;
    }

    event_notifier_cleanup(&vector->interrupt);
    vector->use = false;
}

static void vfio_enable_msix(VFIODevice *vdev)
@@ -876,28 +921,28 @@ retry:
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        vector->vdev = vdev;
        vector->virq = -1;
        vector->use = true;

        if (event_notifier_init(&vector->interrupt, 0)) {
            error_report("vfio: Error: event_notifier_init failed");
        }

        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            vfio_msi_interrupt, NULL, vector);

        vector->msg = msi_get_message(&vdev->pdev, i);

        /*
         * Attempt to enable route through KVM irqchip,
         * default to userspace handling if unavailable.
         */
        vector->virq = VFIO_ALLOW_KVM_MSI ?
                       kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1;
        if (vector->virq < 0 ||
            kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                           NULL, vector->virq) < 0) {
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                vfio_msi_interrupt, NULL, vector);
        }
        vfio_add_kvm_msi_virq(vector, &vector->msg, false);
    }

    /* Set interrupt type prior to possible interrupts */
    vdev->interrupt = VFIO_INT_MSI;

    ret = vfio_enable_vectors(vdev, false);
    if (ret) {
        if (ret < 0) {
@@ -910,14 +955,10 @@ retry:
        for (i = 0; i < vdev->nr_vectors; i++) {
            VFIOMSIVector *vector = &vdev->msi_vectors[i];
            if (vector->virq >= 0) {
                kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
                                                  vector->virq);
                kvm_irqchip_release_virq(kvm_state, vector->virq);
                vector->virq = -1;
            } else {
                vfio_remove_kvm_msi_virq(vector);
            }
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                NULL, NULL, NULL);
            }
            event_notifier_cleanup(&vector->interrupt);
        }

@@ -929,11 +970,17 @@ retry:
        }
        vdev->nr_vectors = 0;

        /*
         * Failing to setup MSI doesn't really fall within any specification.
         * Let's try leaving interrupts disabled and hope the guest figures
         * out to fall back to INTx for this device.
         */
        error_report("vfio: Error: Failed to enable MSI");
        vdev->interrupt = VFIO_INT_NONE;

        return;
    }

    vdev->interrupt = VFIO_INT_MSI;

    DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, vdev->nr_vectors);
@@ -941,6 +988,20 @@ retry:

static void vfio_disable_msi_common(VFIODevice *vdev)
{
    int i;

    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];
        if (vdev->msi_vectors[i].use) {
            if (vector->virq >= 0) {
                vfio_remove_kvm_msi_virq(vector);
            }
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                NULL, NULL, NULL);
            event_notifier_cleanup(&vector->interrupt);
        }
    }

    g_free(vdev->msi_vectors);
    vdev->msi_vectors = NULL;
    vdev->nr_vectors = 0;
@@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
    for (i = 0; i < vdev->nr_vectors; i++) {
        if (vdev->msi_vectors[i].use) {
            vfio_msix_vector_release(&vdev->pdev, i);
            msix_vector_unuse(&vdev->pdev, i);
        }
    }

@@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev)

static void vfio_disable_msi(VFIODevice *vdev)
{
    int i;

    vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX);

    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        if (!vector->use) {
            continue;
        }

        if (vector->virq >= 0) {
            kvm_irqchip_remove_irqfd_notifier(kvm_state,
                                              &vector->interrupt, vector->virq);
            kvm_irqchip_release_virq(kvm_state, vector->virq);
            vector->virq = -1;
        } else {
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                NULL, NULL, NULL);
        }

        event_notifier_cleanup(&vector->interrupt);
    }

    vfio_disable_msi_common(vdev);

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev)
        }

        msg = msi_get_message(&vdev->pdev, i);

        if (msg.address != vector->msg.address ||
            msg.data != vector->msg.data) {

            DPRINTF("%s(%04x:%02x:%02x.%x) MSI vector %d changed\n",
                    __func__, vdev->host.domain, vdev->host.bus,
                    vdev->host.slot, vdev->host.function, i);

            kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
            vector->msg = msg;
        }
        vfio_update_kvm_msi_virq(vector, msg);
    }
}

@@ -1053,10 +1082,10 @@ static void vfio_bar_write(void *opaque, hwaddr addr,
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        buf.word = data;
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        buf.dword = data;
        break;
    default:
        hw_error("vfio: unsupported write size, %d bytes", size);
@@ -1113,10 +1142,10 @@ static uint64_t vfio_bar_read(void *opaque,
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        data = buf.word;
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        data = buf.dword;
        break;
    default:
        hw_error("vfio: unsupported read size, %d bytes", size);
@@ -1143,7 +1172,7 @@ static uint64_t vfio_bar_read(void *opaque,
static const MemoryRegionOps vfio_bar_ops = {
    .read = vfio_bar_read,
    .write = vfio_bar_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static void vfio_pci_load_rom(VFIODevice *vdev)
@@ -1205,21 +1234,42 @@ static void vfio_pci_load_rom(VFIODevice *vdev)
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
{
    VFIODevice *vdev = opaque;
    uint64_t val = ((uint64_t)1 << (size * 8)) - 1;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    /* Load the ROM lazily when the guest tries to read it */
    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
        vfio_pci_load_rom(vdev);
    }

    memcpy(&val, vdev->rom + addr,
    memcpy(&buf, vdev->rom + addr,
           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);

    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = buf.word;
        break;
    case 4:
        data = buf.dword;
        break;
    default:
        hw_error("vfio: unsupported read size, %d bytes", size);
        break;
    }

    DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
            __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, addr, size, val);
            vdev->host.function, addr, size, data);

    return val;
    return data;
}

static void vfio_rom_write(void *opaque, hwaddr addr,
@@ -1230,7 +1280,7 @@ static void vfio_rom_write(void *opaque, hwaddr addr,
static const MemoryRegionOps vfio_rom_ops = {
    .read = vfio_rom_read,
    .write = vfio_rom_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
@@ -4012,7 +4062,7 @@ static void vfio_err_notifier_handler(void *opaque)
                 __func__, vdev->host.domain, vdev->host.bus,
                 vdev->host.slot, vdev->host.function);

    vm_stop(RUN_STATE_IO_ERROR);
    vm_stop(RUN_STATE_INTERNAL_ERROR);
}

/*