Commit f1d0f15a authored by Michael S. Tsirkin's avatar Michael S. Tsirkin
Browse files

virtio: backend virtqueue notifier masking



some backends (notably vhost) can mask events
at their source in a way that is more efficient
than masking through kvm.
Specifically
- masking in kvm uses rcu write side so it has high latency
- in kvm on unmask we always send an interrupt
masking at source does not have these issues.

Add such support in virtio.h and use in virtio-pci.

Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
parent 774345f9
Loading
Loading
Loading
Loading
+70 −9
Original line number Diff line number Diff line
@@ -510,6 +510,31 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
    }
}

static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                 unsigned int queue_no,
                                 unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    int ret;
    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
    return ret;
}

static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                      unsigned int queue_no,
                                      unsigned int vector)
{
    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
    assert(ret == 0);
}

static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
@@ -531,6 +556,16 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
        if (ret < 0) {
            goto undo;
        }
        /* If guest supports masking, set up irqfd now.
         * Otherwise, delay until unmasked in the frontend.
         */
        if (proxy->vdev->guest_notifier_mask) {
            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
            if (ret < 0) {
                kvm_virtio_pci_vq_vector_release(proxy, vector);
                goto undo;
            }
        }
    }
    return 0;

@@ -540,6 +575,9 @@ undo:
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        if (proxy->vdev->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
    return ret;
@@ -560,6 +598,12 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        /* If guest supports masking, clean up irqfd now.
         * Otherwise, it was cleaned when masked in the frontend.
         */
        if (proxy->vdev->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
}
@@ -581,7 +625,19 @@ static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
        }
    }

    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
    /* If guest supports masking, irqfd is already setup, unmask it.
     * Otherwise, set it up now.
     */
    if (proxy->vdev->guest_notifier_mask) {
        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (proxy->vdev->guest_notifier_pending &&
            proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
    }
    return ret;
}

@@ -589,13 +645,14 @@ static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
                                             unsigned int queue_no,
                                             unsigned int vector)
{
    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
    assert(ret == 0);
    /* If guest supports masking, keep irqfd but mask it.
     * Otherwise, clean it up now.
     */ 
    if (proxy->vdev->guest_notifier_mask) {
        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
    } else {
        kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
    }
}

static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
@@ -668,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
        }
        vq = virtio_get_queue(vdev, queue_no);
        notifier = virtio_queue_get_guest_notifier(vq);
        if (event_notifier_test_and_clear(notifier)) {
        if (vdev->guest_notifier_pending) {
            if (vdev->guest_notifier_pending(vdev, queue_no)) {
                msix_set_pending(dev, vector);
            }
        } else if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
+13 −0
Original line number Diff line number Diff line
@@ -126,6 +126,19 @@ struct VirtIODevice
    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
    void (*reset)(VirtIODevice *vdev);
    void (*set_status)(VirtIODevice *vdev, uint8_t val);
    /* Test and clear event pending status.
     * Should be called after unmask to avoid losing events.
     * If backend does not support masking,
     * must check in frontend instead.
     */
    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
    /* Mask/unmask events from this vq. Any events reported
     * while masked will become pending.
     * If backend does not support masking,
     * must mask in frontend instead.
     */
    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);

    VirtQueue *vq;
    const VirtIOBindings *binding;
    DeviceState *binding_opaque;