Commit 1c5880e7 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging



virtio, pci: fixes

A couple of bugfixes.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 06 Nov 2019 12:00:19 GMT
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio: notify virtqueue via host notifier when available
  hw/i386: AMD-Vi IVRS DMA alias support
  pci: Use PCI aliases when determining device IOMMU address space

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents d0f90e14 fcccb271
Loading
Loading
Loading
Loading
+120 −7
Original line number Diff line number Diff line
@@ -2517,12 +2517,105 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 */
#define IOAPIC_SB_DEVID   (uint64_t)PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))

/*
 * Insert IVHD entry for device and recurse, insert alias, or insert range as
 * necessary for the PCI topology.
 */
static void
insert_ivhd(PCIBus *bus, PCIDevice *dev, void *opaque)
{
    GArray *table_data = opaque;
    uint32_t ivhd = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn) << 8 | 0x2;

    /* Every device gets a type 0x2 "Select" IVHD entry naming it */
    build_append_int_noprefix(table_data, ivhd, 4);

    /* Only bridges require any further topology handling */
    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
        return;
    }

    PCIBus *secondary = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
    uint8_t sec_num = pci_bus_num(secondary);
    uint8_t sub_num = dev->config[PCI_SUBORDINATE_BUS];

    if (!pci_bus_is_express(secondary)) {
        /*
         * A conventional secondary bus is covered by a single Alias range
         * spanning devfn 00.0 on the secondary bus through devfn 1f.7 on
         * the subordinate bus.  The alias target (DeviceIDb in the AMD
         * IOMMU spec) depends on the bridge flavor, mirroring the logic
         * in pci_device_iommu_address_space(): a proper PCIe-to-PCI
         * bridge aliases to secondary-bus:00.0, anything else aliases to
         * the bridge's own requester ID.
         */
        uint16_t range_start = PCI_BUILD_BDF(sec_num, PCI_DEVFN(0, 0));
        uint16_t alias_target;

        if (pci_is_express(dev) &&
            pcie_cap_get_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) {
            alias_target = range_start;
        } else {
            alias_target = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn);
        }

        /* "Alias Start of Range" IVHD entry, type 0x43, 8 bytes total */
        build_append_int_noprefix(table_data, range_start << 8 | 0x43, 4);
        build_append_int_noprefix(table_data, alias_target << 8 | 0x0, 4);

        /* "End of Range" IVHD entry, type 0x4 */
        ivhd = PCI_BUILD_BDF(sub_num, PCI_DEVFN(31, 7)) << 8 | 0x4;
        build_append_int_noprefix(table_data, ivhd, 4);
    } else if (sec_num == sub_num) {
        /*
         * An express leaf bus (no subordinates beyond itself) is covered
         * compactly by one Start/End Range pair rather than per-device
         * Select entries.  Some bare metal firmware does the same for
         * root ports with slots; ranges for traversed buses would risk
         * emitting both Select and Range entries for the same device, so
         * we only use them here.
         */
        /* "Start of Range" IVHD entry, type 0x3 */
        ivhd = PCI_BUILD_BDF(sec_num, PCI_DEVFN(0, 0)) << 8 | 0x3;
        build_append_int_noprefix(table_data, ivhd, 4);
        /* "End of Range" IVHD entry, type 0x4 */
        ivhd = PCI_BUILD_BDF(sub_num, PCI_DEVFN(31, 7)) << 8 | 0x4;
        build_append_int_noprefix(table_data, ivhd, 4);
    } else {
        /* Express bus with subordinates: recurse device by device */
        pci_for_each_device(secondary, sec_num, insert_ivhd, table_data);
    }
}

/* Walk every PCI host bridge and emit IVHD entries for its root bus */
static int
ivrs_host_bridges(Object *obj, void *opaque)
{
    GArray *ivhd_blob = opaque;
    PCIBus *bus;

    /* Only host bridge objects are of interest; ignore everything else */
    if (!object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
        return 0;
    }

    bus = PCI_HOST_BRIDGE(obj)->bus;
    if (bus) {
        pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob);
    }

    /* Always continue the object-tree traversal */
    return 0;
}

static void
build_amd_iommu(GArray *table_data, BIOSLinker *linker)
{
    int ivhd_table_len = 28;
    int ivhd_table_len = 24;
    int iommu_start = table_data->len;
    AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
    GArray *ivhd_blob = g_array_new(false, true, 1);

    /* IVRS header */
    acpi_data_push(table_data, sizeof(AcpiTableHeader));
@@ -2543,6 +2636,27 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
                             (1UL << 7),  /* PPRSup       */
                             1);

    /*
     * A PCI bus walk, for each PCI host bridge, is necessary to create a
     * complete set of IVHD entries.  Do this into a separate blob so that we
     * can calculate the total IVRS table length here and then append the new
     * blob further below.  Fall back to an entry covering all devices, which
     * is sufficient when no aliases are present.
     */
    object_child_foreach_recursive(object_get_root(),
                                   ivrs_host_bridges, ivhd_blob);

    if (!ivhd_blob->len) {
        /*
         *   Type 1 device entry reporting all devices
         *   These are 4-byte device entries currently reporting the range of
         *   Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
         */
        build_append_int_noprefix(ivhd_blob, 0x0000001, 4);
    }

    ivhd_table_len += ivhd_blob->len;

    /*
     * When interrupt remapping is supported, we add a special IVHD device
     * for type IO-APIC.
@@ -2550,6 +2664,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
    if (x86_iommu_ir_supported(x86_iommu_get_default())) {
        ivhd_table_len += 8;
    }

    /* IVHD length */
    build_append_int_noprefix(table_data, ivhd_table_len, 2);
    /* DeviceID */
@@ -2569,12 +2684,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
                             (1UL << 2)   | /* GTSup  */
                             (1UL << 6),    /* GASup  */
                             4);
    /*
     *   Type 1 device entry reporting all devices
     *   These are 4-byte device entries currently reporting the range of
     *   Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
     */
    build_append_int_noprefix(table_data, 0x0000001, 4);

    /* IVHD entries as found above */
    g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
    g_array_free(ivhd_blob, TRUE);

    /*
     * Add a special IVHD device type.
+40 −3
Original line number Diff line number Diff line
@@ -2646,12 +2646,49 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
{
    PCIBus *bus = pci_get_bus(dev);
    PCIBus *iommu_bus = bus;
    uint8_t devfn = dev->devfn;

    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
        iommu_bus = pci_get_bus(iommu_bus->parent_dev);
        PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);

        /*
         * The requester ID of the provided device may be aliased, as seen from
         * the IOMMU, due to topology limitations.  The IOMMU relies on a
         * requester ID to provide a unique AddressSpace for devices, but
         * conventional PCI buses pre-date such concepts.  Instead, the PCIe-
         * to-PCI bridge creates and accepts transactions on behalf of down-
         * stream devices.  When doing so, all downstream devices are masked
         * (aliased) behind a single requester ID.  The requester ID used
         * depends on the format of the bridge devices.  Proper PCIe-to-PCI
         * bridges, with a PCIe capability indicating such, follow the
         * guidelines of chapter 2.3 of the PCIe-to-PCI/X bridge specification,
         * where the bridge uses the seconary bus as the bridge portion of the
         * requester ID and devfn of 00.0.  For other bridges, typically those
         * found on the root complex such as the dmi-to-pci-bridge, we follow
         * the convention of typical bare-metal hardware, which uses the
         * requester ID of the bridge itself.  There are device specific
         * exceptions to these rules, but these are the defaults that the
         * Linux kernel uses when determining DMA aliases itself and believed
         * to be true for the bare metal equivalents of the devices emulated
         * in QEMU.
         */
        if (!pci_bus_is_express(iommu_bus)) {
            PCIDevice *parent = iommu_bus->parent_dev;

            if (pci_is_express(parent) &&
                pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
                devfn = PCI_DEVFN(0, 0);
                bus = iommu_bus;
            } else {
                devfn = parent->devfn;
                bus = parent_bus;
            }
        }

        iommu_bus = parent_bus;
    }
    if (iommu_bus && iommu_bus->iommu_fn) {
        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, dev->devfn);
        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
    }
    return &address_space_memory;
}
+4 −0
Original line number Diff line number Diff line
@@ -288,6 +288,10 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
        k->ioeventfd_assign(proxy, notifier, n, false);
    }

    if (r == 0) {
        virtio_queue_set_host_notifier_enabled(vq, assign);
    }

    return r;
}

+8 −1
Original line number Diff line number Diff line
@@ -128,6 +128,7 @@ struct VirtQueue
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};

@@ -2271,7 +2272,7 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->handle_aio_output) {
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);
@@ -3145,6 +3146,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

    vdev->name = name;
@@ -3436,6 +3438,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
    return &vq->host_notifier;
}

/*
 * Record whether the host notifier (ioeventfd) is active for @vq.
 * virtio_queue_notify() consults this flag to decide between signalling
 * the host notifier eventfd and calling handle_output directly.
 */
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}

int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
                                      MemoryRegion *mr, bool assign)
{
+1 −0
Original line number Diff line number Diff line
@@ -312,6 +312,7 @@ int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
void virtio_device_release_ioeventfd(VirtIODevice *vdev);
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
void virtio_queue_host_notifier_read(EventNotifier *n);
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                VirtIOHandleAIOOutput handle_output);