Commit 586d1a99 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20160219.1' into staging



VFIO updates 2016-02-19

 - AER pre-enable and misc fixes (Cao jin and Chen Fan)
 - PCI_CAP_LIST_NEXT & PCI_MSIX_FLAGS cleanup (Wei Yang)
 - AMD XGBE KVM platform passthrough (Eric Auger)

# gpg: Signature made Fri 19 Feb 2016 17:28:36 GMT using RSA key ID 3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"

* remotes/awilliam/tags/vfio-update-20160219.1:
  vfio/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries
  hw/arm/sysbus-fdt: remove qemu_fdt_setprop returned value check
  hw/arm/sysbus-fdt: enable amd-xgbe dynamic instantiation
  hw/arm/sysbus-fdt: helpers for clock node generation
  device_tree: qemu_fdt_getprop_cell converted to use the error API
  device_tree: qemu_fdt_getprop converted to use the error API
  device_tree: introduce qemu_fdt_node_path
  device_tree: introduce load_device_tree_from_sysfs
  hw/vfio/platform: amd-xgbe device
  vfio/pci: replace 1 with PCI_CAP_LIST_NEXT to make code self-explain
  pcie_aer: expose pcie_aer_msg() interface
  aer: impove pcie_aer_init to support vfio device
  vfio: make the 4 bytes aligned for capability size
  pcie: modify the capability size assert

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 3ba32c10 b58b17f7
Loading
Loading
Loading
Loading
+172 −10
Original line number Diff line number Diff line
@@ -13,6 +13,10 @@

#include "qemu/osdep.h"

#ifdef CONFIG_LINUX
#include <dirent.h>
#endif

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "sysemu/device_tree.h"
@@ -112,6 +116,102 @@ fail:
    return NULL;
}

#ifdef CONFIG_LINUX

#define SYSFS_DT_BASEDIR "/proc/device-tree"

/**
 * read_fstree: this function is inspired from dtc read_fstree
 * @fdt: preallocated fdt blob buffer, to be populated
 * @dirname: directory to scan under SYSFS_DT_BASEDIR
 * the search is recursive and the tree is searched down to the
 * leaves (property files).
 *
 * the function asserts in case of error
 */
static void read_fstree(void *fdt, const char *dirname)
{
    DIR *d;
    struct dirent *de;
    struct stat st;
    const char *root_dir = SYSFS_DT_BASEDIR;
    const char *parent_node;

    if (strstr(dirname, root_dir) != dirname) {
        error_setg(&error_fatal, "%s: %s must be searched within %s",
                   __func__, dirname, root_dir);
    }
    parent_node = &dirname[strlen(SYSFS_DT_BASEDIR)];

    d = opendir(dirname);
    if (!d) {
        error_setg(&error_fatal, "%s cannot open %s", __func__, dirname);
    }

    while ((de = readdir(d)) != NULL) {
        char *tmpnam;

        if (!g_strcmp0(de->d_name, ".")
            || !g_strcmp0(de->d_name, "..")) {
            continue;
        }

        tmpnam = g_strdup_printf("%s/%s", dirname, de->d_name);

        if (lstat(tmpnam, &st) < 0) {
            error_setg(&error_fatal, "%s cannot lstat %s", __func__, tmpnam);
        }

        if (S_ISREG(st.st_mode)) {
            gchar *val;
            gsize len;

            if (!g_file_get_contents(tmpnam, &val, &len, NULL)) {
                error_setg(&error_fatal, "%s not able to extract info from %s",
                           __func__, tmpnam);
            }

            if (strlen(parent_node) > 0) {
                qemu_fdt_setprop(fdt, parent_node,
                                 de->d_name, val, len);
            } else {
                qemu_fdt_setprop(fdt, "/", de->d_name, val, len);
            }
            g_free(val);
        } else if (S_ISDIR(st.st_mode)) {
            char *node_name;

            node_name = g_strdup_printf("%s/%s",
                                        parent_node, de->d_name);
            qemu_fdt_add_subnode(fdt, node_name);
            g_free(node_name);
            read_fstree(fdt, tmpnam);
        }

        g_free(tmpnam);
    }

    closedir(d);
}

/* load_device_tree_from_sysfs: extract the dt blob from host sysfs */
void *load_device_tree_from_sysfs(void)
{
    void *host_fdt;
    int host_fdt_size;

    host_fdt = create_device_tree(&host_fdt_size);
    read_fstree(host_fdt, SYSFS_DT_BASEDIR);
    if (fdt_check_header(host_fdt)) {
        error_setg(&error_fatal,
                   "%s host device tree extracted into memory is invalid",
                   __func__);
    }
    return host_fdt;
}

#endif /* CONFIG_LINUX */

static int findnode_nofail(void *fdt, const char *node_path)
{
    int offset;
@@ -126,6 +226,60 @@ static int findnode_nofail(void *fdt, const char *node_path)
    return offset;
}

char **qemu_fdt_node_path(void *fdt, const char *name, char *compat,
                          Error **errp)
{
    int offset, len, ret;
    const char *iter_name;
    unsigned int path_len = 16, n = 0;
    GSList *path_list = NULL, *iter;
    char **path_array;

    offset = fdt_node_offset_by_compatible(fdt, -1, compat);

    while (offset >= 0) {
        iter_name = fdt_get_name(fdt, offset, &len);
        if (!iter_name) {
            offset = len;
            break;
        }
        if (!strcmp(iter_name, name)) {
            char *path;

            path = g_malloc(path_len);
            while ((ret = fdt_get_path(fdt, offset, path, path_len))
                  == -FDT_ERR_NOSPACE) {
                path_len += 16;
                path = g_realloc(path, path_len);
            }
            path_list = g_slist_prepend(path_list, path);
            n++;
        }
        offset = fdt_node_offset_by_compatible(fdt, offset, compat);
    }

    if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
        error_setg(errp, "%s: abort parsing dt for %s/%s: %s",
                   __func__, name, compat, fdt_strerror(offset));
        for (iter = path_list; iter; iter = iter->next) {
            g_free(iter->data);
        }
        g_slist_free(path_list);
        return NULL;
    }

    path_array = g_new(char *, n + 1);
    path_array[n--] = NULL;

    for (iter = path_list; iter; iter = iter->next) {
        path_array[n--] = iter->data;
    }

    g_slist_free(path_list);

    return path_array;
}

int qemu_fdt_setprop(void *fdt, const char *node_path,
                     const char *property, const void *val, int size)
{
@@ -179,31 +333,39 @@ int qemu_fdt_setprop_string(void *fdt, const char *node_path,
}

const void *qemu_fdt_getprop(void *fdt, const char *node_path,
                             const char *property, int *lenp)
                             const char *property, int *lenp, Error **errp)
{
    int len;
    const void *r;

    if (!lenp) {
        lenp = &len;
    }
    r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp);
    if (!r) {
        error_report("%s: Couldn't get %s/%s: %s", __func__,
        error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__,
                  node_path, property, fdt_strerror(*lenp));
        exit(1);
    }
    return r;
}

uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path,
                               const char *property)
                               const char *property, int *lenp, Error **errp)
{
    int len;
    const uint32_t *p = qemu_fdt_getprop(fdt, node_path, property, &len);
    if (len != 4) {
        error_report("%s: %s/%s not 4 bytes long (not a cell?)",
    const uint32_t *p;

    if (!lenp) {
        lenp = &len;
    }
    p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp);
    if (!p) {
        return 0;
    } else if (*lenp != 4) {
        error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)",
                   __func__, node_path, property);
        exit(1);
        *lenp = -EINVAL;
        return 0;
    }
    return be32_to_cpu(*p);
}
+4 −2
Original line number Diff line number Diff line
@@ -437,8 +437,10 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
        return 0;
    }

    acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells");
    scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells");
    acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells",
                                   NULL, &error_fatal);
    scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells",
                                   NULL, &error_fatal);
    if (acells == 0 || scells == 0) {
        fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
        goto fail;
+306 −13
Original line number Diff line number Diff line
@@ -22,6 +22,11 @@
 */

#include "qemu/osdep.h"
#include <libfdt.h>
#include "qemu-common.h"
#ifdef CONFIG_LINUX
#include <linux/vfio.h>
#endif
#include "hw/arm/sysbus-fdt.h"
#include "qemu/error-report.h"
#include "sysemu/device_tree.h"
@@ -29,6 +34,7 @@
#include "sysemu/sysemu.h"
#include "hw/vfio/vfio-platform.h"
#include "hw/vfio/vfio-calxeda-xgmac.h"
#include "hw/vfio/vfio-amd-xgbe.h"
#include "hw/arm/fdt.h"

/*
@@ -57,6 +63,146 @@ typedef struct NodeCreationPair {
    int (*add_fdt_node_fn)(SysBusDevice *sbdev, void *opaque);
} NodeCreationPair;

/* helpers */

typedef struct HostProperty {
    const char *name;
    bool optional;
} HostProperty;

#ifdef CONFIG_LINUX

/**
 * copy_properties_from_host
 *
 * copies properties listed in an array from host device tree to
 * guest device tree. If a non optional property is not found, the
 * function asserts. An optional property is ignored if not found
 * in the host device tree.
 * @props: array of HostProperty to copy
 * @nb_props: number of properties in the array
 * @host_dt: host device tree blob
 * @guest_dt: guest device tree blob
 * @node_path: host dt node path where the property is supposed to be
              found
 * @nodename: guest node name the properties should be added to
 */
static void copy_properties_from_host(HostProperty *props, int nb_props,
                                      void *host_fdt, void *guest_fdt,
                                      char *node_path, char *nodename)
{
    int i, prop_len;
    const void *r;
    Error *err = NULL;

    for (i = 0; i < nb_props; i++) {
        r = qemu_fdt_getprop(host_fdt, node_path,
                             props[i].name,
                             &prop_len,
                             props[i].optional ? &err : &error_fatal);
        if (r) {
            qemu_fdt_setprop(guest_fdt, nodename,
                             props[i].name, r, prop_len);
        } else {
            if (prop_len != -FDT_ERR_NOTFOUND) {
                /* optional property not returned although property exists */
                error_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

/* clock properties whose values are copied/pasted from host */
static HostProperty clock_copied_properties[] = {
    {"compatible", false},
    {"#clock-cells", false},
    {"clock-frequency", true},
    {"clock-output-names", true},
};

/**
 * fdt_build_clock_node
 *
 * Build a guest clock node, used as a dependency from a passthrough'ed
 * device. Most information are retrieved from the host clock node.
 * Also check the host clock is a fixed one.
 *
 * @host_fdt: host device tree blob from which info are retrieved
 * @guest_fdt: guest device tree blob where the clock node is added
 * @host_phandle: phandle of the clock in host device tree
 * @guest_phandle: phandle to assign to the guest node
 */
static void fdt_build_clock_node(void *host_fdt, void *guest_fdt,
                                uint32_t host_phandle,
                                uint32_t guest_phandle)
{
    char *node_path = NULL;
    char *nodename;
    const void *r;
    int ret, node_offset, prop_len, path_len = 16;

    node_offset = fdt_node_offset_by_phandle(host_fdt, host_phandle);
    if (node_offset <= 0) {
        error_setg(&error_fatal,
                   "not able to locate clock handle %d in host device tree",
                   host_phandle);
    }
    node_path = g_malloc(path_len);
    while ((ret = fdt_get_path(host_fdt, node_offset, node_path, path_len))
            == -FDT_ERR_NOSPACE) {
        path_len += 16;
        node_path = g_realloc(node_path, path_len);
    }
    if (ret < 0) {
        error_setg(&error_fatal,
                   "not able to retrieve node path for clock handle %d",
                   host_phandle);
    }

    r = qemu_fdt_getprop(host_fdt, node_path, "compatible", &prop_len,
                         &error_fatal);
    if (strcmp(r, "fixed-clock")) {
        error_setg(&error_fatal,
                   "clock handle %d is not a fixed clock", host_phandle);
    }

    nodename = strrchr(node_path, '/');
    qemu_fdt_add_subnode(guest_fdt, nodename);

    copy_properties_from_host(clock_copied_properties,
                              ARRAY_SIZE(clock_copied_properties),
                              host_fdt, guest_fdt,
                              node_path, nodename);

    qemu_fdt_setprop_cell(guest_fdt, nodename, "phandle", guest_phandle);

    g_free(node_path);
}

/**
 * sysfs_to_dt_name: convert the name found in sysfs into the node name
 * for instance e0900000.xgmac is converted into xgmac@e0900000
 * @sysfs_name: directory name in sysfs
 *
 * returns the device tree name upon success or NULL in case the sysfs name
 * does not match the expected format
 */
static char *sysfs_to_dt_name(const char *sysfs_name)
{
    gchar **substrings =  g_strsplit(sysfs_name, ".", 2);
    char *dt_name = NULL;

    if (!substrings || !substrings[0] || !substrings[1]) {
        goto out;
    }
    dt_name = g_strdup_printf("%s@%s", substrings[1], substrings[0]);
out:
    g_strfreev(substrings);
    return dt_name;
}

/* Device Specific Code */

/**
@@ -71,7 +217,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
    PlatformBusDevice *pbus = data->pbus;
    void *fdt = data->fdt;
    const char *parent_node = data->pbus_node_name;
    int compat_str_len, i, ret = -1;
    int compat_str_len, i;
    char *nodename;
    uint32_t *irq_attr, *reg_attr;
    uint64_t mmio_base, irq_number;
@@ -96,12 +242,8 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
        reg_attr[2 * i + 1] = cpu_to_be32(
                                memory_region_size(&vdev->regions[i]->mem));
    }
    ret = qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
    qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
                     vbasedev->num_regions * 2 * sizeof(uint32_t));
    if (ret) {
        error_report("could not set reg property of node %s", nodename);
        goto fail_reg;
    }

    irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
    for (i = 0; i < vbasedev->num_irqs; i++) {
@@ -111,22 +253,173 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
        irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
        irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    }
    ret = qemu_fdt_setprop(fdt, nodename, "interrupts",
    qemu_fdt_setprop(fdt, nodename, "interrupts",
                     irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
    if (ret) {
        error_report("could not set interrupts property of node %s",
                     nodename);
    g_free(irq_attr);
    g_free(reg_attr);
    g_free(nodename);
    return 0;
}

/* AMD xgbe properties whose values are copied/pasted from host */
static HostProperty amd_xgbe_copied_properties[] = {
    {"compatible", false},
    {"dma-coherent", true},
    {"amd,per-channel-interrupt", true},
    {"phy-mode", false},
    {"mac-address", true},
    {"amd,speed-set", false},
    {"amd,serdes-blwc", true},
    {"amd,serdes-cdr-rate", true},
    {"amd,serdes-pq-skew", true},
    {"amd,serdes-tx-amp", true},
    {"amd,serdes-dfe-tap-config", true},
    {"amd,serdes-dfe-tap-enable", true},
    {"clock-names", false},
};

/**
 * add_amd_xgbe_fdt_node
 *
 * Generates the combined xgbe/phy node following kernel >=4.2
 * binding documentation:
 * Documentation/devicetree/bindings/net/amd-xgbe.txt:
 * Also 2 clock nodes are created (dma and ptp)
 *
 * Asserts in case of error
 */
static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
{
    PlatformBusFDTData *data = opaque;
    PlatformBusDevice *pbus = data->pbus;
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
    VFIODevice *vbasedev = &vdev->vbasedev;
    VFIOINTp *intp;
    const char *parent_node = data->pbus_node_name;
    char **node_path, *nodename, *dt_name;
    void *guest_fdt = data->fdt, *host_fdt;
    const void *r;
    int i, prop_len;
    uint32_t *irq_attr, *reg_attr, *host_clock_phandles;
    uint64_t mmio_base, irq_number;
    uint32_t guest_clock_phandles[2];

    host_fdt = load_device_tree_from_sysfs();

    dt_name = sysfs_to_dt_name(vbasedev->name);
    if (!dt_name) {
        error_setg(&error_fatal, "%s incorrect sysfs device name %s",
                    __func__, vbasedev->name);
    }
    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
                                   &error_fatal);
    if (!node_path || !node_path[0]) {
        error_setg(&error_fatal, "%s unable to retrieve node path for %s/%s",
                   __func__, dt_name, vdev->compat);
    }

    if (node_path[1]) {
        error_setg(&error_fatal, "%s more than one node matching %s/%s!",
                   __func__, dt_name, vdev->compat);
    }

    g_free(dt_name);

    if (vbasedev->num_regions != 5) {
        error_setg(&error_fatal, "%s Does the host dt node combine XGBE/PHY?",
                   __func__);
    }

    /* generate nodes for DMA_CLK and PTP_CLK */
    r = qemu_fdt_getprop(host_fdt, node_path[0], "clocks",
                         &prop_len, &error_fatal);
    if (prop_len != 8) {
        error_setg(&error_fatal, "%s clocks property should contain 2 handles",
                   __func__);
    }
    host_clock_phandles = (uint32_t *)r;
    guest_clock_phandles[0] = qemu_fdt_alloc_phandle(guest_fdt);
    guest_clock_phandles[1] = qemu_fdt_alloc_phandle(guest_fdt);

    /**
     * clock handles fetched from host dt are in be32 layout whereas
     * rest of the code uses cpu layout. Also guest clock handles are
     * in cpu layout.
     */
    fdt_build_clock_node(host_fdt, guest_fdt,
                         be32_to_cpu(host_clock_phandles[0]),
                         guest_clock_phandles[0]);

    fdt_build_clock_node(host_fdt, guest_fdt,
                         be32_to_cpu(host_clock_phandles[1]),
                         guest_clock_phandles[1]);

    /* combined XGBE/PHY node */
    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
    nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
                               vbasedev->name, mmio_base);
    qemu_fdt_add_subnode(guest_fdt, nodename);

    copy_properties_from_host(amd_xgbe_copied_properties,
                       ARRAY_SIZE(amd_xgbe_copied_properties),
                       host_fdt, guest_fdt,
                       node_path[0], nodename);

    qemu_fdt_setprop_cells(guest_fdt, nodename, "clocks",
                           guest_clock_phandles[0],
                           guest_clock_phandles[1]);

    reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
    for (i = 0; i < vbasedev->num_regions; i++) {
        mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
        reg_attr[2 * i] = cpu_to_be32(mmio_base);
        reg_attr[2 * i + 1] = cpu_to_be32(
                                memory_region_size(&vdev->regions[i]->mem));
    }
    qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
                     vbasedev->num_regions * 2 * sizeof(uint32_t));

    irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
    for (i = 0; i < vbasedev->num_irqs; i++) {
        irq_number = platform_bus_get_irqn(pbus, sbdev , i)
                         + data->irq_start;
        irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
        irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
        /*
          * General device interrupt and PCS auto-negotiation interrupts are
          * level-sensitive while the 4 per-channel interrupts are edge
          * sensitive
          */
        QLIST_FOREACH(intp, &vdev->intp_list, next) {
            if (intp->pin == i) {
                break;
            }
        }
        if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
            irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
        } else {
            irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
        }
    }
    qemu_fdt_setprop(guest_fdt, nodename, "interrupts",
                     irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));

    g_free(host_fdt);
    g_strfreev(node_path);
    g_free(irq_attr);
fail_reg:
    g_free(reg_attr);
    g_free(nodename);
    return ret;
    return 0;
}

#endif /* CONFIG_LINUX */

/* list of supported dynamic sysbus devices */
static const NodeCreationPair add_fdt_node_functions[] = {
#ifdef CONFIG_LINUX
    {TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node},
    {TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node},
#endif
    {"", NULL}, /* last element */
};

+4 −2
Original line number Diff line number Diff line
@@ -478,8 +478,10 @@ static void vexpress_modify_dtb(const struct arm_boot_info *info, void *fdt)
    uint32_t acells, scells, intc;
    const VEDBoardInfo *daughterboard = (const VEDBoardInfo *)info;

    acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells");
    scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells");
    acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells",
                                   NULL, &error_fatal);
    scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells",
                                   NULL, &error_fatal);
    intc = find_int_controller(fdt);
    if (!intc) {
        /* Not fatal, we just won't provide virtio. This will
+1 −1
Original line number Diff line number Diff line
@@ -126,7 +126,7 @@ static int ioh3420_initfn(PCIDevice *d)
        goto err_pcie_cap;
    }
    pcie_cap_root_init(d);
    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
    if (rc < 0) {
        goto err;
    }
Loading