Commit a544c911 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20180817.0' into staging



VFIO update 2018-08-17

 - Enhance balloon inhibitor for multiple users and use around vfio
   device assignment (Alex Williamson)

# gpg: Signature made Fri 17 Aug 2018 17:43:37 BST
# gpg:                using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-update-20180817.0:
  vfio/ccw/pci: Allow devices to opt-in for ballooning
  vfio: Inhibit ballooning based on group attachment to a container
  kvm: Use inhibit to prevent ballooning without synchronous mmu
  balloon: Allow multiple inhibit users

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 0abaa41d 238e9172
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include "trace.h"
#include "hw/irq.h"
#include "sysemu/sev.h"
#include "sysemu/balloon.h"

#include "hw/boards.h"

@@ -1698,6 +1699,9 @@ static int kvm_init(MachineState *ms)
    s->many_ioeventfds = kvm_check_many_ioeventfds();

    s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
    if (!s->sync_mmu) {
        qemu_balloon_inhibit(true);
    }

    return 0;

+10 −3
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/atomic.h"
#include "exec/cpu-common.h"
#include "sysemu/kvm.h"
#include "sysemu/balloon.h"
@@ -37,16 +38,22 @@
static QEMUBalloonEvent *balloon_event_fn;
static QEMUBalloonStatus *balloon_stat_fn;
static void *balloon_opaque;
static bool balloon_inhibited;
static int balloon_inhibit_count;

/*
 * Report whether memory ballooning is currently inhibited.
 *
 * NOTE(review): this text looks like a rendered diff whose +/- markers were
 * stripped, so the old and the new return statement are both shown.  Read
 * literally, the first return (the plain boolean flag) always wins and the
 * counter-based check after it is unreachable.  Presumably only the
 * counter-based return exists in the real file -- confirm against the tree.
 */
bool qemu_balloon_is_inhibited(void)
{
    /* Flag-based answer: true iff the last qemu_balloon_inhibit() call passed true. */
    return balloon_inhibited;
    /* Counter-based answer: true while at least one inhibit request is outstanding. */
    return atomic_read(&balloon_inhibit_count) > 0;
}

/*
 * Register or drop one balloon inhibit request.
 *
 * @state: true adds one request (increments balloon_inhibit_count),
 *         false releases one (decrements it).
 *
 * Calls are expected to be balanced: the assertion at the end fires if the
 * counter is observed to be negative after the update.
 *
 * NOTE(review): this text looks like a rendered diff whose +/- markers were
 * stripped.  The assignment to balloon_inhibited is presumably the removed
 * line and the counter logic its replacement -- confirm against the tree.
 */
void qemu_balloon_inhibit(bool state)
{
    /* Plain flag: simply records the most recent request, last caller wins. */
    balloon_inhibited = state;
    /* Counted form: each inhibit must be paired with a later release. */
    if (state) {
        atomic_inc(&balloon_inhibit_count);
    } else {
        atomic_dec(&balloon_inhibit_count);
    }

    /* The counter is re-read here, separately from the update above. */
    assert(atomic_read(&balloon_inhibit_count) >= 0);
}

static bool have_balloon(Error **errp)
+9 −0
Original line number Diff line number Diff line
@@ -349,6 +349,15 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
        }
    }

    /*
     * All vfio-ccw devices are believed to operate in a way compatible with
     * memory ballooning, i.e. pages pinned in the host are in the current
     * working set of the guest driver and therefore never overlap with pages
     * available to the guest balloon driver.  This needs to be set before
     * vfio_get_device() for vfio common to handle the balloon inhibitor.
     */
    vcdev->vdev.balloon_allowed = true;

    if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
        goto out_err;
    }
+51 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/balloon.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "qapi/error.h"
@@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,

    space = vfio_get_address_space(as);

    /*
     * VFIO is currently incompatible with memory ballooning insofar as the
     * madvise to purge (zap) the page from QEMU's address space does not
     * interact with the memory API and therefore leaves stale virtual to
     * physical mappings in the IOMMU if the page was previously pinned.  We
     * therefore add a balloon inhibit for each group added to a container,
     * whether the container is used individually or shared.  This provides
     * us with options to allow devices within a group to opt-in and allow
     * ballooning, so long as it is done consistently for a group (for instance
     * if the device is an mdev device where it is known that the host vendor
     * driver will never pin pages outside of the working set of the guest
     * driver, which would thus not be ballooning candidates).
     *
     * The first opportunity to induce pinning occurs here where we attempt to
     * attach the group to existing containers within the AddressSpace.  If any
     * pages are already zapped from the virtual address space, such as from a
     * previous ballooning opt-in, new pinning will cause valid mappings to be
     * re-established.  Likewise, when the overall MemoryListener for a new
     * container is registered, a replay of mappings within the AddressSpace
     * will occur, re-establishing any previously zapped pages as well.
     *
     * NB. Balloon inhibiting does not currently block operation of the
     * balloon driver or revoke previously pinned pages, it only prevents
     * calling madvise to modify the virtual mapping of ballooned pages.
     */
    qemu_balloon_inhibit(true);

    QLIST_FOREACH(container, &space->containers, next) {
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            group->container = container;
@@ -1232,6 +1260,7 @@ close_fd_exit:
    close(fd);

put_space_exit:
    qemu_balloon_inhibit(false);
    vfio_put_address_space(space);

    return ret;
@@ -1352,6 +1381,9 @@ void vfio_put_group(VFIOGroup *group)
        return;
    }

    if (!group->balloon_allowed) {
        qemu_balloon_inhibit(false);
    }
    vfio_kvm_device_del_group(group);
    vfio_disconnect_container(group);
    QLIST_REMOVE(group, next);
@@ -1387,6 +1419,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
        return ret;
    }

    /*
     * Clear the balloon inhibitor for this group if the driver knows the
     * device operates compatibly with ballooning.  Setting must be consistent
     * per group, but since compatibility is really only possible with mdev
     * currently, we expect singleton groups.
     */
    if (vbasedev->balloon_allowed != group->balloon_allowed) {
        if (!QLIST_EMPTY(&group->device_list)) {
            error_setg(errp,
                       "Inconsistent device balloon setting within group");
            return -1;
        }

        if (!group->balloon_allowed) {
            group->balloon_allowed = true;
            qemu_balloon_inhibit(false);
        }
    }

    vbasedev->fd = fd;
    vbasedev->group = group;
    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+25 −1
Original line number Diff line number Diff line
@@ -2804,12 +2804,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
    VFIODevice *vbasedev_iter;
    VFIOGroup *group;
    char *tmp, group_path[PATH_MAX], *group_name;
    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
    Error *err = NULL;
    ssize_t len;
    struct stat st;
    int groupid;
    int i, ret;
    bool is_mdev;

    if (!vdev->vbasedev.sysfsdev) {
        if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2869,6 +2870,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
        }
    }

    /*
     * Mediated devices *might* operate compatibly with memory ballooning, but
     * we cannot know for certain; it depends on whether the mdev vendor driver
     * stays in sync with the active working set of the guest driver.  Prevent
     * the x-balloon-allowed option unless this is minimally an mdev device.
     */
    tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
    subsys = realpath(tmp, NULL);
    g_free(tmp);
    is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
    free(subsys);

    trace_vfio_mdev(vdev->vbasedev.name, is_mdev);

    if (vdev->vbasedev.balloon_allowed && !is_mdev) {
        error_setg(errp, "x-balloon-allowed only potentially compatible "
                   "with mdev devices");
        vfio_put_group(group);
        goto error;
    }

    ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
    if (ret) {
        vfio_put_group(group);
@@ -3170,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = {
    DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
                    VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
    DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
    DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
                     vbasedev.balloon_allowed, false),
    DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
    DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
    DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
Loading