Commit 6e8a73e9 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging



Pull request

# gpg: Signature made Wed 11 Mar 2020 12:40:36 GMT
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  aio-posix: remove idle poll handlers to improve scalability
  aio-posix: support userspace polling of fd monitoring
  aio-posix: add io_uring fd monitoring implementation
  aio-posix: simplify FDMonOps->update() prototype
  aio-posix: extract ppoll(2) and epoll(7) fd monitoring
  aio-posix: move RCU_READ_LOCK() into run_poll_handlers()
  aio-posix: completely stop polling when disabled
  aio-posix: remove confusing QLIST_SAFE_REMOVE()
  qemu/queue.h: clear linked list pointers on remove

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents ba298832 d37d0e36
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1885,6 +1885,8 @@ L: qemu-block@nongnu.org
S: Supported
F: util/async.c
F: util/aio-*.c
F: util/aio-*.h
F: util/fdmon-*.c
F: block/io.c
F: migration/block*
F: include/block/aio.h
+5 −0
Original line number Diff line number Diff line
@@ -4093,6 +4093,11 @@ if test "$linux_io_uring" != "no" ; then
    linux_io_uring_cflags=$($pkg_config --cflags liburing)
    linux_io_uring_libs=$($pkg_config --libs liburing)
    linux_io_uring=yes

    # io_uring is used in libqemuutil.a where per-file -libs variables are not
    # seen by programs linking the archive.  It's not ideal, but just add the
    # library dependency globally.
    LIBS="$linux_io_uring_libs $LIBS"
  else
    if test "$linux_io_uring" = "yes" ; then
      feature_not_found "linux io_uring" "Install liburing devel"
+69 −2
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@
#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -52,6 +55,56 @@ struct ThreadPool;
struct LinuxAioState;
struct LuringState;

/* Is polling disabled? */
bool aio_poll_disabled(AioContext *ctx);

/*
 * Callbacks for file descriptor monitoring implementations.
 *
 * An AioContext refers to one table of these callbacks through its
 * fdmon_ops field.  Each callback documents the ctx->list_lock state it is
 * called with.
 */
typedef struct {
    /*
     * update:
     * @ctx: the AioContext
     * @old_node: the existing handler or NULL if this file descriptor is being
     *            monitored for the first time
     * @new_node: the new handler or NULL if this file descriptor is being
     *            removed
     *
     * Add/remove/modify a monitored file descriptor.
     *
     * Called with ctx->list_lock acquired.
     */
    void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node);

    /*
     * wait:
     * @ctx: the AioContext
     * @ready_list: list for handlers that become ready
     * @timeout: maximum duration to wait, in nanoseconds
     *
     * Wait for file descriptors to become ready and place them on ready_list.
     *
     * Called with ctx->list_lock incremented but not locked.
     *
     * Returns: number of ready file descriptors.
     */
    int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);

    /*
     * need_wait:
     * @ctx: the AioContext
     *
     * Tell aio_poll() when to stop userspace polling early because ->wait()
     * has fds ready.
     *
     * File descriptor monitoring implementations that cannot poll fd readiness
     * from userspace should use aio_poll_disabled() here.  This ensures that
     * file descriptors are not starved by handlers that frequently make
     * progress via userspace polling.
     *
     * Returns: true if ->wait() should be called, false otherwise.
     */
    bool (*need_wait)(AioContext *ctx);
} FDMonOps;

/*
 * Each aio_bh_poll() call carves off a slice of the BH list, so that newly
 * scheduled BHs are not processed until the next aio_bh_poll() call.  All
@@ -65,6 +118,8 @@ struct BHListSlice {
    QSIMPLEQ_ENTRY(BHListSlice) next;
};

/* Head of a singly-linked list (QSLIST) of AioHandler elements */
typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;

struct AioContext {
    GSource source;

@@ -150,6 +205,10 @@ struct AioContext {
     * locking.
     */
    struct LuringState *linux_io_uring;

    /* State for file descriptor monitoring using Linux io_uring */
    struct io_uring fdmon_io_uring;
    AioHandlerSList submit_list;
#endif

    /* TimerLists for calling timers - one per clock type.  Has its own
@@ -168,13 +227,21 @@ struct AioContext {
    int64_t poll_grow;      /* polling time growth factor */
    int64_t poll_shrink;    /* polling time shrink factor */

    /*
     * List of handlers participating in userspace polling.  Protected by
     * ctx->list_lock.  Iterated and modified mostly by the event loop thread
     * from aio_poll() with ctx->list_lock incremented.  aio_set_fd_handler()
     * only touches the list to delete nodes if ctx->list_lock's count is zero.
     */
    AioHandlerList poll_aio_handlers;

    /* Are we in polling mode or monitoring file descriptors? */
    bool poll_started;

    /* epoll(7) state used when built with CONFIG_EPOLL */
    int epollfd;
    bool epoll_enabled;
    bool epoll_available;

    const FDMonOps *fdmon_ops;
};

/**
+15 −4
Original line number Diff line number Diff line
@@ -142,6 +142,8 @@ struct { \
                (elm)->field.le_next->field.le_prev =                   \
                    (elm)->field.le_prev;                               \
        *(elm)->field.le_prev = (elm)->field.le_next;                   \
        (elm)->field.le_next = NULL;                                    \
        (elm)->field.le_prev = NULL;                                    \
} while (/*CONSTCOND*/0)

/*
@@ -225,12 +227,15 @@ struct { \
} while (/*CONSTCOND*/0)

/*
 * QSLIST_REMOVE_HEAD:
 * @head: pointer to the list head; the list must not be empty because the
 *        first element is dereferenced unconditionally
 * @field: name of the list entry member inside the element type
 *
 * Unlink the first element and clear its sle_next pointer so that a stale
 * link is not left behind in the removed element.
 *
 * The head is advanced exactly once.  The temporary uses a reserved-looking
 * name so that a caller expression mentioning "elm" is not captured, and
 * __typeof__ is used so the macro also builds in strict ISO C modes.
 */
#define QSLIST_REMOVE_HEAD(head, field) do {                               \
        __typeof__((head)->slh_first) qslist_removed_ = (head)->slh_first; \
        (head)->slh_first = qslist_removed_->field.sle_next;               \
        qslist_removed_->field.sle_next = NULL;                            \
} while (/*CONSTCOND*/0)

/*
 * QSLIST_REMOVE_AFTER:
 * @slistelm: pointer to the element whose successor is removed; it must have
 *            a successor because that successor is dereferenced
 *            unconditionally
 * @field: name of the list entry member inside the element type
 *
 * Unlink the element that follows @slistelm and clear the removed element's
 * sle_next pointer.
 *
 * Exactly one element is unlinked.  The temporary is deliberately not called
 * "next": callers commonly pass a variable of that name, which would make
 * the declaration initialize itself from its own indeterminate value.
 * __typeof__ is used so the macro also builds in strict ISO C modes.
 */
#define QSLIST_REMOVE_AFTER(slistelm, field) do {                          \
        __typeof__(slistelm) qslist_removed_ = (slistelm)->field.sle_next; \
        (slistelm)->field.sle_next = qslist_removed_->field.sle_next;      \
        qslist_removed_->field.sle_next = NULL;                            \
} while (/*CONSTCOND*/0)

#define QSLIST_REMOVE(head, elm, type, field) do {                      \
@@ -241,6 +246,7 @@ struct { \
        while (curelm->field.sle_next != (elm))                         \
            curelm = curelm->field.sle_next;                            \
        curelm->field.sle_next = curelm->field.sle_next->field.sle_next; \
        (elm)->field.sle_next = NULL;                                   \
    }                                                                   \
} while (/*CONSTCOND*/0)

@@ -304,8 +310,10 @@ struct { \
} while (/*CONSTCOND*/0)

/*
 * QSIMPLEQ_REMOVE_HEAD:
 * @head: pointer to the queue head; the queue must not be empty because the
 *        first element is dereferenced unconditionally
 * @field: name of the queue entry member inside the element type
 *
 * Unlink the first element and clear its sqe_next pointer.  When the queue
 * becomes empty, sqh_last is pointed back at sqh_first so that a later tail
 * insertion links the new element through the head.
 *
 * The head is advanced exactly once.  The temporary uses a reserved-looking
 * name so that a caller expression mentioning "elm" is not captured, and
 * __typeof__ is used so the macro also builds in strict ISO C modes.
 */
#define QSIMPLEQ_REMOVE_HEAD(head, field) do {                           \
    __typeof__((head)->sqh_first) qsimpleq_removed_ = (head)->sqh_first; \
    (head)->sqh_first = qsimpleq_removed_->field.sqe_next;               \
    if ((head)->sqh_first == NULL) {                                     \
        (head)->sqh_last = &(head)->sqh_first;                           \
    }                                                                    \
    qsimpleq_removed_->field.sqe_next = NULL;                            \
} while (/*CONSTCOND*/0)

#define QSIMPLEQ_SPLIT_AFTER(head, elm, field, removed) do {            \
@@ -329,6 +337,7 @@ struct { \
        if ((curelm->field.sqe_next =                                   \
            curelm->field.sqe_next->field.sqe_next) == NULL)            \
                (head)->sqh_last = &(curelm)->field.sqe_next;           \
        (elm)->field.sqe_next = NULL;                                   \
    }                                                                   \
} while (/*CONSTCOND*/0)

@@ -446,6 +455,8 @@ union { \
            (head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \
        (elm)->field.tqe_circ.tql_prev->tql_next = (elm)->field.tqe_next; \
        (elm)->field.tqe_circ.tql_prev = NULL;                          \
        (elm)->field.tqe_circ.tql_next = NULL;                          \
        (elm)->field.tqe_next = NULL;                                   \
} while (/*CONSTCOND*/0)

/* remove @left, @right and all elements in between from @head */
+3 −0
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@ util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o
util-obj-y += main-loop.o
util-obj-$(call lnot,$(CONFIG_ATOMIC64)) += atomic64.o
util-obj-$(CONFIG_POSIX) += aio-posix.o
util-obj-$(CONFIG_POSIX) += fdmon-poll.o
util-obj-$(CONFIG_EPOLL_CREATE1) += fdmon-epoll.o
util-obj-$(CONFIG_LINUX_IO_URING) += fdmon-io_uring.o
util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
Loading