Commit 73fd282e authored by Stefan Hajnoczi's avatar Stefan Hajnoczi
Browse files

aio-posix: add io_uring fd monitoring implementation



The recent Linux io_uring API has several advantages over ppoll(2) and
epoll(2).  Details are given in the source code.

Add an io_uring implementation and make it the default on Linux.
Performance is the same as with epoll(7) but later patches add
optimizations that take advantage of io_uring.

It is necessary to change how aio_set_fd_handler() deals with deleting
AioHandlers since removing monitored file descriptors is asynchronous in
io_uring.  fdmon_io_uring_remove() marks the AioHandler deleted and
aio_set_fd_handler() will let it handle deletion in that case.

Signed-off-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
Link: https://lore.kernel.org/r/20200305170806.1313245-6-stefanha@redhat.com
Message-Id: <20200305170806.1313245-6-stefanha@redhat.com>
parent b321051c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -4093,6 +4093,11 @@ if test "$linux_io_uring" != "no" ; then
    linux_io_uring_cflags=$($pkg_config --cflags liburing)
    linux_io_uring_libs=$($pkg_config --libs liburing)
    linux_io_uring=yes

    # io_uring is used in libqemuutil.a where per-file -libs variables are not
    # seen by programs linking the archive.  It's not ideal, but just add the
    # library dependency globally.
    LIBS="$linux_io_uring_libs $LIBS"
  else
    if test "$linux_io_uring" = "yes" ; then
      feature_not_found "linux io_uring" "Install liburing devel"
+9 −0
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@
#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -96,6 +99,8 @@ struct BHListSlice {
    QSIMPLEQ_ENTRY(BHListSlice) next;
};

typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;

struct AioContext {
    GSource source;

@@ -181,6 +186,10 @@ struct AioContext {
     * locking.
     */
    struct LuringState *linux_io_uring;

    /* State for file descriptor monitoring using Linux io_uring */
    struct io_uring fdmon_io_uring;
    AioHandlerSList submit_list;
#endif

    /* TimerLists for calling timers - one per clock type.  Has its own
+1 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ util-obj-$(call lnot,$(CONFIG_ATOMIC64)) += atomic64.o
util-obj-$(CONFIG_POSIX) += aio-posix.o
util-obj-$(CONFIG_POSIX) += fdmon-poll.o
util-obj-$(CONFIG_EPOLL_CREATE1) += fdmon-epoll.o
util-obj-$(CONFIG_LINUX_IO_URING) += fdmon-io_uring.o
util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+16 −4
Original line number Diff line number Diff line
@@ -57,10 +57,16 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    node->pfd.revents = 0;

    /* If the fd monitor has already marked it deleted, leave it alone */
    if (QLIST_IS_INSERTED(node, node_deleted)) {
        return false;
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
        node->pfd.revents = 0;
        return false;
    }
    /* Otherwise, delete it for real.  We can't just mark it as
@@ -126,9 +132,6 @@ void aio_set_fd_handler(AioContext *ctx,

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
@@ -140,6 +143,9 @@ void aio_set_fd_handler(AioContext *ctx,
               atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    ctx->fdmon_ops->update(ctx, node, new_node);
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

@@ -565,11 +571,17 @@ void aio_context_setup(AioContext *ctx)
    ctx->fdmon_ops = &fdmon_poll_ops;
    ctx->epollfd = -1;

    /* Use the fastest fd monitoring implementation if available */
    if (fdmon_io_uring_setup(ctx)) {
        return;
    }

    fdmon_epoll_setup(ctx);
}

void aio_context_destroy(AioContext *ctx)
{
    fdmon_io_uring_destroy(ctx);
    fdmon_epoll_disable(ctx);
}

+19 −1
Original line number Diff line number Diff line
@@ -27,10 +27,14 @@ struct AioHandler {
    IOHandler *io_poll_begin;
    IOHandler *io_poll_end;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
    QLIST_ENTRY(AioHandler) node_deleted;
#ifdef CONFIG_LINUX_IO_URING
    QSLIST_ENTRY(AioHandler) node_submitted;
    unsigned flags; /* see fdmon-io_uring.c */
#endif
    bool is_external;
};

/* Add a handler to a ready list */
@@ -58,4 +62,18 @@ static inline void fdmon_epoll_disable(AioContext *ctx)
}
#endif /* !CONFIG_EPOLL_CREATE1 */

#ifdef CONFIG_LINUX_IO_URING
bool fdmon_io_uring_setup(AioContext *ctx);
void fdmon_io_uring_destroy(AioContext *ctx);
#else
static inline bool fdmon_io_uring_setup(AioContext *ctx)
{
    return false;
}

static inline void fdmon_io_uring_destroy(AioContext *ctx)
{
}
#endif /* !CONFIG_LINUX_IO_URING */

#endif /* AIO_POSIX_H */
Loading