Commit eb4a299b authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring: cache struct io_notif



kmalloc'ing struct io_notif is too expensive when done frequently, so cache
them like many other resources in io_uring. Keep two lists: the first one
is where we get notifiers from, and it's protected by ->uring_lock.
The second, to which we queue released notifiers, is protected by
->completion_lock. We then splice the second list into the first when needed.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9dec18f7fcbab9f4bd40b96e5ae158b119945230.1657643355.git.asml.silence@gmail.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent eb42cebb
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -249,6 +249,9 @@ struct io_ring_ctx {
		struct xarray		io_bl_xa;
		struct list_head	io_buffers_cache;

		/* struct io_notif cache, protected by uring_lock */
		struct list_head	notif_list;

		struct io_hash_table	cancel_table_locked;
		struct list_head	cq_overflow_list;
		struct io_alloc_cache	apoll_cache;
@@ -259,6 +262,10 @@ struct io_ring_ctx {
	struct io_wq_work_list	locked_free_list;
	unsigned int		locked_free_nr;

	/* struct io_notif cache protected by completion_lock */
	struct list_head	notif_list_locked;
	unsigned int		notif_locked_nr;

	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
	struct io_sq_data	*sq_data;	/* if using sq thread polling */

+3 −0
Original line number Diff line number Diff line
@@ -321,6 +321,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
	INIT_WQ_LIST(&ctx->locked_free_list);
	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
	INIT_LIST_HEAD(&ctx->notif_list);
	INIT_LIST_HEAD(&ctx->notif_list_locked);
	return ctx;
err:
	kfree(ctx->dummy_ubuf);
@@ -2493,6 +2495,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
	WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);

	io_notif_cache_purge(ctx);
	io_mem_free(ctx->rings);
	io_mem_free(ctx->sq_sqes);

+50 −7
Original line number Diff line number Diff line
@@ -15,10 +15,12 @@ static void __io_notif_complete_tw(struct callback_head *cb)

	io_cq_lock(ctx);
	io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq, true);

	list_add(&notif->cache_node, &ctx->notif_list_locked);
	ctx->notif_locked_nr++;
	io_cq_unlock_post(ctx);

	percpu_ref_put(&ctx->refs);
	kfree(notif);
}

static inline void io_notif_complete(struct io_notif *notif)
@@ -45,21 +47,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
	queue_work(system_unbound_wq, &notif->commit_work);
}

/*
 * Move every notifier parked on ctx->notif_list_locked over to
 * ctx->notif_list and reset the locked-list counter.
 *
 * The source list and its counter are only touched while ->completion_lock
 * is held; the destination list is covered by ->uring_lock, which the
 * caller must already own.
 */
static void io_notif_splice_cached(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	struct list_head *released = &ctx->notif_list_locked;
	struct list_head *cache = &ctx->notif_list;

	spin_lock(&ctx->completion_lock);
	/* leaves 'released' empty and re-initialised */
	list_splice_init(released, cache);
	ctx->notif_locked_nr = 0;
	spin_unlock(&ctx->completion_lock);
}

/*
 * Free every cached struct io_notif belonging to @ctx.
 *
 * Entries still sitting on the completion-side list are first spliced onto
 * ctx->notif_list, then that list is drained one entry at a time, unlinking
 * and kfree()ing each notifier.
 */
void io_notif_cache_purge(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	struct list_head *cache = &ctx->notif_list;
	struct io_notif *victim;

	io_notif_splice_cached(ctx);

	for (;;) {
		if (list_empty(cache))
			break;

		victim = list_first_entry(cache, struct io_notif, cache_node);
		list_del(&victim->cache_node);
		kfree(victim);
	}
}

/*
 * Report whether ctx->notif_list holds at least one cached notifier.
 *
 * If the list is empty, the locked-list counter is sampled without taking
 * ->completion_lock (hence the data_race()/READ_ONCE() annotation). Only
 * when more than IO_NOTIF_SPLICE_BATCH entries are reported there is the
 * locked list spliced over and the check repeated; otherwise the function
 * returns false without taking the lock.
 */
static inline bool io_notif_has_cached(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	bool cached = !list_empty(&ctx->notif_list);

	if (likely(cached))
		return true;

	/* too few released notifiers to be worth grabbing the lock */
	if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_NOTIF_SPLICE_BATCH))
		return false;

	io_notif_splice_cached(ctx);
	cached = !list_empty(&ctx->notif_list);
	return cached;
}

struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
				struct io_notif_slot *slot)
	__must_hold(&ctx->uring_lock)
{
	struct io_notif *notif;

	if (likely(io_notif_has_cached(ctx))) {
		notif = list_first_entry(&ctx->notif_list,
					 struct io_notif, cache_node);
		list_del(&notif->cache_node);
	} else {
		notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
		if (!notif)
			return NULL;

	notif->seq = slot->seq++;
	notif->tag = slot->tag;
		/* pre-initialise some fields */
		notif->ctx = ctx;
		notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
		notif->uarg.callback = io_uring_tx_zerocopy_callback;
	}

	notif->seq = slot->seq++;
	notif->tag = slot->tag;
	/* master ref owned by io_notif_slot, will be dropped on flush */
	refcount_set(&notif->uarg.refcnt, 1);
	percpu_ref_get(&ctx->refs);
+5 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
#include <net/sock.h>
#include <linux/nospec.h>

#define IO_NOTIF_SPLICE_BATCH	32

struct io_notif {
	struct ubuf_info	uarg;
	struct io_ring_ctx	*ctx;
@@ -13,6 +15,8 @@ struct io_notif {
	u64			tag;
	/* see struct io_notif_slot::seq */
	u32			seq;
	/* hook into ctx->notif_list and ctx->notif_list_locked */
	struct list_head	cache_node;

	union {
		struct callback_head	task_work;
@@ -41,6 +45,7 @@ struct io_notif_slot {
};

int io_notif_unregister(struct io_ring_ctx *ctx);
void io_notif_cache_purge(struct io_ring_ctx *ctx);

struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
				struct io_notif_slot *slot);