Commit d56d938b authored by Pavel Begunkov's avatar Pavel Begunkov Committed by Jens Axboe
Browse files

io_uring: do ctx initiated file note removal



Another preparation patch. When full quiesce is done on ctx exit, use the
task_work infrastructure to remove the io_uring->xa entries corresponding
to the ctx. For that we use the backwards tctx map. Also use ->in_idle to
avoid removing an entry while we are traversing ->xa during cancellation —
just ignore it there.

Signed-off-by: default avatarPavel Begunkov <asml.silence@gmail.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 13bf43f5
Loading
Loading
Loading
Loading
+46 −2
Original line number Diff line number Diff line
@@ -987,6 +987,7 @@ static const struct io_op_def io_op_defs[] = {
	[IORING_OP_UNLINKAT] = {},
};

static void io_uring_del_task_file(unsigned long index);
static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct task_struct *task,
					 struct files_struct *files);
@@ -8536,10 +8537,33 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
	return executed;
}

/*
 * On-stack request passed from io_ring_exit_work() to a target task via
 * task_work; the callback (io_tctx_exit_cb) signals @completion when done.
 */
struct io_tctx_exit {
	/* task_work item queued on the target task; runs io_tctx_exit_cb() */
	struct callback_head		task_work;
	/* completed by io_tctx_exit_cb() once the entry has been handled */
	struct completion		completion;
	/* io_uring->xa index (the ctx's file) to drop from the task's table */
	unsigned long			index;
};

/*
 * Runs in the context of the target task (queued via task_work_add) and
 * drops that task's io_uring->xa entry for the exiting ctx, then wakes
 * the waiter in io_ring_exit_work() through ->completion.
 */
static void io_tctx_exit_cb(struct callback_head *cb)
{
	struct io_tctx_exit *exit = container_of(cb, struct io_tctx_exit,
						 task_work);
	struct io_uring_task *tctx = current->io_uring;

	/*
	 * When @in_idle, we're in cancellation and it's racy to remove the
	 * node. It'll be removed by the end of cancellation, just ignore it.
	 */
	if (!atomic_read(&tctx->in_idle))
		io_uring_del_task_file(exit->index);
	complete(&exit->completion);
}

static void io_ring_exit_work(struct work_struct *work)
{
	struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
					       exit_work);
	struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
	struct io_tctx_exit exit;
	struct io_tctx_node *node;
	int ret;

	/*
	 * If we're doing polled IO and end up having requests being
@@ -8550,6 +8574,26 @@ static void io_ring_exit_work(struct work_struct *work)
	do {
		io_uring_try_cancel_requests(ctx, NULL, NULL);
	} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));

	mutex_lock(&ctx->uring_lock);
	while (!list_empty(&ctx->tctx_list)) {
		node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
					ctx_node);
		exit.index = (unsigned long)node->file;
		init_completion(&exit.completion);
		init_task_work(&exit.task_work, io_tctx_exit_cb);
		ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
		if (WARN_ON_ONCE(ret))
			continue;
		wake_up_process(node->task);

		mutex_unlock(&ctx->uring_lock);
		wait_for_completion(&exit.completion);
		cond_resched();
		mutex_lock(&ctx->uring_lock);
	}
	mutex_unlock(&ctx->uring_lock);

	io_ring_ctx_free(ctx);
}