Commit 53ae7e11 authored by Linus Torvalds

Merge tag 'io_uring-6.3-2023-03-03' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:
 "Here's a set of fixes/changes that didn't make the first cut, either
  because they got queued before I sent the early merge request, or
  fixes that came in afterwards. In detail:

   - Don't set MSG_NOSIGNAL on recv/recvmsg opcodes, as AF_PACKET will
     error out (David)

   - Fix for spurious poll wakeups (me)

   - Fix for a file leak for buffered reads in certain conditions
     (Joseph)

   - Don't allow registered buffers of mixed types (Pavel)

   - Improve handling of huge pages for registered buffers (Pavel)

   - Provided buffer ring size calculation fix (Wojciech)

   - Minor cleanups (me)"

* tag 'io_uring-6.3-2023-03-03' of git://git.kernel.dk/linux:
  io_uring/poll: don't pass in wake func to io_init_poll_iocb()
  io_uring: fix fget leak when fs don't support nowait buffered read
  io_uring/poll: allow some retries for poll triggering spuriously
  io_uring: remove MSG_NOSIGNAL from recvmsg
  io_uring/rsrc: always initialize 'folio' to NULL
  io_uring/rsrc: optimise registered huge pages
  io_uring/rsrc: optimise single entry advance
  io_uring/rsrc: disallow multi-source reg buffers
  io_uring: remove unused wq_list_merge
  io_uring: fix size calculation when registering buf ring
  io_uring/rsrc: fix a comment in io_import_fixed()
  io_uring: rename 'in_idle' to 'in_cancel'
  io_uring: consolidate the put_ref-and-return section of adding work
parents 9d0281b5 1947ddf9
include/linux/io_uring_types.h +1 −1
@@ -58,7 +58,7 @@ struct io_uring_task {

	struct xarray			xa;
	struct wait_queue_head		wait;
-	atomic_t			in_idle;
+	atomic_t			in_cancel;
	atomic_t			inflight_tracked;
	struct percpu_counter		inflight;

io_uring/io_uring.c +16 −16
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
	struct io_uring_task *tctx = task->io_uring;

	percpu_counter_sub(&tctx->inflight, nr);
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	if (unlikely(atomic_read(&tctx->in_cancel)))
		wake_up(&tctx->wait);
	put_task_struct_many(task, nr);
}
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)

	ctx_flush_and_put(ctx, &uring_locked);

-	/* relaxed read is enough as only the task itself sets ->in_idle */
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	/* relaxed read is enough as only the task itself sets ->in_cancel */
+	if (unlikely(atomic_read(&tctx->in_cancel)))
		io_uring_drop_tctx_refs(current);

	trace_io_uring_task_work_run(tctx, count, loops);
@@ -1285,17 +1285,15 @@ static void io_req_local_work_add(struct io_kiocb *req)

	percpu_ref_get(&ctx->refs);

-	if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
-		percpu_ref_put(&ctx->refs);
-		return;
-	}
+	if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+		goto put_ref;

	/* needed for the following wake up */
	smp_mb__after_atomic();

-	if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+	if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
		io_move_task_work_from_local(ctx);
-		percpu_ref_put(&ctx->refs);
-		return;
+		goto put_ref;
	}

	if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
@@ -1305,6 +1303,8 @@ static void io_req_local_work_add(struct io_kiocb *req)

	if (READ_ONCE(ctx->cq_waiting))
		wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);

+put_ref:
	percpu_ref_put(&ctx->refs);
}

@@ -1777,7 +1777,7 @@ int io_req_prep_async(struct io_kiocb *req)
	const struct io_issue_def *def = &io_issue_defs[req->opcode];

	/* assign early for deferred execution for non-fixed file */
-	if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
+	if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE) && !req->file)
		req->file = io_file_get_normal(req, req->cqe.fd);
	if (!cdef->prep_async)
		return 0;
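
The check added in the hunk above matters because io_file_get_normal() ends up in fget(), which takes a file reference; a request that already had its file assigned at issue time would otherwise acquire a second reference during async prep while teardown only drops one. A minimal userspace sketch of that pattern, with simplified refcounting and hypothetical helper names rather than the kernel API:

#include <stdio.h>

struct file { int refcount; };

static struct file the_file = { .refcount = 0 };

/* stand-ins for fget()/fput(): take and drop one reference */
static struct file *file_get(void) { the_file.refcount++; return &the_file; }
static void file_put(struct file *f) { f->refcount--; }

struct req { struct file *file; };

/* deferred-prep path: only fetch the file if the request doesn't hold one yet */
static void prep_async(struct req *r)
{
	if (!r->file)			/* without this check, a second ref is taken */
		r->file = file_get();
}

int main(void)
{
	struct req r = { .file = file_get() };	/* file assigned at issue time */

	prep_async(&r);				/* request punted to deferred prep */
	file_put(r.file);			/* single put at completion */
	printf("refcount after completion: %d\n", the_file.refcount);	/* 0 */
	return 0;
}

Dropping the !r->file guard leaves the counter at 1 after completion, which is the kind of leaked reference the hunk closes.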
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)

	work = container_of(cb, struct io_tctx_exit, task_work);
	/*
-	 * When @in_idle, we're in cancellation and it's racy to remove the
+	 * When @in_cancel, we're in cancellation and it's racy to remove the
	 * node. It'll be removed by the end of cancellation, just ignore it.
	 * tctx can be NULL if the queueing of this task_work raced with
	 * work cancelation off the exec path.
	 */
-	if (tctx && !atomic_read(&tctx->in_idle))
+	if (tctx && !atomic_read(&tctx->in_cancel))
		io_uring_del_tctx_node((unsigned long)work->ctx);
	complete(&work->completion);
}
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

-	atomic_inc(&tctx->in_idle);
+	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
-		 * ->in_idle set for normal exit.
+		 * ->in_cancel set for normal exit.
		 */
-		atomic_dec(&tctx->in_idle);
+		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
io_uring/kbuf.c +1 −1
@@ -505,7 +505,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
	}

	pages = io_pin_pages(reg.ring_addr,
-			     struct_size(br, bufs, reg.ring_entries),
+			     flex_array_size(br, bufs, reg.ring_entries),
			     &nr_pages);
	if (IS_ERR(pages)) {
		kfree(free_bl);
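
The swap from struct_size() to flex_array_size() matters because struct io_uring_buf_ring is a union: the ring header (tail and reserved fields) overlays bufs[0] rather than preceding the array, so the region to pin is exactly reg.ring_entries * sizeof(struct io_uring_buf). Counting sizeof(*br) as well inflates the length and can make io_pin_pages() pin one page more than the ring occupies. A standalone sketch of the arithmetic, using simplified copies of the UAPI layouts (an assumption for illustration, not the kernel headers themselves):

#include <stdint.h>
#include <stdio.h>

/* mirrors struct io_uring_buf: 16 bytes per ring entry */
struct io_uring_buf {
	uint64_t addr;
	uint32_t len;
	uint16_t bid;
	uint16_t resv;
};

/* mirrors the io_uring_buf_ring header, which overlays bufs[0] in the UAPI union */
struct io_uring_buf_ring_hdr {
	uint64_t resv1;
	uint32_t resv2;
	uint16_t resv3;
	uint16_t tail;
};

int main(void)
{
	unsigned int entries = 256;	/* one 4 KiB page worth of entries */

	/* struct_size(br, bufs, n) style: header + n entries, 4112 bytes, spills into a second page */
	size_t with_header = sizeof(struct io_uring_buf_ring_hdr) +
			     (size_t)entries * sizeof(struct io_uring_buf);
	/* flex_array_size(br, bufs, n) style: n entries, 4096 bytes, exactly one page */
	size_t array_only = (size_t)entries * sizeof(struct io_uring_buf);

	printf("struct_size-style:     %zu bytes\n", with_header);
	printf("flex_array_size-style: %zu bytes\n", array_only);
	return 0;
}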
io_uring/net.c +1 −1
@@ -567,7 +567,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
-	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
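
MSG_NOSIGNAL only suppresses SIGPIPE on the send side, so forcing it onto recvmsg gained nothing, and AF_PACKET's recvmsg rejects flags outside the small set it recognises, which made such io_uring requests fail. A userspace sketch of that failure mode (it needs CAP_NET_RAW to open the packet socket; the strict flag check is behaviour observed in af_packet around this kernel series, not something introduced here):

#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/if_ether.h>

int main(void)
{
	char buf[2048];
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0) {
		perror("socket(AF_PACKET)");	/* typically EPERM without CAP_NET_RAW */
		return 1;
	}
	/* packet sockets only accept PEEK/DONTWAIT/TRUNC/CMSG_COMPAT/ERRQUEUE on receive */
	if (recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_NOSIGNAL) < 0)
		printf("recv with MSG_NOSIGNAL: %s\n", strerror(errno));	/* EINVAL */
	return 0;
}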
io_uring/poll.c +19 −7
@@ -51,6 +51,9 @@ struct io_poll_table {

#define IO_WQE_F_DOUBLE		1

+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+			void *key);
+
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;
@@ -164,15 +167,14 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
	}
}

-static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
-			      wait_queue_func_t wake_func)
+static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
-	init_waitqueue_func_entry(&poll->wait, wake_func);
+	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
@@ -508,7 +510,7 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
-		io_init_poll_iocb(poll, first->events, first->wait.func);
+		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
@@ -569,7 +571,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
-	io_init_poll_iocb(poll, mask, io_poll_wake);
+	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

@@ -650,6 +652,14 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

+/*
+ * We can't reliably detect loops in repeated poll triggers and issue
+ * subsequently failing. But rather than fail these immediately, allow a
+ * certain amount of retries before we give up. Given that this condition
+ * should _rarely_ trigger even once, we should be fine with a larger value.
+ */
+#define APOLL_MAX_RETRY		128
+
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
@@ -665,14 +675,18 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
		if (entry == NULL)
			goto alloc_apoll;
		apoll = container_of(entry, struct async_poll, cache);
+		apoll->poll.retries = APOLL_MAX_RETRY;
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
+		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
+	if (unlikely(!--apoll->poll.retries))
+		return NULL;
	return apoll;
}
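
Together with the APOLL_MAX_RETRY definition above, these additions give each async poll attempt a retry budget: the counter is reset whenever an apoll is freshly taken from the cache or allocated, decremented on every arm, and left untouched when an already-polled request re-arms, so a file that keeps waking the waitqueue without ever becoming ready eventually exhausts the budget and io_req_alloc_apoll() fails the arm instead of re-polling forever. A trivial standalone model of that budget, with hypothetical names rather than the kernel structures:

#include <stdio.h>

#define APOLL_MAX_RETRY	128

struct apoll { int retries; };

/* arm the (model) poll handler: consumes one retry, fails once the budget is gone */
static int arm_poll(struct apoll *p)
{
	return --p->retries > 0;
}

int main(void)
{
	struct apoll p = { .retries = APOLL_MAX_RETRY };
	int rearms = 0;

	/* a pathological file keeps triggering the waitqueue without being ready */
	while (arm_poll(&p))
		rearms++;
	printf("gave up after %d re-arms\n", rearms);	/* 127 */
	return 0;
}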

@@ -694,8 +708,6 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
-	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
-		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;
