Commit 4e6b2b2e authored by Linus Torvalds
Browse files

Merge tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:
 "Nothing major, just a few minor tweaks:

   - Tweak for the TCP zero-copy io_uring self test (Pavel)

   - Rather than use our internal cached value of number of CQ events
     available, use what the user can see (Dylan)

   - Fix a typo in a comment, added in this release (me)

   - Don't allow wrapping while adding provided buffers (me)

   - Fix a double poll race, and add a lockdep assertion for it too
     (Pavel)"

* tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux:
  io_uring/poll: lockdep annote io_poll_req_insert_locked
  io_uring/poll: fix double poll req->flags races
  io_uring: check for rollover of buffer ID when providing buffers
  io_uring: calculate CQEs from the user visible value
  io_uring: fix typo in io_uring.h comment
  selftests/net: don't tests batched TCP io_uring zc
parents f5020a08 5576035f
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -222,7 +222,7 @@ enum io_uring_op {


/*
/*
 * sqe->uring_cmd_flags
 * sqe->uring_cmd_flags
 * IORING_URING_CMD_FIXED	use registered buffer; pass thig flag
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
 *				along with setting sqe->buf_index.
 *				along with setting sqe->buf_index.
 */
 */
#define IORING_URING_CMD_FIXED	(1U << 0)
#define IORING_URING_CMD_FIXED	(1U << 0)
+8 −2
Original line number Original line Diff line number Diff line
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
}
}


/*
 * Count CQ events from the ring's own cq.tail and cq.head fields, each
 * loaded with READ_ONCE(), instead of from ctx->cached_cq_tail as
 * __io_cqring_events() does.
 *
 * NOTE(review): these fields are presumably the user-visible/shared ring
 * indices, so the result reflects whatever values are currently stored
 * there - confirm against the struct io_rings definition.
 */
static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
{
	return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
}

static bool io_match_linked(struct io_kiocb *head)
static bool io_match_linked(struct io_kiocb *head)
{
{
	struct io_kiocb *req;
	struct io_kiocb *req;
@@ -2315,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
static inline bool io_should_wake(struct io_wait_queue *iowq)
static inline bool io_should_wake(struct io_wait_queue *iowq)
{
{
	struct io_ring_ctx *ctx = iowq->ctx;
	struct io_ring_ctx *ctx = iowq->ctx;
	int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;


	/*
	/*
	 * Wake up if we have enough events, or if a timeout occurred since we
	 * Wake up if we have enough events, or if a timeout occurred since we
@@ -2399,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
			return ret;
			return ret;
		io_cqring_overflow_flush(ctx);
		io_cqring_overflow_flush(ctx);


		if (io_cqring_events(ctx) >= min_events)
		/* if user messes with these they will just get an early return */
		if (__io_cqring_events_user(ctx) >= min_events)
			return 0;
			return 0;
	} while (ret > 0);
	} while (ret > 0);


+2 −0
Original line number Original line Diff line number Diff line
@@ -346,6 +346,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
	tmp = READ_ONCE(sqe->off);
	tmp = READ_ONCE(sqe->off);
	if (tmp > USHRT_MAX)
	if (tmp > USHRT_MAX)
		return -E2BIG;
		return -E2BIG;
	if (tmp + p->nbufs >= USHRT_MAX)
		return -EINVAL;
	p->bid = tmp;
	p->bid = tmp;
	return 0;
	return 0;
}
}
+19 −12
Original line number Original line Diff line number Diff line
@@ -116,6 +116,8 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);


	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
}


@@ -394,7 +396,8 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
	return 1;
	return 1;
}
}


static void io_poll_double_prepare(struct io_kiocb *req)
/* fails only when polling is already completing by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
{
	struct wait_queue_head *head;
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);
	struct io_poll *poll = io_poll_get_single(req);
@@ -403,20 +406,20 @@ static void io_poll_double_prepare(struct io_kiocb *req)
	rcu_read_lock();
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	head = smp_load_acquire(&poll->head);
	/*
	/*
	 * poll arm may not hold ownership and so race with
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake() by modifying req->flags. There is only one
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * poll entry queued, serialise with it by taking its head lock.
	 * it by taking its head lock. As we're still arming the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	 */
	if (head)
	if (head) {
		spin_lock_irq(&head->lock);
		spin_lock_irq(&head->lock);

		req->flags |= REQ_F_DOUBLE_POLL;
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
			req->flags |= REQ_F_ASYNC_DATA;

	if (head)
		spin_unlock_irq(&head->lock);
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	rcu_read_unlock();
	return !!head;
}
}


static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
@@ -454,7 +457,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
		/* mark as double wq entry */
		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		io_init_poll_iocb(poll, first->events, first->wait.func);
		io_poll_double_prepare(req);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
		*poll_ptr = poll;
	} else {
	} else {
		/* fine to modify, there is no poll queued to race with us */
		/* fine to modify, there is no poll queued to race with us */
+1 −1
Original line number Original line Diff line number Diff line
@@ -29,7 +29,7 @@ if [[ "$#" -eq "0" ]]; then
	for IP in "${IPs[@]}"; do
	for IP in "${IPs[@]}"; do
		for mode in $(seq 1 3); do
		for mode in $(seq 1 3); do
			$0 "$IP" udp -m "$mode" -t 1 -n 32
			$0 "$IP" udp -m "$mode" -t 1 -n 32
			$0 "$IP" tcp -m "$mode" -t 1 -n 32
			$0 "$IP" tcp -m "$mode" -t 1 -n 1
		done
		done
	done
	done