Commit a88c3869 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix a syzbot-reported crash on 32-bit ARM with highmem; digging for
   potentially similar issues turned up one more, fixed as well (me)

 - Fix a syzbot report with PROVE_LOCKING=y and setting up the ring in a
   disabled state (me)

 - Fix for race with CPU hotplug and io-wq init (Jeff)

* tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux:
  io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls()
  io_uring: don't allow IORING_SETUP_NO_MMAP rings on highmem pages
  io_uring: ensure io_lockdep_assert_cq_locked() handles disabled rings
  io_uring/kbuf: don't allow registered buffer rings on highmem pages
parents af95dc6f 0f8baa3c
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -1151,9 +1151,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
	wq = kzalloc(sizeof(struct io_wq), GFP_KERNEL);
	if (!wq)
		return ERR_PTR(-ENOMEM);
	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
	if (ret)
		goto err_wq;

	refcount_inc(&data->hash->refs);
	wq->hash = data->hash;
@@ -1186,13 +1183,14 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
	wq->task = get_task_struct(data->task);
	atomic_set(&wq->worker_refs, 1);
	init_completion(&wq->worker_done);
	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
	if (ret)
		goto err;

	return wq;
err:
	io_wq_put_hash(data->hash);
	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);

	free_cpumask_var(wq->cpu_mask);
err_wq:
	kfree(wq);
	return ERR_PTR(ret);
}
+15 −1
Original line number Diff line number Diff line
@@ -2686,7 +2686,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
{
	struct page **page_array;
	unsigned int nr_pages;
	int ret;
	int ret, i;

	*npages = 0;

@@ -2716,6 +2716,20 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
	 */
	if (page_array[0] != page_array[ret - 1])
		goto err;

	/*
	 * Can't support mapping user allocated ring memory on 32-bit archs
	 * where it could potentially reside in highmem. Just fail those with
	 * -EINVAL, just like we did on kernels that didn't support this
	 * feature.
	 */
	for (i = 0; i < nr_pages; i++) {
		if (PageHighMem(page_array[i])) {
			ret = -EINVAL;
			goto err;
		}
	}

	*pages = page_array;
	*npages = nr_pages;
	return page_to_virt(page_array[0]);
+27 −14
Original line number Diff line number Diff line
@@ -86,20 +86,33 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
			bool cancel_all);

/*
 * Lockdep assertions for @ctx. The caller must be in task context, and
 * then exactly one of the following is checked:
 *  - IORING_SETUP_IOPOLL set in ->flags: ->uring_lock is held
 *  - ->task_complete is clear: ->completion_lock is held
 *  - ->submitter_task has PF_EXITING set: current_work() is non-NULL
 *  - otherwise: current is ->submitter_task
 *
 * Note that ->submitter_task is dereferenced without a NULL check once
 * the first two branches are not taken.
 */
#define io_lockdep_assert_cq_locked(ctx)				\
	do {								\
		lockdep_assert(in_task());				\
									\
		if (ctx->flags & IORING_SETUP_IOPOLL) {			\
			lockdep_assert_held(&ctx->uring_lock);		\
		} else if (!ctx->task_complete) {			\
			lockdep_assert_held(&ctx->completion_lock);	\
		} else if (ctx->submitter_task->flags & PF_EXITING) {	\
			lockdep_assert(current_work());			\
		} else {						\
			lockdep_assert(current == ctx->submitter_task);	\
		}							\
	} while (0)
#ifdef CONFIG_PROVE_LOCKING
/*
 * Lockdep assertions for @ctx. The caller must be in task context; beyond
 * that, which condition is asserted depends on how the ring is set up:
 *  - IORING_SETUP_IOPOLL: ->uring_lock must be held
 *  - ->task_complete clear: ->completion_lock must be held
 *  - otherwise, if a ->submitter_task exists: current must be that task,
 *    or, if that task has PF_EXITING set, current_work() must be non-NULL
 */
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
{
	lockdep_assert(in_task());

	if (ctx->flags & IORING_SETUP_IOPOLL) {
		lockdep_assert_held(&ctx->uring_lock);
		return;
	}

	if (!ctx->task_complete) {
		lockdep_assert_held(&ctx->completion_lock);
		return;
	}

	/*
	 * A ring set up with IORING_SETUP_R_DISABLED may not have a
	 * ->submitter_task yet, but a CQE can still be posted for it.
	 * SQEs cannot be submitted in that state, so this can only come
	 * from updating tagged resources. Nothing further to assert then.
	 */
	if (!ctx->submitter_task)
		return;

	if (ctx->submitter_task->flags & PF_EXITING)
		lockdep_assert(current_work());
	else
		lockdep_assert(current == ctx->submitter_task);
}
#else
/* No-op variant used when CONFIG_PROVE_LOCKING is not defined. */
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
{
}
#endif

static inline void io_req_task_work_add(struct io_kiocb *req)
{
+19 −8
Original line number Diff line number Diff line
@@ -477,7 +477,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
{
	struct io_uring_buf_ring *br;
	struct page **pages;
	int nr_pages;
	int i, nr_pages;

	pages = io_pin_pages(reg->ring_addr,
			     flex_array_size(br, bufs, reg->ring_entries),
@@ -485,6 +485,17 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
	 * which then need to be mapped. We could support that, but it'd
	 * complicate the code and slow down the common cases quite a bit.
	 * So just error out, returning -EINVAL just like we did on kernels
	 * that didn't support mapped buffer rings.
	 */
	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
			goto error_unpin;

	br = page_address(pages[0]);
#ifdef SHM_COLOUR
	/*
@@ -496,13 +507,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
	 * this transparently.
	 */
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
		int i;

		for (i = 0; i < nr_pages; i++)
			unpin_user_page(pages[i]);
		return -EINVAL;
	}
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
		goto error_unpin;
#endif
	bl->buf_pages = pages;
	bl->buf_nr_pages = nr_pages;
@@ -510,6 +516,11 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	bl->is_mapped = 1;
	bl->is_mmap = 0;
	return 0;
error_unpin:
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
}

static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,