Commit f6f360ae authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "This one looks a bit bigger than it is, but that's mainly because 2/3
  of it is enabling IORING_OP_CLOSE to close direct file descriptors.

  We've had a few folks using them and finding it confusing that the way
  to close them is through using -1 for file update; this change just
  brings API symmetry for direct descriptors. Hence I think we should do
  this now and have a better API for 5.15 release. There's some room for
  de-duplicating the close code, but we're leaving that for the next
  merge window.

  Outside of that, just small fixes:

   - Poll race fixes (Hao)

   - io-wq core dump exit fix (me)

   - Reschedule around potentially intensive tctx and buffer iterators
     on teardown (me)

   - Fix for always ending up punting files update to io-wq (me)

   - Put the provided buffer meta data under memcg accounting (me)

   - Tweak for io_write(), removing dead code that was added with the
     iterator changes in this release (Pavel)"

* tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
  io_uring: make OP_CLOSE consistent with direct open
  io_uring: kill extra checks in io_write()
  io_uring: don't punt files update to io-wq unconditionally
  io_uring: put provided buffer meta data under memcg accounting
  io_uring: allow conditional reschedule for intensive iterators
  io_uring: fix potential req refcount underflow
  io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow
  io_uring: fix race between poll completion and cancel_hash insertion
  io-wq: ensure we exit if thread group is exiting
parents 2d70de4e 7df778be
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -584,7 +584,8 @@ static int io_wqe_worker(void *data)

			if (!get_signal(&ksig))
				continue;
			if (fatal_signal_pending(current))
			if (fatal_signal_pending(current) ||
			    signal_group_exit(current->signal))
				break;
			continue;
		}
+70 −15
Original line number Diff line number Diff line
@@ -502,6 +502,7 @@ struct io_poll_update {
/* Per-request state for IORING_OP_CLOSE: closes either a regular fd or a
 * fixed (direct) file slot — the two are mutually exclusive. */
struct io_close {
	struct file			*file;
	int				fd;		/* regular file descriptor to close */
	u32				file_slot;	/* 1-based fixed-file slot; 0 = close ->fd instead */
};

struct io_timeout_data {
@@ -1098,6 +1099,8 @@ static int io_req_prep_async(struct io_kiocb *req);

static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
				 unsigned int issue_flags, u32 slot_index);
static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);

static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);

static struct kmem_cache *req_cachep;
@@ -3605,7 +3608,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
		iov_iter_save_state(iter, state);
	}
	req->result = iov_iter_count(iter);
	ret2 = 0;

	/* Ensure we clear previously set non-block flag */
	if (!force_nonblock)
@@ -3670,8 +3672,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
	} else {
copy_iov:
		iov_iter_restore(iter, state);
		if (ret2 > 0)
			iov_iter_advance(iter, ret2);
		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
		return ret ?: -EAGAIN;
	}
@@ -4387,7 +4387,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
	int i, bid = pbuf->bid;

	for (i = 0; i < pbuf->nbufs; i++) {
		buf = kmalloc(sizeof(*buf), GFP_KERNEL);
		buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
		if (!buf)
			break;

@@ -4594,12 +4594,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
	    sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
	    sqe->rw_flags || sqe->buf_index)
		return -EINVAL;
	if (req->flags & REQ_F_FIXED_FILE)
		return -EBADF;

	req->close.fd = READ_ONCE(sqe->fd);
	req->close.file_slot = READ_ONCE(sqe->file_index);
	if (req->close.file_slot && req->close.fd)
		return -EINVAL;

	return 0;
}

@@ -4611,6 +4615,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
	struct file *file = NULL;
	int ret = -EBADF;

	if (req->close.file_slot) {
		ret = io_close_fixed(req, issue_flags);
		goto err;
	}

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (close->fd >= fdt->max_fds) {
@@ -5338,7 +5347,7 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
	if (req->poll.events & EPOLLONESHOT)
		flags = 0;
	if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
		req->poll.done = true;
		req->poll.events |= EPOLLONESHOT;
		flags = 0;
	}
	if (flags & IORING_CQE_F_MORE)
@@ -5367,10 +5376,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
	} else {
		bool done;

		if (req->poll.done) {
			spin_unlock(&ctx->completion_lock);
			return;
		}
		done = __io_poll_complete(req, req->result);
		if (done) {
			io_poll_remove_double(req);
			hash_del(&req->hash_node);
			req->poll.done = true;
		} else {
			req->result = 0;
			add_wait_queue(req->poll.head, &req->poll.wait);
@@ -5508,6 +5522,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked)

	hash_del(&req->hash_node);
	io_poll_remove_double(req);
	apoll->poll.done = true;
	spin_unlock(&ctx->completion_lock);

	if (!READ_ONCE(apoll->poll.canceled))
@@ -5828,6 +5843,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
	struct io_ring_ctx *ctx = req->ctx;
	struct io_poll_table ipt;
	__poll_t mask;
	bool done;

	ipt.pt._qproc = io_poll_queue_proc;

@@ -5836,13 +5852,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)

	if (mask) { /* no async, we'd stolen it */
		ipt.error = 0;
		io_poll_complete(req, mask);
		done = io_poll_complete(req, mask);
	}
	spin_unlock(&ctx->completion_lock);

	if (mask) {
		io_cqring_ev_posted(ctx);
		if (poll->events & EPOLLONESHOT)
		if (done)
			io_put_req(req);
	}
	return ipt.error;
@@ -6333,19 +6349,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
	struct io_uring_rsrc_update2 up;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	up.offset = req->rsrc_update.offset;
	up.data = req->rsrc_update.arg;
	up.nr = 0;
	up.tags = 0;
	up.resv = 0;

	mutex_lock(&ctx->uring_lock);
	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
	ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
					&up, req->rsrc_update.nr_args);
	mutex_unlock(&ctx->uring_lock);
	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));

	if (ret < 0)
		req_set_fail(req);
@@ -8400,6 +8413,44 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
	return ret;
}

/*
 * Close a fixed (direct) file descriptor: clear the slot in the ctx's
 * fixed-file table and queue the file for resource-node put.
 *
 * req->close.file_slot is 1-based; callers must only invoke this when it
 * is non-zero. Returns 0 on success or a negative errno:
 *   -ENXIO  no fixed-file table registered
 *   -EINVAL slot index out of range
 *   -EBADF  slot is empty
 */
static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
{
	unsigned int offset = req->close.file_slot - 1;	/* convert 1-based slot to index */
	struct io_ring_ctx *ctx = req->ctx;
	struct io_fixed_file *file_slot;
	struct file *file;
	int ret, i;

	/* Serialize fixed-file table updates against other submitters;
	 * locking is conditional on not being on the nonblock issue path. */
	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
	ret = -ENXIO;
	if (unlikely(!ctx->file_data))
		goto out;
	ret = -EINVAL;
	if (offset >= ctx->nr_user_files)
		goto out;
	/* Ensure an rsrc node is available before committing the removal */
	ret = io_rsrc_node_switch_start(ctx);
	if (ret)
		goto out;

	i = array_index_nospec(offset, ctx->nr_user_files);	/* Spectre-v1 clamp */
	file_slot = io_fixed_file_slot(&ctx->file_table, i);
	ret = -EBADF;
	if (!file_slot->file_ptr)
		goto out;

	/* file_ptr carries extra bits beyond the pointer; FFS_MASK recovers
	 * the struct file (presumably flag bits — see FFS_* definitions). */
	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
	if (ret)
		goto out;

	/* Empty the slot and switch rsrc nodes so the queued removal is
	 * flushed once the old node's references drain. */
	file_slot->file_ptr = 0;
	io_rsrc_node_switch(ctx, ctx->file_data);
	ret = 0;
out:
	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
	return ret;
}

static int __io_sqe_files_update(struct io_ring_ctx *ctx,
				 struct io_uring_rsrc_update2 *up,
				 unsigned nr_args)
@@ -9166,8 +9217,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
	struct io_buffer *buf;
	unsigned long index;

	xa_for_each(&ctx->io_buffers, index, buf)
	xa_for_each(&ctx->io_buffers, index, buf) {
		__io_remove_buffers(ctx, buf, index, -1U);
		cond_resched();
	}
}

static void io_req_cache_free(struct list_head *list)
@@ -9665,8 +9718,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
	struct io_tctx_node *node;
	unsigned long index;

	xa_for_each(&tctx->xa, index, node)
	xa_for_each(&tctx->xa, index, node) {
		io_uring_del_tctx_node(index);
		cond_resched();
	}
	if (wq) {
		/*
		 * Must be after io_uring_del_task_file() (removes nodes under