Commit 0bc7eb03 authored by Linus Torvalds
Browse files

Merge tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Mostly fixes for regressions in this cycle, but also a few fixes that
  predate this release.

  The odd one out is a tweak to the direct files added in this release,
  where attempting to reuse a slot is allowed instead of needing an
  explicit removal of that slot first. It's a considerable improvement
  in usability to that API, hence I'm sending it for -rc2.

   - io-wq race fix and cleanup (Hao)

   - loop_rw_iter() type fix

   - SQPOLL max worker race fix

   - Allow poll arm for O_NONBLOCK files, fixing a case where it's
     impossible to properly use io_uring if you cannot modify the file
     flags

   - Allow direct open to simply reuse a slot, instead of needing it
     explicitly removed first (Pavel)

   - Fix a case where we missed signal mask restoring in cqring_wait, if
     we hit -EFAULT (Xiaoguang)"

* tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block:
  io_uring: allow retry for O_NONBLOCK if async is supported
  io_uring: auto-removal for direct open/accept
  io_uring: fix missing sigmask restore in io_cqring_wait()
  io_uring: pin SQPOLL data before unlocking ring lock
  io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS arg items
  io-wq: fix potential race of acct->nr_workers
  io-wq: code clean of io_wqe_create_worker()
  io_uring: ensure symmetry in handling iter types in loop_rw_iter()
parents 36d6753b 5d329e12
Loading
Loading
Loading
Loading
+13 −14
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
#include <uapi/linux/io_uring.h>

#include "io-wq.h"

@@ -176,7 +177,6 @@ static void io_worker_ref_put(struct io_wq *wq)
static void io_worker_exit(struct io_worker *worker)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wqe_acct *acct = io_wqe_get_acct(worker);

	if (refcount_dec_and_test(&worker->ref))
		complete(&worker->ref_done);
@@ -186,7 +186,6 @@ static void io_worker_exit(struct io_worker *worker)
	if (worker->flags & IO_WORKER_F_FREE)
		hlist_nulls_del_rcu(&worker->nulls_node);
	list_del_rcu(&worker->all_list);
	acct->nr_workers--;
	preempt_disable();
	io_wqe_dec_running(worker);
	worker->flags = 0;
@@ -246,8 +245,6 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
 */
static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
{
	bool do_create = false;

	/*
	 * Most likely an attempt to queue unbounded work on an io_wq that
	 * wasn't setup with any unbounded workers.
@@ -256,20 +253,17 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
		pr_warn_once("io-wq is not configured for unbound workers");

	raw_spin_lock(&wqe->lock);
	if (acct->nr_workers < acct->max_workers) {
		acct->nr_workers++;
		do_create = true;
	if (acct->nr_workers == acct->max_workers) {
		raw_spin_unlock(&wqe->lock);
		return true;
	}
	acct->nr_workers++;
	raw_spin_unlock(&wqe->lock);
	if (do_create) {
	atomic_inc(&acct->nr_running);
	atomic_inc(&wqe->wq->worker_refs);
	return create_io_worker(wqe->wq, wqe, acct->index);
}

	return true;
}

static void io_wqe_inc_running(struct io_worker *worker)
{
	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
@@ -574,6 +568,7 @@ static int io_wqe_worker(void *data)
		}
		/* timed out, exit unless we're the last worker */
		if (last_timeout && acct->nr_workers > 1) {
			acct->nr_workers--;
			raw_spin_unlock(&wqe->lock);
			__set_current_state(TASK_RUNNING);
			break;
@@ -1287,6 +1282,10 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
{
	int i, node, prev = 0;

	BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND   != (int) IO_WQ_BOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_NR      != 2);

	for (i = 0; i < 2; i++) {
		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
			new_count[i] = task_rlimit(current, RLIMIT_NPROC);
+68 −37
Original line number Diff line number Diff line
@@ -2843,7 +2843,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
	return __io_file_supports_nowait(req->file, rw);
}

static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		      int rw)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct kiocb *kiocb = &req->rw.kiocb;
@@ -2865,8 +2866,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	if (unlikely(ret))
		return ret;

	/* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */
	if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK))
	/*
	 * If the file is marked O_NONBLOCK, still allow retry for it if it
	 * supports async. Otherwise it's impossible to use O_NONBLOCK files
	 * reliably. If not, or if IOCB_NOWAIT is set, don't retry.
	 */
	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
	    ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
		req->flags |= REQ_F_NOWAIT;

	ioprio = READ_ONCE(sqe->ioprio);
@@ -3263,12 +3269,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
				ret = nr;
			break;
		}
		if (!iov_iter_is_bvec(iter)) {
			iov_iter_advance(iter, nr);
		} else {
			req->rw.len -= nr;
			req->rw.addr += nr;
		}
		ret += nr;
		if (nr != iovec.iov_len)
			break;
		req->rw.len -= nr;
		req->rw.addr += nr;
		iov_iter_advance(iter, nr);
	}

	return ret;
@@ -3346,7 +3355,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	if (unlikely(!(req->file->f_mode & FMODE_READ)))
		return -EBADF;
	return io_prep_rw(req, sqe);
	return io_prep_rw(req, sqe, READ);
}

/*
@@ -3539,7 +3548,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
		return -EBADF;
	return io_prep_rw(req, sqe);
	return io_prep_rw(req, sqe, WRITE);
}

static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -7515,6 +7524,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
			break;
	} while (1);

	if (uts) {
		struct timespec64 ts;

		if (get_timespec64(&ts, uts))
			return -EFAULT;
		timeout = timespec64_to_jiffies(&ts);
	}

	if (sig) {
#ifdef CONFIG_COMPAT
		if (in_compat_syscall())
@@ -7528,14 +7545,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
			return ret;
	}

	if (uts) {
		struct timespec64 ts;

		if (get_timespec64(&ts, uts))
			return -EFAULT;
		timeout = timespec64_to_jiffies(&ts);
	}

	init_waitqueue_func_entry(&iowq.wq, io_wake_function);
	iowq.wq.private = current;
	INIT_LIST_HEAD(&iowq.wq.entry);
@@ -8284,11 +8293,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
#endif
}

/*
 * Queue @rsrc for removal by recording it on @node's rsrc_list.
 *
 * A zeroed io_rsrc_put entry is allocated with GFP_KERNEL, filled with
 * @rsrc and with the tag currently stored in @data for slot @idx, and then
 * linked onto @node->rsrc_list. The tag is copied by value, so the entry
 * keeps it even if the slot's tag is rewritten afterwards.
 *
 * Returns 0 on success, or -ENOMEM if the entry could not be allocated (in
 * which case nothing has been queued).
 */
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
				 struct io_rsrc_node *node, void *rsrc)
{
	struct io_rsrc_put *put = kzalloc(sizeof(*put), GFP_KERNEL);

	if (!put)
		return -ENOMEM;

	/* fill in the entry completely before it becomes visible on the list */
	put->rsrc = rsrc;
	put->tag = *io_get_tag_slot(data, idx);
	list_add(&put->list, &node->rsrc_list);

	return 0;
}

static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
				 unsigned int issue_flags, u32 slot_index)
{
	struct io_ring_ctx *ctx = req->ctx;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool needs_switch = false;
	struct io_fixed_file *file_slot;
	int ret = -EBADF;

@@ -8304,9 +8329,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,

	slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
	file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
	ret = -EBADF;
	if (file_slot->file_ptr)

	if (file_slot->file_ptr) {
		struct file *old_file;

		ret = io_rsrc_node_switch_start(ctx);
		if (ret)
			goto err;

		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
					    ctx->rsrc_node, old_file);
		if (ret)
			goto err;
		file_slot->file_ptr = 0;
		needs_switch = true;
	}

	*io_get_tag_slot(ctx->file_data, slot_index) = 0;
	io_fixed_file_set(file_slot, file);
@@ -8318,27 +8356,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,

	ret = 0;
err:
	if (needs_switch)
		io_rsrc_node_switch(ctx, ctx->file_data);
	io_ring_submit_unlock(ctx, !force_nonblock);
	if (ret)
		fput(file);
	return ret;
}

/*
 * Queue @rsrc for removal by recording it on @node's rsrc_list.
 *
 * A zeroed io_rsrc_put entry is allocated with GFP_KERNEL, filled with
 * @rsrc and with the tag currently stored in @data for slot @idx, and then
 * linked onto @node->rsrc_list. The tag is copied by value, so the entry
 * keeps it even if the slot's tag is rewritten afterwards.
 *
 * Returns 0 on success, or -ENOMEM if the entry could not be allocated (in
 * which case nothing has been queued).
 */
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
				 struct io_rsrc_node *node, void *rsrc)
{
	struct io_rsrc_put *put = kzalloc(sizeof(*put), GFP_KERNEL);

	if (!put)
		return -ENOMEM;

	/* fill in the entry completely before it becomes visible on the list */
	put->rsrc = rsrc;
	put->tag = *io_get_tag_slot(data, idx);
	list_add(&put->list, &node->rsrc_list);

	return 0;
}

static int __io_sqe_files_update(struct io_ring_ctx *ctx,
				 struct io_uring_rsrc_update2 *up,
				 unsigned nr_args)
@@ -10560,9 +10585,11 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
@@ -10577,16 +10604,20 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
	if (ret)
		goto err;

	if (sqd)
	if (sqd) {
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	return 0;
err:
	if (sqd)
	if (sqd) {
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
	}
	return ret;
}

+7 −1
Original line number Diff line number Diff line
@@ -317,13 +317,19 @@ enum {
	IORING_REGISTER_IOWQ_AFF		= 17,
	IORING_UNREGISTER_IOWQ_AFF		= 18,

	/* set/get max number of workers */
	/* set/get max number of io-wq workers */
	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,

	/* this goes last */
	IORING_REGISTER_LAST
};

/*
 * io-wq worker categories.
 *
 * These constants name the items of the IORING_REGISTER_IOWQ_MAX_WORKERS
 * argument, which carries one entry per worker category. The kernel side
 * checks at build time (BUILD_BUG_ON() in io_wq_max_workers()) that each
 * value equals the matching internal IO_WQ_ACCT_* index.
 */
enum {
	IO_WQ_BOUND,	/* item for the bound worker category */
	IO_WQ_UNBOUND,	/* item for the unbound worker category */
};

/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
	__u32 offset;