Commit c98c70ed authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-6.1-2022-10-13' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:
 "A collection of fixes that ended up either being later than the
  initial pull, or dependent on multiple branches (6.0-late being one of
  them) and hence deferred purposely. This contains:

   - Cleanup fixes for the single submitter late 6.0 change, which we
     pushed to 6.1 to keep the 6.0 changes small (Dylan, Pavel)

   - Fix for IORING_OP_CONNECT not handling -EINPROGRESS correctly (me)

   - Ensure that the zc sendmsg variant gets audited correctly (me)

   - Regression fix from this merge window where kiocb_end_write()
     doesn't always get called, which can cause issues with fs freezing
     (me)

   - Registered files SCM handling fix (Pavel)

   - Regression fix for big sqe dumping in fdinfo (Pavel)

   - Registered buffers accounting fix (Pavel)

   - Remove leftover notification structures, we killed them off late in
     6.0 (Pavel)

   - Minor optimizations (Pavel)

   - Cosmetic variable shadowing fix (Stefan)"

* tag 'io_uring-6.1-2022-10-13' of git://git.kernel.dk/linux:
  io_uring/rw: ensure kiocb_end_write() is always called
  io_uring: fix fdinfo sqe offsets calculation
  io_uring: local variable rw shadows outer variable in io_write
  io_uring/opdef: remove 'audit_skip' from SENDMSG_ZC
  io_uring: optimise locking for local tw with submit_wait
  io_uring: remove redundant memory barrier in io_req_local_work_add
  io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT
  io_uring: remove notif leftovers
  io_uring: correct pinned_vm accounting
  io_uring/af_unix: defer registered files gc to io_uring release
  io_uring: limit registration w/ SINGLE_ISSUER
  io_uring: remove io_register_submitter
  io_uring: simplify __io_uring_add_tctx_node
parents 6d84c258 2ec33a6c
Loading
Loading
Loading
Loading
+0 −5
Original line number Diff line number Diff line
@@ -34,9 +34,6 @@ struct io_file_table {
	unsigned int alloc_hint;
};

struct io_notif;
struct io_notif_slot;

struct io_hash_bucket {
	spinlock_t		lock;
	struct hlist_head	list;
@@ -242,8 +239,6 @@ struct io_ring_ctx {
		unsigned		nr_user_files;
		unsigned		nr_user_bufs;
		struct io_mapped_ubuf	**user_bufs;
		struct io_notif_slot	*notif_slots;
		unsigned		nr_notif_slots;

		struct io_submit_state	submit_state;

+2 −0
Original line number Diff line number Diff line
@@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
 *	@csum_level: indicates the number of consecutive checksums found in
 *		the packet minus one that have been verified as
 *		CHECKSUM_UNNECESSARY (max 3)
 *	@scm_io_uring: SKB holds io_uring registered files
 *	@dst_pending_confirm: need to confirm neighbour
 *	@decrypted: Decrypted SKB
 *	@slow_gro: state present at GRO time, slower prepare step required
@@ -982,6 +983,7 @@ struct sk_buff {
#endif
	__u8			slow_gro:1;
	__u8			csum_not_inet:1;
	__u8			scm_io_uring:1;

#ifdef CONFIG_NET_SCHED
	__u16			tc_index;	/* traffic control index */
+1 −1
Original line number Diff line number Diff line
@@ -94,7 +94,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
		sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
		if (sq_idx > sq_mask)
			continue;
		sqe = &ctx->sq_sqes[sq_idx << 1];
		sqe = &ctx->sq_sqes[sq_idx << sq_shift];
		seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
			      "addr:0x%llx, rw_flags:0x%x, buf_index:%d "
			      "user_data:%llu",
+21 −12
Original line number Diff line number Diff line
@@ -1106,6 +1106,8 @@ static void io_req_local_work_add(struct io_kiocb *req)

	if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
		return;
	/* need it for the following io_cqring_wake() */
	smp_mb__after_atomic();

	if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
		io_move_task_work_from_local(ctx);
@@ -1117,8 +1119,7 @@ static void io_req_local_work_add(struct io_kiocb *req)

	if (ctx->has_evfd)
		io_eventfd_signal(ctx);
	io_cqring_wake(ctx);

	__io_cqring_wake(ctx);
}

static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
@@ -2585,12 +2586,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
	io_sq_thread_finish(ctx);

	if (ctx->mm_account) {
		mmdrop(ctx->mm_account);
		ctx->mm_account = NULL;
	}

	io_rsrc_refs_drop(ctx);
	/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
	io_wait_rsrc_data(ctx->buf_data);
@@ -2631,8 +2626,11 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
	}
#endif
	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
	WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);

	if (ctx->mm_account) {
		mmdrop(ctx->mm_account);
		ctx->mm_account = NULL;
	}
	io_mem_free(ctx->rings);
	io_mem_free(ctx->sq_sqes);

@@ -3229,8 +3227,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
			mutex_unlock(&ctx->uring_lock);
			goto out;
		}
		if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
		if (flags & IORING_ENTER_GETEVENTS) {
			if (ctx->syscall_iopoll)
				goto iopoll_locked;
			/*
			 * Ignore errors, we'll soon call io_cqring_wait() and
			 * it should handle ownership problems if any.
			 */
			if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
				(void)io_run_local_work_locked(ctx);
		}
		mutex_unlock(&ctx->uring_lock);
	}

@@ -3355,7 +3361,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
	if (fd < 0)
		return fd;

	ret = __io_uring_add_tctx_node(ctx, false);
	ret = __io_uring_add_tctx_node(ctx);
	if (ret) {
		put_unused_fd(fd);
		return ret;
@@ -3890,6 +3896,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		if (opcode >= IORING_REGISTER_LAST)
			return -EINVAL;
+16 −2
Original line number Diff line number Diff line
@@ -203,17 +203,24 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}

static inline void io_cqring_wake(struct io_ring_ctx *ctx)
/* requires smp_mb() prior, see wq_has_sleeper() */
static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
{
	/*
	 * wake_up_all() may seem excessive, but io_wake_function() and
	 * io_should_wake() handle the termination of the loop and only
	 * wake as many waiters as we need to.
	 */
	if (wq_has_sleeper(&ctx->cq_wait))
	if (waitqueue_active(&ctx->cq_wait))
		wake_up_all(&ctx->cq_wait);
}

/*
 * Wake tasks waiting on the completion queue of @ctx.
 *
 * This is the self-contained variant: it issues the full memory barrier
 * itself and then defers to __io_cqring_wake(), which expects that barrier
 * to have been executed before it inspects the wait queue.
 */
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
{
	/* full barrier must come before the wait queue is examined */
	smp_mb();
	__io_cqring_wake(ctx);
}

static inline bool io_sqring_full(struct io_ring_ctx *ctx)
{
	struct io_rings *r = ctx->rings;
@@ -268,6 +275,13 @@ static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
	return ret;
}

/*
 * Run any work queued on @ctx->work_llist.
 *
 * Returns 0 without doing anything when the list is empty; otherwise
 * returns whatever __io_run_local_work() reports.
 *
 * NOTE(review): the 'true' argument presumably tells the callee that the
 * caller already holds the ring lock, as the "_locked" suffix suggests;
 * confirm against __io_run_local_work().
 */
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
{
	/* only enter the runner when something is actually pending */
	if (!llist_empty(&ctx->work_llist))
		return __io_run_local_work(ctx, true);

	return 0;
}

static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
{
	if (!*locked) {
Loading