Commit b44d1ddc authored by Linus Torvalds

Merge tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - Use thread info versions of flag testing, as discussed last week.

 - The series enabling PF_IO_WORKER threads to simply take signals,
   instead of needing to special-case that they do not in a bunch of
   places. It ends up being pretty trivial to do, and then we can
   revert all the special casing we are currently carrying (a small
   userspace sketch of the new signal-blocking setup follows the
   change list below).

 - Kill dead pointer assignment

 - Fix hashed part of async work queue trace

 - Fix sign extension issue in the IORING_OP_PROVIDE_BUFFERS length
   check (a short demonstration of the overflow follows this list)

 - Fix a link completion ordering regression in this merge window

 - Cancellation fixes
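
A short demonstration of the PROVIDE_BUFFERS overflow mentioned above,
in plain userspace C rather than the kernel code itself (the values
are made up): two user-controlled u32s multiplied in 32 bits can wrap,
so a bounds check sees a far smaller region than was actually
requested. The fix in the diff below widens to unsigned long before
multiplying.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* hypothetical user-supplied sqe->len and nbufs values */
        uint32_t len = 0x10000, nbufs = 0x10000;

        /* old style: u32 * u32 wraps modulo 2^32, giving 0 here, so an
         * access_ok()-like check would validate zero bytes */
        unsigned long wrapped = len * nbufs;

        /* fixed style: widen first, as the patch does with
         * "size = (unsigned long)p->len * p->nbufs;" */
        unsigned long widened = (unsigned long)len * nbufs;

        printf("wrapped=%#lx widened=%#lx\n", wrapped, widened);
        return 0;
    }

On an LP64 build this prints wrapped=0 and widened=0x100000000, the
difference between checking an empty range and the real request.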

* tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block:
  io_uring: remove unused assignment to pointer io
  io_uring: don't cancel extra on files match
  io_uring: don't cancel-track common timeouts
  io_uring: do post-completion chore on t-out cancel
  io_uring: fix timeout cancel return code
  Revert "signal: don't allow STOP on PF_IO_WORKER threads"
  Revert "kernel: freezer should treat PF_IO_WORKER like PF_KTHREAD for freezing"
  Revert "kernel: treat PF_IO_WORKER like PF_KTHREAD for ptrace/signals"
  Revert "signal: don't allow sending any signals to PF_IO_WORKER threads"
  kernel: stop masking signals in create_io_thread()
  io_uring: handle signals for IO threads like a normal thread
  kernel: don't call do_exit() for PF_IO_WORKER threads
  io_uring: maintain CQE order of a failed link
  io-wq: fix race around pending work on teardown
  io_uring: do ctx sqd ejection in a clear context
  io_uring: fix provide_buffers sign extension
  io_uring: don't skip file_end_write() on reissue
  io_uring: correct io_queue_async_work() traces
  io_uring: don't use {test,clear}_tsk_thread_flag() for current
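
The signal series above boils down to two things: IO worker threads
are now created with every signal except SIGKILL and SIGSTOP blocked
(the siginitsetinv() call in kernel/fork.c below), and the worker
loops dequeue pending signals themselves via get_signal(). A rough
userspace analogue of the blocking setup, using ordinary POSIX calls
rather than the in-kernel API:

    #include <signal.h>
    #include <stdio.h>

    int main(void)
    {
        sigset_t mask;

        /* start from "block everything"... */
        sigfillset(&mask);
        /* ...minus the two unblockable signals, mirroring
         * siginitsetinv(&p->blocked,
         *               sigmask(SIGKILL)|sigmask(SIGSTOP)) */
        sigdelset(&mask, SIGKILL);
        sigdelset(&mask, SIGSTOP);

        if (sigprocmask(SIG_SETMASK, &mask, NULL) != 0) {
            perror("sigprocmask");
            return 1;
        }

        /* non-fatal signals now stay pending instead of being
         * delivered; the worker loops in the diffs below pull them
         * off explicitly with get_signal() */
        puts("all catchable signals blocked");
        return 0;
    }
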
parents abed516e 2b8ed1c9
fs/io-wq.c (+21 −11)

@@ -16,7 +16,6 @@
 #include <linux/rculist_nulls.h>
 #include <linux/cpu.h>
 #include <linux/tracehook.h>
-#include <linux/freezer.h>
 
 #include "../kernel/sched/sched.h"
 #include "io-wq.h"
@@ -388,11 +387,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 
 static bool io_flush_signals(void)
 {
-	if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
+	if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
 		__set_current_state(TASK_RUNNING);
-		if (current->task_works)
-			task_work_run();
-		clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
+		tracehook_notify_signal();
 		return true;
 	}
 	return false;
@@ -505,10 +502,15 @@ static int io_wqe_worker(void *data)
 		if (io_flush_signals())
 			continue;
 		ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
-		if (try_to_freeze() || ret)
+		if (signal_pending(current)) {
+			struct ksignal ksig;
+
+			if (!get_signal(&ksig))
+				continue;
+			break;
+		}
+		if (ret)
 			continue;
-		if (fatal_signal_pending(current))
-			break;
 		/* timed out, exit unless we're the fixed worker */
 		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
 		    !(worker->flags & IO_WORKER_F_FIXED))
@@ -716,9 +718,13 @@ static int io_wq_manager(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		io_wq_check_workers(wq);
 		schedule_timeout(HZ);
-		try_to_freeze();
-		if (fatal_signal_pending(current))
+		if (signal_pending(current)) {
+			struct ksignal ksig;
+
+			if (!get_signal(&ksig))
+				continue;
 			set_bit(IO_WQ_BIT_EXIT, &wq->state);
+		}
 	} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
 	io_wq_check_workers(wq);
@@ -1065,7 +1071,11 @@ static void io_wq_destroy(struct io_wq *wq)
 
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
-		WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
+		struct io_cb_cancel_data match = {
+			.fn		= io_wq_work_match_all,
+			.cancel_all	= true,
+		};
+		io_wqe_cancel_pending_work(wqe, &match);
 		kfree(wqe);
 	}
 	io_wq_put_hash(wq->hash);
fs/io_uring.c (+51 −47)

@@ -78,7 +78,6 @@
 #include <linux/task_work.h>
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
-#include <linux/freezer.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -1095,8 +1094,6 @@ static bool io_match_task(struct io_kiocb *head,
 	io_for_each_link(req, head) {
 		if (req->flags & REQ_F_INFLIGHT)
 			return true;
-		if (req->task->files == files)
-			return true;
 	}
 	return false;
 }
@@ -1239,16 +1236,16 @@ static void io_queue_async_work(struct io_kiocb *req)
 	BUG_ON(!tctx);
 	BUG_ON(!tctx->io_wq);
 
-	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
-					&req->work, req->flags);
 	/* init ->work of the whole link before punting */
 	io_prep_async_link(req);
+	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+					&req->work, req->flags);
 	io_wq_enqueue(tctx->io_wq, &req->work);
 	if (link)
 		io_queue_linked_timeout(link);
 }
 
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
 {
 	struct io_timeout_data *io = req->async_data;
 	int ret;
@@ -1258,31 +1255,11 @@ static void io_kill_timeout(struct io_kiocb *req)
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
-		io_cqring_fill_event(req, 0);
+		io_cqring_fill_event(req, status);
 		io_put_req_deferred(req, 1);
 	}
 }
 
-/*
- * Returns true if we found and killed one or more timeouts
- */
-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
-			     struct files_struct *files)
-{
-	struct io_kiocb *req, *tmp;
-	int canceled = 0;
-
-	spin_lock_irq(&ctx->completion_lock);
-	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
-		if (io_match_task(req, tsk, files)) {
-			io_kill_timeout(req);
-			canceled++;
-		}
-	}
-	spin_unlock_irq(&ctx->completion_lock);
-	return canceled != 0;
-}
-
 static void __io_queue_deferred(struct io_ring_ctx *ctx)
 {
 	do {
@@ -1327,7 +1304,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 			break;
 
 		list_del_init(&req->timeout.list);
-		io_kill_timeout(req);
+		io_kill_timeout(req, 0);
 	} while (!list_empty(&ctx->timeout_list));
 
 	ctx->cq_last_tm_flush = seq;
@@ -2524,13 +2501,12 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 {
 	int cflags = 0;
 
+	if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+		kiocb_end_write(req);
 	if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
 		return;
 	if (res != req->result)
 		req_set_fail_links(req);
-
-	if (req->rw.kiocb.ki_flags & IOCB_WRITE)
-		kiocb_end_write(req);
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_rw_kbuf(req);
 	__io_req_complete(req, issue_flags, res, cflags);
@@ -3978,6 +3954,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 static int io_provide_buffers_prep(struct io_kiocb *req,
 				   const struct io_uring_sqe *sqe)
 {
+	unsigned long size;
 	struct io_provide_buf *p = &req->pbuf;
 	u64 tmp;
 
@@ -3991,7 +3968,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
 	p->addr = READ_ONCE(sqe->addr);
 	p->len = READ_ONCE(sqe->len);
 
-	if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+	size = (unsigned long)p->len * p->nbufs;
+	if (!access_ok(u64_to_user_ptr(p->addr), size))
 		return -EFAULT;
 
 	p->bgid = READ_ONCE(sqe->buf_group);
@@ -4820,7 +4798,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 			ret = -ENOMEM;
 			goto out;
 		}
-		io = req->async_data;
 		memcpy(req->async_data, &__io, sizeof(__io));
 		return -EAGAIN;
 	}
@@ -5583,6 +5560,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
 	data->mode = io_translate_timeout_mode(flags);
 	hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
-	io_req_track_inflight(req);
+	if (is_timeout_link)
+		io_req_track_inflight(req);
 	return 0;
 }
@@ -6479,8 +6457,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	ret = io_init_req(ctx, req, sqe);
 	if (unlikely(ret)) {
 fail_req:
-		io_put_req(req);
-		io_req_complete(req, ret);
 		if (link->head) {
 			/* fail even hard links since we don't submit */
 			link->head->flags |= REQ_F_FAIL_LINK;
@@ -6488,6 +6464,8 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			io_req_complete(link->head, -ECANCELED);
 			link->head = NULL;
 		}
+		io_put_req(req);
+		io_req_complete(req, ret);
 		return ret;
 	}
 	ret = io_req_prep(req, sqe);
@@ -6764,8 +6742,13 @@ static int io_sq_thread(void *data)
 			timeout = jiffies + sqd->sq_thread_idle;
 			continue;
 		}
-		if (fatal_signal_pending(current))
+		if (signal_pending(current)) {
+			struct ksignal ksig;
+
+			if (!get_signal(&ksig))
+				continue;
 			break;
+		}
 		sqt_spin = false;
 		cap_entries = !list_is_singular(&sqd->ctx_list);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -6808,7 +6791,6 @@ static int io_sq_thread(void *data)
 
 			mutex_unlock(&sqd->lock);
 			schedule();
-			try_to_freeze();
 			mutex_lock(&sqd->lock);
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				io_ring_clear_wakeup_flag(ctx);
@@ -6873,7 +6855,7 @@ static int io_run_task_work_sig(void)
 		return 1;
 	if (!signal_pending(current))
 		return 0;
-	if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
+	if (test_thread_flag(TIF_NOTIFY_SIGNAL))
 		return -ERESTARTSYS;
 	return -EINTR;
 }
@@ -8563,6 +8545,14 @@ static void io_ring_exit_work(struct work_struct *work)
 	struct io_tctx_node *node;
 	int ret;
 
+	/* prevent SQPOLL from submitting new requests */
+	if (ctx->sq_data) {
+		io_sq_thread_park(ctx->sq_data);
+		list_del_init(&ctx->sqd_list);
+		io_sqd_update_thread_idle(ctx->sq_data);
+		io_sq_thread_unpark(ctx->sq_data);
+	}
+
 	/*
 	 * If we're doing polled IO and end up having requests being
 	 * submitted async (out-of-line), then completions can come in while
@@ -8599,6 +8589,28 @@ static void io_ring_exit_work(struct work_struct *work)
 	io_ring_ctx_free(ctx);
 }
 
+/* Returns true if we found and killed one or more timeouts */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+			     struct files_struct *files)
+{
+	struct io_kiocb *req, *tmp;
+	int canceled = 0;
+
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+		if (io_match_task(req, tsk, files)) {
+			io_kill_timeout(req, -ECANCELED);
+			canceled++;
+		}
+	}
+	io_commit_cqring(ctx);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	if (canceled != 0)
+		io_cqring_ev_posted(ctx);
+	return canceled != 0;
+}
+
 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
 	unsigned long index;
@@ -8614,14 +8626,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 		io_unregister_personality(ctx, index);
 	mutex_unlock(&ctx->uring_lock);
 
-	/* prevent SQPOLL from submitting new requests */
-	if (ctx->sq_data) {
-		io_sq_thread_park(ctx->sq_data);
-		list_del_init(&ctx->sqd_list);
-		io_sqd_update_thread_idle(ctx->sq_data);
-		io_sq_thread_unpark(ctx->sq_data);
-	}
-
 	io_kill_timeouts(ctx, NULL, NULL);
 	io_poll_remove_all(ctx, NULL, NULL);
 
kernel/fork.c (+8 −8)

@@ -1948,8 +1948,14 @@ static __latent_entropy struct task_struct *copy_process(
 	p = dup_task_struct(current, node);
 	if (!p)
 		goto fork_out;
-	if (args->io_thread)
+	if (args->io_thread) {
+		/*
+		 * Mark us an IO worker, and block any signal that isn't
+		 * fatal or STOP
+		 */
 		p->flags |= PF_IO_WORKER;
+		siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
+	}
 
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
@@ -2438,14 +2444,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
 		.stack_size	= (unsigned long)arg,
 		.io_thread	= 1,
 	};
-	struct task_struct *tsk;
 
-	tsk = copy_process(NULL, 0, node, &args);
-	if (!IS_ERR(tsk)) {
-		sigfillset(&tsk->blocked);
-		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
-	}
-	return tsk;
+	return copy_process(NULL, 0, node, &args);
 }
 
 /*
kernel/freezer.c (+1 −1)

@@ -134,7 +134,7 @@ bool freeze_task(struct task_struct *p)
 		return false;
 	}
 
-	if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))
+	if (!(p->flags & PF_KTHREAD))
 		fake_signal_wake_up(p);
 	else
 		wake_up_state(p, TASK_INTERRUPTIBLE);
kernel/ptrace.c (+1 −1)

@@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	audit_ptrace(task);
 
 	retval = -EPERM;
-	if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
+	if (unlikely(task->flags & PF_KTHREAD))
 		goto out;
 	if (same_thread_group(task, current))
 		goto out;