Commit 5bbb336b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Highlights from this cycles are things like request recycling and
  task_work optimizations, which net us anywhere from 10-20% of speedups
  on workloads that mostly are inline.

  This work was originally done to put io_uring under memcg, which adds
  considerable overhead. But it's a really nice win as well. Also worth
  highlighting is the LOOKUP_CACHED work in the VFS, and using it in
  io_uring. Greatly speeds up the fast path for file opens.

  Summary:

   - Put io_uring under memcg protection. We accounted just the rings
     themselves under rlimit memlock before, now we account everything.

   - Request cache recycling, persistent across invocations (Pavel, me)

   - First part of a cleanup/improvement to buffer registration (Bijan)

   - SQPOLL fixes (Hao)

   - File registration NULL pointer fixup (Dan)

   - LOOKUP_CACHED support for io_uring

   - Disable /proc/thread-self/ for io_uring, like we do for /proc/self

   - Add Pavel to the io_uring MAINTAINERS entry

   - Tons of code cleanups and optimizations (Pavel)

   - Support for skip entries in file registration (Noah)"

* tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux-block: (103 commits)
  io_uring: tctx->task_lock should be IRQ safe
  proc: don't allow async path resolution of /proc/thread-self components
  io_uring: kill cached requests from exiting task closing the ring
  io_uring: add helper to free all request caches
  io_uring: allow task match to be passed to io_req_cache_free()
  io-wq: clear out worker ->fs and ->files
  io_uring: optimise io_init_req() flags setting
  io_uring: clean io_req_find_next() fast check
  io_uring: don't check PF_EXITING from syscall
  io_uring: don't split out consume out of SQE get
  io_uring: save ctx put/get for task_work submit
  io_uring: don't duplicate io_req_task_queue()
  io_uring: optimise SQPOLL mm/files grabbing
  io_uring: optimise out unlikely link queue
  io_uring: take compl state from submit state
  io_uring: inline io_complete_rw_common()
  io_uring: move res check out of io_rw_reissue()
  io_uring: simplify iopoll reissuing
  io_uring: clean up io_req_free_batch_finish()
  io_uring: move submit side state closer in the ring
  ...
parents 9820b4dc 0b81e80c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -6830,6 +6830,9 @@ F: include/linux/fs.h
F:	include/linux/fs_types.h
F:	include/uapi/linux/fs.h
F:	include/uapi/linux/openat2.h
X:	fs/io-wq.c
X:	fs/io-wq.h
X:	fs/io_uring.c
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
M:	Riku Voipio <riku.voipio@iki.fi>
@@ -9263,6 +9266,7 @@ F: include/uapi/linux/iommu.h
IO_URING
M:	Jens Axboe <axboe@kernel.dk>
R:	Pavel Begunkov <asml.silence@gmail.com>
L:	io-uring@vger.kernel.org
S:	Maintained
T:	git git://git.kernel.dk/linux-block
@@ -9270,6 +9274,7 @@ T: git git://git.kernel.dk/liburing
F:	fs/io-wq.c
F:	fs/io-wq.h
F:	fs/io_uring.c
F:	include/linux/io_uring.h
F:	include/uapi/linux/io_uring.h
IPMI SUBSYSTEM
+25 −11
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@
#include <linux/close_range.h>
#include <net/sock.h>

#include "internal.h"

unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
/* our min() is unusable in constant expressions ;-/ */
@@ -732,36 +734,48 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}

/*
 * variant of close_fd that gets a ref on the file for later fput.
 * The caller must ensure that filp_close() called on the file, and then
 * an fput().
 * See close_fd_get_file() below, this variant assumes current->files->file_lock
 * is held.
 */
int close_fd_get_file(unsigned int fd, struct file **res)
int __close_fd_get_file(unsigned int fd, struct file **res)
{
	struct files_struct *files = current->files;
	struct file *file;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (fd >= fdt->max_fds)
		goto out_unlock;
		goto out_err;
	file = fdt->fd[fd];
	if (!file)
		goto out_unlock;
		goto out_err;
	rcu_assign_pointer(fdt->fd[fd], NULL);
	__put_unused_fd(files, fd);
	spin_unlock(&files->file_lock);
	get_file(file);
	*res = file;
	return 0;

out_unlock:
	spin_unlock(&files->file_lock);
out_err:
	*res = NULL;
	return -ENOENT;
}

/*
 * variant of close_fd that gets a ref on the file for later fput.
 * The caller must ensure that filp_close() called on the file, and then
 * an fput().
 */
int close_fd_get_file(unsigned int fd, struct file **res)
{
	struct files_struct *files = current->files;
	int ret;

	spin_lock(&files->file_lock);
	ret = __close_fd_get_file(fd, res);
	spin_unlock(&files->file_lock);

	return ret;
}

void do_close_on_exec(struct files_struct *files)
{
	unsigned i;
+1 −0
Original line number Diff line number Diff line
@@ -133,6 +133,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
		const char *, const struct open_flags *);
extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
extern int __close_fd_get_file(unsigned int fd, struct file **res);

long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
+12 −19
Original line number Diff line number Diff line
@@ -64,9 +64,7 @@ struct io_worker {
#endif
	const struct cred *cur_creds;
	const struct cred *saved_creds;
	struct files_struct *restore_files;
	struct nsproxy *restore_nsproxy;
	struct fs_struct *restore_fs;
};

#if BITS_PER_LONG == 64
@@ -156,19 +154,19 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
		worker->cur_creds = worker->saved_creds = NULL;
	}

	if (current->files != worker->restore_files) {
	if (current->files) {
		__acquire(&wqe->lock);
		raw_spin_unlock_irq(&wqe->lock);
		dropped_lock = true;

		task_lock(current);
		current->files = worker->restore_files;
		current->files = NULL;
		current->nsproxy = worker->restore_nsproxy;
		task_unlock(current);
	}

	if (current->fs != worker->restore_fs)
		current->fs = worker->restore_fs;
	if (current->fs)
		current->fs = NULL;

	/*
	 * If we have an active mm, we need to drop the wq lock before unusing
@@ -329,11 +327,11 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
	allow_kernel_signal(SIGINT);

	current->flags |= PF_IO_WORKER;
	current->fs = NULL;
	current->files = NULL;

	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
	worker->restore_files = current->files;
	worker->restore_nsproxy = current->nsproxy;
	worker->restore_fs = current->fs;
	io_wqe_inc_running(wqe, worker);
}

@@ -555,23 +553,21 @@ static void io_worker_handle_work(struct io_worker *worker)

		/* handle a whole dependent link */
		do {
			struct io_wq_work *old_work, *next_hashed, *linked;
			struct io_wq_work *next_hashed, *linked;
			unsigned int hash = io_get_work_hash(work);

			next_hashed = wq_next_work(work);
			io_impersonate_work(worker, work);
			wq->do_work(work);
			io_assign_current_work(worker, NULL);

			old_work = work;
			linked = wq->do_work(work);

			linked = wq->free_work(work);
			work = next_hashed;
			if (!work && linked && !io_wq_is_hashed(linked)) {
				work = linked;
				linked = NULL;
			}
			io_assign_current_work(worker, work);
			wq->free_work(old_work);

			if (linked)
				io_wqe_enqueue(wqe, linked);

@@ -850,11 +846,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
	struct io_wq *wq = wqe->wq;

	do {
		struct io_wq_work *old_work = work;

		work->flags |= IO_WQ_WORK_CANCEL;
		work = wq->do_work(work);
		wq->free_work(old_work);
		wq->do_work(work);
		work = wq->free_work(work);
	} while (work);
}

@@ -944,7 +938,6 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
	 */
	spin_lock_irqsave(&worker->lock, flags);
	if (worker->cur_work &&
	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
	    match->fn(worker->cur_work, match->data)) {
		send_sig(SIGINT, worker->task, 1);
		match->nr_running++;
+2 −12
Original line number Diff line number Diff line
@@ -9,7 +9,6 @@ enum {
	IO_WQ_WORK_CANCEL	= 1,
	IO_WQ_WORK_HASHED	= 2,
	IO_WQ_WORK_UNBOUND	= 4,
	IO_WQ_WORK_NO_CANCEL	= 8,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_WORK_FILES	= 32,
@@ -28,15 +27,6 @@ enum io_wq_cancel {
	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
};

struct io_wq_work_node {
	struct io_wq_work_node *next;
};

struct io_wq_work_list {
	struct io_wq_work_node *first;
	struct io_wq_work_node *last;
};

static inline void wq_list_add_after(struct io_wq_work_node *node,
				     struct io_wq_work_node *pos,
				     struct io_wq_work_list *list)
@@ -107,8 +97,8 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
	return container_of(work->list.next, struct io_wq_work, list);
}

typedef void (free_work_fn)(struct io_wq_work *);
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work *);

struct io_wq_data {
	struct user_struct *user;
Loading