Commit 4962a856 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "A mix of fixes and a few stragglers. In detail:

   - Revert the bogus __read_mostly that we discussed for the initial
     pull request.

   - Fix a merge window regression with fixed file registration error
     path handling.

   - Fix io-wq numa node affinities.

   - Series abstracting out an io_identity struct, making it both easier
     to see what the personality items are, and also easier to to adopt
     more. Use this to cover audit logging.

   - Fix for read-ahead disabled block condition in async buffered
     reads, and using single page read-ahead to unify what
     generic_file_buffer_read() path is used.

   - Series for REQ_F_COMP_LOCKED fix and removal of it (Pavel)

   - Poll fix (Pavel)"

* tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: use blk_queue_nowait() to check if NOWAIT supported
  mm: use limited read-ahead to satisfy read
  mm: mark async iocb read as NOWAIT once some data has been copied
  io_uring: fix double poll mask init
  io-wq: inherit audit loginuid and sessionid
  io_uring: use percpu counters to track inflight requests
  io_uring: assign new io_identity for task if members have changed
  io_uring: store io_identity in io_uring_task
  io_uring: COW io_identity on mismatch
  io_uring: move io identity items into separate struct
  io_uring: rely solely on work flags to determine personality.
  io_uring: pass required context in as flags
  io-wq: assign NUMA node locality if appropriate
  io_uring: fix error path cleanup in io_sqe_files_register()
  Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly"
  io_uring: fix REQ_F_COMP_LOCKED by killing it
  io_uring: dig out COMP_LOCK from deep call chain
  io_uring: don't put a poll req under spinlock
  io_uring: don't unnecessarily clear F_LINK_TIMEOUT
  io_uring: don't set COMP_LOCKED if won't put
  ...
parents 38525c69 9ba0d0c8
Loading
Loading
Loading
Loading
+31 −20
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/fs_struct.h>
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>

#include "io-wq.h"

@@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
		mmput(worker->mm);
		worker->mm = NULL;
	}
	if (!work->mm)
		return;

	if (mmget_not_zero(work->mm)) {
		kthread_use_mm(work->mm);
		worker->mm = work->mm;
		/* hang on to this mm */
		work->mm = NULL;
	if (mmget_not_zero(work->identity->mm)) {
		kthread_use_mm(work->identity->mm);
		worker->mm = work->identity->mm;
		return;
	}

@@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
				      struct io_wq_work *work)
{
#ifdef CONFIG_BLK_CGROUP
	if (work->blkcg_css != worker->blkcg_css) {
		kthread_associate_blkcg(work->blkcg_css);
		worker->blkcg_css = work->blkcg_css;
	if (!(work->flags & IO_WQ_WORK_BLKCG))
		return;
	if (work->identity->blkcg_css != worker->blkcg_css) {
		kthread_associate_blkcg(work->identity->blkcg_css);
		worker->blkcg_css = work->identity->blkcg_css;
	}
#endif
}
@@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
static void io_wq_switch_creds(struct io_worker *worker,
			       struct io_wq_work *work)
{
	const struct cred *old_creds = override_creds(work->creds);
	const struct cred *old_creds = override_creds(work->identity->creds);

	worker->cur_creds = work->creds;
	worker->cur_creds = work->identity->creds;
	if (worker->saved_creds)
		put_cred(old_creds); /* creds set by previous switch */
	else
@@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker,
static void io_impersonate_work(struct io_worker *worker,
				struct io_wq_work *work)
{
	if (work->files && current->files != work->files) {
	if ((work->flags & IO_WQ_WORK_FILES) &&
	    current->files != work->identity->files) {
		task_lock(current);
		current->files = work->files;
		current->nsproxy = work->nsproxy;
		current->files = work->identity->files;
		current->nsproxy = work->identity->nsproxy;
		task_unlock(current);
	}
	if (work->fs && current->fs != work->fs)
		current->fs = work->fs;
	if (work->mm != worker->mm)
	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
		current->fs = work->identity->fs;
	if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
		io_wq_switch_mm(worker, work);
	if (worker->cur_creds != work->creds)
	if ((work->flags & IO_WQ_WORK_CREDS) &&
	    worker->cur_creds != work->identity->creds)
		io_wq_switch_creds(worker, work);
	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
	io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT
	current->loginuid = work->identity->loginuid;
	current->sessionid = work->identity->sessionid;
#endif
}

static void io_assign_current_work(struct io_worker *worker,
@@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker,
		cond_resched();
	}

#ifdef CONFIG_AUDIT
	current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
	current->sessionid = AUDIT_SID_UNSET;
#endif

	spin_lock_irq(&worker->lock);
	worker->cur_work = work;
	spin_unlock_irq(&worker->lock);
@@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
		kfree(worker);
		return false;
	}
	kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));

	raw_spin_lock_irq(&wqe->lock);
	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+9 −9
Original line number Diff line number Diff line
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H

#include <linux/io_uring.h>

struct io_wq;

enum {
@@ -10,6 +12,12 @@ enum {
	IO_WQ_WORK_NO_CANCEL	= 8,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_WORK_FILES	= 32,
	IO_WQ_WORK_FS		= 64,
	IO_WQ_WORK_MM		= 128,
	IO_WQ_WORK_CREDS	= 256,
	IO_WQ_WORK_BLKCG	= 512,

	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
};

@@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,

struct io_wq_work {
	struct io_wq_work_node list;
	struct files_struct *files;
	struct mm_struct *mm;
#ifdef CONFIG_BLK_CGROUP
	struct cgroup_subsys_state *blkcg_css;
#endif
	const struct cred *creds;
	struct nsproxy *nsproxy;
	struct fs_struct *fs;
	unsigned long fsize;
	struct io_identity *identity;
	unsigned flags;
};

+388 −260

File changed.

Preview size limit exceeded, changes collapsed.

+4 −0
Original line number Diff line number Diff line
@@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
	kuid_t kloginuid;
	int rv;

	/* Don't let kthreads write their own loginuid */
	if (current->flags & PF_KTHREAD)
		return -EPERM;

	rcu_read_lock();
	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
		rcu_read_unlock();
+21 −6
Original line number Diff line number Diff line
@@ -4,18 +4,33 @@

#include <linux/sched.h>
#include <linux/xarray.h>
#include <linux/percpu-refcount.h>

struct io_identity {
	struct files_struct		*files;
	struct mm_struct		*mm;
#ifdef CONFIG_BLK_CGROUP
	struct cgroup_subsys_state	*blkcg_css;
#endif
	const struct cred		*creds;
	struct nsproxy			*nsproxy;
	struct fs_struct		*fs;
	unsigned long			fsize;
#ifdef CONFIG_AUDIT
	kuid_t				loginuid;
	unsigned int			sessionid;
#endif
	refcount_t			count;
};

struct io_uring_task {
	/* submission side */
	struct xarray		xa;
	struct wait_queue_head	wait;
	struct file		*last;
	atomic_long_t		req_issue;

	/* completion side */
	bool			in_idle ____cacheline_aligned_in_smp;
	atomic_long_t		req_complete;
	struct percpu_counter	inflight;
	struct io_identity	__identity;
	struct io_identity	*identity;
	bool			in_idle;
};

#if defined(CONFIG_IO_URING)
Loading