Commit 76d3ccec authored by Matteo Rizzo's avatar Matteo Rizzo Committed by Jens Axboe
Browse files

io_uring: add a sysctl to disable io_uring system-wide



Introduce a new sysctl (io_uring_disabled) which can be either 0, 1, or
2. When 0 (the default), all processes are allowed to create io_uring
instances, which is the current behavior.  When 1, io_uring creation is
disabled (io_uring_setup() will fail with -EPERM) for unprivileged
processes not in the kernel.io_uring_group group.  When 2, calls to
io_uring_setup() fail with -EPERM regardless of privilege.

Signed-off-by: Matteo Rizzo <matteorizzo@google.com>
[JEM: modified to add io_uring_group]
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Link: https://lore.kernel.org/r/x49y1i42j1z.fsf@segfault.boston.devel.redhat.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 32f5dea0
Loading
Loading
Loading
Loading
+29 −0
Original line number Diff line number Diff line
@@ -450,6 +450,35 @@ this allows system administrators to override the
``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded.


io_uring_disabled
=================

Controls whether processes are allowed to create new io_uring instances.
Restricting or disabling io_uring creation shrinks the kernel's attack
surface.

= ======================================================================
0 All processes can create io_uring instances as normal. This is the
  default setting.
1 io_uring creation is disabled (io_uring_setup() will fail with
  -EPERM) for unprivileged processes not in the io_uring_group group.
  Existing io_uring instances can still be used.  See the
  documentation for io_uring_group for more information.
2 io_uring creation is disabled for all processes. io_uring_setup()
  always fails with -EPERM. Existing io_uring instances can still be
  used.
= ======================================================================


io_uring_group
==============

When io_uring_disabled is set to 1, a process must either be
privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
to create an io_uring instance.  If io_uring_group is set to -1 (the
default), no group is exempted, so only processes with the
CAP_SYS_ADMIN capability may create io_uring instances.


kexec_load_disabled
===================

+50 −0
Original line number Diff line number Diff line
@@ -150,6 +150,31 @@ static void io_queue_sqe(struct io_kiocb *req);

struct kmem_cache *req_cachep;

/*
 * Backing storage for the "io_uring_disabled" sysctl. io_uring_allowed()
 * compares it against 0, 1 and 2. Zero-initialized, i.e. io_uring creation
 * is unrestricted by default.
 */
static int __read_mostly sysctl_io_uring_disabled;
/*
 * Backing storage for the "io_uring_group" sysctl: the gid whose members
 * io_uring_allowed() still lets through when io_uring_disabled is 1.
 * Defaults to -1.
 */
static int __read_mostly sysctl_io_uring_group = -1;

#ifdef CONFIG_SYSCTL
/*
 * Sysctl entries controlling who may create io_uring instances. Both
 * entries are backed by plain ints and are writable by the file owner
 * only (mode 0644).
 */
static struct ctl_table kernel_io_uring_disabled_table[] = {
	{
		/* Policy level; writes are bounded to the range 0..2. */
		.procname	= "io_uring_disabled",
		.data		= &sysctl_io_uring_disabled,
		.maxlen		= sizeof(sysctl_io_uring_disabled),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		/*
		 * Exempted group id. Parsed as an int by proc_dointvec, so
		 * size the buffer from the backing variable rather than
		 * from gid_t.
		 */
		.procname	= "io_uring_group",
		.data		= &sysctl_io_uring_group,
		.maxlen		= sizeof(sysctl_io_uring_group),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{},
};
#endif

struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
@@ -4070,9 +4095,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
	return io_uring_create(entries, &p, params);
}

/*
 * Decide whether the calling task may create a new io_uring instance,
 * based on the io_uring_disabled and io_uring_group sysctls.
 *
 * Returns true if creation is permitted, false otherwise.
 */
static inline bool io_uring_allowed(void)
{
	int disabled = READ_ONCE(sysctl_io_uring_disabled);
	kgid_t io_uring_group;

	/* 2: creation is refused for every task, regardless of privilege. */
	if (disabled == 2)
		return false;

	/* 0: unrestricted. Otherwise CAP_SYS_ADMIN skips the group check. */
	if (disabled == 0 || capable(CAP_SYS_ADMIN))
		return true;

	/*
	 * The group sysctl can be rewritten concurrently, so take a single
	 * marked read of it, just like sysctl_io_uring_disabled above.
	 */
	io_uring_group = make_kgid(&init_user_ns,
				   READ_ONCE(sysctl_io_uring_group));
	/* No usable group configured: nobody is exempted. */
	if (!gid_valid(io_uring_group))
		return false;

	return in_group_p(io_uring_group);
}

/*
 * io_uring_setup system call entry point. Creation is gated by
 * io_uring_allowed(); when it refuses, the call fails with -EPERM
 * before the setup arguments are looked at.
 */
SYSCALL_DEFINE2(io_uring_setup, u32, entries,
		struct io_uring_params __user *, params)
{
	if (io_uring_allowed())
		return io_uring_setup(entries, params);

	return -EPERM;
}

@@ -4666,6 +4712,10 @@ static int __init io_uring_init(void)
				offsetof(struct io_kiocb, cmd.data),
				sizeof_field(struct io_kiocb, cmd.data), NULL);

#ifdef CONFIG_SYSCTL
	register_sysctl_init("kernel", kernel_io_uring_disabled_table);
#endif

	return 0;
};
__initcall(io_uring_init);