Commit 5bd2182d authored by Paul Moore
Browse files

audit,io_uring,io-wq: add some basic audit support to io_uring



This patch adds basic auditing to io_uring operations, regardless of
their context.  This is accomplished by allocating audit_context
structures for the io-wq worker and io_uring SQPOLL kernel threads
as well as explicitly auditing the io_uring operations in
io_issue_sqe().  Individual io_uring operations can bypass auditing
through the "audit_skip" field in the struct io_op_def definition for
the operation.  Great care must be taken so that security-relevant
io_uring operations do not bypass auditing; please contact the audit
mailing list (see the MAINTAINERS file) with any questions.

The io_uring operations are audited using a new AUDIT_URINGOP record,
an example is shown below:

  type=UNKNOWN[1336] msg=audit(1631800225.981:37289):
    uring_op=19 success=yes exit=0 items=0 ppid=15454 pid=15681
    uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
    subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
    key=(null)

Thanks to Richard Guy Briggs for review and feedback.

Signed-off-by: Paul Moore <paul@paul-moore.com>
parent 12c5e81d
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
#include <linux/audit.h>

#include "io-wq.h"

@@ -562,6 +563,8 @@ static int io_wqe_worker(void *data)
	snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
	set_task_comm(current, buf);

	audit_alloc_kernel(current);

	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		long ret;

@@ -601,6 +604,7 @@ static int io_wqe_worker(void *data)
		io_worker_handle_work(worker);
	}

	audit_free(current);
	io_worker_exit(worker);
	return 0;
}
+49 −6
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@
#include <linux/pagemap.h>
#include <linux/io_uring.h>
#include <linux/tracehook.h>
#include <linux/audit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -917,6 +918,8 @@ struct io_op_def {
	unsigned		needs_async_setup : 1;
	/* should block plug */
	unsigned		plug : 1;
	/* skip auditing */
	unsigned		audit_skip : 1;
	/* size of async data needed, if any */
	unsigned short		async_size;
};
@@ -930,6 +933,7 @@ static const struct io_op_def io_op_defs[] = {
		.buffer_select		= 1,
		.needs_async_setup	= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_WRITEV] = {
@@ -939,16 +943,19 @@ static const struct io_op_def io_op_defs[] = {
		.pollout		= 1,
		.needs_async_setup	= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_FSYNC] = {
		.needs_file		= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_READ_FIXED] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_WRITE_FIXED] = {
@@ -957,15 +964,20 @@ static const struct io_op_def io_op_defs[] = {
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_POLL_ADD] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_POLL_REMOVE] = {
		.audit_skip		= 1,
	},
	[IORING_OP_POLL_REMOVE] = {},
	[IORING_OP_SYNC_FILE_RANGE] = {
		.needs_file		= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_SENDMSG] = {
		.needs_file		= 1,
@@ -983,18 +995,23 @@ static const struct io_op_def io_op_defs[] = {
		.async_size		= sizeof(struct io_async_msghdr),
	},
	[IORING_OP_TIMEOUT] = {
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_timeout_data),
	},
	[IORING_OP_TIMEOUT_REMOVE] = {
		/* used by timeout updates' prep() */
		.audit_skip		= 1,
	},
	[IORING_OP_ACCEPT] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
	},
	[IORING_OP_ASYNC_CANCEL] = {},
	[IORING_OP_ASYNC_CANCEL] = {
		.audit_skip		= 1,
	},
	[IORING_OP_LINK_TIMEOUT] = {
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_timeout_data),
	},
	[IORING_OP_CONNECT] = {
@@ -1009,14 +1026,19 @@ static const struct io_op_def io_op_defs[] = {
	},
	[IORING_OP_OPENAT] = {},
	[IORING_OP_CLOSE] = {},
	[IORING_OP_FILES_UPDATE] = {},
	[IORING_OP_STATX] = {},
	[IORING_OP_FILES_UPDATE] = {
		.audit_skip		= 1,
	},
	[IORING_OP_STATX] = {
		.audit_skip		= 1,
	},
	[IORING_OP_READ] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.buffer_select		= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_WRITE] = {
@@ -1025,39 +1047,50 @@ static const struct io_op_def io_op_defs[] = {
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.plug			= 1,
		.audit_skip		= 1,
		.async_size		= sizeof(struct io_async_rw),
	},
	[IORING_OP_FADVISE] = {
		.needs_file		= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_MADVISE] = {},
	[IORING_OP_SEND] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_RECV] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.buffer_select		= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_OPENAT2] = {
	},
	[IORING_OP_EPOLL_CTL] = {
		.unbound_nonreg_file	= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_SPLICE] = {
		.needs_file		= 1,
		.hash_reg_file		= 1,
		.unbound_nonreg_file	= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_PROVIDE_BUFFERS] = {
		.audit_skip		= 1,
	},
	[IORING_OP_REMOVE_BUFFERS] = {
		.audit_skip		= 1,
	},
	[IORING_OP_PROVIDE_BUFFERS] = {},
	[IORING_OP_REMOVE_BUFFERS] = {},
	[IORING_OP_TEE] = {
		.needs_file		= 1,
		.hash_reg_file		= 1,
		.unbound_nonreg_file	= 1,
		.audit_skip		= 1,
	},
	[IORING_OP_SHUTDOWN] = {
		.needs_file		= 1,
@@ -6591,6 +6624,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
	if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
		creds = override_creds(req->creds);

	if (!io_op_defs[req->opcode].audit_skip)
		audit_uring_entry(req->opcode);

	switch (req->opcode) {
	case IORING_OP_NOP:
		ret = io_nop(req, issue_flags);
@@ -6706,6 +6742,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
		break;
	}

	if (!io_op_defs[req->opcode].audit_skip)
		audit_uring_exit(!ret, ret);

	if (creds)
		revert_creds(creds);
	if (ret)
@@ -7360,6 +7399,8 @@ static int io_sq_thread(void *data)
		set_cpus_allowed_ptr(current, cpu_online_mask);
	current->flags |= PF_NO_SETAFFINITY;

	audit_alloc_kernel(current);

	mutex_lock(&sqd->lock);
	while (1) {
		bool cap_entries, sqt_spin = false;
@@ -7425,6 +7466,8 @@ static int io_sq_thread(void *data)
	io_run_task_work();
	mutex_unlock(&sqd->lock);

	audit_free(current);

	complete(&sqd->exited);
	do_exit(0);
}
+26 −0
Original line number Diff line number Diff line
@@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
/* These are defined in auditsc.c */
				/* Public API */
extern int  audit_alloc(struct task_struct *task);
extern int  audit_alloc_kernel(struct task_struct *task);
extern void __audit_free(struct task_struct *task);
/*
 * io_uring audit hooks.  The audit_uring_entry()/audit_uring_exit() inline
 * wrappers below test the audit context before invoking these, so callers
 * are expected to go through the wrappers rather than call these directly.
 */
extern void __audit_uring_entry(u8 op);
extern void __audit_uring_exit(int success, long code);
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
				  unsigned long a2, unsigned long a3);
extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -323,6 +326,21 @@ static inline void audit_free(struct task_struct *task)
	if (unlikely(task->audit_context))
		__audit_free(task);
}
/*
 * audit_uring_entry - note the start of an io_uring operation
 * @op: the io_uring opcode being issued
 *
 * Hands @op to __audit_uring_entry() only when audit_context() yields a
 * context and audit_enabled is set; otherwise does nothing.
 */
static inline void audit_uring_entry(u8 op)
{
	/*
	 * The audit_context() test deliberately comes first, ahead of
	 * audit_enabled: most Linux systems (as of ~2021) run systemd,
	 * which forces audit to be enabled regardless of the user's audit
	 * configuration, so audit_enabled alone filters out very little.
	 */
	if (!unlikely(audit_context() && audit_enabled))
		return;

	__audit_uring_entry(op);
}
/*
 * audit_uring_exit - note the completion of an io_uring operation
 * @success: non-zero when the operation succeeded
 * @code: the operation's return code
 *
 * Forwards to __audit_uring_exit() unless audit_dummy_context() reports a
 * dummy context, in which case nothing is done.
 *
 * NOTE(review): this guard differs from the one in audit_uring_entry()
 * (audit_context() && audit_enabled).  Confirm that a context touched on
 * entry is always reset on exit when the context is a dummy one.
 */
static inline void audit_uring_exit(int success, long code)
{
	int live_ctx = !audit_dummy_context();

	if (unlikely(live_ctx))
		__audit_uring_exit(success, code);
}
static inline void audit_syscall_entry(int major, unsigned long a0,
				       unsigned long a1, unsigned long a2,
				       unsigned long a3)
@@ -554,8 +572,16 @@ static inline int audit_alloc(struct task_struct *task)
{
	return 0;
}
/*
 * Stub audit_alloc_kernel(): ignores @task, allocates nothing and always
 * returns 0.
 * NOTE(review): presumably the variant built when syscall auditing is
 * configured out -- confirm against the enclosing preprocessor condition.
 */
static inline int audit_alloc_kernel(struct task_struct *task)
{
	return 0;
}
/* Stub audit_free(): ignores @task and does nothing. */
static inline void audit_free(struct task_struct *task)
{ }
/* Stub audit_uring_entry(): ignores @op; no audit record is started. */
static inline void audit_uring_entry(u8 op)
{ }
/* Stub audit_uring_exit(): ignores @success and @code and does nothing. */
static inline void audit_uring_exit(int success, long code)
{ }
static inline void audit_syscall_entry(int major, unsigned long a0,
				       unsigned long a1, unsigned long a2,
				       unsigned long a3)
+1 −0
Original line number Diff line number Diff line
@@ -118,6 +118,7 @@
#define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
#define AUDIT_BPF		1334	/* BPF subsystem */
#define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
#define AUDIT_URINGOP		1336	/* io_uring operation */

#define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
+2 −0
Original line number Diff line number Diff line
@@ -103,10 +103,12 @@ struct audit_context {
	enum {
		AUDIT_CTX_UNUSED,	/* audit_context is currently unused */
		AUDIT_CTX_SYSCALL,	/* in use by syscall */
		AUDIT_CTX_URING,	/* in use by io_uring */
	} context;
	enum audit_state    state, current_state;
	unsigned int	    serial;     /* serial number for record */
	int		    major;      /* syscall number */
	int		    uring_op;   /* uring operation */
	struct timespec64   ctime;      /* time of syscall entry */
	unsigned long	    argv[4];    /* syscall arguments */
	long		    return_code;/* syscall return code */
Loading