Commit ee692a21 authored by Jens Axboe
Browse files

fs,io_uring: add infrastructure for uring-cmd



file_operations->uring_cmd is a file private handler.
This is somewhat similar to ioctl but hopefully a lot more sane and
useful as it can be used to enable many io_uring capabilities for the
underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't
know what is in this command type, it's for the provider of ->uring_cmd()
to deal with.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 2bb04df7
Loading
Loading
Loading
Loading
+117 −18
Original line number Diff line number Diff line
@@ -202,13 +202,6 @@ struct io_rings {
	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
};

enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1,
	IO_URING_F_UNLOCKED		= 2,
	/* int's last bit, sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,
};

struct io_mapped_ubuf {
	u64		ubuf;
	u64		ubuf_end;
@@ -972,6 +965,7 @@ struct io_kiocb {
		struct io_xattr		xattr;
		struct io_socket	sock;
		struct io_nop		nop;
		struct io_uring_cmd	uring_cmd;
	};

	u8				opcode;
@@ -1050,6 +1044,14 @@ struct io_cancel_data {
	int seq;
};

/*
 * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
 * the following sqe if SQE128 is used.
 *
 * Evaluates to the number of payload bytes available: one or two SQEs'
 * worth (sizeof(struct io_uring_sqe) each) minus the fixed SQE header that
 * precedes 'cmd'.  'is_sqe128' is normalized to 0/1 via !! so any truthy
 * flag value (e.g. ctx->flags & IORING_SETUP_SQE128) works.
 */
#define uring_cmd_pdu_size(is_sqe128)				\
	((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -	\
		offsetof(struct io_uring_sqe, cmd))

struct io_op_def {
	/* needs req->file assigned */
	unsigned		needs_file : 1;
@@ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = {
	[IORING_OP_SOCKET] = {
		.audit_skip		= 1,
	},
	[IORING_OP_URING_CMD] = {
		.needs_file		= 1,
		.plug			= 1,
		.needs_async_setup	= 1,
		.async_size		= uring_cmd_pdu_size(1),
	},
};

/* requests with any of those set should undergo io_disarm_next() */
@@ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode)
		return "GETXATTR";
	case IORING_OP_SOCKET:
		return "SOCKET";
	case IORING_OP_URING_CMD:
		return "URING_CMD";
	case IORING_OP_LAST:
		return "INVALID";
	}
@@ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req,
	const char __user *name;
	int ret;

	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (unlikely(sqe->ioprio))
		return -EINVAL;
	if (unlikely(req->flags & REQ_F_FIXED_FILE))
		return -EBADF;

@@ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req,
	const char __user *name;
	int ret;

	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (unlikely(sqe->ioprio))
		return -EINVAL;
	if (unlikely(req->flags & REQ_F_FIXED_FILE))
		return -EBADF;

@@ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
	return 0;
}

/*
 * task_work trampoline: invoked in task context, forwards to the callback
 * the ->uring_cmd() provider registered via io_uring_cmd_complete_in_task().
 */
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
{
	req->uring_cmd.task_work_cb(&req->uring_cmd);
}

/*
 * Schedule @task_work_cb to run in the task context of the request's
 * submitter.  Providers of ->uring_cmd() use this to defer completion work
 * (typically ending with io_uring_cmd_done()) out of IRQ/atomic context.
 */
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *))
{
	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);

	/* both callbacks must be in place before the task_work is queued */
	req->uring_cmd.task_work_cb = task_work_cb;
	req->io_task_work.func = io_uring_cmd_work;
	/* second arg tweaks queueing for SQPOLL rings — see io_req_task_work_add() */
	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);

/*
 * Called by consumers of io_uring_cmd, if they originally returned
 * -EIOCBQUEUED upon receiving the command.
 *
 * @ret is posted as the CQE result; a negative value also marks the request
 * failed.  @res2 is only posted as the extra result on CQE32 rings and is
 * dropped otherwise.
 */
void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
{
	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);

	if (ret < 0)
		req_set_fail(req);
	if (req->ctx->flags & IORING_SETUP_CQE32)
		__io_req_complete32(req, 0, ret, 0, res2, 0);
	else
		io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);

/*
 * Copy the SQE command payload into req->async_data so it survives after
 * the SQE slot is recycled; size covers one or two SQEs depending on
 * whether the ring was set up with SQE128.  Caller must have allocated
 * async_data.  Always succeeds.
 */
static int io_uring_cmd_prep_async(struct io_kiocb *req)
{
	size_t cmd_size;

	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);

	memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
	return 0;
}

/*
 * Prepare an IORING_OP_URING_CMD request from its SQE.  Rejects any
 * rw_flags bits (reserved for future use).  Note ->cmd points straight
 * into SQE memory here; it is only redirected to a stable copy if the
 * request later goes async (see io_uring_cmd()).
 */
static int io_uring_cmd_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;

	if (sqe->rw_flags)
		return -EINVAL;
	ioucmd->cmd = sqe->cmd;
	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
	return 0;
}

/*
 * Issue an IORING_OP_URING_CMD request by handing it to the file's
 * ->uring_cmd() handler.  The provider may complete inline (any return
 * other than -EIOCBQUEUED/-EAGAIN), go fully async and complete later via
 * io_uring_cmd_done() (-EIOCBQUEUED), or ask for a nonblocking retry
 * (-EAGAIN).
 */
static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = req->file;
	int ret;

	/* only files that implement the hook support URING_CMD */
	if (!req->file->f_op->uring_cmd)
		return -EOPNOTSUPP;

	/* surface relevant ring-setup state to the provider via issue_flags */
	if (ctx->flags & IORING_SETUP_SQE128)
		issue_flags |= IO_URING_F_SQE128;
	if (ctx->flags & IORING_SETUP_CQE32)
		issue_flags |= IO_URING_F_CQE32;
	if (ctx->flags & IORING_SETUP_IOPOLL)
		issue_flags |= IO_URING_F_IOPOLL;

	/* on retry the SQE is gone; use the stable copy made below */
	if (req_has_async_data(req))
		ioucmd->cmd = req->async_data;

	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
	if (ret == -EAGAIN) {
		/*
		 * Stash the SQE payload before punting, since the SQE slot
		 * may be reused before the retry happens.
		 */
		if (!req_has_async_data(req)) {
			if (io_alloc_async_data(req))
				return -ENOMEM;
			io_uring_cmd_prep_async(req);
		}
		return -EAGAIN;
	}

	/* inline completion: post the CQE now */
	if (ret != -EIOCBQUEUED)
		io_uring_cmd_done(ioucmd, ret, 0);
	return 0;
}

static int io_shutdown_prep(struct io_kiocb *req,
			    const struct io_uring_sqe *sqe)
{
@@ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = &req->sock;

	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
@@ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
		return io_getxattr_prep(req, sqe);
	case IORING_OP_SOCKET:
		return io_socket_prep(req, sqe);
	case IORING_OP_URING_CMD:
		return io_uring_cmd_prep(req, sqe);
	}

	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req)
		return io_recvmsg_prep_async(req);
	case IORING_OP_CONNECT:
		return io_connect_prep_async(req);
	case IORING_OP_URING_CMD:
		return io_uring_cmd_prep_async(req);
	}
	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
		    req->opcode);
@@ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
	case IORING_OP_SOCKET:
		ret = io_socket(req, issue_flags);
		break;
	case IORING_OP_URING_CMD:
		ret = io_uring_cmd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
@@ -12699,6 +12796,8 @@ static int __init io_uring_init(void)

	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));

	BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);

	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
				SLAB_ACCOUNT);
	return 0;
+2 −0
Original line number Diff line number Diff line
@@ -1953,6 +1953,7 @@ struct dir_context {
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)

struct iov_iter;
struct io_uring_cmd;

struct file_operations {
	struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
} __randomize_layout;

struct inode_operations {
+33 −0
Original line number Diff line number Diff line
@@ -5,7 +5,32 @@
#include <linux/sched.h>
#include <linux/xarray.h>

/*
 * issue_flags passed down to request handlers, including ->uring_cmd()
 * implementations.  Values are distinct bits so they can be OR'ed.
 */
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1,
	IO_URING_F_UNLOCKED		= 2,
	/* int's last bit, sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,

	/* ctx state flags, for URING_CMD */
	IO_URING_F_SQE128		= 4,
	IO_URING_F_CQE32		= 8,
	IO_URING_F_IOPOLL		= 16,
};

/*
 * Provider-facing view of a URING_CMD request; embedded in struct io_kiocb
 * (io_uring core recovers the io_kiocb via container_of()).
 */
struct io_uring_cmd {
	struct file	*file;		/* target file of the command */
	const void	*cmd;		/* SQE command payload (or async copy) */
	/* callback to defer completions to task context */
	void (*task_work_cb)(struct io_uring_cmd *cmd);
	u32		cmd_op;		/* provider-defined opcode from sqe->cmd_op */
	u32		pad;		/* alignment padding, currently unused */
	u8		pdu[32]; /* available inline for free use */
};

#if defined(CONFIG_IO_URING)
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *));
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
@@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
		__io_uring_free(tsk);
}
#else
/*
 * !CONFIG_IO_URING stub: completion is a no-op.  Parameter renamed from
 * 'ret2' to 'res2' to match the CONFIG_IO_URING declaration above.
 */
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t res2)
{
}
/* !CONFIG_IO_URING stub: the callback is never invoked */
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
	return NULL;
+13 −8
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ struct io_uring_sqe {
	union {
		__u64	off;	/* offset into file */
		__u64	addr2;
		__u32	cmd_op;
	};
	union {
		__u64	addr;	/* pointer to buffer or iovecs */
@@ -61,14 +62,17 @@ struct io_uring_sqe {
		__s32	splice_fd_in;
		__u32	file_index;
	};
	union {
		struct {
			__u64	addr3;
			__u64	__pad2[1];

		};
		/*
	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
	 * contains 64-bytes of padding, doubling the size of the SQE.
		 * If the ring is initialized with IORING_SETUP_SQE128, then
		 * this field is used for 80 bytes of arbitrary command data
		 */
	__u64	__big_sqe_pad[0];
		__u8	cmd[0];
	};
};

enum {
@@ -175,6 +179,7 @@ enum io_uring_op {
	IORING_OP_FGETXATTR,
	IORING_OP_GETXATTR,
	IORING_OP_SOCKET,
	IORING_OP_URING_CMD,

	/* this goes last, obviously */
	IORING_OP_LAST,