Commit ebdeb7c0 authored by Jens Axboe
Browse files

io_uring: add support for 128-byte SQEs



Normal SQEs are 64 bytes in length, which is fine for all the commands
we support. However, in preparation for supporting passthrough IO,
provide an option for setting up a ring with 128-byte SQEs.

We continue to use the same type for io_uring_sqe; it is marked and
commented with a zero-sized array pad at the end. This provides up
to 80 bytes of data for a passthrough command - 64 bytes for the
extra added data, and 16 bytes available at the end of the existing
SQE.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent b5ba65df
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -8519,8 +8519,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
	 *    though the application is the one updating it.
	 */
	head = READ_ONCE(ctx->sq_array[sq_idx]);
	if (likely(head < ctx->sq_entries))
	if (likely(head < ctx->sq_entries)) {
		/* double index for 128-byte SQEs, twice as long */
		if (ctx->flags & IORING_SETUP_SQE128)
			head <<= 1;
		return &ctx->sq_sqes[head];
	}

	/* drop invalid entries */
	ctx->cq_extra--;
@@ -11689,6 +11693,9 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
	rings->sq_ring_entries = p->sq_entries;
	rings->cq_ring_entries = p->cq_entries;

	if (p->flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
	if (size == SIZE_MAX) {
		io_mem_free(ctx->rings);
@@ -11933,7 +11940,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
			IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
			IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG))
			IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
			IORING_SETUP_SQE128))
		return -EINVAL;

	return io_uring_create(entries, &p, params);
+8 −0
Original line number Diff line number Diff line
@@ -63,6 +63,12 @@ struct io_uring_sqe {
	};
	__u64	addr3;
	__u64	__pad2[1];

	/*
	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
	 * contains 64 bytes of padding, doubling the size of the SQE.
	 */
	__u64	__big_sqe_pad[0];
};

enum {
@@ -119,6 +125,8 @@ enum {
 */
#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)

#define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */

enum io_uring_op {
	IORING_OP_NOP,
	IORING_OP_READV,