Commit 71f28f31 authored by Ming Lei's avatar Ming Lei Committed by Jens Axboe
Browse files

ublk_drv: add io_uring based userspace block driver

This is the driver part of userspace block driver(ublk driver), the other
part is userspace daemon part(ublksrv)[1].

The two parts communicate by io_uring's IORING_OP_URING_CMD with one
shared cmd buffer for storing io command, and the buffer is read only for
ublksrv, each io command is indexed by io request tag directly, and is
written by ublk driver.

For example, when one READ io request is submitted to ublk block driver,
ublk driver stores the io command into cmd buffer first, then completes
one IORING_OP_URING_CMD for notifying ublksrv, and the URING_CMD is issued
to ublk driver beforehand by ublksrv for getting notification of any new
io request, and each URING_CMD is associated with one io request by tag.

After ublksrv gets the io command, it translates and handles the ublk io
request, such as, for the ublk-loop target, ublksrv translates the request
into same request on another file or disk, like the kernel loop block
driver. In ublksrv's implementation, the io is still handled by io_uring,
and share same ring with IORING_OP_URING_CMD command. When the target io
request is done, the same IORING_OP_URING_CMD is issued to ublk driver for
both committing io request result and getting future notification of new
io request.

Another thing done by ublk driver is to copy data between kernel io
request and ublksrv's io buffer:

1) before ubsrv handles WRITE request, copy the request's data into
   ublksrv's userspace io buffer, so that ublksrv can handle the write
   request

2) after ubsrv handles READ request, copy ublksrv's userspace io buffer
   into this READ request, then ublk driver can complete the READ request

Zero copy may be switched if mm is ready to support it.

ublk driver doesn't handle any logic of the specific user space driver,
so it is small/simple enough.

[1] ublksrv

https://github.com/ming1/ubdsrv



Signed-off-by: default avatarMing Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220713140711.97356-2-ming.lei@redhat.com


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 96388f57
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -408,6 +408,15 @@ config BLK_DEV_RBD

	  If unsure, say N.

config BLK_DEV_UBLK
	tristate "Userspace block driver (Experimental)"
	select IO_URING
	help
	  io_uring based userspace block driver. Together with ublk server, ublk
	  has been working well, but interface with userspace or command data
	  definition isn't finalized yet, and might change according to future
	  requirement, so mark is as experimental now.

source "drivers/block/rnbd/Kconfig"

endif # BLK_DEV
+2 −0
Original line number Diff line number Diff line
@@ -39,4 +39,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/

obj-$(CONFIG_BLK_DEV_NULL_BLK)	+= null_blk/

obj-$(CONFIG_BLK_DEV_UBLK)			+= ublk_drv.o

swim_mod-y	:= swim.o swim_asm.o
+1530 −0

File added.

Preview size limit exceeded, changes collapsed.

+156 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef USER_BLK_DRV_CMD_INC_H
#define USER_BLK_DRV_CMD_INC_H

#include <linux/types.h>

/* ublk server command definition */

/*
 * Admin commands, issued by ublk server, and handled by ublk driver.
 */
#define	UBLK_CMD_GET_QUEUE_AFFINITY	0x01
#define	UBLK_CMD_GET_DEV_INFO	0x02
#define	UBLK_CMD_ADD_DEV		0x04
#define	UBLK_CMD_DEL_DEV		0x05
#define	UBLK_CMD_START_DEV	0x06
#define	UBLK_CMD_STOP_DEV	0x07

/*
 * IO commands, issued by ublk server, and handled by ublk driver.
 *
 * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request
 *      from ublk driver, should be issued only when starting device. After
 *      the associated cqe is returned, request's tag can be retrieved via
 *      cqe->userdata.
 *
 * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled
 *      this IO request, request's handling result is committed to ublk
 *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
 *      handled before completing io request.
 */
#define	UBLK_IO_FETCH_REQ		0x20
#define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21

/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK			0
#define UBLK_IO_RES_ABORT		(-ENODEV)

#define UBLKSRV_CMD_BUF_OFFSET	0
#define UBLKSRV_IO_BUF_OFFSET	0x80000000

/* tag bit is 12bit, so at most 4096 IOs for each queue */
#define UBLK_MAX_QUEUE_DEPTH	4096

/*
 * zero copy requires 4k block size, and can remap ublk driver's io
 * request into ublksrv's vm space
 */
#define UBLK_F_SUPPORT_ZERO_COPY	(1UL << 0)

/* device state */
#define UBLK_S_DEV_DEAD	0
#define UBLK_S_DEV_LIVE	1

/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {
	/* sent to which device, must be valid */
	__u32	dev_id;

	/* sent to which queue, must be -1 if the cmd isn't for queue */
	__u16	queue_id;
	/*
	 * cmd specific buffer, can be IN or OUT.
	 */
	__u16	len;
	__u64	addr;

	/* inline data */
	__u64	data[2];
};

struct ublksrv_ctrl_dev_info {
	__u16	nr_hw_queues;
	__u16	queue_depth;
	__u16	block_size;
	__u16	state;

	__u32	rq_max_blocks;
	__u32	dev_id;

	__u64   dev_blocks;

	__s32	ublksrv_pid;
	__s32	reserved0;
	__u64	flags[2];

	/* For ublksrv internal use, invisible to ublk driver */
	__u64	ublksrv_flags;
	__u64	reserved1[9];
};

#define		UBLK_IO_OP_READ		0
#define		UBLK_IO_OP_WRITE		1
#define		UBLK_IO_OP_FLUSH		2
#define		UBLK_IO_OP_DISCARD	3
#define		UBLK_IO_OP_WRITE_SAME	4
#define		UBLK_IO_OP_WRITE_ZEROES	5

#define		UBLK_IO_F_FAILFAST_DEV		(1U << 8)
#define		UBLK_IO_F_FAILFAST_TRANSPORT	(1U << 9)
#define		UBLK_IO_F_FAILFAST_DRIVER	(1U << 10)
#define		UBLK_IO_F_META			(1U << 11)
#define		UBLK_IO_F_INTEGRITY		(1U << 12)
#define		UBLK_IO_F_FUA			(1U << 13)
#define		UBLK_IO_F_PREFLUSH		(1U << 14)
#define		UBLK_IO_F_NOUNMAP		(1U << 15)
#define		UBLK_IO_F_SWAP			(1U << 16)

/*
 * io cmd is described by this structure, and stored in share memory, indexed
 * by request tag.
 *
 * The data is stored by ublk driver, and read by ublksrv after one fetch command
 * returns.
 */
struct ublksrv_io_desc {
	/* op: bit 0-7, flags: bit 8-31 */
	__u32		op_flags;

	__u32		nr_sectors;

	/* start sector for this io */
	__u64		start_sector;

	/* buffer address in ublksrv daemon vm space, from ublk driver */
	__u64		addr;
};

static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags & 0xff;
}

static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags >> 8;
}

/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
	__u16	q_id;

	/* for fetch/commit which result */
	__u16	tag;

	/* io result, it is valid for COMMIT* command only */
	__s32	result;

	/*
	 * userspace buffer address in ublksrv daemon process, valid for
	 * FETCH* command only
	 */
	__u64	addr;
};

#endif