Commit caa1ee43 authored by Changpeng Liu's avatar Changpeng Liu Committed by Michael S. Tsirkin
Browse files

vhost-user-blk: add discard/write zeroes features support



Linux commit 1f23816b8 "virtio_blk: add discard and write zeroes support"
added the support in the Guest kernel, while here also enable the features
support with vhost-user-blk driver. Also enable the test example utility
with DISCARD and WRITE ZEROES commands.

Signed-off-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: default avatarMichael S. Tsirkin <mst@redhat.com>
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
parent a56de056
Loading
Loading
Loading
Loading
+107 −33
Original line number Diff line number Diff line
@@ -63,6 +63,20 @@ static size_t vub_iov_size(const struct iovec *iov,
    return len;
}

static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}

static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
@@ -161,6 +175,44 @@ vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
    return rc;
}

static int
vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
                         uint32_t type)
{
    struct virtio_blk_discard_write_zeroes *desc;
    ssize_t size;
    void *buf;

    size = vub_iov_size(iov, iovcnt);
    if (size != sizeof(*desc)) {
        fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
        return -1;
    }
    buf = g_new0(char, size);
    vub_iov_to_buf(iov, iovcnt, buf);

    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    VubDev *vdev_blk = req->vdev_blk;
    desc = (struct virtio_blk_discard_write_zeroes *)buf;
    uint64_t range[2] = { le64toh(desc->sector) << 9,
                          le32toh(desc->num_sectors) << 9 };
    if (type == VIRTIO_BLK_T_DISCARD) {
        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
            g_free(buf);
            return 0;
        }
    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
            g_free(buf);
            return 0;
        }
    }
    #endif

    g_free(buf);
    return -1;
}

static void
vub_flush(VubReq *req)
{
@@ -216,8 +268,9 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
        case VIRTIO_BLK_T_IN: {
    switch (type & ~VIRTIO_BLK_T_BARRIER) {
    case VIRTIO_BLK_T_IN:
    case VIRTIO_BLK_T_OUT: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
@@ -234,12 +287,11 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
        vub_req_complete(req);
        break;
    }
        case VIRTIO_BLK_T_FLUSH: {
    case VIRTIO_BLK_T_FLUSH:
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
        }
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
@@ -249,11 +301,22 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
        vub_req_complete(req);
        break;
    }
        default: {
            req->in->status = VIRTIO_BLK_S_UNSUPP;
    case VIRTIO_BLK_T_DISCARD:
    case VIRTIO_BLK_T_WRITE_ZEROES: {
        int rc;
        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
        if (rc == 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    default:
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }

    return 0;
@@ -317,6 +380,10 @@ vub_get_features(VuDev *dev)
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
               #endif
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;
@@ -478,6 +545,13 @@ vub_initialize_config(int fd, struct virtio_blk_config *config)
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    config->max_discard_sectors = 32768;
    config->max_discard_seg = 1;
    config->discard_sector_alignment = config->blk_size >> 9;
    config->max_write_zeroes_sectors = 32768;
    config->max_write_zeroes_seg = 1;
    #endif
}

static VubDev *
+4 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@ static const int user_feature_bits[] = {
    VIRTIO_BLK_F_RO,
    VIRTIO_BLK_F_FLUSH,
    VIRTIO_BLK_F_CONFIG_WCE,
    VIRTIO_BLK_F_DISCARD,
    VIRTIO_BLK_F_WRITE_ZEROES,
    VIRTIO_F_VERSION_1,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
@@ -204,6 +206,8 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
    virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
    virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
    virtio_add_feature(&features, VIRTIO_BLK_F_RO);
    virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
    virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);

    if (s->config_wce) {
        virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
+48 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@
#define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
#define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
#define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */

/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -84,6 +86,39 @@ struct virtio_blk_config {

	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
	uint16_t num_queues;

	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
	/*
	 * The maximum discard sectors (in 512-byte sectors) for
	 * one segment.
	 */
	uint32_t max_discard_sectors;
	/*
	 * The maximum number of discard segments in a
	 * discard command.
	 */
	uint32_t max_discard_seg;
	/* Discard commands must be aligned to this number of sectors. */
	uint32_t discard_sector_alignment;

	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
	/*
	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
	 * one segment.
	 */
	uint32_t max_write_zeroes_sectors;
	/*
	 * The maximum number of segments in a write zeroes
	 * command.
	 */
	uint32_t max_write_zeroes_seg;
	/*
	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
	 * deallocation of one or more of the sectors.
	 */
	uint8_t write_zeroes_may_unmap;

	uint8_t unused1[3];
} QEMU_PACKED;

/*
@@ -137,6 +172,19 @@ struct virtio_blk_outhdr {
	__virtio64 sector;
};

/* Unmap this range (only valid for write zeroes command) */
#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001

/* Discard/write zeroes range for each request. */
struct virtio_blk_discard_write_zeroes {
	/* discard/write zeroes start sector */
	uint64_t sector;
	/* number of discard/write zeroes sectors */
	uint32_t num_sectors;
	/* flags for this range */
	uint32_t flags;
};

#ifndef VIRTIO_BLK_NO_LEGACY
struct virtio_scsi_inhdr {
	__virtio32 errors;