Commit 26830e92 authored by Vladimir Sementsov-Ogievskiy's avatar Vladimir Sementsov-Ogievskiy Committed by zhuyanting
Browse files

block/io: refactor padding



We have similar padding code in bdrv_co_pwritev,
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
it.

[Squashed in Vladimir's qemu-iotests 077 fix
--Stefan]

Signed-off-by: default avatarVladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Acked-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
Signed-off-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7a3f542f)
*prereq for 292d06b9
Signed-off-by: default avatarMichael Roth <mdroth@linux.vnet.ibm.com>
parent a0944481
Loading
Loading
Loading
Loading
+200 −165
Original line number Diff line number Diff line
@@ -1408,78 +1408,200 @@ out:
}

/*
 * Handle a read request in coroutine context
 * Request padding
 *
 *  |<---- align ----->|                     |<----- align ---->|
 *  |<- head ->|<------------- bytes ------------->|<-- tail -->|
 *  |          |       |                     |     |            |
 * -*----------$-------*-------- ... --------*-----$------------*---
 *  |          |       |                     |     |            |
 *  |          offset  |                     |     end          |
 *  ALIGN_DOWN(offset) ALIGN_UP(offset)      ALIGN_DOWN(end)   ALIGN_UP(end)
 *  [buf   ... )                             [tail_buf          )
 *
 * @buf is an aligned allocation needed to store @head and @tail paddings. @head
 * is placed at the beginning of @buf and @tail at the @end.
 *
 * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
 * around tail, if tail exists.
 *
 * @merge_reads is true for small requests,
 * if @buf_len == @head + bytes + @tail. In this case it is possible that both
 * head and tail exist but @buf_len == align and @tail_buf == @buf.
 */
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
typedef struct BdrvRequestPadding {
    uint8_t *buf;
    size_t buf_len;
    uint8_t *tail_buf;
    size_t head;
    size_t tail;
    bool merge_reads;
    QEMUIOVector local_qiov;
} BdrvRequestPadding;

static bool bdrv_init_padding(BlockDriverState *bs,
                              int64_t offset, int64_t bytes,
                              BdrvRequestPadding *pad)
{
    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    size_t sum;

    memset(pad, 0, sizeof(*pad));

    pad->head = offset & (align - 1);
    pad->tail = ((offset + bytes) & (align - 1));
    if (pad->tail) {
        pad->tail = align - pad->tail;
    }

    if ((!pad->head && !pad->tail) || !bytes) {
        return false;
    }

    sum = pad->head + bytes + pad->tail;
    pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
    pad->buf = qemu_blockalign(bs, pad->buf_len);
    pad->merge_reads = sum == pad->buf_len;
    if (pad->tail) {
        pad->tail_buf = pad->buf + pad->buf_len - align;
    }

    return true;
}

static int bdrv_padding_rmw_read(BdrvChild *child,
                                 BdrvTrackedRequest *req,
                                 BdrvRequestPadding *pad,
                                 bool zero_middle)
{
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    BlockDriverState *bs = child->bs;
    uint64_t align = bs->bl.request_alignment;
    int ret;

    trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
    assert(req->serialising && pad->buf);

    if (!drv) {
        return -ENOMEDIUM;
    if (pad->head || pad->merge_reads) {
        uint64_t bytes = pad->merge_reads ? pad->buf_len : align;

        qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        }
        ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            return ret;
        }
        if (pad->head) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        }
        if (pad->merge_reads && pad->tail) {
            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        }

    ret = bdrv_check_byte_request(bs, offset, bytes);
        if (pad->merge_reads) {
            goto zero_mem;
        }
    }

    if (pad->tail) {
        qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(
                child, req,
                req->overlap_offset + req->overlap_bytes - align,
                align, align, &local_qiov, 0);
        if (ret < 0) {
            return ret;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
    }

    bdrv_inc_in_flight(bs);
zero_mem:
    if (zero_middle) {
        memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
    }

    /* Don't do copy-on-read if we read data before write operation */
    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    return 0;
}

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;
static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
    if (pad->buf) {
        qemu_vfree(pad->buf);
        qemu_iovec_destroy(&pad->local_qiov);
    }
}

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
/*
 * bdrv_pad_request
 *
 * Exchange request parameters with padded request if needed. Don't include RMW
 * read of padding, bdrv_padding_rmw_read() should be called separately if
 * needed.
 *
 * All parameters except @bs are in-out: they represent original request at
 * function call and padded (if padding needed) at function finish.
 *
 * Function always succeeds.
 */
static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
                             int64_t *offset, unsigned int *bytes,
                             BdrvRequestPadding *pad)
{
    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
        return false;
    }

    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
    qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
                             *qiov, 0, *bytes,
                             pad->buf + pad->buf_len - pad->tail, pad->tail);
    *bytes += pad->head + pad->tail;
    *offset -= pad->head;
    *qiov = &pad->local_qiov;

    return true;
}

int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    BdrvRequestPadding pad;
    int ret;

    trace_bdrv_co_preadv(bs, offset, bytes, flags);

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                              bs->bl.request_alignment,
                              qiov, flags);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }
    bdrv_padding_destroy(&pad);

    return ret;
}
@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                BdrvTrackedRequest *req)
{
    BlockDriverState *bs = child->bs;
    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    uint64_t align = bs->bl.request_alignment;
    unsigned int head_padding_bytes, tail_padding_bytes;
    int ret = 0;
    bool padding;
    BdrvRequestPadding pad;

    head_padding_bytes = offset & (align - 1);
    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);


    assert(flags & BDRV_REQ_ZERO_WRITE);
    if (head_padding_bytes || tail_padding_bytes) {
        buf = qemu_blockalign(bs, align);
        qemu_iovec_init_buf(&local_qiov, buf, align);
    }
    if (head_padding_bytes) {
        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);

        /* RMW the unaligned part before head. */
    padding = bdrv_init_padding(bs, offset, bytes, &pad);
    if (padding) {
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
        bdrv_padding_rmw_read(child, req, &pad, true);

        if (pad.head || pad.merge_reads) {
            int64_t aligned_offset = offset & ~(align - 1);
            int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;

            qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
            ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
                                       align, &local_qiov,
                                       flags & ~BDRV_REQ_ZERO_WRITE);
        if (ret < 0) {
            goto fail;
            if (ret < 0 || pad.merge_reads) {
                /* Error or all work is done */
                goto out;
            }
            offset += write_bytes - pad.head;
            bytes -= write_bytes - pad.head;
        }
        offset += zero_bytes;
        bytes -= zero_bytes;
    }

    assert(!bytes || (offset & (align - 1)) == 0);
@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, flags);
        if (ret < 0) {
            goto fail;
            goto out;
        }
        bytes -= aligned_bytes;
        offset += aligned_bytes;
@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,

    assert(!bytes || (offset & (align - 1)) == 0);
    if (bytes) {
        assert(align == tail_padding_bytes + bytes);
        /* RMW the unaligned part after tail. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        assert(align == pad.tail + bytes);

        memset(buf, 0, bytes);
        qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
    }
fail:
    qemu_vfree(buf);
    return ret;

out:
    bdrv_padding_destroy(&pad);

    return ret;
}

/*
@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    BlockDriverState *bs = child->bs;
    BdrvTrackedRequest req;
    uint64_t align = bs->bl.request_alignment;
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    BdrvRequestPadding pad;
    int ret;

    trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        goto out;
    }

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;

    if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init_buf(&head_qiov, head_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);

        /* We have read the tail already if the request is smaller
         * than one aligned block.
         */
        if (bytes < align) {
            qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
            bytes = align;
        }
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_init_buf(&tail_qiov, tail_buf, align);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
                                  align, align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
        bdrv_padding_rmw_read(child, &req, &pad, false);
    }

    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);
                               qiov, flags);

fail:
    bdrv_padding_destroy(&pad);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);
out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);

    return ret;
}