Commit 54b89db5 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging



Pull request

# gpg: Signature made Tue 27 Aug 2019 21:16:27 BST
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block/qcow2: implement .bdrv_co_pwritev(_compressed)_part
  block/qcow2: implement .bdrv_co_preadv_part
  block/qcow2: refactor qcow2_co_preadv to use buffer-based io
  block/io: introduce bdrv_co_p{read, write}v_part
  block/io: bdrv_aligned_pwritev: use and support qiov_offset
  block/io: bdrv_aligned_preadv: use and support qiov_offset
  block/io: bdrv_co_do_copy_on_readv: lazy allocation
  block/io: bdrv_co_do_copy_on_readv: use and support qiov_offset
  block: define .*_part io handlers in BlockDriver
  block/io: refactor padding
  util/iov: improve qemu_iovec_is_zero
  util/iov: introduce qemu_iovec_init_extended

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents d39b6263 5396234b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -674,7 +674,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        return NULL;
    }

    if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
    if (compress && !block_driver_can_compress(target->drv)) {
        error_setg(errp, "Compression is not supported for this drive %s",
                   bdrv_get_device_name(target));
        return NULL;
+324 −217

File changed.

Preview size limit exceeded, changes collapsed.

+9 −5
Original line number Diff line number Diff line
@@ -452,8 +452,9 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
     * interface.  This avoids double I/O throttling and request tracking,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
                                  qiov->size, qiov, 0);
    ret = bs->drv->bdrv_co_preadv_part(bs,
                                       src_cluster_offset + offset_in_cluster,
                                       qiov->size, qiov, 0, 0);
    if (ret < 0) {
        return ret;
    }
@@ -828,7 +829,6 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
    assert(start->offset + start->nb_bytes <= end->offset);
    assert(!m->data_qiov || m->data_qiov->size == data_bytes);

    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
        return 0;
@@ -860,7 +860,11 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
    /* The part of the buffer where the end region is located */
    end_buffer = start_buffer + buffer_size - end->nb_bytes;

    qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));
    qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
                                qemu_iovec_subvec_niov(m->data_qiov,
                                                       m->data_qiov_offset,
                                                       data_bytes)
                                : 0));

    qemu_co_mutex_unlock(&s->lock);
    /* First we read the existing data from both COW regions. We
@@ -903,7 +907,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
        if (start->nb_bytes) {
            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        }
        qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
        qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
        if (end->nb_bytes) {
            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        }
+65 −66
Original line number Diff line number Diff line
@@ -76,7 +76,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
                           uint64_t file_cluster_offset,
                           uint64_t offset,
                           uint64_t bytes,
                           QEMUIOVector *qiov);
                           QEMUIOVector *qiov,
                           size_t qiov_offset);

static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
@@ -1967,21 +1968,18 @@ out:
    return ret;
}

static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                        uint64_t bytes, QEMUIOVector *qiov,
                                        int flags)
static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
                                             uint64_t offset, uint64_t bytes,
                                             QEMUIOVector *qiov,
                                             size_t qiov_offset, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in current iteration */
    uint64_t cluster_offset = 0;
    uint64_t bytes_done = 0;
    QEMUIOVector hd_qiov;
    uint8_t *cluster_data = NULL;

    qemu_iovec_init(&hd_qiov, qiov->niov);

    while (bytes != 0) {

        /* prepare next request */
@@ -2000,34 +1998,31 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,

        offset_in_cluster = offset_into_cluster(s, offset);

        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:

            if (bs->backing) {
                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
                                     &hd_qiov, 0);
                ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes,
                                          qiov, qiov_offset, 0);
                if (ret < 0) {
                    goto fail;
                }
            } else {
                /* Note: in this case, no need to wait */
                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
                qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
            }
            break;

        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
            break;

        case QCOW2_CLUSTER_COMPRESSED:
            ret = qcow2_co_preadv_compressed(bs, cluster_offset,
                                             offset, cur_bytes,
                                             &hd_qiov);
                                             qiov, qiov_offset);
            if (ret < 0) {
                goto fail;
            }
@@ -2059,19 +2054,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                }

                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                qemu_iovec_reset(&hd_qiov);
                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
            }

                BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            ret = bdrv_co_preadv(s->data_file,
                ret = bdrv_co_pread(s->data_file,
                                    cluster_offset + offset_in_cluster,
                                 cur_bytes, &hd_qiov, 0);
                                    cur_bytes, cluster_data, 0);
                if (ret < 0) {
                    goto fail;
                }
            if (bs->encrypted) {
                assert(s->crypto);

                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                if (qcow2_co_decrypt(bs, cluster_offset, offset,
@@ -2079,7 +2070,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                    ret = -EIO;
                    goto fail;
                }
                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
                qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes);
            } else {
                BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
                ret = bdrv_co_preadv_part(s->data_file,
                                          cluster_offset + offset_in_cluster,
                                          cur_bytes, qiov, qiov_offset, 0);
                if (ret < 0) {
                    goto fail;
                }
            }
            break;

@@ -2091,12 +2090,11 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,

        bytes -= cur_bytes;
        offset += cur_bytes;
        bytes_done += cur_bytes;
        qiov_offset += cur_bytes;
    }
    ret = 0;

fail:
    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);

    return ret;
@@ -2105,7 +2103,8 @@ fail:
/* Check if it's possible to merge a write request with the writing of
 * the data from the COW regions */
static bool merge_cow(uint64_t offset, unsigned bytes,
                      QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
                      QEMUIOVector *qiov, size_t qiov_offset,
                      QCowL2Meta *l2meta)
{
    QCowL2Meta *m;

@@ -2134,11 +2133,12 @@ static bool merge_cow(uint64_t offset, unsigned bytes,

        /* Make sure that adding both COW regions to the QEMUIOVector
         * does not exceed IOV_MAX */
        if (hd_qiov->niov > IOV_MAX - 2) {
        if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
            continue;
        }

        m->data_qiov = hd_qiov;
        m->data_qiov = qiov;
        m->data_qiov_offset = qiov_offset;
        return true;
    }

@@ -2220,24 +2220,22 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
    return 0;
}

static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                         uint64_t bytes, QEMUIOVector *qiov,
                                         int flags)
static coroutine_fn int qcow2_co_pwritev_part(
        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of sectors in current iteration */
    uint64_t cluster_offset;
    QEMUIOVector hd_qiov;
    QEMUIOVector encrypted_qiov;
    uint64_t bytes_done = 0;
    uint8_t *cluster_data = NULL;
    QCowL2Meta *l2meta = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);

    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {
@@ -2270,9 +2268,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,

        qemu_co_mutex_unlock(&s->lock);

        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        if (bs->encrypted) {
            assert(s->crypto);
            if (!cluster_data) {
@@ -2285,9 +2280,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                }
            }

            assert(hd_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
            assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            qemu_iovec_to_buf(qiov, qiov_offset + bytes_done,
                              cluster_data, cur_bytes);

            if (qcow2_co_encrypt(bs, cluster_offset, offset,
                                 cluster_data, cur_bytes) < 0) {
@@ -2295,8 +2290,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                goto out_unlocked;
            }

            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
            qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes);
        }

        /* Try to efficiently initialize the physical space with zeroes */
@@ -2309,13 +2303,17 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         * writing of the guest data together with that of the COW regions.
         * If it's not possible (or not necessary) then write the
         * guest data now. */
        if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
        if (!merge_cow(offset, cur_bytes,
                       bs->encrypted ? &encrypted_qiov : qiov,
                       bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta))
        {
            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
            trace_qcow2_writev_data(qemu_coroutine_self(),
                                    cluster_offset + offset_in_cluster);
            ret = bdrv_co_pwritev(s->data_file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
            ret = bdrv_co_pwritev_part(
                    s->data_file, cluster_offset + offset_in_cluster, cur_bytes,
                    bs->encrypted ? &encrypted_qiov : qiov,
                    bs->encrypted ? 0 : qiov_offset + bytes_done, 0);
            if (ret < 0) {
                goto out_unlocked;
            }
@@ -2344,7 +2342,6 @@ out_locked:

    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

@@ -4009,8 +4006,9 @@ fail:
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
static coroutine_fn int
qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                            uint64_t bytes, QEMUIOVector *qiov)
qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
                                 uint64_t offset, uint64_t bytes,
                                 QEMUIOVector *qiov, size_t qiov_offset)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;
@@ -4047,7 +4045,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        /* Zero-pad last write if image size is not cluster aligned */
        memset(buf + bytes, 0, s->cluster_size - bytes);
    }
    qemu_iovec_to_buf(qiov, 0, buf, bytes);
    qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);

    out_buf = g_malloc(s->cluster_size);

@@ -4055,7 +4053,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                                buf, s->cluster_size);
    if (out_len == -ENOMEM) {
        /* could not compress: write normal cluster */
        ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
        ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
        if (ret < 0) {
            goto fail;
        }
@@ -4097,7 +4095,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
                           uint64_t file_cluster_offset,
                           uint64_t offset,
                           uint64_t bytes,
                           QEMUIOVector *qiov)
                           QEMUIOVector *qiov,
                           size_t qiov_offset)
{
    BDRVQcow2State *s = bs->opaque;
    int ret = 0, csize, nb_csectors;
@@ -4128,7 +4127,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
        goto fail;
    }

    qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
    qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);

fail:
    qemu_vfree(out_buf);
@@ -4665,8 +4664,8 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
    BDRVQcow2State *s = bs->opaque;

    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
    return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
                                    qiov->size, qiov, 0);
    return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
                                         qiov->size, qiov, 0, 0);
}

static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
@@ -4675,8 +4674,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
    BDRVQcow2State *s = bs->opaque;

    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
    return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
                                   qiov->size, qiov, 0);
    return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
                                        qiov->size, qiov, 0, 0);
}

/*
@@ -5218,8 +5217,8 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
    .bdrv_co_block_status = qcow2_co_block_status,

    .bdrv_co_preadv         = qcow2_co_preadv,
    .bdrv_co_pwritev        = qcow2_co_pwritev,
    .bdrv_co_preadv_part    = qcow2_co_preadv_part,
    .bdrv_co_pwritev_part   = qcow2_co_pwritev_part,
    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,

    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
@@ -5227,7 +5226,7 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
    .bdrv_co_truncate       = qcow2_co_truncate,
    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
    .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
    .bdrv_make_empty        = qcow2_make_empty,

    .bdrv_snapshot_create   = qcow2_snapshot_create,
+1 −0
Original line number Diff line number Diff line
@@ -420,6 +420,7 @@ typedef struct QCowL2Meta
     * from @cow_start and @cow_end into one single write operation.
     */
    QEMUIOVector *data_qiov;
    size_t data_qiov_offset;

    /** Pointer to next L2Meta of the same write request */
    struct QCowL2Meta *next;
Loading