Commit aaffb853 authored by Peter Maydell

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-10-28' into staging



Block patches for softfreeze:
- iotest patches
- Improve performance of the mirror block job in write-blocking mode
- Limit memory usage for the backup block job
- Add discard and write-zeroes support to the NVMe host block driver
- Fix a bug in the mirror job
- Prevent the qcow2 driver from creating technically non-compliant qcow2
  v3 images (where there is not enough extra data for snapshot table
  entries)
- Allow callers of bdrv_truncate() (etc.) to determine whether the file
  must be resized to the exact given size or whether it is OK for block
  devices not to shrink (see the usage sketch after this list)
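As a minimal usage sketch of the new @exact flag (hypothetical caller; blk
and new_size are assumed to exist, error handling abbreviated; the
blk_truncate() signature is the one introduced in the diffs below):

    /* With exact=false, a block device that cannot shrink may keep its
     * current (larger) length; with exact=true the node must end up at
     * precisely new_size bytes, or the call fails. */
    Error *local_err = NULL;
    int ret = blk_truncate(blk, new_size, true /* exact */,
                           PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
    }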

# gpg: Signature made Mon 28 Oct 2019 12:13:53 GMT
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-10-28: (69 commits)
  qemu-iotests: restrict 264 to qcow2 only
  Revert "qemu-img: Check post-truncation size"
  block: Pass truncate exact=true where reasonable
  block: Let format drivers pass @exact
  block: Evaluate @exact in protocol drivers
  block: Add @exact parameter to bdrv_co_truncate()
  block: Do not truncate file node when formatting
  block/cor: Drop cor_co_truncate()
  block: Handle filter truncation like native impl.
  iotests: Test qcow2's snapshot table handling
  iotests: Add peek_file* functions
  qcow2: Fix v3 snapshot table entry compliancy
  qcow2: Repair snapshot table with too many entries
  qcow2: Fix overly long snapshot tables
  qcow2: Keep track of the snapshot table length
  qcow2: Fix broken snapshot table entries
  qcow2: Add qcow2_check_fix_snapshot_table()
  qcow2: Separate qcow2_check_read_snapshot_table()
  qcow2: Write v3-compliant snapshot list on upgrade
  qcow2: Put qcow2_upgrade() into its own function
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 9bb73502 ba9c4513
block/block-backend.c  +16 −7
@@ -1178,8 +1178,9 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
     return ret;
 }
 
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                unsigned int bytes, QEMUIOVector *qiov,
-                                BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+                                     unsigned int bytes,
+                                     QEMUIOVector *qiov, size_t qiov_offset,
+                                     BdrvRequestFlags flags)
 {
     int ret;
@@ -1207,11 +1208,19 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
         flags |= BDRV_REQ_FUA;
     }
 
-    ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
+                               flags);
     bdrv_dec_in_flight(bs);
     return ret;
 }
 
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                unsigned int bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
+{
+    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
+}
+
 typedef struct BlkRwCo {
     BlockBackend *blk;
     int64_t offset;
@@ -2063,15 +2072,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                    BDRV_REQ_WRITE_COMPRESSED);
 }
 
-int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
-                 Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
+                 PreallocMode prealloc, Error **errp)
 {
     if (!blk_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
     }
 
-    return bdrv_truncate(blk->root, offset, prealloc, errp);
+    return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
 }
 
 static void blk_pdiscard_entry(void *opaque)
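The *_part suffix above follows the block layer's convention: @qiov_offset is
a byte offset into the QEMUIOVector, so a caller can write a sub-range of an
existing vector without building a new one, while blk_co_pwritev() survives as
a thin wrapper passing qiov_offset = 0. A hypothetical coroutine-context
caller (qiov is assumed to be initialized elsewhere and to cover at least
header_len + payload_len bytes):

    /* Sketch: write only the payload, skipping the first header_len
     * bytes of the vector. */
    ret = blk_co_pwritev_part(blk, disk_offset, payload_len, &qiov,
                              header_len, 0 /* BdrvRequestFlags */);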
block/block-copy.c  +94 −88
@@ -18,6 +18,11 @@
 #include "qapi/error.h"
 #include "block/block-copy.h"
 #include "sysemu/block-backend.h"
+#include "qemu/units.h"
+
+#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
+#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
+#define BLOCK_COPY_MAX_MEM (128 * MiB)
 
 static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                        int64_t start,
@@ -61,6 +66,7 @@ void block_copy_state_free(BlockCopyState *s)
     }
 
     bdrv_release_dirty_bitmap(s->copy_bitmap);
+    shres_destroy(s->mem);
     g_free(s);
 }
 
@@ -71,7 +77,8 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
     BlockCopyState *s;
     BdrvDirtyBitmap *copy_bitmap;
     uint32_t max_transfer =
-            MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
-                                               target->bs->bl.max_transfer));
+            MIN_NON_ZERO(INT_MAX,
+                         MIN_NON_ZERO(source->bs->bl.max_transfer,
+                                      target->bs->bl.max_transfer));
 
     copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
@@ -89,19 +96,31 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
         .cluster_size = cluster_size,
         .len = bdrv_dirty_bitmap_size(copy_bitmap),
         .write_flags = write_flags,
+        .mem = shres_create(BLOCK_COPY_MAX_MEM),
     };
 
-    s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size),
-    /*
-     * Set use_copy_range, consider the following:
-     * 1. Compression is not supported for copy_range.
-     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
-     *    that in here. If max_transfer is smaller than the job->cluster_size,
-     *    we do not use copy_range (in that case it's zero after aligning down
-     *    above).
-     */
-    s->use_copy_range =
-        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;
+    if (max_transfer < cluster_size) {
+        /*
+         * copy_range does not respect max_transfer. We don't want to bother
+         * with requests smaller than block-copy cluster size, so fallback to
+         * buffered copying (read and write respect max_transfer on their
+         * behalf).
+         */
+        s->use_copy_range = false;
+        s->copy_size = cluster_size;
+    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
+        /* Compression is not supported for copy_range */
+        s->use_copy_range = false;
+        s->copy_size = MAX(cluster_size, BLOCK_COPY_MAX_BUFFER);
+    } else {
+        /*
+         * copy_range does not respect max_transfer (it's a TODO), so we factor
+         * that in here.
+         */
+        s->use_copy_range = true;
+        s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+                           QEMU_ALIGN_DOWN(max_transfer, cluster_size));
+    }
 
     QLIST_INIT(&s->inflight_reqs);
 
@@ -120,81 +139,73 @@ void block_copy_set_callbacks(
 }
 
 /*
- * Copy range to target with a bounce buffer and return the bytes copied. If
- * error occurred, return a negative error number
+ * block_copy_do_copy
+ *
+ * Do copy of cluser-aligned chunk. @end is allowed to exceed s->len only to
+ * cover last cluster when s->len is not aligned to clusters.
+ *
+ * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ *
+ * Returns 0 on success.
  */
-static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
-                                                      int64_t start,
-                                                      int64_t end,
-                                                      bool *error_is_read,
-                                                      void **bounce_buffer)
+static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
+                                           int64_t start, int64_t end,
+                                           bool *error_is_read)
 {
     int ret;
-    int nbytes;
+    int nbytes = MIN(end, s->len) - start;
+    void *bounce_buffer = NULL;
 
     assert(QEMU_IS_ALIGNED(start, s->cluster_size));
-    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
-    nbytes = MIN(s->cluster_size, s->len - start);
-    if (!*bounce_buffer) {
-        *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
+    assert(QEMU_IS_ALIGNED(end, s->cluster_size));
+    assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
+
+    if (s->use_copy_range) {
+        ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
+                                 0, s->write_flags);
+        if (ret < 0) {
+            trace_block_copy_copy_range_fail(s, start, ret);
+            s->use_copy_range = false;
+            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
+            /* Fallback to read+write with allocated buffer */
+        } else {
+            goto out;
+        }
     }
 
-    ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0);
+    /*
+     * In case of failed copy_range request above, we may proceed with buffered
+     * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
+     * be properly limited, so don't care too much.
+     */
+
+    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
+
+    ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
     if (ret < 0) {
-        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
+        trace_block_copy_read_fail(s, start, ret);
         if (error_is_read) {
             *error_is_read = true;
         }
-        goto fail;
+        goto out;
     }
 
-    ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer,
+    ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
                          s->write_flags);
     if (ret < 0) {
-        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
+        trace_block_copy_write_fail(s, start, ret);
         if (error_is_read) {
             *error_is_read = false;
         }
-        goto fail;
+        goto out;
     }
 
-    return nbytes;
-fail:
-    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
-    return ret;
-
-}
-
-/*
- * Copy range to target and return the bytes copied. If error occurred, return a
- * negative error number.
- */
-static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
-                                                int64_t start,
-                                                int64_t end)
-{
-    int ret;
-    int nr_clusters;
-    int nbytes;
+out:
+    qemu_vfree(bounce_buffer);
 
-    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
-    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
-    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
-    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
-    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
-                            s->cluster_size * nr_clusters);
-    ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
-                             0, s->write_flags);
-    if (ret < 0) {
-        trace_block_copy_with_offload_fail(s, start, ret);
-        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
-                              s->cluster_size * nr_clusters);
-        return ret;
-    }
+    return ret;
+}
 
-    return nbytes;
-}
-
 /*
  * Check if the cluster starting at offset is allocated or not.
  * return via pnum the number of contiguous clusters sharing this allocation.
@@ -271,7 +282,6 @@ int coroutine_fn block_copy(BlockCopyState *s,
 {
     int ret = 0;
     int64_t end = bytes + start; /* bytes */
-    void *bounce_buffer = NULL;
     int64_t status_bytes;
     BlockCopyInFlightReq req;
 
@@ -289,7 +299,7 @@ int coroutine_fn block_copy(BlockCopyState *s,
     block_copy_inflight_req_begin(s, &req, start, end);
 
     while (start < end) {
-        int64_t dirty_end;
+        int64_t next_zero, chunk_end;
 
         if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
             trace_block_copy_skip(s, start);
@@ -297,10 +307,14 @@ int coroutine_fn block_copy(BlockCopyState *s,
             continue; /* already copied */
         }
 
-        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
-                                                (end - start));
-        if (dirty_end < 0) {
-            dirty_end = end;
+        chunk_end = MIN(end, start + s->copy_size);
+
+        next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
+                                                chunk_end - start);
+        if (next_zero >= 0) {
+            assert(next_zero > start); /* start is dirty */
+            assert(next_zero < chunk_end); /* no need to do MIN() */
+            chunk_end = next_zero;
         }
 
         if (s->skip_unallocated) {
@@ -311,34 +325,26 @@ int coroutine_fn block_copy(BlockCopyState *s,
                 continue;
             }
             /* Clamp to known allocated region */
-            dirty_end = MIN(dirty_end, start + status_bytes);
+            chunk_end = MIN(chunk_end, start + status_bytes);
         }
 
         trace_block_copy_process(s, start);
 
-        if (s->use_copy_range) {
-            ret = block_copy_with_offload(s, start, dirty_end);
-            if (ret < 0) {
-                s->use_copy_range = false;
-            }
-        }
-        if (!s->use_copy_range) {
-            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
-                                                error_is_read, &bounce_buffer);
-        }
+        bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
+
+        co_get_from_shres(s->mem, chunk_end - start);
+        ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
+        co_put_to_shres(s->mem, chunk_end - start);
         if (ret < 0) {
+            bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
             break;
         }
 
-        start += ret;
-        s->progress_bytes_callback(ret, s->progress_opaque);
+        s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
+        start = chunk_end;
         ret = 0;
     }
 
-    if (bounce_buffer) {
-        qemu_vfree(bounce_buffer);
-    }
-
     block_copy_inflight_req_end(&req);
 
     return ret;
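The shres calls visible above are what bound the backup job's memory use: the
pool created with shres_create(BLOCK_COPY_MAX_MEM) acts roughly like a
counting semaphore over bytes, so at most 128 MiB of bounce-buffer memory per
block-copy state is in flight at once, and a coroutine that cannot reserve its
chunk waits until another request releases. Schematically (names as in the
diff above; the blocking behavior of co_get_from_shres is my reading of the
new API):

    co_get_from_shres(s->mem, chunk_end - start);  /* may yield until free */
    ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
    co_put_to_shres(s->mem, chunk_end - start);    /* wake any waiters */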
block/commit.c  +3 −2
@@ -155,7 +155,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
     }
 
     if (base_len < len) {
-        ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL);
+        ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL);
         if (ret) {
             goto out;
         }
@@ -471,7 +471,8 @@ int bdrv_commit(BlockDriverState *bs)
      * grow the backing file image if possible.  If not possible,
      * we must return an error */
     if (length > backing_length) {
-        ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
+        ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF,
+                           &local_err);
         if (ret < 0) {
             error_report_err(local_err);
             goto ro_cleanup;
block/copy-on-read.c  +0 −8
@@ -73,13 +73,6 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset,
-                                        PreallocMode prealloc, Error **errp)
-{
-    return bdrv_co_truncate(bs->file, offset, prealloc, errp);
-}
-
-
 static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
                                       uint64_t offset, uint64_t bytes,
                                       QEMUIOVector *qiov, int flags)
@@ -139,7 +132,6 @@ static BlockDriver bdrv_copy_on_read = {
     .bdrv_child_perm                    = cor_child_perm,
 
     .bdrv_getlength                     = cor_getlength,
-    .bdrv_co_truncate                   = cor_co_truncate,
 
     .bdrv_co_preadv                     = cor_co_preadv,
     .bdrv_co_pwritev                    = cor_co_pwritev,
block/crypto.c  +4 −4
@@ -113,8 +113,8 @@ static ssize_t block_crypto_init_func(QCryptoBlock *block,
      * available to the guest, so we must take account of that
      * which will be used by the crypto header
      */
-    return blk_truncate(data->blk, data->size + headerlen, data->prealloc,
-                        errp);
+    return blk_truncate(data->blk, data->size + headerlen, false,
+                        data->prealloc, errp);
 }
 
 
@@ -297,7 +297,7 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
 }
 
 static int coroutine_fn
-block_crypto_co_truncate(BlockDriverState *bs, int64_t offset,
+block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
                          PreallocMode prealloc, Error **errp)
 {
     BlockCrypto *crypto = bs->opaque;
@@ -311,7 +311,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset,
 
     offset += payload_offset;
 
-    return bdrv_co_truncate(bs->file, offset, prealloc, errp);
+    return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
 }
 
 static void block_crypto_close(BlockDriverState *bs)