Commit e5714b5b authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-05-28' into staging



Block patches:
- qcow2: Use threads for encrypted I/O
- qemu-img rebase: Optimizations
- backup job: Allow any source node, and some refactoring
- Some general simplifications in the block layer

# gpg: Signature made Tue 28 May 2019 20:26:56 BST
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-05-28: (21 commits)
  blockdev: loosen restrictions on drive-backup source node
  qcow2-bitmap: initialize bitmap directory alignment
  qcow2: skip writing zero buffers to empty COW areas
  qemu-img: rebase: Reuse in-chain BlockDriverState
  qemu-img: rebase: Reduce reads on in-chain rebase
  qemu-img: rebase: Reuse parent BlockDriverState
  block: Make bdrv_root_attach_child() unref child_bs on failure
  block: Use bdrv_unref_child() for all children in bdrv_close()
  block/backup: refactor: split out backup_calculate_cluster_size
  block/backup: unify different modes code path
  block/backup: refactor and tolerate unallocated cluster skipping
  block/backup: move to copy_bitmap with granularity
  block/backup: simplify backup_incremental_init_copy_bitmap
  qcow2: do encryption in threads
  qcow2: bdrv_co_pwritev: move encryption code out of the lock
  qcow2: qcow2_co_preadv: improve locking
  qcow2-threads: split out generic path
  qcow2-threads: qcow2_co_do_compress: protect queuing by mutex
  qcow2-threads: use thread_pool_submit_co
  qcow2: add separate file for threaded data processing functions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 8c1ecb59 a2d665c1
Loading
Loading
Loading
Loading
+21 −25
Original line number Diff line number Diff line
@@ -2243,6 +2243,13 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
    }
}

/*
 * This function steals the reference to child_bs from the caller.
 * That reference is later dropped by bdrv_root_unref_child().
 *
 * On failure NULL is returned, errp is set and the reference to
 * child_bs is also dropped.
 */
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                  const char *child_name,
                                  const BdrvChildRole *child_role,
@@ -2255,6 +2262,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
    ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
    if (ret < 0) {
        bdrv_abort_perm_update(child_bs);
        bdrv_unref(child_bs);
        return NULL;
    }

@@ -2274,6 +2282,14 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
    return child;
}

/*
 * This function transfers the reference to child_bs from the caller
 * to parent_bs. That reference is later dropped by parent_bs on
 * bdrv_close() or if someone calls bdrv_unref_child().
 *
 * On failure NULL is returned, errp is set and the reference to
 * child_bs is also dropped.
 */
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                             BlockDriverState *child_bs,
                             const char *child_name,
@@ -2401,12 +2417,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
    /* If backing_hd was already part of bs's backing chain, and
     * inherits_from pointed recursively to bs then let's update it to
     * point directly to bs (else it will become NULL). */
    if (update_inherits_from) {
    if (bs->backing && update_inherits_from) {
        backing_hd->inherits_from = bs;
    }
    if (!bs->backing) {
        bdrv_unref(backing_hd);
    }

out:
    bdrv_refresh_limits(bs, NULL);
@@ -2594,7 +2607,6 @@ BdrvChild *bdrv_open_child(const char *filename,
                           const BdrvChildRole *child_role,
                           bool allow_none, Error **errp)
{
    BdrvChild *c;
    BlockDriverState *bs;

    bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -2603,13 +2615,7 @@ BdrvChild *bdrv_open_child(const char *filename,
        return NULL;
    }

    c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
    if (!c) {
        bdrv_unref(bs);
        return NULL;
    }

    return c;
    return bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
}

/* TODO Future callers may need to specify parent/child_role in order for
@@ -3877,22 +3883,12 @@ static void bdrv_close(BlockDriverState *bs)
        bs->drv = NULL;
    }

    bdrv_set_backing_hd(bs, NULL, &error_abort);

    if (bs->file != NULL) {
        bdrv_unref_child(bs, bs->file);
        bs->file = NULL;
    }

    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        /* TODO Remove bdrv_unref() from drivers' close function and use
         * bdrv_unref_child() here */
        if (child->bs->inherits_from == bs) {
            child->bs->inherits_from = NULL;
        }
        bdrv_detach_child(child);
        bdrv_unref_child(bs, child);
    }

    bs->backing = NULL;
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    atomic_set(&bs->copy_on_read, 0);
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o
block-obj-$(CONFIG_VVFAT) += vvfat.o
block-obj-$(CONFIG_DMG) += dmg.o

block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-$(CONFIG_QED) += qed-check.o
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
+105 −138
Original line number Diff line number Diff line
@@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
    int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
    int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;

    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
    hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
    nbytes = MIN(job->cluster_size, job->len - start);
    if (!*bounce_buffer) {
        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
@@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,

    return nbytes;
fail:
    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
    hbitmap_set(job->copy_bitmap, start, job->cluster_size);
    return ret;

}
@@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
    int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;

    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
    nbytes = MIN(job->copy_range_size, end - start);
    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
                  nr_clusters);
    hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
                            read_flags, write_flags);
    if (ret < 0) {
        trace_backup_do_cow_copy_range_fail(job, start, ret);
        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
                    nr_clusters);
        hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
        return ret;
    }

@@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
    cow_request_begin(&cow_request, job, start, end);

    while (start < end) {
        if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
        if (!hbitmap_get(job->copy_bitmap, start)) {
            trace_backup_do_cow_skip(job, start);
            start += job->cluster_size;
            continue; /* already copied */
@@ -298,12 +298,16 @@ static void backup_clean(Job *job)
    assert(s->target);
    blk_unref(s->target);
    s->target = NULL;

    if (s->copy_bitmap) {
        hbitmap_free(s->copy_bitmap);
        s->copy_bitmap = NULL;
    }
}

void backup_do_checkpoint(BlockJob *job, Error **errp)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    int64_t len;

    assert(block_job_driver(job) == &backup_job_driver);

@@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
        return;
    }

    len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
    hbitmap_set(backup_job->copy_bitmap, 0, len);
    hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
}

static void backup_drain(BlockJob *job)
@@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
    return false;
}

static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/*
 * Return true iff the whole range [offset, offset + bytes) is unallocated
 * in bs.
 *
 * bdrv_is_allocated() answers for only a prefix of the queried range and
 * writes that prefix's length back through its last argument, so the loop
 * re-queries chunk by chunk until it either finds an allocated chunk or
 * walks past 'end'.
 */
static bool bdrv_is_unallocated_range(BlockDriverState *bs,
                                      int64_t offset, int64_t bytes)
{
    int64_t end = offset + bytes;

    while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
        /* A zero-length answer means no further status is available
         * (end of file); treat the remainder as unallocated. */
        if (bytes == 0) {
            return true;
        }
        /* Skip the unallocated chunk just reported and query the rest. */
        offset += bytes;
        bytes = end - offset;
    }

    /* NOTE(review): a negative (error) return from bdrv_is_allocated()
     * also ends the loop, making us report false ("allocated") —
     * presumably the conservative choice for a backup job; confirm. */
    return offset >= end;
}

static int coroutine_fn backup_loop(BackupBlockJob *job)
{
    int ret;
    bool error_is_read;
    int64_t cluster;
    int64_t offset;
    HBitmapIter hbi;
    BlockDriverState *bs = blk_bs(job->common.blk);

    hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
    while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
    while ((offset = hbitmap_iter_next(&hbi)) != -1) {
        if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
            bdrv_is_unallocated_range(bs, offset, job->cluster_size))
        {
            hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
            continue;
        }

        do {
            if (yield_and_check(job)) {
                return 0;
            }
            ret = backup_do_cow(job, cluster * job->cluster_size,
            ret = backup_do_cow(job, offset,
                                job->cluster_size, &error_is_read, false);
            if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
                           BLOCK_ERROR_ACTION_REPORT)
@@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* init copy_bitmap from sync_bitmap */
static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
{
    BdrvDirtyBitmapIter *dbi;
    int64_t offset;
    int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
                               job->cluster_size);

    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
    while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
        int64_t cluster = offset / job->cluster_size;
        int64_t next_cluster;

        offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
        if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
            break;
        }
    uint64_t offset = 0;
    uint64_t bytes = job->len;

        offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset,
                                             UINT64_MAX);
        if (offset == -1) {
            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
            break;
        }
    while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
                                             &offset, &bytes))
    {
        hbitmap_set(job->copy_bitmap, offset, bytes);

        next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
        hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
        if (next_cluster >= end) {
        offset += bytes;
        if (offset >= job->len) {
            break;
        }

        bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
        bytes = job->len - offset;
    }

    /* TODO job_progress_set_remaining() would make more sense */
    job_progress_update(&job->common.job,
        job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);

    bdrv_dirty_iter_free(dbi);
        job->len - hbitmap_count(job->copy_bitmap));
}

static int coroutine_fn backup_run(Job *job, Error **errp)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    BlockDriverState *bs = blk_bs(s->common.blk);
    int64_t offset, nb_clusters;
    int ret = 0;

    QLIST_INIT(&s->inflight_reqs);
    qemu_co_rwlock_init(&s->flush_rwlock);

    nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
    job_progress_set_remaining(job, s->len);

    s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
    if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        backup_incremental_init_copy_bitmap(s);
    } else {
        hbitmap_set(s->copy_bitmap, 0, nb_clusters);
        hbitmap_set(s->copy_bitmap, 0, s->len);
    }


    s->before_write.notify = backup_before_write_notify;
    bdrv_add_before_write_notifier(bs, &s->before_write);

@@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
             * notify callback service CoW requests. */
            job_yield(job);
        }
    } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        ret = backup_run_incremental(s);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (offset = 0; offset < s->len;
             offset += s->cluster_size) {
            bool error_is_read;
            int alloced = 0;

            if (yield_and_check(s)) {
                break;
            }

            if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
                int i;
                int64_t n;

                /* Check to see if these blocks are already in the
                 * backing file. */

                for (i = 0; i < s->cluster_size;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are are all in the same state.
                     * For that reason we must verify each sector in the
                     * backup cluster length.  We end up copying more than
                     * needed but at some point that is always the case. */
                    alloced =
                        bdrv_is_allocated(bs, offset + i,
                                          s->cluster_size - i, &n);
                    i += n;

                    if (alloced || n == 0) {
                        break;
                    }
                }

                /* If the above loop never found any sectors that are in
                 * the topmost image, skip this backup. */
                if (alloced == 0) {
                    continue;
                }
            }
            /* FULL sync mode we copy the whole drive. */
            if (alloced < 0) {
                ret = alloced;
            } else {
                ret = backup_do_cow(s, offset, s->cluster_size,
                                    &error_is_read, false);
            }
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
                    backup_error_action(s, error_is_read, -ret);
                if (action == BLOCK_ERROR_ACTION_REPORT) {
                    break;
    } else {
                    offset -= s->cluster_size;
                    continue;
                }
            }
        }
        ret = backup_loop(s);
    }

    notifier_with_return_remove(&s->before_write);
@@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&s->flush_rwlock);
    qemu_co_rwlock_unlock(&s->flush_rwlock);
    hbitmap_free(s->copy_bitmap);

    return ret;
}
@@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = {
    .drain                  = backup_drain,
};

/*
 * Determine the backup cluster size to use for @target.
 *
 * Returns the chosen cluster size (at least BACKUP_CLUSTER_SIZE_DEFAULT)
 * on success, or a negative errno value with @errp set if the target's
 * cluster size cannot be determined and the target has no backing file.
 */
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
                                             Error **errp)
{
    BlockDriverInfo bdi;
    int ret = bdrv_get_info(target, &bdi);

    if (ret >= 0) {
        /* Never go below the default, even for smaller target clusters. */
        return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    if (target->backing) {
        /* Not fatal; just trudge on ahead. */
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    }

    if (ret == -ENOTSUP) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BACKUP_CLUSTER_SIZE_DEFAULT);
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    }

    error_setg_errno(errp, -ret,
        "Couldn't determine the cluster size of the target image, "
        "which has no backing file");
    error_append_hint(errp,
        "Aborting, since this may create an unusable destination image\n");
    return ret;
}

BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  JobTxn *txn, Error **errp)
{
    int64_t len;
    BlockDriverInfo bdi;
    BackupBlockJob *job = NULL;
    int ret;
    int64_t cluster_size;
    HBitmap *copy_bitmap = NULL;

    assert(bs);
    assert(target);
@@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

    cluster_size = backup_calculate_cluster_size(target, errp);
    if (cluster_size < 0) {
        goto error;
    }

    copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));

    /* job->len is fixed, so we can't allow resize */
    job = block_job_create(job_id, &backup_job_driver, txn, bs,
                           BLK_PERM_CONSISTENT_READ,
@@ -657,33 +644,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,

    /* Detect image-fleecing (and similar) schemes */
    job->serialize_target_writes = bdrv_chain_contains(target, bs);

    /* If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible. */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target->backing) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BACKUP_CLUSTER_SIZE_DEFAULT);
        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        goto error;
    } else if (ret < 0 && target->backing) {
        /* Not fatal; just trudge on ahead. */
        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
    } else {
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }
    job->cluster_size = cluster_size;
    job->copy_bitmap = copy_bitmap;
    copy_bitmap = NULL;
    job->use_copy_range = true;
    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
                                        blk_get_max_transfer(job->target));
@@ -699,6 +662,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    return &job->common;

 error:
    if (copy_bitmap) {
        assert(!job || !job->copy_bitmap);
        hbitmap_free(copy_bitmap);
    }
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
    }
+1 −2
Original line number Diff line number Diff line
@@ -392,7 +392,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                       perm, BLK_PERM_ALL, blk, errp);
    if (!blk->root) {
        bdrv_unref(bs);
        blk_unref(blk);
        return NULL;
    }
@@ -800,12 +799,12 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    bdrv_ref(bs);
    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                       blk->perm, blk->shared_perm, blk, errp);
    if (blk->root == NULL) {
        return -EPERM;
    }
    bdrv_ref(bs);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (tgm->throttle_state) {
+1 −2
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@
#include "qapi/error.h"
#include "qemu/cutils.h"

#include "block/block_int.h"
#include "qcow2.h"

/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
@@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
        dir_offset = *offset;
    }

    dir = g_try_malloc(dir_size);
    dir = g_try_malloc0(dir_size);
    if (dir == NULL) {
        return -ENOMEM;
    }
Loading