Commit eee13dfe authored by Paolo Bonzini, committed by Kevin Wolf
Browse files

mirror: allow customizing the granularity



The desired granularity may be very different depending on the kind of
operation (e.g. continuous replication vs. collapse-to-raw) and whether
the VM is expected to perform lots of I/O while mirroring is in progress.

Allow the user to customize it, while providing a sane default so that
in general there will be no extra allocated space in the target compared
to the source.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
parent 50717e94
Loading
Loading
Loading
Loading
+33 −19
Original line number Diff line number Diff line
@@ -17,9 +17,6 @@
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"

#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define SLICE_TIME 100000000ULL /* ns */

typedef struct MirrorBlockJob {
@@ -31,6 +28,7 @@ typedef struct MirrorBlockJob {
    bool synced;
    bool should_complete;
    int64_t sector_num;
    int64_t granularity;
    size_t buf_size;
    unsigned long *cow_bitmap;
    HBitmapIter hbi;
@@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
    BlockDriverState *source = s->common.bs;
    BlockDriverState *target = s->target;
    QEMUIOVector qiov;
    int ret, nb_sectors;
    int ret, nb_sectors, sectors_per_chunk;
    int64_t end, sector_num, chunk_num;
    struct iovec iov;

@@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
     * is very large, we need to do COW ourselves.  The first time a cluster is
     * copied, copy it entirely.
     *
     * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
     * powers of two, the number of sectors to copy cannot exceed one cluster.
     * Because both the granularity and the cluster size are powers of two, the
     * number of sectors to copy cannot exceed one cluster.
     */
    sector_num = s->sector_num;
    nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
    chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
    chunk_num = sector_num / sectors_per_chunk;
    if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
        trace_mirror_cow(s, sector_num);
        bdrv_round_to_clusters(s->target,
                               sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
                               sector_num, sectors_per_chunk,
                               &sector_num, &nb_sectors);
    }

@@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
        goto fail;
    }
    if (s->cow_bitmap) {
        bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
                   nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
        bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
                   nb_sectors / sectors_per_chunk);
    }
    return 0;

@@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, length;
    int64_t sector_num, end, sectors_per_chunk, length;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
@@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->buf_size < bdi.cluster_size) {
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = bdi.cluster_size;
            length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
            length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

@@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque)
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }
@@ -332,7 +331,7 @@ static BlockJobType mirror_job_type = {
};

void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
@@ -340,6 +339,20 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
{
    MirrorBlockJob *s;

    if (granularity == 0) {
        /* Choose the default granularity based on the target file's cluster
         * size, clamped between 4k and 64k.  */
        BlockDriverInfo bdi;
        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
            granularity = MAX(4096, bdi.cluster_size);
            granularity = MIN(65536, granularity);
        } else {
            granularity = 65536;
        }
    }

    assert ((granularity & (granularity - 1)) == 0);

    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
@@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
    s->on_target_error = on_target_error;
    s->target = target;
    s->mode = mode;
    s->buf_size = BLOCK_SIZE;
    s->granularity = granularity;
    s->buf_size = granularity;

    bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
    bdrv_set_dirty_tracking(bs, granularity);
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
+14 −1
Original line number Diff line number Diff line
@@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target,
                      enum MirrorSyncMode sync,
                      bool has_mode, enum NewImageMode mode,
                      bool has_speed, int64_t speed,
                      bool has_granularity, uint32_t granularity,
                      bool has_on_source_error, BlockdevOnError on_source_error,
                      bool has_on_target_error, BlockdevOnError on_target_error,
                      Error **errp)
@@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target,
    if (!has_mode) {
        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
    }
    if (!has_granularity) {
        granularity = 0;
    }
    if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
        error_set(errp, QERR_INVALID_PARAMETER, device);
        return;
    }
    if (granularity & (granularity - 1)) {
        error_set(errp, QERR_INVALID_PARAMETER, device);
        return;
    }

    bs = bdrv_find(device);
    if (!bs) {
@@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target,
        return;
    }

    mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
    mirror_start(bs, target_bs, speed, granularity, sync,
                 on_source_error, on_target_error,
                 block_job_cb, bs, &local_err);
    if (local_err != NULL) {
        bdrv_delete(target_bs);
+1 −1
Original line number Diff line number Diff line
@@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)

    qmp_drive_mirror(device, filename, !!format, format,
                     full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                     true, mode, false, 0,
                     true, mode, false, 0, false, 0,
                     false, 0, false, 0, &errp);
    hmp_handle_error(mon, &errp);
}
+2 −1
Original line number Diff line number Diff line
@@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @granularity: The chosen granularity for the dirty bitmap, in bytes.
 *               Must be a power of two, or 0 to pick a default based on
 *               the target's cluster size.
 * @mode: Whether to collapse all images in the chain to the target.
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
@@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
 * @bs will be switched to read from @target.
 */
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
+7 −1
Original line number Diff line number Diff line
@@ -1636,6 +1636,11 @@
#        (all the disk, only the sectors allocated in the topmost image, or
#        only new I/O).
#
# @granularity: #optional granularity of the dirty bitmap, in bytes.  The
#               default is 64K if the image format doesn't have clusters;
#               otherwise it is the cluster size clamped to the range
#               4K to 64K.  Must be a power of 2 between 512 and 64M
#               (since 1.4).
#
# @on-source-error: #optional the action to take on an error on the source,
#                   default 'report'.  'stop' and 'enospc' can only be used
#                   if the block device supports io-status (see BlockInfo).
@@ -1652,7 +1657,8 @@
{ 'command': 'drive-mirror',
  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
            '*speed': 'int', '*on-source-error': 'BlockdevOnError',
            '*speed': 'int', '*granularity': 'uint32',
            '*on-source-error': 'BlockdevOnError',
            '*on-target-error': 'BlockdevOnError' } }

##
Loading