Commit d541e201 authored by Kevin Wolf's avatar Kevin Wolf
Browse files

Merge remote-tracking branch 'mreitz/tags/pull-block-2017-05-11' into queue-block



Block patches for the block queue.

# gpg: Signature made Thu May 11 14:28:41 2017 CEST
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* mreitz/tags/pull-block-2017-05-11: (22 commits)
  MAINTAINERS: Add qemu-progress to the block layer
  qcow2: Discard/zero clusters by byte count
  qcow2: Assert that cluster operations are aligned
  qcow2: Optimize write zero of unaligned tail cluster
  iotests: Add test 179 to cover write zeroes with unmap
  iotests: Improve _filter_qemu_img_map
  qcow2: Optimize zero_single_l2() to minimize L2 churn
  qcow2: Make distinction between zero cluster types obvious
  qcow2: Name typedef for cluster type
  qcow2: Correctly report status of preallocated zero clusters
  block: Update comments on BDRV_BLOCK_* meanings
  qcow2: Use consistent switch indentation
  qcow2: Nicer variable names in qcow2_update_snapshot_refcount()
  tests: Add coverage for recent block geometry fixes
  blkdebug: Add ability to override unmap geometries
  blkdebug: Simplify override logic
  blkdebug: Add pass-through write_zero and discard support
  blkdebug: Refactor error injection
  blkdebug: Sanity check block layer guarantees
  qemu-io: Switch 'map' output to byte-based reporting
  ...

Signed-off-by: default avatarKevin Wolf <kwolf@redhat.com>
parents 698bdfa0 8dd30c86
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1170,6 +1170,7 @@ F: include/block/
F: qemu-img*
F: qemu-io*
F: tests/qemu-iotests/
F: util/qemu-progress.c
T: git git://repo.or.cz/qemu/kevin.git block

Block I/O path
+217 −47
Original line number Diff line number Diff line
/*
 * Block protocol for I/O error injection
 *
 * Copyright (C) 2016-2017 Red Hat, Inc.
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -37,7 +38,12 @@
typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
    int align;
    uint64_t align;
    uint64_t max_transfer;
    uint64_t opt_write_zero;
    uint64_t max_write_zero;
    uint64_t opt_discard;
    uint64_t max_discard;

    /* For blkdebug_refresh_filename() */
    char *config_file;
@@ -342,6 +348,31 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
        {
            .name = "max-transfer",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum transfer size in bytes",
        },
        {
            .name = "opt-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum write zero alignment in bytes",
        },
        {
            .name = "max-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum write zero size in bytes",
        },
        {
            .name = "opt-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum discard alignment in bytes",
        },
        {
            .name = "max-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum discard size in bytes",
        },
        { /* end of list */ }
    },
};
@@ -352,8 +383,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
    uint64_t align;
    int ret;
    uint64_t align;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -382,19 +413,69 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

    /* Set request alignment */
    align = qemu_opt_get_size(opts, "align", 0);
    if (align < INT_MAX && is_power_of_2(align)) {
        s->align = align;
    } else if (align) {
        error_setg(errp, "Invalid alignment");
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
        bs->file->bs->supported_zero_flags;
    ret = -EINVAL;

    /* Set alignment overrides */
    s->align = qemu_opt_get_size(opts, "align", 0);
    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
                   s->align);
        goto out;
    }
    align = MAX(s->align, bs->file->bs->bl.request_alignment);

    ret = 0;
    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
    if (s->max_transfer &&
        (s->max_transfer >= INT_MAX ||
         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
                   s->max_transfer);
        goto out;
    }

    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
    if (s->opt_write_zero &&
        (s->opt_write_zero >= INT_MAX ||
         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
                   s->opt_write_zero);
        goto out;
    }

    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
    if (s->max_write_zero &&
        (s->max_write_zero >= INT_MAX ||
         !QEMU_IS_ALIGNED(s->max_write_zero,
                          MAX(s->opt_write_zero, align)))) {
        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
                   s->max_write_zero);
        goto out;
    }

    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
    if (s->opt_discard &&
        (s->opt_discard >= INT_MAX ||
         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
                   s->opt_discard);
        goto out;
    }

    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
    if (s->max_discard &&
        (s->max_discard >= INT_MAX ||
         !QEMU_IS_ALIGNED(s->max_discard,
                          MAX(s->opt_discard, align)))) {
        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
                   s->max_discard);
        goto out;
    }

    ret = 0;
out:
    if (ret < 0) {
        g_free(s->config_file);
@@ -403,11 +484,30 @@ out:
    return ret;
}

static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
{
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
    bool immediately = rule->options.inject.immediately;
    BlkdebugRule *rule = NULL;
    int error;
    bool immediately;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
        uint64_t inject_offset = rule->options.inject.offset;

        if (inject_offset == -1 ||
            (bytes && inject_offset >= offset &&
             inject_offset < offset + bytes))
        {
            break;
        }
    }

    if (!rule || !rule->options.inject.error) {
        return 0;
    }

    immediately = rule->options.inject.immediately;
    error = rule->options.inject.error;

    if (rule->options.inject.once) {
        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
@@ -426,21 +526,18 @@ static int coroutine_fn
blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                   QEMUIOVector *qiov, int flags)
{
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
        uint64_t inject_offset = rule->options.inject.offset;
    int err;

        if (inject_offset == -1 ||
            (inject_offset >= offset && inject_offset < offset + bytes))
        {
            break;
        }
    /* Sanity check block layer guarantees */
    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    if (bs->bl.max_transfer) {
        assert(bytes <= bs->bl.max_transfer);
    }

    if (rule && rule->options.inject.error) {
        return inject_error(bs, rule);
    err = rule_check(bs, offset, bytes);
    if (err) {
        return err;
    }

    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -450,21 +547,18 @@ static int coroutine_fn
blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
{
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
        uint64_t inject_offset = rule->options.inject.offset;
    int err;

        if (inject_offset == -1 ||
            (inject_offset >= offset && inject_offset < offset + bytes))
        {
            break;
        }
    /* Sanity check block layer guarantees */
    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    if (bs->bl.max_transfer) {
        assert(bytes <= bs->bl.max_transfer);
    }

    if (rule && rule->options.inject.error) {
        return inject_error(bs, rule);
    err = rule_check(bs, offset, bytes);
    if (err) {
        return err;
    }

    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -472,22 +566,81 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,

static int blkdebug_co_flush(BlockDriverState *bs)
{
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
    int err = rule_check(bs, 0, 0);

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
        if (rule->options.inject.offset == -1) {
            break;
    if (err) {
        return err;
    }

    return bdrv_co_flush(bs->file->bs);
}

static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
                                                  int64_t offset, int count,
                                                  BdrvRequestFlags flags)
{
    uint32_t align = MAX(bs->bl.request_alignment,
                         bs->bl.pwrite_zeroes_alignment);
    int err;

    /* Only pass through requests that are larger than requested
     * preferred alignment (so that we test the fallback to writes on
     * unaligned portions), and check that the block layer never hands
     * us anything unaligned that crosses an alignment boundary.  */
    if (count < align) {
        assert(QEMU_IS_ALIGNED(offset, align) ||
               QEMU_IS_ALIGNED(offset + count, align) ||
               DIV_ROUND_UP(offset, align) ==
               DIV_ROUND_UP(offset + count, align));
        return -ENOTSUP;
    }
    assert(QEMU_IS_ALIGNED(offset, align));
    assert(QEMU_IS_ALIGNED(count, align));
    if (bs->bl.max_pwrite_zeroes) {
        assert(count <= bs->bl.max_pwrite_zeroes);
    }

    if (rule && rule->options.inject.error) {
        return inject_error(bs, rule);
    err = rule_check(bs, offset, count);
    if (err) {
        return err;
    }

    return bdrv_co_flush(bs->file->bs);
    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}

static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
                                             int64_t offset, int count)
{
    uint32_t align = bs->bl.pdiscard_alignment;
    int err;

    /* Only pass through requests that are larger than requested
     * minimum alignment, and ensure that unaligned requests do not
     * cross optimum discard boundaries. */
    if (count < bs->bl.request_alignment) {
        assert(QEMU_IS_ALIGNED(offset, align) ||
               QEMU_IS_ALIGNED(offset + count, align) ||
               DIV_ROUND_UP(offset, align) ==
               DIV_ROUND_UP(offset + count, align));
        return -ENOTSUP;
    }
    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
    if (align && count >= align) {
        assert(QEMU_IS_ALIGNED(offset, align));
        assert(QEMU_IS_ALIGNED(count, align));
    }
    if (bs->bl.max_pdiscard) {
        assert(count <= bs->bl.max_pdiscard);
    }

    err = rule_check(bs, offset, count);
    if (err) {
        return err;
    }

    return bdrv_co_pdiscard(bs->file->bs, offset, count);
}

static void blkdebug_close(BlockDriverState *bs)
{
@@ -715,6 +868,21 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
    if (s->align) {
        bs->bl.request_alignment = s->align;
    }
    if (s->max_transfer) {
        bs->bl.max_transfer = s->max_transfer;
    }
    if (s->opt_write_zero) {
        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
    }
    if (s->max_write_zero) {
        bs->bl.max_pwrite_zeroes = s->max_write_zero;
    }
    if (s->opt_discard) {
        bs->bl.pdiscard_alignment = s->opt_discard;
    }
    if (s->max_discard) {
        bs->bl.max_pdiscard = s->max_discard;
    }
}

static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -742,6 +910,8 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_co_preadv         = blkdebug_co_preadv,
    .bdrv_co_pwritev        = blkdebug_co_pwritev,
    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
+105 −78
Original line number Diff line number Diff line
@@ -309,7 +309,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
{
    int i;
    int first_cluster_type;
    QCow2ClusterType first_cluster_type;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;
@@ -321,8 +321,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
    /* must be allocated */
    first_cluster_type = qcow2_get_cluster_type(first_entry);
    assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
           (first_cluster_type == QCOW2_CLUSTER_ZERO &&
            (first_entry & L2E_OFFSET_MASK) != 0));
           first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -334,14 +333,21 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
	return i;
}

static int count_contiguous_clusters_by_type(int nb_clusters,
/*
 * Checks how many consecutive unallocated clusters in a given L2
 * table have the same cluster type.
 */
static int count_contiguous_clusters_unallocated(int nb_clusters,
                                                 uint64_t *l2_table,
                                             int wanted_type)
                                                 QCow2ClusterType wanted_type)
{
    int i;

    assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
           wanted_type == QCOW2_CLUSTER_UNALLOCATED);
    for (i = 0; i < nb_clusters; i++) {
        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
        uint64_t entry = be64_to_cpu(l2_table[i]);
        QCow2ClusterType type = qcow2_get_cluster_type(entry);

        if (type != wanted_type) {
            break;
@@ -493,6 +499,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int l1_bits, c;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
    QCow2ClusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
@@ -515,13 +522,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        type = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        type = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

@@ -550,38 +557,37 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     * true */
    assert(nb_clusters <= INT_MAX);

    ret = qcow2_get_cluster_type(*cluster_offset);
    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
    type = qcow2_get_cluster_type(*cluster_offset);
    if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
                                type == QCOW2_CLUSTER_ZERO_ALLOC)) {
        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                " in pre-v3 image (L2 offset: %#" PRIx64
                                ", L2 index: %#x)", l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_ZERO);
        *cluster_offset = 0;
    switch (type) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        /* how many empty clusters ? */
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_UNALLOCATED);
        c = count_contiguous_clusters_unallocated(nb_clusters,
                                                  &l2_table[l2_index], type);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_ZERO_ALLOC:
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "Cluster allocation offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
@@ -608,7 +614,7 @@ out:
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

    return ret;
    return type;

fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
@@ -866,7 +872,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
        int cluster_type = qcow2_get_cluster_type(l2_entry);
        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
@@ -876,7 +882,8 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
        case QCOW2_CLUSTER_ZERO:
        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            break;
        default:
            abort();
@@ -1177,8 +1184,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     * wrong with our code. */
    assert(nb_clusters > 0);

    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO &&
        (entry & L2E_OFFSET_MASK) != 0 && (entry & QCOW_OFLAG_COPIED) &&
    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
        (entry & QCOW_OFLAG_COPIED) &&
        (!*host_offset ||
         start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
    {
@@ -1510,13 +1517,13 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
            }
            break;

            case QCOW2_CLUSTER_ZERO:
                /* Preallocated zero clusters should be discarded in any case */
                if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) {
        case QCOW2_CLUSTER_ZERO_PLAIN:
            if (!full_discard) {
                continue;
            }
            break;

        case QCOW2_CLUSTER_ZERO_ALLOC:
        case QCOW2_CLUSTER_NORMAL:
        case QCOW2_CLUSTER_COMPRESSED:
            break;
@@ -1542,35 +1549,36 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
}

int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, enum qcow2_discard_type type,
                          bool full_discard)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t end_offset;
    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
    int64_t cleared;
    int ret;

    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

    /* The caller must cluster-align start; round end down except at EOF */
    /* Caller must pass aligned values, except at image end */
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
        end_offset = start_of_cluster(s, end_offset);
    }
    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

    nb_clusters = size_to_clusters(s, end_offset - offset);
    nb_clusters = size_to_clusters(s, bytes);

    s->cache_discards = true;

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
        if (ret < 0) {
        cleared = discard_single_l2(bs, offset, nb_clusters, type,
                                    full_discard);
        if (cleared < 0) {
            ret = cleared;
            goto fail;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
        nb_clusters -= cleared;
        offset += (cleared * s->cluster_size);
    }

    ret = 0;
@@ -1594,6 +1602,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    int l2_index;
    int ret;
    int i;
    bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
@@ -1606,12 +1615,22 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_offset;
        QCow2ClusterType cluster_type;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);

        /* Update L2 entries */
        /*
         * Minimize L2 changes if the cluster already reads back as
         * zeroes with correct allocation.
         */
        cluster_type = qcow2_get_cluster_type(old_offset);
        if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
            (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
            continue;
        }

        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
        if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
        } else {
@@ -1624,31 +1643,39 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
}

int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
                        int flags)
int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
    int64_t cleared;
    int ret;

    /* Caller must pass aligned values, except at image end */
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

    /* The zero flag is only supported by version 3 and newer */
    if (s->qcow_version < 3) {
        return -ENOTSUP;
    }

    /* Each L2 table is handled by its own loop iteration */
    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
    nb_clusters = size_to_clusters(s, bytes);

    s->cache_discards = true;

    while (nb_clusters > 0) {
        ret = zero_single_l2(bs, offset, nb_clusters, flags);
        if (ret < 0) {
        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
        if (cleared < 0) {
            ret = cleared;
            goto fail;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
        nb_clusters -= cleared;
        offset += (cleared * s->cluster_size);
    }

    ret = 0;
@@ -1732,14 +1759,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            int64_t offset = l2_entry & L2E_OFFSET_MASK;
            int cluster_type = qcow2_get_cluster_type(l2_entry);
            bool preallocated = offset != 0;
            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);

            if (cluster_type != QCOW2_CLUSTER_ZERO) {
            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                continue;
            }

            if (!preallocated) {
            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
@@ -1774,7 +1801,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                        "%#" PRIx64 " unaligned (L2 offset: %#"
                                        PRIx64 ", L2 index: %#x)", offset,
                                        l2_offset, j);
                if (!preallocated) {
                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1784,7 +1811,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
            if (ret < 0) {
                if (!preallocated) {
                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1793,7 +1820,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
            if (ret < 0) {
                if (!preallocated) {
                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
+71 −77

File changed.

Preview size limit exceeded, changes collapsed.

+3 −4
Original line number Diff line number Diff line
@@ -440,9 +440,8 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
                           align_offset(sn->vm_state_size, s->cluster_size)
                                >> BDRV_SECTOR_BITS,
    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
                          align_offset(sn->vm_state_size, s->cluster_size),
                          QCOW2_DISCARD_NEVER, false);

#ifdef DEBUG_ALLOC
Loading