Commit 0b919fae authored by Kevin Wolf's avatar Kevin Wolf Committed by Stefan Hajnoczi
Browse files

qcow2: Batch discards



This optimises the discard operation for freed clusters by batching
discard requests (both snapshot deletion and bdrv_discard end up
updating the refcounts cluster by cluster).

Note that we don't discard asynchronously, but keep s->lock held. This
is to avoid that a freed cluster is reallocated and written to while the
discard is still in flight.

Signed-off-by: default avatarKevin Wolf <kwolf@redhat.com>
Signed-off-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
parent 67af674e
Loading
Loading
Loading
Loading
+18 −4
Original line number Diff line number Diff line
@@ -1377,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    nb_clusters = size_to_clusters(s, end_offset - offset);

    s->cache_discards = true;

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
        ret = discard_single_l2(bs, offset, nb_clusters);
        if (ret < 0) {
            return ret;
            goto fail;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
    }

    return 0;
    ret = 0;
fail:
    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    return ret;
}

/*
@@ -1450,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
    /* Each L2 table is handled by its own loop iteration */
    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);

    s->cache_discards = true;

    while (nb_clusters > 0) {
        ret = zero_single_l2(bs, offset, nb_clusters);
        if (ret < 0) {
            return ret;
            goto fail;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
    }

    return 0;
    ret = 0;
fail:
    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    return ret;
}
+79 −3
Original line number Diff line number Diff line
@@ -420,6 +420,74 @@ fail_block:
    return ret;
}

void qcow2_process_discards(BlockDriverState *bs, int ret)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2DiscardRegion *d, *next;

    QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
        QTAILQ_REMOVE(&s->discards, d, next);

        /* Discard is optional, ignore the return value */
        if (ret >= 0) {
            bdrv_discard(bs->file,
                         d->offset >> BDRV_SECTOR_BITS,
                         d->bytes >> BDRV_SECTOR_BITS);
        }

        g_free(d);
    }
}

static void update_refcount_discard(BlockDriverState *bs,
                                    uint64_t offset, uint64_t length)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2DiscardRegion *d, *p, *next;

    QTAILQ_FOREACH(d, &s->discards, next) {
        uint64_t new_start = MIN(offset, d->offset);
        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);

        if (new_end - new_start <= length + d->bytes) {
            /* There can't be any overlap, areas ending up here have no
             * references any more and therefore shouldn't get freed another
             * time. */
            assert(d->bytes + length == new_end - new_start);
            d->offset = new_start;
            d->bytes = new_end - new_start;
            goto found;
        }
    }

    d = g_malloc(sizeof(*d));
    *d = (Qcow2DiscardRegion) {
        .bs     = bs,
        .offset = offset,
        .bytes  = length,
    };
    QTAILQ_INSERT_TAIL(&s->discards, d, next);

found:
    /* Merge discard requests if they are adjacent now */
    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
        if (p == d
            || p->offset > d->offset + d->bytes
            || d->offset > p->offset + p->bytes)
        {
            continue;
        }

        /* Still no overlap possible */
        assert(p->offset == d->offset + d->bytes
            || d->offset == p->offset + p->bytes);

        QTAILQ_REMOVE(&s->discards, p, next);
        d->offset = MIN(d->offset, p->offset);
        d->bytes += p->bytes;
    }
}

/* XXX: cache several refcount block clusters ? */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
    int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
@@ -488,15 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
            s->free_cluster_index = cluster_index;
        }
        refcount_block[block_index] = cpu_to_be16(refcount);

        if (refcount == 0 && s->discard_passthrough[type]) {
            /* Try discarding, ignore errors */
            /* FIXME Doing this cluster by cluster will be painfully slow */
            bdrv_discard(bs->file, cluster_offset, 1);
            update_refcount_discard(bs, cluster_offset, s->cluster_size);
        }
    }

    ret = 0;
fail:
    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

    /* Write last changed block to disk */
    if (refcount_block) {
        int wret;
@@ -755,6 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    l1_table = NULL;
    l1_size2 = l1_size * sizeof(uint64_t);

    s->cache_discards = true;

    /* WARNING: qcow2_snapshot_goto relies on this function not using the
     * l1_table_offset when it is the current s->l1_table_offset! Be careful
     * when changing this! */
@@ -867,6 +940,9 @@ fail:
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    }

    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    /* Update L1 only if it isn't deleted anyway (addend = -1) */
    if (ret == 0 && addend >= 0 && l1_modified) {
        for (i = 0; i < l1_size; i++) {
+1 −0
Original line number Diff line number Diff line
@@ -486,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
+11 −0
Original line number Diff line number Diff line
@@ -147,6 +147,13 @@ typedef struct Qcow2Feature {
    char    name[46];
} QEMU_PACKED Qcow2Feature;

typedef struct Qcow2DiscardRegion {
    BlockDriverState *bs;
    uint64_t offset;
    uint64_t bytes;
    QTAILQ_ENTRY(Qcow2DiscardRegion) next;
} Qcow2DiscardRegion;

typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
@@ -199,6 +206,8 @@ typedef struct BDRVQcowState {
    size_t unknown_header_fields_size;
    void* unknown_header_fields;
    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
    QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
    bool cache_discards;
} BDRVQcowState;

/* XXX: use std qcow open function ? */
@@ -374,6 +383,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix);

void qcow2_process_discards(BlockDriverState *bs, int ret);

/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size);