Commit fb23ae6e authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony:
  test-coroutine: add performance test for nesting
  coroutine: adding configure option for sigaltstack coroutine backend
  coroutine: adding configure choose mechanism for coroutine backend
  coroutine: adding sigaltstack method (.c source)
  qcow2: Reduce number of I/O requests
  qcow2: Add qcow2_alloc_clusters_at()
  qcow2: Factor out count_cow_clusters
  qmp: convert blockdev-snapshot-sync to a wrapper around transactions
  add mode field to blockdev-snapshot-sync transaction item
  rename blockdev-group-snapshot-sync
  qapi: complete implementation of unions
  use QSIMPLEQ_FOREACH_SAFE when freeing list elements
  Add 'make check-block'
  make check: Add qemu-iotests subset
  qemu-iotests: Mark some tests as quick
  qcow2: Add error messages in qcow2_truncate
  block: handle -EBUSY in bdrv_commit_all()
  qcow2: Add some tracing
  qed: do not evict in-use L2 table cache entries
  Group snapshot: Fix format name for backing file
parents 79122e93 7e849a99
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o
ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
else
ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y)
coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o
else
coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o
endif
endif
coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o

#######################################################################
+6 −2
Original line number Diff line number Diff line
@@ -1244,14 +1244,18 @@ ro_cleanup:
    return ret;
}

void bdrv_commit_all(void)
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
        int ret = bdrv_commit(bs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
+1 −1
Original line number Diff line number Diff line
@@ -165,7 +165,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs);
int bdrv_commit(BlockDriverState *bs);
void bdrv_commit_all(void);
int bdrv_commit_all(void);
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt);
void bdrv_register(BlockDriver *bdrv);
+18 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include "block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"

typedef struct Qcow2CachedTable {
    void*   table;
@@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        return 0;
    }

    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
                                  c == s->l2_table_cache, i);

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
@@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)

int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;

    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
@@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    int i;
    int ret;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
    for (i = 0; i < c->size; i++) {
        if (c->entries[i].offset == offset) {
@@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,

    /* If not, write a table back and replace it */
    i = qcow2_cache_find_entry_to_replace(c);
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);
    if (i < 0) {
        return i;
    }
@@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
        return ret;
    }

    trace_qcow2_cache_get_read(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
    c->entries[i].offset = 0;
    if (read_from_disk) {
        if (c == s->l2_table_cache) {
@@ -258,6 +272,10 @@ found:
    c->entries[i].cache_hits++;
    c->entries[i].ref++;
    *table = c->entries[i].table;

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);

    return 0;
}

+199 −80
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "block_int.h"
#include "block/qcow2.h"
#include "trace.h"

int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
{
@@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)

    old_l2_offset = s->l1_table[l1_index];

    trace_qcow2_l2_allocate(bs, l1_index);

    /* allocate a new l2 entry */

    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
@@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)

    /* allocate a new entry in the l2 cache */

    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
    if (ret < 0) {
        return ret;
@@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
    /* write the l2 table to the file */
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
@@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
    }

    /* update the L1 entry */
    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
    ret = write_l1_entry(bs, l1_index);
    if (ret < 0) {
@@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
    }

    *table = l2_table;
    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
    return 0;

fail:
    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
    qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
    s->l1_table[l1_index] = old_l2_offset;
    return ret;
@@ -581,9 +589,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    BDRVQcowState *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
    uint64_t cluster_offset = m->cluster_offset;
    uint64_t cluster_offset = m->alloc_offset;
    bool cow = false;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);

    if (m->nb_clusters == 0)
        return 0;

@@ -667,64 +677,15 @@ err:
 }

/*
 * alloc_cluster_offset
 *
 * For a given offset of the disk image, return cluster offset in qcow2 file.
 * If the offset is not found, allocate a new cluster.
 *
 * If the cluster was already allocated, m->nb_clusters is set to 0,
 * other fields in m are meaningless.
 *
 * If the cluster is newly allocated, m->nb_clusters is set to the number of
 * contiguous clusters that have been allocated. In this case, the other
 * fields of m are valid and contain information about the first allocated
 * cluster.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 * Returns the number of contiguous clusters that can be used for an allocating
 * write, but require COW to be performed (this includes yet unallocated space,
 * which must copy from the backing file)
 */
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int n_start, int n_end, int *num, QCowL2Meta *m)
static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
    uint64_t *l2_table, int l2_index)
{
    BDRVQcowState *s = bs->opaque;
    int l2_index, ret;
    uint64_t l2_offset, *l2_table;
    int64_t cluster_offset;
    unsigned int nb_clusters, i = 0;
    QCowL2Meta *old_alloc;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        return ret;
    }

again:
    nb_clusters = size_to_clusters(s, n_end << 9);

    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* We keep all QCOW_OFLAG_COPIED clusters */

    if (cluster_offset & QCOW_OFLAG_COPIED) {
        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], 0, 0);

        cluster_offset &= ~QCOW_OFLAG_COPIED;
        m->nb_clusters = 0;

        goto out;
    }

    /* for the moment, multiple compressed clusters are not managed */

    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
        nb_clusters = 1;

    /* how many available clusters ? */
    int i = 0;
    uint64_t cluster_offset;

    while (i < nb_clusters) {
        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
@@ -745,8 +706,39 @@ again:
                (cluster_offset & QCOW_OFLAG_COMPRESSED))
            break;
    }

    assert(i <= nb_clusters);
    nb_clusters = i;
    return i;
}

/*
 * Allocates new clusters for the given guest_offset.
 *
 * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
 * contain the number of clusters that have been allocated and are contiguous
 * in the image file.
 *
 * If *host_offset is non-zero, it specifies the offset in the image file at
 * which the new clusters must start. *nb_clusters can be 0 on return in this
 * case if the cluster at host_offset is already in use. If *host_offset is
 * zero, the clusters can be allocated anywhere in the image file.
 *
 * *host_offset is updated to contain the offset into the image file at which
 * the first allocated cluster starts.
 *
 * Return 0 on success and -errno in error cases. -EAGAIN means that the
 * function has been waiting for another request and the allocation must be
 * restarted, but the whole request should not be failed.
 */
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table)
{
    BDRVQcowState *s = bs->opaque;
    int64_t cluster_offset;
    QCowL2Meta *old_alloc;

    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                         *host_offset, *nb_clusters);

    /*
     * Check if there already is an AIO write request in flight which allocates
@@ -755,8 +747,8 @@ again:
     */
    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t start = offset >> s->cluster_bits;
        uint64_t end = start + nb_clusters;
        uint64_t start = guest_offset >> s->cluster_bits;
        uint64_t end = start + *nb_clusters;
        uint64_t old_start = old_alloc->offset >> s->cluster_bits;
        uint64_t old_end = old_start + old_alloc->nb_clusters;

@@ -765,58 +757,185 @@ again:
        } else {
            if (start < old_start) {
                /* Stop at the start of a running allocation */
                nb_clusters = old_start - start;
                *nb_clusters = old_start - start;
            } else {
                nb_clusters = 0;
                *nb_clusters = 0;
            }

            if (nb_clusters == 0) {
            if (*nb_clusters == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
                qemu_co_mutex_unlock(&s->lock);
                qemu_co_queue_wait(&old_alloc->dependent_requests);
                qemu_co_mutex_lock(&s->lock);
                goto again;
                return -EAGAIN;
            }
        }
    }

    if (!nb_clusters) {
    if (!*nb_clusters) {
        abort();
    }

    /* save info needed for meta data update */
    m->offset = offset;
    m->n_start = n_start;
    m->nb_clusters = nb_clusters;
    /* Allocate new clusters */
    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
    if (*host_offset == 0) {
        cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
    } else {
        cluster_offset = *host_offset;
        *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters);
    }

    QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
    if (cluster_offset < 0) {
        return cluster_offset;
    }
    *host_offset = cluster_offset;
    return 0;
}

/*
 * alloc_cluster_offset
 *
 * For a given offset on the virtual disk, find the cluster offset in qcow2
 * file. If the offset is not found, allocate a new cluster.
 *
 * If the cluster was already allocated, m->nb_clusters is set to 0 and
 * other fields in m are meaningless.
 *
 * If the cluster is newly allocated, m->nb_clusters is set to the number of
 * contiguous clusters that have been allocated. In this case, the other
 * fields of m are valid and contain information about the first allocated
 * cluster.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int n_start, int n_end, int *num, QCowL2Meta *m)
{
    BDRVQcowState *s = bs->opaque;
    int l2_index, ret, sectors;
    uint64_t l2_offset, *l2_table;
    unsigned int nb_clusters, keep_clusters;
    uint64_t cluster_offset;

    /* allocate a new cluster */
    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
                                      n_start, n_end);

    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
    if (cluster_offset < 0) {
        ret = cluster_offset;
    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
again:
    nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
                      s->l2_size - l2_index);

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /*
     * Check how many clusters are already allocated and don't need COW, and how
     * many need a new allocation.
     */
    if (cluster_offset & QCOW_OFLAG_COPIED) {
        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
                                                  &l2_table[l2_index], 0, 0);
        assert(keep_clusters <= nb_clusters);
        nb_clusters -= keep_clusters;
    } else {
        /* For the moment, overwrite compressed clusters one by one */
        if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            nb_clusters = 1;
        } else {
            nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
        }

        keep_clusters = 0;
        cluster_offset = 0;
    }

    cluster_offset &= ~QCOW_OFLAG_COPIED;

    /* If there is something left to allocate, do that now */
    *m = (QCowL2Meta) {
        .cluster_offset     = cluster_offset,
        .nb_clusters        = 0,
    };
    qemu_co_queue_init(&m->dependent_requests);

    if (nb_clusters > 0) {
        uint64_t alloc_offset;
        uint64_t alloc_cluster_offset;
        uint64_t keep_bytes = keep_clusters * s->cluster_size;

        /* Calculate start and size of allocation */
        alloc_offset = offset + keep_bytes;

        if (keep_clusters == 0) {
            alloc_cluster_offset = 0;
        } else {
            alloc_cluster_offset = cluster_offset + keep_bytes;
        }

        /* Allocate, if necessary at a given offset in the image file */
        ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
                                      &nb_clusters, l2_table);
        if (ret == -EAGAIN) {
            goto again;
        } else if (ret < 0) {
            goto fail;
        }

out:
        /* save info needed for meta data update */
        if (nb_clusters > 0) {
            int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
            int avail_sectors = (keep_clusters + nb_clusters)
                                << (s->cluster_bits - BDRV_SECTOR_BITS);

            *m = (QCowL2Meta) {
                .cluster_offset = keep_clusters == 0 ?
                                  alloc_cluster_offset : cluster_offset,
                .alloc_offset   = alloc_cluster_offset,
                .offset         = alloc_offset,
                .n_start        = keep_clusters == 0 ? n_start : 0,
                .nb_clusters    = nb_clusters,
                .nb_available   = MIN(requested_sectors, avail_sectors),
            };
            qemu_co_queue_init(&m->dependent_requests);
            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
        }
    }

    /* Some cleanup work */
    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        goto fail_put;
    }

    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
    m->cluster_offset = cluster_offset;
    sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
    if (sectors > n_end) {
        sectors = n_end;
    }

    *num = m->nb_available - n_start;
    assert(sectors > n_start);
    *num = sectors - n_start;

    return 0;

fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
fail_put:
    if (nb_clusters > 0) {
        QLIST_REMOVE(m, next_in_flight);
    }
    return ret;
}

Loading