Commit 0b0cb9d3 authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony: (39 commits)
  qemu-iotests: add 036 autoclear feature bit test
  qemu-iotests: add qcow2.py set-feature-bit command
  fdc-test: introduced qtest read_without_media
  fdc: fix implied seek while there is no media in drive
  qcow2: fix autoclear image header update
  xen: Don't peek behind the BlockDriverState abstraction
  xen: Don't change -drive if=xen device name during machine init
  block: Replace bdrv_get_format() by bdrv_get_format_name()
  qemu-img: document qed format on qemu-img man page
  qemu-iotests: COW with many AIO requests on the same cluster
  qemu-iotests: Some backing file COW tests
  qcow2: Fix avail_sectors in cluster allocation code
  qcow2: Simplify calculation for COW area at the end
  qcow2: always operate caches in writeback mode
  ide: support enable/disable write cache
  block: always open drivers in writeback mode
  block: add bdrv_set_enable_write_cache
  block: copy enable_write_cache in bdrv_append
  savevm: flush after saving vm state
  block: flush in writethrough mode after writes
  ...
parents df6606f4 d551cd50
Loading
Loading
Loading
Loading
+78 −12
Original line number Diff line number Diff line
@@ -649,12 +649,13 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
    open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
@@ -1000,6 +1001,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
    tmp.buffer_alignment  = bs_top->buffer_alignment;
    tmp.copy_on_read      = bs_top->copy_on_read;

    tmp.enable_write_cache = bs_top->enable_write_cache;

    /* i/o timing parameters */
    tmp.slice_time        = bs_top->slice_time;
    tmp.slice_start       = bs_top->slice_start;
@@ -1032,7 +1035,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
    pstrcpy(tmp.backing_format, sizeof(tmp.backing_format),
            bs_top->drv ? bs_top->drv->format_name : "");

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
@@ -1222,14 +1226,14 @@ bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
    return bs->drv->bdrv_check(bs, res, fix);
}

#define COMMIT_BUF_SECTORS 2048
@@ -1758,8 +1762,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
    /* No flush needed for cache modes that already do it */
    if (bs->enable_write_cache) {
        bdrv_flush(bs);
    }

@@ -1808,6 +1812,9 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }
@@ -1977,6 +1984,10 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }

    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }
@@ -2371,6 +2382,11 @@ int bdrv_enable_write_cache(BlockDriverState *bs)
    return bs->enable_write_cache;
}

void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
    bs->enable_write_cache = wce;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
@@ -2413,13 +2429,9 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
const char *bdrv_get_format_name(BlockDriverState *bs)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
    return bs->drv ? bs->drv->format_name : NULL;
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
@@ -2466,6 +2478,11 @@ const char *bdrv_get_device_name(BlockDriverState *bs)
    return bs->device_name;
}

int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;
@@ -2569,6 +2586,55 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    return data.ret;
}

/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 *  the specified sector) that are known to be in the same
 *  allocated/unallocated state.
 *
 */
int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
                                            BlockDriverState *base,
                                            int64_t sector_num,
                                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
                                   &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        if (n > pnum_inter) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}

BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
+15 −2
Original line number Diff line number Diff line
@@ -165,6 +165,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors);
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, int *pnum);
int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
                                            BlockDriverState *base,
                                            int64_t sector_num,
                                            int nb_sectors, int *pnum);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
    const char *backing_file);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
@@ -183,10 +187,17 @@ typedef struct BdrvCheckResult {
    int corruptions;
    int leaks;
    int check_errors;
    int corruptions_fixed;
    int leaks_fixed;
    BlockFragInfo bfi;
} BdrvCheckResult;

int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res);
typedef enum {
    BDRV_FIX_LEAKS    = 1,
    BDRV_FIX_ERRORS   = 2,
} BdrvCheckMode;

int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);

/* async block I/O */
typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
@@ -280,11 +291,12 @@ BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
int bdrv_is_read_only(BlockDriverState *bs);
int bdrv_is_sg(BlockDriverState *bs);
int bdrv_enable_write_cache(BlockDriverState *bs);
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
int bdrv_is_inserted(BlockDriverState *bs);
int bdrv_media_changed(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
void bdrv_eject(BlockDriverState *bs, bool eject_flag);
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size);
const char *bdrv_get_format_name(BlockDriverState *bs);
BlockDriverState *bdrv_find(const char *name);
BlockDriverState *bdrv_next(BlockDriverState *bs);
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
@@ -296,6 +308,7 @@ int bdrv_query_missing_keys(void);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque);
const char *bdrv_get_device_name(BlockDriverState *bs);
int bdrv_get_flags(BlockDriverState *bs);
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+2 −23
Original line number Diff line number Diff line
@@ -40,11 +40,9 @@ struct Qcow2Cache {
    struct Qcow2Cache*      depends;
    int                     size;
    bool                    depends_on_flush;
    bool                    writethrough;
};

Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
    bool writethrough)
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
@@ -53,7 +51,6 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
    c = g_malloc0(sizeof(*c));
    c->size = num_tables;
    c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
    c->writethrough = writethrough;

    for (i = 0; i < c->size; i++) {
        c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
@@ -307,13 +304,8 @@ found:
    *table = NULL;

    assert(c->entries[i].ref >= 0);

    if (c->writethrough) {
        return qcow2_cache_entry_flush(bs, c, i);
    } else {
    return 0;
}
}

void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
{
@@ -329,16 +321,3 @@ void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
found:
    c->entries[i].dirty = true;
}

bool qcow2_cache_set_writethrough(BlockDriverState *bs, Qcow2Cache *c,
    bool enable)
{
    bool old = c->writethrough;

    if (!old && enable) {
        qcow2_cache_flush(bs, c);
    }

    c->writethrough = enable;
    return old;
}
+11 −5
Original line number Diff line number Diff line
@@ -540,7 +540,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
        }
        l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    }

    /* find the cluster offset for the given disk offset */
@@ -643,11 +642,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    }

    if (m->nb_available & (s->cluster_sectors - 1)) {
        uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
        cow = true;
        qemu_co_mutex_unlock(&s->lock);
        ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
                m->nb_available - end, s->cluster_sectors);
        ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available,
                           align_offset(m->nb_available, s->cluster_sectors));
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0)
            goto err;
@@ -949,8 +947,16 @@ again:

        /* save info needed for meta data update */
        if (nb_clusters > 0) {
            /*
             * requested_sectors: Number of sectors from the start of the first
             * newly allocated cluster to the end of the (possibly shortened
             * before) write request.
             *
             * avail_sectors: Number of sectors from the start of the first
             * newly allocated to the end of the last newly allocated cluster.
             */
            int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
            int avail_sectors = (keep_clusters + nb_clusters)
            int avail_sectors = nb_clusters
                                << (s->cluster_bits - BDRV_SECTOR_BITS);

            *m = (QCowL2Meta) {
+35 −22
Original line number Diff line number Diff line
@@ -367,7 +367,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    }

    for(i = 0; i < table_size; i++) {
        cpu_to_be64s(&new_table[i]);
        be64_to_cpus(&new_table[i]);
    }

    /* Hook up the new refcount table in the qcow2 header */
@@ -726,13 +726,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t old_offset, old_l2_offset;
    int i, j, l1_modified = 0, nb_csectors, refcount;
    int ret;
    bool old_l2_writethrough, old_refcount_writethrough;

    /* Switch caches to writeback mode during update */
    old_l2_writethrough =
        qcow2_cache_set_writethrough(bs, s->l2_table_cache, false);
    old_refcount_writethrough =
        qcow2_cache_set_writethrough(bs, s->refcount_block_cache, false);

    l2_table = NULL;
    l1_table = NULL;
@@ -856,11 +849,6 @@ fail:
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    }

    /* Enable writethrough cache mode again */
    qcow2_cache_set_writethrough(bs, s->l2_table_cache, old_l2_writethrough);
    qcow2_cache_set_writethrough(bs, s->refcount_block_cache,
        old_refcount_writethrough);

    /* Update L1 only if it isn't deleted anyway (addend = -1) */
    if (addend >= 0 && l1_modified) {
        for(i = 0; i < l1_size; i++)
@@ -1122,11 +1110,12 @@ fail:
 * Returns 0 if no errors are found, the number of errors in case the image is
 * detected as corrupted, and -errno when an internal error occurred.
 */
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix)
{
    BDRVQcowState *s = bs->opaque;
    int64_t size;
    int nb_clusters, refcount1, refcount2, i;
    int64_t size, i;
    int nb_clusters, refcount1, refcount2;
    QCowSnapshot *sn;
    uint16_t *refcount_table;
    int ret;
@@ -1170,14 +1159,15 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)

        /* Refcount blocks are cluster aligned */
        if (offset & (s->cluster_size - 1)) {
            fprintf(stderr, "ERROR refcount block %d is not "
            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
                "cluster aligned; refcount table entry corrupted\n", i);
            res->corruptions++;
            continue;
        }

        if (cluster >= nb_clusters) {
            fprintf(stderr, "ERROR refcount block %d is outside image\n", i);
            fprintf(stderr, "ERROR refcount block %" PRId64
                    " is outside image\n", i);
            res->corruptions++;
            continue;
        }
@@ -1186,7 +1176,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
            inc_refcounts(bs, res, refcount_table, nb_clusters,
                offset, s->cluster_size);
            if (refcount_table[cluster] != 1) {
                fprintf(stderr, "ERROR refcount block %d refcount=%d\n",
                fprintf(stderr, "ERROR refcount block %" PRId64
                    " refcount=%d\n",
                    i, refcount_table[cluster]);
                res->corruptions++;
            }
@@ -1197,7 +1188,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
    for(i = 0; i < nb_clusters; i++) {
        refcount1 = get_refcount(bs, i);
        if (refcount1 < 0) {
            fprintf(stderr, "Can't get refcount for cluster %d: %s\n",
            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
                i, strerror(-refcount1));
            res->check_errors++;
            continue;
@@ -1205,9 +1196,31 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)

        refcount2 = refcount_table[i];
        if (refcount1 != refcount2) {
            fprintf(stderr, "%s cluster %d refcount=%d reference=%d\n",
                   refcount1 < refcount2 ? "ERROR" : "Leaked",

            /* Check if we're allowed to fix the mismatch */
            int *num_fixed = NULL;
            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
                num_fixed = &res->leaks_fixed;
            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
                num_fixed = &res->corruptions_fixed;
            }

            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
                   num_fixed != NULL     ? "Repairing" :
                   refcount1 < refcount2 ? "ERROR" :
                                           "Leaked",
                   i, refcount1, refcount2);

            if (num_fixed) {
                ret = update_refcount(bs, i << s->cluster_bits, 1,
                                      refcount2 - refcount1);
                if (ret >= 0) {
                    (*num_fixed)++;
                    continue;
                }
            }

            /* And if we couldn't, print an error */
            if (refcount1 < refcount2) {
                res->corruptions++;
            } else {
Loading