Commit 82d76dc7 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/famz/tags/block-pull-request' into staging



# gpg: Signature made Fri 30 Jun 2017 15:08:45 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/block-pull-request:
  block: Exploit BDRV_BLOCK_EOF for larger zero blocks
  block: Add BDRV_BLOCK_EOF to bdrv_get_block_status()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 6db174ae c61e684e
Loading
Loading
Loading
Loading
+33 −9
Original line number Diff line number Diff line
@@ -1711,15 +1711,16 @@ typedef struct BdrvCoGetBlockStatusData {
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 * If 'sector_num' is beyond the end of the disk image the return value is
 * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 * beyond the end of the disk image it will be clamped; if 'pnum' is set to
 * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
 * points to the BDS which the sector range is allocated in.
@@ -1740,7 +1741,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
        return BDRV_BLOCK_EOF;
    }

    n = total_sectors - sector_num;
@@ -1751,6 +1752,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (sector_num + nb_sectors == total_sectors) {
            ret |= BDRV_BLOCK_EOF;
        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
@@ -1799,10 +1803,13 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
            if (ret2 & BDRV_BLOCK_EOF &&
                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
                /*
                 * It is valid for the format block driver to read
                 * beyond the end of the underlying file's current
                 * size; such areas read as zero.
                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
@@ -1814,6 +1821,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

out:
    bdrv_dec_in_flight(bs);
    if (ret >= 0 && sector_num + *pnum == total_sectors) {
        ret |= BDRV_BLOCK_EOF;
    }
    return ret;
}

@@ -1826,16 +1836,30 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
{
    BlockDriverState *p;
    int64_t ret = 0;
    bool first = true;

    assert(bs != base);
    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
        if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
        if (ret < 0) {
            break;
        }
        if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
            /*
             * Reading beyond the end of the file continues to read
             * zeroes, but we can only widen the result to the
             * unallocated length we learned from an earlier
             * iteration.
             */
            *pnum = nb_sectors;
        }
        if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }
        /* [sector_num, pnum] unallocated on this layer, which could be only
         * the first part of [sector_num, nb_sectors].  */
        nb_sectors = MIN(nb_sectors, *pnum);
        first = false;
    }
    return ret;
}
+2 −0
Original line number Diff line number Diff line
@@ -129,6 +129,7 @@ typedef struct HDGeometry {
 * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
 * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
 *                       layer (short for DATA || ZERO), set by block layer
 * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
 *
 * Internal flag:
 * BDRV_BLOCK_RAW: used internally to indicate that the request was
@@ -157,6 +158,7 @@ typedef struct HDGeometry {
#define BDRV_BLOCK_OFFSET_VALID 0x04
#define BDRV_BLOCK_RAW          0x08
#define BDRV_BLOCK_ALLOCATED    0x10
#define BDRV_BLOCK_EOF          0x20
#define BDRV_BLOCK_OFFSET_MASK  BDRV_SECTOR_MASK

typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+0 −4
Original line number Diff line number Diff line
@@ -334,8 +334,6 @@ $QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map

# Repeat with backing file holding unallocated cluster.
# TODO: Note that this forces an allocation, because we aren't yet able to
# quickly detect that reads beyond EOF of the backing file are always zero
CLUSTER_SIZE=2048 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024))

# Write at the front: sector-wise, the request is:
@@ -371,8 +369,6 @@ $QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map

# Repeat with backing file holding zero'd cluster
# TODO: Note that this forces an allocation, because we aren't yet able to
# quickly detect that reads beyond EOF of the backing file are always zero
$QEMU_IO -c "write -z $size 512" "$TEST_IMG.base" | _filter_qemu_io

# Write at the front: sector-wise, the request is:
+6 −6
Original line number Diff line number Diff line
@@ -310,19 +310,19 @@ wrote 512/512 bytes at offset 134217728
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 512/512 bytes at offset 134219264
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 1024/1024 bytes at offset 134218240
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 2048/2048 bytes at offset 134217728
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -336,19 +336,19 @@ wrote 512/512 bytes at offset 134217728
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 512/512 bytes at offset 134219264
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 1024/1024 bytes at offset 134218240
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
2048/2048 bytes allocated at offset 128 MiB
[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
wrote 2048/2048 bytes at offset 134217728
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)