Commit 25bfd5a7 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging



Pull request

v2:
 * Drop merge failure from a previous pull request that broke virtio-blk on ARM
   guests
 * Add Parallels XML patch series

# gpg: Signature made Mon 22 Jan 2018 16:00:40 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block/parallels: add backing support to readv/writev
  block/parallels: replace some magic numbers
  block/parallels: move some structures into header
  configure: add dependency
  docs/interop/prl-xml: description of Parallels Disk format
  block: add block_set_io_throttle virtio-blk-pci QMP example

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 238e2d93 bcbb3866
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -47,3 +47,5 @@ block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
dmg-bz2.o-libs     := $(BZIP2_LIBS)
qcow.o-libs        := -lz
linux-aio.o-libs   := -laio
parallels.o-cflags := $(LIBXML2_CFLAGS)
parallels.o-libs   := $(LIBXML2_LIBS)
+48 −60
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include "qemu/bswap.h"
#include "qemu/bitmap.h"
#include "migration/blocker.h"
#include "parallels.h"

/**************************************************************/

@@ -45,30 +46,6 @@
#define HEADER_INUSE_MAGIC  (0x746F6E59)
#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)

#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */


// always little-endian
typedef struct ParallelsHeader {
    char magic[16]; // "WithoutFreeSpace"
    uint32_t version;
    uint32_t heads;
    uint32_t cylinders;
    uint32_t tracks;
    uint32_t bat_entries;
    uint64_t nb_sectors;
    uint32_t inuse;
    uint32_t data_off;
    char padding[12];
} QEMU_PACKED ParallelsHeader;


typedef enum ParallelsPreallocMode {
    PRL_PREALLOC_MODE_FALLOCATE = 0,
    PRL_PREALLOC_MODE_TRUNCATE = 1,
    PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;

static QEnumLookup prealloc_mode_lookup = {
    .array = (const char *const[]) {
        "falloc",
@@ -77,34 +54,6 @@ static QEnumLookup prealloc_mode_lookup = {
    .size = PRL_PREALLOC_MODE__MAX
};

typedef struct BDRVParallelsState {
    /** Locking is conservative, the lock protects
     *   - image file extending (truncate, fallocate)
     *   - any access to block allocation table
     */
    CoMutex lock;

    ParallelsHeader *header;
    uint32_t header_size;
    bool header_unclean;

    unsigned long *bat_dirty_bmap;
    unsigned int  bat_dirty_block;

    uint32_t *bat_bitmap;
    unsigned int bat_size;

    int64_t  data_end;
    uint64_t prealloc_size;
    ParallelsPreallocMode prealloc_mode;

    unsigned int tracks;

    unsigned int off_multiplier;
    Error *migration_blocker;
} BDRVParallelsState;


#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"

@@ -193,6 +142,7 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors, int *pnum)
{
    int ret;
    BDRVParallelsState *s = bs->opaque;
    int64_t pos, space, idx, to_allocate, i, len;

@@ -221,7 +171,6 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
        return len;
    }
    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
        int ret;
        space += s->prealloc_size;
        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
            ret = bdrv_pwrite_zeroes(bs->file,
@@ -237,6 +186,37 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
        }
    }

    /* Try to read from backing to fill empty clusters
     * FIXME: 1. previous write_zeroes may be redundant
     *        2. most of data we read from backing will be rewritten by
     *           parallels_co_writev. On aligned-to-cluster write we do not need
     *           this read at all.
     *        3. it would be good to combine write of data from backing and new
     *           data into one write call */
    if (bs->backing) {
        int64_t nb_cow_sectors = to_allocate * s->tracks;
        int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
        QEMUIOVector qiov;
        struct iovec iov = {
            .iov_len = nb_cow_bytes,
            .iov_base = qemu_blockalign(bs, nb_cow_bytes)
        };
        qemu_iovec_init_external(&qiov, &iov, 1);

        ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors,
                            &qiov);
        if (ret < 0) {
            qemu_vfree(iov.iov_base);
            return ret;
        }

        ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov);
        qemu_vfree(iov.iov_base);
        if (ret < 0) {
            return ret;
        }
    }

    for (i = 0; i < to_allocate; i++) {
        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
        s->data_end += s->tracks;
@@ -360,12 +340,19 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,

        nbytes = n << BDRV_SECTOR_BITS;

        if (position < 0) {
            qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
        } else {
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);

        if (position < 0) {
            if (bs->backing) {
                ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
                if (ret < 0) {
                    break;
                }
            } else {
                qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
            }
        } else {
            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
            if (ret < 0) {
                break;
@@ -527,8 +514,9 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
    header.version = cpu_to_le32(HEADER_VERSION);
    /* don't care much about geometry, it is not used on image level */
    header.heads = cpu_to_le32(16);
    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
    header.heads = cpu_to_le32(HEADS_NUMBER);
    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
                                   / HEADS_NUMBER / SEC_IN_CYL);
    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
    header.bat_entries = cpu_to_le32(bat_entries);
    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
@@ -798,7 +786,7 @@ static BlockDriver bdrv_parallels = {
    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
    .bdrv_co_readv  = parallels_co_readv,
    .bdrv_co_writev = parallels_co_writev,

    .supports_backing = true,
    .bdrv_create    = parallels_create,
    .bdrv_check     = parallels_check,
    .create_opts    = &parallels_create_opts,

block/parallels.h

0 → 100644
+88 −0
Original line number Diff line number Diff line
/*
* Block driver for Parallels disk image format
*
* Copyright (c) 2015-2017 Virtuozzo, Inc.
* Authors:
*         2016-2017 Klim S. Kireev <klim.kireev@virtuozzo.com>
*         2015 Denis V. Lunev <den@openvz.org>
*
* This code was originally based on comparing different disk images created
* by Parallels. Currently it is based on opened OpenVZ sources
* available at
*     https://github.com/OpenVZ/ploop
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_PARALLELS_H
#define BLOCK_PARALLELS_H
#include "qemu/coroutine.h"
#include "qemu/typedefs.h"

/*
 * Geometry constants used when a new image header is filled in: the head
 * count is stored as-is, and the cylinder count is the image size in sectors
 * divided by HEADS_NUMBER and then by SEC_IN_CYL.
 */
#define HEADS_NUMBER 16
#define SEC_IN_CYL 32
#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */

/*
 * Header of a Parallels image file.
 *
 * Always little-endian: the image creation code stores every integer field
 * through cpu_to_le32()/cpu_to_le64(). The structure is marked QEMU_PACKED;
 * the declared fields add up to 64 bytes.
 */
typedef struct ParallelsHeader {
    char magic[16]; /* "WithoutFreeSpace" */
    uint32_t version;
    /* Disk geometry; the creation code notes it is not used on image level */
    uint32_t heads;
    uint32_t cylinders;
    /* Cluster size, expressed in sectors */
    uint32_t tracks;
    /* NOTE(review): presumably the number of BAT entries - confirm */
    uint32_t bat_entries;
    /* Virtual disk size in sectors (rounded up at creation time) */
    uint64_t nb_sectors;
    /* NOTE(review): presumably HEADER_INUSE_MAGIC while open - confirm */
    uint32_t inuse;
    /* NOTE(review): meaning/units not visible in this chunk - confirm */
    uint32_t data_off;
    char padding[12];
} QEMU_PACKED ParallelsHeader;

/*
 * How the image file is extended when more space has to be allocated.
 * The numeric values double as indices into the driver's prealloc mode
 * lookup table, so they must not be renumbered.
 */
typedef enum ParallelsPreallocMode {
    /* Extend by writing zeroes to the image file (bdrv_pwrite_zeroes) */
    PRL_PREALLOC_MODE_FALLOCATE = 0,
    /* NOTE(review): presumably extends by truncating the file - confirm */
    PRL_PREALLOC_MODE_TRUNCATE = 1,
    /* Number of modes; used as the size of the lookup table */
    PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;

/*
 * Per-image driver state of the Parallels block driver (reached through
 * bs->opaque in the driver code).
 */
typedef struct BDRVParallelsState {
    /** Locking is conservative, the lock protects
     *   - image file extending (truncate, fallocate)
     *   - any access to block allocation table
     */
    CoMutex lock;

    /* NOTE(review): presumably the in-memory copy of the header - confirm */
    ParallelsHeader *header;
    uint32_t header_size;
    bool header_unclean;

    /* NOTE(review): presumably tracks modified parts of the BAT - confirm */
    unsigned long *bat_dirty_bmap;
    unsigned int  bat_dirty_block;

    /*
     * Block allocation table. Entries are kept little-endian; a newly
     * allocated cluster is recorded as data_end / off_multiplier.
     */
    uint32_t *bat_bitmap;
    unsigned int bat_size;

    /* End of allocated data in the image file, in sectors */
    int64_t  data_end;
    /* Extra space added whenever the image file has to be extended */
    uint64_t prealloc_size;
    ParallelsPreallocMode prealloc_mode;

    /* Cluster size in sectors */
    unsigned int tracks;

    /* Divisor applied to a sector offset before it is stored in the BAT */
    unsigned int off_multiplier;
    /* NOTE(review): presumably registered with the migration code - confirm */
    Error *migration_blocker;
} BDRVParallelsState;

#endif
+27 −0
Original line number Diff line number Diff line
@@ -435,6 +435,7 @@ tcmalloc="no"
jemalloc="no"
replication="yes"
vxhs=""
libxml2=""

supported_cpu="no"
supported_os="no"
@@ -1298,6 +1299,10 @@ for opt do
  ;;
  --enable-numa) numa="yes"
  ;;
  --disable-libxml2) libxml2="no"
  ;;
  --enable-libxml2) libxml2="yes"
  ;;
  --disable-tcmalloc) tcmalloc="no"
  ;;
  --enable-tcmalloc) tcmalloc="yes"
@@ -1573,6 +1578,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  tpm             TPM support
  libssh2         ssh block device support
  numa            libnuma support
  libxml2         for Parallels image format
  tcmalloc        tcmalloc support
  jemalloc        jemalloc support
  replication     replication support
@@ -3748,6 +3754,20 @@ EOF
  fi
fi

##########################################
# libxml2 probe
#
# $libxml2 is "" (auto-detect), "yes" (--enable-libxml2) or "no"
# (--disable-libxml2). The probe is skipped only when explicitly disabled.
# On success the pkg-config compiler and linker flags are captured for later
# export; on failure configure aborts only if the feature was explicitly
# requested, otherwise the feature is silently turned off.
if test "$libxml2" != "no" ; then
    if $pkg_config --exists libxml-2.0; then
        libxml2="yes"
        libxml2_cflags=$($pkg_config --cflags libxml-2.0)
        libxml2_libs=$($pkg_config --libs libxml-2.0)
    else
        # Explicit --enable-libxml2 without the library is a hard error
        if test "$libxml2" = "yes"; then
            feature_not_found "libxml2" "Install libxml2 devel"
        fi
        libxml2="no"
    fi
fi

##########################################
# glusterfs probe
@@ -5630,6 +5650,7 @@ echo "lzo support $lzo"
echo "snappy support    $snappy"
echo "bzip2 support     $bzip2"
echo "NUMA host support $numa"
echo "libxml2           $libxml2"
echo "tcmalloc support  $tcmalloc"
echo "jemalloc support  $jemalloc"
echo "avx2 optimization $avx2_opt"
@@ -6299,6 +6320,12 @@ if test "$have_rtnetlink" = "yes" ; then
  echo "CONFIG_RTNETLINK=y" >> $config_host_mak
fi

# Export the libxml2 probe result to the generated host makefile.
# LIBXML2_CFLAGS and LIBXML2_LIBS are the variables the block Makefile
# assigns to parallels.o-cflags and parallels.o-libs.
if test "$libxml2" = "yes" ; then
  echo "CONFIG_LIBXML2=y" >> $config_host_mak
  echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak
  echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak
fi

if test "$replication" = "yes" ; then
  echo "CONFIG_REPLICATION=y" >> $config_host_mak
fi
+158 −0
Original line number Diff line number Diff line
= License =

Copyright (c) 2015-2017, Virtuozzo, Inc.
Authors:
        2015 Denis Lunev <den@openvz.org>
        2015 Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
        2016-2017 Klim Kireev <klim.kireev@virtuozzo.com>
        2016-2017 Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>

This work is licensed under the terms of the GNU GPL, version 2 or later.
See the COPYING file in the top-level directory.

This specification contains minimal information about the Parallels Disk Format,
which is enough to work properly with QEMU. Nevertheless, Parallels Cloud Server
and Parallels Desktop are able to add some unspecified nodes to the XML and use
them, but they are for internal work and don't affect functionality. They also
use an auxiliary XML file, "Snapshot.xml", which allows storing optional snapshot
information, but it doesn't influence open/read/write functionality. QEMU and
other software should not use fields not covered in this document or the
Snapshot.xml file, and must leave them as is.

= Parallels Disk Format =

Parallels disk consists of two parts: the set of snapshots and the disk
descriptor file, which stores information about all files and snapshots.

== Definitions ==
    Snapshot       a record of the contents captured at a particular time,
                   capable of storing current state. A snapshot has UUID and
                   parent UUID.

 Snapshot image    an overlay representing the difference between this
                   snapshot and some earlier snapshot.

    Overlay        an image storing the different sectors between two captured
                   states.

   Root image      snapshot image with no parent, the root of snapshot tree.

    Storage        the backing storage for a subset of the virtual disk. When
                   there is more than one storage in a Parallels disk then that
                   is referred to as a split image. In this case every storage
                   covers specific address space area of the disk and has its
                   particular root image. Split images are not considered here
                   and are not supported. Each storage consists of disk
                   parameters and a list of images. The list of images always
                   contains a root image and may also contain overlays. The
                   root image can be an expandable Parallels image file or
                   plain. Overlays must be expandable.

  Description      DiskDescriptor.xml stores information about disk parameters,
     file          snapshots, storages.

     Top           The overlay between actual state and some previous snapshot.
   Snapshot        It is not a snapshot in the classical sense because it
                   serves as the active image that the guest writes to.

    Sector         a 512-byte data chunk.

== Description file ==
All information is placed in a single XML element Parallels_disk_image.
The element has only one attribute "Version", that must be 1.0.
Schema of DiskDescriptor.xml:

<Parallels_disk_image Version="1.0">
    <Disk_Parameters>
        ...
    </Disk_Parameters>
    <StorageData>
        ...
    </StorageData>
    <Snapshots>
        ...
    </Snapshots>
</Parallels_disk_image>

== Disk_Parameters element ==
The Disk_Parameters element describes the physical layout of the virtual disk
and some general settings.

The Disk_Parameters element MUST contain the following child elements:
    * Disk_size - number of sectors in the disk,
                  desired size of the disk.
    * Cylinders - number of the disk cylinders.
    * Heads     - number of the disk heads.
    * Sectors   - number of the disk sectors per cylinder
                  (sector size is 512 bytes)
                  Limitation: Product of the Heads, Sectors and Cylinders
                  values MUST be equal to the value of the Disk_size parameter.
    * Padding   - must be 0. Parallels Cloud Server and Parallels Desktop may
                  use padding set to 1, however this case is not covered
                  by this spec, QEMU and other software should not open
                  such disks and should not create them.

== StorageData element ==
This element of the file describes the root image and all snapshot images.

The StorageData element consists of the Storage child element, as shown below:
<StorageData>
    <Storage>
        ...
    </Storage>
</StorageData>

A Storage element has the following child elements:
    * Start     - start sector of the storage, in case of non split storage
                  equals to 0.
    * End       - number of sector following the last sector, in case of non
                  split storage equals to Disk_size.
    * Blocksize - storage cluster size, number of sectors per one cluster.
                  Cluster size for each "Compressed" (see below) image in
                  parallels disk must be equal to this field. Note: cluster
                  size for Parallels Expandable Image is in 'tracks' field of
                  its header (see docs/interop/parallels.txt).
    * Several Image child elements.

Each Image element has the following child elements:
    * GUID - image identifier, UUID in curly brackets.
             For instance, {12345678-9abc-def1-2345-6789abcdef12}.
             The GUID is used by the Snapshots element to reference images
             (see below)
    * Type - image type of the element. It can be:
             "Plain" for raw files.
             "Compressed" for expanding disks.
    * File - path to image file. Path can be relative to DiskDescriptor.xml or
             absolute.

== Snapshots element ==
The Snapshots element describes the snapshot relations with the snapshot tree.

The element contains the set of Shot child elements, as shown below:
<Snapshots>
    <TopGUID> ... </TopGUID> /* Optional child element */
    <Shot>
        ...
    </Shot>
    <Shot>
        ...
    </Shot>
    ...
</Snapshots>

Each Shot element contains the following child elements:
    * GUID       - an image GUID.
    * ParentGUID - GUID of the image of the parent snapshot.

The software may traverse snapshots from child to parent using <ParentGUID>
field as reference. ParentGUID of root snapshot is
{00000000-0000-0000-0000-000000000000}. There should be only one root
snapshot. The Top snapshot can be described in two ways: via the TopGUID child
element of the Snapshots element or via the predefined GUID
{5fbaabe3-6958-40ff-92a7-860e329aab41}. If TopGUID is defined, the predefined GUID
is interpreted as an ordinary GUID. All snapshot images (except Top Snapshot) should be
opened read-only. There is another predefined GUID,
BackupID = {704718e1-2314-44c8-9087-d78ed36b0f4e}, which is used by original and
some third-party software for backup, QEMU and other software may operate with
images with GUID = BackupID as usual, however, it is not recommended to use this
GUID for new disks. Top snapshot cannot have this GUID.
Loading