Commit 6f6831f6 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20140225' into staging



migration/next for 20140225

# gpg: Signature made Tue 25 Feb 2014 14:04:31 GMT using RSA key ID 5872D723
# gpg: Can't check signature: public key not found

* remotes/juanquintela/tags/migration/20140225:
  rdma: rename 'x-rdma' => 'rdma'
  Fix two XBZRLE corruption issues
  Fix vmstate_info_int32_le comparison/assign
  qemu_file: use fwrite() correctly

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents bc3fbad8 41310c68
Loading
Loading
Loading
Loading
+49 −15
Original line number Diff line number Diff line
@@ -122,7 +122,6 @@ static void check_guest_throttling(void);
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */


static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@ static struct defconfig_file {
    { NULL }, /* end of list */
};

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

int qemu_read_default_config_files(bool userconfig)
{
@@ -273,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
    return size;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* Record in the xbzrle cache that the page at current_addr has just been
 * sent as an all-zero page.
 *
 * Any older, not-yet-zeroed copy of the page held in the cache must be
 * overwritten, otherwise it would be stale.  If the page was not cached
 * at all it is inserted, so that a later small write into the zeroed
 * page can be sent XBZRLE-encoded.
 *
 * Nothing is done while ram_bulk_stage is set or when xbzrle is not in use.
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (!ram_bulk_stage && migrate_use_xbzrle()) {
        /* A failure to allocate a new cache page is deliberately ignored;
         * what matters is that an already-cached page gets updated. */
        cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
    }
}

#define ENCODING_FLAG_XBZRLE 0x1

static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -329,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
    return bytes_sent;
}


/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
@@ -512,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
        } else {
            int ret;
            uint8_t *p;
            bool send_async = true;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

@@ -522,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
            ret = ram_control_save_page(f, block->offset,
                               offset, TARGET_PAGE_SIZE, &bytes_sent);

            current_addr = block->offset + offset;
            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
@@ -536,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
                 * page would be stale
                 */
                xbzrle_cache_zero_page(current_addr);
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
                    /* We must send exactly what's in the xbzrle cache
                     * even if the page wasn't xbzrle compressed, so that
                     * it's right next time.
                     */
                    p = get_cached_data(XBZRLE.cache, current_addr);

                    /* Can't send this cached data async, since the cache page
                     * might get updated before it gets to the wire
                     */
                    send_async = false;
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                if (send_async) {
                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                } else {
                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                }
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }
+10 −14
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
high-performance RDMA hardware using the following command:

QEMU Monitor Command:
$ migrate_set_capability x-rdma-pin-all on # disabled by default
$ migrate_set_capability rdma-pin-all on # disabled by default

Performing this action will cause all 8GB to be pinned, so if that's
not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device

Next, on the destination machine, add the following to the QEMU command line:

qemu ..... -incoming x-rdma:host:port
qemu ..... -incoming rdma:host:port

Finally, perform the actual migration on the source machine:

QEMU Monitor Command:
$ migrate -d x-rdma:host:port
$ migrate -d rdma:host:port

PERFORMANCE
===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
active use and the VM itself is completely idle using the same 40 gbps
infiniband link:

1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps

These numbers would of course scale up to whatever size virtual machine
you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.

TODO:
=====
1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
   renamed to 'rdma' after the experimental phase of this work has
   completed upstream.
2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
   are not compatible with infiniband memory pinning and will result in
   an aborted migration (but with the source VM left unaffected).
3. Use of the recent /proc/<pid>/pagemap would likely speed up
2. Use of the recent /proc/<pid>/pagemap would likely speed up
   the use of KSM and ballooning while using RDMA.
4. Also, some form of balloon-device usage tracking would also
3. Also, some form of balloon-device usage tracking would also
   help alleviate some issues.
5. Move UNREGISTER requests to a separate thread.
6. Use LRU to provide more fine-grained direction of UNREGISTER
4. Use LRU to provide more fine-grained direction of UNREGISTER
   requests for unpinning memory in an overcommitted environment.
7. Expose UNREGISTER support to the user by way of workload-specific
5. Expose UNREGISTER support to the user by way of workload-specific
   hints about application behavior.
+1 −1
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
 * @addr: page address
 * @pdata: pointer to the page
 */
int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);

/**
 * cache_resize: resize the page cache. In case of size reduction the extra
+1 −1
Original line number Diff line number Diff line
@@ -3412,7 +3412,7 @@ void rdma_start_outgoing_migration(void *opaque,
    }

    ret = qemu_rdma_source_init(rdma, &local_err,
        s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);

    if (ret) {
        goto err;
+3 −3
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
    if (strstart(uri, "tcp:", &p))
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    else if (strstart(uri, "x-rdma:", &p))
    else if (strstart(uri, "rdma:", &p))
        rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
@@ -438,7 +438,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "x-rdma:", &p)) {
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
@@ -532,7 +532,7 @@ bool migrate_rdma_pin_all(void)

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
    return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
}

bool migrate_auto_converge(void)
Loading