Commit 1b8c4589 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20200507a' into staging



Migration pull 2020-05-07

Mostly tidy-ups, but two new features:
  cpu-throttle-tailslow for making a gentler throttle
  xbzrle encoding rate measurement for getting a feel for xbzrle
performance.

# gpg: Signature made Thu 07 May 2020 18:00:27 BST
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20200507a:
  migration/multifd: Do error_free after migrate_set_error to avoid memleaks
  migration/multifd: fix memleaks in multifd_new_send_channel_async
  migration/xbzrle: add encoding rate
  migration/rdma: fix a memleak on error path in rdma_start_incoming_migration
  migration/ram: Consolidate variable reset after placement in ram_load_postcopy()
  migration/throttle: Add cpu-throttle-tailslow migration parameter
  migration/colo: Add missing error-propagation code
  docs/devel/migration: start a debugging section
  migration: move the units of migrate parameters from milliseconds to ms
  monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed()
  migration/migration: improve error reporting for migrate parameters
  migration: fix bad indentation in error_report()

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 3c7adbc6 13f2cb21
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure to
save/restore state devices.  This infrastructure is shared with the
savevm/loadvm functionality.

Debugging
=========

The migration stream can be analyzed thanks to `scripts/analyze_migration.py`.

Example usage:

.. code-block:: shell

  $ qemu-system-x86_64
   (qemu) migrate "exec:cat > mig"
  $ ./scripts/analyze_migration.py -f mig
  {
    "ram (3)": {
        "section sizes": {
            "pc.ram": "0x0000000008000000",
  ...

See also ``analyze_migration.py -h`` help for more options.

Common infrastructure
=====================

+3 −0
Original line number Diff line number Diff line
@@ -443,6 +443,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s,

    /* Disable block migration */
    migrate_set_block_enabled(false, &local_err);
    if (local_err) {
        goto out;
    }
    qemu_mutex_lock_iothread();

#ifdef CONFIG_REPLICATION
+31 −13
Original line number Diff line number Diff line
@@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
@@ -930,6 +932,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

@@ -1202,16 +1205,19 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                         " range of 0 to %zu bytes/second", SIZE_MAX);
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                         "the range of 0 to %d milliseconds",
                         MAX_MIGRATE_DOWNTIME);
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

@@ -1324,6 +1330,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = g_strdup(params->tls_creds->u.s);
@@ -1412,6 +1422,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
@@ -2107,9 +2121,10 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
void qmp_migrate_set_downtime(double value, Error **errp)
{
    if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                         "the range of 0 to %d seconds",
                         MAX_MIGRATE_DOWNTIME_SECONDS);
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds");
        return;
    }

@@ -3593,6 +3608,8 @@ static Property migration_properties[] = {
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                      parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                      parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
@@ -3699,6 +3716,7 @@ static void migration_instance_init(Object *obj)
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
+5 −0
Original line number Diff line number Diff line
@@ -550,6 +550,7 @@ void multifd_save_cleanup(void)
        multifd_send_state->ops->send_cleanup(p, &local_err);
        if (local_err) {
            migrate_set_error(migrate_get_current(), local_err);
            error_free(local_err);
        }
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
@@ -688,6 +689,7 @@ out:
    if (local_err) {
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
        error_free(local_err);
    }

    /*
@@ -727,6 +729,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
         * its status.
         */
        p->quit = true;
        object_unref(OBJECT(sioc));
        error_free(local_err);
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
@@ -963,6 +967,7 @@ static void *multifd_recv_thread(void *opaque)

    if (local_err) {
        multifd_recv_terminate_threads(local_err);
        error_free(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
+62 −12
Original line number Diff line number Diff line
@@ -327,6 +327,10 @@ struct RAMState {
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;

    /* compression statistics since the beginning of the period */
    /* amount of count that no free thread to compress data */
@@ -616,20 +620,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
                         pct_max));
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /* Compute the ideal CPU percentage used by Guest, which may
             * make the dirty rate match the dirty rate threshold. */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

@@ -696,6 +714,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to caculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
@@ -726,6 +756,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

@@ -736,8 +767,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
@@ -870,9 +905,23 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev) {
            xbzrle_counters.encoding_rate = 0;
        } else if (!encoded_size) {
            xbzrle_counters.encoding_rate = UINT64_MAX;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
@@ -919,7 +968,8 @@ static void migration_trigger_throttle(RAMState *rs)
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down();
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}
@@ -3132,7 +3182,7 @@ static int ram_load_postcopy(QEMUFile *f)
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = mis->postcopy_tmp_page;
    void *this_host = NULL;
    bool all_zero = false;
    bool all_zero = true;
    int target_pages = 0;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
@@ -3159,7 +3209,6 @@ static int ram_load_postcopy(QEMUFile *f)
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(f, flags);
@@ -3184,9 +3233,7 @@ static int ram_load_postcopy(QEMUFile *f)
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (target_pages == 1) {
                all_zero = true;
                this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
                                                    block->page_size);
            } else {
@@ -3206,7 +3253,6 @@ static int ram_load_postcopy(QEMUFile *f)
             */
            if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
                target_pages = 0;
            }
            place_source = postcopy_host_page;
        }
@@ -3288,6 +3334,10 @@ static int ram_load_postcopy(QEMUFile *f)
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
            place_needed = false;
            target_pages = 0;
            /* Assume we have a zero page until we detect something different */
            all_zero = true;
        }
    }

Loading