Commit 648ba915 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180214a' into staging



Migration pull 20180214

Note that the 'Add test for migration to bad destination' test displays
a 'Connection refused' message while running, but still gives the correct
exit code and OK (it's checking that the source doesn't fail when
it can't connect, so that's the right error).
If that is particularly disliked, the patch can be skipped individually.

# gpg: Signature made Wed 14 Feb 2018 15:33:04 GMT
# gpg:                using RSA key 0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20180214a:
  migration: pass MigrationState to migrate_init()
  migration: allow send_rq to fail
  migration: provide postcopy_fault_thread_notify()
  migration: reuse mis->userfault_quit_fd
  migration: better error handling with QEMUFile
  tests/migration: Add test for migration to bad destination
  migration: Fix early failure cleanup
  tests/migration: Add source to PC boot block
  migration: improve documentation of postcopy-ram
  migration/xen: Check return value of qemu_fclose

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 0402ca3c 3e0c8050
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -387,8 +387,8 @@ doesn't finish in a given time the switch is made to postcopy.
Enabling postcopy
-----------------

To enable postcopy, issue this command on the monitor prior to the
start of migration:
To enable postcopy, issue this command on the monitor (both source and
destination) prior to the start of migration:

``migrate_set_capability postcopy-ram on``

+2 −1
Original line number Diff line number Diff line
@@ -1041,7 +1041,8 @@ ETEXI
        .params     = "",
        .help       = "Followup to a migration command to switch the migration"
                      " to postcopy mode. The postcopy-ram capability must "
                      "be set before the original migration command.",
                      "be set on both source and destination before the "
                      "original migration command .",
        .cmd        = hmp_migrate_start_postcopy,
    },

+35 −12
Original line number Diff line number Diff line
@@ -205,17 +205,35 @@ static void deferred_incoming_migration(Error **errp)
 * Send a message on the return channel back to the source
 * of the migration.
 */
static void migrate_send_rp_message(MigrationIncomingState *mis,
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    /*
     * Result handed back to the caller: -EIO when there is no return-path
     * file to write to, otherwise whatever qemu_file_get_error() reports
     * once the message has been flushed.
     */
    int status;

    trace_migrate_send_rp_message((int)message_type, len);

    /* The whole message is emitted while holding rp_mutex. */
    qemu_mutex_lock(&mis->rp_mutex);

    if (mis->to_src_file) {
        /* Wire layout: 16-bit type, 16-bit length, then 'len' payload bytes */
        qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
        qemu_put_be16(mis->to_src_file, len);
        qemu_put_buffer(mis->to_src_file, data, len);
        qemu_fflush(mis->to_src_file);

        /* The QEMUFile may have hit an error while sending; pick it up */
        status = qemu_file_get_error(mis->to_src_file);
    } else {
        /* The file handle can be missing, e.g. lost to a network failure */
        status = -EIO;
    }

    qemu_mutex_unlock(&mis->rp_mutex);
    return status;
}

/* Request a range of pages from the source VM at the given
@@ -225,11 +243,12 @@ static void migrate_send_rp_message(MigrationIncomingState *mis,
 *   Start: Address offset within the RB
 *   Len: Length in bytes required - must be a multiple of pagesize
 */
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
                              ram_addr_t start, size_t len)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    enum mig_rp_message_type msg_type;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
@@ -241,10 +260,12 @@ void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
@@ -1237,10 +1258,8 @@ bool migration_is_idle(void)
    return false;
}

MigrationState *migrate_init(void)
void migrate_init(MigrationState *s)
{
    MigrationState *s = migrate_get_current();

    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
@@ -1270,7 +1289,6 @@ MigrationState *migrate_init(void)
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
    return s;
}

static GSList *migration_blockers;
@@ -1378,7 +1396,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
        migrate_set_block_incremental(s, true);
    }

    s = migrate_init();
    migrate_init(s);

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
@@ -1709,6 +1727,11 @@ static void *source_return_path_thread(void *opaque)
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
+6 −4
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@ struct MigrationIncomingState {
    bool           have_fault_thread;
    QemuThread     fault_thread;
    QemuSemaphore  fault_thread_sem;
    /* Set this when we want the fault thread to quit */
    bool           fault_thread_quit;

    bool           have_listen_thread;
    QemuThread     listen_thread;
@@ -42,8 +44,8 @@ struct MigrationIncomingState {

    /* For the kernel to send us notifications */
    int       userfault_fd;
    /* To tell the fault_thread to quit */
    int       userfault_quit_fd;
    /* To notify the fault_thread to wake, e.g., when need to quit */
    int       userfault_event_fd;
    QEMUFile *to_src_file;
    QemuMutex rp_mutex;    /* We send replies from multiple threads */
    void     *postcopy_tmp_page;
@@ -191,7 +193,7 @@ void migrate_fd_error(MigrationState *s, const Error *error);

void migrate_fd_connect(MigrationState *s, Error *error_in);

MigrationState *migrate_init(void);
void migrate_init(MigrationState *s);
bool migration_is_blocked(Error **errp);
/* True if outgoing migration has entered postcopy phase */
bool migration_in_postcopy(void);
@@ -228,7 +230,7 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value);
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value);
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
                              ram_addr_t start, size_t len);

#endif
+38 −22
Original line number Diff line number Diff line
@@ -377,27 +377,18 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
    trace_postcopy_ram_incoming_cleanup_entry();

    if (mis->have_fault_thread) {
        uint64_t tmp64;

        if (qemu_ram_foreach_block(cleanup_range, mis)) {
            return -1;
        }
        /*
         * Tell the fault_thread to exit, it's an eventfd that should
         * currently be at 0, we're going to increment it to 1
         */
        tmp64 = 1;
        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
        /* Let the fault thread quit */
        atomic_set(&mis->fault_thread_quit, 1);
        postcopy_fault_thread_notify(mis);
        trace_postcopy_ram_incoming_cleanup_join();
        qemu_thread_join(&mis->fault_thread);
        } else {
            /* Not much we can do here, but may as well report it */
            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
                         strerror(errno));
        }

        trace_postcopy_ram_incoming_cleanup_closeuf();
        close(mis->userfault_fd);
        close(mis->userfault_quit_fd);
        close(mis->userfault_event_fd);
        mis->have_fault_thread = false;
    }

@@ -520,7 +511,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
        pfd[0].fd = mis->userfault_fd;
        pfd[0].events = POLLIN;
        pfd[0].revents = 0;
        pfd[1].fd = mis->userfault_quit_fd;
        pfd[1].fd = mis->userfault_event_fd;
        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
        pfd[1].revents = 0;

@@ -530,9 +521,19 @@ static void *postcopy_ram_fault_thread(void *opaque)
        }

        if (pfd[1].revents) {
            uint64_t tmp64 = 0;

            /* Consume the signal */
            if (read(mis->userfault_event_fd, &tmp64, 8) != 8) {
                /* Nothing obviously nicer than posting this error. */
                error_report("%s: read() failed", __func__);
            }

            if (atomic_read(&mis->fault_thread_quit)) {
                trace_postcopy_ram_fault_thread_quit();
                break;
            }
        }

        ret = read(mis->userfault_fd, &msg, sizeof(msg));
        if (ret != sizeof(msg)) {
@@ -610,9 +611,9 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
    }

    /* Now an eventfd we use to tell the fault-thread to quit */
    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
    if (mis->userfault_quit_fd == -1) {
        error_report("%s: Opening userfault_quit_fd: %s", __func__,
    mis->userfault_event_fd = eventfd(0, EFD_CLOEXEC);
    if (mis->userfault_event_fd == -1) {
        error_report("%s: Opening userfault_event_fd: %s", __func__,
                     strerror(errno));
        close(mis->userfault_fd);
        return -1;
@@ -813,6 +814,21 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)

/* ------------------------------------------------------------------------- */

/*
 * postcopy_fault_thread_notify: wake up the postcopy fault thread.
 *
 * Writes a 64-bit value of 1 to mis->userfault_event_fd, the eventfd the
 * fault thread waits on.  The counter is expected to be at 0 beforehand,
 * so this raises it to 1.  Nothing is returned; if the write does not
 * transfer all 8 bytes the failure is only reported via error_report().
 */
void postcopy_fault_thread_notify(MigrationIncomingState *mis)
{
    const uint64_t increment = 1;
    ssize_t written;

    written = write(mis->userfault_event_fd, &increment, sizeof(increment));
    if (written == (ssize_t)sizeof(increment)) {
        return;
    }

    /* Not much we can do here, but may as well report it */
    error_report("%s: incrementing failed: %s", __func__,
                 strerror(errno));
}

/**
 * postcopy_discard_send_init: Called at the start of each RAMBlock before
 *   asking to discard individual ranges.
Loading