Commit b3f7f0c5 authored by zhanghailiang's avatar zhanghailiang Committed by Amit Shah
Browse files

COLO: Implement the process of failover for primary VM



On the primary side, when COLO receives a failover request from the user
(to be exact, the 'x_colo_lost_heartbeat' command), the COLO thread exits
its checkpoint loop while the failover BH does the cleanup work and
resumes the VM.

Signed-off-by: default avatarzhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: default avatarLi Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: default avatarDr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: default avatarAmit Shah <amit.shah@redhat.com>
Signed-off-by: default avatarAmit Shah <amit@amitshah.net>
parent aef06085
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -32,4 +32,7 @@ void *colo_process_incoming_thread(void *opaque);
bool migration_incoming_in_colo_state(void);

COLOMode get_colo_mode(void);

/* failover */
void colo_do_failover(MigrationState *s);
#endif
+1 −0
Original line number Diff line number Diff line
@@ -21,5 +21,6 @@ FailoverStatus failover_set_state(FailoverStatus old_state,
                                     FailoverStatus new_state);
FailoverStatus failover_get_state(void);
void failover_request_active(Error **errp);
bool failover_request_is_active(void);

#endif
+1 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ static void colo_failover_bh(void *opaque)
        return;
    }

    /* TODO: Do failover work */
    colo_do_failover(NULL);
}

void failover_request_active(Error **errp)
+50 −0
Original line number Diff line number Diff line
@@ -41,6 +41,40 @@ bool migration_incoming_in_colo_state(void)
    return mis && (mis->state == MIGRATION_STATUS_COLO);
}

/*
 * Tell whether the VM should be treated as stopped by the COLO code.
 *
 * Returns true when the current run state is RUN_STATE_COLO, or when
 * runstate_is_running() reports that the VM is not running.
 */
static bool colo_runstate_is_stopped(void)
{
    /* Already parked in the COLO run state: nothing else to look at. */
    if (runstate_check(RUN_STATE_COLO)) {
        return true;
    }

    return !runstate_is_running();
}

/*
 * Failover handling for the primary VM.
 *
 * Asks for the migration state to go from MIGRATION_STATUS_COLO to
 * MIGRATION_STATUS_COMPLETED, then asks for the failover state to go
 * from FAILOVER_STATUS_ACTIVE to FAILOVER_STATUS_COMPLETED.  If the
 * value handed back by failover_set_state() is not
 * FAILOVER_STATUS_ACTIVE, an error is reported.
 */
static void primary_vm_do_failover(void)
{
    MigrationState *s = migrate_get_current();
    int old_state;

    migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                   FAILOVER_STATUS_COMPLETED);
    if (old_state == FAILOVER_STATUS_ACTIVE) {
        /* Expected case: failover was active before this call. */
        return;
    }

    /* Presumably the previous state; name it in the error message. */
    error_report("Incorrect state (%s) while doing failover for Primary VM",
                 FailoverStatus_lookup[old_state]);
}

/*
 * Entry point for carrying out a COLO failover.
 *
 * @s: migration state; not referenced by this function.
 *
 * Forces the VM into RUN_STATE_COLO unless it already counts as
 * stopped, then runs the primary-side failover when this QEMU is in
 * COLO_MODE_PRIMARY.  Any other mode is left untouched here.
 */
void colo_do_failover(MigrationState *s)
{
    /* The VM must not be running while failover is carried out. */
    if (colo_runstate_is_stopped()) {
        /* Already stopped: no state change needed. */
    } else {
        vm_stop_force_state(RUN_STATE_COLO);
    }

    if (get_colo_mode() != COLO_MODE_PRIMARY) {
        return;
    }

    primary_vm_do_failover();
}

static void colo_send_message(QEMUFile *f, COLOMessage msg,
                              Error **errp)
{
@@ -162,9 +196,20 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
    bioc->usage = 0;

    qemu_mutex_lock_iothread();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("run", "stop");
    /*
     * The failover request BH could run after vm_stop_force_state(),
     * so we need to check the failover state again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }

    /* Disable block migration */
    s->params.blk = 0;
@@ -259,6 +304,11 @@ static void colo_process_checkpoint(MigrationState *s)
    trace_colo_vm_state_change("stop", "run");

    while (s->state == MIGRATION_STATUS_COLO) {
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
        if (current_time - checkpoint_time <
            s->parameters.x_checkpoint_delay) {