Commit b43bea01 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-request' into staging



Migration pull request

This series fixes problems with migration-cancel while using multifd.
In some cases it can hang waiting in a semaphore.

Please apply.

# gpg: Signature made Thu 25 Jul 2019 11:56:57 BST
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration-pull-request:
  migration: fix migrate_cancel multifd migration leads destination hung forever
  migration: Make explicit that we are quitting multifd
  migration: fix migrate_cancel leads live_migration thread hung forever
  migration: fix migrate_cancel leads live_migration thread endless loop

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 7ea53245 f193bc0c
Loading
Loading
Loading
Loading
+57 −9
Original line number Diff line number Diff line
@@ -677,6 +677,8 @@ typedef struct {
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
@@ -920,7 +922,7 @@ struct {
 * false.
 */

static void multifd_send_pages(void)
static int multifd_send_pages(void)
{
    int i;
    static int next_channel;
@@ -933,6 +935,11 @@ static void multifd_send_pages(void)
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
@@ -951,9 +958,11 @@ static void multifd_send_pages(void)
    ram_counters.transferred += transferred;;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}

static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

@@ -968,15 +977,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
        pages->used++;

        if (pages->used < pages->allocated) {
            return;
            return 1;
        }
    }

    multifd_send_pages();
    if (multifd_send_pages() < 0) {
        return -1;
    }

    if (pages->block != block) {
        multifd_queue_page(block, offset);
        return  multifd_queue_page(block, offset);
    }

    return 1;
}

static void multifd_send_terminate_threads(Error *err)
@@ -1049,7 +1062,10 @@ static void multifd_send_sync_main(void)
        return;
    }
    if (multifd_send_state->pages->used) {
        multifd_send_pages();
        if (multifd_send_pages() < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return;
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1058,6 +1074,12 @@ static void multifd_send_sync_main(void)

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
@@ -1077,7 +1099,8 @@ static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;
    int ret = 0;
    uint32_t flags = 0;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();
@@ -1095,7 +1118,7 @@ static void *multifd_send_thread(void *opaque)
        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;
            flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
@@ -1144,6 +1167,17 @@ out:
        multifd_send_terminate_threads(local_err);
    }

    /*
     * Error happen, I will exit, but I can't just leave, tell
     * who pay attention to me.
     */
    if (ret != 0) {
        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_send_state->sem_sync);
        }
        qemu_sem_post(&multifd_send_state->channels_ready);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);
@@ -1234,6 +1268,7 @@ static void multifd_recv_terminate_threads(Error *err)
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
@@ -1256,6 +1291,12 @@ int multifd_load_cleanup(Error **errp)
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            p->quit = true;
            /*
             * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
             * however try to wakeup it without harm in cleanup phase.
             */
            qemu_sem_post(&p->sem_sync);
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
@@ -1319,6 +1360,10 @@ static void *multifd_recv_thread(void *opaque)
        uint32_t used;
        uint32_t flags;

        if (p->quit) {
            break;
        }

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        if (ret == 0) {   /* EOF */
@@ -1390,6 +1435,7 @@ int multifd_load_setup(void)

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
@@ -2033,7 +2079,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    multifd_queue_page(block, offset);
    if (multifd_queue_page(block, offset) < 0) {
        return -1;
    }
    ram_counters.normal++;

    return 1;