Commit ee86981b authored by Peter Maydell's avatar Peter Maydell
Browse files

migration: Revert postcopy-blocktime commit set



This reverts commits
ca6011c2 migration: add postcopy total blocktime into query-migrate
5f32dc8e migration: add blocktime calculation into migration-test
2f7dae9d migration: postcopy_blocktime documentation
3be98be4 migration: calculate vCPU blocktime on dst side
01a87f0b migration: add postcopy blocktime ctx into MigrationIncomingState
31bf06a9 migration: introduce postcopy-blocktime capability

as they don't build on ppc32 due to trying to do atomic accesses
on types that are larger than the host pointer type.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parent ee264eb3
Loading
Loading
Loading
Loading
+0 −14
Original line number Diff line number Diff line
@@ -401,20 +401,6 @@ will now cause the transition from precopy to postcopy.
It can be issued immediately after migration is started or any
time later on.  Issuing it after the end of a migration is harmless.

Blocktime is a postcopy live migration metric, intended to show how
long a vCPU was in a state of interruptible sleep due to a page fault.
That metric is calculated both for all vCPUs as an overlapped value, and
separately for each vCPU. These values are calculated on the destination
side.  To enable postcopy blocktime calculation, enter the following
command on the destination monitor:

``migrate_set_capability postcopy-blocktime on``

Postcopy blocktime can be retrieved with the query-migrate QMP command.
The postcopy-blocktime field of the reply shows the overlapped blocking
time across all vCPUs, while postcopy-vcpu-blocktime shows a list of
blocking times per vCPU.

.. note::
  During the postcopy phase, the bandwidth limits set using
  ``migrate_set_speed`` is ignored (to avoid delaying requested pages that
+0 −15
Original line number Diff line number Diff line
@@ -264,21 +264,6 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                       info->cpu_throttle_percentage);
    }

    if (info->has_postcopy_blocktime) {
        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
                       info->postcopy_blocktime);
    }

    if (info->has_postcopy_vcpu_blocktime) {
        Visitor *v;
        char *str;
        v = string_output_visitor_new(false, &str);
        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
        visit_complete(v, &str);
        monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
        g_free(str);
        visit_free(v);
    }
    qapi_free_MigrationInfo(info);
    qapi_free_MigrationCapabilityStatusList(caps);
}
+4 −47
Original line number Diff line number Diff line
@@ -596,15 +596,14 @@ static void populate_disk_info(MigrationInfo *info)
    }
}

static void fill_source_migration_info(MigrationInfo *info)
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
@@ -655,6 +654,8 @@ static void fill_source_migration_info(MigrationInfo *info)
        break;
    }
    info->status = s->state;

    return info;
}

/**
@@ -718,41 +719,6 @@ static bool migrate_caps_check(bool *cap_list,
    return true;
}

/*
 * Fill in the incoming (destination) side of @info from the incoming
 * migration state.  Leaves @info untouched when no incoming migration
 * has ever run, so a source-side fill can still report its own status.
 */
static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        /* No incoming migration: report nothing */
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        /* blocktime stats are only filled in once migration completed */
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

/*
 * QMP handler for query-migrate: return a freshly allocated
 * MigrationInfo combining the destination and source migration state.
 * The caller owns (and must free) the returned structure.
 */
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *ret;

    ret = g_malloc0(sizeof(*ret));
    /* Destination first: the source fill returns early when no outgoing
     * migration ever ran, leaving the destination status intact. */
    fill_destination_migration_info(ret);
    fill_source_migration_info(ret);

    return ret;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
@@ -1541,15 +1507,6 @@ bool migrate_zero_blocks(void)
    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

/* Return true if the postcopy-blocktime capability is enabled. */
bool migrate_postcopy_blocktime(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
}

bool migrate_use_compression(void)
{
    MigrationState *s;
+0 −13
Original line number Diff line number Diff line
@@ -22,8 +22,6 @@
#include "hw/qdev.h"
#include "io/channel.h"

struct PostcopyBlocktimeContext;

/* State for the incoming migration */
struct MigrationIncomingState {
    QEMUFile *from_src_file;
@@ -61,20 +59,10 @@ struct MigrationIncomingState {
    /* The coroutine we should enter (back) after failover */
    Coroutine *migration_incoming_co;
    QemuSemaphore colo_incoming_sem;

    /*
     * PostcopyBlocktimeContext to keep information for postcopy
     * live migration, to calculate vCPU block time
     * */
    struct PostcopyBlocktimeContext *blocktime_ctx;
};

MigrationIncomingState *migration_incoming_get_current(void);
void migration_incoming_state_destroy(void);
/*
 * Functions to work with blocktime context
 */
void fill_destination_postcopy_migration_info(MigrationInfo *info);

#define TYPE_MIGRATION "migration"

@@ -235,7 +223,6 @@ int migrate_compress_level(void);
int migrate_compress_threads(void);
int migrate_decompress_threads(void);
bool migrate_use_events(void);
bool migrate_postcopy_blocktime(void);

/* Sending on the return path - generic and then for each message type */
void migrate_send_rp_shut(MigrationIncomingState *mis,
+1 −257
Original line number Diff line number Diff line
@@ -61,101 +61,6 @@ struct PostcopyDiscardState {
#include <sys/eventfd.h>
#include <linux/userfaultfd.h>

/*
 * State for calculating how long vCPUs on the destination were blocked
 * waiting for postcopy pages.  One context exists per incoming
 * migration (hung off MigrationIncomingState::blocktime_ctx); the
 * per-vCPU arrays are sized for smp_cpus.
 */
typedef struct PostcopyBlocktimeContext {
    /* time when page fault initiated per vCPU */
    int64_t *page_fault_vcpu_time;
    /* faulting page address per vCPU (0 when the vCPU is not blocked) */
    uintptr_t *vcpu_addr;
    /* total overlapped blocktime across all vCPUs */
    int64_t total_blocktime;
    /* blocktime per vCPU */
    int64_t *vcpu_blocktime;
    /* point in time when last page fault was initiated */
    int64_t last_begin;
    /* number of vCPUs currently suspended on page faults */
    int smp_cpus_down;

    /*
     * Handler for the exit event, needed so the whole
     * blocktime_ctx is released at shutdown
     */
    Notifier exit_notifier;
} PostcopyBlocktimeContext;

/* Release a PostcopyBlocktimeContext and all of its per-vCPU arrays. */
static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
{
    g_free(ctx->vcpu_blocktime);
    g_free(ctx->vcpu_addr);
    g_free(ctx->page_fault_vcpu_time);
    g_free(ctx);
}

/* Exit notifier callback: tear down the context embedding the notifier. */
static void migration_exit_cb(Notifier *n, void *data)
{
    destroy_blocktime_context(container_of(n, PostcopyBlocktimeContext,
                                           exit_notifier));
}

/*
 * Allocate a PostcopyBlocktimeContext with per-vCPU tracking arrays
 * sized for smp_cpus, and register an exit notifier so the context is
 * released at shutdown.
 */
static struct PostcopyBlocktimeContext *blocktime_context_new(void)
{
    PostcopyBlocktimeContext *ctx;

    ctx = g_new0(PostcopyBlocktimeContext, 1);
    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
    ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);

    ctx->exit_notifier.notify = migration_exit_cb;
    qemu_add_exit_notifier(&ctx->exit_notifier);

    return ctx;
}

/*
 * Build an int64List of per-vCPU blocktime values.  Nodes are prepended
 * starting from the highest vCPU index, so the resulting list comes out
 * in ascending vCPU order.
 */
static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
{
    int64List *head = NULL;
    int i = smp_cpus;

    while (i-- > 0) {
        int64List *node = g_new0(int64List, 1);

        node->value = ctx->vcpu_blocktime[i];
        node->next = head;
        head = node;
    }

    return head;
}

/*
 * Populate the postcopy blocktime fields of @info from the incoming
 * migration's blocktime context.  Does nothing when no context exists,
 * i.e. when the postcopy-blocktime capability was not enabled on the
 * destination.
 *
 * @info: pointer to MigrationInfo to populate
 */
void fill_destination_postcopy_migration_info(MigrationInfo *info)
{
    PostcopyBlocktimeContext *bc;

    bc = migration_incoming_get_current()->blocktime_ctx;
    if (bc) {
        info->has_postcopy_blocktime = true;
        info->postcopy_blocktime = bc->total_blocktime;
        info->has_postcopy_vcpu_blocktime = true;
        info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
    }
}

static uint64_t get_postcopy_total_blocktime(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyBlocktimeContext *bc = mis->blocktime_ctx;

    if (!bc) {
        return 0;
    }

    return bc->total_blocktime;
}

/**
 * receive_ufd_features: check userfault fd features, to request only supported
@@ -248,19 +153,6 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
        }
    }

#ifdef UFFD_FEATURE_THREAD_ID
    if (migrate_postcopy_blocktime() && mis &&
        UFFD_FEATURE_THREAD_ID & supported_features) {
        /* kernel supports that feature */
        /* don't create blocktime_context if it exists */
        if (!mis->blocktime_ctx) {
            mis->blocktime_ctx = blocktime_context_new();
        }

        asked_features |= UFFD_FEATURE_THREAD_ID;
    }
#endif

    /*
     * request features, even if asked_features is 0, due to
     * kernel expects UFFD_API before UFFDIO_REGISTER, per
@@ -531,9 +423,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
        mis->postcopy_tmp_zero_page = NULL;
    }
    trace_postcopy_ram_incoming_cleanup_blocktime(
            get_postcopy_total_blocktime());

    trace_postcopy_ram_incoming_cleanup_exit();
    return 0;
}
@@ -605,142 +494,6 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
    return 0;
}

/*
 * Map a faulting thread id to its vCPU index, or -1 when the pid does
 * not belong to any vCPU thread.  The lookup result is traced.
 */
static int get_mem_fault_cpu_index(uint32_t pid)
{
    CPUState *cs;
    int index = -1;

    CPU_FOREACH(cs) {
        if (cs->thread_id == pid) {
            index = cs->cpu_index;
            break;
        }
    }
    trace_get_mem_fault_cpu_index(index, pid);
    return index;
}

/*
 * Called from the fault thread when a page fault occurs: records the
 * start of a vCPU's blocked period in the blocktime context.  No-op
 * when blocktime tracking is disabled (no context), when the kernel
 * did not report a thread id, or when the pid is not a vCPU thread.
 *
 * @addr: faulted host virtual address
 * @ptid: faulted process thread id (0 if the kernel reported none)
 * @rb: ramblock appropriate to addr
 */
static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
                                          RAMBlock *rb)
{
    int cpu, already_received;
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
    int64_t now_ms;

    if (!dc || ptid == 0) {
        return;
    }
    cpu = get_mem_fault_cpu_index(ptid);
    if (cpu < 0) {
        return;
    }

    now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    /* vcpu_addr[cpu] == 0 means this vCPU was not already blocked */
    if (dc->vcpu_addr[cpu] == 0) {
        atomic_inc(&dc->smp_cpus_down);
    }

    atomic_xchg__nocheck(&dc->last_begin, now_ms);
    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);

    /* Re-check receipt here rather than at function entry: checking
     * earlier could race with (run before) the bitmap_set in
     * qemu_ufd_copy_ioctl.  If the page already arrived, undo the
     * bookkeeping above. */
    already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
    if (already_received) {
        atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
        atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
        atomic_dec(&dc->smp_cpus_down);
    }
    trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
                                        cpu, already_received);
}

/*
 * Called when a page arrives: accounts the blocktime of every vCPU that
 * was waiting on @addr, and traces it.  The total (overlapped)
 * blocktime is only advanced when all vCPUs were blocked simultaneously.
 *
 * Assume we have 3 CPUs:
 *
 *      S1        E1           S1               E1
 * -----***********------------xxx***************------------------------> CPU1
 *
 *             S2                E2
 * ------------****************xxx---------------------------------------> CPU2
 *
 *                         S3            E3
 * ------------------------****xxx********-------------------------------> CPU3
 *
 * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
 * S2,E1 - doesn't match the condition: the sequence S1,S2,E1 doesn't
 *         include CPU3
 * S3,S1,E2 - sequence includes all CPUs; the overlap S1,E2 is part of
 *            the total blocktime.
 * S1 - here is last_begin
 * Legend of the picture:
 *              * - blocktime per vCPU
 *              x - overlapped blocktime (total blocktime)
 *
 * @addr: host virtual address
 */
static void mark_postcopy_blocktime_end(uintptr_t addr)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
    int i, affected_cpu = 0;
    int64_t now_ms;
    bool vcpu_total_blocktime = false;
    int64_t read_vcpu_time;

    if (!dc) {
        return;
    }

    now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* Look up (and clear) every vCPU blocked on this address.  A linear
     * scan is straightforward but not optimal; a tree or hash keyed by
     * address, mapping to a list of vCPUs, would be faster. */
    for (i = 0; i < smp_cpus; i++) {
        uint64_t vcpu_blocktime = 0;

        read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
        if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
            read_vcpu_time == 0) {
            continue;
        }
        atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
        vcpu_blocktime = now_ms - read_vcpu_time;
        affected_cpu += 1;
        /* We need to know whether this call was triggered by a faulted
         * page; the other possibility is a prefetched page, in which
         * case no vCPU matches and we never get here. */
        if (!vcpu_total_blocktime &&
            atomic_fetch_add(&dc->smp_cpus_down, 0) == smp_cpus) {
            vcpu_total_blocktime = true;
        }
        /* continue cycle, due to one page could affect several vCPUs */
        dc->vcpu_blocktime[i] += vcpu_blocktime;
    }

    atomic_sub(&dc->smp_cpus_down, affected_cpu);
    if (vcpu_total_blocktime) {
        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
    }
    trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
                                      affected_cpu);
}

/*
 * Handle faults detected by the USERFAULT markings
 */
@@ -818,11 +571,8 @@ static void *postcopy_ram_fault_thread(void *opaque)
        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                qemu_ram_get_idstr(rb),
                                                rb_offset,
                                                msg.arg.pagefault.feat.ptid);
                                                rb_offset);

        mark_postcopy_blocktime_begin((uintptr_t)(msg.arg.pagefault.address),
                                      msg.arg.pagefault.feat.ptid, rb);
        /*
         * Send the request to the source - we want to request one
         * of our host page sizes (which is >= TPS)
@@ -912,8 +662,6 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
    if (!ret) {
        ramblock_recv_bitmap_set_range(rb, host_addr,
                                       pagesize / qemu_target_page_size());
        mark_postcopy_blocktime_end((uintptr_t)host_addr);

    }
    return ret;
}
@@ -1011,10 +759,6 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)

#else
/* No target OS support, stubs just fail */
/* Stub for hosts without userfaultfd: no blocktime stats to report. */
void fill_destination_postcopy_migration_info(MigrationInfo *info)
{
}

bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
{
    error_report("%s: No OS support", __func__);
Loading