Commit 6dc2a774 authored by Sunil Muthuswamy's avatar Sunil Muthuswamy Committed by Wei Liu
Browse files

x86/Hyper-V: Support for free page reporting



Linux has support for free page reporting now (36e66c55) for
virtualized environment. On Hyper-V when virtually backed VMs are
configured, Hyper-V will advertise cold memory discard capability,
when supported. This patch adds the support to hook into the free
page reporting infrastructure and leverage the Hyper-V cold memory
discard hint hypercall to report/free these pages back to the host.

Signed-off-by: default avatarSunil Muthuswamy <sunilmut@microsoft.com>
Tested-by: default avatarMatheus Castello <matheus@castello.eng.br>
Reviewed-by: default avatarMichael Kelley <mikelley@microsoft.com>
Tested-by: default avatarNathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/SN4PR2101MB0880121FA4E2FEC67F35C1DCC0649@SN4PR2101MB0880.namprd21.prod.outlook.com


Signed-off-by: default avatarWei Liu <wei.liu@kernel.org>
parent 1b602808
Loading
Loading
Loading
Loading
+50 −1
Original line number Diff line number Diff line
@@ -498,6 +498,8 @@ void __init hyperv_init(void)
		x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
#endif

	/* Query the VMs extended capability once, so that it can be cached. */
	hv_query_ext_cap(0);
	return;

remove_cpuhp_state:
@@ -601,7 +603,7 @@ EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);

enum hv_isolation_type hv_get_isolation_type(void)
{
	if (!(ms_hyperv.features_b & HV_ISOLATION))
	if (!(ms_hyperv.priv_high & HV_ISOLATION))
		return HV_ISOLATION_TYPE_NONE;
	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
}
@@ -612,3 +614,50 @@ bool hv_is_isolation_supported(void)
	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);

/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
bool hv_query_ext_cap(u64 cap_query)
{
	/*
	 * The address of the 'hv_extended_cap' variable will be used as an
	 * output parameter to the hypercall below and so it should be
	 * compatible with 'virt_to_phys'. Which means, it's address should be
	 * directly mapped. Use 'static' to keep it compatible; stack variables
	 * can be virtually mapped, making them imcompatible with
	 * 'virt_to_phys'.
	 * Hypercall input/output addresses should also be 8-byte aligned.
	 */
	static u64 hv_extended_cap __aligned(8);
	static bool hv_extended_cap_queried;
	u64 status;

	/*
	 * Querying extended capabilities is an extended hypercall. Check if the
	 * partition supports extended hypercall, first.
	 */
	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
		return false;

	/* Extended capabilities do not change at runtime. */
	if (hv_extended_cap_queried)
		return hv_extended_cap & cap_query;

	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
				 &hv_extended_cap);

	/*
	 * The query extended capabilities hypercall should not fail under
	 * any normal circumstances. Avoid repeatedly making the hypercall, on
	 * error.
	 */
	hv_extended_cap_queried = true;
	status &= HV_HYPERCALL_RESULT_MASK;
	if (status != HV_STATUS_SUCCESS) {
		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
		       status);
		return false;
	}

	return hv_extended_cap & cap_query;
}
EXPORT_SYMBOL_GPL(hv_query_ext_cap);
+5 −4
Original line number Diff line number Diff line
@@ -265,12 +265,13 @@ static void __init ms_hyperv_init_platform(void)
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES);
	ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
	pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
		ms_hyperv.misc_features);

	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
	ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -316,7 +317,7 @@ static void __init ms_hyperv_init_platform(void)
		x86_platform.calibrate_cpu = hv_get_tsc_khz;
	}

	if (ms_hyperv.features_b & HV_ISOLATION) {
	if (ms_hyperv.priv_high & HV_ISOLATION) {
		ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
		ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);

+1 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@ config HYPERV_UTILS
config HYPERV_BALLOON
	tristate "Microsoft Hyper-V Balloon driver"
	depends on HYPERV
	select PAGE_REPORTING
	help
	  Select this option to enable Hyper-V Balloon driver.

+89 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/percpu_counter.h>
#include <linux/page_reporting.h>

#include <linux/hyperv.h>
#include <asm/hyperv-tlfs.h>
@@ -563,6 +564,8 @@ struct hv_dynmem_device {
	 * The negotiated version agreed by host.
	 */
	__u32 version;

	struct page_reporting_dev_info pr_dev_info;
};

static struct hv_dynmem_device dm_device;
@@ -1568,6 +1571,89 @@ static void balloon_onchannelcallback(void *context)

}

/* Hyper-V only supports reporting 2MB pages or higher */
#define HV_MIN_PAGE_REPORTING_ORDER	9
#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER)
static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
		    struct scatterlist *sgl, unsigned int nents)
{
	unsigned long flags;
	struct hv_memory_hint *hint;
	int i;
	u64 status;
	struct scatterlist *sg;

	WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
	WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN);
	local_irq_save(flags);
	hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
	if (!hint) {
		local_irq_restore(flags);
		return -ENOSPC;
	}

	hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD;
	hint->reserved = 0;
	for_each_sg(sgl, sg, nents, i) {
		union hv_gpa_page_range *range;

		range = &hint->ranges[i];
		range->address_space = 0;
		/* page reporting only reports 2MB pages or higher */
		range->page.largepage = 1;
		range->page.additional_pages =
			(sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1;
		range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
		range->base_large_pfn =
			page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER;
	}

	status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
				     hint, NULL);
	local_irq_restore(flags);
	if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
		pr_err("Cold memory discard hypercall failed with status %llx\n",
			status);
		return -EINVAL;
	}

	return 0;
}

static void enable_page_reporting(void)
{
	int ret;

	/* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. */
	if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) {
		pr_debug("Cold memory discard is only supported on 2MB pages and above\n");
		return;
	}

	if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) {
		pr_debug("Cold memory discard hint not supported by Hyper-V\n");
		return;
	}

	BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
	dm_device.pr_dev_info.report = hv_free_page_report;
	ret = page_reporting_register(&dm_device.pr_dev_info);
	if (ret < 0) {
		dm_device.pr_dev_info.report = NULL;
		pr_err("Failed to enable cold memory discard: %d\n", ret);
	} else {
		pr_info("Cold memory discard hint enabled\n");
	}
}

static void disable_page_reporting(void)
{
	if (dm_device.pr_dev_info.report) {
		page_reporting_unregister(&dm_device.pr_dev_info);
		dm_device.pr_dev_info.report = NULL;
	}
}

static int balloon_connect_vsp(struct hv_device *dev)
{
	struct dm_version_request version_req;
@@ -1713,6 +1799,7 @@ static int balloon_probe(struct hv_device *dev,
	if (ret != 0)
		return ret;

	enable_page_reporting();
	dm_device.state = DM_INITIALIZED;

	dm_device.thread =
@@ -1727,6 +1814,7 @@ static int balloon_probe(struct hv_device *dev,
probe_error:
	dm_device.state = DM_INIT_ERROR;
	dm_device.thread  = NULL;
	disable_page_reporting();
	vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
	unregister_memory_notifier(&hv_memory_nb);
@@ -1749,6 +1837,7 @@ static int balloon_remove(struct hv_device *dev)
	cancel_work_sync(&dm->ha_wrk.wrk);

	kthread_stop(dm->thread);
	disable_page_reporting();
	vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
	unregister_memory_notifier(&hv_memory_nb);
+33 −2
Original line number Diff line number Diff line
@@ -89,9 +89,9 @@
#define HV_ACCESS_STATS				BIT(8)
#define HV_DEBUGGING				BIT(11)
#define HV_CPU_MANAGEMENT			BIT(12)
#define HV_ENABLE_EXTENDED_HYPERCALLS		BIT(20)
#define HV_ISOLATION				BIT(22)


/*
 * TSC page layout.
 */
@@ -159,11 +159,18 @@ struct ms_hyperv_tsc_page {
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0

/* Extended hypercalls */
#define HV_EXT_CALL_QUERY_CAPABILITIES		0x8001
#define HV_EXT_CALL_MEMORY_HEAT_HINT		0x8003

#define HV_FLUSH_ALL_PROCESSORS			BIT(0)
#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)

/* Extended capability bits */
#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8)

enum HV_GENERIC_SET_FORMAT {
	HV_GENERIC_SET_SPARSE_4K,
	HV_GENERIC_SET_ALL,
@@ -408,8 +415,10 @@ struct hv_guest_mapping_flush {
 *  by the bitwidth of "additional_pages" in union hv_gpa_page_range.
 */
#define HV_MAX_FLUSH_PAGES (2048)
#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB		0
#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB		1

/* HvFlushGuestPhysicalAddressList hypercall */
/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */
union hv_gpa_page_range {
	u64 address_space;
	struct {
@@ -417,6 +426,12 @@ union hv_gpa_page_range {
		u64 largepage:1;
		u64 basepfn:52;
	} page;
	struct {
		u64 reserved:12;
		u64 page_size:1;
		u64 reserved1:8;
		u64 base_large_pfn:43;
	};
};

/*
@@ -774,4 +789,20 @@ struct hv_input_unmap_device_interrupt {
#define HV_SOURCE_SHADOW_NONE               0x0
#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE   0x1

/*
 * The whole argument should fit in a page to be able to pass to the hypervisor
 * in one hypercall.
 */
#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES  \
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \
		sizeof(union hv_gpa_page_range))

/* HvExtCallMemoryHeatHint hypercall */
#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD	2
struct hv_memory_hint {
	u64 type:2;
	u64 reserved:62;
	union hv_gpa_page_range ranges[];
} __packed;

#endif
Loading