Commit 1be50225 authored by Kefeng Wang's avatar Kefeng Wang
Browse files

mm: add control to allow specified high-order pages stored on PCP list

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9Q9DF


CVE: NA

-------------------------------------------------

Storing high-order pages on the PCP lists is not always a win, so it is
disabled by default for all high orders except PMD_ORDER.

Add a new control, pcp_allow_high_order, that lets the user enable or disable
storing pages of the specified high order (only order 4 for now) on the PCP
lists. Note that all pages on the pcplists are drained when it is disabled.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
parent 44984691
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -220,6 +220,15 @@ writing the corresponding bit to 1::
	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align

The kernel can store high-order pages (orders greater than
PAGE_ALLOC_COSTLY_ORDER; only order 4 is supported for now) on the PCP lists,
which can reduce zone lock contention when high-order pages are allocated
frequently. PMD-order pages are not affected by this control. Storing order 4
pages on the PCP lists is enabled by writing 4 and disabled again by
writing 0::

        echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
        echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order

khugepaged will be automatically started when one or more hugepage
sizes are enabled (either by directly setting "always" or "madvise",
or by setting "inherit" while the top-level enabled is set to "always"
+1 −0
Original line number Diff line number Diff line
@@ -335,6 +335,7 @@ extern void page_frag_free(void *addr);

void page_alloc_init_cpuhp(void);
int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_zone_pages(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
+1 −0
Original line number Diff line number Diff line
@@ -104,6 +104,7 @@ extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
extern unsigned long huge_anon_orders_madvise;
extern unsigned long huge_anon_orders_inherit;
extern unsigned long huge_pcp_allow_orders;

static inline bool hugepage_global_enabled(void)
{
+31 −0
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
unsigned long huge_anon_orders_always __read_mostly;
unsigned long huge_anon_orders_madvise __read_mostly;
unsigned long huge_anon_orders_inherit __read_mostly;
/*
 * Extra high order allowed on the PCP lists, set through the
 * pcp_allow_high_order sysfs file: either 0 (none) or
 * PAGE_ALLOC_COSTLY_ORDER + 1. Compared against the page order in
 * pcp_allowed_order().
 */
unsigned long huge_pcp_allow_orders __read_mostly;

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
					 unsigned long vm_flags, bool smaps,
@@ -417,6 +418,35 @@ static ssize_t use_zero_page_store(struct kobject *kobj,
}
static struct kobj_attribute use_zero_page_attr = __ATTR_RW(use_zero_page);

/*
 * sysfs read handler for pcp_allow_high_order: prints the current value of
 * huge_pcp_allow_orders as an unsigned decimal followed by a newline.
 */
static ssize_t pcp_allow_high_order_show(struct kobject *kobj,
					 struct kobj_attribute *attr, char *buf)
{
	/* Take a single snapshot; the store handler may update it concurrently. */
	unsigned long allowed_order = READ_ONCE(huge_pcp_allow_orders);

	return sysfs_emit(buf, "%lu\n", allowed_order);
}
/*
 * sysfs write handler for pcp_allow_high_order.
 *
 * Accepts a base-10 unsigned integer. The only valid values are 0 (disable)
 * and PAGE_ALLOC_COSTLY_ORDER + 1 (enable that order on the PCP lists).
 *
 * Returns @count on success, the kstrtoul() error if @buf cannot be parsed,
 * or -EINVAL for any other value.
 */
static ssize_t pcp_allow_high_order_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	unsigned long value;
	int ret;

	ret = kstrtoul(buf, 10, &value);
	if (ret < 0)
		return ret;

	/* Only enable order 4 now, 0 is to disable it */
	if (value != 0 && value != (PAGE_ALLOC_COSTLY_ORDER + 1))
		return -EINVAL;

	/*
	 * Publish the new setting before draining. If the drain ran first,
	 * pcp_allowed_order() would keep accepting the high order until the
	 * write below, so pages freed in that window would land back on the
	 * just-emptied PCP lists and stay there after the feature is off.
	 */
	WRITE_ONCE(huge_pcp_allow_orders, value);

	if (value == 0)
		drain_all_zone_pages();

	return count;
}
/*
 * Read/write attribute backing
 * /sys/kernel/mm/transparent_hugepage/pcp_allow_high_order; it is listed in
 * hugepage_attr[] next to the other THP controls.
 */
static struct kobj_attribute pcp_allow_high_order_attr =
	__ATTR_RW(pcp_allow_high_order);

static ssize_t hpage_pmd_size_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
@@ -531,6 +561,7 @@ static struct attribute *hugepage_attr[] = {
	&enabled_attr.attr,
	&defrag_attr.attr,
	&use_zero_page_attr.attr,
	&pcp_allow_high_order_attr.attr,
	&hpage_pmd_size_attr.attr,
#ifdef CONFIG_SHMEM
	&shmem_enabled_attr.attr,
+17 −1
Original line number Diff line number Diff line
@@ -528,7 +528,7 @@ static void bad_page(struct page *page, const char *reason)
static inline unsigned int order_to_pindex(int migratetype, int order)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (order > PAGE_ALLOC_COSTLY_ORDER) {
	if (order > PAGE_ALLOC_COSTLY_ORDER + 1) {
		VM_BUG_ON(order != HPAGE_PMD_ORDER);
		return NR_LOWORDER_PCP_LISTS;
	}
@@ -560,6 +560,8 @@ static inline bool pcp_allowed_order(unsigned int order)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (order == HPAGE_PMD_ORDER)
		return true;
	if (order == READ_ONCE(huge_pcp_allow_orders))
		return true;
#endif
	return false;
}
@@ -6829,6 +6831,20 @@ void zone_pcp_reset(struct zone *zone)
	}
}

/*
 * drain_all_zone_pages - drain the PCP lists of every populated zone.
 *
 * Under pcp_batch_high_lock, each populated zone's pageset limits are first
 * set to (0, 0, 1), a drain is requested, and the limits are then restored
 * from the values saved in the zone (pageset_high_min, pageset_high_max,
 * pageset_batch). Called from the pcp_allow_high_order sysfs store handler
 * when high-order PCP caching is switched off.
 */
void drain_all_zone_pages(void)
{
	struct zone *zone;

	/* Held across the whole clamp/drain/restore sequence. */
	mutex_lock(&pcp_batch_high_lock);
	/*
	 * Clamp every populated zone to high_min = 0, high_max = 0, batch = 1
	 * (argument meaning inferred from the restore call below); presumably
	 * this keeps pages from piling up on the lists during the drain.
	 */
	for_each_populated_zone(zone)
		__zone_set_pageset_high_and_batch(zone, 0, 0, 1);
	/* NOTE(review): NULL/true presumably mean "all zones"/"all CPUs" - confirm. */
	__drain_all_pages(NULL, true);
	/* Put back the limits recorded in each zone. */
	for_each_populated_zone(zone)
		__zone_set_pageset_high_and_batch(zone, zone->pageset_high_min,
				zone->pageset_high_max, zone->pageset_batch);
	mutex_unlock(&pcp_batch_high_lock);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * All pages in the range must be in a single zone, must not contain holes,