Unverified Commit 32b5a32c authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!13824 mm: some mTHPs improve

Merge Pull Request from: @wedm23414 
 

Barry Song (2):
  mm: huge_mm: fix undefined reference to `mthp_stats' for
    CONFIG_SYSFS=n
  mm: count the number of anonymous THPs per size

Maíra Canal (1):
  mm: fix docs for the kernel parameter ``thp_anon=``

Ryan Roberts (3):
  mm: override mTHP "enabled" defaults at kernel cmdline
  mm: cleanup count_mthp_stat() definition
  mm: tidy up shmem mTHP controls and stats

-- 
2.34.1 
 
Link: https://gitee.com/openeuler/kernel/pulls/13824

 

Reviewed-by: default avatarKefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: default avatarZhang Peng <zhangpeng362@huawei.com>
parents a91ec6de 54c177f9
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -6523,6 +6523,15 @@
			<deci-seconds>: poll all this frequency
			0: no polling (default)

	thp_anon=	[KNL]
			Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
			state is one of "always", "madvise", "never" or "inherit".
			Control the default behavior of the system with respect
			to anonymous transparent hugepages.
			Can be used multiple times for multiple anon THP sizes.
			See Documentation/admin-guide/mm/transhuge.rst for more
			details.

	threadirqs	[KNL]
			Force threading of all interrupt handlers except those
			marked explicitly IRQF_NO_THREAD.
+36 −7
Original line number Diff line number Diff line
@@ -317,13 +317,37 @@ processes. Exceeding the number would block the collapse::

A higher value may increase memory footprint for some workloads.

Boot parameter
==============

You can change the sysfs boot time defaults of Transparent Hugepage
Support by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
to the kernel command line.
Boot parameters
===============

You can change the sysfs boot time default for the top-level "enabled"
control by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
kernel command line.

Alternatively, each supported anonymous THP size can be controlled by
passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
where ``<size>`` is the THP size (must be a power-of-2 multiple of PAGE_SIZE
and a supported anonymous THP size) and ``<state>`` is one of ``always``,
``madvise``, ``never`` or ``inherit``.

For example, the following will set 16K, 32K, 64K THP to ``always``,
set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
to ``never``::

	thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never

``thp_anon=`` may be specified multiple times to configure all THP sizes as
required. If ``thp_anon=`` is specified at least once, any anon THP sizes
not explicitly configured on the command line are implicitly set to
``never``.

``transparent_hugepage`` setting only affects the global toggle. If
``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
However, if a valid ``thp_anon`` setting is provided by the user, the
PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
is not defined within a valid ``thp_anon``, its policy will default to
``never``.

Hugepages in tmpfs/shmem
========================
@@ -560,6 +584,11 @@ split_deferred
        it would free up some memory. Pages on split queue are going to
        be split under memory pressure, if splitting is possible.

nr_anon
       the number of anonymous THPs currently present in the whole system.
       These THPs might be entirely mapped or have partially unmapped/unused
       subpages.

As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
+46 −29
Original line number Diff line number Diff line
@@ -120,6 +120,52 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)

/*
 * Per-order mTHP counter items. Most are monotonically increasing event
 * counters; MTHP_STAT_NR_ANON is a level (current count of anon THPs per
 * order) and may therefore be decremented via mod_mthp_stat().
 */
enum mthp_stat_item {
	MTHP_STAT_ANON_FAULT_ALLOC,
	MTHP_STAT_ANON_FAULT_FALLBACK,
	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
	MTHP_STAT_SWPOUT,
	MTHP_STAT_SWPOUT_FALLBACK,
	MTHP_STAT_SHMEM_ALLOC,
	MTHP_STAT_SHMEM_FALLBACK,
	MTHP_STAT_SHMEM_FALLBACK_CHARGE,
	MTHP_STAT_SPLIT,
	MTHP_STAT_SPLIT_FAILED,
	MTHP_STAT_SPLIT_DEFERRED,
	MTHP_STAT_NR_ANON,
	__MTHP_STAT_COUNT
};

/*
 * The stats are only maintained when both THP and sysfs are enabled; the
 * stub versions below keep callers buildable with CONFIG_SYSFS=n (avoids an
 * undefined reference to 'mthp_stats' - see the commit message above).
 */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
struct mthp_stat {
	/* One row per possible folio order, one column per stat item. */
	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

DECLARE_PER_CPU(struct mthp_stat, mthp_stats);

/* Adjust a per-order stat by @delta; used for level-type items like NR_ANON. */
static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
	/* Only mTHP orders (0, PMD_ORDER] are tracked; ignore anything else. */
	if (order <= 0 || order > PMD_ORDER)
		return;

	this_cpu_add(mthp_stats.stats[order][item], delta);
}

/* Count a single occurrence of a per-order mTHP event. */
static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
	mod_mthp_stat(order, item, 1);
}

#else
static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
}

static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

extern unsigned long transparent_hugepage_flags;
@@ -276,35 +322,6 @@ struct thpsize {

#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)

enum mthp_stat_item {
	MTHP_STAT_ANON_FAULT_ALLOC,
	MTHP_STAT_ANON_FAULT_FALLBACK,
	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
	MTHP_STAT_SWPOUT,
	MTHP_STAT_SWPOUT_FALLBACK,
	MTHP_STAT_SHMEM_ALLOC,
	MTHP_STAT_SHMEM_FALLBACK,
	MTHP_STAT_SHMEM_FALLBACK_CHARGE,
	MTHP_STAT_SPLIT,
	MTHP_STAT_SPLIT_FAILED,
	MTHP_STAT_SPLIT_DEFERRED,
	__MTHP_STAT_COUNT
};

struct mthp_stat {
	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

DECLARE_PER_CPU(struct mthp_stat, mthp_stats);

static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
	if (order <= 0 || order > PMD_ORDER)
		return;

	this_cpu_inc(mthp_stats.stats[order][item]);
}

#define transparent_hugepage_use_zero_page()				\
	(transparent_hugepage_flags &					\
	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
+219 −32
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@ unsigned long huge_anon_orders_always __read_mostly;
unsigned long huge_anon_orders_madvise __read_mostly;
unsigned long huge_anon_orders_inherit __read_mostly;
unsigned long huge_pcp_allow_orders __read_mostly;
static bool anon_orders_configured __initdata;

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
					 unsigned long vm_flags,
@@ -637,7 +638,7 @@ static void thpsize_release(struct kobject *kobj);
static DEFINE_SPINLOCK(huge_anon_orders_lock);
static LIST_HEAD(thpsize_list);

static ssize_t thpsize_enabled_show(struct kobject *kobj,
static ssize_t anon_enabled_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	int order = to_thpsize(kobj)->order;
@@ -655,7 +656,7 @@ static ssize_t thpsize_enabled_show(struct kobject *kobj,
	return sysfs_emit(buf, "%s\n", output);
}

static ssize_t thpsize_enabled_store(struct kobject *kobj,
static ssize_t anon_enabled_store(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
{
@@ -699,19 +700,35 @@ static ssize_t thpsize_enabled_store(struct kobject *kobj,
	return ret;
}

static struct kobj_attribute thpsize_enabled_attr =
	__ATTR(enabled, 0644, thpsize_enabled_show, thpsize_enabled_store);
static struct kobj_attribute anon_enabled_attr =
	__ATTR(enabled, 0644, anon_enabled_show, anon_enabled_store);

static struct attribute *thpsize_attrs[] = {
	&thpsize_enabled_attr.attr,
static struct attribute *anon_ctrl_attrs[] = {
	&anon_enabled_attr.attr,
	NULL,
};

static const struct attribute_group anon_ctrl_attr_grp = {
	.attrs = anon_ctrl_attrs,
};

static struct attribute *file_ctrl_attrs[] = {
#ifdef CONFIG_SHMEM
	&thpsize_shmem_enabled_attr.attr,
#endif
	NULL,
};

static const struct attribute_group thpsize_attr_group = {
	.attrs = thpsize_attrs,
static const struct attribute_group file_ctrl_attr_grp = {
	.attrs = file_ctrl_attrs,
};

static struct attribute *any_ctrl_attrs[] = {
	NULL,
};

static const struct attribute_group any_ctrl_attr_grp = {
	.attrs = any_ctrl_attrs,
};

static const struct kobj_type thpsize_ktype = {
@@ -750,64 +767,134 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
#ifdef CONFIG_SHMEM
DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC);
DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK);
DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
#endif
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
DEFINE_MTHP_STAT_ATTR(nr_anon, MTHP_STAT_NR_ANON);

static struct attribute *stats_attrs[] = {
static struct attribute *anon_stats_attrs[] = {
	&anon_fault_alloc_attr.attr,
	&anon_fault_fallback_attr.attr,
	&anon_fault_fallback_charge_attr.attr,
#ifndef CONFIG_SHMEM
	&swpout_attr.attr,
	&swpout_fallback_attr.attr,
#endif
	&split_deferred_attr.attr,
	&nr_anon_attr.attr,
	NULL,
};

static struct attribute_group anon_stats_attr_grp = {
	.name = "stats",
	.attrs = anon_stats_attrs,
};

static struct attribute *file_stats_attrs[] = {
#ifdef CONFIG_SHMEM
	&shmem_alloc_attr.attr,
	&shmem_fallback_attr.attr,
	&shmem_fallback_charge_attr.attr,
#endif
	NULL,
};

static struct attribute_group file_stats_attr_grp = {
	.name = "stats",
	.attrs = file_stats_attrs,
};

static struct attribute *any_stats_attrs[] = {
#ifdef CONFIG_SHMEM
	&swpout_attr.attr,
	&swpout_fallback_attr.attr,
#endif
	&split_attr.attr,
	&split_failed_attr.attr,
	&split_deferred_attr.attr,
	NULL,
};

static struct attribute_group stats_attr_group = {
static struct attribute_group any_stats_attr_grp = {
	.name = "stats",
	.attrs = stats_attrs,
	.attrs = any_stats_attrs,
};

/*
 * Add an attribute group to @kobj, tolerating a pre-existing subdirectory.
 *
 * Named groups live in a subdirectory that another group may already have
 * created; merging into it first sidesteps the warning that
 * sysfs_create_group() emits when the directory already exists.
 *
 * Returns 0 on success or a negative errno.
 */
static int sysfs_add_group(struct kobject *kobj,
			   const struct attribute_group *grp)
{
	int err;

	if (grp->name) {
		err = sysfs_merge_group(kobj, grp);
		if (!err)
			return 0;
	}

	/* Unnamed group, or the subdirectory did not exist yet: create it. */
	return sysfs_create_group(kobj, grp);
}

static struct thpsize *thpsize_create(int order, struct kobject *parent)
{
	unsigned long size = (PAGE_SIZE << order) / SZ_1K;
	struct thpsize *thpsize;
	int ret;
	int ret = -ENOMEM;

	thpsize = kzalloc(sizeof(*thpsize), GFP_KERNEL);
	if (!thpsize)
		return ERR_PTR(-ENOMEM);
		goto err;

	thpsize->order = order;

	ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent,
				   "hugepages-%lukB", size);
	if (ret) {
		kfree(thpsize);
		return ERR_PTR(ret);
		goto err;
	}

	ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group);
	if (ret) {
		kobject_put(&thpsize->kobj);
		return ERR_PTR(ret);

	ret = sysfs_add_group(&thpsize->kobj, &any_ctrl_attr_grp);
	if (ret)
		goto err_put;

	ret = sysfs_add_group(&thpsize->kobj, &any_stats_attr_grp);
	if (ret)
		goto err_put;

	if (BIT(order) & THP_ORDERS_ALL_ANON) {
		ret = sysfs_add_group(&thpsize->kobj, &anon_ctrl_attr_grp);
		if (ret)
			goto err_put;

		ret = sysfs_add_group(&thpsize->kobj, &anon_stats_attr_grp);
		if (ret)
			goto err_put;
	}

	ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group);
	if (ret) {
		kobject_put(&thpsize->kobj);
		return ERR_PTR(ret);
	if (BIT(order) & THP_ORDERS_ALL_FILE_DEFAULT) {
		ret = sysfs_add_group(&thpsize->kobj, &file_ctrl_attr_grp);
		if (ret)
			goto err_put;

		ret = sysfs_add_group(&thpsize->kobj, &file_stats_attr_grp);
		if (ret)
			goto err_put;
	}

	thpsize->order = order;
	return thpsize;
err_put:
	kobject_put(&thpsize->kobj);
err:
	return ERR_PTR(ret);
}

static void thpsize_release(struct kobject *kobj)
@@ -827,6 +914,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
	 * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time
	 * constant so we have to do this here.
	 */
	if (!anon_orders_configured)
		huge_anon_orders_inherit = BIT(PMD_ORDER);

	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
@@ -847,7 +935,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
		goto remove_hp_group;
	}

	orders = THP_ORDERS_ALL_ANON;
	orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
	order = highest_order(orders);
	while (orders) {
		thpsize = thpsize_create(order, *hugepage_kobj);
@@ -990,6 +1078,100 @@ static int __init setup_transparent_hugepage(char *str)
}
__setup("transparent_hugepage=", setup_transparent_hugepage);

/*
 * Parse one THP size string ("16K", "2M", ...) from the thp_anon= boot
 * parameter and convert it to a folio order.
 *
 * Returns the order on success, or -EINVAL (after logging) when the string
 * is malformed, the size is not a power of two, or the resulting order is
 * not a supported anonymous THP order.
 */
static inline int get_order_from_str(const char *size_str)
{
	unsigned long size;
	char *endptr;
	int order;

	size = memparse(size_str, &endptr);

	/*
	 * memparse() stops at the first unrecognised character; previously
	 * that character was ignored, silently accepting strings such as
	 * "16Kfoo". Reject any trailing garbage instead.
	 */
	if (*endptr != '\0')
		goto err;
	if (!is_power_of_2(size))
		goto err;
	order = get_order(size);
	/* Only orders in THP_ORDERS_ALL_ANON may be configured for anon THP. */
	if (BIT(order) & ~THP_ORDERS_ALL_ANON)
		goto err;

	return order;
err:
	pr_err("invalid size %s in thp_anon boot parameter\n", size_str);
	return -EINVAL;
}

/* Writable scratch copy for parsing: strsep() modifies the string it walks. */
static char str_dup[PAGE_SIZE] __initdata;
/*
 * Parse the "thp_anon=" kernel command line parameter.
 *
 * Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
 * where <state> is one of "always", "madvise", "inherit" or "never".
 * The parameter may be given multiple times; each invocation updates the
 * anon THP order bitmaps cumulatively on top of the previous ones.
 *
 * Returns 1 when the parameter was consumed, 0 on a parse error (in which
 * case the whole setting is ignored and the global bitmaps are untouched).
 */
static int __init setup_thp_anon(char *str)
{
	char *token, *range, *policy, *subtoken;
	unsigned long always, inherit, madvise;
	char *start_size, *end_size;
	int start, end, nr;
	char *p;

	/* Reject strings that would not fit (including the NUL) in str_dup. */
	if (!str || strlen(str) + 1 > PAGE_SIZE)
		goto err;
	strcpy(str_dup, str);

	/*
	 * Stage the updates in locals so that an error anywhere in the string
	 * leaves the global policy bitmaps exactly as they were.
	 */
	always = huge_anon_orders_always;
	madvise = huge_anon_orders_madvise;
	inherit = huge_anon_orders_inherit;
	p = str_dup;
	/* Each ';'-separated token has the shape "<range-list>:<policy>". */
	while ((token = strsep(&p, ";")) != NULL) {
		range = strsep(&token, ":");
		policy = token;

		/* No ':' in the token: there is no policy part. */
		if (!policy)
			goto err;

		/* Range list is ','-separated; entries are SIZE or SIZE-SIZE. */
		while ((subtoken = strsep(&range, ",")) != NULL) {
			if (strchr(subtoken, '-')) {
				start_size = strsep(&subtoken, "-");
				end_size = subtoken;

				start = get_order_from_str(start_size);
				end = get_order_from_str(end_size);
			} else {
				/* A single size is a degenerate one-order range. */
				start = end = get_order_from_str(subtoken);
			}

			if (start < 0 || end < 0 || start > end)
				goto err;

			nr = end - start + 1;
			/*
			 * An order belongs to at most one policy bitmap: set it
			 * in the chosen one and clear it from the others.
			 * "never" is membership in none of the three.
			 */
			if (!strcmp(policy, "always")) {
				bitmap_set(&always, start, nr);
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
			} else if (!strcmp(policy, "madvise")) {
				bitmap_set(&madvise, start, nr);
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&always, start, nr);
			} else if (!strcmp(policy, "inherit")) {
				bitmap_set(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&always, start, nr);
			} else if (!strcmp(policy, "never")) {
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&always, start, nr);
			} else {
				pr_err("invalid policy %s in thp_anon boot parameter\n", policy);
				goto err;
			}
		}
	}

	/* Everything parsed cleanly: commit the staged configuration. */
	huge_anon_orders_always = always;
	huge_anon_orders_madvise = madvise;
	huge_anon_orders_inherit = inherit;
	/* Tells hugepage_init_sysfs() not to apply the PMD "inherit" default. */
	anon_orders_configured = true;
	return 1;

err:
	pr_warn("thp_anon=%s: error parsing string, ignoring setting\n", str);
	return 0;
}
__setup("thp_anon=", setup_thp_anon);

pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	if (likely(vma->vm_flags & VM_WRITE))
@@ -3259,8 +3441,9 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
	/* reset xarray order to new order after split */
	XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
	struct anon_vma *anon_vma = NULL;
	bool is_anon = folio_test_anon(folio);
	struct address_space *mapping = NULL;
	struct anon_vma *anon_vma = NULL;
	int order = folio_order(folio);
	int extra_pins, ret;
	pgoff_t end;
@@ -3272,7 +3455,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
	if (new_order >= folio_order(folio))
		return -EINVAL;

	if (folio_test_anon(folio)) {
	if (is_anon) {
		/* order-1 is not supported for anonymous THP. */
		if (new_order == 1) {
			VM_WARN_ONCE(1, "Cannot split to order-1 folio");
@@ -3312,7 +3495,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
	if (folio_test_writeback(folio))
		return -EBUSY;

	if (folio_test_anon(folio)) {
	if (is_anon) {
		/*
		 * The caller does not necessarily hold an mmap_lock that would
		 * prevent the anon_vma disappearing so we first we take a
@@ -3425,6 +3608,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
			}
		}

		if (is_anon) {
			mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
			mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order));
		}
		__split_huge_page(page, list, end, new_order);
		ret = 0;
	} else {
+0 −2
Original line number Diff line number Diff line
@@ -4581,9 +4581,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)

	folio_ref_add(folio, nr_pages - 1);
	add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
#endif
	add_reliable_folio_counter(folio, vma->vm_mm, nr_pages);
	folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
	folio_add_lru_vma(folio, vma);
Loading