Commit e9fdff87 authored by Muchun Song, committed by Linus Torvalds

mm: hugetlb: add a kernel parameter hugetlb_free_vmemmap

Add a kernel parameter hugetlb_free_vmemmap to enable the feature of
freeing unused vmemmap pages associated with each hugetlb page on boot.

We disable PMD mapping of vmemmap pages for the x86-64 arch when this
feature is enabled, because vmemmap_remap_free() depends on vmemmap being
base page mapped.
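
For example (illustrative only; the bootloader plumbing varies by distro),
the feature can be enabled by appending the parameter to the kernel command
line, e.g. via GRUB_CMDLINE_LINUX in /etc/default/grub on GRUB-based
systems before regenerating the GRUB config:

	hugetlb_free_vmemmap=on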

Link: https://lkml.kernel.org/r/20210510030027.56044-8-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Barry Song <song.bao.hua@hisilicon.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Tested-by: Chen Huang <chenhuang5@huawei.com>
Tested-by: Bodeddula Balasubramaniam <bodeddub@amazon.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: HORIGUCHI NAOYA <naoya.horiguchi@nec.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ad2fa371
Documentation/admin-guide/kernel-parameters.txt (+17 −0)
@@ -1567,6 +1567,23 @@
			Documentation/admin-guide/mm/hugetlbpage.rst.
			Format: size[KMG]

	hugetlb_free_vmemmap=
			[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
			enabled.
			Allows heavy hugetlb users to free up some more
			memory (6 * PAGE_SIZE for each 2MB hugetlb page).
			This feature is not free though. Large page
			tables are not used to back vmemmap pages, which
			can lead to a performance degradation for some
			workloads. Also there will be memory allocation
			required when hugetlb pages are freed from the
			pool, which can lead to corner cases under heavy
			memory pressure.
			Format: { on | off (default) }

			on:  enable the feature
			off: disable the feature

	hung_task_panic=
			[KNL] Should the hung task detector generate panics.
			Format: 0 | 1
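
To make the "6 * PAGE_SIZE" figure above concrete, here is a minimal,
hypothetical userspace sketch of the arithmetic (not kernel code). It
assumes x86-64 defaults: 4096-byte base pages, 2MB HugeTLB pages, and
sizeof(struct page) == 64; only RESERVE_VMEMMAP_NR mirrors the constant in
the mm/hugetlb_vmemmap.c hunk below, all other names are illustrative.

#include <stdio.h>

#define PAGE_SIZE		4096UL			/* x86-64 base page */
#define HPAGE_SIZE		(2UL * 1024 * 1024)	/* 2MB hugetlb page */
#define STRUCT_PAGE_SIZE	64UL	/* assumed sizeof(struct page) */
#define RESERVE_VMEMMAP_NR	2UL	/* mirrors the constant below */

int main(void)
{
	unsigned long nr_struct_pages = HPAGE_SIZE / PAGE_SIZE;	/* 512 */
	unsigned long vmemmap_pages =
		nr_struct_pages * STRUCT_PAGE_SIZE / PAGE_SIZE;		/* 8 */
	unsigned long freeable = vmemmap_pages - RESERVE_VMEMMAP_NR;	/* 6 */

	printf("vmemmap pages per 2MB hugetlb page: %lu, freeable: %lu\n",
	       vmemmap_pages, freeable);
	return 0;
}

A 2MB hugetlb page spans 512 base pages, whose struct pages occupy 8
vmemmap pages; keeping 2 reserved leaves 6 to free, matching the
documentation entry above.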
Documentation/admin-guide/mm/hugetlbpage.rst (+3 −0)
@@ -153,6 +153,9 @@ default_hugepagesz

	will all result in 256 2M huge pages being allocated.  Valid default
	huge page size is architecture dependent.
hugetlb_free_vmemmap
	When CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set, this enables freeing
	unused vmemmap pages associated with each HugeTLB page.

When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
indicates the current number of pre-allocated huge pages of the default size.
arch/x86/mm/init_64.c (+6 −2)
@@ -34,6 +34,7 @@
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/bootmem_info.h>
#include <linux/hugetlb.h>

#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -1609,7 +1610,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));

-	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
+	if ((is_hugetlb_free_vmemmap_enabled() && !altmap) ||
+	    end - start < PAGES_PER_SECTION * sizeof(struct page))
		err = vmemmap_populate_basepages(start, end, node, NULL);
	else if (boot_cpu_has(X86_FEATURE_PSE))
		err = vmemmap_populate_hugepages(start, end, node, altmap);
@@ -1637,6 +1639,8 @@ void register_page_bootmem_memmap(unsigned long section_nr,
	pmd_t *pmd;
	unsigned int nr_pmd_pages;
	struct page *page;
	bool base_mapping = !boot_cpu_has(X86_FEATURE_PSE) ||
			    is_hugetlb_free_vmemmap_enabled();

	for (; addr < end; addr = next) {
		pte_t *pte = NULL;
@@ -1662,7 +1666,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
		}
		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);

-		if (!boot_cpu_has(X86_FEATURE_PSE)) {
+		if (base_mapping) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			pmd = pmd_offset(pud, addr);
			if (pmd_none(*pmd))
include/linux/hugetlb.h (+19 −0)
@@ -892,6 +892,20 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
}
#endif

#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
extern bool hugetlb_free_vmemmap_enabled;

static inline bool is_hugetlb_free_vmemmap_enabled(void)
{
	return hugetlb_free_vmemmap_enabled;
}
#else
static inline bool is_hugetlb_free_vmemmap_enabled(void)
{
	return false;
}
#endif

#else	/* CONFIG_HUGETLB_PAGE */
struct hstate {};

@@ -1046,6 +1060,11 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
					pte_t *ptep, pte_t pte, unsigned long sz)
{
}

static inline bool is_hugetlb_free_vmemmap_enabled(void)
{
	return false;
}
#endif	/* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
mm/hugetlb_vmemmap.c (+24 −0)
@@ -168,6 +168,8 @@
 * (last) level. So this type of HugeTLB page can be optimized only when the
 * size of its struct page structs is greater than 2 pages.
 */
#define pr_fmt(fmt)	"HugeTLB: " fmt

#include "hugetlb_vmemmap.h"

/*
@@ -180,6 +182,28 @@
#define RESERVE_VMEMMAP_NR		2U
#define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)

bool hugetlb_free_vmemmap_enabled;

static int __init early_hugetlb_free_vmemmap_param(char *buf)
{
	/* We cannot optimize if a "struct page" crosses page boundaries. */
	if (!is_power_of_2(sizeof(struct page))) {
		pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n");
		return 0;
	}

	if (!buf)
		return -EINVAL;

	if (!strcmp(buf, "on"))
		hugetlb_free_vmemmap_enabled = true;
	else if (strcmp(buf, "off"))
		return -EINVAL;

	return 0;
}
early_param("hugetlb_free_vmemmap", early_hugetlb_free_vmemmap_param);

static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h)
{
	return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
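
Why the is_power_of_2() guard in the parser above matters: vmemmap pages
can only be remapped and freed if no struct page straddles a page boundary,
which is guaranteed when sizeof(struct page) is a power of two (64 bytes on
x86-64). A standalone sketch of that divisibility argument, with
hypothetical names and no kernel dependencies:

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* x86-64 base page size */

/* A struct page straddles a page boundary unless its size divides
 * PAGE_SIZE evenly -- true for any power of two <= PAGE_SIZE. */
static int crosses_page_boundary(unsigned long struct_page_size)
{
	return PAGE_SIZE % struct_page_size != 0;
}

int main(void)
{
	printf("64-byte struct page crosses a boundary: %d\n",
	       crosses_page_boundary(64));	/* 0 -> optimization allowed */
	printf("56-byte struct page crosses a boundary: %d\n",
	       crosses_page_boundary(56));	/* 1 -> parser bails out early */
	return 0;
}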