Unverified Commit 357e8ab1 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!3044 mm: hugetlb: Skip initialization of gigantic tail struct pages if freed by HVO

Merge Pull Request from: @ci-robot 
 
PR sync from: Jinjiang Tu <tujinjiang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/SEQFWVLMJCSUOJXMC3CRL6DQ62SBEZFG/ 
This series moves the boot time initialization of tail struct pages of a
gigantic page to later on in the boot. Only the
HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages
are initialized at the start. If HVO is successful, then no more tail struct
pages need to be initialized. For a 1G hugepage, this series avoid
initialization of 262144 - 63 = 262081 struct pages per hugepage.

When tested on a 512G system (allocating 500 1G hugepages), the
kexec-boot times with DEFERRED_STRUCT_PAGE_INIT enabled are:

- with patches, HVO enabled: 1.32 seconds
- with patches, HVO disabled: 2.15 seconds
- without patches, HVO enabled: 3.90  seconds
- without patches, HVO disabled: 3.58 seconds

This represents an approximately 70% reduction in boot time and will
significantly reduce server downtime when using a large number of
gigantic pages.

Usama Arif (4):
  mm: hugetlb_vmemmap: use nid of the head page to reallocate it
  memblock: pass memblock_type to memblock_setclr_flag
  memblock: introduce MEMBLOCK_RSRV_NOINIT flag
  mm: hugetlb: skip initialization of gigantic tail struct pages if
    freed by HVO


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8JXRC 
 
Link:https://gitee.com/openeuler/kernel/pulls/3044

 

Reviewed-by: default avatarKefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: default avatarZheng Zengkai <zhengzengkai@huawei.com>
parents f2a26a25 abdb3a3e
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
 * via a driver, and never indicated in the firmware-provided memory map as
 * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
 * kernel resource tree.
 * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are
 * not initialized (only for reserved regions).
 */
enum memblock_flags {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
@@ -47,6 +49,7 @@ enum memblock_flags {
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
	MEMBLOCK_RSRV_NOINIT	= 0x10,	/* don't initialize struct pages */
};

/**
@@ -125,6 +128,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);

void memblock_free_all(void);
void memblock_free(void *ptr, size_t size);
@@ -259,6 +263,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
	return m->flags & MEMBLOCK_NOMAP;
}

static inline bool memblock_is_reserved_noinit(struct memblock_region *m)
{
	return m->flags & MEMBLOCK_RSRV_NOINIT;
}

static inline bool memblock_is_driver_managed(struct memblock_region *m)
{
	return m->flags & MEMBLOCK_DRIVER_MANAGED;
+59 −9
Original line number Diff line number Diff line
@@ -3196,6 +3196,16 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
	}

found:

	/*
	 * Only initialize the head struct page in memmap_init_reserved_pages,
	 * rest of the struct pages will be initialized by the HugeTLB
	 * subsystem itself.
	 * The head struct page is used to get folio information by the HugeTLB
	 * subsystem like zone id and node id.
	 */
	memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
		huge_page_size(h) - PAGE_SIZE);
	/* Put them into a private list first because mem_map is not up yet */
	INIT_LIST_HEAD(&m->list);
	list_add(&m->list, &huge_boot_pages);
@@ -3203,6 +3213,43 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
	return 1;
}

/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
					unsigned long start_page_number,
					unsigned long end_page_number)
{
	enum zone_type zone = zone_idx(folio_zone(folio));
	int nid = folio_nid(folio);
	unsigned long head_pfn = folio_pfn(folio);
	unsigned long pfn, end_pfn = head_pfn + end_page_number;
	int ret;

	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__init_single_page(page, pfn, zone, nid);
		prep_compound_tail((struct page *)folio, pfn - head_pfn);
		ret = page_ref_freeze(page, 1);
		VM_BUG_ON(!ret);
	}
}

static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
					      struct hstate *h,
					      unsigned long nr_pages)
{
	int ret;

	/* Prepare folio head */
	__folio_clear_reserved(folio);
	__folio_set_head(folio);
	ret = page_ref_freeze(&folio->page, 1);
	VM_BUG_ON(!ret);
	/* Initialize the necessary tail struct pages */
	hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
	prep_compound_head((struct page *)folio, huge_page_order(h));
}

/*
 * Put bootmem huge pages into the standard lists after mem_map is up.
 * Note: This only applies to gigantic (order > MAX_ORDER) pages.
@@ -3213,19 +3260,21 @@ static void __init gather_bootmem_prealloc(void)

	list_for_each_entry(m, &huge_boot_pages, list) {
		struct page *page = virt_to_page(m);
		struct folio *folio = page_folio(page);
		struct folio *folio = (void *)page;
		struct hstate *h = m->hstate;

		VM_BUG_ON(!hstate_is_gigantic(h));
		WARN_ON(folio_ref_count(folio) != 1);
		if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
			WARN_ON(folio_test_reserved(folio));

		hugetlb_folio_init_vmemmap(folio, h,
					   HUGETLB_VMEMMAP_RESERVE_PAGES);
		prep_new_hugetlb_folio(h, folio, folio_nid(folio));
		/* If HVO fails, initialize all tail struct pages */
		if (!HPageVmemmapOptimized(&folio->page))
			hugetlb_folio_init_tail_vmemmap(folio,
						HUGETLB_VMEMMAP_RESERVE_PAGES,
						pages_per_huge_page(h));
		free_huge_folio(folio); /* add to the hugepage allocator */
		} else {
			/* VERY unlikely inflated ref count on a tail page */
			free_gigantic_folio(folio, huge_page_order(h));
		}

		/*
		 * We need to restore the 'stolen' pages to totalram_pages
@@ -3236,6 +3285,7 @@ static void __init gather_bootmem_prealloc(void)
		cond_resched();
	}
}

static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
{
	unsigned long i;
+2 −2
Original line number Diff line number Diff line
@@ -318,7 +318,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
		.reuse_addr	= reuse,
		.vmemmap_pages	= &vmemmap_pages,
	};
	int nid = page_to_nid((struct page *)start);
	int nid = page_to_nid((struct page *)reuse);
	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
			__GFP_NOWARN;

@@ -586,7 +586,7 @@ static int __init hugetlb_vmemmap_init(void)
	const struct hstate *h;

	/* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
	BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
	BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);

	for_each_hstate(h) {
		if (hugetlb_vmemmap_optimizable(h)) {
+5 −4
Original line number Diff line number Diff line
@@ -10,15 +10,16 @@
#define _LINUX_HUGETLB_VMEMMAP_H
#include <linux/hugetlb.h>

#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);

/*
 * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
 * Documentation/vm/vmemmap_dedup.rst.
 */
#define HUGETLB_VMEMMAP_RESERVE_SIZE	PAGE_SIZE
#define HUGETLB_VMEMMAP_RESERVE_PAGES	(HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page))

#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);

static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
+3 −0
Original line number Diff line number Diff line
@@ -1154,4 +1154,7 @@ struct vma_prepare {
	struct vm_area_struct *remove;
	struct vm_area_struct *remove2;
};

void __meminit __init_single_page(struct page *page, unsigned long pfn,
				unsigned long zone, int nid);
#endif	/* __MM_INTERNAL_H */
Loading