Unverified Commit 7db2768c authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!7530 mm: more thp control for large folio

Merge Pull Request from: @ci-robot 
 
PR sync from: Kefeng Wang <wangkefeng.wang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/NEWBFLIWKVHG2GRUI2VCSE72LAVQL4M5/ 
Barry Song (4):
  mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback
    counters
  mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters
  mm: add docs for per-order mTHP counters and transhuge_page ABI
  mm: correct the docs for thp_fault_alloc and thp_fault_fallback

Kefeng Wang (4):
  mm: filemap: make mTHP configurable for exec mapping
  mm: huge_memory: add folio_get_unmapped_area()
  mm: huge_memory: add thp mapping align control
  mm: add control to allow specified high-order pages stored on PCP list

Matthew Wilcox (Oracle) (3):
  mm: remove inc/dec lruvec page state functions
  mm/khugepaged: use a folio more in collapse_file()
  mm/memcontrol: remove __mod_lruvec_page_state()

Ryan Roberts (1):
  mm/filemap: Allow arch to request folio size for exec memory


-- 
2.27.0
 
https://gitee.com/openeuler/kernel/issues/I9Q9DF 
 
Link:https://gitee.com/openeuler/kernel/pulls/7530

 

Reviewed-by: Zhang Peng <zhangpeng362@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents f71a6309 b7d1fdc0
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
What:		/sys/kernel/mm/transparent_hugepage/
Date:		April 2024
Contact:	Linux memory management mailing list <linux-mm@kvack.org>
Description:
		/sys/kernel/mm/transparent_hugepage/ contains a number of files and
		subdirectories,

			- defrag
			- enabled
			- hpage_pmd_size
			- khugepaged
			- shmem_enabled
			- use_zero_page
			- subdirectories of the form hugepages-<size>kB, where <size>
			  is the page size of the hugepages supported by the kernel/CPU
			  combination.

		See Documentation/admin-guide/mm/transhuge.rst for details.
+53 −5
Original line number Diff line number Diff line
@@ -203,11 +203,31 @@ PMD-mappable transparent hugepage::
	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size

The kernel tries to use huge, PMD-mappable page on read page fault for
file exec mapping if CONFIG_READ_ONLY_THP_FOR_FS is enabled. It's possible
to enable the feature by writing 1 or disable it by writing 0::
if CONFIG_READ_ONLY_THP_FOR_FS is enabled, or tries a non-PMD-size page
(e.g., 64K on arm64) for file exec mappings: BIT0 controls PMD THP and BIT1
controls mTHP. Each can be enabled or disabled by configuring the
corresponding bit::

	echo 0x0 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
	echo 0x1 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled

The kernel can also try to align mappings to large sizes other than the
THP size (e.g., 64K on arm64): BIT0 controls file mappings and BIT1 controls
anon mappings. This is disabled by default and can be enabled by setting
the corresponding bit to 1::

	echo 0x1 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align

The kernel can allow high-order pages (orders greater than
PAGE_ALLOC_COSTLY_ORDER; only order 4 is supported for now) to be stored on
PCP lists (except PMD order), which can reduce zone lock contention when
high-order pages are allocated frequently. Storing order-4 pages on PCP lists
is enabled by writing 4 and disabled again by writing 0::

        echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
        echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order

khugepaged will be automatically started when one or more hugepage
sizes are enabled (either by directly setting "always" or "madvise",
@@ -376,7 +396,7 @@ monitor how successfully the system is providing huge pages for use.

thp_fault_alloc
	is incremented every time a huge page is successfully
	allocated to handle a page fault.
	allocated and charged to handle a page fault.

thp_collapse_alloc
	is incremented by khugepaged when it has found
@@ -384,7 +404,7 @@ thp_collapse_alloc
	successfully allocated a new huge page to store the data.

thp_fault_fallback
	is incremented if a page fault fails to allocate
	is incremented if a page fault fails to allocate or charge
	a huge page and instead falls back to using small pages.

thp_fault_fallback_charge
@@ -454,6 +474,34 @@ thp_swpout_fallback
	Usually because failed to allocate some continuous swap space
	for the huge page.

In /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats, there are
also individual counters for each huge page size, which can be used to
monitor the system's effectiveness in providing huge pages for use. Each
counter has its own corresponding file.

anon_fault_alloc
	is incremented every time a huge page is successfully
	allocated and charged to handle a page fault.

anon_fault_fallback
	is incremented if a page fault fails to allocate or charge
	a huge page and instead falls back to using huge pages with
	lower orders or small pages.

anon_fault_fallback_charge
	is incremented if a page fault fails to charge a huge page and
	instead falls back to using huge pages with lower orders or
	small pages even though the allocation was successful.

anon_swpout
	is incremented every time a huge page is swapped out in one
	piece without splitting.

anon_swpout_fallback
	is incremented if a huge page has to be split before swapout,
	usually because the kernel failed to allocate some contiguous swap
	space for the huge page.

As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
+12 −0
Original line number Diff line number Diff line
@@ -1147,6 +1147,18 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
 */
#define arch_wants_old_prefaulted_pte	cpu_has_hw_af

/*
 * Ask for exec memory to be brought into the pagecache using folios of at
 * least 64K. Larger folios let the iTLB store translations more efficiently,
 * at the price of possible read amplification when data read from disk is
 * never used. That cost does not depend on the base page size, so the block
 * size is fixed at 64K whatever the page size. With 4K base pages a 64K block
 * can be contpte-mapped (16 pages into 1 iTLB entry); with 16K base pages HPA
 * can coalesce it (4 pages into 1 TLB entry).
 */
#define arch_wants_exec_folio_order()	(ilog2(SZ_64K >> PAGE_SHIFT))

static inline bool pud_sect_supported(void)
{
	return PAGE_SIZE == SZ_4K;
+1 −0
Original line number Diff line number Diff line
@@ -335,6 +335,7 @@ extern void page_frag_free(void *addr);

void page_alloc_init_cpuhp(void);
int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_zone_pages(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
+27 −0
Original line number Diff line number Diff line
@@ -51,6 +51,9 @@ enum transparent_hugepage_flag {
	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
	TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG,
	TRANSPARENT_HUGEPAGE_FILE_EXEC_MTHP_FLAG,
	TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG,
	TRANSPARENT_HUGEPAGE_ANON_MAPPING_ALIGN_FLAG,
};

struct kobject;
@@ -101,6 +104,7 @@ extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
extern unsigned long huge_anon_orders_madvise;
extern unsigned long huge_anon_orders_inherit;
extern unsigned long huge_pcp_allow_orders;

static inline bool hugepage_global_enabled(void)
{
@@ -257,6 +261,29 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
					  enforce_sysfs, orders);
}

/*
 * Identifiers for the per-order mTHP event counters. Each value is used as
 * the second index into mthp_stat.stats[][].
 *
 * NOTE(review): the names appear to correspond to the per-size counter files
 * (anon_fault_alloc, anon_fault_fallback, anon_fault_fallback_charge,
 * anon_swpout, anon_swpout_fallback) described in
 * Documentation/admin-guide/mm/transhuge.rst - confirm against the sysfs code.
 */
enum mthp_stat_item {
	MTHP_STAT_ANON_FAULT_ALLOC,
	MTHP_STAT_ANON_FAULT_FALLBACK,
	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
	MTHP_STAT_ANON_SWPOUT,
	MTHP_STAT_ANON_SWPOUT_FALLBACK,
	/* Not a counter: number of items above, used to size the stats array. */
	__MTHP_STAT_COUNT
};

/*
 * Table of mTHP event counters, indexed as stats[order][item] (see
 * count_mthp_stat()). The first dimension has ilog2(MAX_PTRS_PER_PTE) + 1
 * rows; the second has one slot per enum mthp_stat_item value.
 */
struct mthp_stat {
	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

/* Per-CPU counter table; this is only the declaration. */
DECLARE_PER_CPU(struct mthp_stat, mthp_stats);

/*
 * count_mthp_stat - bump one per-order mTHP counter
 * @order: order the event applies to
 * @item:  which counter to bump
 *
 * Only orders in the range 1..PMD_ORDER are counted; any other order is
 * ignored and the per-CPU table is left untouched. The increment itself is
 * done with this_cpu_inc() on mthp_stats.
 */
static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
	if (order > 0 && order <= PMD_ORDER)
		this_cpu_inc(mthp_stats.stats[order][item]);
}

/*
 * Evaluates to non-zero when the USE_ZERO_PAGE bit is set in
 * transparent_hugepage_flags. The result is the masked bit itself, not a
 * value normalised to 0/1.
 */
#define transparent_hugepage_use_zero_page()				\
	((1 << TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG) &		\
	 transparent_hugepage_flags)
Loading