Unverified commit 87cf6952, authored by openeuler-ci-bot, committed by Gitee

!3064 mm: PCP high auto-tuning

Merge Pull Request from: @ci-robot 
 
PR sync from: Ze Zuo <zuoze1@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/53JBDDN3LWQ7YXTD7DR3DIRQUP6IMN7Z/ 
The page allocation performance requirements of different workloads
are often different, so the PCP (Per-CPU Pageset) high watermark on
each CPU needs to be tuned automatically to optimize page allocation
performance.

The patches in this series are as follows:

[1/9] mm, pcp: avoid to drain PCP when process exit
[2/9] cacheinfo: calculate size of per-CPU data cache slice
[3/9] mm, pcp: reduce lock contention for draining high-order pages
[4/9] mm: restrict the pcp batch scale factor to avoid too long latency
[5/9] mm, page_alloc: scale the number of pages that are batch allocated
[6/9] mm: add framework for PCP high auto-tuning
[7/9] mm: tune PCP high automatically
[8/9] mm, pcp: decrease PCP high if free pages < high watermark
[9/9] mm, pcp: reduce detecting time of consecutive high order page freeing

Patches [1/9]-[3/9] optimize PCP draining for consecutive high-order
page freeing.

Patches [4/9] and [5/9] optimize batch freeing and allocation.

Patches [6/9]-[8/9] implement and optimize a PCP high auto-tuning
method.

Patch [9/9] further optimizes PCP draining for consecutive high-order
page freeing, based on the PCP high auto-tuning.

Huang Ying (9):
  mm, pcp: avoid to drain PCP when process exit
  cacheinfo: calculate size of per-CPU data cache slice
  mm, pcp: reduce lock contention for draining high-order pages
  mm: restrict the pcp batch scale factor to avoid too long latency
  mm, page_alloc: scale the number of pages that are batch allocated
  mm: add framework for PCP high auto-tuning
  mm: tune PCP high automatically
  mm, pcp: decrease PCP high if free pages < high watermark
  mm, pcp: reduce detecting time of consecutive high order page freeing


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8JXIR 
 
Link: https://gitee.com/openeuler/kernel/pulls/3064

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 95b98a3f 75049248
drivers/base/cacheinfo.c: +50 −1
@@ -898,6 +898,48 @@ static int cache_add_dev(unsigned int cpu)
 	return rc;
 }
 
+/*
+ * Calculate the size of the per-CPU data cache slice.  This can be
+ * used to estimate the size of the data cache slice that can be used
+ * by one CPU under ideal circumstances.  UNIFIED caches are counted
+ * in addition to DATA caches.  So, please consider code cache usage
+ * when using the result.
+ *
+ * Because the cache inclusive/non-inclusive information isn't
+ * available, we just use the size of the per-CPU slice of the LLC to
+ * make the result more predictable across architectures.
+ */
+static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
+{
+	struct cpu_cacheinfo *ci;
+	struct cacheinfo *llc;
+	unsigned int nr_shared;
+
+	if (!last_level_cache_is_valid(cpu))
+		return;
+
+	ci = ci_cacheinfo(cpu);
+	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+	if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+		return;
+
+	nr_shared = cpumask_weight(&llc->shared_cpu_map);
+	if (nr_shared)
+		ci->per_cpu_data_slice_size = llc->size / nr_shared;
+}
+
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+{
+	unsigned int icpu;
+
+	for_each_online_cpu(icpu) {
+		if (!cpu_online && icpu == cpu)
+			continue;
+		update_per_cpu_data_slice_size_cpu(icpu);
+	}
+}
+
 static int cacheinfo_cpu_online(unsigned int cpu)
 {
 	int rc = detect_cache_attributes(cpu);
@@ -906,6 +948,11 @@ static int cacheinfo_cpu_online(unsigned int cpu)
 		return rc;
 	rc = cache_add_dev(cpu);
 	if (rc)
-		free_cache_attributes(cpu);
+		goto err;
+	update_per_cpu_data_slice_size(true, cpu);
+	setup_pcp_cacheinfo();
+	return 0;
+err:
+	free_cache_attributes(cpu);
 	return rc;
 }
@@ -916,6 +963,8 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)
 		cpu_cache_sysfs_exit(cpu);
 
 	free_cache_attributes(cpu);
+	update_per_cpu_data_slice_size(false, cpu);
+	setup_pcp_cacheinfo();
 	return 0;
 }
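
For reference, the slice computation above is just the LLC size divided
by the number of CPUs sharing it. Here is a standalone userspace sketch
of the same arithmetic; the cache size and sharer count are made-up
example values:

#include <stdio.h>

/* Mirrors llc->size / nr_shared from the hunk above. */
static unsigned int data_slice_size(unsigned int llc_size,
				    unsigned int nr_shared)
{
	return nr_shared ? llc_size / nr_shared : 0;
}

int main(void)
{
	/* e.g. a 32 MiB LLC shared by 16 CPUs -> a 2 MiB per-CPU slice */
	printf("%u bytes\n", data_slice_size(32u << 20, 16));
	return 0;
}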

include/linux/cacheinfo.h: +1 −0
@@ -73,6 +73,7 @@ struct cacheinfo {
 
 struct cpu_cacheinfo {
 	struct cacheinfo *info_list;
+	unsigned int per_cpu_data_slice_size;
 	unsigned int num_levels;
 	unsigned int num_leaves;
 	bool cpu_map_populated;
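
Consumers reach the new field through get_cpu_cacheinfo(). A minimal
kernel-style sketch of such a lookup; the helper name and the idea of a
caller-supplied threshold are invented for illustration, not taken from
the series:

#include <linux/cacheinfo.h>
#include <linux/types.h>

/* Hypothetical helper: is this CPU's LLC data slice larger than the
 * given number of bytes?  A slice size of 0 means the size could not
 * be determined, so the comparison fails. */
static bool cpu_data_slice_exceeds(unsigned int cpu, unsigned int bytes)
{
	struct cpu_cacheinfo *cci = get_cpu_cacheinfo(cpu);

	return cci && cci->per_cpu_data_slice_size > bytes;
}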
include/linux/gfp.h: +2 −0
@@ -320,11 +320,13 @@ extern void page_frag_free(void *addr);
 #define free_page(addr) free_pages((addr), 0)
 
 void page_alloc_init_cpuhp(void);
+int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
 void page_alloc_init_late(void);
+void setup_pcp_cacheinfo(void);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
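
decay_pcp_high() is only declared here; the series implements it in
mm/page_alloc.c, where it is invoked periodically so that pcp->high
falls back toward pcp->high_min once freeing activity stops and idle
PCP pages return to the buddy allocator. A userspace sketch of the
decay idea, assuming a step of high/8 per period (an illustrative
choice, not a value taken from this hunk):

#include <stdio.h>

/* Shrink high toward high_min by 1/8 per period. */
static int decay_high(int high, int high_min)
{
	int next = high - (high >> 3);

	return next > high_min ? next : high_min;
}

int main(void)
{
	int high = 1024, high_min = 64;

	while (high > high_min) {
		high = decay_high(high, high_min);
		printf("high=%d\n", high);
	}
	return 0;
}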
include/linux/mmzone.h: +24 −3
@@ -676,15 +676,34 @@ enum zone_watermarks {
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
+/*
+ * Flags used in pcp->flags field.
+ *
+ * PCPF_PREV_FREE_HIGH_ORDER: a high-order page was freed in the
+ * previous page freeing.  This avoids draining the PCP for an
+ * occasional high-order page free.
+ *
+ * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in the PCP before
+ * draining it for consecutive high-order page freeing without
+ * allocation, if the CPU's data cache slice is large enough.  This
+ * reduces zone lock contention and keeps cache-hot pages reusable.
+ */
+#define	PCPF_PREV_FREE_HIGH_ORDER	BIT(0)
+#define	PCPF_FREE_HIGH_BATCH		BIT(1)
+
 struct per_cpu_pages {
 	spinlock_t lock;	/* Protects lists field */
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
+	int high_min;		/* min high watermark */
+	int high_max;		/* max high watermark */
 	int batch;		/* chunk size for buddy add/remove */
-	short free_factor;	/* batch scaling factor during free */
+	u8 flags;		/* protected by pcp->lock */
+	u8 alloc_factor;	/* batch scaling factor during allocate */
 #ifdef CONFIG_NUMA
-	short expire;		/* When 0, remote pagesets are drained */
+	u8 expire;		/* When 0, remote pagesets are drained */
 #endif
+	short free_count;	/* consecutive free count */
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[NR_PCP_LISTS];
@@ -837,7 +856,8 @@ struct zone {
 	 * the high and batch values are copied to individual pagesets for
 	 * faster access
 	 */
-	int pageset_high;
+	int pageset_high_min;
+	int pageset_high_max;
 	int pageset_batch;
 
 #ifndef CONFIG_SPARSEMEM
@@ -998,6 +1018,7 @@ enum zone_flags {
 					 * Cleared when kswapd is woken.
 					 */
 	ZONE_RECLAIM_ACTIVE,		/* kswapd may be scanning the zone. */
+	ZONE_BELOW_HIGH,		/* zone is below high watermark. */
 };
 
 static inline unsigned long zone_managed_pages(struct zone *zone)
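
The two pcp->flags bits work together with the new free_count field to
drive the drain heuristics: PCPF_PREV_FREE_HIGH_ORDER remembers that
the previous free was high-order, so one stray high-order free does not
trigger a drain while a run of them does. A simplified userspace sketch
of that detection; the real policy in mm/page_alloc.c has additional
conditions (PCPF_FREE_HIGH_BATCH, allocation activity, etc.):

#include <stdbool.h>
#include <stdio.h>

#define PCPF_PREV_FREE_HIGH_ORDER 0x1

/* Drain only when the previous free was also high-order. */
static bool want_drain(unsigned int *flags, unsigned int order)
{
	bool consecutive = false;

	if (order) {			/* high-order free */
		if (*flags & PCPF_PREV_FREE_HIGH_ORDER)
			consecutive = true;
		*flags |= PCPF_PREV_FREE_HIGH_ORDER;
	} else {			/* order-0 free breaks the run */
		*flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
	}
	return consecutive;
}

int main(void)
{
	unsigned int flags = 0;
	unsigned int orders[] = { 2, 0, 2, 2, 2 };

	for (int i = 0; i < 5; i++)
		printf("order=%u drain=%d\n", orders[i],
		       want_drain(&flags, orders[i]));
	return 0;
}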
mm/Kconfig: +11 −0
@@ -704,6 +704,17 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 config CONTIG_ALLOC
 	def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
 
+config PCP_BATCH_SCALE_MAX
+	int "Maximum scale factor of PCP (Per-CPU pageset) batch allocate/free"
+	default 5
+	range 0 6
+	help
+	  In the page allocator, PCP (Per-CPU pageset) is refilled and drained
+	  in batches.  The batch number is scaled automatically to improve
+	  page allocation/free throughput, but a scale factor that is too
+	  large may hurt latency.  This option sets the upper limit of the
+	  scale factor to bound the maximum latency.
+
 config PHYS_ADDR_T_64BIT
 	def_bool 64BIT
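
The latency bound is easy to quantify: each refill or drain moves up to
pcp->batch << scale_factor pages while the zone lock is held, so the
range 0..6 caps the largest chunk. The table below uses an assumed
typical pcp->batch of 63 pages; the actual batch is computed per zone:

#include <stdio.h>

int main(void)
{
	int batch = 63;	/* assumed typical pcp->batch */

	/* pages moved under the zone lock per batch, by scale factor */
	for (int scale = 0; scale <= 6; scale++)
		printf("scale=%d -> %4d pages (%5d KiB with 4 KiB pages)\n",
		       scale, batch << scale, (batch << scale) * 4);
	return 0;
}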
