Unverified commit 87cf6952, authored by openeuler-ci-bot, committed by Gitee

!3064 mm: PCP high auto-tuning

Merge Pull Request from: @ci-robot 
 
PR sync from: Ze Zuo <zuoze1@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/53JBDDN3LWQ7YXTD7DR3DIRQUP6IMN7Z/ 
The page allocation performance requirements of different workloads
are often different, so the PCP (Per-CPU Pageset) high watermark on
each CPU needs to be tuned automatically to optimize page allocation
performance.

The patches in this series are as follows:

[1/9] mm, pcp: avoid to drain PCP when process exit
[2/9] cacheinfo: calculate size of per-CPU data cache slice
[3/9] mm, pcp: reduce lock contention for draining high-order pages
[4/9] mm: restrict the pcp batch scale factor to avoid too long latency
[5/9] mm, page_alloc: scale the number of pages that are batch allocated
[6/9] mm: add framework for PCP high auto-tuning
[7/9] mm: tune PCP high automatically
[8/9] mm, pcp: decrease PCP high if free pages < high watermark
[9/9] mm, pcp: reduce detecting time of consecutive high order page freeing

Patches [1/9]-[3/9] optimize PCP draining for consecutive high-order
page freeing.

Patches [4/9] and [5/9] optimize batch freeing and allocation.

Patches [6/9]-[8/9] implement and optimize a PCP high auto-tuning
method.

Patch [9/9] further optimizes PCP draining for consecutive high-order
page freeing, based on the PCP high auto-tuning.

Huang Ying (9):
  mm, pcp: avoid to drain PCP when process exit
  cacheinfo: calculate size of per-CPU data cache slice
  mm, pcp: reduce lock contention for draining high-order pages
  mm: restrict the pcp batch scale factor to avoid too long latency
  mm, page_alloc: scale the number of pages that are batch allocated
  mm: add framework for PCP high auto-tuning
  mm: tune PCP high automatically
  mm, pcp: decrease PCP high if free pages < high watermark
  mm, pcp: reduce detecting time of consecutive high order page freeing


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8JXIR 
 
Link: https://gitee.com/openeuler/kernel/pulls/3064

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 95b98a3f 75049248
drivers/base/cacheinfo.c: +50 −1
@@ -898,6 +898,48 @@ static int cache_add_dev(unsigned int cpu)
 	return rc;
 }
 
+/*
+ * Calculate the size of the per-CPU data cache slice.  This can be
+ * used to estimate the size of the data cache slice that can be used
+ * by one CPU under ideal circumstances.  UNIFIED caches are counted
+ * in addition to DATA caches.  So, please consider code cache usage
+ * when using the result.
+ *
+ * Because the cache inclusive/non-inclusive information isn't
+ * available, we just use the size of the per-CPU slice of the LLC to
+ * make the result more predictable across architectures.
+ */
+static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
+{
+	struct cpu_cacheinfo *ci;
+	struct cacheinfo *llc;
+	unsigned int nr_shared;
+
+	if (!last_level_cache_is_valid(cpu))
+		return;
+
+	ci = ci_cacheinfo(cpu);
+	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+	if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+		return;
+
+	nr_shared = cpumask_weight(&llc->shared_cpu_map);
+	if (nr_shared)
+		ci->per_cpu_data_slice_size = llc->size / nr_shared;
+}
+
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+{
+	unsigned int icpu;
+
+	for_each_online_cpu(icpu) {
+		if (!cpu_online && icpu == cpu)
+			continue;
+		update_per_cpu_data_slice_size_cpu(icpu);
+	}
+}
+
 static int cacheinfo_cpu_online(unsigned int cpu)
 {
 	int rc = detect_cache_attributes(cpu);
@@ -906,6 +948,11 @@ static int cacheinfo_cpu_online(unsigned int cpu)
 		return rc;
 	rc = cache_add_dev(cpu);
 	if (rc)
-		free_cache_attributes(cpu);
+		goto err;
+	update_per_cpu_data_slice_size(true, cpu);
+	setup_pcp_cacheinfo();
+	return 0;
+err:
+	free_cache_attributes(cpu);
 	return rc;
 }
@@ -916,6 +963,8 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)
 		cpu_cache_sysfs_exit(cpu);
 
 	free_cache_attributes(cpu);
+	update_per_cpu_data_slice_size(false, cpu);
+	setup_pcp_cacheinfo();
 	return 0;
 }
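
For reference, the slice computation above is just the LLC size divided
by the number of CPUs sharing it. Here is a standalone userspace sketch
of the same arithmetic; the cache size and sharer count are made-up
example values:

#include <stdio.h>

/* Mirrors llc->size / nr_shared from the hunk above. */
static unsigned int data_slice_size(unsigned int llc_size,
				    unsigned int nr_shared)
{
	return nr_shared ? llc_size / nr_shared : 0;
}

int main(void)
{
	/* e.g. a 32 MiB LLC shared by 16 CPUs -> a 2 MiB per-CPU slice */
	printf("%u bytes\n", data_slice_size(32u << 20, 16));
	return 0;
}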

include/linux/cacheinfo.h: +1 −0
@@ -73,6 +73,7 @@ struct cacheinfo {
 
 struct cpu_cacheinfo {
 	struct cacheinfo *info_list;
+	unsigned int per_cpu_data_slice_size;
 	unsigned int num_levels;
 	unsigned int num_leaves;
 	bool cpu_map_populated;
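
Consumers reach the new field through get_cpu_cacheinfo(). A minimal
kernel-style sketch of such a lookup; the helper name and the idea of a
caller-supplied threshold are invented for illustration, not taken from
the series:

#include <linux/cacheinfo.h>
#include <linux/types.h>

/* Hypothetical helper: is this CPU's LLC data slice larger than the
 * given number of bytes?  A slice size of 0 means the size could not
 * be determined, so the comparison fails. */
static bool cpu_data_slice_exceeds(unsigned int cpu, unsigned int bytes)
{
	struct cpu_cacheinfo *cci = get_cpu_cacheinfo(cpu);

	return cci && cci->per_cpu_data_slice_size > bytes;
}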
include/linux/gfp.h: +2 −0
@@ -320,11 +320,13 @@ extern void page_frag_free(void *addr);
 #define free_page(addr) free_pages((addr), 0)
 
 void page_alloc_init_cpuhp(void);
+int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
 void page_alloc_init_late(void);
+void setup_pcp_cacheinfo(void);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
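
decay_pcp_high() is only declared here; the series implements it in
mm/page_alloc.c, where it is invoked periodically so that pcp->high
falls back toward pcp->high_min once freeing activity stops and idle
PCP pages return to the buddy allocator. A userspace sketch of the
decay idea, assuming a step of high/8 per period (an illustrative
choice, not a value taken from this hunk):

#include <stdio.h>

/* Shrink high toward high_min by 1/8 per period. */
static int decay_high(int high, int high_min)
{
	int next = high - (high >> 3);

	return next > high_min ? next : high_min;
}

int main(void)
{
	int high = 1024, high_min = 64;

	while (high > high_min) {
		high = decay_high(high, high_min);
		printf("high=%d\n", high);
	}
	return 0;
}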
include/linux/mmzone.h: +24 −3
@@ -676,15 +676,34 @@ enum zone_watermarks {
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
+/*
+ * Flags used in pcp->flags field.
+ *
+ * PCPF_PREV_FREE_HIGH_ORDER: a high-order page was freed in the
+ * previous page freeing.  This avoids draining the PCP for an
+ * occasional high-order page free.
+ *
+ * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in the PCP before
+ * draining it for consecutive high-order page freeing without
+ * allocation, if the CPU's data cache slice is large enough.  This
+ * reduces zone lock contention and keeps cache-hot pages reusable.
+ */
+#define	PCPF_PREV_FREE_HIGH_ORDER	BIT(0)
+#define	PCPF_FREE_HIGH_BATCH		BIT(1)
+
 struct per_cpu_pages {
 	spinlock_t lock;	/* Protects lists field */
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
+	int high_min;		/* min high watermark */
+	int high_max;		/* max high watermark */
 	int batch;		/* chunk size for buddy add/remove */
-	short free_factor;	/* batch scaling factor during free */
+	u8 flags;		/* protected by pcp->lock */
+	u8 alloc_factor;	/* batch scaling factor during allocate */
 #ifdef CONFIG_NUMA
-	short expire;		/* When 0, remote pagesets are drained */
+	u8 expire;		/* When 0, remote pagesets are drained */
 #endif
+	short free_count;	/* consecutive free count */
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[NR_PCP_LISTS];
@@ -837,7 +856,8 @@ struct zone {
 	 * the high and batch values are copied to individual pagesets for
 	 * faster access
 	 */
-	int pageset_high;
+	int pageset_high_min;
+	int pageset_high_max;
 	int pageset_batch;
 
 #ifndef CONFIG_SPARSEMEM
@@ -998,6 +1018,7 @@ enum zone_flags {
 					 * Cleared when kswapd is woken.
 					 */
 	ZONE_RECLAIM_ACTIVE,		/* kswapd may be scanning the zone. */
+	ZONE_BELOW_HIGH,		/* zone is below high watermark. */
 };
 
 static inline unsigned long zone_managed_pages(struct zone *zone)
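
The two pcp->flags bits work together with the new free_count field to
drive the drain heuristics: PCPF_PREV_FREE_HIGH_ORDER remembers that
the previous free was high-order, so one stray high-order free does not
trigger a drain while a run of them does. A simplified userspace sketch
of that detection; the real policy in mm/page_alloc.c has additional
conditions (PCPF_FREE_HIGH_BATCH, allocation activity, etc.):

#include <stdbool.h>
#include <stdio.h>

#define PCPF_PREV_FREE_HIGH_ORDER 0x1

/* Drain only when the previous free was also high-order. */
static bool want_drain(unsigned int *flags, unsigned int order)
{
	bool consecutive = false;

	if (order) {			/* high-order free */
		if (*flags & PCPF_PREV_FREE_HIGH_ORDER)
			consecutive = true;
		*flags |= PCPF_PREV_FREE_HIGH_ORDER;
	} else {			/* order-0 free breaks the run */
		*flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
	}
	return consecutive;
}

int main(void)
{
	unsigned int flags = 0;
	unsigned int orders[] = { 2, 0, 2, 2, 2 };

	for (int i = 0; i < 5; i++)
		printf("order=%u drain=%d\n", orders[i],
		       want_drain(&flags, orders[i]));
	return 0;
}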
mm/Kconfig: +11 −0
@@ -704,6 +704,17 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 config CONTIG_ALLOC
 	def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
 
+config PCP_BATCH_SCALE_MAX
+	int "Maximum scale factor of PCP (Per-CPU pageset) batch allocate/free"
+	default 5
+	range 0 6
+	help
+	  In the page allocator, PCP (Per-CPU pageset) is refilled and drained
+	  in batches.  The batch number is scaled automatically to improve
+	  page allocation/free throughput, but a scale factor that is too
+	  large may hurt latency.  This option sets the upper limit of the
+	  scale factor to bound the maximum latency.
+
 config PHYS_ADDR_T_64BIT
 	def_bool 64BIT
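
The latency bound is easy to quantify: each refill or drain moves up to
pcp->batch << scale_factor pages while the zone lock is held, so the
range 0..6 caps the largest chunk. The table below uses an assumed
typical pcp->batch of 63 pages; the actual batch is computed per zone:

#include <stdio.h>

int main(void)
{
	int batch = 63;	/* assumed typical pcp->batch */

	/* pages moved under the zone lock per batch, by scale factor */
	for (int scale = 0; scale <= 6; scale++)
		printf("scale=%d -> %4d pages (%5d KiB with 4 KiB pages)\n",
		       scale, batch << scale, (batch << scale) * 4);
	return 0;
}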
