mm, pcp: reduce lock contention for draining high-order pages (df67a5fc) · Commits · EulixOS / Software / Kernel

drivers/base/cacheinfo.c

+2 −0

Original line number	Diff line number	Diff line
		@@ -950,6 +950,7 @@ static int cacheinfo_cpu_online(unsigned int cpu)
		if (rc)
		goto err;
		update_per_cpu_data_slice_size(true, cpu);
		setup_pcp_cacheinfo();
		return 0;
		err:
		free_cache_attributes(cpu);
		@@ -963,6 +964,7 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)

		free_cache_attributes(cpu);
		update_per_cpu_data_slice_size(false, cpu);
		setup_pcp_cacheinfo();
		return 0;
		}

include/linux/gfp.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -325,6 +325,7 @@ void drain_all_pages(struct zone *zone);
		void drain_local_pages(struct zone *zone);

		void page_alloc_init_late(void);
		void setup_pcp_cacheinfo(void);

		/*
		* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what

include/linux/mmzone.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -682,8 +682,14 @@ enum zone_watermarks {
		* PCPF_PREV_FREE_HIGH_ORDER: a high-order page was freed by the
		* previous page free. Used to avoid draining the PCP because of
		* an occasional, isolated high-order page free.
		*
		* PCPF_FREE_HIGH_BATCH: if the CPU's data cache slice is large
		* enough, keep "pcp->batch" pages in the PCP before draining it
		* when high-order pages are freed consecutively without any
		* allocation in between. This reduces zone lock contention and
		* keeps cache-hot pages available for reuse.
		*/
		#define PCPF_PREV_FREE_HIGH_ORDER BIT(0)
		#define PCPF_FREE_HIGH_BATCH BIT(1)

		struct per_cpu_pages {
		spinlock_t lock; /* Protects lists field */

mm/page_alloc.c

+37 −1

Original line number	Diff line number	Diff line
		@@ -52,6 +52,7 @@
		#include <linux/psi.h>
		#include <linux/khugepaged.h>
		#include <linux/delayacct.h>
		#include <linux/cacheinfo.h>
		#include <asm/div64.h>
		#include "internal.h"
		#include "shuffle.h"
		@@ -2385,7 +2386,9 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
		*/
		if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
		free_high = (pcp->free_factor &&
		(pcp->flags & PCPF_PREV_FREE_HIGH_ORDER));
		(pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
		(!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
		pcp->count >= READ_ONCE(pcp->batch)));
		pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER;
		} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
		pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
		@@ -5418,6 +5421,39 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)
		mutex_unlock(&pcp_batch_high_lock);
		}

		/*
		 * zone_pcp_update_cacheinfo - refresh PCPF_FREE_HIGH_BATCH for one zone
		 * @zone: zone whose per-CPU pagesets are updated
		 *
		 * For every online CPU, look up that CPU's pageset in @zone and its
		 * cache info, then set PCPF_FREE_HIGH_BATCH when the CPU's data cache
		 * slice, expressed in pages, is larger than three times pcp->batch, and
		 * clear the flag otherwise. The flag update and the read of pcp->batch
		 * are both done while holding pcp->lock.
		 */
		static void zone_pcp_update_cacheinfo(struct zone *zone)
		{
			int cpu;

			for_each_online_cpu(cpu) {
				struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
				struct cpu_cacheinfo *cci = get_cpu_cacheinfo(cpu);

				/*
				 * If the data cache slice of the CPU is large enough,
				 * "pcp->batch" pages can be kept in the PCP before it
				 * is drained for consecutive high-order page frees
				 * without allocation. This reduces zone lock contention
				 * without hurting the sharing of cache-hot pages.
				 */
				spin_lock(&pcp->lock);
				/* Slice size is in bytes; shift converts it to pages. */
				if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
					pcp->flags |= PCPF_FREE_HIGH_BATCH;
				else
					pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
				spin_unlock(&pcp->lock);
			}
		}

		void setup_pcp_cacheinfo(void)
		{
		struct zone *zone;

		for_each_populated_zone(zone)
		zone_pcp_update_cacheinfo(zone);
		}

		/*
		* Allocate per cpu pagesets and initialize them.
		* Before this call only boot pagesets were available.