Commit 8968270e authored by Ma Wupeng, committed by Wang Wensheng

mm: Add reliable memory use limit for user tasks

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S


CVE: NA

--------------------------------

There is an upper limit on all memory allocations if the following conditions
are met:
- gfp_zone(gfp & ~ GFP_RELIABLE) == ZONE_MOVABLE
- gfp & GFP_RELIABLE is true

Init tasks will allocate memory from the non-mirrored region if their
allocations hit this limit.
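
As a rough illustration (not part of this patch; the helper name is
hypothetical), the gate described above amounts to:

static inline bool reliable_alloc_is_limited(gfp_t gfp)
{
	/*
	 * Subject to task_reliable_limit: a GFP_RELIABLE allocation that
	 * would otherwise land in ZONE_MOVABLE.
	 */
	return (gfp & GFP_RELIABLE) &&
	       gfp_zone(gfp & ~GFP_RELIABLE) == ZONE_MOVABLE;
}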

The limit can be set or accessed via /proc/sys/vm/task_reliable_limit.

The default value of this limit is ULONG_MAX. Users can set it to any value
between the reliable memory size currently used by user tasks and the total
reliable memory size.
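
A minimal userspace sketch of reading and updating the sysctl (illustration
only, not part of this patch; the 4 GiB value is just an example and must
fall inside the range described above):

#include <stdio.h>

int main(void)
{
	unsigned long limit;
	FILE *f;

	/* Read the current limit, reported in bytes. */
	f = fopen("/proc/sys/vm/task_reliable_limit", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%lu", &limit) == 1)
		printf("task_reliable_limit: %lu bytes\n", limit);
	fclose(f);

	/* Write a new limit, e.g. 4 GiB (requires root). */
	f = fopen("/proc/sys/vm/task_reliable_limit", "w");
	if (!f)
		return 1;
	fprintf(f, "%lu\n", 4UL << 30);
	fclose(f);
	return 0;
}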

Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
parent 62639947
+40 −0
@@ -5,8 +5,10 @@
#include <linux/stddef.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/oom.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/percpu_counter.h>

#ifdef CONFIG_MEMORY_RELIABLE

@@ -17,6 +19,7 @@ extern bool shmem_reliable;
extern bool pagecache_use_reliable_mem;
extern struct percpu_counter pagecache_reliable_pages;
extern struct percpu_counter anon_reliable_pages;
extern unsigned long task_reliable_limit __read_mostly;

extern void mem_reliable_init(bool has_unmirrored_mem,
			      unsigned long *zone_movable_pfn,
@@ -30,6 +33,8 @@ extern void reliable_lru_add(enum lru_list lru, struct page *page,
extern void reliable_lru_add_batch(int zid, enum lru_list lru,
					      int val);
extern bool mem_reliable_counter_initialized(void);
extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
				       int preferred_nid, nodemask_t *nodemask);

static inline bool mem_reliable_is_enabled(void)
{
@@ -74,6 +79,31 @@ static inline bool page_reliable(struct page *page)

	return page_zonenum(page) < ZONE_MOVABLE;
}

static inline u64 task_reliable_used_pages(void)
{
	s64 nr_pages;

	nr_pages = percpu_counter_read_positive(&pagecache_reliable_pages);
	nr_pages += percpu_counter_read_positive(&anon_reliable_pages);

	return nr_pages;
}

static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
	return (task_reliable_used_pages() + nr_page) <=
	       (task_reliable_limit >> PAGE_SHIFT);
}

static inline bool mem_reliable_should_reclaim(void)
{
	if (percpu_counter_sum_positive(&pagecache_reliable_pages) >=
	    MAX_ORDER_NR_PAGES)
		return true;

	return false;
}
#else
#define reliable_enabled 0
#define pagecache_use_reliable_mem 0
@@ -98,6 +128,16 @@ static inline void reliable_lru_add(enum lru_list lru, struct page *page,
static inline void reliable_lru_add_batch(int zid, enum lru_list lru,
					  int val) {}
static inline bool mem_reliable_counter_initialized(void) { return false; }
static inline u64 task_reliable_used_pages(void) { return 0; }
static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
	return false;
}
static inline bool mem_reliable_should_reclaim(void) { return false; }
static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
					      unsigned int order,
					      int preferred_nid,
					      nodemask_t *nodemask) {}
#endif

#endif
+46 −0
@@ -20,6 +20,8 @@ bool pagecache_use_reliable_mem __read_mostly = true;
struct percpu_counter pagecache_reliable_pages;
struct percpu_counter anon_reliable_pages;
static unsigned long reliable_pagecache_max_bytes = ULONG_MAX;
/* reliable memory limit for user tasks with the reliable flag */
unsigned long task_reliable_limit = ULONG_MAX;

bool mem_reliable_counter_initialized(void)
{
@@ -178,6 +180,26 @@ void reliable_report_meminfo(struct seq_file *m)
	}
}

static int reliable_limit_handler(struct ctl_table *table, int write,
				  void __user *buffer, size_t *length,
				  loff_t *ppos)
{
	unsigned long old = task_reliable_limit;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
	if (ret == 0 && write) {
		if (task_reliable_limit > PAGES_TO_B(total_reliable_pages()) ||
		    task_reliable_limit <
			    (task_reliable_used_pages() << PAGE_SHIFT)) {
			task_reliable_limit = old;
			return -EINVAL;
		}
	}

	return ret;
}

static int reliable_pagecache_max_bytes_write(struct ctl_table *table,
					      int write, void __user *buffer,
					      size_t *length, loff_t *ppos)
@@ -205,6 +227,13 @@ static struct ctl_table reliable_ctl_table[] = {
		.mode = 0644,
		.proc_handler = reliable_pagecache_max_bytes_write,
	},
	{
		.procname = "task_reliable_limit",
		.data = &task_reliable_limit,
		.maxlen = sizeof(task_reliable_limit),
		.mode = 0644,
		.proc_handler = reliable_limit_handler,
	},
	{}
};

@@ -235,6 +264,23 @@ static int __init reliable_sysctl_init(void)
}
arch_initcall(reliable_sysctl_init);

void mem_reliable_out_of_memory(gfp_t gfp, unsigned int order,
				int preferred_nid, nodemask_t *nodemask)
{
	struct oom_control oc = {
		.zonelist = node_zonelist(preferred_nid, gfp),
		.nodemask = nodemask,
		.memcg = NULL,
		.gfp_mask = gfp,
		.order = order,
	};

	if (!mutex_trylock(&oom_lock))
		return;
	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}

static int __init setup_reliable_debug(char *str)
{
	if (*str++ != '=' || !*str)
+87 −0
@@ -5170,6 +5170,89 @@ static inline void prepare_before_alloc(gfp_t *gfp_mask)
	*gfp_mask &= ~GFP_RELIABLE;
}

static inline long mem_reliable_direct_reclaim(int nr_pages, struct alloc_context *ac)
{
	long nr_reclaimed = 0;

	while (nr_reclaimed < nr_pages) {
		/* try to free cache from reliable region */
		long progress = __perform_reclaim(GFP_KERNEL, 0, ac);

		nr_reclaimed += progress;
		if (progress < SWAP_CLUSTER_MAX)
			break;
	}

	return nr_reclaimed;
}

/*
 * Returning true means the memory allocation needs to be retried and the
 * GFP_RELIABLE flag must be cleared.
 */
static inline bool check_after_alloc(gfp_t *gfp, unsigned int order,
				     int preferred_nid,
				     struct alloc_context *ac,
				     struct page **_page)
{
	int retry_times = MAX_RECLAIM_RETRIES;
	int nr_pages;

	if (!mem_reliable_is_enabled())
		return false;

	if (!(*gfp & GFP_RELIABLE))
		return false;

	if (!*_page)
		goto out_retry;

	if (*gfp & __GFP_NOFAIL || current->flags & PF_MEMALLOC)
		goto out;

	/* percpu counter is not initialized, ignore limit check */
	if (!mem_reliable_counter_initialized())
		goto out;

limit_check:
	/* user task is limited by task_reliable_limit */
	if (!reliable_mem_limit_check(1 << order))
		goto out_free_page;

	goto out;

out_free_page:
	if (mem_reliable_should_reclaim() && retry_times--) {
		nr_pages = mem_reliable_direct_reclaim(1 << order, ac);
		if (nr_pages)
			goto limit_check;
	}

	__free_pages(*_page, order);
	*_page = NULL;

out_retry:
	if (is_global_init(current)) {
		*gfp &= ~GFP_RELIABLE;
		return true;
	}

	if (*gfp & (__GFP_NORETRY | __GFP_RETRY_MAYFAIL | __GFP_THISNODE))
		goto out;

	/* Coredumps can quickly deplete all memory reserves */
	if (current->flags & PF_DUMPCORE)
		goto out;
	/* The OOM killer will not help higher order allocs */
	if (order > PAGE_ALLOC_COSTLY_ORDER)
		goto out;

	/* invoke the OOM killer for reliable memory */
	mem_reliable_out_of_memory(*gfp, order, preferred_nid, ac->nodemask);
out:
	return false;
}

/*
 * This is the 'heart' of the zoned buddy allocator.
 */
@@ -5194,6 +5277,7 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,

	prepare_before_alloc(&gfp);

retry:
	alloc_gfp = gfp;
	if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
			&alloc_gfp, &alloc_flags))
@@ -5239,6 +5323,9 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
		page = NULL;
	}

	if (check_after_alloc(&gfp, order, preferred_nid, &ac, &page))
		goto retry;

	trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);

	return page;