Commit e0fb8bd6 authored by Ma Wupeng's avatar Ma Wupeng Committed by Wupeng Ma
Browse files

mm: mem_reliable: Introduce memory reliable

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8USBA


CVE: NA

--------------------------------

Introduction

============

Memory reliable feature is a memory tiering mechanism. It is based on
kernel mirror feature, which splits memory into two separate regions,
mirrored(reliable) region and non-mirrored (non-reliable) region.

for kernel mirror feature:

- allocate kernel memory from mirrored region by default
- allocate user memory from non-mirrored region by default

non-mirrored region will be arranged into ZONE_MOVABLE.

for kernel reliable feature, it has additional features below:

- normal user tasks never allocate memory from the mirrored region via
  userspace APIs (malloc, mmap, etc.)
- special user tasks will allocate memory from mirrored region by default
- shmem/pagecache allocate memory from mirrored region by default
- upper limit of mirrored region allocated for user tasks, shmem and
  page cache

Support a reliable-fallback mechanism which allows special user tasks, shmem
and page cache to fall back to allocating from the non-mirrored region; this
is the default setting.

In order to fulfill these goals:

- GFP_RELIABLE flag added for a task to alloc memory from mirrored region.
- the high_zoneidx for special user tasks/shmem/pagecache is set to
  ZONE_NORMAL to alloc memory from mirrored region.
- normal user tasks can only alloc memory from ZONE_MOVABLE.

This patch is just the main framework; memory reliable support for special
user tasks, page cache and shmem have their own patches.

To enable this function, mirrored(reliable) memory is needed and
"kernelcore=reliable" should be added to kernel parameters.

Signed-off-by: default avatarMa Wupeng <mawupeng1@huawei.com>
parent 0277fdb2
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -2449,7 +2449,7 @@
	keepinitrd	[HW,ARM]

	kernelcore=	[KNL,X86,IA-64,PPC]
			Format: nn[KMGTPE] | nn% | "mirror"
			Format: nn[KMGTPE] | nn% | "mirror" | "reliable"
			This parameter specifies the amount of memory usable by
			the kernel for non-movable allocations.  The requested
			amount is spread evenly throughout all nodes in the
@@ -2473,6 +2473,10 @@
			for Movable pages.  "nn[KMGTPE]", "nn%", and "mirror"
			are exclusive, so you cannot specify multiple forms.

			Option "reliable" is based on option "mirror", but
			makes some extensions. These two features are
			alternatives. Currently only arm64 is supported.

	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
			Format: <Controller#>[,poll interval]
			The controller # is the number of the ehci usb debug
+6 −0
Original line number Diff line number Diff line
@@ -134,6 +134,12 @@ static inline enum zone_type gfp_zone(gfp_t flags)
	z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
					 ((1 << GFP_ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);

#ifdef CONFIG_MEMORY_RELIABLE
	if (z == ZONE_MOVABLE && (flags & GFP_RELIABLE))
		return ZONE_NORMAL;
#endif

	return z;
}

+9 −1
Original line number Diff line number Diff line
@@ -31,7 +31,11 @@ typedef unsigned int __bitwise gfp_t;
#define ___GFP_IO		0x40u
#define ___GFP_FS		0x80u
#define ___GFP_ZERO		0x100u
/* 0x200u unused */
#ifdef CONFIG_MEMORY_RELIABLE
#define ___GFP_RELIABLE		0x200u
#else
#define ___GFP_RELIABLE		0
#endif
#define ___GFP_DIRECT_RECLAIM	0x400u
#define ___GFP_KSWAPD_RECLAIM	0x800u
#define ___GFP_WRITE		0x1000u
@@ -248,6 +252,9 @@ typedef unsigned int __bitwise gfp_t;
/* Disable lockdep for GFP context tracking */
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)

/* Alloc memory from mirrored region */
#define __GFP_RELIABLE ((__force gfp_t)___GFP_RELIABLE)

/* Room for N __GFP_FOO bits */
#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -336,5 +343,6 @@ typedef unsigned int __bitwise gfp_t;
#define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
#define GFP_RELIABLE __GFP_RELIABLE

#endif /* __LINUX_GFP_TYPES_H */
+79 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_MEM_RELIABLE__
#define __MM_MEM_RELIABLE__

#ifdef CONFIG_MEMORY_RELIABLE

#include <linux/stddef.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/mm_types.h>
#include <linux/sched.h>

DECLARE_STATIC_KEY_FALSE(mem_reliable);

extern bool reliable_enabled;

void mem_reliable_init(bool has_unmirrored_mem, unsigned long mirrored_sz);
bool mem_reliable_status(void);

/*
 * Fast-path check for whether the memory reliable feature is active.
 * Backed by the static key declared above (DECLARE_STATIC_KEY_FALSE),
 * so the disabled case reduces to a patched no-op branch at runtime.
 */
static inline bool mem_reliable_is_enabled(void)
{
	return static_branch_likely(&mem_reliable);
}

/*
 * page_reliable - does @page live in the mirrored (reliable) region?
 *
 * Returns false when the feature is disabled or @page is NULL.  Any
 * page placed below ZONE_MOVABLE is considered reliable, since the
 * non-mirrored region is arranged into ZONE_MOVABLE.
 */
static inline bool page_reliable(struct page *page)
{
	if (!mem_reliable_is_enabled() || !page)
		return false;

	return page_zonenum(page) < ZONE_MOVABLE;
}

/*
 * folio_reliable - does @folio live in the mirrored (reliable) region?
 *
 * Folio counterpart of page_reliable(): false when the feature is off
 * or @folio is NULL, true for folios placed below ZONE_MOVABLE.
 */
static inline bool folio_reliable(struct folio *folio)
{
	if (!mem_reliable_is_enabled() || !folio)
		return false;

	return folio_zonenum(folio) < ZONE_MOVABLE;
}

/*
 * skip_non_mirrored_zone - should zone @z be skipped for this allocation?
 *
 * Ordinary user allocations must stay out of the mirrored region, so for
 * a user task doing a movable highmem allocation without GFP_RELIABLE,
 * report true for every zone below ZONE_MOVABLE.
 */
static inline bool skip_non_mirrored_zone(gfp_t gfp, struct zoneref *z)
{
	if (!mem_reliable_is_enabled())
		return false;

	/* Kernel threads and tasks without an mm are never restricted. */
	if (!current->mm || (current->flags & PF_KTHREAD))
		return false;

	/* Explicitly reliable allocations may use the mirrored region. */
	if (gfp & GFP_RELIABLE)
		return false;

	/* user tasks can only alloc memory from non-mirrored region */
	if ((gfp & __GFP_HIGHMEM) && (gfp & __GFP_MOVABLE))
		return zonelist_zone_idx(z) < ZONE_MOVABLE;

	return false;
}
#else
/* CONFIG_MEMORY_RELIABLE=n: constant 0 so conditionals on it compile away. */
#define reliable_enabled 0

/*
 * Stub implementations for kernels built without CONFIG_MEMORY_RELIABLE:
 * the feature is never enabled, no page or folio is ever reliable, and
 * no zone is ever skipped, so callers need no #ifdef guards of their own.
 */
static inline bool mem_reliable_is_enabled(void) { return false; }
static inline void mem_reliable_init(bool has_unmirrored_mem,
				     unsigned long mirrored_sz) {}
static inline bool page_reliable(struct page *page) { return false; }
static inline bool folio_reliable(struct folio *folio) { return false; }
static inline bool skip_non_mirrored_zone(gfp_t gfp, struct zoneref *z)
{
	return false;
}
static inline bool mem_reliable_status(void) { return false; }
#endif

#endif
+3 −0
Original line number Diff line number Diff line
@@ -4093,4 +4093,7 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end)

#endif

/* added to mm.h to avoid every caller adding new header file */
#include <linux/mem_reliable.h>

#endif /* _LINUX_MM_H */
Loading