Commit 682fc25d authored by Liu Shixin's avatar Liu Shixin
Browse files

mm/swapfile: introduce per-memcg swapfile control

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7CGGT


CVE: NA

--------------------------------

With the memory.swapfile interface, the swap devices available to a memcg
can be limited. The accepted values are 'all', 'none', or the path of a
valid swap device.
Usage:
	echo /dev/zram0 > memory.swapfile

If the swap device goes offline, the memcg's swapfile setting falls back to 'none'.

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
parent 5361bef3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ Brief summary of control files.
 memory.force_empty		     trigger forced page reclaim
 memory.force_swapin		     trigger forced swapin anon page
 memory.swap.max		     set/show limit for swap
 memory.swapfile		     set/show available swap file
 memory.pressure_level		     set memory pressure notifications
 memory.swappiness		     set/show swappiness parameter of vmscan
				     (See sysctl's vm.swappiness)
+18 −0
Original line number Diff line number Diff line
@@ -50,6 +50,11 @@ enum memcg_memory_event {
	MEMCG_NR_MEMORY_EVENTS,
};

/*
 * Special values for a memcg's swap type. Any other value is passed on
 * as a specific swap type by the code that reads it.
 */
enum {
	SWAP_TYPE_ALL	= -1, /* allowed to use any swap file */
	SWAP_TYPE_NONE	= -2, /* prohibited from using any swap file */
};

struct mem_cgroup_reclaim_cookie {
	pg_data_t *pgdat;
	unsigned int generation;
@@ -242,6 +247,7 @@ struct obj_cgroup {

/* Per-memcg swap settings, reached through memcg->swap_dev. */
struct swap_device {
	/* Swap limit; PAGE_COUNTER_MAX is written when resetting to defaults. */
	unsigned long max;
	/* SWAP_TYPE_ALL, SWAP_TYPE_NONE, or a specific swap type. */
	int type;
};

/*
@@ -1307,6 +1313,9 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)

int mem_cgroup_force_empty(struct mem_cgroup *memcg);

int memcg_get_swap_type(struct page *page);
void memcg_remove_swapfile(int type);

#else /* CONFIG_MEMCG */

#define MEM_CGROUP_ID_SHIFT	0
@@ -1714,6 +1723,15 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static inline void memcg_print_bad_task(struct oom_control *oc)
{
}

/*
 * !CONFIG_MEMCG stub: there is no per-memcg restriction to look up, so
 * every page is reported as allowed to use any swap file.
 */
static inline int memcg_get_swap_type(struct page *page)
{
	return SWAP_TYPE_ALL;
}

/* !CONFIG_MEMCG stub: no memcg state exists, so there is nothing to update. */
static inline void memcg_remove_swapfile(int type)
{
}
#endif /* CONFIG_MEMCG */

/* idx can be of type enum memcg_stat_item or node_stat_item */
+9 −1
Original line number Diff line number Diff line
@@ -511,11 +511,14 @@ static inline long get_nr_swap_pages(void)
	return atomic_long_read(&nr_swap_pages);
}

extern long get_nr_swap_pages_type(int type);

extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(struct page *page);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size,
			  int type);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
@@ -547,6 +550,11 @@ static inline void put_swap_device(struct swap_info_struct *si)
	percpu_ref_put(&si->sei->users);
}

#ifdef CONFIG_MEMCG_SWAP_QOS
extern int write_swapfile_for_memcg(struct address_space *mapping,
				    int *swap_type);
extern void read_swapfile_for_memcg(struct seq_file *m, int type);
#endif
#else /* CONFIG_SWAP */

static inline int swap_readpage(struct page *page, bool do_poll)
+146 −4
Original line number Diff line number Diff line
@@ -4065,8 +4065,10 @@ static void memcg_swap_qos_reset(void)
{
	struct mem_cgroup *memcg;

	for_each_mem_cgroup(memcg)
	for_each_mem_cgroup(memcg) {
		WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
	}
}

static int sysctl_memcg_swap_qos_handler(struct ctl_table *table, int write,
@@ -4157,11 +4159,15 @@ static void memcg_free_swap_device(struct mem_cgroup *memcg)
/*
 * Initialise the swap settings of @memcg.
 *
 * When swap QoS is disabled or @parent is NULL, the limit is set to
 * PAGE_COUNTER_MAX and the type to SWAP_TYPE_ALL. Otherwise both values
 * are copied from @parent.
 *
 * NOTE(review): this hunk appears to show the removed (unbraced) and the
 * added (braced) lines of the patch interleaved; read it as a diff, not
 * as compilable code.
 */
static void memcg_swap_device_init(struct mem_cgroup *memcg,
				   struct mem_cgroup *parent)
{
	if (!static_branch_likely(&memcg_swap_qos_key) || !parent)
	if (!static_branch_likely(&memcg_swap_qos_key) || !parent) {
		WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
	else
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
	} else {
		WRITE_ONCE(memcg->swap_dev->max,
			   READ_ONCE(parent->swap_dev->max));
		WRITE_ONCE(memcg->swap_dev->type,
			   READ_ONCE(parent->swap_dev->type));
	}
}

u64 memcg_swapmax_read(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -4235,6 +4241,121 @@ static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
	return 0;
}

/*
 * seq_file show handler for the memcg "swapfile" control file.
 *
 * Prints "all" when swap QoS is disabled or the stored type is
 * SWAP_TYPE_ALL, and "none" when it is SWAP_TYPE_NONE. Any other type is
 * handed to read_swapfile_for_memcg() to produce the output.
 *
 * Always returns 0.
 */
static int memcg_swapfile_read(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
	int swap_type = SWAP_TYPE_ALL;

	/* The stored type is only consulted while the feature is enabled. */
	if (static_branch_likely(&memcg_swap_qos_key))
		swap_type = READ_ONCE(memcg->swap_dev->type);

	switch (swap_type) {
	case SWAP_TYPE_NONE:
		seq_printf(m, "none\n");
		break;
	case SWAP_TYPE_ALL:
		seq_printf(m, "all\n");
		break;
	default:
		read_swapfile_for_memcg(m, swap_type);
		break;
	}

	return 0;
}

/*
 * Write handler for the memcg "swapfile" control file.
 *
 * @buf is stripped of surrounding whitespace and then interpreted as:
 *   "none" - store SWAP_TYPE_NONE for this memcg;
 *   "all"  - store SWAP_TYPE_ALL for this memcg;
 *   other  - treated as a path, opened, and its mapping passed to
 *            write_swapfile_for_memcg() together with the memcg's type
 *            field.
 *
 * Returns @nbytes on success, -EACCES when swap QoS is disabled, or the
 * negative error from the path lookup, the open, or
 * write_swapfile_for_memcg().
 */
static ssize_t memcg_swapfile_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	struct filename *name;
	struct file *filp;
	ssize_t result = nbytes;
	int err;

	if (!static_branch_likely(&memcg_swap_qos_key))
		return -EACCES;

	buf = strstrip(buf);

	/* The two keywords need no path lookup. */
	if (strcmp(buf, "none") == 0) {
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
		return result;
	}
	if (strcmp(buf, "all") == 0) {
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
		return result;
	}

	name = getname_kernel(buf);
	if (IS_ERR(name))
		return PTR_ERR(name);

	filp = file_open_name(name, O_RDWR | O_LARGEFILE, 0);
	if (IS_ERR(filp)) {
		result = PTR_ERR(filp);
		goto out_putname;
	}

	err = write_swapfile_for_memcg(filp->f_mapping,
				       &memcg->swap_dev->type);
	filp_close(filp, NULL);
	if (err < 0)
		result = err;

out_putname:
	putname(name);
	return result;
}

/*
 * Look up the swap type configured for the memcg that @page belongs to.
 *
 * Returns SWAP_TYPE_ALL when swap QoS is disabled, @page is NULL, the
 * page has no memcg, the memcg is the root, or an online reference to
 * the memcg cannot be taken. Otherwise returns the memcg's stored type.
 */
int memcg_get_swap_type(struct page *page)
{
	struct mem_cgroup *memcg;
	int swap_type = SWAP_TYPE_ALL;

	if (!static_branch_likely(&memcg_swap_qos_key) || !page)
		return swap_type;

	rcu_read_lock();
	memcg = page_memcg(page);
	/* Short-circuit order matters: only try to pin a non-root memcg. */
	if (!memcg || mem_cgroup_is_root(memcg) ||
	    !css_tryget_online(&memcg->css)) {
		rcu_read_unlock();
		return swap_type;
	}
	rcu_read_unlock();

	/* The css reference taken above is held across this read. */
	swap_type = READ_ONCE(memcg->swap_dev->type);
	css_put(&memcg->css);

	return swap_type;
}

void memcg_remove_swapfile(int type)
{
	struct mem_cgroup *memcg;

	if (!static_branch_likely(&memcg_swap_qos_key))
		return;

	for_each_mem_cgroup(memcg)
		if (READ_ONCE(memcg->swap_dev->type) == type)
			WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
}

/*
 * Report the swap page count for @memcg, taking its configured swap
 * type into account.
 *
 * With swap QoS disabled, or with SWAP_TYPE_ALL stored, this is
 * mem_cgroup_get_nr_swap_pages(). SWAP_TYPE_NONE yields 0. Any other
 * type is answered by get_nr_swap_pages_type() for that type.
 */
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
	int swap_type = SWAP_TYPE_ALL;

	/* A disabled feature behaves exactly like SWAP_TYPE_ALL. */
	if (static_branch_likely(&memcg_swap_qos_key))
		swap_type = READ_ONCE(memcg->swap_dev->type);

	switch (swap_type) {
	case SWAP_TYPE_ALL:
		return mem_cgroup_get_nr_swap_pages(memcg);
	case SWAP_TYPE_NONE:
		return 0;
	default:
		return get_nr_swap_pages_type(swap_type);
	}
}

#else
static int memcg_alloc_swap_device(struct mem_cgroup *memcg)
{
@@ -4254,6 +4375,21 @@ static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
{
	return 0;
}

/*
 * Fallback in the #else branch (the matching #if is outside this view):
 * no per-memcg swap type is tracked, so any swap file may be used.
 */
int memcg_get_swap_type(struct page *page)
{
	return SWAP_TYPE_ALL;
}

/* Fallback in the #else branch: intentionally a no-op. */
void memcg_remove_swapfile(int type)
{
}

/*
 * Fallback in the #else branch: with no per-memcg swap type, simply
 * forward to mem_cgroup_get_nr_swap_pages().
 */
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
	return mem_cgroup_get_nr_swap_pages(memcg);
}

#endif

#ifdef CONFIG_NUMA
@@ -5523,7 +5659,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,

		/* If only reclaim swap pages, check swap space at first. */
		if ((reclaim_options & MEMCG_RECLAIM_NOT_FILE) &&
		    (mem_cgroup_get_nr_swap_pages(memcg) <= 0))
		    (mem_cgroup_get_nr_swap_pages_type(memcg) <= 0))
			return -EAGAIN;

		/* This is the final attempt, drain percpu lru caches in the
@@ -5960,6 +6096,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
		.write = memcg_swapmax_write,
		.read_u64 = memcg_swapmax_read,
	},
	{
		.name = "swapfile",
		.flags = CFTYPE_NOT_ON_ROOT,
		.write = memcg_swapfile_write,
		.seq_show = memcg_swapfile_read,
	},
#endif
	{
		.name = "high_async_ratio",
+10 −4
Original line number Diff line number Diff line
@@ -266,7 +266,7 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
	cache->cur = 0;
	if (swap_slot_cache_active)
		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
					   cache->slots, 1);
					   cache->slots, 1, SWAP_TYPE_ALL);

	return cache->nr;
}
@@ -307,12 +307,17 @@ swp_entry_t get_swap_page(struct page *page)
{
	swp_entry_t entry;
	struct swap_slots_cache *cache;
	int type;

	entry.val = 0;

	type = memcg_get_swap_type(page);
	if (type == SWAP_TYPE_NONE)
		goto out;

	if (PageTransHuge(page)) {
		if (IS_ENABLED(CONFIG_THP_SWAP))
			get_swap_pages(1, &entry, HPAGE_PMD_NR);
			get_swap_pages(1, &entry, HPAGE_PMD_NR, type);
		goto out;
	}

@@ -327,7 +332,8 @@ swp_entry_t get_swap_page(struct page *page)
	 */
	cache = raw_cpu_ptr(&swp_slots);

	if (likely(check_cache_active() && cache->slots)) {
	if (likely(check_cache_active() && cache->slots) &&
	    type == SWAP_TYPE_ALL) {
		mutex_lock(&cache->alloc_lock);
		if (cache->slots) {
repeat:
@@ -344,7 +350,7 @@ swp_entry_t get_swap_page(struct page *page)
			goto out;
	}

	get_swap_pages(1, &entry, 1);
	get_swap_pages(1, &entry, 1, type);
out:
	if (mem_cgroup_try_charge_swap(page, entry)) {
		put_swap_page(page, entry);
Loading