Unverified Commit f6a1bbf1 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!5047 Backport etmem swapcache reclaim feature to OLK 6.6

Merge Pull Request from: @ci-robot 
 
PR sync from: Yuchen Tang <tangyuchen5@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/AS6EX4EOO27ASJPOZXSTQ2ZSPUZYORYS/ 
From: tangyuchen <tangyuchen5@huawei.com>

v2 -> v3

- fix compile error
- bind to correct issue url

v1 -> v2

- remove redundant symbol export
- use safer kthread_freezable_should_stop
- make THP a reclaim candidate

Yuchen Tang (2):
  etmem: Expose symbol reclaim_folio_list
  etmem: add swapcache reclaim to etmem


-- 
2.33.0
 
https://gitee.com/openeuler/kernel/issues/I96X3J?from=project-issue 
 
Link:https://gitee.com/openeuler/kernel/pulls/5047

 

Reviewed-by: Liu Chao <liuchao173@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 779c3d90 ec19aefe
Loading
Loading
Loading
Loading
+173 −0
Original line number Diff line number Diff line
@@ -11,6 +11,22 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/etmem.h>
#include <linux/freezer.h>
#include <linux/kthread.h>

/*
 * ioctl interface for swapcache reclaim, handled by swap_page_ioctl().
 *
 * SET_SWAPCACHE_WMARK takes a pointer to an unsigned int: bits 0-7 carry the
 * low watermark and bits 8-15 the high watermark, each as a percentage of
 * total RAM (see get_swapcache_watermark()).
 * RECLAIM_SWAPCACHE_ON / RECLAIM_SWAPCACHE_OFF do not read their argument.
 */
#define RECLAIM_SWAPCACHE_MAGIC 0X77
#define SET_SWAPCACHE_WMARK	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x02, unsigned int)
#define RECLAIM_SWAPCACHE_ON	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x01, unsigned int)
#define RECLAIM_SWAPCACHE_OFF	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x00, unsigned int)

/* Largest accepted watermark percentage; also the divisor used to scale it. */
#define WATERMARK_MAX           100
/* NOTE(review): not referenced in the visible part of this file - confirm it is needed here. */
#define SWAP_SCAN_NUM_MAX       32

/* Kernel thread created by reclaim_swapcache_run(). */
static struct task_struct *reclaim_swapcache_tk;
/* Reclaim request flag; accessed with READ_ONCE()/WRITE_ONCE(). */
static bool enable_swapcache_reclaim;
/* Watermarks in pages, indexed by enum etmem_swapcache_watermark_en. */
static unsigned long swapcache_watermark[ETMEM_SWAPCACHE_NR_WMARK];

/* Wait queue the reclaim thread sleeps on between reclaim passes. */
static DECLARE_WAIT_QUEUE_HEAD(reclaim_queue);

static ssize_t swap_pages_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
@@ -82,6 +98,152 @@ static int swap_pages_release(struct inode *inode, struct file *file)

extern struct file_operations proc_swap_pages_operations;

/*
 * Return true while the number of swapcache pages is below the high
 * watermark, i.e. no reclaim is needed.
 */
static bool swapcache_balanced(void)
{
	const unsigned long high_wmark =
		swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH];

	return total_swapcache_pages() < high_wmark;
}

/* Return the current value of the swapcache reclaim request flag. */
static bool swapcache_reclaim_enabled(void)
{
	bool enabled = READ_ONCE(enable_swapcache_reclaim);

	return enabled;
}

/*
 * Request a reclaim pass (RECLAIM_SWAPCACHE_ON path).
 *
 * Nothing happens when the swapcache is already below the high watermark or
 * when no task is waiting on reclaim_queue; otherwise the request flag is
 * set and the waiter is woken.
 */
static void start_swapcache_reclaim(void)
{
	if (swapcache_balanced() || !waitqueue_active(&reclaim_queue))
		return;

	WRITE_ONCE(enable_swapcache_reclaim, true);
	wake_up_interruptible(&reclaim_queue);
}

/*
 * Clear the reclaim request flag. Only the flag is touched here; the reclaim
 * thread observes it through swapcache_reclaim_enabled().
 */
static void stop_swapcache_reclaim(void)
{
	WRITE_ONCE(enable_swapcache_reclaim, false);
}

/*
 * Decide whether the reclaim thread may go to sleep.
 *
 * Side effect: when the swapcache is already balanced the request flag is
 * cleared first. Returns true when no reclaim request is pending.
 */
static bool should_goto_sleep(void)
{
	if (swapcache_balanced())
		stop_swapcache_reclaim();

	return !swapcache_reclaim_enabled();
}

/*
 * Decode a packed watermark ratio and store the resulting page counts.
 *
 * @ratio: bits 0-7 hold the low watermark, bits 8-15 the high watermark,
 *         both as percentages of total RAM.
 *
 * Returns 0 on success, -EPERM when a percentage exceeds WATERMARK_MAX or
 * the low watermark is above the high one.
 */
static int get_swapcache_watermark(unsigned int ratio)
{
	const unsigned int low_pct = ratio & 0xFF;
	const unsigned int high_pct = (ratio >> 8) & 0xFF;

	/*
	 * low <= high <= WATERMARK_MAX is required; checking high against the
	 * limit and low against high covers low against the limit as well.
	 */
	if (high_pct > WATERMARK_MAX || low_pct > high_pct)
		return -EPERM;

	swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW] =
		totalram_pages() * low_pct / WATERMARK_MAX;
	swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH] =
		totalram_pages() * high_pct / WATERMARK_MAX;

	return 0;
}

/*
 * Sleep on reclaim_queue until a reclaim request arrives.
 *
 * Returns immediately when the thread is being frozen or stopped. The task
 * is queued before the sleep condition is evaluated, and schedule() is only
 * called when there is no pending request and no stop request.
 */
static void reclaim_swapcache_try_to_sleep(void)
{
	DEFINE_WAIT(wait);

	if (freezing(current) || kthread_should_stop())
		return;

	prepare_to_wait(&reclaim_queue, &wait, TASK_INTERRUPTIBLE);
	if (should_goto_sleep() && !kthread_should_stop())
		schedule();
	finish_wait(&reclaim_queue, &wait);
}

/*
 * Run one swapcache reclaim pass and then clear the request flag, so every
 * pass has to be requested again. The result of do_swapcache_reclaim() is
 * not inspected.
 */
static void etmem_reclaim_swapcache(void)
{
	const unsigned int nr_wmark = ARRAY_SIZE(swapcache_watermark);

	do_swapcache_reclaim(swapcache_watermark, nr_wmark);
	stop_swapcache_reclaim();
}

/*
 * Main loop of the swapcache reclaim kernel thread.
 *
 * Each iteration waits for a request, handles freezing, leaves the loop when
 * the thread is asked to stop, and otherwise performs one reclaim pass. A
 * pass is skipped when the thread has just come back from the freezer.
 *
 * @para: unused.
 *
 * Always returns 0.
 */
static int reclaim_swapcache_proactive(void *para)
{
	set_freezable();

	for (;;) {
		bool was_frozen;

		reclaim_swapcache_try_to_sleep();
		was_frozen = try_to_freeze();
		if (kthread_freezable_should_stop(NULL))
			break;

		if (!was_frozen)
			etmem_reclaim_swapcache();
	}

	return 0;
}

/*
 * Create and start the swapcache reclaim kernel thread.
 *
 * Returns 0 on success or the negative errno reported by kthread_run().
 *
 * On failure the ERR_PTR value is intentionally left in
 * reclaim_swapcache_tk: swap_pages_exit() guards kthread_stop() with
 * !IS_ERR(), which would let a NULL pointer through and hand it to
 * kthread_stop() on module unload.
 */
static int reclaim_swapcache_run(void)
{
	reclaim_swapcache_tk = kthread_run(reclaim_swapcache_proactive, NULL,
						"etmem_recalim_swapcache");
	if (IS_ERR(reclaim_swapcache_tk))
		return PTR_ERR(reclaim_swapcache_tk);

	return 0;
}

static long swap_page_ioctl(struct file *filp, unsigned int cmd,
			unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	unsigned int ratio;

	switch (cmd) {
	case RECLAIM_SWAPCACHE_ON:
		if (swapcache_reclaim_enabled())
			return 0;
		start_swapcache_reclaim();
		break;
	case RECLAIM_SWAPCACHE_OFF:
		stop_swapcache_reclaim();
		break;
	case SET_SWAPCACHE_WMARK:
		if (get_user(ratio, (unsigned int __user *)argp))
			return -EFAULT;

		if (get_swapcache_watermark(ratio) != 0)
			return -EFAULT;
		break;
	default:
		return -EPERM;
	}

	return 0;
}


static int swap_pages_entry(void)
{
	proc_swap_pages_operations.flock(NULL, 1, NULL);
@@ -89,8 +251,12 @@ static int swap_pages_entry(void)
	proc_swap_pages_operations.write = swap_pages_write;
	proc_swap_pages_operations.open = swap_pages_open;
	proc_swap_pages_operations.release = swap_pages_release;
	proc_swap_pages_operations.unlocked_ioctl = swap_page_ioctl;
	proc_swap_pages_operations.flock(NULL, 0, NULL);

	enable_swapcache_reclaim = false;
	reclaim_swapcache_run();

	return 0;
}

@@ -101,7 +267,14 @@ static void swap_pages_exit(void)
	proc_swap_pages_operations.write = NULL;
	proc_swap_pages_operations.open = NULL;
	proc_swap_pages_operations.release = NULL;
	proc_swap_pages_operations.unlocked_ioctl = NULL;
	proc_swap_pages_operations.flock(NULL, 0, NULL);

	if (!IS_ERR(reclaim_swapcache_tk)) {
		kthread_stop(reclaim_swapcache_tk);
		reclaim_swapcache_tk = NULL;
	}
	return;
}

MODULE_LICENSE("GPL");
+29 −0
Original line number Diff line number Diff line
@@ -9,6 +9,28 @@
#include <linux/page-flags.h>

#ifdef CONFIG_ETMEM
/**
 * list_for_each_entry_safe_reverse_from - iterate backwards over list from
 * current point safe against removal
 * @pos:        the type * to use as a loop cursor; it is not assigned by the
 *              macro and must already point at the entry to start from.
 * @n:          another type * to use as temporary storage; it always holds
 *              the entry preceding @pos.
 * @head:       the head for your list.
 * @member:     the name of the list_head within the struct.
 *
 * Iterate backwards over list of given type from current point, safe against
 * removal of list entry. The loop ends once @pos reaches @head.
 */
#define list_for_each_entry_safe_reverse_from(pos, n, head, member)     \
	for (n = list_prev_entry(pos, member);                          \
		!list_entry_is_head(pos, head, member);                \
		pos = n, n = list_prev_entry(n, member))


/* Indices into the swapcache watermark array handed to do_swapcache_reclaim(). */
enum etmem_swapcache_watermark_en {
	/* Page count that the reclaim target is measured against. */
	ETMEM_SWAPCACHE_WMARK_LOW,
	/* Page count at or above which the swapcache is considered unbalanced. */
	ETMEM_SWAPCACHE_WMARK_HIGH,
	/* Number of watermarks; used as the array size. */
	ETMEM_SWAPCACHE_NR_WMARK
};

#if IS_ENABLED(CONFIG_KVM)
static inline struct kvm *mm_kvm(struct mm_struct *mm)
@@ -27,6 +49,8 @@ extern struct page *get_page_from_vaddr(struct mm_struct *mm,
					unsigned long vaddr);
extern struct kobj_attribute kernel_swap_enable_attr;
extern bool kernel_swap_enabled(void);
/*
 * Reclaim swapcache pages in excess of the low watermark.
 * @swapcache_watermark is indexed by enum etmem_swapcache_watermark_en and
 * @watermark_nr must be at least ETMEM_SWAPCACHE_NR_WMARK.
 * Returns 0 on success or a negative errno.
 */
extern int do_swapcache_reclaim(unsigned long *swapcache_watermark,
				unsigned int watermark_nr);
#else /* !CONFIG_ETMEM */
static inline int add_page_for_swap(struct page *page, struct list_head *pagelist)
{
@@ -43,5 +67,10 @@ static inline bool kernel_swap_enabled(void)
{
	return true;
}
/* !CONFIG_ETMEM stub: swapcache reclaim is unavailable, report success. */
static inline int
do_swapcache_reclaim(unsigned long *swapcache_watermark,
		     unsigned int watermark_nr)
{
	return 0;
}
#endif /* #ifdef CONFIG_ETMEM */
#endif /* define __MM_ETMEM_H_ */
+2 −0
Original line number Diff line number Diff line
@@ -419,6 +419,8 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
					gfp_t gfp_mask, nodemask_t *mask);
extern unsigned int reclaim_folio_list(struct list_head *folio_list,
						struct pglist_data *pgdat);
extern unsigned long reclaim_pages(struct list_head *folio_list);

#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
+173 −0
Original line number Diff line number Diff line
@@ -93,3 +93,176 @@ struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr)
	return page;
}
EXPORT_SYMBOL_GPL(get_page_from_vaddr);

/*
 * Upper bound of the per-batch scan counter in memcg_reclaim_swapcache();
 * once it is reached the LRU lock is dropped before scanning continues.
 */
#define SWAP_SCAN_NUM_MAX	32

/*
 * Return how many swapcache pages exceed the low watermark, or 0 when the
 * swapcache is at or below it.
 *
 * The global counter is sampled exactly once: sampling it again for the
 * subtraction could observe a value below the watermark and make the
 * unsigned result wrap around to a huge reclaim target.
 */
static unsigned long get_swapcache_reclaim_num(unsigned long *swapcache_watermark)
{
	unsigned long nr_swapcache = total_swapcache_pages();
	unsigned long low_wmark = swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW];

	return nr_swapcache > low_wmark ? nr_swapcache - low_wmark : 0;
}

/*
 * Isolate @folio from the inactive anon LRU of @lruvec onto @foliolist.
 *
 * Called by memcg_reclaim_swapcache() with lruvec->lru_lock held.
 *
 * Returns 0 when the folio was moved, -EACCES when the folio is not a
 * reclaim candidate (hugetlb, or mapped more than once), and -1 when a
 * reference could not be taken or the LRU flag was already cleared.
 */
static int move_lru_folios_to_list(struct lruvec *lruvec,
	struct folio *folio, struct list_head *foliolist)
{
	if (folio_test_large(folio)) {
		/* hugetlb folios are never reclaimed here */
		if (folio_test_hugetlb(folio))
			return -EACCES;

		/* a THP is only a candidate if nobody else maps it entirely */
		if (folio_entire_mapcount(folio) > 1)
			return -EACCES;
	} else if (folio_mapcount(folio) > 1) {
		/* small folio that is mapped by another process as well */
		return -EACCES;
	}

	/* taking a reference fails if the folio has been freed or frozen */
	if (!folio_try_get(folio))
		return -1;

	/* somebody else is isolating this folio concurrently */
	if (!folio_test_clear_lru(folio)) {
		folio_put(folio);
		return -1;
	}

	list_move(&folio->lru, foliolist);
	update_lru_size(lruvec, LRU_INACTIVE_ANON, folio_zonenum(folio),
			-folio_nr_pages(folio));

	return 0;
}

/*
 * For each node, scan the inactive anon lru, isolate and move
 * appropriate candidates to swapcache_list[nid]
 *
 * @swapcache_list:       array of list heads indexed by node id; the head of
 *                        every node with memory is initialised here.
 * @swapcache_to_reclaim: number of pages after which the memcg walk of a
 *                        node is cut short. The running total is not reset
 *                        between nodes.
 *
 * Every memcg's inactive anon LRU is walked from the tail in batches bounded
 * by SWAP_SCAN_NUM_MAX; the LRU lock is dropped between batches. Only
 * swapcache folios that are no longer mapped are isolated.
 *
 * NOTE(review): the folio on which the batch limit is hit is not examined,
 * because the cursor has already advanced past it - confirm this is intended.
 */
static void memcg_reclaim_swapcache(struct list_head *swapcache_list,
			unsigned long swapcache_to_reclaim)
{
	struct mem_cgroup *memcg = NULL, *target_memcg = NULL;
	struct lruvec *lruvec;
	int nid;
	pg_data_t *pgdat;
	unsigned int scan_count = 0;
	unsigned long swapcache_total_reclaimable = 0;
	struct list_head *src = NULL;
	struct folio *folio = NULL, *next = NULL, *pos = NULL;

	for_each_node_state(nid, N_MEMORY) {
		INIT_LIST_HEAD(&swapcache_list[nid]);
		cond_resched();
		pgdat = NODE_DATA(nid);

		memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
		do {
			cond_resched();
			lruvec = mem_cgroup_lruvec(memcg, pgdat);
			src = &(lruvec->lists[LRU_INACTIVE_ANON]);

			/* start from the tail of the inactive anon list */
			spin_lock_irq(&lruvec->lru_lock);
			pos = list_last_entry(src, struct folio, lru);
			spin_unlock_irq(&lruvec->lru_lock);
reverse_scan_lru:
			cond_resched();
			scan_count = 0;

			spin_lock_irq(&lruvec->lru_lock);
			/* list exhausted (or empty): move on to the next memcg */
			if (!pos || list_entry_is_head(pos, src, lru)) {
				spin_unlock_irq(&lruvec->lru_lock);
				continue;
			}

			/*
			 * The cursor was saved across a lock drop; give up on
			 * this memcg if it is no longer on the inactive anon LRU.
			 */
			if (!folio_test_lru(pos) || folio_lru_list(pos) != LRU_INACTIVE_ANON) {
				spin_unlock_irq(&lruvec->lru_lock);
				continue;
			}

			folio = pos;

			list_for_each_entry_safe_reverse_from(folio, next, src, lru) {
				/* remember where to resume after the lock is dropped */
				pos = next;
				scan_count++;
				if (scan_count >= SWAP_SCAN_NUM_MAX)
					break;

				if (!folio_test_swapcache(folio) || folio_mapped(folio))
					continue;

				if (move_lru_folios_to_list(lruvec,
							folio,
							&swapcache_list[nid]) != 0)
					continue;

				swapcache_total_reclaimable += folio_nr_pages(folio);
			}
			spin_unlock_irq(&lruvec->lru_lock);

			if (swapcache_total_reclaimable >= swapcache_to_reclaim) {
				/*
				 * Leaving the memcg walk early: release the
				 * reference mem_cgroup_iter() holds on @memcg,
				 * otherwise it is leaked.
				 */
				mem_cgroup_iter_break(target_memcg, memcg);
				break;
			}

			if (scan_count >= SWAP_SCAN_NUM_MAX)
				goto reverse_scan_lru;

		} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
	}
}

/*
 * Swapcache reclaim for the multi-gen LRU case: currently a stub that
 * isolates nothing and reports success. Both arguments are unused.
 */
static int
lru_gen_reclaim_swapcache(struct list_head *swapcache_list,
			  unsigned long swapcache_to_reclaim)
{
	return 0;
}

/**
 * do_swapcache_reclaim - reclaim swapcache pages above the low watermark
 * @swapcache_watermark: watermarks in pages, indexed by
 *                       enum etmem_swapcache_watermark_en.
 * @watermark_nr:        number of entries in @swapcache_watermark.
 *
 * Isolates candidate folios per node, hands them to reclaim_folio_list() and
 * puts back on the LRU whatever could not be reclaimed.
 *
 * Return: 0 on success (including when there is nothing to reclaim),
 * -EINVAL for a missing or too short watermark array, -ENOMEM when the
 * per-node list array cannot be allocated. With the multi-gen LRU enabled
 * the result of lru_gen_reclaim_swapcache() is returned.
 */
int do_swapcache_reclaim(unsigned long *swapcache_watermark,
			unsigned int watermark_nr)
{
	int nid;
	unsigned long swapcache_to_reclaim = 0;
	struct list_head *swapcache_list = NULL, *folio_list = NULL;
	struct folio *folio = NULL;

	if (swapcache_watermark == NULL ||
		watermark_nr < ETMEM_SWAPCACHE_NR_WMARK)
		return -EINVAL;

	if (lru_gen_enabled())
		return lru_gen_reclaim_swapcache(swapcache_list, swapcache_to_reclaim);

	swapcache_to_reclaim = get_swapcache_reclaim_num(swapcache_watermark);
	/*
	 * Already at or below the low watermark: skip the scan entirely.
	 * Without this check a zero target still isolates and reclaims one
	 * batch of folios per node.
	 */
	if (!swapcache_to_reclaim)
		return 0;

	swapcache_list = kcalloc(MAX_NUMNODES, sizeof(*swapcache_list), GFP_KERNEL);
	if (swapcache_list == NULL)
		return -ENOMEM;

	memcg_reclaim_swapcache(swapcache_list, swapcache_to_reclaim);

	/* Reclaim all the swapcache we have scanned */
	for_each_node_state(nid, N_MEMORY) {
		cond_resched();
		reclaim_folio_list(&swapcache_list[nid], NODE_DATA(nid));
	}

	/* Put back all the folios that were not reclaimed by reclaim_folio_list() */
	for_each_node_state(nid, N_MEMORY) {
		cond_resched();
		folio_list = &swapcache_list[nid];
		while (!list_empty(folio_list)) {
			folio = lru_to_folio(folio_list);
			list_del(&folio->lru);
			folio_putback_lru(folio);
		}
	}

	kfree(swapcache_list);
	return 0;
}
EXPORT_SYMBOL_GPL(do_swapcache_reclaim);
+1 −1
Original line number Diff line number Diff line
@@ -2791,7 +2791,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
			nr_deactivate, nr_rotated, sc->priority, file);
}

static unsigned int reclaim_folio_list(struct list_head *folio_list,
unsigned int reclaim_folio_list(struct list_head *folio_list,
				      struct pglist_data *pgdat)
{
	struct reclaim_stat dummy_stat;