Commit 4d7ab4f1 authored by Barry Song, committed by Euler
Browse files

mm: swap: add nr argument in swapcache_prepare and swapcache_clear to support large folios

mainline inclusion
from mainline-v6.12-rc1
commit 9f101bef408a3f70c44b6e4de44d3d4e2655ed10
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAOMRL

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f101bef408a3f70c44b6e4de44d3d4e2655ed10

--------------------------------

Right now, swapcache_prepare() and swapcache_clear() support only one
entry. To support large folios, we need to handle multiple swap entries.

To optimize stack usage, we iterate twice in __swap_duplicate(): the first
time to verify that all entries are valid, and the second time to apply
the modifications to the entries.

Currently, we're using nr=1 for the existing users.

[v-songbaohua@oppo.com: clarify swap_count_continued and improve readability for __swap_duplicate]
  Link: https://lkml.kernel.org/r/20240802071817.47081-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240730071339.107447-2-21cnbao@gmail.com


Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Gao Xiang <xiang@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wang Lian <dev01404@linx-info.com>
---
parent 4817730c
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -509,7 +509,7 @@ extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order,
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern int swapcache_prepare(swp_entry_t entry, int nr);
extern void swap_free_nr(swp_entry_t entry, int nr_pages);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
@@ -592,7 +592,7 @@ static inline int swap_duplicate(swp_entry_t swp)
	return 0;
}

static inline int swapcache_prepare(swp_entry_t swp)
static inline int swapcache_prepare(swp_entry_t swp, int nr)
{
	return 0;
}
+3 −3
Original line number Diff line number Diff line
@@ -4037,7 +4037,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
			 * reusing the same entry. It's undetectable as
			 * pte_same() returns true due to entry reuse.
			 */
			if (swapcache_prepare(entry)) {
			if (swapcache_prepare(entry, 1)) {
				/* Relax a bit to prevent rapid repeated page faults */
				schedule_timeout_uninterruptible(1);
				goto out;
@@ -4344,7 +4344,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
out:
	/* Clear the swap cache pin for direct swapin after PTL unlock */
	if (need_clear_cache)
		swapcache_clear(si, entry);
		swapcache_clear(si, entry, 1);
	if (si)
		put_swap_device(si);
	return ret;
@@ -4360,7 +4360,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
		folio_put(swapcache);
	}
	if (need_clear_cache)
		swapcache_clear(si, entry);
		swapcache_clear(si, entry, 1);
	if (si)
		put_swap_device(si);
	return ret;
+3 −2
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ void __delete_from_swap_cache(struct folio *folio,
void delete_from_swap_cache(struct folio *folio);
void clear_shadow_from_swap_cache(int type, unsigned long begin,
				  unsigned long end);
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry);
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
struct folio *swap_cache_get_folio(swp_entry_t entry,
		struct vm_area_struct *vma, unsigned long addr);
struct folio *filemap_get_incore_folio(struct address_space *mapping,
@@ -97,7 +97,7 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
	return 0;
}

static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
}

@@ -149,4 +149,5 @@ static inline unsigned int folio_swap_flags(struct folio *folio)
	return 0;
}
#endif /* CONFIG_SWAP */

#endif /* _MM_SWAP_H */
+1 −1
Original line number Diff line number Diff line
@@ -477,7 +477,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		err = swapcache_prepare(entry, 1);
		if (!err)
			break;

+54 −41
Original line number Diff line number Diff line
@@ -3462,7 +3462,7 @@ void si_swapinfo(struct sysinfo *val)
}

/*
 * Verify that a swap entry is valid and increment its swap map count.
 * Verify that nr swap entries are valid and increment their swap map counts.
 *
 * Returns error code in following case.
 * - success -> 0
@@ -3472,21 +3472,25 @@ void si_swapinfo(struct sysinfo *val)
 * - swap-cache reference is requested but the entry is not used. -> ENOENT
 * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
 */
static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
{
	struct swap_info_struct *p;
	struct swap_cluster_info *ci;
	unsigned long offset;
	unsigned char count;
	unsigned char has_cache;
	int err;
	int err, i;

	p = swp_swap_info(entry);

	offset = swp_offset(entry);
	VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
	VM_WARN_ON(usage == 1 && nr > 1);
	ci = lock_cluster_or_swap_info(p, offset);

	count = p->swap_map[offset];
	err = 0;
	for (i = 0; i < nr; i++) {
		count = p->swap_map[offset + i];

		/*
		 * swapin_readahead() doesn't check if a swap entry is valid, so the
@@ -3499,33 +3503,42 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)

		has_cache = count & SWAP_HAS_CACHE;
		count &= ~SWAP_HAS_CACHE;
	err = 0;

	if (usage == SWAP_HAS_CACHE) {

		/* set SWAP_HAS_CACHE if there is no cache and entry is used */
		if (!has_cache && count)
			has_cache = SWAP_HAS_CACHE;
		else if (has_cache)		/* someone else added cache */
			err = -EEXIST;
		else				/* no users remaining */
		if (!count && !has_cache) {
			err = -ENOENT;
		} else if (usage == SWAP_HAS_CACHE) {
			if (has_cache)
				err = -EEXIST;
		} else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) {
			err = -EINVAL;
		}

		if (err)
			goto unlock_out;
	}

	} else if (count || has_cache) {
	for (i = 0; i < nr; i++) {
		count = p->swap_map[offset + i];
		has_cache = count & SWAP_HAS_CACHE;
		count &= ~SWAP_HAS_CACHE;

		if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
		if (usage == SWAP_HAS_CACHE)
			has_cache = SWAP_HAS_CACHE;
		else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
			count += usage;
		else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
			err = -EINVAL;
		else if (swap_count_continued(p, offset, count))
		else if (swap_count_continued(p, offset + i, count))
			count = COUNT_CONTINUED;
		else
		else {
			/*
			 * Don't need to rollback changes, because if
			 * usage == 1, there must be nr == 1.
			 */
			err = -ENOMEM;
	} else
		err = -ENOENT;			/* unused swap entry */
			goto unlock_out;
		}

	if (!err)
		WRITE_ONCE(p->swap_map[offset], count | has_cache);
		WRITE_ONCE(p->swap_map[offset + i], count | has_cache);
	}

unlock_out:
	unlock_cluster_or_swap_info(p, ci);
@@ -3538,7 +3551,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 */
void swap_shmem_alloc(swp_entry_t entry)
{
	__swap_duplicate(entry, SWAP_MAP_SHMEM);
	__swap_duplicate(entry, SWAP_MAP_SHMEM, 1);
}

/*
@@ -3552,29 +3565,29 @@ int swap_duplicate(swp_entry_t entry)
{
	int err = 0;

	while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
	while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM)
		err = add_swap_count_continuation(entry, GFP_ATOMIC);
	return err;
}

/*
 * @entry: swap entry for which we allocate swap cache.
 * @entry: first swap entry from which we allocate nr swap cache.
 *
 * Called when allocating swap cache for existing swap entry,
 * Called when allocating swap cache for existing swap entries,
 * This can return error codes. Returns 0 at success.
 * -EEXIST means there is a swap cache.
 * Note: return code is different from swap_duplicate().
 */
int swapcache_prepare(swp_entry_t entry)
int swapcache_prepare(swp_entry_t entry, int nr)
{
	return __swap_duplicate(entry, SWAP_HAS_CACHE);
	return __swap_duplicate(entry, SWAP_HAS_CACHE, nr);
}

void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
	unsigned long offset = swp_offset(entry);

	cluster_swap_free_nr(si, offset, 1, SWAP_HAS_CACHE);
	cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE);
}

struct swap_info_struct *swp_swap_info(swp_entry_t entry)