Unverified Commit c6c933e9 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!11561 mm: support large folio swap-out and swap-in for shmem

Merge Pull Request from: @wedm23414 
 
community inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAOMRL


Reference:  https://lore.kernel.org/all/b371eadb27f42fc51261c51008fbb9a334985b4c.1723434324.git.baolin.wang@linux.alibaba.com/
--------------------------------

Shmem will support large folio allocation [1] [2] to get better performance.
However, memory reclaim still splits the precious large folios when trying
to swap out shmem, which may lead to memory fragmentation and means shmem
cannot take advantage of large folios.

Moreover, the swap code already supports swapping out large folios without
splitting them, and the large folio swap-in [3] series is queued in the
mm-unstable branch. Hence this patch set also supports large folio swap-out
and swap-in for shmem.

Please help to review. Thanks.

Functional testing
==================
Machine environment: 32 Arm cores, 120G memory and 50G swap device.

1. Run xfstests suite to test tmpfs filesystem, and I did not catch any
regressions with this patch set.
FSTYP=tmpfs
export TEST_DIR=/mnt/tempfs_mnt
export TEST_DEV=/mnt/tempfs_mnt
export SCRATCH_MNT=/mnt/scratchdir
export SCRATCH_DEV=/mnt/scratchdir

2. Run all mm selftests in tools/testing/selftests/mm/, and no
regressions found.

3. I also wrote several shmem swap test cases, including shmem splitting,
shmem swapout, shmem swapin, swapoff during shmem swapout, shmem reclaim,
shmem swapin replacement, etc. I tested these cases under 4K and 64K
shmem folio sizes with a swap device, and shmem swap functionality works
well on my machine.

[1] https://lore.kernel.org/all/cover.1717495894.git.baolin.wang@linux.alibaba.com/
[2] https://lore.kernel.org/all/20240515055719.32577-1-da.gomez@samsung.com/
[3] https://lore.kernel.org/all/20240508224040.190469-6-21cnbao@gmail.com/T/
[4] https://lore.kernel.org/all/8db63194-77fd-e0b8-8601-2bbf04889a5b@google.com/

Baolin Wang (8):
  mm: swap: extend swap_shmem_alloc() to support batch SWAP_MAP_SHMEM
    flag setting
  mm: shmem: extend shmem_partial_swap_usage() to support large folio
    swap
  mm: filemap: use xa_get_order() to get the swap entry order
  mm: shmem: use swap_free_nr() to free shmem swap entries
  mm: shmem: support large folio allocation for shmem_replace_folio()
  mm: shmem: drop folio reference count using 'nr_pages' in
    shmem_delete_from_page_cache()
  mm: shmem: split large entry if the swapin folio is not large
  mm: shmem: support large folio swap out

Daniel Gomez (1):
  mm: shmem: return number of pages beeing freed in shmem_free_swap

Barry Song (3):
  mm: extend 'usage' parameter so that cluster_swap_free_nr() can be
    reused
  mm/swapfile:__swap_duplicate: drop redundant WRITE_ONCE on swap_map
    for err cases
  mm: swap: add nr argument in swapcache_prepare and swapcache_clear to
    support large folios

Wang Lian (2):
  mm:shmem: Support large folio adjust reliable shmem usage count
  mm/pageout: Fix kabi broken in writeback_control


-- 
2.34.1
     
 
Link:https://gitee.com/openeuler/kernel/pulls/11561

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents d4893cbf 7cdb8f42
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -507,9 +507,9 @@ extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order,
			  int type);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern void swap_shmem_alloc(swp_entry_t, int);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern int swapcache_prepare(swp_entry_t entry, int nr);
extern void swap_free_nr(swp_entry_t entry, int nr_pages);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
@@ -583,7 +583,7 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
	return 0;
}

static inline void swap_shmem_alloc(swp_entry_t swp)
static inline void swap_shmem_alloc(swp_entry_t swp, int nr)
{
}

@@ -592,7 +592,7 @@ static inline int swap_duplicate(swp_entry_t swp)
	return 0;
}

static inline int swapcache_prepare(swp_entry_t swp)
static inline int swapcache_prepare(swp_entry_t swp, int nr)
{
	return 0;
}
+3 −1
Original line number Diff line number Diff line
@@ -90,9 +90,11 @@ struct writeback_control {
	size_t wb_tcand_bytes;		/* bytes written by this candidate */
#endif

	KABI_RESERVE(1)
	/* Target list for splitting a large folio */
	KABI_USE(1, struct list_head *list)
	KABI_RESERVE(2)
	KABI_RESERVE(3)

};

static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc)
+27 −14
Original line number Diff line number Diff line
@@ -2048,17 +2048,20 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
		if (!folio_batch_add(fbatch, folio))
			break;
	}
	rcu_read_unlock();

	if (folio_batch_count(fbatch)) {
		unsigned long nr = 1;
		unsigned long nr;
		int idx = folio_batch_count(fbatch) - 1;

		folio = fbatch->folios[idx];
		if (!xa_is_value(folio))
			nr = folio_nr_pages(folio);
		*start = indices[idx] + nr;
		else
			nr = 1 << xa_get_order(&mapping->i_pages, indices[idx]);
		*start = round_down(indices[idx] + nr, nr);
	}
	rcu_read_unlock();

	return folio_batch_count(fbatch);
}

@@ -2090,10 +2093,17 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,

	rcu_read_lock();
	while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
		unsigned long base;
		unsigned long nr;

		if (!xa_is_value(folio)) {
			if (folio->index < *start)
			nr = folio_nr_pages(folio);
			base = folio->index;
			/* Omit large folio which begins before the start */
			if (base < *start)
				goto put;
			if (folio_next_index(folio) - 1 > end)
			/* Omit large folio which extends beyond the end */
			if (base + nr - 1 > end)
				goto put;
			if (!folio_trylock(folio))
				goto put;
@@ -2102,7 +2112,19 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
				goto unlock;
			VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
					folio);
		} else {
			nr = 1 << xa_get_order(&mapping->i_pages, xas.xa_index);
			base = xas.xa_index & ~(nr - 1);
			/* Omit order>0 value which begins before the start */
			if (base < *start)
				continue;
			/* Omit order>0 value which extends beyond the end */
			if (base + nr - 1 > end)
				break;
		}

		/* Update start now so that last update is correct on return */
		*start = base + nr;
		indices[fbatch->nr] = xas.xa_index;
		if (!folio_batch_add(fbatch, folio))
			break;
@@ -2114,15 +2136,6 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
	}
	rcu_read_unlock();

	if (folio_batch_count(fbatch)) {
		unsigned long nr = 1;
		int idx = folio_batch_count(fbatch) - 1;

		folio = fbatch->folios[idx];
		if (!xa_is_value(folio))
			nr = folio_nr_pages(folio);
		*start = indices[idx] + nr;
	}
	return folio_batch_count(fbatch);
}

+3 −3
Original line number Diff line number Diff line
@@ -4062,7 +4062,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
			 * reusing the same entry. It's undetectable as
			 * pte_same() returns true due to entry reuse.
			 */
			if (swapcache_prepare(entry)) {
			if (swapcache_prepare(entry, 1)) {
				/* Relax a bit to prevent rapid repeated page faults */
				schedule_timeout_uninterruptible(1);
				goto out;
@@ -4369,7 +4369,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
out:
	/* Clear the swap cache pin for direct swapin after PTL unlock */
	if (need_clear_cache)
		swapcache_clear(si, entry);
		swapcache_clear(si, entry, 1);
	if (si)
		put_swap_device(si);
	return ret;
@@ -4385,7 +4385,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
		folio_put(swapcache);
	}
	if (need_clear_cache)
		swapcache_clear(si, entry);
		swapcache_clear(si, entry, 1);
	if (si)
		put_swap_device(si);
	return ret;
+205 −55
Original line number Diff line number Diff line
@@ -155,7 +155,7 @@ static unsigned long shmem_default_max_inodes(void)

static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
			struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
			struct mm_struct *fault_mm, vm_fault_t *fault_type);
			struct vm_area_struct *vma, vm_fault_t *fault_type);

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
@@ -791,7 +791,6 @@ static int shmem_add_to_page_cache(struct folio *folio,
	VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
	VM_BUG_ON(expected && folio_test_large(folio));

	folio_ref_add(folio, nr);
	folio->mapping = mapping;
@@ -849,23 +848,27 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
	__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
	shmem_reliable_folio_add(folio, -nr);
	xa_unlock_irq(&mapping->i_pages);
	folio_put(folio);
	folio_put_refs(folio, nr);
	BUG_ON(error);
}

/*
 * Remove swap entry from page cache, free the swap and its page cache.
 * Remove swap entry from page cache, free the swap and its page cache. Returns
 * the number of pages being freed. 0 means entry not found in XArray (0 pages
 * being freed).
 */
static int shmem_free_swap(struct address_space *mapping,
static long shmem_free_swap(struct address_space *mapping,
			    pgoff_t index, void *radswap)
{
	int order = xa_get_order(&mapping->i_pages, index);
	void *old;

	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
		return 0;
	free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order);

	return 1 << order;
}

/*
@@ -888,7 +891,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			swapped++;
			swapped += 1 << xa_get_order(xas.xa, xas.xa_index);
		if (xas.xa_index == max)
			break;
		if (need_resched()) {
@@ -1017,7 +1020,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
			if (xa_is_value(folio)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += !shmem_free_swap(mapping,
				nr_swaps_freed += shmem_free_swap(mapping,
							indices[i], folio);
				continue;
			}
@@ -1084,14 +1087,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				long swaps_freed;

				if (unfalloc)
					continue;
				if (shmem_free_swap(mapping, indices[i], folio)) {
				swaps_freed = shmem_free_swap(mapping, indices[i], folio);
				if (!swaps_freed) {
					/* Swap was replaced by page: retry */
					index = indices[i];
					break;
				}
				nr_swaps_freed++;
				nr_swaps_freed += swaps_freed;
				continue;
			}

@@ -1452,6 +1458,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	swp_entry_t swap;
	pgoff_t index;
	int nr_pages;
	bool split = false;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
@@ -1470,20 +1478,33 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
		goto redirty;

	/*
	 * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
	 * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
	 * and its shmem_writeback() needs them to be split when swapping.
	 * If CONFIG_THP_SWAP is not enabled, the large folio should be
	 * split when swapping.
	 *
	 * And shrinkage of pages beyond i_size does not split swap, so
	 * swapout of a large folio crossing i_size needs to split too
	 * (unless fallocate has been used to preallocate beyond EOF).
	 */
	if (folio_test_large(folio)) {
		index = shmem_fallocend(inode,
			DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
		if ((index > folio->index && index < folio_next_index(folio)) ||
		    !IS_ENABLED(CONFIG_THP_SWAP))
			split = true;
	}

	if (split) {
try_split:
		/* Ensure the subpages are still dirty */
		folio_test_set_dirty(folio);
		if (split_huge_page(page) < 0)
		if (split_huge_page_to_list_to_order(page, wbc->list, 0))
			goto redirty;
		folio = page_folio(page);
		folio_clear_dirty(folio);
	}

	index = folio->index;
	nr_pages = folio_nr_pages(folio);

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
@@ -1518,8 +1539,12 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
	}

	swap = folio_alloc_swap(folio);
	if (!swap.val)
	if (!swap.val) {
		if (nr_pages > 1)
			goto try_split;

		goto redirty;
	}

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
@@ -1536,8 +1561,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
	if (add_to_swap_cache(folio, swap,
			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
			NULL) == 0) {
		shmem_recalc_inode(inode, 0, 1);
		swap_shmem_alloc(swap);
		shmem_recalc_inode(inode, 0, nr_pages);
		swap_shmem_alloc(swap, nr_pages);
		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));

		mutex_unlock(&shmem_swaplist_mutex);
@@ -1903,30 +1928,35 @@ static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
}

static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index)
				struct shmem_inode_info *info, pgoff_t index,
				struct vm_area_struct *vma)
{
	struct folio *old, *new;
	struct address_space *swap_mapping;
	swp_entry_t entry;
	pgoff_t swap_index;
	int error;

	old = *foliop;
	entry = old->swap;
	swap_index = swp_offset(entry);
	swap_mapping = swap_address_space(entry);
	struct folio *new, *old = *foliop;
	swp_entry_t entry = old->swap;
	struct address_space *swap_mapping = swap_address_space(entry);
	pgoff_t swap_index = swp_offset(entry);
	XA_STATE(xas, &swap_mapping->i_pages, swap_index);
	int nr_pages = folio_nr_pages(old);
	int error = 0, i;

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit chance of success by further cpuset and node constraints.
	 */
	gfp &= ~GFP_CONSTRAINT_MASK;
	VM_BUG_ON_FOLIO(folio_test_large(old), old);
	new = shmem_alloc_folio(gfp, 0, info, index);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (nr_pages > 1) {
		gfp_t huge_gfp = vma_thp_gfp_mask(vma);

		gfp = limit_gfp_mask(huge_gfp, gfp);
	}
#endif

	new = shmem_alloc_folio(gfp, folio_order(old), info, index);
	if (!new)
		return -ENOMEM;

	folio_get(new);
	folio_ref_add(new, nr_pages);
	folio_copy(new, old);
	flush_dcache_folio(new);

@@ -1936,20 +1966,27 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
	new->swap = entry;
	folio_set_swapcache(new);

	/*
	 * Our caller will very soon move newpage out of swapcache, but it's
	 * a nice clean interface for us to replace oldpage by newpage there.
	 */
	/* Swap cache still stores N entries instead of a high-order entry */
	xa_lock_irq(&swap_mapping->i_pages);
	error = shmem_replace_entry(swap_mapping, swap_index, old, new);
	for (i = 0; i < nr_pages; i++) {
		void *item = xas_load(&xas);

		if (item != old) {
			error = -ENOENT;
			break;
		}

		xas_store(&xas, new);
		xas_next(&xas);
	}
	if (!error) {
		mem_cgroup_replace_folio(old, new);
		__lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1);
		__lruvec_stat_mod_folio(new, NR_SHMEM, 1);
		shmem_reliable_folio_add(new, 1);
		__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1);
		__lruvec_stat_mod_folio(old, NR_SHMEM, -1);
		shmem_reliable_folio_add(old, -1);
		__lruvec_stat_mod_folio(new, NR_FILE_PAGES, nr_pages);
		__lruvec_stat_mod_folio(new, NR_SHMEM, nr_pages);
		shmem_reliable_folio_add(new, nr_pages);
		__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -nr_pages);
		__lruvec_stat_mod_folio(old, NR_SHMEM, -nr_pages);
		shmem_reliable_folio_add(old, -nr_pages);
	}
	xa_unlock_irq(&swap_mapping->i_pages);

@@ -1969,7 +2006,12 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
	old->private = NULL;

	folio_unlock(old);
	folio_put_refs(old, 2);
	/*
	 * The old folio are removed from swap cache, drop the 'nr_pages'
	 * reference, as well as one temporary reference getting from swap
	 * cache.
	 */
	folio_put_refs(old, nr_pages + 1);
	return error;
}

@@ -1979,6 +2021,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
	struct address_space *mapping = inode->i_mapping;
	swp_entry_t swapin_error;
	void *old;
	int nr_pages;

	swapin_error = make_poisoned_swp_entry();
	old = xa_cmpxchg_irq(&mapping->i_pages, index,
@@ -1987,6 +2030,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
	if (old != swp_to_radix_entry(swap))
		return;

	nr_pages = folio_nr_pages(folio);
	folio_wait_writeback(folio);
	delete_from_swap_cache(folio);
	/*
@@ -1994,8 +2038,86 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
	 * in shmem_evict_inode().
	 */
	shmem_recalc_inode(inode, -1, -1);
	swap_free(swap);
	shmem_recalc_inode(inode, -nr_pages, -nr_pages);
	swap_free_nr(swap, nr_pages);
}

/*
 * shmem_split_large_entry - split a large swap entry in the shmem page cache
 * @inode: inode whose mapping (inode->i_mapping) holds the swap entry
 * @index: page cache index that is being swapped in
 * @swap:  swap entry expected to be stored (as a value entry) at @index
 * @gfp:   caller's gfp flags; masked with GFP_RECLAIM_MASK before being
 *         used for xarray node allocation
 *
 * Looks up @index under the xarray lock and checks that the slot still holds
 * the value entry for @swap.  If that entry has order > 0, it is split into
 * order-0 slots and every slot of the aligned range is rewritten with a swap
 * entry of the same type and consecutive offsets, starting at
 * swp_offset(@swap).
 * NOTE(review): this presumes @swap is the entry covering the first page of
 * the large range -- confirm against the caller.
 *
 * The nodes needed by xas_split() are allocated with xas_split_alloc() after
 * the lock has been dropped, so the function loops: the first pass records
 * the order to split, the allocation happens unlocked, and the next pass
 * re-validates the slot before performing the split.
 *
 * Return: a negative errno taken from the xa_state on failure (-EEXIST is set
 * here when the slot no longer holds @swap), otherwise the order of the entry
 * that was split, or 0 if no split was performed.
 */
static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
				   swp_entry_t swap, gfp_t gfp)
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
	/* Entry and order the current preallocation was made for */
	void *alloced_shadow = NULL;
	int alloced_order = 0, i;

	/* Convert user data gfp flags to xarray node gfp flags */
	gfp &= GFP_RECLAIM_MASK;

	for (;;) {
		int order = -1, split_order = 0;
		void *old = NULL;

		xas_lock_irq(&xas);
		old = xas_load(&xas);
		/* The slot must still hold the swap entry we were asked about */
		if (!xa_is_value(old) || swp_to_radix_entry(swap) != old) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}

		order = xas_get_order(&xas);

		/* Swap entry may have changed before we re-acquire the lock */
		if (alloced_order &&
		    (old != alloced_shadow || order != alloced_order)) {
			/* Stale preallocation: discard it and start over */
			xas_destroy(&xas);
			alloced_order = 0;
		}

		/* Try to split large swap entry in pagecache */
		if (order > 0) {
			if (!alloced_order) {
				/* Nothing preallocated yet: allocate after unlocking */
				split_order = order;
				goto unlock;
			}
			xas_split(&xas, old, order);

			/*
			 * Re-set the swap entry after splitting, and the swap
			 * offset of the original large entry must be continuous.
			 */
			for (i = 0; i < 1 << order; i++) {
				pgoff_t aligned_index = round_down(index, 1 << order);
				swp_entry_t tmp;

				tmp = swp_entry(swp_type(swap), swp_offset(swap) + i);
				__xa_store(&mapping->i_pages, aligned_index + i,
					   swp_to_radix_entry(tmp), 0);
			}
		}

unlock:
		xas_unlock_irq(&xas);

		/* split needed, alloc here and retry. */
		if (split_order) {
			xas_split_alloc(&xas, old, split_order, gfp);
			if (xas_error(&xas))
				goto error;
			/* Remember what was allocated for, to re-validate under the lock */
			alloced_shadow = old;
			alloced_order = split_order;
			xas_reset(&xas);
			continue;
		}

		/* Leave the loop unless xas_nomem() asks for another pass */
		if (!xas_nomem(&xas, gfp))
			break;
	}

error:
	if (xas_error(&xas))
		return xas_error(&xas);

	/* Order that was split, or 0 when no split took place */
	return alloced_order;
}

/*
@@ -2006,15 +2128,16 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 */
static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
			     struct folio **foliop, enum sgp_type sgp,
			     gfp_t gfp, struct mm_struct *fault_mm,
			     gfp_t gfp, struct vm_area_struct *vma,
			     vm_fault_t *fault_type)
{
	struct address_space *mapping = inode->i_mapping;
	struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct swap_info_struct *si;
	struct folio *folio = NULL;
	swp_entry_t swap;
	int error;
	int error, nr_pages;

	VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
	swap = radix_to_swp_entry(*foliop);
@@ -2034,12 +2157,37 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
	/* Look it up and read it in.. */
	folio = swap_cache_get_folio(swap, NULL, 0);
	if (!folio) {
		int split_order;

		/* Or update major stats only when swapin succeeds?? */
		if (fault_type) {
			*fault_type |= VM_FAULT_MAJOR;
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(fault_mm, PGMAJFAULT);
		}

		/*
		 * Now swap device can only swap in order 0 folio, then we
		 * should split the large swap entry stored in the pagecache
		 * if necessary.
		 */
		split_order = shmem_split_large_entry(inode, index, swap, gfp);
		if (split_order < 0) {
			error = split_order;
			goto failed;
		}

		/*
		 * If the large swap entry has already been split, it is
		 * necessary to recalculate the new swap entry based on
		 * the old order alignment.
		 */
		if (split_order > 0) {
			pgoff_t offset = index - round_down(index, 1 << split_order);

			swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
		}

		/* Here we actually start the io */
		folio = shmem_swapin(swap, gfp, info, index);
		if (!folio) {
@@ -2061,6 +2209,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
		goto failed;
	}
	folio_wait_writeback(folio);
	nr_pages = folio_nr_pages(folio);

	/*
	 * Some architectures may have to restore extra metadata to the
@@ -2069,24 +2218,25 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
	arch_swap_restore(folio_swap(swap, folio), folio);

	if (shmem_should_replace_folio(folio, gfp)) {
		error = shmem_replace_folio(&folio, gfp, info, index);
		error = shmem_replace_folio(&folio, gfp, info, index, vma);
		if (error)
			goto failed;
	}

	error = shmem_add_to_page_cache(folio, mapping, index,
	error = shmem_add_to_page_cache(folio, mapping,
					round_down(index, nr_pages),
					swp_to_radix_entry(swap), gfp);
	if (error)
		goto failed;

	shmem_recalc_inode(inode, 0, -1);
	shmem_recalc_inode(inode, 0, -nr_pages);

	if (sgp == SGP_WRITE)
		folio_mark_accessed(folio);

	delete_from_swap_cache(folio);
	folio_mark_dirty(folio);
	swap_free(swap);
	swap_free_nr(swap, nr_pages);
	put_swap_device(si);

	*foliop = folio;
@@ -2149,7 +2299,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,

	if (xa_is_value(folio)) {
		error = shmem_swapin_folio(inode, index, &folio,
					   sgp, gfp, fault_mm, fault_type);
					   sgp, gfp, vma, fault_type);
		if (error == -EEXIST)
			goto repeat;

Loading