Commit 26232ad3 authored by Baolin Wang's avatar Baolin Wang Committed by Euler
Browse files

mm: shmem: split large entry if the swapin folio is not large

mainline inclusion
from mainline-v6.12-rc1
commit 12885cbe88ddf6c5fc3306193a6449ac310f2331
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAOMRL

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=12885cbe88ddf6c5fc3306193a6449ac310f2331

--------------------------------

Now the swap device can only swap-in order 0 folio, even though a large
folio is swapped out.  This requires us to split the large entry
previously saved in the shmem pagecache to support the swap in of small
folios.

[hughd@google.com: fix warnings from kmalloc_fix_flags()]
  Link: https://lkml.kernel.org/r/e2a2ba5d-864c-50aa-7579-97cba1c7dd0c@google.com
[baolin.wang@linux.alibaba.com: drop the 'new_order' parameter]
  Link: https://lkml.kernel.org/r/39c71ccf-669b-4d9f-923c-f6b9c4ceb8df@linux.alibaba.com
Link: https://lkml.kernel.org/r/4a0f12f27c54a62eb4d9ca1265fed3a62531a63e.1723434324.git.baolin.wang@linux.alibaba.com


Signed-off-by: default avatarBaolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarWang Lian <dev01404@linx-info.com>
---
parent dbd691d4
Loading
Loading
Loading
Loading
+103 −0
Original line number Diff line number Diff line
@@ -2011,6 +2011,84 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
	swap_free_nr(swap, nr_pages);
}

static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
				   swp_entry_t swap, gfp_t gfp)
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
	void *alloced_shadow = NULL;
	int alloced_order = 0, i;

	/* Convert user data gfp flags to xarray node gfp flags */
	gfp &= GFP_RECLAIM_MASK;

	for (;;) {
		int order = -1, split_order = 0;
		void *old = NULL;

		xas_lock_irq(&xas);
		old = xas_load(&xas);
		if (!xa_is_value(old) || swp_to_radix_entry(swap) != old) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}

		order = xas_get_order(&xas);

		/* Swap entry may have changed before we re-acquire the lock */
		if (alloced_order &&
		    (old != alloced_shadow || order != alloced_order)) {
			xas_destroy(&xas);
			alloced_order = 0;
		}

		/* Try to split large swap entry in pagecache */
		if (order > 0) {
			if (!alloced_order) {
				split_order = order;
				goto unlock;
			}
			xas_split(&xas, old, order);

			/*
			 * Re-set the swap entry after splitting, and the swap
			 * offset of the original large entry must be continuous.
			 */
			for (i = 0; i < 1 << order; i++) {
				pgoff_t aligned_index = round_down(index, 1 << order);
				swp_entry_t tmp;

				tmp = swp_entry(swp_type(swap), swp_offset(swap) + i);
				__xa_store(&mapping->i_pages, aligned_index + i,
					   swp_to_radix_entry(tmp), 0);
			}
		}

unlock:
		xas_unlock_irq(&xas);

		/* split needed, alloc here and retry. */
		if (split_order) {
			xas_split_alloc(&xas, old, split_order, gfp);
			if (xas_error(&xas))
				goto error;
			alloced_shadow = old;
			alloced_order = split_order;
			xas_reset(&xas);
			continue;
		}

		if (!xas_nomem(&xas, gfp))
			break;
	}

error:
	if (xas_error(&xas))
		return xas_error(&xas);

	return alloced_order;
}

/*
 * Swap in the folio pointed to by *foliop.
 * Caller has to make sure that *foliop contains a valid swapped folio.
@@ -2048,12 +2126,37 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
	/* Look it up and read it in.. */
	folio = swap_cache_get_folio(swap, NULL, 0);
	if (!folio) {
		int split_order;

		/* Or update major stats only when swapin succeeds?? */
		if (fault_type) {
			*fault_type |= VM_FAULT_MAJOR;
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(fault_mm, PGMAJFAULT);
		}

		/*
		 * Now swap device can only swap in order 0 folio, then we
		 * should split the large swap entry stored in the pagecache
		 * if necessary.
		 */
		split_order = shmem_split_large_entry(inode, index, swap, gfp);
		if (split_order < 0) {
			error = split_order;
			goto failed;
		}

		/*
		 * If the large swap entry has already been split, it is
		 * necessary to recalculate the new swap entry based on
		 * the old order alignment.
		 */
		if (split_order > 0) {
			pgoff_t offset = index - round_down(index, 1 << split_order);

			swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
		}

		/* Here we actually start the io */
		folio = shmem_swapin(swap, gfp, info, index);
		if (!folio) {