Commit 5dfab109 authored by Huang Ying, committed by Andrew Morton

migrate_pages: batch _unmap and _move

In this patch, the _unmap and _move stages of folio migration are
batched.  That is, previously it was,

  for each folio
    _unmap()
    _move()

Now, it is,

  for each folio
    _unmap()
  for each folio
    _move()

Based on this, we can batch the TLB flushing and use a hardware
accelerator to copy folios between the batched _unmap and batched _move
stages.
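
As a rough illustration, here is a minimal, stand-alone user-space sketch of
the same restructuring (the "item" struct and the sketch_* helpers are made-up
stand-ins, not the mm/migrate.c API): one loop becomes two passes, with a
natural batching point in between where a single TLB flush or an
accelerator-driven bulk copy could be issued.

  #include <stdio.h>

  struct item {
          int unmapped;        /* set by the "unmap" pass */
          int moved;           /* set by the "move" pass */
  };

  /* Stand-in for the _unmap() stage: make the source inaccessible. */
  static void sketch_unmap(struct item *it)
  {
          it->unmapped = 1;
  }

  /* Stand-in for the _move() stage: copy and redirect users to the target. */
  static void sketch_move(struct item *it)
  {
          if (it->unmapped)
                  it->moved = 1;
  }

  int main(void)
  {
          struct item items[4] = { 0 };
          int i;

          /* Pass 1: batch all "unmap" work. */
          for (i = 0; i < 4; i++)
                  sketch_unmap(&items[i]);

          /*
           * Batching point: one TLB flush covering every unmapped item, or
           * an accelerator-driven bulk copy, would be issued here.
           */

          /* Pass 2: batch all "move" work. */
          for (i = 0; i < 4; i++)
                  sketch_move(&items[i]);

          for (i = 0; i < 4; i++)
                  printf("item %d: unmapped=%d moved=%d\n",
                         i, items[i].unmapped, items[i].moved);
          return 0;
  }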

Link: https://lkml.kernel.org/r/20230213123444.155149-6-ying.huang@intel.com


Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Xin Hao <xhao@linux.alibaba.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 64c8902e
+189 −25
@@ -1051,6 +1051,33 @@ static void __migrate_folio_extract(struct folio *dst,
	dst->private = NULL;
}

/* Restore the source folio to the original state upon failure */
static void migrate_folio_undo_src(struct folio *src,
				   int page_was_mapped,
				   struct anon_vma *anon_vma,
				   struct list_head *ret)
{
	if (page_was_mapped)
		remove_migration_ptes(src, src, false);
	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);
	folio_unlock(src);
	list_move_tail(&src->lru, ret);
}

/* Restore the destination folio to the original state upon failure */
static void migrate_folio_undo_dst(struct folio *dst,
				   free_page_t put_new_page,
				   unsigned long private)
{
	folio_unlock(dst);
	if (put_new_page)
		put_new_page(&dst->page, private);
	else
		folio_put(dst);
}

/* Cleanup src folio upon migration success */
static void migrate_folio_done(struct folio *src,
			       enum migrate_reason reason)
@@ -1069,8 +1096,8 @@ static void migrate_folio_done(struct folio *src,
		folio_put(src);
}

static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
				int force, enum migrate_mode mode)
static int __migrate_folio_unmap(struct folio *src, struct folio *dst, int force,
				 bool avoid_force_lock, enum migrate_mode mode)
{
	int rc = -EAGAIN;
	int page_was_mapped = 0;
@@ -1097,6 +1124,17 @@ static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
		if (current->flags & PF_MEMALLOC)
			goto out;

		/*
		 * We have locked some folios and are going to wait to lock
		 * this folio.  To avoid a potential deadlock, let's bail
		 * out and not do that. The locked folios will be moved and
		 * unlocked, then we can wait to lock this folio.
		 */
		if (avoid_force_lock) {
			rc = -EDEADLOCK;
			goto out;
		}

		folio_lock(src);
	}

@@ -1205,10 +1243,20 @@ static int __migrate_folio_move(struct folio *src, struct folio *dst,
	int page_was_mapped = 0;
	struct anon_vma *anon_vma = NULL;
	bool is_lru = !__PageMovable(&src->page);
	struct list_head *prev;

	__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
	prev = dst->lru.prev;
	list_del(&dst->lru);

	rc = move_to_new_folio(dst, src, mode);

	if (rc == -EAGAIN) {
		list_add(&dst->lru, prev);
		__migrate_folio_record(dst, page_was_mapped, anon_vma);
		return rc;
	}

	if (unlikely(!is_lru))
		goto out_unlock_both;

@@ -1251,7 +1299,7 @@ static int __migrate_folio_move(struct folio *src, struct folio *dst,
/* Obtain the lock on page, remove all ptes. */
static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
			       unsigned long private, struct folio *src,
			       struct folio **dstp, int force,
			       struct folio **dstp, int force, bool avoid_force_lock,
			       enum migrate_mode mode, enum migrate_reason reason,
			       struct list_head *ret)
{
@@ -1279,7 +1327,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
	*dstp = dst;

	dst->private = NULL;
	rc = __migrate_folio_unmap(src, dst, force, mode);
	rc = __migrate_folio_unmap(src, dst, force, avoid_force_lock, mode);
	if (rc == MIGRATEPAGE_UNMAP)
		return rc;

@@ -1287,7 +1335,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
	 * A folio that has not been unmapped will be restored to
	 * right list unless we want to retry.
	 */
	if (rc != -EAGAIN)
	if (rc != -EAGAIN && rc != -EDEADLOCK)
		list_move_tail(&src->lru, ret);

	if (put_new_page)
@@ -1326,8 +1374,7 @@ static int migrate_folio_move(free_page_t put_new_page, unsigned long private,
	 */
	if (rc == MIGRATEPAGE_SUCCESS) {
		migrate_folio_done(src, reason);
	} else {
		if (rc != -EAGAIN)
	} else if (rc != -EAGAIN) {
		list_add_tail(&src->lru, ret);

		if (put_new_page)
@@ -1603,12 +1650,16 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
	return nr_failed;
}

/*
 * migrate_pages_batch() first unmaps as many folios in the from list as
 * possible, then moves the unmapped folios.
 */
static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
		free_page_t put_new_page, unsigned long private,
		enum migrate_mode mode, int reason, struct list_head *ret_folios,
		struct migrate_pages_stats *stats)
{
	int retry = 1;
	int retry;
	int large_retry = 1;
	int thp_retry = 1;
	int nr_failed = 0;
@@ -1617,13 +1668,19 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
	int pass = 0;
	bool is_large = false;
	bool is_thp = false;
	struct folio *folio, *folio2, *dst = NULL;
	int rc, nr_pages;
	struct folio *folio, *folio2, *dst = NULL, *dst2;
	int rc, rc_saved, nr_pages;
	LIST_HEAD(split_folios);
	LIST_HEAD(unmap_folios);
	LIST_HEAD(dst_folios);
	bool nosplit = (reason == MR_NUMA_MISPLACED);
	bool no_split_folio_counting = false;
	bool avoid_force_lock;

split_folio_migration:
retry:
	rc_saved = 0;
	avoid_force_lock = false;
	retry = 1;
	for (pass = 0;
	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
	     pass++) {
@@ -1645,16 +1702,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
			cond_resched();

			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
						 folio, &dst, pass > 2, mode,
						 reason, ret_folios);
			if (rc == MIGRATEPAGE_UNMAP)
				rc = migrate_folio_move(put_new_page, private,
							folio, dst, mode,
							reason, ret_folios);
						 folio, &dst, pass > 2, avoid_force_lock,
						 mode, reason, ret_folios);
			/*
			 * The rules are:
			 *	Success: folio will be freed
			 *	Unmap: folio will be put on unmap_folios list,
			 *	       dst folio put on dst_folios list
			 *	-EAGAIN: stay on the from list
			 *	-EDEADLOCK: stay on the from list
			 *	-ENOMEM: stay on the from list
			 *	-ENOSYS: stay on the from list
			 *	Other errno: put on ret_folios list
@@ -1689,7 +1745,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
			case -ENOMEM:
				/*
				 * When memory is low, don't bother to try to migrate
				 * other folios, just exit.
				 * other folios, move unmapped folios, then exit.
				 */
				if (is_large) {
					nr_large_failed++;
@@ -1728,7 +1784,19 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
				/* nr_failed isn't updated for not used */
				nr_large_failed += large_retry;
				stats->nr_thp_failed += thp_retry;
				rc_saved = rc;
				if (list_empty(&unmap_folios))
					goto out;
				else
					goto move;
			case -EDEADLOCK:
				/*
				 * The folio cannot be locked because of a potential
				 * deadlock.  Go move (and unlock) all locked folios.
				 * Then we can try again.
				 */
				rc_saved = rc;
				goto move;
			case -EAGAIN:
				if (is_large) {
					large_retry++;
@@ -1742,6 +1810,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
				stats->nr_succeeded += nr_pages;
				stats->nr_thp_succeeded += is_thp;
				break;
			case MIGRATEPAGE_UNMAP:
				/*
				 * We have locked some folios, don't force lock
				 * to avoid deadlock.
				 */
				avoid_force_lock = true;
				list_move_tail(&folio->lru, &unmap_folios);
				list_add_tail(&dst->lru, &dst_folios);
				break;
			default:
				/*
				 * Permanent failure (-EBUSY, etc.):
@@ -1765,12 +1842,95 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
	nr_large_failed += large_retry;
	stats->nr_thp_failed += thp_retry;
	stats->nr_failed_pages += nr_retry_pages;
move:
	retry = 1;
	for (pass = 0;
	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
	     pass++) {
		retry = 0;
		large_retry = 0;
		thp_retry = 0;
		nr_retry_pages = 0;

		dst = list_first_entry(&dst_folios, struct folio, lru);
		dst2 = list_next_entry(dst, lru);
		list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
			is_large = folio_test_large(folio);
			is_thp = is_large && folio_test_pmd_mappable(folio);
			nr_pages = folio_nr_pages(folio);

			cond_resched();

			rc = migrate_folio_move(put_new_page, private,
						folio, dst, mode,
						reason, ret_folios);
			/*
			 * The rules are:
			 *	Success: folio will be freed
			 *	-EAGAIN: stay on the unmap_folios list
			 *	Other errno: put on ret_folios list
			 */
			switch(rc) {
			case -EAGAIN:
				if (is_large) {
					large_retry++;
					thp_retry += is_thp;
				} else if (!no_split_folio_counting) {
					retry++;
				}
				nr_retry_pages += nr_pages;
				break;
			case MIGRATEPAGE_SUCCESS:
				stats->nr_succeeded += nr_pages;
				stats->nr_thp_succeeded += is_thp;
				break;
			default:
				if (is_large) {
					nr_large_failed++;
					stats->nr_thp_failed += is_thp;
				} else if (!no_split_folio_counting) {
					nr_failed++;
				}

				stats->nr_failed_pages += nr_pages;
				break;
			}
			dst = dst2;
			dst2 = list_next_entry(dst, lru);
		}
	}
	nr_failed += retry;
	nr_large_failed += large_retry;
	stats->nr_thp_failed += thp_retry;
	stats->nr_failed_pages += nr_retry_pages;

	if (rc_saved)
		rc = rc_saved;
	else
		rc = nr_failed + nr_large_failed;
out:
	/* Cleanup remaining folios */
	dst = list_first_entry(&dst_folios, struct folio, lru);
	dst2 = list_next_entry(dst, lru);
	list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
		int page_was_mapped = 0;
		struct anon_vma *anon_vma = NULL;

		__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
		migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
				       ret_folios);
		list_del(&dst->lru);
		migrate_folio_undo_dst(dst, put_new_page, private);
		dst = dst2;
		dst2 = list_next_entry(dst, lru);
	}

	/*
	 * Try to migrate split folios of fail-to-migrate large folios, no
	 * nr_failed counting in this round, since all split folios of a
	 * large folio are counted as 1 failure in the first round.
	 */
	if (!list_empty(&split_folios)) {
	if (rc >= 0 && !list_empty(&split_folios)) {
		/*
		 * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
		 * retries) to ret_folios to avoid migrating them again.
@@ -1778,12 +1938,16 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
		list_splice_init(from, ret_folios);
		list_splice_init(&split_folios, from);
		no_split_folio_counting = true;
		retry = 1;
		goto split_folio_migration;
		goto retry;
	}

	rc = nr_failed + nr_large_failed;
out:
	/*
	 * We have unlocked all locked folios, so we can force lock now, let's
	 * try again.
	 */
	if (rc == -EDEADLOCK)
		goto retry;

	return rc;
}