Commit 4e0cf05f authored by Johannes Weiner, committed by Andrew Morton
Browse files

mm: memcontrol: skip moving non-present pages that are mapped elsewhere

Patch series "mm: push down lock_page_memcg()", v2.


This patch (of 3):

During charge moving, the pte lock and the page lock cover nearly all
cases of stabilizing page_mapped().  The only exception is when we're
looking at a non-present pte and find a page in the page cache or in the
swapcache: if the page is mapped elsewhere, it can become unmapped outside
of our control.  For this reason, rmap needs lock_page_memcg().

We don't like cgroup-specific locks in generic MM code - especially in
performance-critical MM code - and for a legacy feature that's unlikely to
have many users left - if any.

So remove the exception.  Arguably that's better semantics anyway: the
page is shared, and another process seems to be the more active user.

Once we stop moving such pages, rmap doesn't need lock_page_memcg()
anymore.  The next patch will remove it.

Link: https://lkml.kernel.org/r/20221206171340.139790-1-hannes@cmpxchg.org
Link: https://lkml.kernel.org/r/20221206171340.139790-2-hannes@cmpxchg.org


Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Hugh Dickins <hughd@google.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent c5094ec7
Loading
Loading
Loading
Loading
+38 −14
Original line number Diff line number Diff line
@@ -5692,7 +5692,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 * @from: mem_cgroup which the page is moved from.
 * @to:	mem_cgroup which the page is moved to. @from != @to.
 *
 * The caller must make sure the page is not on LRU (isolate_page() is useful.)
 * The page must be locked and not on the LRU.
 *
 * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
 * from old cgroup.
@@ -5709,20 +5709,13 @@ static int mem_cgroup_move_account(struct page *page,
	int nid, ret;

	VM_BUG_ON(from == to);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
	VM_BUG_ON(compound && !folio_test_large(folio));

	/*
	 * Prevent mem_cgroup_migrate() from looking at
	 * page's memory cgroup of its source page while we change it.
	 */
	ret = -EBUSY;
	if (!folio_trylock(folio))
		goto out;

	ret = -EINVAL;
	if (folio_memcg(folio) != from)
		goto out_unlock;
		goto out;

	pgdat = folio_pgdat(folio);
	from_vec = mem_cgroup_lruvec(from, pgdat);
@@ -5809,8 +5802,6 @@ static int mem_cgroup_move_account(struct page *page,
	mem_cgroup_charge_statistics(from, -nr_pages);
	memcg_check_events(from, nid);
	local_irq_enable();
out_unlock:
	folio_unlock(folio);
out:
	return ret;
}
@@ -5859,6 +5850,29 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
	else if (is_swap_pte(ptent))
		page = mc_handle_swap_pte(vma, ptent, &ent);

	if (target && page) {
		if (!trylock_page(page)) {
			put_page(page);
			return ret;
		}
		/*
		 * page_mapped() must be stable during the move. This
		 * pte is locked, so if it's present, the page cannot
		 * become unmapped. If it isn't, we have only partial
		 * control over the mapped state: the page lock will
		 * prevent new faults against pagecache and swapcache,
		 * so an unmapped page cannot become mapped. However,
		 * if the page is already mapped elsewhere, it can
		 * unmap, and there is nothing we can do about it.
		 * Alas, skip moving the page in this case.
		 */
		if (!pte_present(ptent) && page_mapped(page)) {
			unlock_page(page);
			put_page(page);
			return ret;
		}
	}

	if (!page && !ent.val)
		return ret;
	if (page) {
@@ -5875,9 +5889,12 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
			if (target)
				target->page = page;
		}
		if (!ret || !target)
		if (!ret || !target) {
			if (target)
				unlock_page(page);
			put_page(page);
		}
	}
	/*
	 * There is a swap entry and a page doesn't exist or isn't charged.
	 * But we cannot move a tail-page in a THP.
@@ -5916,6 +5933,10 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
		ret = MC_TARGET_PAGE;
		if (target) {
			get_page(page);
			if (!trylock_page(page)) {
				put_page(page);
				return MC_TARGET_NONE;
			}
			target->page = page;
		}
	}
@@ -6154,6 +6175,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
				}
				putback_lru_page(page);
			}
			unlock_page(page);
			put_page(page);
		} else if (target_type == MC_TARGET_DEVICE) {
			page = target.page;
@@ -6162,6 +6184,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
				mc.precharge -= HPAGE_PMD_NR;
				mc.moved_charge += HPAGE_PMD_NR;
			}
			unlock_page(page);
			put_page(page);
		}
		spin_unlock(ptl);
@@ -6204,7 +6227,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
			}
			if (!device)
				putback_lru_page(page);
put:			/* get_mctgt_type() gets the page */
put:			/* get_mctgt_type() gets & locks the page */
			unlock_page(page);
			put_page(page);
			break;
		case MC_TARGET_SWAP: