Unverified Commit 2d99a8d0 authored by openeuler-ci-bot, committed by Gitee

!10336 v2 mm: numa-affinity: support THP migration

Merge Pull Request from: @ci-robot 
 
PR sync from: Nanyong Sun <sunnanyong@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/PJP5PVLPBJ7VF6J2ICRV5NJZVUBC2WXT/ 
1. Backport the patch series:
        mm: thp: use generic THP migration for NUMA hinting fault
2. Support THP migration in numa-affinity
3. Some minor optimizations

Changes from v1:
Backport one more "bugfix" patch (the last one) to avoid CI failures.

Aneesh Kumar K.V (1):
  mm/migrate: fix NR_ISOLATED corruption on 64-bit

Huang Ying (1):
  mm,do_huge_pmd_numa_page: remove unnecessary TLB flushing code

Nanyong Sun (3):
  mm: fix KABI broken in struct vm_fault
  mm: numa-affinity: support THP migration
  mm: numa-affinity: delete the duplicate numa_migrate_prep

Yang Shi (7):
  mm: memory: add orig_pmd to struct vm_fault
  mm: memory: make numa_migrate_prep() non-static
  mm: thp: refactor NUMA fault handling
  mm: migrate: account THP NUMA migration counters correctly
  mm: migrate: don't split THP for misplaced NUMA page
  mm: migrate: check mapcount for THP instead of refcount
  mm: thp: skip make PMD PROT_NONE if THP migration is not supported

Ze Zuo (1):
  mm: numa-affinity: backport some migrate policy from AutoNuma


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/IAFONL 
 
Link: https://gitee.com/openeuler/kernel/pulls/10336

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parents 3d877061 b9a88427
+4 −5
@@ -11,7 +11,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
		  struct vm_area_struct *vma);
@@ -24,7 +24,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
}
#endif

vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
				   unsigned long addr, pmd_t *pmd,
				   unsigned int flags);
@@ -291,7 +291,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
		pud_t *pud, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

extern struct page *huge_zero_page;
extern unsigned long huge_zero_pfn;
@@ -444,8 +444,7 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
	return NULL;
}

static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
		pmd_t orig_pmd)
static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
	return 0;
}
+0 −23
@@ -105,14 +105,9 @@ static inline void __ClearPageMovable(struct page *page)
#endif

#ifdef CONFIG_NUMA_BALANCING
extern bool pmd_trans_migrating(pmd_t pmd);
extern int migrate_misplaced_page(struct page *page,
				  struct vm_area_struct *vma, int node);
#else
static inline bool pmd_trans_migrating(pmd_t pmd)
{
	return false;
}
static inline int migrate_misplaced_page(struct page *page,
					 struct vm_area_struct *vma, int node)
{
@@ -120,24 +115,6 @@ static inline int migrate_misplaced_page(struct page *page,
}
#endif /* CONFIG_NUMA_BALANCING */

#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node);
#else
static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node)
{
	return -EAGAIN;
}
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/


#ifdef CONFIG_MIGRATION

/*
+7 −1
@@ -579,7 +579,13 @@ struct vm_fault {
	pud_t *pud;			/* Pointer to pud entry matching
					 * the 'address'
					 */
	KABI_REPLACE(pte_t orig_pte,
	union {
		pte_t orig_pte;		/* Value of PTE at the time of fault */
		pmd_t orig_pmd;		/* Value of PMD at the time of fault,
					 * used by PMD fault only.
					 */
	})

	struct page *cow_page;		/* Page handler may use for COW fault */
	struct page *page;		/* ->fault handlers should return a
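
The hunk above widens struct vm_fault so PMD-level fault handlers can carry the original PMD value, while the openEuler KABI_REPLACE wrapper keeps the structure's size and the orig_pte slot's offset intact for existing consumers. A minimal, self-contained sketch of the idea follows; the macro name and the typedefs are illustrative stand-ins, not the real openEuler definitions.

typedef unsigned long pte_t;	/* stand-ins for the real page table types */
typedef unsigned long pmd_t;

/*
 * Old and new members share one anonymous union; the old member is hidden
 * inside a named struct so its name cannot clash with the replacement.
 * Offset and size stay the same, so the ABI is preserved while fault
 * handlers gain access to orig_pmd.
 */
#define KABI_REPLACE_SKETCH(_orig, _new)		\
	union {						\
		_new;					\
		struct { _orig; } __kabi_hidden;	\
	}

struct vm_fault_sketch {
	unsigned long address;
	KABI_REPLACE_SKETCH(pte_t orig_pte,
		union {
			pte_t orig_pte;	/* value of PTE at fault time */
			pmd_t orig_pmd;	/* value of PMD, PMD faults only */
		});
};

With this layout, existing code that reads vmf->orig_pte keeps working, and the THP fault paths in the next file can read vmf->orig_pmd instead of passing the PMD value around as a function parameter.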
+45 −122
@@ -1262,11 +1262,12 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
void huge_pmd_set_accessed(struct vm_fault *vmf)
{
	pmd_t entry;
	unsigned long haddr;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	pmd_t orig_pmd = vmf->orig_pmd;

	vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
@@ -1283,11 +1284,12 @@ void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
	spin_unlock(vmf->ptl);
}

vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	pmd_t orig_pmd = vmf->orig_pmd;

	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1423,160 +1425,77 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
}

/* NUMA hinting page fault entry point for trans huge pmds */
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct anon_vma *anon_vma = NULL;
	pmd_t oldpmd = vmf->orig_pmd;
	pmd_t pmd;
	struct page *page;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	int page_nid = NUMA_NO_NODE, this_nid = numa_node_id();
	int page_nid = NUMA_NO_NODE;
	int target_nid, last_cpupid = -1;
	bool page_locked;
	bool migrated = false;
	bool was_writable;
	bool was_writable = pmd_savedwrite(oldpmd);
	int flags = 0;

	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
	if (unlikely(!pmd_same(pmd, *vmf->pmd)))
		goto out_unlock;

	/*
	 * If there are potential migrations, wait for completion and retry
	 * without disrupting NUMA hinting information. Do not relock and
	 * check_same as the page may no longer be mapped.
	 */
	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
		page = pmd_page(*vmf->pmd);
		if (!get_page_unless_zero(page))
			goto out_unlock;
	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
		spin_unlock(vmf->ptl);
		put_and_wait_on_page_locked(page);
		goto out;
	}

	page = pmd_page(pmd);
	BUG_ON(is_huge_zero_page(page));
	page_nid = page_to_nid(page);
	last_cpupid = page_cpupid_last(page);
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_nid == this_nid) {
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
		flags |= TNF_FAULT_LOCAL;
	}
	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
	page = vm_normal_page_pmd(vma, haddr, pmd);
	if (!page)
		goto out_map;

	/* See similar comment in do_numa_page for explanation */
	if (!pmd_savedwrite(pmd))
	if (!was_writable)
		flags |= TNF_NO_GROUP;

	/*
	 * Acquire the page lock to serialise THP migrations but avoid dropping
	 * page_table_lock if at all possible
	 */
	page_locked = trylock_page(page);
	target_nid = mpol_misplaced(page, vma, haddr);
	if (target_nid == NUMA_NO_NODE) {
		/* If the page was locked, there are no parallel migrations */
		if (page_locked)
			goto clear_pmdnuma;
	}

	/* Migration could have started since the pmd_trans_migrating check */
	if (!page_locked) {
		page_nid = NUMA_NO_NODE;
		if (!get_page_unless_zero(page))
			goto out_unlock;
		spin_unlock(vmf->ptl);
		put_and_wait_on_page_locked(page);
		goto out;
	}

	/*
	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
	 * to serialises splits
	 */
	get_page(page);
	spin_unlock(vmf->ptl);
	anon_vma = page_lock_anon_vma_read(page);

	/* Confirm the PMD did not change while page_table_lock was released */
	spin_lock(vmf->ptl);
	if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
		unlock_page(page);
		put_page(page);
		page_nid = NUMA_NO_NODE;
		goto out_unlock;
	}
	page_nid = page_to_nid(page);
	last_cpupid = page_cpupid_last(page);
	target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
				       &flags);

	/* Bail if we fail to protect against THP splits for any reason */
	if (unlikely(!anon_vma)) {
	if (target_nid == NUMA_NO_NODE) {
		put_page(page);
		page_nid = NUMA_NO_NODE;
		goto clear_pmdnuma;
		goto out_map;
	}

	/*
	 * Since we took the NUMA fault, we must have observed the !accessible
	 * bit. Make sure all other CPUs agree with that, to avoid them
	 * modifying the page we're about to migrate.
	 *
	 * Must be done under PTL such that we'll observe the relevant
	 * inc_tlb_flush_pending().
	 *
	 * We are not sure a pending tlb flush here is for a huge page
	 * mapping or not. Hence use the tlb range variant
	 */
	if (mm_tlb_flush_pending(vma->vm_mm)) {
		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
		/*
		 * change_huge_pmd() released the pmd lock before
		 * invalidating the secondary MMUs sharing the primary
		 * MMU pagetables (with ->invalidate_range()). The
		 * mmu_notifier_invalidate_range_end() (which
		 * internally calls ->invalidate_range()) in
		 * change_pmd_range() will run after us, so we can't
		 * rely on it here and we need an explicit invalidate.
		 */
		mmu_notifier_invalidate_range(vma->vm_mm, haddr,
					      haddr + HPAGE_PMD_SIZE);
	}

	/*
	 * Migrate the THP to the requested node, returns with page unlocked
	 * and access rights restored.
	 */
	spin_unlock(vmf->ptl);

	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
				vmf->pmd, pmd, vmf->address, page, target_nid);
	migrated = migrate_misplaced_page(page, vma, target_nid);
	if (migrated) {
		flags |= TNF_MIGRATED;
		page_nid = target_nid;
	} else
	} else {
		flags |= TNF_MIGRATE_FAIL;

	goto out;
clear_pmdnuma:
	BUG_ON(!PageLocked(page));
	was_writable = pmd_savedwrite(pmd);
	pmd = pmd_modify(pmd, vma->vm_page_prot);
	pmd = pmd_mkyoung(pmd);
	if (was_writable)
		pmd = pmd_mkwrite(pmd);
	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
	unlock_page(page);
out_unlock:
		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
			spin_unlock(vmf->ptl);
			goto out;
		}
		goto out_map;
	}

out:
	if (anon_vma)
		page_unlock_anon_vma_read(anon_vma);

	if (page_nid != NUMA_NO_NODE)
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
				flags);

	return 0;

out_map:
	/* Restore the PMD */
	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
	pmd = pmd_mkyoung(pmd);
	if (was_writable)
		pmd = pmd_mkwrite(pmd);
	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
	spin_unlock(vmf->ptl);
	goto out;
}

/*
@@ -1806,6 +1725,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 * Returns
 *  - 0 if PMD could not be locked
 *  - 1 if PMD was locked but protections unchange and TLB flush unnecessary
 *      or if prot_numa but THP migration is not supported
 *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
 */
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
@@ -1820,6 +1740,9 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

	if (prot_numa && !thp_migration_supported())
		return 1;

	ptl = __pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return 0;
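
The new early return above means prot_numa updates simply skip huge PMDs on architectures that cannot migrate THPs: there is no point in making the PMD PROT_NONE if the resulting NUMA hinting fault could never move the page. For reference, the helper it consults is trivial in mainline; the sketch below assumes the upstream definition and needs <linux/kconfig.h> for IS_ENABLED.

/* Reflects whether the architecture implements THP migration entries. */
static inline bool thp_migration_supported(void)
{
	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}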
+3 −18
@@ -384,23 +384,6 @@ extern unsigned int munlock_vma_page(struct page *page);
 */
extern void clear_page_mlock(struct page *page);

/*
 * mlock_migrate_page - called only from migrate_misplaced_transhuge_page()
 * (because that does not go through the full procedure of migration ptes):
 * to migrate the Mlocked page flag; update statistics.
 */
static inline void mlock_migrate_page(struct page *newpage, struct page *page)
{
	if (TestClearPageMlocked(page)) {
		int nr_pages = thp_nr_pages(page);

		/* Holding pmd lock, no change in irq context: __mod is safe */
		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		SetPageMlocked(newpage);
		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
	}
}

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
@@ -476,7 +459,6 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
#else /* !CONFIG_MMU */
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void mlock_migrate_page(struct page *new, struct page *old) { }

#endif /* !CONFIG_MMU */

@@ -673,4 +655,7 @@ struct migration_target_control {

DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);

int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
		      unsigned long addr, int page_nid, int *flags);

#endif	/* __MM_INTERNAL_H */
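
The last hunk exposes numa_migrate_prep() to the rest of mm/, so the base-page NUMA fault path in do_numa_page() and the refactored THP path in do_huge_pmd_numa_page() share one helper for hinting-fault accounting and memory-policy placement. A sketch of that helper, based on the mainline mm/memory.c version (the backported body may differ in detail):

int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
		      unsigned long addr, int page_nid, int *flags)
{
	/* Pin the page so it survives until the migration decision is acted on. */
	get_page(page);

	/* Account the hinting fault, noting whether it was node-local. */
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_nid == numa_node_id()) {
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
		*flags |= TNF_FAULT_LOCAL;
	}

	/* Ask the memory policy which node the page should live on. */
	return mpol_misplaced(page, vma, addr);
}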