Unverified commit 45c4db5b, authored by openeuler-ci-bot and committed by Gitee

!7997 mm: some misc bugfix

Merge Pull Request from: @ci-robot 
 
PR sync from: Kefeng Wang <wangkefeng.wang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/DBOSC3I7HKUSQ3DPBCZCNNXFJNA3GCAR/ 
Most of these are bugfixes for large folios; one is about fault-around, one is about vmalloc, and two are small code optimizations.

Baolin Wang (1):
  mm: huge_memory: add the missing folio_test_pmd_mappable() for THP
    split statistics

Barry Song (1):
  mm: prohibit the last subpage from reusing the entire large folio

John Hubbard (1):
  mm/memory.c: do_numa_page(): remove a redundant page table read

Kefeng Wang (2):
  mm: memory: fix shift-out-of-bounds in fault_around_bytes_set
  Revert "mm: support multi-size THP numa balancing"

Matthew Wilcox (1):
  mm: simplify thp_vma_allowable_order

Uladzislau Rezki (Sony) (1):
  mm: vmalloc: bail out early in find_vmap_area() if vmap is not init

Zi Yan (1):
  mm/huge_memory: skip invalid debugfs new_order input for folio split


-- 
2.27.0
 
https://gitee.com/openeuler/kernel/issues/I9S4Z4 
 
Link: https://gitee.com/openeuler/kernel/pulls/7997
Reviewed-by: Zhang Peng <zhangpeng362@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents bdcd87c8 2ea2bf4a
fs/proc/task_mmu.c  +2 −2
@@ -869,8 +869,8 @@ static int show_smap(struct seq_file *m, void *v)
	__show_smap(m, &mss, false);

	seq_printf(m, "THPeligible:    %8u\n",
-		   !!thp_vma_allowable_orders(vma, vma->vm_flags, true, false,
-					      true, THP_ORDERS_ALL));
+		   !!thp_vma_allowable_orders(vma, vma->vm_flags,
+			   TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL));

	if (arch_pkeys_enabled())
		seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
include/linux/huge_mm.h  +15 −14
@@ -89,8 +89,12 @@ extern struct kobj_attribute shmem_enabled_attr;
 */
#define THP_ORDERS_ALL		(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)

-#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \
-	(!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order)))
+#define TVA_SMAPS		(1 << 0)	/* Will be used for procfs */
+#define TVA_IN_PF		(1 << 1)	/* Page fault handler */
+#define TVA_ENFORCE_SYSFS	(1 << 2)	/* Obey sysfs configuration */
+
+#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
+	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define HPAGE_PMD_SHIFT PMD_SHIFT
@@ -216,17 +220,15 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
}

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
-					 unsigned long vm_flags, bool smaps,
-					 bool in_pf, bool enforce_sysfs,
+					 unsigned long vm_flags,
+					 unsigned long tva_flags,
					 unsigned long orders);

/**
 * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
 * @vma:  the vm area to check
 * @vm_flags: use these vm_flags instead of vma->vm_flags
- * @smaps: whether answer will be used for smaps file
- * @in_pf: whether answer will be used by page fault handler
- * @enforce_sysfs: whether sysfs config should be taken into account
+ * @tva_flags: Which TVA flags to honour
 * @orders: bitfield of all orders to consider
 *
 * Calculates the intersection of the requested hugepage orders and the allowed
@@ -239,12 +241,12 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 */
static inline
unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
-				       unsigned long vm_flags, bool smaps,
-				       bool in_pf, bool enforce_sysfs,
+				       unsigned long vm_flags,
+				       unsigned long tva_flags,
				       unsigned long orders)
{
	/* Optimization to check if required orders are enabled early. */
-	if (enforce_sysfs && vma_is_anonymous(vma)) {
+	if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
		unsigned long mask = READ_ONCE(huge_anon_orders_always);

		if (vm_flags & VM_HUGEPAGE)
@@ -258,8 +260,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
			return 0;
	}

-	return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf,
-					  enforce_sysfs, orders);
+	return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
}

enum mthp_stat_item {
@@ -437,8 +438,8 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
}

static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
-					unsigned long vm_flags, bool smaps,
-					bool in_pf, bool enforce_sysfs,
+					unsigned long vm_flags,
+					unsigned long tva_flags,
					unsigned long orders)
{
	return 0;
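
The conversion above replaces three positional booleans with named TVA_* flag bits. As a minimal illustration of the readability gain at call sites, here is a self-contained userspace sketch (plain C, not kernel code; the check_* helpers are invented for illustration, and only the TVA_* values are taken from the hunk above):

#include <stdbool.h>
#include <stdio.h>

/* Same bit values as the TVA_* flags introduced above. */
#define TVA_SMAPS		(1 << 0)
#define TVA_IN_PF		(1 << 1)
#define TVA_ENFORCE_SYSFS	(1 << 2)

/* Old style: the call site reads "false, true, true" and gives no hint of meaning. */
static void check_bools(bool smaps, bool in_pf, bool enforce_sysfs)
{
	printf("smaps=%d in_pf=%d enforce_sysfs=%d\n", smaps, in_pf, enforce_sysfs);
}

/* New style: each bit is named at the point where it is passed. */
static void check_flags(unsigned long tva_flags)
{
	printf("smaps=%d in_pf=%d enforce_sysfs=%d\n",
	       !!(tva_flags & TVA_SMAPS),
	       !!(tva_flags & TVA_IN_PF),
	       !!(tva_flags & TVA_ENFORCE_SYSFS));
}

int main(void)
{
	check_bools(false, true, true);              /* which argument is which? */
	check_flags(TVA_IN_PF | TVA_ENFORCE_SYSFS);  /* self-documenting */
	return 0;
}

Both calls print the same result; the flag form also lets the callee unpack the bits by masking, as the mm/huge_memory.c hunk below does.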
mm/huge_memory.c  +16 −4
@@ -77,10 +77,13 @@ unsigned long huge_anon_orders_inherit __read_mostly;
unsigned long huge_pcp_allow_orders __read_mostly;

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
-					 unsigned long vm_flags, bool smaps,
-					 bool in_pf, bool enforce_sysfs,
+					 unsigned long vm_flags,
+					 unsigned long tva_flags,
					 unsigned long orders)
{
+	bool smaps = tva_flags & TVA_SMAPS;
+	bool in_pf = tva_flags & TVA_IN_PF;
+	bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS;
	/* Check the intersection of requested and supported orders. */
	orders &= vma_is_anonymous(vma) ?
			THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE;
@@ -3155,6 +3158,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
	XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
	struct anon_vma *anon_vma = NULL;
	struct address_space *mapping = NULL;
+	bool is_thp = folio_test_pmd_mappable(folio);
	int extra_pins, ret;
	pgoff_t end;
	bool is_hzp;
@@ -3333,6 +3337,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		i_mmap_unlock_read(mapping);
out:
	xas_destroy(&xas);
-	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
+	if (is_thp)
+		count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
	return ret;
}
@@ -3395,6 +3400,7 @@ void deferred_split_folio(struct folio *folio)

	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
	if (list_empty(&folio->_deferred_list)) {
-		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+		if (folio_test_pmd_mappable(folio))
+			count_vm_event(THP_DEFERRED_SPLIT_PAGE);
		list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
		ds_queue->split_queue_len++;
@@ -3604,6 +3610,9 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
		if (!is_transparent_hugepage(folio))
			goto next;

+		if (new_order >= folio_order(folio))
+			goto next;
+
		total++;
		/*
		 * For folios with private, split_huge_page_to_list_to_order()
@@ -3671,6 +3680,9 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
		total++;
		nr_pages = folio_nr_pages(folio);

+		if (new_order >= folio_order(folio))
+			goto next;
+
		if (!folio_trylock(folio))
			goto next;

mm/khugepaged.c  +7 −9
@@ -459,7 +459,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
{
	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
	    hugepage_flags_enabled()) {
-		if (thp_vma_allowable_order(vma, vm_flags, false, false, true,
+		if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
					    PMD_ORDER))
			__khugepaged_enter(vma->vm_mm);
	}
@@ -925,6 +925,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
				   struct collapse_control *cc)
{
	struct vm_area_struct *vma;
+	unsigned long tva_flags = cc->is_khugepaged ? TVA_ENFORCE_SYSFS : 0;

	if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
		return SCAN_ANY_PROCESS;
@@ -935,8 +936,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,

	if (!thp_vma_suitable_order(vma, address, PMD_ORDER))
		return SCAN_ADDRESS_RANGE;
-	if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false,
-				     cc->is_khugepaged, PMD_ORDER))
+	if (!thp_vma_allowable_order(vma, vma->vm_flags, tva_flags, PMD_ORDER))
		return SCAN_VMA_CHECK;
	/*
	 * Anon VMA expected, the address may be unmapped then
@@ -1527,8 +1527,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
	 * and map it by a PMD, regardless of sysfs THP settings. As such, let's
	 * analogously elide sysfs THP settings here.
	 */
-	if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false, false,
-				     PMD_ORDER))
+	if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
		return SCAN_VMA_CHECK;

	/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
@@ -2403,8 +2402,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
			progress++;
			break;
		}
-		if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false,
-					     true, PMD_ORDER)) {
+		if (!thp_vma_allowable_order(vma, vma->vm_flags,
+					TVA_ENFORCE_SYSFS, PMD_ORDER)) {
skip:
			progress++;
			continue;
@@ -2741,8 +2740,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,

	*prev = vma;

-	if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false, false,
-				     PMD_ORDER))
+	if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
		return -EINVAL;

	if (task_in_dynamic_pool(current))
mm/memory.c  +36 −61
@@ -3532,6 +3532,16 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
static bool wp_can_reuse_anon_folio(struct folio *folio,
				    struct vm_area_struct *vma)
{
+	/*
+	 * We could currently only reuse a subpage of a large folio if no
+	 * other subpages of the large folios are still mapped. However,
+	 * let's just consistently not reuse subpages even if we could
+	 * reuse in that scenario, and give back a large folio a bit
+	 * sooner.
+	 */
+	if (folio_test_large(folio))
+		return false;
+
	/*
	 * We have to verify under folio lock: these early checks are
	 * just an optimization to avoid locking the folio and freeing
@@ -4333,8 +4343,8 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
	 * for this vma. Then filter out the orders that can't be allocated over
	 * the faulting address and still be fully contained in the vma.
	 */
-	orders = thp_vma_allowable_orders(vma, vma->vm_flags, false, true, true,
-					  BIT(PMD_ORDER) - 1);
+	orders = thp_vma_allowable_orders(vma, vma->vm_flags,
+			TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1);
	orders = thp_vma_suitable_orders(vma, vmf->address, orders);

	if (!orders)
@@ -4807,7 +4817,8 @@ static int fault_around_bytes_set(void *data, u64 val)
	 * The minimum value is 1 page, however this results in no fault-around
	 * at all. See should_fault_around().
	 */
-	fault_around_pages = max(rounddown_pow_of_two(val) >> PAGE_SHIFT, 1UL);
+	val = max(val, PAGE_SIZE);
+	fault_around_pages = rounddown_pow_of_two(val) >> PAGE_SHIFT;

	return 0;
}
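
The hunk above fixes the reported shift-out-of-bounds: rounddown_pow_of_two(0) is undefined, so the written value is clamped to at least one page before it is rounded and converted to a page count. A minimal userspace sketch of the same clamping logic (not kernel code; it assumes 4 KiB pages and a 64-bit unsigned long, and rounddown_pow_of_two_ul() is a stand-in for the kernel's rounddown_pow_of_two()):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* Largest power of two <= n; the caller must guarantee n != 0. */
static unsigned long rounddown_pow_of_two_ul(unsigned long n)
{
	return 1UL << (63 - __builtin_clzl(n));	/* assumes 64-bit unsigned long */
}

static unsigned long fault_around_pages_from(uint64_t val)
{
	/* Clamp first: val == 0 would otherwise shift out of bounds. */
	if (val < PAGE_SIZE)
		val = PAGE_SIZE;
	return rounddown_pow_of_two_ul(val) >> PAGE_SHIFT;
}

int main(void)
{
	printf("%lu\n", fault_around_pages_from(0));		/* 1 page, no UB */
	printf("%lu\n", fault_around_pages_from(65536));	/* 16 pages */
	return 0;
}

With the clamp in place, writing 0 via debugfs still ends up as one page of fault-around instead of undefined behaviour.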
@@ -5071,51 +5082,17 @@ int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
}

static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
-					unsigned long fault_addr, pte_t *fault_pte,
					bool writable)
{
	pte_t pte, old_pte;

-	old_pte = ptep_modify_prot_start(vma, fault_addr, fault_pte);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
	pte = pte_modify(old_pte, vma->vm_page_prot);
	pte = pte_mkyoung(pte);
	if (writable)
		pte = pte_mkwrite(pte, vma);
-	ptep_modify_prot_commit(vma, fault_addr, fault_pte, old_pte, pte);
-	update_mmu_cache_range(vmf, vma, fault_addr, fault_pte, 1);
-}
-
-static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
-				       struct folio *folio, pte_t fault_pte,
-				       bool ignore_writable, bool pte_write_upgrade)
-{
-	int nr = pte_pfn(fault_pte) - folio_pfn(folio);
-	unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
-	unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
-	pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
-	unsigned long addr;
-
-	/* Restore all PTEs' mapping of the large folio */
-	for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
-		pte_t ptent = ptep_get(start_ptep);
-		bool writable = false;
-
-		if (!pte_present(ptent) || !pte_protnone(ptent))
-			continue;
-
-		if (pfn_folio(pte_pfn(ptent)) != folio)
-			continue;
-
-		if (!ignore_writable) {
-			ptent = pte_modify(ptent, vma->vm_page_prot);
-			writable = pte_write(ptent);
-			if (!writable && pte_write_upgrade &&
-			    can_change_pte_writable(vma, addr, ptent))
-				writable = true;
-		}
-
-		numa_rebuild_single_mapping(vmf, vma, addr, start_ptep, writable);
-	}
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
+	update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
}

static vm_fault_t do_numa_page(struct vm_fault *vmf)
@@ -5123,26 +5100,25 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
	struct vm_area_struct *vma = vmf->vma;
	struct folio *folio = NULL;
	int nid = NUMA_NO_NODE;
-	bool writable = false, ignore_writable = false;
-	bool pte_write_upgrade = vma_wants_manual_pte_write_upgrade(vma);
+	bool writable = false;
	int last_cpupid;
	int target_nid;
	pte_t pte, old_pte;
-	int flags = 0, nr_pages;
+	int flags = 0;

	/*
-	 * The "pte" at this point cannot be used safely without
-	 * validation through pte_unmap_same(). It's of NUMA type but
-	 * the pfn may be screwed if the read is non atomic.
+	 * The pte cannot be used safely until we verify, while holding the page
+	 * table lock, that its contents have not changed during fault handling.
	 */
	spin_lock(vmf->ptl);
-	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+	/* Read the live PTE from the page tables: */
+	old_pte = ptep_get(vmf->pte);
+
+	if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
		pte_unmap_unlock(vmf->pte, vmf->ptl);
		goto out;
	}

-	/* Get the normal PTE  */
-	old_pte = ptep_get(vmf->pte);
	pte = pte_modify(old_pte, vma->vm_page_prot);

	/*
@@ -5150,7 +5126,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
	 * is only valid while holding the PT lock.
	 */
	writable = pte_write(pte);
-	if (!writable && pte_write_upgrade &&
+	if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
	    can_change_pte_writable(vma, vmf->address, pte))
		writable = true;

@@ -5158,6 +5134,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
	if (!folio || folio_is_zone_device(folio))
		goto out_map;

+	/* TODO: handle PTE-mapped THP */
+	if (folio_test_large(folio))
+		goto out_map;
+
	/*
	 * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
	 * much anyway since they can be in shared cache state. This misses
@@ -5177,7 +5157,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
		flags |= TNF_SHARED;

	nid = folio_nid(folio);
-	nr_pages = folio_nr_pages(folio);
	/*
	 * For memory tiering mode, cpupid of slow memory page is used
	 * to record page access time.  So use default value.
@@ -5194,7 +5173,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
	}
	pte_unmap_unlock(vmf->pte, vmf->ptl);
	writable = false;
-	ignore_writable = true;

	/* Migrate to the requested node */
	if (migrate_misplaced_folio(folio, vma, target_nid)) {
@@ -5215,19 +5193,14 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)

out:
	if (nid != NUMA_NO_NODE)
-		task_numa_fault(last_cpupid, nid, nr_pages, flags);
+		task_numa_fault(last_cpupid, nid, 1, flags);
	return 0;
out_map:
	/*
	 * Make it present again, depending on how arch implements
	 * non-accessible ptes, some can allow access by kernel mode.
	 */
-	if (folio && folio_test_large(folio))
-		numa_rebuild_large_mapping(vmf, vma, folio, pte, ignore_writable,
-					   pte_write_upgrade);
-	else
-		numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
-					    writable);
+	numa_rebuild_single_mapping(vmf, vma, writable);
	pte_unmap_unlock(vmf->pte, vmf->ptl);
	goto out;
}
@@ -5434,7 +5407,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
		return VM_FAULT_OOM;
retry_pud:
	if (pud_none(*vmf.pud) &&
-	    thp_vma_allowable_order(vma, vm_flags, false, true, true, PUD_ORDER) &&
+	    thp_vma_allowable_order(vma, vm_flags,
+				TVA_IN_PF | TVA_ENFORCE_SYSFS, PUD_ORDER) &&
	    !task_in_dynamic_pool(current)) {
		ret = create_huge_pud(&vmf);
		if (!(ret & VM_FAULT_FALLBACK))
@@ -5469,7 +5443,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
		goto retry_pud;

	if (pmd_none(*vmf.pmd) &&
-	    thp_vma_allowable_order(vma, vm_flags, false, true, true, PMD_ORDER) &&
+	    thp_vma_allowable_order(vma, vm_flags,
+				TVA_IN_PF | TVA_ENFORCE_SYSFS, PMD_ORDER) &&
	    !task_in_dynamic_pool(current)) {
		ret = create_huge_pmd(&vmf);
		if (!(ret & VM_FAULT_FALLBACK))