Commit 6b76c3ae authored by Will Deacon

Merge branch 'for-next/faultaround' into for-next/core

Initialise prefaulted PTEs as 'old' for arm64 when hardware access-flag
updates are supported, which drastically improves vmscan performance.

* for-next/faultaround:
  mm: filemap: Fix microblaze build failure with 'mmu_defconfig'
  mm/nommu: Fix return type of filemap_map_pages()
  mm: Mark anonymous struct field of 'struct vm_fault' as 'const'
  mm: Use static initialisers for immutable fields of 'struct vm_fault'
  mm: Avoid modifying vmf.address in __collapse_huge_page_swapin()
  mm: Pass 'address' to map to do_set_pte() and drop FAULT_FLAG_PREFAULT
  mm: Move immutable fields of 'struct vm_fault' into anonymous struct
  arm64: mm: Implement arch_wants_old_prefaulted_pte()
  mm: Allow architectures to request 'old' entries when prefaulting
  mm: Cleanup faultaround and finish_fault() codepaths
parents 90eb8c9d de591a82
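
For readers coming to this merge cold: fault-around speculatively maps a window of already-uptodate page-cache pages around the faulting address. The point of this series is that those speculative entries are installed with the access flag clear ('old') on architectures where hardware can set the flag cheaply, so vmscan only treats pages as referenced once they have genuinely been touched. Below is a minimal sketch of how the generic fault path is expected to consume the new hook, restricted to the file-backed case; do_set_pte() lives in mm/memory.c, which is not among the hunks reproduced here, so the exact body is an assumption:

void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
{
	struct vm_area_struct *vma = vmf->vma;
	/* 'addr' differs from vmf->address when we are prefaulting a neighbour. */
	bool prefault = vmf->address != addr;
	pte_t entry;

	flush_icache_page(vma, page);
	entry = mk_pte(page, vma->vm_page_prot);

	/* Prefaulted entries start out 'old' where hardware AF updates are cheap. */
	if (prefault && arch_wants_old_prefaulted_pte())
		entry = pte_mkold(entry);
	else
		entry = pte_sw_mkyoung(entry);

	if (vmf->flags & FAULT_FLAG_WRITE)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);

	inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
	page_add_file_rmap(page, false);
	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}

An 'old' entry costs one access-flag update the first time the page is really used (done in hardware on CPUs with AF updates), and in exchange vmscan can reclaim prefaulted-but-never-touched pages without treating them as referenced.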
arch/arm64/include/asm/pgtable.h +11 −1
@@ -982,6 +982,16 @@ static inline bool arch_faults_on_old_pte(void)
}
#define arch_faults_on_old_pte		arch_faults_on_old_pte

/*
 * Experimentally, it's cheap to set the access flag in hardware and we
 * benefit from prefaulting mappings as 'old' to start with.
 */
static inline bool arch_wants_old_prefaulted_pte(void)
{
	return !arch_faults_on_old_pte();
}
#define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte

#endif /* !__ASSEMBLY__ */

#endif /* __ASM_PGTABLE_H */
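
The hook follows the usual opt-in pattern for architecture helpers: defining the macro above overrides a generic fallback in common code (not part of the hunks shown here), which presumably reads roughly as follows, defaulting everyone else to 'young' prefaulted entries:

#ifndef arch_wants_old_prefaulted_pte
static inline bool arch_wants_old_prefaulted_pte(void)
{
	/*
	 * By default, assume that transitioning a PTE from 'old' to 'young'
	 * is expensive, so prefaulted entries are created 'young'.
	 */
	return false;
}
#endif

On arm64 the answer is simply the inverse of arch_faults_on_old_pte(): if the CPU updates the access flag in hardware, no fault is taken on an old entry, so starting prefaulted mappings as 'old' is essentially free.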
fs/xfs/xfs_file.c +4 −2
@@ -1319,17 +1319,19 @@ xfs_filemap_pfn_mkwrite(
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

static void
static vm_fault_t
xfs_filemap_map_pages(
	struct vm_fault		*vmf,
	pgoff_t			start_pgoff,
	pgoff_t			end_pgoff)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	filemap_map_pages(vmf, start_pgoff, end_pgoff);
	ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	return ret;
}

static const struct vm_operations_struct xfs_file_vm_ops = {
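
With ->map_pages() now returning vm_fault_t, any filesystem wrapper has to propagate the value it gets back from filemap_map_pages(), exactly as the XFS hunk above does. For a filesystem that only needs to take a lock around the call, the shape is roughly the following; the 'foofs' names and locking helpers are purely illustrative and not part of this commit:

static vm_fault_t foofs_filemap_map_pages(struct vm_fault *vmf,
					  pgoff_t start_pgoff,
					  pgoff_t end_pgoff)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;

	/* Hypothetical lock serialising against truncate/hole-punch. */
	foofs_mmap_lock_shared(inode);
	ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
	foofs_mmap_unlock_shared(inode);

	return ret;
}

static const struct vm_operations_struct foofs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= foofs_filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};

Filesystems with no locking requirements of their own can keep pointing .map_pages straight at filemap_map_pages(), since its prototype now matches the callback directly.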
include/linux/mm.h +15 −10
@@ -514,11 +514,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
 * pgoff should be used in favour of virtual_address, if possible.
 */
struct vm_fault {
	const struct {
		struct vm_area_struct *vma;	/* Target VMA */
	unsigned int flags;		/* FAULT_FLAG_xxx flags */
		gfp_t gfp_mask;			/* gfp mask to be used for allocations */
		pgoff_t pgoff;			/* Logical page offset based on vma */
		unsigned long address;		/* Faulting virtual address */
	};
	unsigned int flags;		/* FAULT_FLAG_xxx flags
					 * XXX: should really be 'const' */
	pmd_t *pmd;			/* Pointer to pmd entry matching
					 * the 'address' */
	pud_t *pud;			/* Pointer to pud entry matching
@@ -542,8 +545,8 @@ struct vm_fault {
					 * is not NULL, otherwise pmd.
					 */
	pgtable_t prealloc_pte;		/* Pre-allocated pte page table.
					 * vm_ops->map_pages() calls
					 * alloc_set_pte() from atomic context.
					 * vm_ops->map_pages() sets up a page
					 * table from atomic context.
					 * do_fault_around() pre-allocates
					 * page table to avoid allocation from
					 * atomic context.
@@ -578,7 +581,7 @@ struct vm_operations_struct {
	vm_fault_t (*fault)(struct vm_fault *vmf);
	vm_fault_t (*huge_fault)(struct vm_fault *vmf,
			enum page_entry_size pe_size);
	void (*map_pages)(struct vm_fault *vmf,
	vm_fault_t (*map_pages)(struct vm_fault *vmf,
			pgoff_t start_pgoff, pgoff_t end_pgoff);
	unsigned long (*pagesize)(struct vm_area_struct * area);

@@ -988,7 +991,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
	return pte;
}

vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);

vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
@@ -2622,7 +2627,7 @@ extern void truncate_inode_pages_final(struct address_space *);

/* generic vm_area_ops exported for stackable file systems */
extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern void filemap_map_pages(struct vm_fault *vmf,
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);

include/linux/pgtable.h +11 −0
@@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}

/*
 * the ordering of these checks is important for pmds with _page_devmap set.
 * if we check pmd_trans_unstable() first we will trip the bad_pmd() check
 * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
 * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
 */
static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
{
	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}

#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing but
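
This helper is lifted into a common header so that the new fault-around code below can make the same "is this a regular pmd we can walk?" check as the fault path. The comment referenced from mm/filemap.c ("See comment in handle_pte_fault()") sits next to a check of roughly this shape; mm/memory.c is not among these hunks, so treat this as a sketch:

	if (unlikely(pmd_none(*vmf->pmd))) {
		vmf->pte = NULL;	/* defer pte table allocation to the fault handler */
	} else {
		/* A huge or devmap pmd materialised under us: bail out and retry the fault. */
		if (pmd_devmap_trans_unstable(vmf->pmd))
			return 0;
		vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
	}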
mm/filemap.c +135 −44
@@ -42,6 +42,8 @@
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
@@ -2911,74 +2913,163 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);

void filemap_map_pages(struct vm_fault *vmf,
static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
{
	struct mm_struct *mm = vmf->vma->vm_mm;

	/* Huge page is mapped? No need to proceed. */
	if (pmd_trans_huge(*vmf->pmd)) {
		unlock_page(page);
		put_page(page);
		return true;
	}

	if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
	    vm_fault_t ret = do_set_pmd(vmf, page);
	    if (!ret) {
		    /* The page is mapped successfully, reference consumed. */
		    unlock_page(page);
		    return true;
	    }
	}

	if (pmd_none(*vmf->pmd)) {
		vmf->ptl = pmd_lock(mm, vmf->pmd);
		if (likely(pmd_none(*vmf->pmd))) {
			mm_inc_nr_ptes(mm);
			pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
			vmf->prealloc_pte = NULL;
		}
		spin_unlock(vmf->ptl);
	}

	/* See comment in handle_pte_fault() */
	if (pmd_devmap_trans_unstable(vmf->pmd)) {
		unlock_page(page);
		put_page(page);
		return true;
	}

	return false;
}

static struct page *next_uptodate_page(struct page *page,
				       struct address_space *mapping,
				       struct xa_state *xas, pgoff_t end_pgoff)
{
	unsigned long max_idx;

	do {
		if (!page)
			return NULL;
		if (xas_retry(xas, page))
			continue;
		if (xa_is_value(page))
			continue;
		if (PageLocked(page))
			continue;
		if (!page_cache_get_speculative(page))
			continue;
		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(xas)))
			goto skip;
		if (!PageUptodate(page) || PageReadahead(page))
			goto skip;
		if (PageHWPoison(page))
			goto skip;
		if (!trylock_page(page))
			goto skip;
		if (page->mapping != mapping)
			goto unlock;
		if (!PageUptodate(page))
			goto unlock;
		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (xas->xa_index >= max_idx)
			goto unlock;
		return page;
unlock:
		unlock_page(page);
skip:
		put_page(page);
	} while ((page = xas_next_entry(xas, end_pgoff)) != NULL);

	return NULL;
}

static inline struct page *first_map_page(struct address_space *mapping,
					  struct xa_state *xas,
					  pgoff_t end_pgoff)
{
	return next_uptodate_page(xas_find(xas, end_pgoff),
				  mapping, xas, end_pgoff);
}

static inline struct page *next_map_page(struct address_space *mapping,
					 struct xa_state *xas,
					 pgoff_t end_pgoff)
{
	return next_uptodate_page(xas_next_entry(xas, end_pgoff),
				  mapping, xas, end_pgoff);
}

vm_fault_t filemap_map_pages(struct vm_fault *vmf,
			     pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct file *file = vmf->vma->vm_file;
	struct vm_area_struct *vma = vmf->vma;
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	unsigned long max_idx;
	unsigned long addr;
	XA_STATE(xas, &mapping->i_pages, start_pgoff);
	struct page *head, *page;
	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
	vm_fault_t ret = 0;

	rcu_read_lock();
	xas_for_each(&xas, head, end_pgoff) {
		if (xas_retry(&xas, head))
			continue;
		if (xa_is_value(head))
			goto next;
	head = first_map_page(mapping, &xas, end_pgoff);
	if (!head)
		goto out;

		/*
		 * Check for a locked page first, as a speculative
		 * reference may adversely influence page migration.
		 */
		if (PageLocked(head))
			goto next;
		if (!page_cache_get_speculative(head))
			goto next;
	if (filemap_map_pmd(vmf, head)) {
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

		/* Has the page moved or been split? */
		if (unlikely(head != xas_reload(&xas)))
			goto skip;
	addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
	do {
		page = find_subpage(head, xas.xa_index);

		if (!PageUptodate(head) ||
				PageReadahead(page) ||
				PageHWPoison(page))
			goto skip;
		if (!trylock_page(head))
			goto skip;

		if (head->mapping != mapping || !PageUptodate(head))
			goto unlock;

		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (xas.xa_index >= max_idx)
		if (PageHWPoison(page))
			goto unlock;

		if (mmap_miss > 0)
			mmap_miss--;

		vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
		if (vmf->pte)
		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
		vmf->pte += xas.xa_index - last_pgoff;
		last_pgoff = xas.xa_index;
		if (alloc_set_pte(vmf, page))

		if (!pte_none(*vmf->pte))
			goto unlock;

		/* We're about to handle the fault */
		if (vmf->address == addr)
			ret = VM_FAULT_NOPAGE;

		do_set_pte(vmf, page, addr);
		/* no need to invalidate: a not-present page won't be cached */
		update_mmu_cache(vma, addr, vmf->pte);
		unlock_page(head);
		goto next;
		continue;
unlock:
		unlock_page(head);
skip:
		put_page(head);
next:
		/* Huge page is mapped? No need to proceed. */
		if (pmd_trans_huge(*vmf->pmd))
			break;
	}
	} while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
	pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
	rcu_read_unlock();
	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
	return ret;
}
EXPORT_SYMBOL(filemap_map_pages);
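
On the other end of the interface, the generic fault path now gets a meaningful answer from ->map_pages(): VM_FAULT_NOPAGE means the faulting address itself was mapped during fault-around and the fault is finished, while 0 means only neighbours (or nothing) were mapped and the original fault must still be completed via __do_fault()/finish_fault(). The caller lives in mm/memory.c and is not part of the hunks shown, but a rough sketch of how the value might be used:

static vm_fault_t do_fault_around(struct vm_fault *vmf)
{
	pgoff_t start_pgoff = vmf->pgoff;
	pgoff_t end_pgoff;

	/* Clamp the window to the VMA and to fault_around_bytes (details elided). */
	end_pgoff = start_pgoff + (fault_around_bytes >> PAGE_SHIFT) - 1;

	if (pmd_none(*vmf->pmd)) {
		/* Pre-allocate a page table so map_pages() never allocates atomically. */
		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
		if (!vmf->prealloc_pte)
			return VM_FAULT_OOM;
	}

	return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
}

	/* In do_read_fault(): */
	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
		ret = do_fault_around(vmf);
		if (ret)
			return ret;	/* typically VM_FAULT_NOPAGE: nothing left to do */
	}
	/* Otherwise fall through to __do_fault() + finish_fault() for the single page. */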
