Commit 27af67f3 authored by Aneesh Kumar K.V, committed by Andrew Morton
Browse files

powerpc/book3s64/mm: enable transparent pud hugepage

This is enabled only with radix translation and 1G hugepage size.  This
will be used with devdax device memory with a namespace alignment of 1G.

Anon transparent hugepage is not supported even though we do have helpers
checking pud_trans_huge().  We should never find it returning true.  The
only expected pte bit combination is _PAGE_PTE | _PAGE_DEVMAP.

Some of the helpers are never expected to be called on hash translation
and hence are marked to call BUG() in such a case.

Link: https://lkml.kernel.org/r/20230724190759.483013-10-aneesh.kumar@linux.ibm.com


Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 104c49d5
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -138,7 +138,16 @@ static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
}

/* Non-zero when any of the H_PMD_BAD_BITS are set in the pmd value. */
#define	hash__pmd_bad(pmd)		(pmd_val(pmd) & H_PMD_BAD_BITS)

/*
 * Compare two pud entries on their raw value while ignoring the
 * _PAGE_HPTEFLAGS bits, so the comparison works for both a pte and a
 * page table pointer.  Returns 1 when they match, 0 otherwise.
 */
static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
{
	/* Any differing bit outside _PAGE_HPTEFLAGS makes the entries unequal. */
	return !((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS));
}
/* Non-zero when any of the H_PUD_BAD_BITS are set in the pud value. */
#define	hash__pud_bad(pud)		(pud_val(pud) & H_PUD_BAD_BITS)

static inline int hash__p4d_bad(p4d_t p4d)
{
	return (p4d_val(p4d) == 0);
+144 −11
Original line number Diff line number Diff line
@@ -921,8 +921,29 @@ static inline pud_t pte_pud(pte_t pte)
{
	return __pud_raw(pte_raw(pte));
}

/* View a pud entry pointer as a pte pointer; this is a plain cast. */
static inline pte_t *pudp_ptep(pud_t *pud)
{
	pte_t *ptep = (pte_t *)pud;

	return ptep;
}

/*
 * pud-level accessors built on the pte helpers: the pud is converted with
 * pud_pte(), the matching pte_*() helper is applied and, for the helpers
 * that produce a new entry, the result is converted back with pte_pud().
 */
#define pud_pfn(pud)		pte_pfn(pud_pte(pud))
#define pud_dirty(pud)		pte_dirty(pud_pte(pud))
#define pud_young(pud)		pte_young(pud_pte(pud))
#define pud_mkold(pud)		pte_pud(pte_mkold(pud_pte(pud)))
#define pud_wrprotect(pud)	pte_pud(pte_wrprotect(pud_pte(pud)))
#define pud_mkdirty(pud)	pte_pud(pte_mkdirty(pud_pte(pud)))
#define pud_mkclean(pud)	pte_pud(pte_mkclean(pud_pte(pud)))
#define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_mkwrite(pud)	pte_pud(pte_mkwrite(pud_pte(pud)))
#define pud_write(pud)		pte_write(pud_pte(pud))

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
/*
 * Soft-dirty helpers for pud entries, forwarded to the pte implementations
 * through pud_pte()/pte_pud().
 *
 * The macro parameter has to be spelled "pud" so that it is the macro
 * argument that gets substituted into the expansion.  With a differently
 * named parameter the argument is ignored and the body refers to whatever
 * identifier called "pud" happens to exist at the call site.
 */
#define pud_soft_dirty(pud)    pte_soft_dirty(pud_pte(pud))
#define pud_mksoft_dirty(pud)  pte_pud(pte_mksoft_dirty(pud_pte(pud)))
#define pud_clear_soft_dirty(pud) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

static inline int pud_bad(pud_t pud)
{
	if (radix_enabled())
@@ -1115,15 +1136,24 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * pmd and pud level helpers that are only declared here; their definitions
 * are out of line.
 */
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t pmd);
extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t pud);

/* No-op: the body is empty, so nothing is done for a pmd-level update. */
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
					unsigned long addr, pmd_t *pmd)
{
}

/* No-op: the body is empty, so nothing is done for a pud-level update. */
static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
					unsigned long addr, pud_t *pud)
{
}

extern int hash__has_transparent_hugepage(void);
static inline int has_transparent_hugepage(void)
{
@@ -1133,6 +1163,14 @@ static inline int has_transparent_hugepage(void)
}
#define has_transparent_hugepage has_transparent_hugepage

/*
 * Report whether pud-level transparent hugepages are available.  Only the
 * radix MMU can answer yes; every other mode returns 0.
 */
static inline int has_transparent_pud_hugepage(void)
{
	return radix_enabled() ? radix__has_transparent_pud_hugepage() : 0;
}
#define has_transparent_pud_hugepage has_transparent_pud_hugepage

static inline unsigned long
pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
		    unsigned long clr, unsigned long set)
@@ -1142,6 +1180,16 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
	return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}

/*
 * Update a huge pud entry by forwarding @clr and @set to the radix
 * implementation and returning its result (callers in this file treat the
 * result as the previous entry value).
 *
 * This is not expected to be called without radix: that path hits BUG().
 */
static inline unsigned long
pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
		    unsigned long clr, unsigned long set)
{
	if (!radix_enabled()) {
		BUG();
		/* Only reached if BUG() returns; hand back the current value. */
		return pud_val(*pudp);
	}

	return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
}

/*
 * returns true for pmd migration entries, THP, devmap, hugetlb
 * But compile time dependent on THP config
@@ -1151,6 +1199,11 @@ static inline int pmd_large(pmd_t pmd)
	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
}

/* Returns 1 when the raw pud has _PAGE_PTE set, 0 otherwise. */
static inline int pud_large(pud_t pud)
{
	return (pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) != 0;
}

/*
 * For radix we should always find H_PAGE_HASHPTE zero. Hence
 * the below will work for radix too
@@ -1166,6 +1219,17 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
	return ((old & _PAGE_ACCESSED) != 0);
}

/*
 * Clear _PAGE_ACCESSED in a huge pud entry.
 *
 * Returns 1 when the value reported by pud_hugepage_update() had
 * _PAGE_ACCESSED set, 0 otherwise.  The update is skipped entirely when
 * neither _PAGE_ACCESSED nor H_PAGE_HASHPTE is set in the current entry.
 */
static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
					      unsigned long addr, pud_t *pudp)
{
	unsigned long prev;

	/* Nothing to clear: avoid the update altogether. */
	if (!(pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)))
		return 0;

	prev = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);

	return (prev & _PAGE_ACCESSED) ? 1 : 0;
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp)
@@ -1174,6 +1238,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
}

#define __HAVE_ARCH_PUDP_SET_WRPROTECT
/*
 * Write-protect a huge pud entry by clearing _PAGE_WRITE.  Entries that
 * pud_write() already reports as not writable are left untouched.
 */
static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pud_t *pudp)
{
	if (!pud_write(*pudp))
		return;

	pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
}

/*
 * Only returns true for a THP. False for pmd migration entry.
 * We also need to return true when we come across a pte that
@@ -1195,6 +1267,17 @@ static inline int pmd_trans_huge(pmd_t pmd)
	return hash__pmd_trans_huge(pmd);
}

/*
 * Returns the radix answer for a present pud when radix is enabled.
 * A non-present pud, or any non-radix mode, yields 0.
 */
static inline int pud_trans_huge(pud_t pud)
{
	/* Presence is checked first, then the MMU mode. */
	if (!pud_present(pud) || !radix_enabled())
		return 0;

	return radix__pud_trans_huge(pud);
}


#define __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
@@ -1203,6 +1286,15 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
	return hash__pmd_same(pmd_a, pmd_b);
}

#define pud_same pud_same
/* Compare two pud entries using the radix or hash specific comparison. */
static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	return radix_enabled() ? radix__pud_same(pud_a, pud_b)
			       : hash__pud_same(pud_a, pud_b);
}


static inline pmd_t __pmd_mkhuge(pmd_t pmd)
{
	if (radix_enabled())
@@ -1210,6 +1302,14 @@ static inline pmd_t __pmd_mkhuge(pmd_t pmd)
	return hash__pmd_mkhuge(pmd);
}

/*
 * Mark a pud as huge via the radix helper.  Not expected to be called
 * without radix: that path hits BUG().
 */
static inline pud_t __pud_mkhuge(pud_t pud)
{
	if (!radix_enabled()) {
		BUG();
		/* Only reached if BUG() returns. */
		return pud;
	}

	return radix__pud_mkhuge(pud);
}

/*
 * pfn_pmd returns a pmd_t that can be used as a pmd pte entry.
 */
@@ -1225,14 +1325,34 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
	return pmd;
}

/*
 * Returns @pud unchanged.  With CONFIG_DEBUG_VM this additionally warns
 * when called without radix, or when a radix entry lacks _PAGE_PTE.
 */
static inline pud_t pud_mkhuge(pud_t pud)
{
#ifdef CONFIG_DEBUG_VM
	if (!radix_enabled())
		WARN_ON(1);
	else
		WARN_ON((pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) == 0);
#endif
	return pud;
}


/*
 * pmd and pud variants of the set-access-flags helper.  Only the prototypes
 * are given here; each is paired with its __HAVE_ARCH_* marker define.
 */
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
#define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
extern int pudp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pud_t *pudp,
				 pud_t entry, int dirty);

/*
 * pmd and pud variants of test-and-clear-young taking a vma.  Only the
 * prototypes are given here; each is paired with its __HAVE_ARCH_* marker.
 */
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
				     unsigned long address, pmd_t *pmdp);
#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
				     unsigned long address, pud_t *pudp);


#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -1243,6 +1363,16 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
	return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
}

#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
/*
 * Forward to the radix get-and-clear implementation and return its result.
 * Not expected to be called without radix: that path hits BUG().
 */
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long addr, pud_t *pudp)
{
	if (!radix_enabled()) {
		BUG();
		/* Only reached if BUG() returns. */
		return *pudp;
	}

	return radix__pudp_huge_get_and_clear(mm, addr, pudp);
}

static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp)
{
@@ -1257,6 +1387,11 @@ pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr,
				   pmd_t *pmdp, int full);

/*
 * pud variant of the get-and-clear helper that additionally takes a @full
 * flag.  Prototype only; the definition is out of line.
 */
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr,
				   pud_t *pudp, int full);

#define __HAVE_ARCH_PGTABLE_DEPOSIT
static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
					      pmd_t *pmdp, pgtable_t pgtable)
@@ -1305,6 +1440,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
	return hash__pmd_mkdevmap(pmd);
}

/*
 * Mark a pud as a devmap entry via the radix helper.  Not expected to be
 * called without radix: that path hits BUG().
 */
static inline pud_t pud_mkdevmap(pud_t pud)
{
	if (!radix_enabled()) {
		BUG();
		/* Only reached if BUG() returns. */
		return pud;
	}

	return radix__pud_mkdevmap(pud);
}

static inline int pmd_devmap(pmd_t pmd)
{
	return pte_devmap(pmd_pte(pmd));
@@ -1312,7 +1455,7 @@ static inline int pmd_devmap(pmd_t pmd)

/*
 * Report whether a pud entry is a devmap mapping by applying pte_devmap()
 * to the pte view of the entry.
 *
 * The earlier unconditional "return 0" is dropped: it made the real check
 * unreachable, so no pud was ever reported as devmap.
 */
static inline int pud_devmap(pud_t pud)
{
	return pte_devmap(pud_pte(pud));
}

static inline int pgd_devmap(pgd_t pgd)
@@ -1321,16 +1464,6 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Stub whose body is BUILD_BUG() followed by "return 0".
 *
 * NOTE(review): a pud_pfn(pud) macro built on pte_pfn() is also defined
 * earlier in this view.  Presumably this stub is the pre-change version
 * that the commit removes -- confirm that both do not end up in the same
 * header.
 */
static inline int pud_pfn(pud_t pud)
{
	/*
	 * Currently all calls to pud_pfn() are gated around a pud_devmap()
	 * check so this should never be used. If it grows another user we
	 * want to know about it.
	 */
	BUILD_BUG();
	return 0;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+36 −0
Original line number Diff line number Diff line
@@ -250,6 +250,10 @@ static inline int radix__pud_bad(pud_t pud)
	return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
}

/* Two radix pud entries are the same only when their raw values are equal. */
static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
{
	return pud_raw(pud_a) == pud_raw(pud_b);
}

static inline int radix__p4d_bad(p4d_t p4d)
{
@@ -268,9 +272,22 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
	return __pmd(pmd_val(pmd) | _PAGE_PTE);
}

/*
 * Returns 1 only when the pud has _PAGE_PTE set and _PAGE_DEVMAP clear;
 * a devmap entry therefore returns 0 here.
 */
static inline int radix__pud_trans_huge(pud_t pud)
{
	unsigned long bits = pud_val(pud) & (_PAGE_PTE | _PAGE_DEVMAP);

	return bits == _PAGE_PTE;
}

/* Return a copy of @pud with _PAGE_PTE set. */
static inline pud_t radix__pud_mkhuge(pud_t pud)
{
	unsigned long val = pud_val(pud);

	val |= _PAGE_PTE;
	return __pud(val);
}

/*
 * Radix helpers for huge pmd/pud entries.  Prototypes only; the definitions
 * are out of line.  The *_hugepage_update() variants take a mask of bits to
 * clear (@clr) and a mask of bits to set (@set).
 */
extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					  pmd_t *pmdp, unsigned long clr,
					  unsigned long set);
extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
						pud_t *pudp, unsigned long clr,
						unsigned long set);
extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
@@ -278,6 +295,9 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
/*
 * Radix withdraw and get-and-clear helpers for huge entries.  Prototypes
 * only; the definitions are out of line.
 */
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp);
pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pud_t *pudp);

static inline int radix__has_transparent_hugepage(void)
{
	/* For radix 2M at PMD level means thp */
@@ -285,6 +305,14 @@ static inline int radix__has_transparent_hugepage(void)
		return 1;
	return 0;
}

/*
 * For radix, pud hugepage support means the 1G page size sits at PUD
 * level: returns 1 when the MMU_PAGE_1G shift equals PUD_SHIFT, else 0.
 */
static inline int radix__has_transparent_pud_hugepage(void)
{
	return mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT;
}
#endif

static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
@@ -292,9 +320,17 @@ static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
	return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
}

/* Return a copy of @pud with both _PAGE_PTE and _PAGE_DEVMAP set. */
static inline pud_t radix__pud_mkdevmap(pud_t pud)
{
	unsigned long val = pud_val(pud);

	val |= _PAGE_PTE | _PAGE_DEVMAP;
	return __pud(val);
}

/*
 * Forward declaration: only a pointer to struct vmem_altmap is needed by
 * the prototypes below.
 */
struct vmem_altmap;
/* Radix vmemmap helpers.  Prototypes only; the definitions are out of line. */
extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
					     unsigned long page_size,
					     unsigned long phys);
int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
				      int node, struct vmem_altmap *altmap);
extern void radix__vmemmap_remove_mapping(unsigned long start,
				    unsigned long page_size);

+2 −0
Original line number Diff line number Diff line
@@ -68,6 +68,8 @@ void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				      unsigned long end, int psize);
/*
 * Radix TLB range flush entry points for pmd, pud, generic vma and kernel
 * ranges.  Prototypes only; the definitions are out of line.
 */
extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				       unsigned long start, unsigned long end);
extern void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
				       unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end);
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
+8 −0
Original line number Diff line number Diff line
@@ -51,6 +51,14 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
		radix__flush_pmd_tlb_range(vma, start, end);
}

#define __HAVE_ARCH_FLUSH_PUD_TLB_RANGE
/*
 * Flush a pud-mapped range through the radix implementation.  Does nothing
 * when radix is not enabled.
 */
static inline void flush_pud_tlb_range(struct vm_area_struct *vma,
				       unsigned long start, unsigned long end)
{
	if (!radix_enabled())
		return;

	radix__flush_pud_tlb_range(vma, start, end);
}

#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
					   unsigned long start,
Loading