Commit d909f910 authored by Nicholas Piggin's avatar Nicholas Piggin Committed by Michael Ellerman
Browse files

powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP



This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.

This enables huge (2MB and 1GB) ioremap mappings. I don't have a
benchmark for this change, but huge vmap will be used by a later core
kernel change to enable huge vmalloc memory mappings. This improves
cached `git diff` performance by about 5% on a 2-node POWER9 with 32MB
size dentry cache hash.

  Profiling git diff dTLB misses with a vanilla kernel:

  81.75%  git      [kernel.vmlinux]    [k] __d_lookup_rcu
   7.21%  git      [kernel.vmlinux]    [k] strncpy_from_user
   1.77%  git      [kernel.vmlinux]    [k] find_get_entry
   1.59%  git      [kernel.vmlinux]    [k] kmem_cache_free

            40,168      dTLB-miss
       0.100342754 seconds time elapsed

  With powerpc huge vmalloc:

             2,987      dTLB-miss
       0.095933138 seconds time elapsed

Signed-off-by: default avatarNicholas Piggin <npiggin@gmail.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent d38153f9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2927,7 +2927,7 @@
			register save and restore. The kernel will only save
			legacy floating-point registers on task switch.

	nohugeiomap	[KNL,x86] Disable kernel huge I/O mappings.
	nohugeiomap	[KNL,x86,PPC] Disable kernel huge I/O mappings.

	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
			Equivalent to smt=1.
+1 −0
Original line number Diff line number Diff line
@@ -167,6 +167,7 @@ config PPC
	select GENERIC_STRNLEN_USER
	select GENERIC_TIME_VSYSCALL
	select HAVE_ARCH_AUDITSYSCALL
	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
	select HAVE_ARCH_JUMP_LABEL
	select HAVE_ARCH_KASAN			if PPC32
	select HAVE_ARCH_KGDB
+8 −0
Original line number Diff line number Diff line
@@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
#define VMALLOC_START	__vmalloc_start
#define VMALLOC_END	__vmalloc_end

static inline unsigned int ioremap_max_order(void)
{
	if (radix_enabled())
		return PUD_SHIFT;
	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
}
#define IOREMAP_MAX_ORDER ioremap_max_order()

extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
+100 −0
Original line number Diff line number Diff line
@@ -1124,6 +1124,106 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
	set_pte_at(mm, addr, ptep, pte);
}

int __init arch_ioremap_pud_supported(void)
{
	/* HPT does not cope with large pages in the vmalloc area */
	return radix_enabled();
}

int __init arch_ioremap_pmd_supported(void)
{
	return radix_enabled();
}

int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pud;
	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);

	return 1;
}

int pud_clear_huge(pud_t *pud)
{
	if (pud_huge(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;
	int i;

	pmd = (pmd_t *)pud_page_vaddr(*pud);
	pud_clear(pud);

	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd[i])) {
			pte_t *pte;
			pte = (pte_t *)pmd_page_vaddr(pmd[i]);

			pte_free_kernel(&init_mm, pte);
		}
	}

	pmd_free(&init_mm, pmd);

	return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pmd;
	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);

	return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_huge(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	flush_tlb_kernel_range(addr, addr + PMD_SIZE);

	pte_free_kernel(&init_mm, pte);

	return 1;
}

int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
			pgprot_t prot, int nid)
{