Unverified Commit 2e75ab31 authored by Palmer Dabbelt's avatar Palmer Dabbelt
Browse files

Merge patch series "riscv: Use PUD/P4D/PGD pages for the linear mapping"

Alexandre Ghiti <alexghiti@rivosinc.com> says:

This patchset intends to improve TLB utilization by using hugepages for
the linear mapping.

As reported by Anup in v6, when STRICT_KERNEL_RWX is enabled, we must
take care of isolating the kernel text and rodata so that they are not
mapped with a PUD mapping which would then assign wrong permissions to
the whole region: it is achieved the same way as arm64 by using the
memblock nomap API which isolates those regions and re-merge them afterwards
thus avoiding any issue with the system resources tree creation.

arch/riscv/include/asm/page.h |  19 ++++++-
 arch/riscv/mm/init.c          | 102 ++++++++++++++++++++++++++--------
 arch/riscv/mm/physaddr.c      |  16 ++++++
 drivers/of/fdt.c              |  11 ++--
 4 files changed, 118 insertions(+), 30 deletions(-)

* b4-shazam-merge:
  riscv: Use PUD/P4D/PGD pages for the linear mapping
  riscv: Move the linear mapping creation in its own function
  riscv: Get rid of riscv_pfn_base variable

Link: https://lore.kernel.org/r/20230324155421.271544-1-alexghiti@rivosinc.com


Signed-off-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
parents 5464912c 3335068f
Loading
Loading
Loading
Loading
+17 −2
Original line number Diff line number Diff line
@@ -89,9 +89,16 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif

#ifdef CONFIG_64BIT
/*
 * We override this value as its generic definition uses __pa too early in
 * the boot process (before kernel_map.va_pa_offset is set).
 */
#define MIN_MEMBLOCK_ADDR      0
#endif

#ifdef CONFIG_MMU
extern unsigned long riscv_pfn_base;
#define ARCH_PFN_OFFSET		(riscv_pfn_base)
#define ARCH_PFN_OFFSET		(PFN_DOWN((unsigned long)phys_ram_base))
#else
#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
@@ -121,7 +128,11 @@ extern phys_addr_t phys_ram_base;
#define is_linear_mapping(x)	\
	((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))

#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_pa_to_va(x)	((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
#else
void *linear_mapping_pa_to_va(unsigned long x);
#endif
#define kernel_mapping_pa_to_va(y)	({					\
	unsigned long _y = (unsigned long)(y);					\
	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ?			\
@@ -130,7 +141,11 @@ extern phys_addr_t phys_ram_base;
	})
#define __pa_to_va_nodebug(x)		linear_mapping_pa_to_va(x)

#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_va_to_pa(x)	((unsigned long)(x) - kernel_map.va_pa_offset)
#else
phys_addr_t linear_mapping_va_to_pa(unsigned long x);
#endif
#define kernel_mapping_va_to_pa(y) ({						\
	unsigned long _y = (unsigned long)(y);					\
	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
+79 −23
Original line number Diff line number Diff line
@@ -213,6 +213,14 @@ static void __init setup_bootmem(void)
	phys_ram_end = memblock_end_of_DRAM();
	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
		phys_ram_base = memblock_start_of_DRAM();

	/*
	 * In 64-bit, any use of __va/__pa before this point is wrong as we
	 * did not know the start of DRAM before.
	 */
	if (IS_ENABLED(CONFIG_64BIT))
		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;

	/*
	 * memblock allocator is not aware of the fact that last 4K bytes of
	 * the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -271,9 +279,6 @@ static void __init setup_bootmem(void)
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __initdata;

unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);

pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -285,7 +290,6 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG

#ifdef CONFIG_XIP_KERNEL
#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define riscv_pfn_base         (*(unsigned long  *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -671,9 +675,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,

static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
	/* Upgrade to PMD_SIZE mappings whenever possible */
	base &= PMD_SIZE - 1;
	if (!base && size >= PMD_SIZE)
	if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
		return PGDIR_SIZE;

	if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
		return P4D_SIZE;

	if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
		return PUD_SIZE;

	if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
		return PMD_SIZE;

	return PAGE_SIZE;
@@ -982,11 +993,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
	set_satp_mode();
#endif

	kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
	/*
	 * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
	 * where we have the system memory layout: this allows us to align
	 * the physical and virtual mappings and then make use of PUD/P4D/PGD
	 * for the linear mapping. This is only possible because the kernel
	 * mapping lies outside the linear mapping.
	 * In 32-bit however, as the kernel resides in the linear mapping,
	 * setup_vm_final can not change the mapping established here,
	 * otherwise the same kernel addresses would get mapped to different
	 * physical addresses (if the start of dram is different from the
	 * kernel physical address start).
	 */
	kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
				0UL : PAGE_OFFSET - kernel_map.phys_addr;
	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;

	riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);

	/*
	 * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
	 * kernel, whereas for 64-bit kernel, the end of the virtual address
@@ -1090,16 +1112,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
	pt_ops_set_fixmap();
}

static void __init setup_vm_final(void)
static void __init create_linear_mapping_range(phys_addr_t start,
					       phys_addr_t end)
{
	phys_addr_t pa;
	uintptr_t va, map_size;
	phys_addr_t pa, start, end;

	for (pa = start; pa < end; pa += map_size) {
		va = (uintptr_t)__va(pa);
		map_size = best_map_size(pa, end - pa);

		create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
				   pgprot_from_va(va));
	}
}

static void __init create_linear_mapping_page_table(void)
{
	phys_addr_t start, end;
	u64 i;

	/* Setup swapper PGD for fixmap */
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);
#ifdef CONFIG_STRICT_KERNEL_RWX
	phys_addr_t ktext_start = __pa_symbol(_start);
	phys_addr_t ktext_size = __init_data_begin - _start;
	phys_addr_t krodata_start = __pa_symbol(__start_rodata);
	phys_addr_t krodata_size = _data - __start_rodata;

	/* Isolate kernel text and rodata so they don't get mapped with a PUD */
	memblock_mark_nomap(ktext_start,  ktext_size);
	memblock_mark_nomap(krodata_start, krodata_size);
#endif

	/* Map all memory banks in the linear mapping */
	for_each_mem_range(i, &start, &end) {
@@ -1111,15 +1153,29 @@ static void __init setup_vm_final(void)
		if (end >= __pa(PAGE_OFFSET) + memory_limit)
			end = __pa(PAGE_OFFSET) + memory_limit;

		for (pa = start; pa < end; pa += map_size) {
			va = (uintptr_t)__va(pa);
			map_size = best_map_size(pa, end - pa);

			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
					   pgprot_from_va(va));
		create_linear_mapping_range(start, end);
	}

#ifdef CONFIG_STRICT_KERNEL_RWX
	create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
	create_linear_mapping_range(krodata_start,
				    krodata_start + krodata_size);

	memblock_clear_nomap(ktext_start,  ktext_size);
	memblock_clear_nomap(krodata_start, krodata_size);
#endif
}

static void __init setup_vm_final(void)
{
	/* Setup swapper PGD for fixmap */
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);

	/* Map the linear mapping */
	create_linear_mapping_page_table();

	/* Map the kernel */
	if (IS_ENABLED(CONFIG_64BIT))
		create_kernel_page_table(swapper_pg_dir, false);
+16 −0
Original line number Diff line number Diff line
@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
	return __va_to_pa_nodebug(x);
}
EXPORT_SYMBOL(__phys_addr_symbol);

/*
 * CONFIG_DEBUG_VIRTUAL out-of-line version of the linear-mapping VA->PA
 * conversion: same arithmetic as the header macro, plus a sanity check
 * that kernel_map.va_pa_offset has been initialized (on 64-bit it is only
 * set once setup_bootmem() knows the start of DRAM, so any earlier call
 * would silently compute a wrong address).
 */
phys_addr_t linear_mapping_va_to_pa(unsigned long x)
{
	BUG_ON(!kernel_map.va_pa_offset);

	return ((unsigned long)(x) - kernel_map.va_pa_offset);
}
EXPORT_SYMBOL(linear_mapping_va_to_pa);

/*
 * CONFIG_DEBUG_VIRTUAL out-of-line version of the linear-mapping PA->VA
 * conversion: mirrors the header macro but BUG()s if called before
 * kernel_map.va_pa_offset is set up (i.e. before __va() is usable),
 * catching too-early users at boot instead of returning a bogus pointer.
 */
void *linear_mapping_pa_to_va(unsigned long x)
{
	BUG_ON(!kernel_map.va_pa_offset);

	return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
}
EXPORT_SYMBOL(linear_mapping_pa_to_va);
+6 −5
Original line number Diff line number Diff line
@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
static void __early_init_dt_declare_initrd(unsigned long start,
					   unsigned long end)
{
	/* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
	 * enabled since __va() is called too early. ARM64 does make use
	 * of phys_initrd_start/phys_initrd_size so we can skip this
	 * conversion.
	/*
	 * __va() is not yet available this early on some platforms. In that
	 * case, the platform uses phys_initrd_start/phys_initrd_size instead
	 * and does the VA conversion itself.
	 */
	if (!IS_ENABLED(CONFIG_ARM64)) {
	if (!IS_ENABLED(CONFIG_ARM64) &&
	    !(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
		initrd_start = (unsigned long)__va(start);
		initrd_end = (unsigned long)__va(end);
		initrd_below_start_ok = 1;