Commit ca4a4e66 authored by Nikita Panov, committed by Denis Darvish

arm64: enable per-NUMA node kernel text and rodata replication

kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IBOJU2

-------------------------------------------------

During boot, memory for the replicas is allocated,
per-node translation tables are created,
the original kernel text and rodata are copied into the replicas,
and the replicas are mapped through the per-node tables.
When a secondary CPU starts up, after minimal initialization,
the node-local page table is loaded into ttbr1.
For the user-space side, on every mm switch the translation
table of the local node is loaded.
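
The boot-time sequence above, as a sketch in C. This is illustrative only,
not code from this patch: per_node_pgd() is real in this series, but the
loop body, the _stext/__init_begin bounds and the elided map step are
simplifying assumptions.

    #include <linux/kernel.h>
    #include <linux/memblock.h>
    #include <linux/nodemask.h>
    #include <linux/string.h>
    #include <asm/sections.h>

    static void __init replicate_text_rodata_sketch(void)
    {
    	int nid;
    	phys_addr_t size = (phys_addr_t)(__init_begin - _stext);

    	for_each_online_node(nid) {
    		/* 1. allocate node-local memory for this node's replica */
    		void *replica = memblock_alloc_node(size, PAGE_SIZE, nid);

    		if (!replica)
    			panic("no memory for kernel replica on node %d\n", nid);

    		/* 2. copy the original kernel text and rodata into it */
    		memcpy(replica, _stext, size);

    		/*
    		 * 3. map the replica at the kernel's usual virtual addresses
    		 *    in this node's private table (pseudo-step, not shown):
    		 *    map_replica(per_node_pgd(&init_mm, nid), replica, size);
    		 */
    	}
    }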

Acked-by: Artem Kuzin <artem.kuzin@huawei.com>
Acked-by: Alexander Grubnikov <alexander.grubnikov@huawei.com>
Acked-by: Ilya Hanov <ilya.hanov@huawei-partners.com>
Acked-by: Denis Darvish <darvish.denis@huawei.com>
Signed-off-by: Nikita Panov <panov.nikita@huawei.com>
parent 2a313f08
arch/arm64/include/asm/pgtable.h +4 −0
@@ -21,7 +21,11 @@
  * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
  *	and fixed mappings
  */
+#ifdef CONFIG_KERNEL_REPLICATION
+#define VMALLOC_START		((MODULES_END & PGDIR_MASK) + PGDIR_SIZE)
+#else /* !CONFIG_KERNEL_REPLICATION */
 #define VMALLOC_START		(MODULES_END)
+#endif /* CONFIG_KERNEL_REPLICATION */
 #define VMALLOC_END		(VMEMMAP_START - SZ_256M)
 
 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
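
Why the replicated VMALLOC_START is rounded up: each node gets its own
top-level table, presumably so the replicated module region and the shared
vmalloc region never share one pgd entry; vmalloc is therefore pushed to the
next PGDIR boundary after MODULES_END. A standalone illustration of the
arithmetic (the PGDIR_SHIFT and MODULES_END values are examples for 4K
pages/48-bit VAs, not values taken from this patch):

    #include <stdio.h>

    int main(void)
    {
    	unsigned long long pgdir_size = 1ULL << 39;	/* 512 GiB per pgd entry */
    	unsigned long long pgdir_mask = ~(pgdir_size - 1);
    	unsigned long long modules_end = 0xffff800010000000ULL;	/* example */

    	/* (MODULES_END & PGDIR_MASK) + PGDIR_SIZE from the hunk above */
    	unsigned long long vmalloc_start = (modules_end & pgdir_mask) + pgdir_size;

    	printf("modules_end   = %#llx\n", modules_end);
    	printf("vmalloc_start = %#llx\n", vmalloc_start);	/* next pgd boundary */
    	return 0;
    }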
arch/arm64/kernel/smp.c +8 −0
@@ -34,6 +34,7 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/kvm_host.h>
+#include <linux/numa_kernel_replication.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -209,6 +210,13 @@ asmlinkage notrace void secondary_start_kernel(void)
 	mmgrab(mm);
 	current->active_mm = mm;
 
+	/*
+	 * Set up the per-NUMA node page table if kernel
+	 * replication is enabled. The option is supported
+	 * only in 64-bit mode.
+	 */
+	numa_setup_pgd();
+
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
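
What numa_setup_pgd() presumably does on the freshly started secondary CPU,
per the commit message: load the node-local kernel table into ttbr1. The
real helper comes from <linux/numa_kernel_replication.h> and is not part of
this hunk; the body below, and the single-argument cpu_replace_ttbr1()
form, are assumptions.

    #include <linux/topology.h>
    #include <asm/mmu_context.h>

    void numa_setup_pgd(void)
    {
    	/* the node-local copy of the kernel page table for this CPU */
    	pgd_t *pgd = per_node_pgd(&init_mm, numa_node_id());

    	/* install it in ttbr1 so kernel fetches hit the local replica */
    	cpu_replace_ttbr1(lm_alias(pgd));
    }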
arch/arm64/mm/context.c +2 −1
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/numa_kernel_replication.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
@@ -267,7 +268,7 @@ void check_and_switch_context(struct mm_struct *mm)
 	 * emulating PAN.
 	 */
 	if (!system_uses_ttbr0_pan())
-		cpu_switch_mm(mm->pgd, mm);
+		cpu_switch_mm(this_node_pgd(mm), mm);
 }
 
 unsigned long arm64_mm_context_get(struct mm_struct *mm)
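
this_node_pgd() makes even user tasks pick up the kernel half of their
translation table from the local node on every mm switch. A plausible shape
for the helper (an assumption: the real definition comes from
<linux/numa_kernel_replication.h>, and the mm->pgd_numa array is
hypothetical here):

    #include <linux/mm_types.h>
    #include <linux/topology.h>

    static inline pgd_t *this_node_pgd(struct mm_struct *mm)
    {
    #ifdef CONFIG_KERNEL_REPLICATION
    	/* hypothetical per-node pgd array hanging off the mm */
    	return mm->pgd_numa[numa_node_id()];
    #else
    	/* replication off: behave exactly like the old code */
    	return mm->pgd;
    #endif
    }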
arch/arm64/mm/init.c +49 −0
@@ -597,6 +597,47 @@ void __init bootmem_init(void)
 	memblock_dump_all();
 }
 
+#ifdef CONFIG_KERNEL_REPLICATION
+/*
+ * It is necessary to preallocate vmalloc pages in advance,
+ * otherwise the replicated page tables can be incomplete.
+ */
+void __init preallocate_vmalloc_pages(void)
+{
+	unsigned long addr;
+
+	for (addr = MODULES_VADDR; addr <= VMALLOC_END;
+			addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd = pgd_offset_k(addr);
+		p4d_t *p4d;
+		pud_t *pud;
+		pmd_t *pmd;
+		int pte;
+
+		p4d = p4d_alloc(&init_mm, pgd, addr);
+		/*
+		 * No need to check p4d here: at most a 4-level
+		 * page table is possible, so the p4d is folded
+		 * and p4d_alloc() cannot fail.
+		 */
+		pud = pud_alloc(&init_mm, p4d, addr);
+		if (!pud)
+			panic("Failed to pre-allocate pud pages for vmalloc area\n");
+		if (!mm_pud_folded(&init_mm))
+			continue;
+
+		pmd = pmd_alloc(&init_mm, pud, addr);
+		if (!pmd)
+			panic("Failed to pre-allocate pmd pages for vmalloc area\n");
+		if (!mm_pmd_folded(&init_mm))
+			continue;
+
+		pte = pte_alloc(&init_mm, pmd);
+		if (pte)
+			panic("Failed to pre-allocate pte pages for vmalloc area\n");
+	}
+}
+#endif /* CONFIG_KERNEL_REPLICATION */
+
 /*
  * mem_init() marks the free areas in the mem_map and tells us how much memory
  * is free.  This is done after various parts of the system have claimed their
@@ -651,7 +692,15 @@ void free_initmem(void)
 	 * prevents the region from being reused for kernel modules, which
 	 * is not supported by kallsyms.
 	 */
+#ifdef CONFIG_KERNEL_REPLICATION
+	/*
+	 * With a replicated kernel, the per-NUMA node vmalloc
+	 * mappings must be released as well.
+	 */
+	vunmap_range_replicas((u64)__init_begin, (u64)__init_end);
+#else
 	vunmap_range((u64)__init_begin, (u64)__init_end);
+#endif /* CONFIG_KERNEL_REPLICATION */
 }
 
 void dump_mem_limit(void)
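
free_initmem() has to unmap the freed init area from every node's table,
not just from init_mm.pgd, otherwise the replicas would keep stale mappings
of memory that is being handed back to the page allocator. A sketch of what
vunmap_range_replicas() plausibly does (the function is real in this
series, but this body and the unmap_in_pgd() helper are hypothetical):

    void vunmap_range_replicas(unsigned long start, unsigned long end)
    {
    	int nid;

    	/* tear [start, end) down in every node's kernel table */
    	for_each_memory_node(nid)
    		unmap_in_pgd(per_node_pgd(&init_mm, nid), start, end);	/* hypothetical */

    	/* one TLB flush once all the tables agree */
    	flush_tlb_kernel_range(start, end);
    }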
arch/arm64/mm/mmu.c +39 −2
@@ -25,6 +25,7 @@
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
 #include <linux/kfence.h>
+#include <linux/numa_kernel_replication.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -477,6 +478,23 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			     pgd_pgtable_alloc, flags);
 }
 
+static void populate_mappings_prot(phys_addr_t phys, unsigned long virt,
+				   phys_addr_t size, pgprot_t prot)
+{
+#ifdef CONFIG_KERNEL_REPLICATION
+	int nid;
+
+	for_each_memory_node(nid) {
+		__create_pgd_mapping(per_node_pgd(&init_mm, nid),
+			page_to_phys(walk_to_page_node(nid, (void *)virt)),
+			virt, size, prot, NULL, NO_CONT_MAPPINGS);
+	}
+#else
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
+			     NO_CONT_MAPPINGS);
+#endif /* CONFIG_KERNEL_REPLICATION */
+}
+
 static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
 				phys_addr_t size, pgprot_t prot)
 {
@@ -486,8 +504,7 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
-			     NO_CONT_MAPPINGS);
+	populate_mappings_prot(phys, virt, size, prot);
 
 	/* flush the TLBs after updating live kernel mappings */
 	flush_tlb_kernel_range(virt, virt + size);
@@ -676,6 +693,22 @@ static pgprot_t kernel_exec_prot(void)
 }
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+
+#ifdef CONFIG_KERNEL_REPLICATION
+static void __init populate_trampoline_mappings(void)
+{
+	int nid;
+
+	/* Copy the trampoline mappings into the replicated tables */
+	for_each_memory_node(nid) {
+		memcpy(per_node_pgd(&init_mm, nid) - (PAGE_SIZE * 2 / sizeof(pgd_t)),
+				tramp_pg_dir, PGD_SIZE);
+	}
+	/* Make sure the replicated page tables can be observed properly */
+	dsb(ishst);
+}
+#endif /* CONFIG_KERNEL_REPLICATION */
+
 static int __init map_entry_trampoline(void)
 {
 	int i;
@@ -701,6 +734,10 @@ static int __init map_entry_trampoline(void)
 		__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
 			     pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
 
+#ifdef CONFIG_KERNEL_REPLICATION
+	populate_trampoline_mappings();
+#endif /* CONFIG_KERNEL_REPLICATION */
+
 	return 0;
 }
 core_initcall(map_entry_trampoline);
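
The "per_node_pgd(&init_mm, nid) - (PAGE_SIZE * 2 / sizeof(pgd_t))"
expression above is plain pgd_t pointer arithmetic: stepping back
2 * PAGE_SIZE / sizeof(pgd_t) entries moves the pointer back exactly two
pages, matching the kernel image layout where tramp_pg_dir sits two pages
before swapper_pg_dir (with reserved_pg_dir in between); each replica
evidently keeps its trampoline table two pages before its swapper copy.
A standalone demonstration of just the arithmetic:

    #include <stdio.h>

    typedef unsigned long pgd_t;	/* stand-in for the kernel's pgd_t */
    #define PAGE_SIZE 4096UL

    int main(void)
    {
    	/* assumed replica layout: tramp | reserved | swapper */
    	static pgd_t replica[3 * PAGE_SIZE / sizeof(pgd_t)];
    	pgd_t *swapper = &replica[2 * PAGE_SIZE / sizeof(pgd_t)];

    	/* the same expression populate_trampoline_mappings() uses */
    	pgd_t *tramp = swapper - (PAGE_SIZE * 2 / sizeof(pgd_t));

    	printf("swapper - tramp = %zu bytes (two pages)\n",
    	       (size_t)((char *)swapper - (char *)tramp));
    	return 0;
    }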