Unverified Commit 2caf0ac9 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!15233 v4 mm: arm64: Add kernel replication feature

Merge Pull Request from: @ci-robot 
 
PR sync from: Denis Darvish <darvish.denis@huawei.com>
https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/C7M5E2K2UD7FV7XYPWPZREJBCOCBIVRN/ 
This patchset implements initial support of kernel text and rodata replication for AArch64.
openEuler 25.03 is used as a baseline.

The current implementation supports the following functionality:
1. Kernel text and rodata are replicated per NUMA node.
2. Vmalloc is able to work with replicated areas, so
   kernel modules' text and rodata are also replicated during
   the module loading stage.
3. BPF handlers are not replicated by default,
   but this can be easily done by using existent APIs.
4. KPROBES, KGDB and all functionality that depends on
   kernel text patching work without any limitation.
5. KPTI, KASLR, and KASAN fully supported.
6. Replicates parts of translation table related to
   replicated text and rodata.
7. 4K and 64K pages are supported.

Translation tables synchronization is necessary only in several special cases:
1. Kernel boot
2. Modules deployment
3. Any allocation in user space that requires a new PUD/P4D

In the current design, modifications of mutable kernel data do not require synchronization between translation tables, because on 64-bit platforms all physical memory is already mapped in kernel space and this mapping is persistent.
In user space, translation-table synchronization is quite rare, because the only case that requires it is the allocation of a new PUD/P4D.

At the current stage only the PGD layer is replicated for the user space.
Please refer to the diagrams below.

TT overview:
                   NODE 0                   NODE 1
              USER      KERNEL         USER      KERNEL
           ---------------------    ---------------------
           ---------------------    ---------------------
            -------------------      ------------------- 
           ---------------------    ---------------------
           ---------------------    ---------------------
            -------------------      -------------------
           ---------------------    ---------------------
           ---------------------    --------------------- 
                --------       -------      --------
    MEM         --------       -------      --------
                <------>                    <------>
                 NODE 0        Shared        NODE 1
                               between
                               nodes
* - entries unique in each table

TT synchronization:
               NODE 0                    NODE 1
          USER      KERNEL          USER      KERNEL
       ---------------------     ---------------------
       ---------------------     ---------------------
                              |
                              |
                              |
                              |
                              |
                              |
                              \/
       ---------------------     ---------------------
       ---------------------     ---------------------
            ---------------------------
                     |
                    ---------------------
                    ---------------------


Known problems:
1. [First item truncated in the original message — it describes
   functionality that is not supported right now.]
2. Replication support in vmalloc, possibly, can be optimized in future.
3. Module APIs currently lack memory-policy support.
   This part will be fixed in the future.


Nikita Panov (20):
  mm: arm64 add Kconfig option for kernel replication
  arm64: align kernel text and rodata
  mm: allow per-NUMA node local P4D/PUD/PMD/PTE allocation
  arm64: add arch callbacks for kernel replication
  mm: per-NUMA node replication core infrastructure
  mm: add apply_to_page_range() for replicated memory
  mm: add support of memory protection for NUMA replicas
  arm64: add support of memory protection for NUMA replicas
  mm: set memory permissions for BPF handlers replicas
  arm64: bpf arch text poke support
  mm: add replicas allocation support for vmalloc
  arm64: enable per-NUMA node kernel text and rodata replication
  mm: enable per-NUMA node kernel text and rodata replication
  arm64: make power management aware about kernel replication
  arm64: make KASAN aware about kernel replication
  arm64: make kernel text patching aware about replicas
  arm64: add support of NUMA replication for efi page tables
  arm64: add support of NUMA replication for ptdump
  arm64: add kernel modules text and rodata replication support
  mm: init kernel modules with replication support

 
https://gitee.com/openeuler/kernel/issues/IBOJU2 
 
Link:https://gitee.com/openeuler/kernel/pulls/15233

 

Reviewed-by: default avatarXu Kuohai <xukuohai@huawei.com>
Reviewed-by: default avatarZucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: default avatarYue Haibing <yuehaibing@huawei.com>
Reviewed-by: default avatarKefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: default avatarZhang Jianhua <chris.zjh@huawei.com>
Signed-off-by: default avatarZhang Peng <zhangpeng362@huawei.com>
Acked-by: default avatarXie XiuQi <xiexiuqi@huawei.com>
parents 80e5830c 10f9d33d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1212,6 +1212,7 @@ CONFIG_ARM64_HAFT=y
CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y
CONFIG_PER_VMA_LOCK=y
CONFIG_LOCK_MM_AND_FIND_VMA=y
# CONFIG_KERNEL_REPLICATION is not set
CONFIG_IOMMU_MM_DATA=y
# CONFIG_ASCEND_FEATURES is not set
CONFIG_PAGE_CACHE_LIMIT=y
+16 −2
Original line number Diff line number Diff line
@@ -87,8 +87,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)

static inline unsigned long efi_get_kimg_min_align(void)
{
	extern bool efi_nokaslr;

	/*
	 * Although relocatable kernels can fix up the misalignment with
	 * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
@@ -97,7 +95,23 @@ static inline unsigned long efi_get_kimg_min_align(void)
	 * 2M alignment if KASLR was explicitly disabled, even if it was not
	 * going to be activated to begin with.
	 */

#ifdef CONFIG_KERNEL_REPLICATION
	/* If kernel replication is enabled, the special alignment is necessary.
	 * Due to this fact for now we map kernel by huge pages even
	 * in case of KASLR enabled. Ugly but works.
	 */
#ifdef CONFIG_ARM64_4K_PAGES
	return HPAGE_SIZE;
#else
	return CONT_PTE_SIZE;
#endif

#else
	extern bool efi_nokaslr;

	return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
#endif
}

#define EFI_ALLOC_ALIGN		SZ_64K
+4 −0
Original line number Diff line number Diff line
@@ -162,7 +162,11 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
	/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
	phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));

#ifdef CONFIG_KERNEL_REPLICATION
	if (system_supports_cnp() && !WARN_ON(pgdp != init_mm.pgd_numa[numa_node_id()])) {
#else
	if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
#endif /* CONFIG_KERNEL_REPLICATION */
		/*
		 * cpu_replace_ttbr1() is used when there's a boot CPU
		 * up (i.e. cpufeature framework is not up yet) and
+54 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_NUMA_REPLICATION_H
#define __ASM_NUMA_REPLICATION_H

#ifdef CONFIG_KERNEL_REPLICATION
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <linux/mm.h>
#include <linux/seq_file.h>

#define PAGE_TABLE_REPLICATION_LEFT  ((max((u64)_end - SZ_2G, (u64)MODULES_VADDR)) & PGDIR_MASK)
#define PAGE_TABLE_REPLICATION_RIGHT ((((u64)_end + SZ_2G) & PGDIR_MASK) + PGDIR_SIZE - 1)

/*
 * Allocate a per-node replica of the kernel PGD on NUMA node @nid and
 * initialize it with the current contents of swapper_pg_dir.
 *
 * An order-2 block (four pages) is allocated, and the replica PGD is
 * placed at offset 2 * PAGE_SIZE inside it, with the first two pages
 * left in front of it.
 * NOTE(review): presumably this mirrors the KPTI trampoline page layout
 * that precedes swapper_pg_dir in the master table — confirm against
 * the arm64 linker script / KPTI code.
 *
 * Allocation failure is treated as fatal (BUG_ON): without the replica
 * the per-node page-table scheme cannot be set up.
 *
 * Returns the virtual address of the replicated PGD page.
 */
static inline pgd_t *numa_replicate_pgt_pgd(int nid)
{
	pgd_t *new_pgd;
	struct page *pgd_page;

	/* Order-2 allocation pinned to the target node. */
	pgd_page = alloc_pages_node(nid, GFP_PGTABLE_KERNEL, 2);
	BUG_ON(pgd_page == NULL);

	new_pgd = (pgd_t *)page_address(pgd_page);
	/* Skip the first two pages — extra pages for KPTI. */
	new_pgd += (PAGE_SIZE * 2 / sizeof(pgd_t));
	/* Seed the replica with the master kernel page table (one page). */
	copy_page(new_pgd, swapper_pg_dir);

	return new_pgd;
}

/*
 * Make @pgd the live kernel translation table (TTBR1) on the calling CPU.
 *
 * cpu_replace_ttbr1() is given idmap_pg_dir so the switch can be done
 * safely while TTBR1 is being changed; the subsequent local TLB flush
 * drops any translations cached from the previously installed table.
 * Only the local CPU is affected — each CPU must call this for itself.
 */
static inline void numa_load_replicated_pgd(pgd_t *pgd)
{
	cpu_replace_ttbr1(pgd, idmap_pg_dir);
	local_flush_tlb_all();
}

/*
 * Emit one diagnostic line for the calling CPU into seq_file @m:
 * its NUMA node, CPU id, the raw TTBR1_EL1 value (which shows which
 * replicated table is currently live), and the thread-group leader's
 * comm of the current task.
 *
 * Always returns 0.
 *
 * NOTE(review): numa_node_id()/smp_processor_id() are only stable if the
 * caller runs with preemption disabled or pinned to a CPU — confirm at
 * the call site.
 */
static inline ssize_t numa_cpu_dump(struct seq_file *m)
{
	seq_printf(m, "NODE: #%02d, CPU: #%04d, ttbr1_el1: 0x%p, COMM: %s\n",
		numa_node_id(),
		smp_processor_id(),
		(void *)read_sysreg(ttbr1_el1),
		current->group_leader->comm);
	return 0;
}

/*
 * Make newly written kernel text in [start, end) visible to instruction
 * fetch: clean/invalidate the data cache to the Point of Unification
 * over the range, then invalidate the entire instruction cache to the
 * PoU, as arm64 requires after modifying executable code.
 */
static inline void numa_sync_text_replicas(unsigned long start, unsigned long end)
{
	caches_clean_inval_pou(start, end);
	icache_inval_all_pou();
}
#endif /* CONFIG_KERNEL_REPLICATION */
#endif /* __ASM_NUMA_REPLICATION_H */
+13 −0
Original line number Diff line number Diff line
@@ -21,7 +21,11 @@
 * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
 *	and fixed mappings
 */
#ifdef CONFIG_KERNEL_REPLICATION
#define VMALLOC_START		((MODULES_END & PGDIR_MASK) + PGDIR_SIZE)
#else /* !CONFIG_KERNEL_REPLICATION */
#define VMALLOC_START		(MODULES_END)
#endif /* CONFIG_KERNEL_REPLICATION */
#define VMALLOC_END		(VMEMMAP_START - SZ_256M)

#define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
@@ -537,6 +541,15 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#ifdef CONFIG_KERNEL_REPLICATION
/*
 * Extract the protection/attribute bits (pgprot) of @pmd.
 *
 * Rebuild a pmd from the same pfn with an empty pgprot and XOR it with
 * the original entry: the address/pfn bits cancel out, leaving exactly
 * the bits that were contributed by the entry's protection attributes.
 */
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
	unsigned long pfn = pmd_pfn(pmd);

	return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}
#endif /* CONFIG_KERNEL_REPLICATION */

static inline void __set_pte_at(struct mm_struct *mm,
				unsigned long __always_unused addr,
				pte_t *ptep, pte_t pte, unsigned int nr)
Loading