Unverified Commit 69f301dd authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!3280 arm64: add machine check safe support

Merge Pull Request from: @ci-robot 
 
PR sync from: Tong Tiangen <tongtiangen@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/343LHBM44UNOCW2VSYOSYXHMJJMSHETT/ 
As memory capacity and density increase, so does the probability of memory
errors. The growing size and density of server RAM in data centers and
clouds has led to an increase in uncorrectable memory errors.

Currently, the kernel has a mechanism to recover from hardware memory errors.
This patchset provides a new recovery mechanism.

On arm64, hardware memory errors are handled in do_sea(), which
distinguishes two cases:
 1. If the memory error was consumed in user mode, the solution is to kill
    the user process and isolate the error page.
 2. If the memory error was consumed in kernel mode, the solution is to
    panic.

For case 2, an undifferentiated panic may not be the optimal choice, as some
situations can be handled better. In some scenarios the panic can be avoided.
For example, if a uaccess fails due to a memory error, only the user process
is affected, so killing the user process and isolating the user page with
the hardware memory error is a better choice.

Tong Tiangen (7):
  uaccess: add generic fallback version of copy_mc_to_user()
  arm64: add support for machine check error safe
  arm64: add uaccess to machine check safe
  mm/hwpoison: return -EFAULT when copy fail in
    copy_mc_[user]_highpage()
  arm64: support copy_mc_[user]_highpage()
  arm64: introduce copy_mc_to_kernel() implementation
  arm64: add machine check safe sysctl interface


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8M74H 
 
Link:https://gitee.com/openeuler/kernel/pulls/3280

 

Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Zhang Jianhua <chris.zjh@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 765e43e9 fafd6b47
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -540,6 +540,20 @@ if leaking kernel pointer values to unprivileged users is a concern.
When ``kptr_restrict`` is set to 2, kernel pointers printed using
%pK will be replaced with 0s regardless of privileges.

machine_check_safe (arm64 only)
================================

This indicates whether the machine check safe memory copy feature is enabled
or not. It only exists on arm64 when ARCH_HAS_COPY_MC is enabled.

The value in this file determines the behaviour of the kernel when a
synchronous exception is taken during a memory copy.

= ===================================================================
0 the kernel will panic immediately.
1 the kernel will recover, since a memcpy variant is provided which can
  safely fail when accessing hwpoisoned memory.
= ===================================================================

modprobe
========
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ config ARM64
	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
	select ARCH_HAS_CACHE_LINE_SIZE
	select ARCH_HAS_COPY_MC if ACPI_APEI_GHES
	select ARCH_HAS_CURRENT_STACK_POINTER
	select ARCH_HAS_DEBUG_VIRTUAL
	select ARCH_HAS_DEBUG_VM_PGTABLE
+15 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#define EX_TYPE_UACCESS_ERR_ZERO	2
#define EX_TYPE_KACCESS_ERR_ZERO	3
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD	4
#define EX_TYPE_COPY_MC_PAGE_ERR_ZERO	5

/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
#define EX_DATA_REG_ERR_SHIFT	0
@@ -51,6 +52,16 @@
#define _ASM_EXTABLE_UACCESS(insn, fixup)				\
	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)

/*
 * Create an exception table entry of type EX_TYPE_COPY_MC_PAGE_ERR_ZERO for
 * `insn`, passing `fixup` as its fixup location. The `err` and `zero`
 * registers are combined into the entry's data argument via EX_DATA_REG().
 */
#define _ASM_EXTABLE_COPY_MC_PAGE_ERR_ZERO(insn, fixup, err, zero)	\
	__ASM_EXTABLE_RAW(insn, fixup, 					\
			  EX_TYPE_COPY_MC_PAGE_ERR_ZERO,		\
			  (						\
			    EX_DATA_REG(ERR, err) |			\
			    EX_DATA_REG(ZERO, zero)			\
			  ))

/*
 * Shorthand for _ASM_EXTABLE_COPY_MC_PAGE_ERR_ZERO() that passes wzr for both
 * the `err` and `zero` registers.
 */
#define _ASM_EXTABLE_COPY_MC_PAGE(insn, fixup)				\
	_ASM_EXTABLE_COPY_MC_PAGE_ERR_ZERO(insn, fixup, wzr, wzr)
/*
 * Create an exception table entry for uaccess `insn`, which will branch to `fixup`
 * when an unhandled fault is taken.
@@ -59,6 +70,10 @@
	_ASM_EXTABLE_UACCESS(\insn, \fixup)
	.endm

	/*
	 * Assembler-macro wrapper: forwards `insn` and `fixup` unchanged to
	 * _ASM_EXTABLE_COPY_MC_PAGE().
	 */
	.macro          _asm_extable_copy_mc_page, insn, fixup
	_ASM_EXTABLE_COPY_MC_PAGE(\insn, \fixup)
	.endm

/*
 * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
 * do nothing.
+4 −0
Original line number Diff line number Diff line
@@ -154,6 +154,10 @@ lr .req x30 // link register
#define CPU_LE(code...) code
#endif

/*
 * Emit the instruction(s) `x` at local label 9999, then invoke
 * _asm_extable_copy_mc_page with that label (9999b) as the instruction and
 * `l` as the fixup argument.
 */
#define CPY_MC(l, x...)		\
9999:   x;			\
	_asm_extable_copy_mc_page    9999b, l

/*
 * Define a macro that constructs a 64-bit value by concatenating two
 * 32-bit registers. Note that on big endian systems the order of the
+1 −0
Original line number Diff line number Diff line
@@ -46,4 +46,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
#endif /* !CONFIG_BPF_JIT */

bool fixup_exception(struct pt_regs *regs);
bool fixup_exception_mc(struct pt_regs *regs);
#endif
Loading