Commit fa756368 authored by Shengwei Luo, committed by zhoukaiqi
Browse files

RAS: Report ARM processor information to userspace

kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4OUGN?from=project-issue


CVE: NA

----------------

The original arm_event trace code only traces out the ARM processor error
information data. That is not enough for the user to take appropriate action.

According to the UEFI 2.9 specification, section N.2.4.4, the ARM processor
error section includes several ARM processor error information structures,
several ARM processor context information structures and several
vendor-specific error information structures. In addition to these, there
are the error severity and the logical CPU index of the event. Report all of
this information to userspace via the perf interface, so that the user can do
CPU core isolation according to the error severity and other information.

Original-Author: Jason Tian <jason@os.amperecomputing.com>
Signed-off-by: Shengwei Luo <luoshengwei@huawei.com>
Reviewed-by: Lv Ying <lvying6@huawei.com>
Reviewed-by: Tan Xiaofei <tanxiaofei@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: zhoukaiqi <zhoukaiqi@huawei.com>
	modified:   arch/arm64/configs/openeuler_defconfig
	modified:   drivers/acpi/apei/ghes.c
	modified:   drivers/ras/Kconfig
	modified:   drivers/ras/ras.c
	modified:   include/linux/ras.h
	modified:   include/ras/ras_event.h
parent 038c00b1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -6769,6 +6769,7 @@ CONFIG_HNS3_PMU=m
# end of Performance monitor support

CONFIG_RAS=y
CONFIG_RAS_ARM_EVENT_INFO=y
CONFIG_PAGE_EJECT=m
CONFIG_USB4=m
# CONFIG_USB4_DEBUGFS_WRITE is not set
+5 −2
Original line number Diff line number Diff line
@@ -518,9 +518,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
	int sec_sev, i;
	char *p;

	log_arm_hw_error(err);

	sec_sev = ghes_severity(gdata->error_severity);
#ifdef CONFIG_RAS_ARM_EVENT_INFO
	log_arm_hw_error(err, sec_sev);
#else
	log_arm_hw_error(err);
#endif
	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
		return false;

+11 −0
Original line number Diff line number Diff line
@@ -29,6 +29,17 @@ menuconfig RAS
	  so have ideal availability, but may be unreliable, with frequent
	  data corruption.

config RAS_ARM_EVENT_INFO
	bool "RAS feature: report all the arm processor info in arm event"
	default y
	depends on ARM64
	help
	  This option reports the ARM processor error information, context
	  information, vendor-specific error information, the error severity
	  and the logical CPU index of the event to userspace via the perf
	  interface, so that the user can do CPU core isolation according to
	  the error severity and other information.

if RAS

source "arch/x86/ras/Kconfig"
+50 −0
Original line number Diff line number Diff line
@@ -21,9 +21,59 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
	trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
}

/*
 * log_arm_hw_error - hand an ARM processor error section to the arm_event
 * tracepoint.
 * @err: ARM processor error section; the error information entries, the
 *       context information entries and any remaining bytes are read from
 *       the memory that directly follows this header.
 * @sev: severity forwarded to the tracepoint (only present when
 *       CONFIG_RAS_ARM_EVENT_INFO is set).
 *
 * With CONFIG_RAS_ARM_EVENT_INFO the section is split into three byte ranges
 * (error information, context information, trailing vendor data) which are
 * passed to trace_arm_event() together with @sev and the logical CPU index
 * derived from err->mpidr.  Without it only @err is traced.
 *
 * NOTE(review): the entry counts and the per-context sizes come from the
 * record itself and are not checked here against the size of the buffer the
 * caller holds -- confirm that callers validate the section beforehand.
 */
#ifdef CONFIG_RAS_ARM_EVENT_INFO
void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
#else
void log_arm_hw_error(struct cper_sec_proc_arm *err)
#endif
{
#ifdef CONFIG_RAS_ARM_EVENT_INFO
	u32 pei_len;
	u32 ctx_len = 0;
	s32 vsei_len;
	u8 *pei_err;
	u8 *ctx_err;
	u8 *ven_err_data;
	struct cper_arm_err_info *err_info;
	struct cper_arm_ctx_info *ctx_info;
	int n, sz;
	int cpu;

	/* The error information entries start right after the fixed header. */
	err_info = (struct cper_arm_err_info *)(err + 1);
	pei_err = (u8 *)err_info;
	pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;

	/*
	 * The context entries follow the error information entries.  Each
	 * step advances by one context header plus the ->size bytes it
	 * announces, so the cursor is moved with byte-pointer arithmetic.
	 */
	ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
	ctx_err = (u8 *)ctx_info;
	for (n = 0; n < err->context_info_num; n++) {
		sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
		ctx_info = (struct cper_arm_ctx_info *)((u8 *)ctx_info + sz);
		ctx_len += sz;
	}

	/* Whatever the section still holds after the contexts is vendor data. */
	vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
						pei_len + ctx_len);
	if (vsei_len < 0) {
		/* section_length is unsigned, so it is printed with %u. */
		pr_warn(FW_BUG
			"section length: %u\n", err->section_length);
		pr_warn(FW_BUG
			"section length is too small\n");
		pr_warn(FW_BUG
			"firmware-generated error record is incorrect\n");
		vsei_len = 0;
	}
	/* After the loop the cursor points one past the last context entry. */
	ven_err_data = (u8 *)ctx_info;

	cpu = GET_LOGICAL_INDEX(err->mpidr);
	/* when return value is invalid, set cpu index to -1 */
	if (cpu < 0)
		cpu = -1;

	trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
			ven_err_data, (u32)vsei_len, sev, cpu);
#else
	trace_arm_event(err);
#endif
}

static int __init ras_init(void)
+23 −0
Original line number Diff line number Diff line
@@ -24,15 +24,38 @@ int __init parse_cec_param(char *str);
/*
 * Passes all six arguments unchanged to the non_standard_event tracepoint.
 */
void log_non_standard_event(const guid_t *sec_type,
			    const guid_t *fru_id, const char *fru_text,
			    const u8 sev, const u8 *err, const u32 len);

/*
 * The prototype takes an additional severity argument when
 * CONFIG_RAS_ARM_EVENT_INFO is enabled, so callers have to pick the matching
 * call form under the same #ifdef.
 */
#ifdef CONFIG_RAS_ARM_EVENT_INFO
void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
#else
void log_arm_hw_error(struct cper_sec_proc_arm *err);
#endif

#else
/*
 * Empty stand-in with the same signature as the real function; selected by
 * the #else branch of the enclosing conditional (presumably CONFIG_RAS --
 * the opening #if is outside this hunk).
 */
static inline void log_non_standard_event(const guid_t *sec_type,
					  const guid_t *fru_id,
					  const char *fru_text, const u8 sev,
					  const u8 *err, const u32 len)
{
}

/*
 * Empty stand-ins for log_arm_hw_error(), one per prototype variant.  Both
 * must be "static inline void": without it the definition would default to
 * an int return type with external linkage in every file that includes this
 * header.
 */
#ifdef CONFIG_RAS_ARM_EVENT_INFO
static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
#else
static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
#endif

#endif

#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
/*
 * Include ARM specific SMP header which provides a function mapping mpidr to
 * cpu logical index.
 */
#include <asm/smp_plat.h>
/* The argument is parenthesized so that any expression is masked as a whole. */
#define GET_LOGICAL_INDEX(mpidr) \
	get_logical_index((mpidr) & MPIDR_HWID_BITMASK)
#else
/* No mpidr mapping on other architectures: always yields a negative errno. */
#define GET_LOGICAL_INDEX(mpidr) (-EINVAL)
#endif /* CONFIG_ARM || CONFIG_ARM64 */

#endif /* __RAS_H__ */
Loading