Commit 8e40612f authored by Jia He, committed by Borislav Petkov
Browse files

EDAC/ghes: Add a notifier for reporting memory errors



In order to make ghes_edac a proper module and disentangle it from the
core GHES code, add a notifier for reporting memory errors. Use an atomic
notifier because call sites like ghes_proc_in_irq() run in interrupt context.

  [ bp: Massage commit message. ]

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Jia He <justin.he@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20221010023559.69655-3-justin.he@arm.com
parent 5012524e
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
@@ -94,6 +94,8 @@
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif

/*
 * Chain of subscribers that want to be told about platform memory errors.
 * ghes_do_proc() calls it with sev as the notifier value and the
 * struct cper_sec_mem_err payload as the data pointer. Subscribers are
 * added and removed through ghes_register_report_chain() and
 * ghes_unregister_report_chain().
 */
static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);

static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
@@ -645,7 +647,7 @@ static bool ghes_do_proc(struct ghes *ghes,
		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

			ghes_edac_report_mem_error(sev, mem_err);
			atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);

			arch_apei_report_mem_error(sev, mem_err);
			queued = ghes_handle_memory_failure(gdata, sev);
@@ -1497,3 +1499,15 @@ void __init acpi_ghes_init(void)
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
}

/**
 * ghes_register_report_chain - subscribe to GHES memory error reports
 * @nb: notifier block to add to ghes_report_chain
 *
 * Thin wrapper around atomic_notifier_chain_register(). Callbacks on this
 * chain are invoked from ghes_do_proc() for CPER_SEC_PLATFORM_MEM sections,
 * with sev as the notifier value and the struct cper_sec_mem_err payload as
 * the data pointer. This function is void, so any result of the underlying
 * registration call is not propagated to the caller.
 */
void ghes_register_report_chain(struct notifier_block *nb)
{
	atomic_notifier_chain_register(&ghes_report_chain, nb);
}
EXPORT_SYMBOL_GPL(ghes_register_report_chain);

/**
 * ghes_unregister_report_chain - stop receiving GHES memory error reports
 * @nb: notifier block to remove from ghes_report_chain
 *
 * Thin wrapper around atomic_notifier_chain_unregister(). This function is
 * void, so any result of the underlying unregistration call is not
 * propagated to the caller.
 */
void ghes_unregister_report_chain(struct notifier_block *nb)
{
	atomic_notifier_chain_unregister(&ghes_report_chain, nb);
}
EXPORT_SYMBOL_GPL(ghes_unregister_report_chain);
+17 −2
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/dmi.h>
#include "edac_module.h"
#include <ras/ras_event.h>
#include <linux/notifier.h>

#define OTHER_DETAIL_LEN	400

@@ -267,11 +268,14 @@ static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char
	return n;
}

void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
static int ghes_edac_report_mem_error(struct notifier_block *nb,
				      unsigned long val, void *data)
{
	struct cper_sec_mem_err *mem_err = (struct cper_sec_mem_err *)data;
	struct cper_mem_err_compact cmem;
	struct edac_raw_error_desc *e;
	struct mem_ctl_info *mci;
	unsigned long sev = val;
	struct ghes_pvt *pvt;
	unsigned long flags;
	char *p;
@@ -282,7 +286,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
	 * know.
	 */
	if (WARN_ON_ONCE(in_nmi()))
		return;
		return NOTIFY_OK;

	spin_lock_irqsave(&ghes_lock, flags);

@@ -374,8 +378,15 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)

unlock:
	spin_unlock_irqrestore(&ghes_lock, flags);

	return NOTIFY_OK;
}

/*
 * Notifier block that hooks ghes_edac_report_mem_error() into the GHES
 * report chain. It is registered from ghes_edac_register() and removed
 * again from ghes_edac_unregister().
 */
static struct notifier_block ghes_edac_mem_err_nb = {
	.notifier_call	= ghes_edac_report_mem_error,
	.priority	= 0,
};

/*
 * Known systems that are safe to enable this module.
 */
@@ -503,6 +514,8 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
	ghes_pvt = pvt;
	spin_unlock_irqrestore(&ghes_lock, flags);

	ghes_register_report_chain(&ghes_edac_mem_err_nb);

	/* only set on success */
	refcount_set(&ghes_refcount, 1);

@@ -548,6 +561,8 @@ void ghes_edac_unregister(struct ghes *ghes)
	if (mci)
		edac_mc_free(mci);

	ghes_unregister_report_chain(&ghes_edac_mem_err_nb);

unlock:
	mutex_unlock(&ghes_reg_mutex);
}
+3 −7
Original line number Diff line number Diff line
@@ -76,18 +76,11 @@ int ghes_estatus_pool_init(int num_ghes);
/* From drivers/edac/ghes_edac.c */

#ifdef CONFIG_EDAC_GHES
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);

int ghes_edac_register(struct ghes *ghes, struct device *dev);

void ghes_edac_unregister(struct ghes *ghes);

#else
static inline void ghes_edac_report_mem_error(int sev,
				       struct cper_sec_mem_err *mem_err)
{
}

static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
	return -ENODEV;
@@ -145,4 +138,7 @@ int ghes_notify_sea(void);
static inline int ghes_notify_sea(void) { return -ENOENT; }
#endif

struct notifier_block;
extern void ghes_register_report_chain(struct notifier_block *nb);
extern void ghes_unregister_report_chain(struct notifier_block *nb);
#endif /* GHES_H */