Unverified commit 47f40eb8, authored by openeuler-ci-bot, committed by Gitee

!14132 v2 ACPI: APEI: handle synchronous errors in task work

Merge Pull Request from: @ci-robot 
 
PR sync from: Tong Tiangen <tongtiangen@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/VWB2VJBIECA2H6R5HWXU6DT4QOY5IYSZ/ 
There are two major types of uncorrected recoverable (UCR) errors:

- Synchronous error: The error is detected and raised at the point of
  consumption in the execution flow, e.g. when a CPU tries to access
  a poisoned cache line. The CPU takes a synchronous error exception
  such as a Synchronous External Abort (SEA) on arm64 or a Machine Check
  Exception (MCE) on x86. The OS must take action (for example, offline
  the failed page or kill the failing thread) to recover from this
  uncorrectable error.

- Asynchronous error: The error is detected outside the processor's
  execution context, e.g. by a background scrubber. Some data in memory
  is corrupted, but the data has not been consumed yet. The OS may
  optionally take action to recover from this uncorrectable error.

Currently, both synchronous and asynchronous errors use
memory_failure_queue() to schedule memory_failure() to execute in kworker
context. As a result, when a user-space process accesses poisoned
data, a data abort is taken and memory_failure() is executed in the
kworker context, which:

  - cannot kill the user-space process in some cases, resulting in a
    synchronous error infinite loop


Since commit a70297d22132 ("ACPI: APEI: set memory failure flags as
MF_ACTION_REQUIRED on synchronous events"), the flag MF_ACTION_REQUIRED
can be used to determine whether a synchronous exception occurred on
an arm64 platform.  When a synchronous exception is detected, the kernel is
expected to terminate the current process, which has accessed the
poisoned page. This is done by sending a SIGBUS signal with the error
code BUS_MCEERR_AR, indicating an action-required machine check error on
read.

However, when kill_proc() is called to terminate the processes that have
the poisoned page mapped, it sends the incorrect SIGBUS error code
BUS_MCEERR_AO, because the context in which it operates is not the one
in which the error was triggered.

To reproduce this problem:

  # STEP 1: enable early kill mode
  #sysctl -w vm.memory_failure_early_kill=1
  vm.memory_failure_early_kill = 1

  # STEP 2: inject a UCE error and consume it to trigger a synchronous error
  #einj_mem_uc single
  0: single   vaddr = 0xffffb0d75400 paddr = 4092d55b400
  injecting ...
  triggering ...
  signal 7 code 5 addr 0xffffb0d75000
  page not present
  Test passed

The si_code (code 5) from einj_mem_uc indicates a BUS_MCEERR_AO error,
which is incorrect: the error was consumed synchronously.

To fix it, queue memory_failure() as a task_work so that it runs in
the context of the process that is actually consuming the poisoned data.

After this patch set:

  # STEP 1: enable early kill mode
  #sysctl -w vm.memory_failure_early_kill=1
  vm.memory_failure_early_kill = 1

  # STEP 2: inject a UCE error and consume it to trigger a synchronous error
  #einj_mem_uc single
  0: single   vaddr = 0xffffb0d75400 paddr = 4092d55b400
  injecting ...
  triggering ...
  signal 7 code 4 addr 0xffffb0d75000
  page not present
  Test passed

The si_code (code 4) from einj_mem_uc indicates a BUS_MCEERR_AR error,
as expected.

Issue 2: a synchronous error infinite loop because memory_failure() failed

If a user-space process, e.g. devmem, accesses a poisoned page for which
the HWPoison flag has already been set, kill_accessing_process() is
called to send SIGBUS to the current process with the error info. Because
memory_failure() is executed in the kworker context, it does nothing but
return -EFAULT. So devmem accesses the poisoned page and triggers the
exception again, resulting in a synchronous error infinite loop. Such a
loop may cause the platform firmware to exceed some threshold and reboot
when Linux could have recovered from this error.

To reproduce this problem:

  # STEP 1: inject a UCE error; the kernel will set the HWPoison flag on the related page
  #einj_mem_uc single
  0: single   vaddr = 0xffffb0d75400 paddr = 4092d55b400
  injecting ...
  triggering ...
  signal 7 code 4 addr 0xffffb0d75000
  page not present
  Test passed

  # STEP 2: access the same page and it will trigger a synchronous error infinite loop
  devmem 0x4092d55b400

To fix it, if memory_failure() fails, force kill the current process.

Issue 3: a synchronous error infinite loop because no memory_failure() work is queued

No memory_failure() work is queued unless all the preconditions below pass:

- `if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))` in ghes_handle_memory_failure()
- `if (flags == -1)` in ghes_handle_memory_failure()
- `if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))` in ghes_do_memory_failure()
- `if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr))` in ghes_do_memory_failure()

If any of these preconditions fails, the user-space process will trigger an SEA again.
This loop can potentially exceed the platform firmware threshold, or even
trigger a kernel hard lockup, leading to a system reboot.

To fix it, if no memory_failure() work is queued, force kill the current process.


Lv Ying and XiuQi from Huawei also proposed to address a similar problem [2][4].
Thanks to them for the discussion.

[1] Add ARMv8 RAS virtualization support in QEMU: https://patchew.org/QEMU/20200512030609.19593-1-gengdongjiu@huawei.com/
[2] https://lore.kernel.org/lkml/20221205115111.131568-3-lvying6@huawei.com/
[3] https://lkml.kernel.org/r/20220914064935.7851-1-xueshuai@linux.alibaba.com
[4] https://lore.kernel.org/lkml/20221209095407.383211-1-lvying6@huawei.com/
[5] https://patchwork.kernel.org/project/linux-arm-kernel/cover/20240528085915.1955987-1-tongtiangen@huawei.com/

Shuai Xue (4):
  ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on
    synchronous events
  ACPI: APEI: send SIGBUS to current task if synchronous memory error
    not recovered
  mm: memory-failure: move return value documentation to function
    declaration
  ACPI: APEI: handle synchronous exceptions in task work


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/IB0OV7 
 
Link: https://gitee.com/openeuler/kernel/pulls/14132

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
parents 13900cb8 cb67a34e
+0 −5
@@ -1276,11 +1276,6 @@ static void kill_me_maybe(struct callback_head *cb)
		return;
	}

	/*
	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
	 * to the current process with the proper error info, so no need to
	 * send SIGBUS here again.
	 */
	if (ret == -EHWPOISON)
		return;

+76 −38
@@ -99,6 +99,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * A platform may describe one error source for the handling of synchronous
 * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
 * or External Interrupt). On x86, the HEST notifications are always
 * asynchronous, so only SEA on ARM is delivered as a synchronous
 * notification.
 */
static inline bool is_hest_sync_notify(struct ghes *ghes)
{
	u8 notify_type = ghes->generic->notify.type;

	return notify_type == ACPI_HEST_NOTIFY_SEA;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
@@ -427,28 +441,41 @@ static void ghes_clear_estatus(struct ghes *ghes,
}

/*
 * Called as task_work before returning to user-space.
 * Ensure any queued work has been done before we return to the context that
 * triggered the notification.
 * struct ghes_task_work - for synchronous RAS event
 *
 * @twork:                callback_head for task work
 * @pfn:                  page frame number of corrupted page
 * @flags:                work control flags
 *
 * Structure to pass task work to be handled before
 * returning to user-space via task_work_add().
 */
static void ghes_kick_task_work(struct callback_head *head)
struct ghes_task_work {
	struct callback_head twork;
	u64 pfn;
	int flags;
};

static void memory_failure_cb(struct callback_head *twork)
{
	struct acpi_hest_generic_status *estatus;
	struct ghes_estatus_node *estatus_node;
	u32 node_len;
	struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork);
	int ret;

	estatus_node = container_of(head, struct ghes_estatus_node, task_work);
	if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
		memory_failure_queue_kick(estatus_node->task_work_cpu);
	ret = memory_failure(twcb->pfn, twcb->flags);
	gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb));

	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
	if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP)
		return;

	pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
			twcb->pfn, current->comm, task_pid_nr(current));
	force_sig(SIGBUS);
}

static bool ghes_do_memory_failure(u64 physical_addr, int flags)
{
	unsigned long pfn;
	struct ghes_task_work *twcb;

	if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
		return false;
@@ -461,12 +488,24 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
		return false;
	}

	if (flags == MF_ACTION_REQUIRED && current->mm) {
		twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb));
		if (!twcb)
			return false;

		twcb->pfn = pfn;
		twcb->flags = flags;
		init_task_work(&twcb->twork, memory_failure_cb);
		task_work_add(current, &twcb->twork, TWA_RESUME);
		return true;
	}

	memory_failure_queue(pfn, flags);
	return true;
}

static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
				       int sev)
				       int sev, bool sync)
{
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
@@ -480,7 +519,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;
		flags = sync ? MF_ACTION_REQUIRED : 0;

	if (flags != -1)
		return ghes_do_memory_failure(mem_err->physical_addr, flags);
@@ -488,9 +527,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
	return false;
}

static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
				       int sev, bool sync)
{
	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
	int flags = sync ? MF_ACTION_REQUIRED : 0;
	bool queued = false;
	int sec_sev, i;
	char *p;
@@ -514,7 +555,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
		 * and don't filter out 'corrected' error here.
		 */
		if (is_cache && has_pa) {
			queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
			queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
			p += err_info->length;
			continue;
		}
@@ -626,7 +667,7 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
	schedule_work(&entry->work);
}

static bool ghes_do_proc(struct ghes *ghes,
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
@@ -635,6 +676,7 @@ static bool ghes_do_proc(struct ghes *ghes,
	const guid_t *fru_id = &guid_null;
	char *fru_text = "";
	bool queued = false;
	bool sync = is_hest_sync_notify(ghes);

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
@@ -652,13 +694,13 @@ static bool ghes_do_proc(struct ghes *ghes,
			ghes_edac_report_mem_error(sev, mem_err);

			arch_apei_report_mem_error(sev, mem_err);
			queued = ghes_handle_memory_failure(gdata, sev);
			queued = ghes_handle_memory_failure(gdata, sev, sync);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			ghes_handle_aer(gdata);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
			queued = ghes_handle_arm_hw_error(gdata, sev);
			queued = ghes_handle_arm_hw_error(gdata, sev, sync);
#ifdef CONFIG_ACPI_APEI_GHES_TS_CORE
		}
		else if (guid_equal(sec_type, &CPER_SEC_TS_CORE)) {
@@ -681,7 +723,15 @@ static bool ghes_do_proc(struct ghes *ghes,
#endif
	}

	return queued;
	/*
	 * If no memory failure work is queued for abnormal synchronous
	 * errors, do a force kill.
	 */
	if (sync && !queued) {
		pr_err(HW_ERR GHES_PFX "%s:%d: hardware memory corruption (SIGBUS)\n",
				current->comm, task_pid_nr(current));
		force_sig(SIGBUS);
	}
}

static void __ghes_print_estatus(const char *pfx,
@@ -977,9 +1027,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	bool task_work_pending;
	u32 len, node_len;
	int ret;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
@@ -994,25 +1042,16 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);

		ghes_do_proc(estatus_node->ghes, estatus);

		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}

		if (task_work_pending && current->mm) {
			estatus_node->task_work.func = ghes_kick_task_work;
			estatus_node->task_work_cpu = smp_processor_id();
			ret = task_work_add(current, &estatus_node->task_work,
					    TWA_RESUME);
			if (ret)
				estatus_node->task_work.func = NULL;
		}

		if (!estatus_node->task_work.func)
			gen_pool_free(ghes_estatus_pool,
				      (unsigned long)estatus_node, node_len);
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);

		llnode = next;
	}
@@ -1073,7 +1112,6 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus_node->task_work.func = NULL;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);

	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
+0 −3
@@ -33,9 +33,6 @@ struct ghes_estatus_node {
	struct llist_node llnode;
	struct acpi_hest_generic *generic;
	struct ghes *ghes;

	int task_work_cpu;
	struct callback_head task_work;
};

struct ghes_estatus_cache {
+0 −1
@@ -3172,7 +3172,6 @@ enum mf_flags {
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
+6 −13
@@ -1524,6 +1524,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks hold.
 * Return:
 *   0             - success,
 *   -ENXIO        - memory not managed by the kernel
 *   -EHWPOISON    - the page was already poisoned, potentially
 *                   kill process,
 *   other negative values - failure.
 */
int memory_failure(unsigned long pfn, int flags)
{
@@ -1792,19 +1798,6 @@ static void memory_failure_work_func(struct work_struct *work)
	}
}

/*
 * Process memory_failure work queued on the specified CPU.
 * Used to avoid return-to-userspace racing with the memory_failure workqueue.
 */
void memory_failure_queue_kick(int cpu)
{
	struct memory_failure_cpu *mf_cpu;

	mf_cpu = &per_cpu(memory_failure_cpu, cpu);
	cancel_work_sync(&mf_cpu->work);
	memory_failure_work_func(&mf_cpu->work);
}

static int __init memory_failure_init(void)
{
	struct memory_failure_cpu *mf_cpu;