Commit 55793a96 authored by Valentin Schneider, committed by sanglipeng
Browse files

panic, kexec: make __crash_kexec() NMI safe

stable inclusion
from stable-v5.10.178
commit 56314b90fd43bd2444942bc14a7c5c768ce8ec57
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8ALH3

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=56314b90fd43bd2444942bc14a7c5c768ce8ec57

--------------------------------

commit 05c62574 upstream.

Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work.  The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():

	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel.  The
warning and return are explained by:

  6ce47fd9 ("rtmutex: Warn if trylock is called from hard/softirq context")
  [...]
  The reasons for this are:

      1) There is a potential deadlock in the slowpath

      2) Another cpu which blocks on the rtmutex will boost the task
	 which allegedly locked the rtmutex, but that cannot work
	 because the hard/softirq context borrows the task context.

Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable.  This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g.  the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.

Tested by triggering NMI panics via:

  $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
  $ echo 1 > /proc/sys/kernel/unknown_nmi_panic
  $ echo 1 > /proc/sys/kernel/panic

  $ ipmitool power diag

Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com


Fixes: 6ce47fd9 ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Juri Lelli <jlelli@redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wen Yang <wenyang.linux@foxmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: sanglipeng <sanglipeng1@jd.com>
parent 3c147e8b
Loading
Loading
Loading
Loading
+4 −7
Original line number Diff line number Diff line
@@ -122,13 +122,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,

	/*
	 * Because we write directly to the reserved memory region when loading
	 * crash kernels we need a mutex here to prevent multiple crash kernels
	 * from attempting to load simultaneously, and to prevent a crash kernel
	 * from loading over the top of a in use crash kernel.
	 *
	 * KISS: always take the mutex.
	 * crash kernels we need a serialization here to prevent multiple crash
	 * kernels from attempting to load simultaneously.
	 */
	if (!mutex_trylock(&kexec_mutex))
	if (!kexec_trylock())
		return -EBUSY;

	if (flags & KEXEC_ON_CRASH) {
@@ -194,7 +191,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,

	kimage_free(image);
out_unlock:
	mutex_unlock(&kexec_mutex);
	kexec_unlock();
	return ret;
}

+10 −10
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@
#include <crypto/hash.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);
atomic_t __kexec_lock = ATOMIC_INIT(0);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -949,7 +949,7 @@ int kexec_load_disabled;
 */
void __noclone __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	/* Take the kexec_lock here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
@@ -957,7 +957,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
	if (kexec_trylock()) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

@@ -966,7 +966,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
		kexec_unlock();
	}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -999,13 +999,13 @@ ssize_t crash_get_memory_size(void)
{
	ssize_t size = 0;

	if (!mutex_trylock(&kexec_mutex))
	if (!kexec_trylock())
		return -EBUSY;

	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);

	mutex_unlock(&kexec_mutex);
	kexec_unlock();
	return size;
}

@@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size)
	unsigned long old_size;
	struct resource *ram_res;

	if (!mutex_trylock(&kexec_mutex))
	if (!kexec_trylock())
		return -EBUSY;

	if (kexec_crash_image) {
@@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
	insert_resource(&iomem_resource, ram_res);

unlock:
	mutex_unlock(&kexec_mutex);
	kexec_unlock();
	return ret;
}

@@ -1136,7 +1136,7 @@ int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
	if (!kexec_trylock())
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
@@ -1211,7 +1211,7 @@ int kernel_kexec(void)
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	kexec_unlock();
	return error;
}

+2 −2
Original line number Diff line number Diff line
@@ -377,7 +377,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,

	image = NULL;

	if (!mutex_trylock(&kexec_mutex))
	if (!kexec_trylock())
		return -EBUSY;

	dest_image = &kexec_image;
@@ -449,7 +449,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
	if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
		arch_kexec_protect_crashkres();

	mutex_unlock(&kexec_mutex);
	kexec_unlock();
	kimage_free(image);
	return ret;
}
+14 −1
Original line number Diff line number Diff line
@@ -15,7 +15,20 @@ int kimage_is_destination_range(struct kimage *image,

int machine_kexec_post_load(struct kimage *image);

extern struct mutex kexec_mutex;
/*
 * Whatever is used to serialize accesses to the kexec_crash_image needs to be
 * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
 * "simple" atomic variable that is acquired with a cmpxchg().
 */
extern atomic_t __kexec_lock;
static inline bool kexec_trylock(void)
{
	/*
	 * Try to take the kexec lock without blocking: atomically swing
	 * __kexec_lock 0 -> 1 and report whether we were the one to do it.
	 * The acquire ordering pairs with the release in kexec_unlock().
	 * Lock-free, so this is safe to call from NMI context (nmi_panic()).
	 */
	return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
}
static inline void kexec_unlock(void)
{
	/*
	 * Drop the kexec lock. The release ordering pairs with the acquire
	 * cmpxchg in kexec_trylock(), publishing all writes made while the
	 * lock was held. Caller must own the lock (no ownership check here).
	 */
	atomic_set_release(&__kexec_lock, 0);
}

#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>