Commit 0982c8d8 authored by Marc Zyngier's avatar Marc Zyngier
Browse files

Merge branch kvm-arm64/nvhe-stacktrace into kvmarm-master/next



* kvm-arm64/nvhe-stacktrace: (27 commits)
  : .
  : Add an overflow stack to the nVHE EL2 code, allowing
  : the implementation of an unwinder, courtesy of
  : Kalesh Singh. From the cover letter (slightly edited):
  :
  : "nVHE has two modes of operation: protected (pKVM) and unprotected
  : (conventional nVHE). Depending on the mode, a slightly different approach
  : is used to dump the hypervisor stacktrace but the core unwinding logic
  : remains the same.
  :
  : * Protected nVHE (pKVM) stacktraces:
  :
  : In protected nVHE mode, the host cannot directly access hypervisor memory.
  :
  : The hypervisor stack unwinding happens in EL2 and is made accessible to
  : the host via a shared buffer. Symbolizing and printing the stacktrace
  : addresses is delegated to the host and happens in EL1.
  :
  : * Non-protected (Conventional) nVHE stacktraces:
  :
  : In non-protected mode, the host is able to directly access the hypervisor
  : stack pages.
  :
  : The hypervisor stack unwinding and dumping of the stacktrace is performed
  : by the host in EL1, as this avoids the memory overhead of setting up
  : shared buffers between the host and hypervisor."
  :
  : Additional patches from Oliver Upton and Marc Zyngier, tidying up
  : the initial series.
  : .
  arm64: Update 'unwinder howto'
  KVM: arm64: Don't open code ARRAY_SIZE()
  KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c
  KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions
  KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit
  KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around
  KVM: arm64: Introduce pkvm_dump_backtrace()
  KVM: arm64: Implement protected nVHE hyp stack unwinder
  KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace
  KVM: arm64: Stub implementation of pKVM HYP stack unwinder
  KVM: arm64: Allocate shared pKVM hyp stacktrace buffers
  KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig
  KVM: arm64: Introduce hyp_dump_backtrace()
  KVM: arm64: Implement non-protected nVHE hyp stack unwinder
  KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace
  KVM: arm64: Stub implementation of non-protected nVHE HYP stack unwinder
  KVM: arm64: On stack overflow switch to hyp overflow_stack
  arm64: stacktrace: Add description of stacktrace/common.h
  arm64: stacktrace: Factor out common unwind()
  arm64: stacktrace: Handle frame pointer from different address spaces
  ...

Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
parents ae98a4a9 a4c750e2
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
	unsigned long vtcr;
};

/*
 * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
 * hyp_panic() in non-protected mode.
 *
 * @stack_base:                 hyp VA of the hyp_stack base.
 * @overflow_stack_base:        hyp VA of the hyp_overflow_stack base.
 * @fp:                         hyp FP where the backtrace begins.
 * @pc:                         hyp PC where the backtrace begins.
 */
struct kvm_nvhe_stacktrace_info {
	unsigned long stack_base;
	unsigned long overflow_stack_base;
	unsigned long fp;
	unsigned long pc;
};

/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr)						\
	({								\
+8 −0
Original line number Diff line number Diff line
@@ -113,6 +113,14 @@

#define OVERFLOW_STACK_SIZE	SZ_4K

/*
 * With the minimum frame size of [x29, x30], exactly half the combined
 * sizes of the hyp and overflow stacks is the maximum size needed to
 * save the unwinded stacktrace; plus an additional entry to delimit the
 * end.
 */
#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))

/*
 * Alignment of kernel segments (e.g. .text, .data).
 *
+2 −60
Original line number Diff line number Diff line
@@ -8,52 +8,20 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/types.h>
#include <linux/llist.h>

#include <asm/memory.h>
#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>

enum stack_type {
	STACK_TYPE_UNKNOWN,
	STACK_TYPE_TASK,
	STACK_TYPE_IRQ,
	STACK_TYPE_OVERFLOW,
	STACK_TYPE_SDEI_NORMAL,
	STACK_TYPE_SDEI_CRITICAL,
	__NR_STACK_TYPES
};

struct stack_info {
	unsigned long low;
	unsigned long high;
	enum stack_type type;
};
#include <asm/stacktrace/common.h>

extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
			   const char *loglvl);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

static inline bool on_stack(unsigned long sp, unsigned long size,
			    unsigned long low, unsigned long high,
			    enum stack_type type, struct stack_info *info)
{
	if (!low)
		return false;

	if (sp < low || sp + size < sp || sp + size > high)
		return false;

	if (info) {
		info->low = low;
		info->high = high;
		info->type = type;
	}
	return true;
}

static inline bool on_irq_stack(unsigned long sp, unsigned long size,
				struct stack_info *info)
{
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
			struct stack_info *info) { return false; }
#endif


/*
 * We can only safely access per-cpu stacks from current in a non-preemptible
 * context.
 */
static inline bool on_accessible_stack(const struct task_struct *tsk,
				       unsigned long sp, unsigned long size,
				       struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	if (on_task_stack(tsk, sp, size, info))
		return true;
	if (tsk != current || preemptible())
		return false;
	if (on_irq_stack(sp, size, info))
		return true;
	if (on_overflow_stack(sp, size, info))
		return true;
	if (on_sdei_stack(sp, size, info))
		return true;

	return false;
}

#endif	/* __ASM_STACKTRACE_H */
+199 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Common arm64 stack unwinder code.
 *
 * To implement a new arm64 stack unwinder:
 *     1) Include this header
 *
 *     2) Call into unwind_next_common() from your top level unwind
 *        function, passing it the validation and translation callbacks
 *        (though the later can be NULL if no translation is required).
 *
 * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
 *
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>

enum stack_type {
	STACK_TYPE_UNKNOWN,
	STACK_TYPE_TASK,
	STACK_TYPE_IRQ,
	STACK_TYPE_OVERFLOW,
	STACK_TYPE_SDEI_NORMAL,
	STACK_TYPE_SDEI_CRITICAL,
	STACK_TYPE_HYP,
	__NR_STACK_TYPES
};

struct stack_info {
	unsigned long low;
	unsigned long high;
	enum stack_type type;
};

/*
 * A snapshot of a frame record or fp/lr register values, along with some
 * accounting information necessary for robust unwinding.
 *
 * @fp:          The fp value in the frame record (or the real fp)
 * @pc:          The lr value in the frame record (or the real lr)
 *
 * @stacks_done: Stacks which have been entirely unwound, for which it is no
 *               longer valid to unwind to.
 *
 * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
 *               of 0. This is used to ensure that within a stack, each
 *               subsequent frame record is at an increasing address.
 * @prev_type:   The type of stack this frame record was on, or a synthetic
 *               value of STACK_TYPE_UNKNOWN. This is used to detect a
 *               transition from one stack to another.
 *
 * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
 *               associated with the most recently encountered replacement lr
 *               value.
 *
 * @task:        The task being unwound.
 */
struct unwind_state {
	unsigned long fp;
	unsigned long pc;
	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
	unsigned long prev_fp;
	enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
	struct llist_node *kr_cur;
#endif
	struct task_struct *task;
};

static inline bool on_stack(unsigned long sp, unsigned long size,
			    unsigned long low, unsigned long high,
			    enum stack_type type, struct stack_info *info)
{
	if (!low)
		return false;

	if (sp < low || sp + size < sp || sp + size > high)
		return false;

	if (info) {
		info->low = low;
		info->high = high;
		info->type = type;
	}
	return true;
}

static inline void unwind_init_common(struct unwind_state *state,
				      struct task_struct *task)
{
	state->task = task;
#ifdef CONFIG_KRETPROBES
	state->kr_cur = NULL;
#endif

	/*
	 * Prime the first unwind.
	 *
	 * In unwind_next() we'll check that the FP points to a valid stack,
	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
	 * treated as a transition to whichever stack that happens to be. The
	 * prev_fp value won't be used, but we set it to 0 such that it is
	 * definitely not an accessible stack address.
	 */
	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
	state->prev_fp = 0;
	state->prev_type = STACK_TYPE_UNKNOWN;
}

/*
 * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
 * a kernel address.
 *
 * @fp:   the frame pointer to be updated to its kernel address.
 * @type: the stack type associated with frame pointer @fp
 *
 * Returns true and success and @fp is updated to the corresponding
 * kernel virtual address; otherwise returns false.
 */
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
					    enum stack_type type);

/*
 * on_accessible_stack_fn() - Check whether a stack range is on any
 * of the possible stacks.
 *
 * @tsk:  task whose stack is being unwound
 * @sp:   stack address being checked
 * @size: size of the stack range being checked
 * @info: stack unwinding context
 */
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
				       unsigned long sp, unsigned long size,
				       struct stack_info *info);

static inline int unwind_next_common(struct unwind_state *state,
				     struct stack_info *info,
				     on_accessible_stack_fn accessible,
				     stack_trace_translate_fp_fn translate_fp)
{
	unsigned long fp = state->fp, kern_fp = fp;
	struct task_struct *tsk = state->task;

	if (fp & 0x7)
		return -EINVAL;

	if (!accessible(tsk, fp, 16, info))
		return -EINVAL;

	if (test_bit(info->type, state->stacks_done))
		return -EINVAL;

	/*
	 * If fp is not from the current address space perform the necessary
	 * translation before dereferencing it to get the next fp.
	 */
	if (translate_fp && !translate_fp(&kern_fp, info->type))
		return -EINVAL;

	/*
	 * As stacks grow downward, any valid record on the same stack must be
	 * at a strictly higher address than the prior record.
	 *
	 * Stacks can nest in several valid orders, e.g.
	 *
	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
	 * HYP -> OVERFLOW
	 *
	 * ... but the nesting itself is strict. Once we transition from one
	 * stack to another, it's never valid to unwind back to that first
	 * stack.
	 */
	if (info->type == state->prev_type) {
		if (fp <= state->prev_fp)
			return -EINVAL;
	} else {
		__set_bit(state->prev_type, state->stacks_done);
	}

	/*
	 * Record this frame record's values and location. The prev_fp and
	 * prev_type are only meaningful to the next unwind_next() invocation.
	 */
	state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
	state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
	state->prev_fp = fp;
	state->prev_type = info->type;

	return 0;
}

#endif	/* __ASM_STACKTRACE_COMMON_H */
+55 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * KVM nVHE hypervisor stack tracing support.
 *
 * The unwinder implementation depends on the nVHE mode:
 *
 *   1) Non-protected nVHE mode - the host can directly access the
 *      HYP stack pages and unwind the HYP stack in EL1. This saves having
 *      to allocate shared buffers for the host to read the unwinded
 *      stacktrace.
 *
 *   2) pKVM (protected nVHE) mode - the host cannot directly access
 *      the HYP memory. The stack is unwinded in EL2 and dumped to a shared
 *      buffer where the host can read and print the stacktrace.
 *
 * Copyright (C) 2022 Google LLC
 */
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H

#include <asm/stacktrace/common.h>

/*
 * kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
 *
 * @state : unwind_state to initialize
 * @fp    : frame pointer at which to start the unwinding.
 * @pc    : program counter at which to start the unwinding.
 */
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
					unsigned long fp,
					unsigned long pc)
{
	unwind_init_common(state, NULL);

	state->fp = fp;
	state->pc = pc;
}

#ifndef __KVM_NVHE_HYPERVISOR__
/*
 * Conventional (non-protected) nVHE HYP stack unwinder
 *
 * In non-protected mode, the unwinding is done from kernel proper context
 * (by the host in EL1).
 */

DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);

void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);

#endif	/* __KVM_NVHE_HYPERVISOR__ */
#endif	/* __ASM_STACKTRACE_NVHE_H */
Loading