Commit c4edb2ba authored by Paolo Bonzini
KVM/arm64 updates for 5.20:

- Unwinder implementations for both nVHE modes (classic and
  protected), complete with an overflow stack

- Rework of the sysreg access from userspace, with a complete
  rewrite of the vgic-v3 view to align with the rest of the
  infrastructure

- Disaggregation of the vcpu flags into separate sets to better
  track their usage model (see the sketch below)

- A fix for the GICv2-on-v3 selftest

- A small set of cosmetic fixes
parents 63f4b210 0982c8d8
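
To see the flag disaggregation concretely before diving into the diffs, here is a compile-and-run sketch. This is illustrative userspace C, not kernel code; the struct merely mirrors the three flag sets and the relocated pause bool introduced in the kvm_host.h hunks below.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative model of the disaggregated vcpu flag sets. */
struct vcpu_arch_model {
	uint8_t cflags;	/* configuration: set once before the vcpu runs */
	uint8_t iflags;	/* inputs to the hypervisor, may be cleared after use */
	uint8_t sflags;	/* host-side bookkeeping, never read by hyp code */
	bool	pause;	/* set/cleared outside vcpu context, so kept separate */
};

int main(void)
{
	struct vcpu_arch_model v = { 0 };

	v.cflags |= 1u << 0;	/* e.g. GUEST_HAS_SVE lives in cflags */
	v.iflags |= 1u << 1;	/* e.g. INCREMENT_PC lives in iflags */

	/* the same bit index in different sets can no longer collide */
	assert((v.cflags & (1u << 1)) == 0);
	return 0;
}
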
+16 −0
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
 	unsigned long vtcr;
 };
 
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base:                 hyp VA of the hyp_stack base.
+ * @overflow_stack_base:        hyp VA of the hyp_overflow_stack base.
+ * @fp:                         hyp FP where the backtrace begins.
+ * @pc:                         hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+	unsigned long stack_base;
+	unsigned long overflow_stack_base;
+	unsigned long fp;
+	unsigned long pc;
+};
+
 /* Translate a kernel address @ptr into its equivalent linear mapping */
 #define kvm_ksym_ref(ptr)						\
 	({								\
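
To make the struct's role concrete, here is a minimal host-side sketch of walking an AArch64 frame-pointer chain from the recorded fp/pc pair. This is not the kernel's unwinder: the address-space translation is stubbed out as a hypothetical identity hyp_to_host(), and real code must bounds-check every frame against the stack limits.

#include <stdio.h>

struct kvm_nvhe_stacktrace_info {
	unsigned long stack_base;
	unsigned long overflow_stack_base;
	unsigned long fp;
	unsigned long pc;
};

/* Hypothetical stub: map a hyp VA to a host-dereferenceable pointer. */
static const unsigned long *hyp_to_host(unsigned long hyp_va)
{
	return (const unsigned long *)hyp_va;	/* identity, for illustration */
}

/* An AArch64 frame record is two words: the caller's x29, then x30. */
static void dump_hyp_backtrace(const struct kvm_nvhe_stacktrace_info *info)
{
	unsigned long fp = info->fp, pc = info->pc;

	while (fp) {
		const unsigned long *frame = hyp_to_host(fp);

		printf(" [<%016lx>]\n", pc);
		fp = frame[0];	/* previous frame pointer */
		pc = frame[1];	/* return address */
	}
}

int main(void)
{
	/* fake two-frame chain: the outer frame terminates with fp == 0 */
	unsigned long outer[2] = { 0, 0xffff800008001234UL };
	unsigned long inner[2] = { (unsigned long)outer, 0xffff800008005678UL };
	struct kvm_nvhe_stacktrace_info info = {
		.fp = (unsigned long)inner,
		.pc = 0xffff80000800abcdUL,
	};

	dump_hyp_backtrace(&info);
	return 0;
}
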
+10 −1
@@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 
 static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+	WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+	vcpu_set_flag(vcpu, INCREMENT_PC);
 }
 
+#define kvm_pend_exception(v, e)					\
+	do {								\
+		WARN_ON(vcpu_get_flag((v), INCREMENT_PC));		\
+		vcpu_set_flag((v), PENDING_EXCEPTION);			\
+		vcpu_set_flag((v), e);					\
+	} while (0)
+
+
 static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
 {
 	return test_bit(feature, vcpu->arch.features);
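
The two WARN_ON()s encode an invariant: on a given exit, a vcpu may carry a pending exception or a pending PC increment, never both. The reason is visible in the kvm_host.h diff below, where INCREMENT_PC overlaps the exception-target mask; a standalone model of that overlap, with bit numbers copied from that diff:

#include <assert.h>
#include <stdint.h>

/* iflags layout from the kvm_host.h diff below */
#define PENDING_EXCEPTION	(1u << 0)
#define INCREMENT_PC		(1u << 1)
#define EXCEPT_MASK		(7u << 1)	/* GENMASK(3, 1) */

int main(void)
{
	/* pend an AArch64 EL1 SError: target value 3 inside EXCEPT_MASK */
	uint8_t iflags = PENDING_EXCEPTION | (3u << 1);

	/*
	 * INCREMENT_PC is a bit inside EXCEPT_MASK, so setting it here
	 * would corrupt the encoded target; the WARN_ON()s above catch
	 * any caller mixing the two.
	 */
	assert(INCREMENT_PC & EXCEPT_MASK);
	(void)iflags;
	return 0;
}
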
+148 −57
@@ -325,8 +325,30 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
-	/* Miscellaneous vcpu state flags */
-	u64 flags;
+	/* Ownership of the FP regs */
+	enum {
+		FP_STATE_FREE,
+		FP_STATE_HOST_OWNED,
+		FP_STATE_GUEST_OWNED,
+	} fp_state;
+
+	/* Configuration flags, set once and for all before the vcpu can run */
+	u8 cflags;
+
+	/* Input flags to the hypervisor code, potentially cleared after use */
+	u8 iflags;
+
+	/* State flags for kernel bookkeeping, unused by the hypervisor code */
+	u8 sflags;
+
+	/*
+	 * Don't run the guest (internal implementation need).
+	 *
+	 * Contrary to the flags above, this is set/cleared outside of
+	 * a vcpu context, and thus cannot be mixed with the flags
+	 * themselves (or the flag accesses need to be made atomic).
+	 */
+	bool pause;
 
 	/*
 	 * We maintain more than a single set of debug registers to support
@@ -376,9 +398,6 @@ struct kvm_vcpu_arch {
 	/* vcpu power state */
 	struct kvm_mp_state mp_state;
 
-	/* Don't run the guest (internal implementation need) */
-	bool pause;
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
@@ -392,10 +411,6 @@ struct kvm_vcpu_arch {
 	/* Additional reset state */
 	struct vcpu_reset_state	reset_state;
 
-	/* True when deferrable sysregs are loaded on the physical CPU,
-	 * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
-	bool sysregs_loaded_on_cpu;
-
 	/* Guest PV state */
 	struct {
 		u64 last_steal;
@@ -403,6 +418,124 @@ struct kvm_vcpu_arch {
 	} steal;
 };
 
+/*
+ * Each 'flag' is composed of a comma-separated triplet:
+ *
+ * - the flag-set it belongs to in the vcpu->arch structure
+ * - the value for that flag
+ * - the mask for that flag
+ *
+ * __vcpu_single_flag() builds such a triplet for a single-bit flag.
+ * unpack_vcpu_flag() extracts the flag value from the triplet for
+ * direct use outside of the flag accessors.
+ */
+#define __vcpu_single_flag(_set, _f)	_set, (_f), (_f)
+
+#define __unpack_flag(_set, _f, _m)	_f
+#define unpack_vcpu_flag(...)		__unpack_flag(__VA_ARGS__)
+
+#define __build_check_flag(v, flagset, f, m)			\
+	do {							\
+		typeof(v->arch.flagset) *_fset;			\
+								\
+		/* Check that the flags fit in the mask */	\
+		BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m)));	\
+		/* Check that the flags fit in the type */	\
+		BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m));	\
+	} while (0)
+
+#define __vcpu_get_flag(v, flagset, f, m)			\
+	({							\
+		__build_check_flag(v, flagset, f, m);		\
+								\
+		v->arch.flagset & (m);				\
+	})
+
+#define __vcpu_set_flag(v, flagset, f, m)			\
+	do {							\
+		typeof(v->arch.flagset) *fset;			\
+								\
+		__build_check_flag(v, flagset, f, m);		\
+								\
+		fset = &v->arch.flagset;			\
+		if (HWEIGHT(m) > 1)				\
+			*fset &= ~(m);				\
+		*fset |= (f);					\
+	} while (0)
+
+#define __vcpu_clear_flag(v, flagset, f, m)			\
+	do {							\
+		typeof(v->arch.flagset) *fset;			\
+								\
+		__build_check_flag(v, flagset, f, m);		\
+								\
+		fset = &v->arch.flagset;			\
+		*fset &= ~(m);					\
+	} while (0)
+
+#define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)
+#define vcpu_set_flag(v, ...)	__vcpu_set_flag((v), __VA_ARGS__)
+#define vcpu_clear_flag(v, ...)	__vcpu_clear_flag((v), __VA_ARGS__)
+
+/* SVE exposed to guest */
+#define GUEST_HAS_SVE		__vcpu_single_flag(cflags, BIT(0))
+/* SVE config completed */
+#define VCPU_SVE_FINALIZED	__vcpu_single_flag(cflags, BIT(1))
+/* PTRAUTH exposed to guest */
+#define GUEST_HAS_PTRAUTH	__vcpu_single_flag(cflags, BIT(2))
+
+/* Exception pending */
+#define PENDING_EXCEPTION	__vcpu_single_flag(iflags, BIT(0))
+/*
+ * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
+ * be set together with an exception...
+ */
+#define INCREMENT_PC		__vcpu_single_flag(iflags, BIT(1))
+/* Target EL/MODE (not a single flag, but let's abuse the macro) */
+#define EXCEPT_MASK		__vcpu_single_flag(iflags, GENMASK(3, 1))
+
+/* Helpers to encode exceptions with minimum fuss */
+#define __EXCEPT_MASK_VAL	unpack_vcpu_flag(EXCEPT_MASK)
+#define __EXCEPT_SHIFT		__builtin_ctzl(__EXCEPT_MASK_VAL)
+#define __vcpu_except_flags(_f)	iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
+
+/*
+ * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
+ * values:
+ *
+ * For AArch32 EL1:
+ */
+#define EXCEPT_AA32_UND		__vcpu_except_flags(0)
+#define EXCEPT_AA32_IABT	__vcpu_except_flags(1)
+#define EXCEPT_AA32_DABT	__vcpu_except_flags(2)
+/* For AArch64: */
+#define EXCEPT_AA64_EL1_SYNC	__vcpu_except_flags(0)
+#define EXCEPT_AA64_EL1_IRQ	__vcpu_except_flags(1)
+#define EXCEPT_AA64_EL1_FIQ	__vcpu_except_flags(2)
+#define EXCEPT_AA64_EL1_SERR	__vcpu_except_flags(3)
+/* For AArch64 with NV (one day): */
+#define EXCEPT_AA64_EL2_SYNC	__vcpu_except_flags(4)
+#define EXCEPT_AA64_EL2_IRQ	__vcpu_except_flags(5)
+#define EXCEPT_AA64_EL2_FIQ	__vcpu_except_flags(6)
+#define EXCEPT_AA64_EL2_SERR	__vcpu_except_flags(7)
+/* Guest debug is live */
+#define DEBUG_DIRTY		__vcpu_single_flag(iflags, BIT(4))
+/* Save SPE context if active  */
+#define DEBUG_STATE_SAVE_SPE	__vcpu_single_flag(iflags, BIT(5))
+/* Save TRBE context if active  */
+#define DEBUG_STATE_SAVE_TRBE	__vcpu_single_flag(iflags, BIT(6))
+
+/* SVE enabled for host EL0 */
+#define HOST_SVE_ENABLED	__vcpu_single_flag(sflags, BIT(0))
+/* SME enabled for EL0 */
+#define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
+/* Physical CPU not in supported_cpus */
+#define ON_UNSUPPORTED_CPU	__vcpu_single_flag(sflags, BIT(2))
+/* WFIT instruction trapped */
+#define IN_WFIT			__vcpu_single_flag(sflags, BIT(3))
+/* vcpu system registers loaded on physical CPU */
+#define SYSREGS_ON_CPU		__vcpu_single_flag(sflags, BIT(4))
+
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
 			     sve_ffr_offset((vcpu)->arch.sve_max_vl))
@@ -423,70 +556,31 @@ struct kvm_vcpu_arch {
 	__size_ret;							\
 })
 
-/* vcpu_arch flags field values: */
-#define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
-#define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
-#define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
-#define KVM_ARM64_GUEST_HAS_SVE		(1 << 5) /* SVE exposed to guest */
-#define KVM_ARM64_VCPU_SVE_FINALIZED	(1 << 6) /* SVE config completed */
-#define KVM_ARM64_GUEST_HAS_PTRAUTH	(1 << 7) /* PTRAUTH exposed to guest */
-#define KVM_ARM64_PENDING_EXCEPTION	(1 << 8) /* Exception pending */
-/*
- * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
- * set together with an exception...
- */
-#define KVM_ARM64_INCREMENT_PC		(1 << 9) /* Increment PC */
-#define KVM_ARM64_EXCEPT_MASK		(7 << 9) /* Target EL/MODE */
-/*
- * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
- * take the following values:
- *
- * For AArch32 EL1:
- */
-#define KVM_ARM64_EXCEPT_AA32_UND	(0 << 9)
-#define KVM_ARM64_EXCEPT_AA32_IABT	(1 << 9)
-#define KVM_ARM64_EXCEPT_AA32_DABT	(2 << 9)
-/* For AArch64: */
-#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC	(0 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ	(1 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ	(2 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_SERR	(3 << 9)
-#define KVM_ARM64_EXCEPT_AA64_EL1	(0 << 11)
-#define KVM_ARM64_EXCEPT_AA64_EL2	(1 << 11)
-
-#define KVM_ARM64_DEBUG_STATE_SAVE_SPE	(1 << 12) /* Save SPE context if active  */
-#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE	(1 << 13) /* Save TRBE context if active  */
-#define KVM_ARM64_FP_FOREIGN_FPSTATE	(1 << 14)
-#define KVM_ARM64_ON_UNSUPPORTED_CPU	(1 << 15) /* Physical CPU not in supported_cpus */
-#define KVM_ARM64_HOST_SME_ENABLED	(1 << 16) /* SME enabled for EL0 */
-#define KVM_ARM64_WFIT			(1 << 17) /* WFIT instruction trapped */
-
 #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
 				 KVM_GUESTDBG_USE_SW_BP | \
 				 KVM_GUESTDBG_USE_HW | \
 				 KVM_GUESTDBG_SINGLESTEP)
 
 #define vcpu_has_sve(vcpu) (system_supports_sve() &&			\
-			    ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+			    vcpu_get_flag(vcpu, GUEST_HAS_SVE))
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 #define vcpu_has_ptrauth(vcpu)						\
 	((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||		\
 	  cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&		\
-	 (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
+	  vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
 #else
 #define vcpu_has_ptrauth(vcpu)		false
 #endif
 
 #define vcpu_on_unsupported_cpu(vcpu)					\
-	((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
+	vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
 
 #define vcpu_set_on_unsupported_cpu(vcpu)				\
-	((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
+	vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
 
 #define vcpu_clear_on_unsupported_cpu(vcpu)				\
-	((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
+	vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.regs)
 
@@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
 unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events);
@@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
-#define kvm_arm_vcpu_sve_finalized(vcpu) \
-	((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
 
 #define kvm_has_mte(kvm)					\
 	(system_supports_mte() &&				\
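
The triplet machinery above is compact but dense. The following standalone model (userspace C, simplified: no BUILD_BUG_ON checks, and the mask is pre-cleared unconditionally where the real macro only pre-clears multi-bit masks, which is equivalent for single-bit flags) shows how a flag name expands into a (set, value, mask) triplet and why multi-bit fields are cleared before setting:

#include <assert.h>
#include <stdint.h>

struct vcpu_model { struct { uint8_t iflags; } arch; };

/* A flag is a (flagset, value, mask) triplet, as in the diff above. */
#define __vcpu_single_flag(_set, _f)	_set, (_f), (_f)

#define INCREMENT_PC		__vcpu_single_flag(iflags, 1u << 1)
/* mirrors __vcpu_except_flags(3), i.e. EXCEPT_AA64_EL1_SERR */
#define EXCEPT_AA64_EL1_SERR	iflags, (3u << 1), (7u << 1)

#define __vcpu_set_flag(v, flagset, f, m)		\
	do {						\
		(v)->arch.flagset &= ~(m);	/* clear the whole field */ \
		(v)->arch.flagset |= (f);		\
	} while (0)

#define __vcpu_get_flag(v, flagset, f, m)	((v)->arch.flagset & (m))

#define vcpu_set_flag(v, ...)	__vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)

int main(void)
{
	struct vcpu_model v = { { 0 } };

	vcpu_set_flag(&v, INCREMENT_PC);	/* the name expands the triplet */
	assert(vcpu_get_flag(&v, INCREMENT_PC));

	vcpu_set_flag(&v, EXCEPT_AA64_EL1_SERR);	/* mask cleared, then set */
	assert(vcpu_get_flag(&v, EXCEPT_AA64_EL1_SERR) == (3u << 1));
	return 0;
}
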
+8 −0
@@ -113,6 +113,14 @@
 
 #define OVERFLOW_STACK_SIZE	SZ_4K
 
+/*
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwound stacktrace; plus an additional entry to delimit the
+ * end.
+ */
+#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+
 /*
  * Alignment of kernel segments (e.g. .text, .data).
  *
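
The sizing comment packs in some arithmetic; a worked check, assuming 4K pages and 64-bit long: the smallest frame record is 16 bytes ([x29, x30]) and the unwinder stores one 8-byte entry per frame, so a trace occupies at most half the combined stack size, plus one delimiter entry.

#include <assert.h>
#include <stddef.h>

#define SZ_4K			4096
#define PAGE_SIZE		SZ_4K		/* assumption: 4K pages */
#define OVERFLOW_STACK_SIZE	SZ_4K
#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))

int main(void)
{
	/* max frames = combined stack bytes / 16-byte minimum frame */
	size_t max_frames = (OVERFLOW_STACK_SIZE + PAGE_SIZE) / 16;

	/* one entry per frame + one delimiter = 4104 bytes here (LP64) */
	assert(NVHE_STACKTRACE_SIZE == max_frames * sizeof(long) + sizeof(long));
	return 0;
}
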
+2 −60
@@ -8,52 +8,20 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
-#include <linux/types.h>
 #include <linux/llist.h>
 
 #include <asm/memory.h>
-#include <asm/pointer_auth.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	__NR_STACK_TYPES
-};
-
-struct stack_info {
-	unsigned long low;
-	unsigned long high;
-	enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
 
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 			   const char *loglvl);
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_stack(unsigned long sp, unsigned long size,
-			    unsigned long low, unsigned long high,
-			    enum stack_type type, struct stack_info *info)
-{
-	if (!low)
-		return false;
-
-	if (sp < low || sp + size < sp || sp + size > high)
-		return false;
-
-	if (info) {
-		info->low = low;
-		info->high = high;
-		info->type = type;
-	}
-	return true;
-}
-
 static inline bool on_irq_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 			struct stack_info *info) { return false; }
 #endif
 
-
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	if (on_task_stack(tsk, sp, size, info))
-		return true;
-	if (tsk != current || preemptible())
-		return false;
-	if (on_irq_stack(sp, size, info))
-		return true;
-	if (on_overflow_stack(sp, size, info))
-		return true;
-	if (on_sdei_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
 #endif	/* __ASM_STACKTRACE_H */
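
The on_stack() helper removed above (shared unwinding code moves behind the new asm/stacktrace/common.h include) guards against address-space wraparound with the 'sp + size < sp' test; a standalone illustration of that check:

#include <assert.h>
#include <stdbool.h>

/* Standalone copy of the range check from the removed on_stack() helper. */
static bool in_range(unsigned long sp, unsigned long size,
		     unsigned long low, unsigned long high)
{
	if (!low)
		return false;
	/* 'sp + size < sp' rejects an access that wraps the address space */
	return !(sp < low || sp + size < sp || sp + size > high);
}

int main(void)
{
	assert(in_range(0x1000, 16, 0x1000, 0x2000));
	assert(!in_range(~0UL - 8, 16, 0x1000, ~0UL));	/* wraps, rejected */
	return 0;
}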