Commit be1a5408 authored by Joerg Roedel's avatar Joerg Roedel Committed by Borislav Petkov
Browse files

x86/sev: Split up runtime #VC handler for correct state tracking



Split up the #VC handler code into a from-user and a from-kernel part.
This allows clean and correct state tracking, as the #VC handler needs
to enter NMI-state when raised from kernel mode and plain IRQ state when
raised from user-mode.

Fixes: 62441a1f ("x86/sev-es: Correctly track IRQ states in runtime #VC handler")
Suggested-by: default avatarPeter Zijlstra <peterz@infradead.org>
Signed-off-by: default avatarJoerg Roedel <jroedel@suse.de>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210618115409.22735-3-joro@8bytes.org
parent d187f217
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)

	movq	%rsp, %rdi		/* pt_regs pointer */

	call	\cfunc
	call	kernel_\cfunc

	/*
	 * No need to switch back to the IST stack. The current stack is either
@@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)

	/* Switch to the regular task stack */
.Lfrom_usermode_switch_stack_\@:
	idtentry_body safe_stack_\cfunc, has_error_code=1
	idtentry_body user_\cfunc, has_error_code=1

_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
+10 −19
Original line number Diff line number Diff line
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
 */
#define DECLARE_IDTENTRY_VC(vector, func)				\
	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
	__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code);	\
	__visible noinstr void   user_##func(struct pt_regs *regs, unsigned long error_code)

/**
 * DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
	DEFINE_IDTENTRY_RAW_ERRORCODE(func)

/**
 * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
				   which runs on a safe stack.
 * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
			       when raised from kernel mode
 * @func:	Function name of the entry point
 *
 * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
 */
#define DEFINE_IDTENTRY_VC_SAFE_STACK(func)				\
	DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
#define DEFINE_IDTENTRY_VC_KERNEL(func)				\
	DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)

/**
 * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
			    which runs on the VC fall-back stack
 * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
			     when raised from user mode
 * @func:	Function name of the entry point
 *
 * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
 */
#define DEFINE_IDTENTRY_VC_IST(func)				\
	DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)

/**
 * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
 * @func:	Function name of the entry point
 *
 * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
 */
#define DEFINE_IDTENTRY_VC(func)					\
	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
#define DEFINE_IDTENTRY_VC_USER(func)				\
	DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)

#else	/* CONFIG_X86_64 */

+79 −69
Original line number Diff line number Diff line
@@ -793,7 +793,7 @@ void __init sev_es_init_vc_handling(void)
	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
@@ -1231,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
	return ES_EXCEPTION;
}

static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
{
	if (user_mode(regs))
		noist_exc_debug(regs);
	else
		exc_debug(regs);
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
@@ -1334,41 +1326,13 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

/*
 * Main #VC exception handler. It is called when the entry code was able to
 * switch off the IST to a safe kernel stack.
 *
 * With the current implementation it is always possible to switch to a safe
 * stack because #VC exceptions only happen at known places, like intercepted
 * instructions or accesses to MMIO areas/IO ports. They can also happen with
 * code instrumentation when the hypervisor intercepts #DB, but the critical
 * paths are forbidden to be instrumented, so #DB exceptions currently also
 * only happen in safe places.
 */
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	irqentry_state_t irq_state;
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
		vc_handle_trap_db(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);
	instrumentation_begin();

	/*
	 * This is invoked through an interrupt gate, so IRQs are disabled. The
	 * code below might walk page-tables for user or kernel addresses, so
	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
	 */
	bool ret = true;

	ghcb = __sev_get_ghcb(&state);

@@ -1388,15 +1352,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
@@ -1412,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		BUG();
	}

out:
	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
	return ret;
}

	return;
static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

fail:
	if (user_mode(regs)) {
/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	} else {
		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
			 result);
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to bug elsewhere.
	 */
	if (unlikely(on_vc_fallback_stack(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

@@ -1440,23 +1435,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	goto out;
	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}

/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();
	panic("Can't handle #VC exception from unsupported context\n");
	instrumentation_end();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

DEFINE_IDTENTRY_VC(exc_vmm_communication)
{
	if (likely(!on_vc_fallback_stack(regs)))
		safe_stack_exc_vmm_communication(regs, error_code);
	else
		ist_exc_vmm_communication(regs, error_code);
	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}

bool __init handle_vc_boot_ghcb(struct pt_regs *regs)