Merge tag 'x86_sev_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (d04f7de0) · Commits · EulixOS / Software / Kernel

arch/x86/entry/entry_64.S

+2 −2

Original line number	Diff line number	Diff line
		@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)

		movq %rsp, %rdi /* pt_regs pointer */

		call \cfunc
		call kernel_\cfunc

		/*
		* No need to switch back to the IST stack. The current stack is either
		@@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)

		/* Switch to the regular task stack */
		.Lfrom_usermode_switch_stack_\@:
		idtentry_body safe_stack_\cfunc, has_error_code=1
		idtentry_body user_\cfunc, has_error_code=1

		_ASM_NOKPROBE(\asmsym)
		SYM_CODE_END(\asmsym)

arch/x86/include/asm/idtentry.h

+10 −19

Original line number	Diff line number	Diff line
		@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
		*/
		#define DECLARE_IDTENTRY_VC(vector, func) \
		DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
		__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \
		__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
		__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
		__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)

		/**
		* DEFINE_IDTENTRY_IST - Emit code for IST entry points
		@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
		DEFINE_IDTENTRY_RAW_ERRORCODE(func)

		/**
		* DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
		which runs on a safe stack.
		* DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
		when raised from kernel mode
		* @func: Function name of the entry point
		*
		* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
		*/
		#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
		DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
		#define DEFINE_IDTENTRY_VC_KERNEL(func) \
		DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)

		/**
		* DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
		which runs on the VC fall-back stack
		* DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
		when raised from user mode
		* @func: Function name of the entry point
		*
		* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
		*/
		#define DEFINE_IDTENTRY_VC_IST(func) \
		DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)

		/**
		* DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
		* @func: Function name of the entry point
		*
		* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
		*/
		#define DEFINE_IDTENTRY_VC(func) \
		DEFINE_IDTENTRY_RAW_ERRORCODE(func)
		#define DEFINE_IDTENTRY_VC_USER(func) \
		DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)

		#else /* CONFIG_X86_64 */

arch/x86/include/asm/sev-common.h

+15 −1

Original line number	Diff line number	Diff line
		@@ -9,8 +9,13 @@
		#define __ASM_X86_SEV_COMMON_H

		#define GHCB_MSR_INFO_POS 0
		#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
		#define GHCB_DATA_LOW 12
		#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1)

		#define GHCB_DATA(v) \
		(((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)

		/* SEV Information Request/Response */
		#define GHCB_MSR_SEV_INFO_RESP 0x001
		#define GHCB_MSR_SEV_INFO_REQ 0x002
		#define GHCB_MSR_VER_MAX_POS 48
		@@ -28,6 +33,7 @@
		#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
		#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)

		/* CPUID Request/Response */
		#define GHCB_MSR_CPUID_REQ 0x004
		#define GHCB_MSR_CPUID_RESP 0x005
		#define GHCB_MSR_CPUID_FUNC_POS 32
		@@ -45,6 +51,14 @@
		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))

		/* AP Reset Hold */
		#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
		#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007

		/* GHCB Hypervisor Feature Request/Response */
		#define GHCB_MSR_HV_FT_REQ 0x080
		#define GHCB_MSR_HV_FT_RESP 0x081

		#define GHCB_MSR_TERM_REQ 0x100
		#define GHCB_MSR_TERM_REASON_SET_POS 12
		#define GHCB_MSR_TERM_REASON_SET_MASK 0xf

arch/x86/kernel/sev.c

+114 −87

Original line number	Diff line number	Diff line
		@@ -7,12 +7,11 @@
		* Author: Joerg Roedel <jroedel@suse.de>
		*/

		#define pr_fmt(fmt) "SEV-ES: " fmt
		#define pr_fmt(fmt) "SEV: " fmt

		#include <linux/sched/debug.h> /* For show_regs() */
		#include <linux/percpu-defs.h>
		#include <linux/mem_encrypt.h>
		#include <linux/lockdep.h>
		#include <linux/printk.h>
		#include <linux/mm_types.h>
		#include <linux/set_memory.h>
		@@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void)
		this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
		}

		static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
		/*
		* Nothing shall interrupt this code path while holding the per-CPU
		* GHCB. The backup GHCB is only for NMIs interrupting this path.
		*
		* Callers must disable local interrupts around it.
		*/
		static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
		{
		struct sev_es_runtime_data *data;
		struct ghcb *ghcb;

		WARN_ON(!irqs_disabled());

		data = this_cpu_read(runtime_data);
		ghcb = &data->ghcb_page;

		@@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
		data->ghcb_active = false;
		data->backup_ghcb_active = false;

		instrumentation_begin();
		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
		instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		@@ -258,17 +267,24 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
		static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
		{
		char buffer[MAX_INSN_SIZE];
		int res;
		int insn_bytes;

		res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
		if (!res) {
		insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
		if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
		} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
		}

		if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
		if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

		if (ctxt->insn.immediate.got)
		@@ -479,11 +495,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
		/* Include code shared with pre-decompression boot stage */
		#include "sev-shared.c"

		static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
		static noinstr void __sev_put_ghcb(struct ghcb_state *state)
		{
		struct sev_es_runtime_data *data;
		struct ghcb *ghcb;

		WARN_ON(!irqs_disabled());

		data = this_cpu_read(runtime_data);
		ghcb = &data->ghcb_page;

		@@ -507,7 +525,7 @@ void noinstr __sev_es_nmi_complete(void)
		struct ghcb_state state;
		struct ghcb *ghcb;

		ghcb = sev_es_get_ghcb(&state);
		ghcb = __sev_get_ghcb(&state);

		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
		@@ -517,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void)
		sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
		VMGEXIT();

		sev_es_put_ghcb(&state);
		__sev_put_ghcb(&state);
		}

		static u64 get_jump_table_addr(void)
		@@ -529,7 +547,7 @@ static u64 get_jump_table_addr(void)

		local_irq_save(flags);

		ghcb = sev_es_get_ghcb(&state);
		ghcb = __sev_get_ghcb(&state);

		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
		@@ -543,7 +561,7 @@ static u64 get_jump_table_addr(void)
		ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

		sev_es_put_ghcb(&state);
		__sev_put_ghcb(&state);

		local_irq_restore(flags);

		@@ -668,7 +686,7 @@ static void sev_es_ap_hlt_loop(void)
		struct ghcb_state state;
		struct ghcb *ghcb;

		ghcb = sev_es_get_ghcb(&state);
		ghcb = __sev_get_ghcb(&state);

		while (true) {
		vc_ghcb_invalidate(ghcb);
		@@ -685,7 +703,7 @@ static void sev_es_ap_hlt_loop(void)
		break;
		}

		sev_es_put_ghcb(&state);
		__sev_put_ghcb(&state);
		}

		/*
		@@ -775,7 +793,7 @@ void __init sev_es_init_vc_handling(void)
		sev_es_setup_play_dead();

		/* Secondary CPUs use the runtime #VC handler */
		initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
		initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
		}

		static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
		@@ -1213,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
		return ES_EXCEPTION;
		}

		static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
		{
		if (user_mode(regs))
		noist_exc_debug(regs);
		else
		exc_debug(regs);
		}

		static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
		struct ghcb *ghcb,
		unsigned long exit_code)
		@@ -1316,44 +1326,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
		return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
		}

		/*
		* Main #VC exception handler. It is called when the entry code was able to
		* switch off the IST to a safe kernel stack.
		*
		* With the current implementation it is always possible to switch to a safe
		* stack because #VC exceptions only happen at known places, like intercepted
		* instructions or accesses to MMIO areas/IO ports. They can also happen with
		* code instrumentation when the hypervisor intercepts #DB, but the critical
		* paths are forbidden to be instrumented, so #DB exceptions currently also
		* only happen in safe places.
		*/
		DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
		{
		irqentry_state_t irq_state;
		struct ghcb_state state;
		struct es_em_ctxt ctxt;
		enum es_result result;
		struct ghcb *ghcb;
		bool ret = true;

		/*
		* Handle #DB before calling into !noinstr code to avoid recursive #DB.
		*/
		if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
		vc_handle_trap_db(regs);
		return;
		}

		irq_state = irqentry_nmi_enter(regs);
		lockdep_assert_irqs_disabled();
		instrumentation_begin();

		/*
		* This is invoked through an interrupt gate, so IRQs are disabled. The
		* code below might walk page-tables for user or kernel addresses, so
		* keep the IRQs disabled to protect us against concurrent TLB flushes.
		*/

		ghcb = sev_es_get_ghcb(&state);
		ghcb = __sev_get_ghcb(&state);

		vc_ghcb_invalidate(ghcb);
		result = vc_init_em_ctxt(&ctxt, regs, error_code);
		@@ -1361,7 +1342,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

		sev_es_put_ghcb(&state);
		__sev_put_ghcb(&state);

		/* Done - now check the result */
		switch (result) {
		@@ -1369,17 +1350,20 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		vc_finish_insn(&ctxt);
		break;
		case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
		error_code, regs->ip);
		goto fail;
		ret = false;
		break;
		case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
		error_code, regs->ip);
		goto fail;
		ret = false;
		break;
		case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
		error_code, regs->ip);
		goto fail;
		ret = false;
		break;
		case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
		@@ -1395,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		BUG();
		}

		out:
		instrumentation_end();
		irqentry_nmi_exit(regs, irq_state);
		return ret;
		}

		return;
		static __always_inline bool vc_is_db(unsigned long error_code)
		{
		return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
		}

		fail:
		if (user_mode(regs)) {
		/*
		* Do not kill the machine if user-space triggered the
		* exception. Send SIGBUS instead and let user-space deal with
		* it.
		* Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
		* and will panic when an error happens.
		*/
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
		} else {
		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
		result);
		DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
		{
		irqentry_state_t irq_state;

		/*
		* With the current implementation it is always possible to switch to a
		* safe stack because #VC exceptions only happen at known places, like
		* intercepted instructions or accesses to MMIO areas/IO ports. They can
		* also happen with code instrumentation when the hypervisor intercepts
		* #DB, but the critical paths are forbidden to be instrumented, so #DB
		* exceptions currently also only happen in safe places.
		*
		* But keep this here in case the noinstr annotations are violated due
		to a bug elsewhere.
		*/
		if (unlikely(on_vc_fallback_stack(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
		}

		/*
		* Handle #DB before calling into !noinstr code to avoid recursive #DB.
		*/
		if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
		}

		irq_state = irqentry_nmi_enter(regs);

		instrumentation_begin();

		if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		@@ -1423,23 +1435,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
		panic("Returned from Terminate-Request to Hypervisor\n");
		}

		goto out;
		instrumentation_end();
		irqentry_nmi_exit(regs, irq_state);
		}

		/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
		DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
		/*
		* Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
		* and will kill the current task with SIGBUS when an error happens.
		*/
		DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
		{
		/*
		* Handle #DB before calling into !noinstr code to avoid recursive #DB.
		*/
		if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
		}

		irqentry_enter_from_user_mode(regs);
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();

		if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		* Do not kill the machine if user-space triggered the
		* exception. Send SIGBUS instead and let user-space deal with
		* it.
		*/
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
		}

		DEFINE_IDTENTRY_VC(exc_vmm_communication)
		{
		if (likely(!on_vc_fallback_stack(regs)))
		safe_stack_exc_vmm_communication(regs, error_code);
		else
		ist_exc_vmm_communication(regs, error_code);
		instrumentation_end();
		irqentry_exit_to_user_mode(regs);
		}

		bool __init handle_vc_boot_ghcb(struct pt_regs *regs)

arch/x86/kernel/umip.c

+4 −6

Original line number	Diff line number	Diff line
		@@ -346,14 +346,12 @@ bool fixup_umip_exception(struct pt_regs *regs)
		if (!regs)
		return false;

		nr_copied = insn_fetch_from_user(regs, buf);

		/*
		* The insn_fetch_from_user above could have failed if user code
		* is protected by a memory protection key. Give up on emulation
		* in such a case. Should we issue a page fault?
		* Give up on emulation if fetching the instruction failed. Should a
		* page fault or a #GP be issued?
		*/
		if (!nr_copied)
		nr_copied = insn_fetch_from_user(regs, buf);
		if (nr_copied <= 0)
		return false;

		if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))