Commit dd3e4012 authored by Linus Torvalds
Browse files

Merge tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "A bunch of things accumulated for x86 in the last two weeks:

   - Fix guest vtime accounting so that ticks happening while the guest
     is running can also be accounted to it, along with a consolidation
     of the guest-specific context tracking helpers.

   - Provide for the host NMI handler running after a VMX VMEXIT to be
     able to run on the kernel stack correctly.

   - Initialize MSR_TSC_AUX when RDPID is supported and not RDTSCP (virt
     relevant - real hw supports both)

   - A code generation improvement to TASK_SIZE_MAX through the use of
     alternatives

   - The usual misc and related cleanups and improvements"

* tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: x86: Consolidate guest enter/exit logic to common helpers
  context_tracking: KVM: Move guest enter/exit wrappers to KVM's domain
  context_tracking: Consolidate guest enter/exit wrappers
  sched/vtime: Move guest enter/exit vtime accounting to vtime.h
  sched/vtime: Move vtime accounting external declarations above inlines
  KVM: x86: Defer vtime accounting 'til after IRQ handling
  context_tracking: Move guest exit vtime accounting to separate helpers
  context_tracking: Move guest exit context tracking to separate helpers
  KVM/VMX: Invoke NMI non-IST entry instead of IST entry
  x86/cpu: Remove write_tsc() and write_rdtscp_aux() wrappers
  x86/cpu: Initialize MSR_TSC_AUX if RDTSCP *or* RDPID is supported
  x86/resctrl: Fix init const confusion
  x86: Delete UD0, UD1 traces
  x86/smpboot: Remove duplicate includes
  x86/cpu: Use alternative to generate the TASK_SIZE_MAX constant
parents b7415964 bc908e09
Loading
Loading
Loading
Loading
+0 −9
Original line number Diff line number Diff line
@@ -7,18 +7,9 @@

/*
 * Despite that some emulators terminate on UD2, we use it for WARN().
 *
 * Since various instruction decoders/specs disagree on the encoding of
 * UD0/UD1.
 */

#define ASM_UD0		".byte 0x0f, 0xff" /* + ModRM (for Intel) */
#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2		".byte 0x0f, 0x0b"

#define INSN_UD0	0xff0f
#define INSN_UD2	0x0b0f

#define LEN_UD2		2

#ifdef CONFIG_GENERIC_BUG
+15 −0
Original line number Diff line number Diff line
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);
#endif

/* NMI */

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
/*
 * Special NOIST entry point for VMX which invokes this on the kernel
 * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
 * 'executing' marker.
 *
 * On 32-bit this just uses the regular NMI entry point because 32-bit does
 * not have ISTs.
 */
DECLARE_IDTENTRY(X86_TRAP_NMI,		exc_nmi_noist);
#else
/*
 * Every configuration that is not 64-bit with CONFIG_KVM_INTEL enabled:
 * no separate entry point is declared, so the NOIST name is simply an
 * alias for the regular NMI entry point.
 */
#define asm_exc_nmi_noist		asm_exc_nmi
#endif

DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_NMI,	xenpv_exc_nmi);
+0 −4
Original line number Diff line number Diff line
@@ -324,10 +324,6 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
	return wrmsr_safe(msr, (u32)val,  (u32)(val >> 32));
}

#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))

#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)

struct msr *msrs_alloc(void);
void msrs_free(struct msr *msrs);
int msr_set_bit(u32 msr, u8 bit);
+33 −0
Original line number Diff line number Diff line
@@ -56,6 +56,39 @@ static inline void clear_page(void *page)

void copy_page(void *to, void *from);

#ifdef CONFIG_X86_5LEVEL
/*
 * User space process size.  This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen.  This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 *
 * Returns one of two constants, selected on X86_FEATURE_LA57:
 *   (1ul << 47) - PAGE_SIZE	the "small" operand
 *   (1ul << 56) - PAGE_SIZE	the "large" operand
 *
 * NOTE(review): alternative_io() presumably substitutes the second
 * instruction when the feature is present, so no run-time branch is
 * needed -- confirm against <asm/alternative.h>.
 */
static inline unsigned long task_size_max(void)
{
	unsigned long ret;

	/*
	 * Both variants load an immediate ("i" constraint) into the single
	 * output register, referenced positionally as %0.
	 */
	alternative_io("movq %[small],%0","movq %[large],%0",
			X86_FEATURE_LA57,
			"=r" (ret),
			[small] "i" ((1ul << 47)-PAGE_SIZE),
			[large] "i" ((1ul << 56)-PAGE_SIZE));

	return ret;
}
#endif	/* CONFIG_X86_5LEVEL */

#endif	/* !__ASSEMBLY__ */

#ifdef CONFIG_X86_VSYSCALL_EMULATION
+3 −20
Original line number Diff line number Diff line
@@ -55,30 +55,13 @@

#ifdef CONFIG_X86_5LEVEL
/* The shift is chosen at run time: 56 when pgtable_l5_enabled(), else 47. */
#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
/* See task_size_max() in <asm/page_64.h> */
#else
/*
 * Without CONFIG_X86_5LEVEL the shift is fixed at 47, so task_size_max()
 * is a plain constant expression: one page below 1 << 47.
 */
#define __VIRTUAL_MASK_SHIFT	47
#define task_size_max()		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#endif

/*
 * User space process size.  This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen.  This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
#define TASK_SIZE_MAX	((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)

#define TASK_SIZE_MAX		task_size_max()
#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)

/* This decides where the kernel will search for a free chunk of vm
Loading