Commit ef68017e authored by Andy Lutomirski's avatar Andy Lutomirski Committed by Thomas Gleixner
Browse files

x86/kvm: Handle async page faults directly through do_page_fault()



KVM overloads #PF to indicate two types of not-actually-page-fault
events.  Right now, the KVM guest code intercepts them by modifying
the IDT and hooking the #PF vector.  This makes the already fragile
fault code even harder to understand, and it also pollutes call
traces with async_page_fault and do_async_page_fault for normal page
faults.

Clean it up by moving the logic into do_page_fault() using a static
branch.  This gets rid of the platform trap_init override mechanism
completely.

[ tglx: Fixed up 32bit, removed error code from the async functions and
  	massaged coding style ]

Signed-off-by: default avatarAndy Lutomirski <luto@kernel.org>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Reviewed-by: default avatarAlexandre Chartre <alexandre.chartre@oracle.com>
Acked-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Acked-by: default avatarPeter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200505134059.169270470@linutronix.de
parent af1e56b7
Loading
Loading
Loading
Loading
+0 −8
Original line number Diff line number Diff line
@@ -1693,14 +1693,6 @@ SYM_CODE_START(general_protection)
	jmp	common_exception
SYM_CODE_END(general_protection)

#ifdef CONFIG_KVM_GUEST
SYM_CODE_START(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	common_exception_read_cr2
SYM_CODE_END(async_page_fault)
#endif

SYM_CODE_START(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp
+0 −4
Original line number Diff line number Diff line
@@ -1202,10 +1202,6 @@ idtentry xendebug do_debug has_error_code=0
idtentry general_protection	do_general_protection	has_error_code=1
idtentry page_fault		do_page_fault		has_error_code=1	read_cr2=1

#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
#endif

#ifdef CONFIG_X86_MCE
idtentry machine_check		do_mce			has_error_code=0	paranoid=1
#endif
+17 −2
Original line number Diff line number Diff line
@@ -91,8 +91,18 @@ unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
void kvm_disable_steal_time(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);

DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	if (static_branch_unlikely(&kvm_async_pf_enabled))
		return __kvm_handle_async_pf(regs, token);
	else
		return false;
}

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
@@ -130,6 +140,11 @@ static inline void kvm_disable_steal_time(void)
{
	return;
}

static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	return false;
}
#endif

#endif /* _ASM_X86_KVM_PARA_H */
+0 −2
Original line number Diff line number Diff line
@@ -50,14 +50,12 @@ struct x86_init_resources {
 * @pre_vector_init:		init code to run before interrupt vectors
 *				are set up.
 * @intr_init:			interrupt init code
 * @trap_init:			platform specific trap setup
 * @intr_mode_select:		interrupt delivery mode selection
 * @intr_mode_init:		interrupt delivery mode setup
 */
struct x86_init_irqs {
	void (*pre_vector_init)(void);
	void (*intr_init)(void);
	void (*trap_init)(void);
	void (*intr_mode_select)(void);
	void (*intr_mode_init)(void);
};
+21 −18
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static int kvmapf = 1;

static int __init parse_no_kvmapf(char *arg)
@@ -242,25 +244,27 @@ u32 kvm_read_and_reset_pf_reason(void)
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);

dotraplinkage void
do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	/*
	 * If we get a page fault right here, the pf_reason seems likely
	 * to be clobbered.  Bummer.
	 */
	switch (kvm_read_and_reset_pf_reason()) {
	default:
		do_page_fault(regs, error_code, address);
		break;
		return false;
	case KVM_PV_REASON_PAGE_NOT_PRESENT:
		/* page is swapped out by the host. */
		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
		break;
		kvm_async_pf_task_wait(token, !user_mode(regs));
		return true;
	case KVM_PV_REASON_PAGE_READY:
		rcu_irq_enter();
		kvm_async_pf_task_wake((u32)address);
		kvm_async_pf_task_wake(token);
		rcu_irq_exit();
		break;
		return true;
	}
}
NOKPROBE_SYMBOL(do_async_page_fault);
NOKPROBE_SYMBOL(__kvm_handle_async_pf);

static void __init paravirt_ops_setup(void)
{
@@ -306,7 +310,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
		u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
		u64 pa;

		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

		pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));

#ifdef CONFIG_PREEMPTION
		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -592,12 +600,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
}
#endif

static void __init kvm_apf_trap_init(void)
{
	update_intr_gate(X86_TRAP_PF, async_page_fault);
}


static void kvm_flush_tlb_others(const struct cpumask *cpumask,
			const struct flush_tlb_info *info)
{
@@ -632,8 +634,6 @@ static void __init kvm_guest_init(void)
	register_reboot_notifier(&kvm_pv_reboot_nb);
	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
		raw_spin_lock_init(&async_pf_sleepers[i].lock);
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
		x86_init.irqs.trap_init = kvm_apf_trap_init;

	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		has_steal_clock = 1;
@@ -649,6 +649,9 @@ static void __init kvm_guest_init(void)
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		apic_set_eoi_write(kvm_guest_apic_eoi_write);

	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
		static_branch_enable(&kvm_async_pf_enabled);

#ifdef CONFIG_SMP
	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
Loading