Commit 2e1ae9cd authored by Nicholas Piggin, committed by Michael Ellerman
Browse files

KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU



Rather than partition the guest PID space + flush a rogue guest PID to
work around this problem, instead fix it by always disabling the MMU when
switching in or out of guest MMU context in HV mode.

This may be a bit less efficient, but it is a lot less complicated and
allows the P9 path to trivially implement the workaround too. Newer CPUs
are not subject to this issue.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-22-npiggin@gmail.com
parent 41f77991
Loading
Loading
Loading
Loading
+0 −6
Original line number Original line Diff line number Diff line
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
}
#endif
#endif


#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
#else
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
#endif

extern void switch_cop(struct mm_struct *next);
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+14 −7
Original line number Original line Diff line number Diff line
@@ -807,7 +807,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
		 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
		 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
		 * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
		 * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
		 */
		 */
		if (mflags != 0 && mflags != 3)
		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
				kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
			return H_UNSUPPORTED_FLAG_START;
			return H_UNSUPPORTED_FLAG_START;
		return H_TOO_HARD;
		return H_TOO_HARD;
	default:
	default:
@@ -1677,6 +1678,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
		lpcr &= ~LPCR_AIL;
		lpcr &= ~LPCR_AIL;
	if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
	if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
		lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
		lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
	/*
	 * On some POWER9s we force AIL off for radix guests to prevent
	 * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
	 * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
	 * be cached, which the host TLB management does not expect.
	 */
	if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		lpcr &= ~LPCR_AIL;


	/*
	/*
	 * On POWER9, allow userspace to enable large decrementer for the
	 * On POWER9, allow userspace to enable large decrementer for the
@@ -4360,12 +4369,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;


	do {
	do {
		/*
		if (kvm->arch.threads_indep && kvm_is_radix(kvm))
		 * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
		 * path, which also handles hash and dependent threads mode.
		 */
		if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
		    !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
						  vcpu->arch.vcore->lpcr);
						  vcpu->arch.vcore->lpcr);
		else
		else
@@ -4995,6 +4999,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
		if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
			pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
			pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
			kvm->arch.threads_indep = true;
			kvm->arch.threads_indep = true;
		} else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
			pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
			kvm->arch.threads_indep = true;
		} else {
		} else {
			kvm->arch.threads_indep = indep_threads_mode;
			kvm->arch.threads_indep = indep_threads_mode;
		}
		}
+11 −3
Original line number Original line Diff line number Diff line
@@ -218,6 +218,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc


	mtspr(SPRN_AMOR, ~0UL);
	mtspr(SPRN_AMOR, ~0UL);


	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);


	/*
	/*
@@ -226,6 +229,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
	 */
	 */
	mtspr(SPRN_HDEC, hdec);
	mtspr(SPRN_HDEC, hdec);


	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		__mtmsrd(0, 1); /* clear RI */
		__mtmsrd(0, 1); /* clear RI */


	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
@@ -341,8 +345,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc


	radix_clear_slb();
	radix_clear_slb();


	__mtmsrd(msr, 0);

	accumulate_time(vcpu, &vcpu->arch.rm_exit);
	accumulate_time(vcpu, &vcpu->arch.rm_exit);


	/* Advance host PURR/SPURR by the amount used by guest */
	/* Advance host PURR/SPURR by the amount used by guest */
@@ -408,6 +410,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc


	switch_mmu_to_host_radix(kvm, host_pidr);
	switch_mmu_to_host_radix(kvm, host_pidr);


	/*
	 * If we are in real mode, only switch MMU on after the MMU is
	 * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
	 */
	__mtmsrd(msr, 0);

	end_timing(vcpu);
	end_timing(vcpu);


	return trap;
	return trap;
+0 −34
Original line number Original line Diff line number Diff line
@@ -1717,40 +1717,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	eieio
	eieio
	tlbsync
	tlbsync
	ptesync
	ptesync

BEGIN_FTR_SECTION
	/* Radix: Handle the case where the guest used an illegal PID */
	LOAD_REG_ADDR(r4, mmu_base_pid)
	lwz	r3, VCPU_GUEST_PID(r9)
	lwz	r5, 0(r4)
	cmpw	cr0,r3,r5
	blt	2f

	/*
	 * Illegal PID, the HW might have prefetched and cached in the TLB
	 * some translations for the  LPID 0 / guest PID combination which
	 * Linux doesn't know about, so we need to flush that PID out of
	 * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
	 * the right context.
	*/
	li	r0,0
	mtspr	SPRN_LPID,r0
	isync

	/* Then do a congruence class local flush */
	ld	r6,VCPU_KVM(r9)
	lwz	r0,KVM_TLB_SETS(r6)
	mtctr	r0
	li	r7,0x400		/* IS field = 0b01 */
	ptesync
	sldi	r0,r3,32		/* RS has PID */
1:	PPC_TLBIEL(7,0,2,1,1)		/* RIC=2, PRS=1, R=1 */
	addi	r7,r7,0x1000
	bdnz	1b
	ptesync
END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)

2:
#endif /* CONFIG_PPC_RADIX_MMU */
#endif /* CONFIG_PPC_RADIX_MMU */


	/*
	/*
+8 −19
Original line number Original line Diff line number Diff line
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
	}
	}


	/* Find out how many PID bits are supported */
	/* Find out how many PID bits are supported */
	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
		if (!mmu_pid_bits)
			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
			mmu_pid_bits = 20;
		mmu_base_pid = 1;
	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		/*
		/*
		 * When KVM is possible, we only use the top half of the
		 * Older versions of KVM on these machines prefer if the
		 * PID space to avoid collisions between host and guest PIDs
		 * guest only uses the low 19 PID bits.
		 * which can cause problems due to prefetch when exiting the
		 * guest with AIL=3
		 */
		 */
		mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
		mmu_base_pid = 1;
#endif
	} else {
		/* The guest uses the bottom half of the PID space */
		if (!mmu_pid_bits)
		if (!mmu_pid_bits)
			mmu_pid_bits = 19;
			mmu_pid_bits = 19;
		mmu_base_pid = 1;
	} else {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
	}
	}
	mmu_base_pid = 1;


	/*
	/*
	 * Allocate Partition table and process table for the
	 * Allocate Partition table and process table for the
Loading