Commit 957ed9ef authored by Xiao Guangrong's avatar Xiao Guangrong Committed by Avi Kivity
Browse files

KVM: MMU: prefetch ptes when intercepted guest #PF



Support prefetch ptes when intercept guest #PF, avoid to #PF by later
access

If we meet any failure in the prefetch path, we will exit it and
not try other ptes to avoid become heavy path

Signed-off-by: default avatarXiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
parent 48987781
Loading
Loading
Loading
Loading
+103 −1
Original line number Diff line number Diff line
@@ -89,6 +89,8 @@ module_param(oos_shadow, bool, 0644);
	}
#endif

#define PTE_PREFETCH_NUM		8

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

@@ -400,7 +402,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
				   rmap_desc_cache, 4);
				   rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
	if (r)
		goto out;
	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -2089,6 +2091,105 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

static struct kvm_memory_slot *
pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
{
	struct kvm_memory_slot *slot;

	slot = gfn_to_memslot(vcpu->kvm, gfn);
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
	      (no_dirty_log && slot->dirty_bitmap))
		slot = NULL;

	return slot;
}

static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
				     bool no_dirty_log)
{
	struct kvm_memory_slot *slot;
	unsigned long hva;

	slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log);
	if (!slot) {
		get_page(bad_page);
		return page_to_pfn(bad_page);
	}

	hva = gfn_to_hva_memslot(slot, gfn);

	return hva_to_pfn_atomic(vcpu->kvm, hva);
}

static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
				    struct kvm_mmu_page *sp,
				    u64 *start, u64 *end)
{
	struct page *pages[PTE_PREFETCH_NUM];
	unsigned access = sp->role.access;
	int i, ret;
	gfn_t gfn;

	gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
	if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK))
		return -1;

	ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
	if (ret <= 0)
		return -1;

	for (i = 0; i < ret; i++, gfn++, start++)
		mmu_set_spte(vcpu, start, ACC_ALL,
			     access, 0, 0, 1, NULL,
			     sp->role.level, gfn,
			     page_to_pfn(pages[i]), true, true);

	return 0;
}

static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *sp, u64 *sptep)
{
	u64 *spte, *start = NULL;
	int i;

	WARN_ON(!sp->role.direct);

	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
	spte = sp->spt + i;

	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
		if (*spte != shadow_trap_nonpresent_pte || spte == sptep) {
			if (!start)
				continue;
			if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
				break;
			start = NULL;
		} else if (!start)
			start = spte;
	}
}

static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
{
	struct kvm_mmu_page *sp;

	/*
	 * Since it's no accessed bit on EPT, it's no way to
	 * distinguish between actually accessed translations
	 * and prefetched, so disable pte prefetch if EPT is
	 * enabled.
	 */
	if (!shadow_accessed_mask)
		return;

	sp = page_header(__pa(sptep));
	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
		return;

	__direct_pte_prefetch(vcpu, sp, sptep);
}

static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
			int level, gfn_t gfn, pfn_t pfn)
{
@@ -2102,6 +2203,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
				     0, write, 1, &pt_write,
				     level, gfn, pfn, false, true);
			direct_pte_prefetch(vcpu, iterator.sptep);
			++vcpu->stat.pf_fixed;
			break;
		}
+72 −0
Original line number Diff line number Diff line
@@ -310,6 +310,77 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
	return r || curr_pte != gw->ptes[level - 1];
}

static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep)
{
	struct kvm_mmu_page *sp;
	pt_element_t gptep[PTE_PREFETCH_NUM];
	gpa_t first_pte_gpa;
	int offset = 0, i;
	u64 *spte;

	sp = page_header(__pa(sptep));

	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
		return;

	if (sp->role.direct)
		return __direct_pte_prefetch(vcpu, sp, sptep);

	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	first_pte_gpa = gfn_to_gpa(sp->gfn) +
				(offset + i) * sizeof(pt_element_t);

	if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep,
					sizeof(gptep)) < 0)
		return;

	spte = sp->spt + i;

	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
		pt_element_t gpte;
		unsigned pte_access;
		gfn_t gfn;
		pfn_t pfn;
		bool dirty;

		if (spte == sptep)
			continue;

		if (*spte != shadow_trap_nonpresent_pte)
			continue;

		gpte = gptep[i];

		if (!is_present_gpte(gpte) ||
		      is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) {
			if (!sp->unsync)
				__set_spte(spte, shadow_notrap_nonpresent_pte);
			continue;
		}

		if (!(gpte & PT_ACCESSED_MASK))
			continue;

		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		gfn = gpte_to_gfn(gpte);
		dirty = is_dirty_gpte(gpte);
		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
				      (pte_access & ACC_WRITE_MASK) && dirty);
		if (is_error_pfn(pfn)) {
			kvm_release_pfn_clean(pfn);
			break;
		}

		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
			     dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
			     pfn, true, true);
	}
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
@@ -391,6 +462,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
		     user_fault, write_fault, dirty, ptwrite, it.level,
		     gw->gfn, pfn, false, true);
	FNAME(pte_prefetch)(vcpu, it.sptep);

	return it.sptep;