Commit 73a3c659 authored by Paolo Bonzini

KVM: MMU: change kvm_mmu_hugepage_adjust() arguments to kvm_page_fault



Pass struct kvm_page_fault to kvm_mmu_hugepage_adjust() instead of
extracting the arguments from the struct; the results are also stored
in the struct, so the callers are adjusted accordingly.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 3c8ad5a6
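
In sketch form, the change replaces an argument-heavy helper with one
that reads its inputs from, and writes its results back into, the fault
structure. Both prototypes below are quoted from the hunks that follow:

	/* Old: inputs and outputs passed piecemeal. */
	int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
				    int max_level, kvm_pfn_t *pfnp,
				    bool huge_page_disallowed, int *req_level);

	/* New: one in/out parameter carries everything. */
	void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);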
arch/x86/kvm/mmu.h +33 −2
@@ -127,12 +127,34 @@ struct kvm_page_fault {
 	const bool rsvd;
 	const bool user;
 
-	/* Derived from mmu.  */
+	/* Derived from mmu and global state.  */
 	const bool is_tdp;
+	const bool nx_huge_page_workaround_enabled;
 
-	/* Input to FNAME(fetch), __direct_map and kvm_tdp_mmu_map.  */
+	/*
+	 * Whether a >4KB mapping can be created or is forbidden due to NX
+	 * hugepages.
+	 */
+	bool huge_page_disallowed;
+
+	/*
+	 * Maximum page size that can be created for this fault; input to
+	 * FNAME(fetch), __direct_map and kvm_tdp_mmu_map.
+	 */
 	u8 max_level;
+
+	/*
+	 * Page size that can be created based on the max_level and the
+	 * page size used by the host mapping.
+	 */
+	u8 req_level;
+
+	/*
+	 * Page size that will be created based on the req_level and
+	 * huge_page_disallowed.
+	 */
+	u8 goal_level;
 
 	/* Shifted addr, or result of guest page table walk if addr is a gva.  */
 	gfn_t gfn;
 
@@ -144,6 +166,12 @@ struct kvm_page_fault {

 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 
+extern int nx_huge_pages;
+static inline bool is_nx_huge_page_enabled(void)
+{
+	return READ_ONCE(nx_huge_pages);
+}
+
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 					u32 err, bool prefault)
 {
@@ -157,8 +185,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		.user = err & PFERR_USER_MASK,
 		.prefault = prefault,
 		.is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
+		.nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(),
 
 		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
+		.req_level = PG_LEVEL_4K,
+		.goal_level = PG_LEVEL_4K,
 	};
 #ifdef CONFIG_RETPOLINE
 	if (fault.is_tdp)
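
The three level fields added above form a narrowing chain: max_level
caps the mapping size for the fault, req_level is what the host mapping
would permit under that cap, and goal_level falls back to PG_LEVEL_4K
whenever the NX hugepage workaround forbids an executable huge mapping.
A standalone sketch of that relationship with hypothetical values
(illustrative C, not kernel code):

	#include <assert.h>
	#include <stdbool.h>

	/* Stand-ins for the kernel's page-size level constants. */
	enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

	int main(void)
	{
		/*
		 * Hypothetical fault: the host backs the gfn with a 2M page,
		 * the guest took an instruction-fetch fault, and the NX
		 * hugepage workaround is enabled.
		 */
		unsigned char max_level  = PG_LEVEL_1G;	/* input cap */
		unsigned char req_level  = PG_LEVEL_4K;	/* default */
		unsigned char goal_level = PG_LEVEL_4K;	/* default */
		int host_level = PG_LEVEL_2M;
		bool huge_page_disallowed = true;	/* exec && workaround */

		/* The core of what kvm_mmu_hugepage_adjust() computes: */
		req_level = host_level < max_level ? host_level : max_level;
		if (!huge_page_disallowed)
			goal_level = req_level;

		/* The fields narrow monotonically: 4K <= goal <= req <= max. */
		assert(goal_level <= req_level && req_level <= max_level);
		assert(goal_level == PG_LEVEL_4K);	/* workaround wins */
		return 0;
	}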
arch/x86/kvm/mmu/mmu.c +27 −33
@@ -2920,48 +2920,45 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 	return min(host_level, max_level);
 }
 
-int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
-			    int max_level, kvm_pfn_t *pfnp,
-			    bool huge_page_disallowed, int *req_level)
+void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
 	struct kvm_memory_slot *slot;
-	kvm_pfn_t pfn = *pfnp;
 	kvm_pfn_t mask;
-	int level;
 
-	*req_level = PG_LEVEL_4K;
+	fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled;
 
-	if (unlikely(max_level == PG_LEVEL_4K))
-		return PG_LEVEL_4K;
+	if (unlikely(fault->max_level == PG_LEVEL_4K))
+		return;
 
-	if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn))
-		return PG_LEVEL_4K;
+	if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn))
+		return;
 
-	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true);
+	slot = gfn_to_memslot_dirty_bitmap(vcpu, fault->gfn, true);
 	if (!slot)
-		return PG_LEVEL_4K;
+		return;
 
 	/*
 	 * Enforce the iTLB multihit workaround after capturing the requested
 	 * level, which will be used to do precise, accurate accounting.
 	 */
-	*req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
-	if (level == PG_LEVEL_4K || huge_page_disallowed)
-		return PG_LEVEL_4K;
+	fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
+						     fault->gfn, fault->pfn,
+						     fault->max_level);
+	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
+		return;
 
 	/*
 	 * mmu_notifier_retry() was successful and mmu_lock is held, so
 	 * the pmd can't be split from under us.
 	 */
-	mask = KVM_PAGES_PER_HPAGE(level) - 1;
-	VM_BUG_ON((gfn & mask) != (pfn & mask));
-	*pfnp = pfn & ~mask;
-
-	return level;
+	fault->goal_level = fault->req_level;
+	mask = KVM_PAGES_PER_HPAGE(fault->goal_level) - 1;
+	VM_BUG_ON((fault->gfn & mask) != (fault->pfn & mask));
+	fault->pfn &= ~mask;
 }
 
 void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
-				kvm_pfn_t *pfnp, int *goal_levelp)
+				kvm_pfn_t *pfnp, u8 *goal_levelp)
 {
 	int level = *goal_levelp;
 
@@ -2984,28 +2981,25 @@ void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,

 static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-	bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
 	struct kvm_shadow_walk_iterator it;
 	struct kvm_mmu_page *sp;
-	int level, req_level, ret;
+	int ret;
 	gfn_t base_gfn = fault->gfn;
 
-	level = kvm_mmu_hugepage_adjust(vcpu, fault->gfn, fault->max_level, &fault->pfn,
-					huge_page_disallowed, &req_level);
+	kvm_mmu_hugepage_adjust(vcpu, fault);
 
-	trace_kvm_mmu_spte_requested(fault->addr, level, fault->pfn);
+	trace_kvm_mmu_spte_requested(fault->addr, fault->goal_level, fault->pfn);
 	for_each_shadow_entry(vcpu, fault->addr, it) {
 		/*
 		 * We cannot overwrite existing page tables with an NX
 		 * large page, as the leaf could be executable.
 		 */
-		if (nx_huge_page_workaround_enabled)
+		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(*it.sptep, fault->gfn, it.level,
-						   &fault->pfn, &level);
+						   &fault->pfn, &fault->goal_level);
 
 		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-		if (it.level == level)
+		if (it.level == fault->goal_level)
 			break;
 
 		drop_large_spte(vcpu, it.sptep);
@@ -3016,13 +3010,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 				      it.level - 1, true, ACC_ALL);
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->is_tdp && huge_page_disallowed &&
-		    req_level >= it.level)
+		if (fault->is_tdp && fault->huge_page_disallowed &&
+		    fault->req_level >= it.level)
 			account_huge_nx_page(vcpu->kvm, sp);
 	}
 
 	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
-			   fault->write, level, base_gfn, fault->pfn,
+			   fault->write, fault->goal_level, base_gfn, fault->pfn,
 			   fault->prefault, fault->map_writable);
 	if (ret == RET_PF_SPURIOUS)
 		return ret;
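
A knock-on effect of keeping the result in the struct: the mappers now
pass &fault->goal_level into disallowed_hugepage_adjust(), so its out
parameter had to change from int * to u8 * to match the u8 field. A
minimal standalone illustration of that type constraint (hypothetical
reduced struct, not the kernel's):

	#include <stdint.h>

	/* Hypothetical stand-in for struct kvm_page_fault. */
	struct fault_sketch {
		uint8_t goal_level;
	};

	/* Must take uint8_t *: an int * would not accept &f->goal_level. */
	static void demote(uint8_t *goal_levelp)
	{
		if (*goal_levelp > 1)
			(*goal_levelp)--;	/* drop a level, as on an NX conflict */
	}

	int main(void)
	{
		struct fault_sketch f = { .goal_level = 2 };

		demote(&f.goal_level);	/* mirrors &fault->goal_level */
		return f.goal_level == 1 ? 0 : 1;
	}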
arch/x86/kvm/mmu/mmu_internal.h +2 −10
@@ -118,12 +118,6 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
 	       kvm_x86_ops.cpu_dirty_log_size;
 }
 
-extern int nx_huge_pages;
-static inline bool is_nx_huge_page_enabled(void)
-{
-	return READ_ONCE(nx_huge_pages);
-}
-
 int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync,
 			    bool speculative);

@@ -164,11 +158,9 @@ enum {
 int kvm_mmu_max_mapping_level(struct kvm *kvm,
 			      const struct kvm_memory_slot *slot, gfn_t gfn,
 			      kvm_pfn_t pfn, int max_level);
-int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
-			    int max_level, kvm_pfn_t *pfnp,
-			    bool huge_page_disallowed, int *req_level);
+void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
-				kvm_pfn_t *pfnp, int *goal_levelp);
+				kvm_pfn_t *pfnp, u8 *goal_levelp);
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);

arch/x86/kvm/mmu/paging_tmpl.h +7 −9
@@ -658,12 +658,10 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			 struct guest_walker *gw)
 {
-	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-	bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
 	struct kvm_mmu_page *sp = NULL;
 	struct kvm_shadow_walk_iterator it;
 	unsigned int direct_access, access;
-	int top_level, level, req_level, ret;
+	int top_level, ret;
 	gfn_t base_gfn = fault->gfn;
 
 	WARN_ON_ONCE(gw->gfn != base_gfn);
@@ -730,8 +728,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			link_shadow_page(vcpu, it.sptep, sp);
 	}
 
-	level = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, fault->max_level, &fault->pfn,
-					huge_page_disallowed, &req_level);
+	kvm_mmu_hugepage_adjust(vcpu, fault);
 
 	trace_kvm_mmu_spte_requested(fault->addr, gw->level, fault->pfn);

@@ -742,12 +739,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		 * We cannot overwrite existing page tables with an NX
 		 * large page, as the leaf could be executable.
 		 */
-		if (nx_huge_page_workaround_enabled)
+		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(*it.sptep, fault->gfn, it.level,
-						   &fault->pfn, &level);
+						   &fault->pfn, &fault->goal_level);
 
 		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-		if (it.level == level)
+		if (it.level == fault->goal_level)
 			break;
 
 		validate_direct_spte(vcpu, it.sptep, direct_access);
@@ -758,7 +755,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
 					      it.level - 1, true, direct_access);
 			link_shadow_page(vcpu, it.sptep, sp);
-			if (huge_page_disallowed && req_level >= it.level)
+			if (fault->huge_page_disallowed &&
+			    fault->req_level >= it.level)
 				account_huge_nx_page(vcpu->kvm, sp);
 		}
 	}
arch/x86/kvm/mmu/tdp_mmu.c +8 −13
@@ -986,30 +986,25 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
  */
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-	bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	struct tdp_iter iter;
 	struct kvm_mmu_page *sp;
 	u64 *child_pt;
 	u64 new_spte;
 	int ret;
-	int level;
-	int req_level;
 
-	level = kvm_mmu_hugepage_adjust(vcpu, fault->gfn, fault->max_level, &fault->pfn,
-					huge_page_disallowed, &req_level);
+	kvm_mmu_hugepage_adjust(vcpu, fault);
 
-	trace_kvm_mmu_spte_requested(fault->addr, level, fault->pfn);
+	trace_kvm_mmu_spte_requested(fault->addr, fault->goal_level, fault->pfn);
 
 	rcu_read_lock();
 
 	tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
-		if (nx_huge_page_workaround_enabled)
+		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(iter.old_spte, fault->gfn,
-						   iter.level, &fault->pfn, &level);
+						   iter.level, &fault->pfn, &fault->goal_level);
 
-		if (iter.level == level)
+		if (iter.level == fault->goal_level)
 			break;
 
 		/*
@@ -1047,8 +1042,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)

 			if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) {
 				tdp_mmu_link_page(vcpu->kvm, sp,
-						  huge_page_disallowed &&
-						  req_level >= iter.level);
+						  fault->huge_page_disallowed &&
+						  fault->req_level >= iter.level);
 
 				trace_kvm_mmu_get_page(sp, true);
 			} else {
@@ -1058,7 +1053,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 	}
 
-	if (iter.level != level) {
+	if (iter.level != fault->goal_level) {
 		rcu_read_unlock();
 		return RET_PF_RETRY;
 	}
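
Taken together, the flow after this patch, paraphrased from the hunks
above (names as in the diff, bodies elided):

	/*
	 * kvm_mmu_do_page_fault() seeds the struct:
	 *     fault.max_level  = KVM_MAX_HUGEPAGE_LEVEL;
	 *     fault.req_level  = PG_LEVEL_4K;
	 *     fault.goal_level = PG_LEVEL_4K;
	 *     fault.nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	 *
	 * __direct_map(), FNAME(fetch) and kvm_tdp_mmu_map() then all do:
	 *     kvm_mmu_hugepage_adjust(vcpu, fault);  // fills huge_page_disallowed,
	 *                                            // req_level and goal_level
	 * and walk down until it.level == fault->goal_level, demoting via
	 * disallowed_hugepage_adjust(..., &fault->pfn, &fault->goal_level)
	 * along the way when the NX workaround is enabled.
	 */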