Commit b14b2690 authored by Sean Christopherson, committed by Paolo Bonzini
Browse files

KVM: Rename/refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()



Rename and refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()
to better reflect what KVM is actually checking, and to eliminate extra
pfn_to_page() lookups.  The kvm_release_pfn_*() and kvm_try_get_pfn()
helpers in particular benefit from "refcounted" nomenclature, as it's not
all that obvious why KVM needs to get/put refcounts for some PG_reserved
pages (ZERO_PAGE and ZONE_DEVICE).

Add a comment to call out that the list of exceptions to PG_reserved is
all but guaranteed to be incomplete.  The list has mostly been compiled
by people throwing noodles at KVM and finding out they stick a little too
well, e.g. the ZERO_PAGE's refcount overflowed and ZONE_DEVICE pages
didn't get freed.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220429010416.2788472-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 284dc493
Loading
Loading
Loading
Loading
+9 −6
Original line number Diff line number Diff line
@@ -534,6 +534,7 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
	kvm_pfn_t pfn;
	u64 old_spte = *sptep;
	int level = sptep_to_sp(sptep)->role.level;
	struct page *page;

	if (!is_shadow_present_pte(old_spte) ||
	    !spte_has_volatile_bits(old_spte))
@@ -549,11 +550,13 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
	pfn = spte_to_pfn(old_spte);

	/*
	 * KVM does not hold the refcount of the page used by
	 * kvm mmu, before reclaiming the page, we should
	 * unmap it from mmu first.
	 * KVM doesn't hold a reference to any pages mapped into the guest, and
	 * instead uses the mmu_notifier to ensure that KVM unmaps any pages
	 * before they are reclaimed.  Sanity check that, if the pfn is backed
	 * by a refcounted page, the refcount is elevated.
	 */
	WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
	page = kvm_pfn_to_refcounted_page(pfn);
	WARN_ON(page && !page_count(page));

	if (is_accessed_spte(old_spte))
		kvm_set_pfn_accessed(pfn);
@@ -2881,7 +2884,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
	if (unlikely(fault->max_level == PG_LEVEL_4K))
		return;

	if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn))
	if (is_error_noslot_pfn(fault->pfn) || !kvm_pfn_to_refcounted_page(fault->pfn))
		return;

	if (kvm_slot_dirty_track_enabled(slot))
@@ -5993,7 +5996,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
		 * the guest, and the guest page table is using 4K page size
		 * mapping if the indirect sp has level = 1.
		 */
		if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
		if (sp->role.direct && kvm_pfn_to_refcounted_page(pfn) &&
		    sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
							       pfn, PG_LEVEL_NUM)) {
			pte_list_remove(kvm, rmap_head, sptep);
+1 −1
Original line number Diff line number Diff line
@@ -1751,7 +1751,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
		 */
		pfn = spte_to_pfn(iter.old_spte);

		if (kvm_is_reserved_pfn(pfn))
		if (!kvm_pfn_to_refcounted_page(pfn))
			continue;

		max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+1 −1
Original line number Diff line number Diff line
@@ -1570,7 +1570,7 @@ void kvm_arch_sync_events(struct kvm *kvm);

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);

bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
bool kvm_is_zone_device_page(struct page *page);

struct kvm_irq_ack_notifier {
+52 −14
Original line number Diff line number Diff line
@@ -182,19 +182,36 @@ bool kvm_is_zone_device_page(struct page *page)
	return is_zone_device_page(page);
}

bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
/*
 * Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
 * page, NULL otherwise.  Note, the list of refcounted PG_reserved page types
 * is likely incomplete, it has been compiled purely through people wanting to
 * back guest with a certain type of memory and encountering issues.
 */
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_page(pfn);
	if (!PageReserved(page))
		return page;

	/* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
	if (is_zero_pfn(pfn))
		return page;

	/*
	 * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
	 * perspective they are "normal" pages, albeit with slightly different
	 * usage rules.
	 */
	if (pfn_valid(pfn))
		return PageReserved(pfn_to_page(pfn)) &&
		       !is_zero_pfn(pfn) &&
		       !kvm_is_zone_device_page(pfn_to_page(pfn));
	if (kvm_is_zone_device_page(page))
		return page;

	return true;
	return NULL;
}

/*
@@ -2501,9 +2518,12 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)

static int kvm_try_get_pfn(kvm_pfn_t pfn)
{
	if (kvm_is_reserved_pfn(pfn))
	struct page *page = kvm_pfn_to_refcounted_page(pfn);

	if (!page)
		return 1;
	return get_page_unless_zero(pfn_to_page(pfn));

	return get_page_unless_zero(page);
}

static int hva_to_pfn_remapped(struct vm_area_struct *vma,
@@ -2728,6 +2748,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 */
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	struct page *page;
	kvm_pfn_t pfn;

	pfn = gfn_to_pfn(kvm, gfn);
@@ -2735,10 +2756,11 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
	if (is_error_noslot_pfn(pfn))
		return KVM_ERR_PTR_BAD_PAGE;

	if (kvm_is_reserved_pfn(pfn))
	page = kvm_pfn_to_refcounted_page(pfn);
	if (!page)
		return KVM_ERR_PTR_BAD_PAGE;

	return pfn_to_page(pfn);
	return page;
}
EXPORT_SYMBOL_GPL(gfn_to_page);

@@ -2841,8 +2863,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);

void kvm_release_pfn_clean(kvm_pfn_t pfn)
{
	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
		kvm_release_page_clean(pfn_to_page(pfn));
	struct page *page;

	if (is_error_noslot_pfn(pfn))
		return;

	page = kvm_pfn_to_refcounted_page(pfn);
	if (!page)
		return;

	kvm_release_page_clean(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);

@@ -2857,8 +2887,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_dirty);

void kvm_release_pfn_dirty(kvm_pfn_t pfn)
{
	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
		kvm_release_page_dirty(pfn_to_page(pfn));
	struct page *page;

	if (is_error_noslot_pfn(pfn))
		return;

	page = kvm_pfn_to_refcounted_page(pfn);
	if (!page)
		return;

	kvm_release_page_dirty(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);