Commit 338068b5 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini
Browse files

KVM: x86/mmu: Drop infrastructure for multiple page-track modes

Drop "support" for multiple page-track modes, as there is no evidence
that array-based and refcounted metadata is the optimal solution for
other modes, nor is there any evidence that other use cases, e.g. for
access-tracking, will be a good fit for the page-track machinery in
general.

E.g. one potential use case of access-tracking would be to prevent guest
access to poisoned memory (from the guest's perspective).  In that case,
the number of poisoned pages is likely to be a very small percentage of
the guest memory, and there is no need to reference count the number of
access-tracking users, i.e. expanding gfn_track[] for a new mode would be
grossly inefficient.  And for poisoned memory, host userspace would also
likely want to trap accesses, e.g. to inject #MC into the guest, and that
isn't currently supported by the page-track framework.

A better alternative for that poisoned page use case is likely a
variation of the proposed per-gfn attributes overlay (linked), which
would allow efficiently tracking the sparse set of poisoned pages, and by
default would exit to userspace on access.

Link: https://lore.kernel.org/all/Y2WB48kD0J4VGynX@google.com


Cc: Ben Gardon <bgardon@google.com>
Tested-by: default avatarYongwei Ma <yongwei.ma@intel.com>
Link: https://lore.kernel.org/r/20230729013535.1070024-24-seanjc@google.com


Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent e998fb1a
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
 * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
 * also includes TDP pages) to determine whether or not a page can be used in
 * the given MMU context.  This is a subset of the overall kvm_cpu_role to
 * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
 * 2 bytes per gfn instead of 4 bytes per gfn.
 * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
 * allocating 2 bytes per gfn instead of 4 bytes per gfn.
 *
 * Upper-level shadow pages having gptes are tracked for write-protection via
 * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
 * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
 * gfn_track will overflow and explosions will ensure.
 * gfn_write_track.  As above, gfn_write_track is a 16 bit counter, so KVM must
 * not create more than 2^16-1 upper-level shadow pages at a single gfn,
 * otherwise gfn_write_track will overflow and explosions will ensue.
 *
 * A unique shadow page (SP) for a gfn is created if and only if an existing SP
 * cannot be reused.  The ability to reuse a SP is tracked by its role, which
@@ -1023,7 +1023,7 @@ struct kvm_lpage_info {
struct kvm_arch_memory_slot {
	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
	unsigned short *gfn_write_track;
};

/*
+2 −9
Original line number Diff line number Diff line
@@ -4,17 +4,10 @@

#include <linux/kvm_types.h>

enum kvm_page_track_mode {
	KVM_PAGE_TRACK_WRITE,
	KVM_PAGE_TRACK_MAX,
};

void kvm_slot_page_track_add_page(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn,
				  enum kvm_page_track_mode mode);
				  struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_slot_page_track_remove_page(struct kvm *kvm,
				     struct kvm_memory_slot *slot, gfn_t gfn,
				     enum kvm_page_track_mode mode);
				     struct kvm_memory_slot *slot, gfn_t gfn);

#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
/*
+6 −8
Original line number Diff line number Diff line
@@ -831,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)

	/* the non-leaf shadow pages are keeping readonly. */
	if (sp->role.level > PG_LEVEL_4K)
		return kvm_slot_page_track_add_page(kvm, slot, gfn,
						    KVM_PAGE_TRACK_WRITE);
		return kvm_slot_page_track_add_page(kvm, slot, gfn);

	kvm_mmu_gfn_disallow_lpage(slot, gfn);

@@ -878,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
	slots = kvm_memslots_for_spte_role(kvm, sp->role);
	slot = __gfn_to_memslot(slots, gfn);
	if (sp->role.level > PG_LEVEL_4K)
		return kvm_slot_page_track_remove_page(kvm, slot, gfn,
						       KVM_PAGE_TRACK_WRITE);
		return kvm_slot_page_track_remove_page(kvm, slot, gfn);

	kvm_mmu_gfn_allow_lpage(slot, gfn);
}
@@ -2809,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
	 * track machinery is used to write-protect upper-level shadow pages,
	 * i.e. this guards the role.level == 4K assertion below!
	 */
	if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
	if (kvm_slot_page_track_is_active(kvm, slot, gfn))
		return -EPERM;

	/*
@@ -4203,7 +4201,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
	 * guest is writing the page which is write tracked which can
	 * not be fixed by page fault handler.
	 */
	if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
	if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn))
		return true;

	return false;
@@ -5422,8 +5420,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
	 * physical address properties) in a single VM would require tracking
	 * all relevant CPUID information in kvm_mmu_page_role. That is very
	 * undesirable as it would increase the memory requirements for
	 * gfn_track (see struct kvm_mmu_page_role comments).  For now that
	 * problem is swept under the rug; KVM's CPUID API is horrific and
	 * gfn_write_track (see struct kvm_mmu_page_role comments).  For now
	 * that problem is swept under the rug; KVM's CPUID API is horrific and
	 * it's all but impossible to solve it without introducing a new API.
	 */
	vcpu->arch.root_mmu.root_role.word = 0;
+31 −75
Original line number Diff line number Diff line
@@ -27,76 +27,50 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)

void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
	int i;

	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
		kvfree(slot->arch.gfn_track[i]);
		slot->arch.gfn_track[i] = NULL;
	}
	kvfree(slot->arch.gfn_write_track);
	slot->arch.gfn_write_track = NULL;
}

int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
						 unsigned long npages)
{
	int i;
	const size_t size = sizeof(*slot->arch.gfn_write_track);

	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
		if (i == KVM_PAGE_TRACK_WRITE &&
		    !kvm_page_track_write_tracking_enabled(kvm))
			continue;

		slot->arch.gfn_track[i] =
			__vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
	if (!slot->arch.gfn_write_track)
		slot->arch.gfn_write_track = __vcalloc(npages, size,
						       GFP_KERNEL_ACCOUNT);
		if (!slot->arch.gfn_track[i])
			goto track_free;
	}

	return 0;

track_free:
	kvm_page_track_free_memslot(slot);
	return -ENOMEM;
	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
}

static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
		return false;
	if (!kvm_page_track_write_tracking_enabled(kvm))
		return 0;

	return true;
	return __kvm_page_track_write_tracking_alloc(slot, npages);
}

int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
{
	unsigned short *gfn_track;

	if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
		return 0;

	gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
			      GFP_KERNEL_ACCOUNT);
	if (gfn_track == NULL)
		return -ENOMEM;

	slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
	return 0;
	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
}

static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
			     enum kvm_page_track_mode mode, short count)
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	int index, val;

	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);

	val = slot->arch.gfn_track[mode][index];
	val = slot->arch.gfn_write_track[index];

	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
		return;

	slot->arch.gfn_track[mode][index] += count;
	slot->arch.gfn_write_track[index] += count;
}

/*
@@ -109,21 +83,14 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
 * @kvm: the guest instance we are interested in.
 * @slot: the @gfn belongs to.
 * @gfn: the guest page.
 * @mode: tracking mode, currently only write track is supported.
 */
void kvm_slot_page_track_add_page(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn,
				  enum kvm_page_track_mode mode)
				  struct kvm_memory_slot *slot, gfn_t gfn)
{

	if (WARN_ON_ONCE(!page_track_mode_is_valid(mode)))
	if (WARN_ON_ONCE(!kvm_page_track_write_tracking_enabled(kvm)))
		return;

	if (WARN_ON_ONCE(mode == KVM_PAGE_TRACK_WRITE &&
			 !kvm_page_track_write_tracking_enabled(kvm)))
		return;

	update_gfn_track(slot, gfn, mode, 1);
	update_gfn_write_track(slot, gfn, 1);

	/*
	 * new track stops large page mapping for the
@@ -131,7 +98,6 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
	 */
	kvm_mmu_gfn_disallow_lpage(slot, gfn);

	if (mode == KVM_PAGE_TRACK_WRITE)
	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
		kvm_flush_remote_tlbs(kvm);
}
@@ -148,20 +114,14 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
 * @kvm: the guest instance we are interested in.
 * @slot: the @gfn belongs to.
 * @gfn: the guest page.
 * @mode: tracking mode, currently only write track is supported.
 */
void kvm_slot_page_track_remove_page(struct kvm *kvm,
				     struct kvm_memory_slot *slot, gfn_t gfn,
				     enum kvm_page_track_mode mode)
				     struct kvm_memory_slot *slot, gfn_t gfn)
{
	if (WARN_ON_ONCE(!page_track_mode_is_valid(mode)))
		return;

	if (WARN_ON_ONCE(mode == KVM_PAGE_TRACK_WRITE &&
			 !kvm_page_track_write_tracking_enabled(kvm)))
	if (WARN_ON_ONCE(!kvm_page_track_write_tracking_enabled(kvm)))
		return;

	update_gfn_track(slot, gfn, mode, -1);
	update_gfn_write_track(slot, gfn, -1);

	/*
	 * allow large page mapping for the tracked page
@@ -176,22 +136,18 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 */
bool kvm_slot_page_track_is_active(struct kvm *kvm,
				   const struct kvm_memory_slot *slot,
				   gfn_t gfn, enum kvm_page_track_mode mode)
				   gfn_t gfn)
{
	int index;

	if (WARN_ON_ONCE(!page_track_mode_is_valid(mode)))
		return false;

	if (!slot)
		return false;

	if (mode == KVM_PAGE_TRACK_WRITE &&
	    !kvm_page_track_write_tracking_enabled(kvm))
	if (!kvm_page_track_write_tracking_enabled(kvm))
		return false;

	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
	return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
}

#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+1 −2
Original line number Diff line number Diff line
@@ -16,8 +16,7 @@ int kvm_page_track_create_memslot(struct kvm *kvm,
				  unsigned long npages);

bool kvm_slot_page_track_is_active(struct kvm *kvm,
				   const struct kvm_memory_slot *slot,
				   gfn_t gfn, enum kvm_page_track_mode mode);
				   const struct kvm_memory_slot *slot, gfn_t gfn);

#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
int kvm_page_track_init(struct kvm *kvm);
Loading