KVM: x86/mmu: Split huge pages mapped by the TDP MMU when dirty logging is enabled (a3fe5dbd) · Commits · EulixOS / Software / Kernel

Documentation/admin-guide/kernel-parameters.txt

+24 −0

Original line number	Diff line number	Diff line
		@@ -2339,6 +2339,30 @@
		kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
		Default is 0 (don't ignore, but inject #GP)

		kvm.eager_page_split=
		[KVM,X86] Controls whether or not KVM will try to
		proactively split all huge pages during dirty logging.
		Eager page splitting reduces interruptions to vCPU
		execution by eliminating the write-protection faults
		and MMU lock contention that would otherwise be
		required to split huge pages lazily.

		VM workloads that rarely perform writes or that write
		only to a small region of VM memory may benefit from
		disabling eager page splitting to allow huge pages to
		still be used for reads.

		The behavior of eager page splitting depends on whether
		KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
		KVM_DIRTY_LOG_INITIALLY_SET is disabled, all huge pages
		in a memslot will be eagerly split when dirty logging is
		enabled on that memslot. If KVM_DIRTY_LOG_INITIALLY_SET
		is enabled, huge pages will not be eagerly split.

		Eager page splitting currently only supports splitting
		huge pages mapped by the TDP MMU.

		Default is Y (on).

		kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
		Default is false (don't support).

+3 −0

Original line number	Diff line number	Diff line
		@@ -1587,6 +1587,9 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
		void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
		const struct kvm_memory_slot *memslot,
		int start_level);
		/*
		 * Try to split the huge pages that map @memslot. The slot's whole GFN
		 * range is handed to the TDP MMU splitting helper together with
		 * @target_level; nothing is done when the TDP MMU is not enabled.
		 */
		void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
		const struct kvm_memory_slot *memslot,
		int target_level);
		void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
		const struct kvm_memory_slot *memslot);
		void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,

+24 −0

Original line number	Diff line number	Diff line
		@@ -5830,6 +5830,30 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
		}

		/*
		 * Try to split every huge page backing @memslot, passing @target_level
		 * through to the TDP MMU. Only the TDP MMU is handled; when it is not
		 * enabled this function does nothing.
		 *
		 * mmu_lock is taken for read around the call into the TDP MMU.
		 */
		void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
						       const struct kvm_memory_slot *memslot,
						       int target_level)
		{
			u64 first_gfn = memslot->base_gfn;
			u64 end_gfn = first_gfn + memslot->npages;

			if (!is_tdp_mmu_enabled(kvm))
				return;

			read_lock(&kvm->mmu_lock);
			kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, first_gfn, end_gfn,
							 target_level);
			read_unlock(&kvm->mmu_lock);

			/*
			 * Deliberately no TLB flush at this point. The split SPTEs keep
			 * the write and dirty bits of the huge SPTE they replace, so KVM
			 * can safely decide whether a flush is needed by looking at the
			 * split SPTEs. That flush happens after the new SPTEs are
			 * write-protected and/or have their dirty bits cleared, which
			 * guarantees guest writes show up in the dirty log before the
			 * ioctl that enables dirty logging on this memslot returns.
			 */
		}

		static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
		struct kvm_rmap_head *rmap_head,
		const struct kvm_memory_slot *slot)

+59 −0

Original line number	Diff line number	Diff line
		@@ -192,6 +192,65 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
		return wrprot;
		}

		/*
		 * Return a copy of @spte that grants execute permission: the bits in
		 * shadow_nx_mask are cleared and the bits in shadow_x_mask are set.
		 *
		 * If @spte is an access-tracked SPTE it is first restored with
		 * restore_acc_track_spte(), modified, and then marked for access
		 * tracking again, so the permission change is applied to the restored
		 * form of the SPTE rather than to the access-tracked encoding.
		 */
		static u64 make_spte_executable(u64 spte)
		{
			bool is_access_track = is_access_track_spte(spte);

			if (is_access_track)
				spte = restore_acc_track_spte(spte);

			spte &= ~shadow_nx_mask;
			spte |= shadow_x_mask;

			/* Put the SPTE back into the access-tracked state it arrived in. */
			if (is_access_track)
				spte = mark_spte_for_access_track(spte);

			return spte;
		}

		/*
		 * Construct an SPTE that maps a sub-page of the given huge page SPTE where
		 * `index` identifies which sub-page.
		 *
		 * This is used during huge page splitting to build the SPTEs that make up the
		 * new page table.
		 *
		 * @huge_spte:  the huge page SPTE being split; must be shadow-present and
		 *              large.
		 * @huge_level: the level at which @huge_spte maps; the returned SPTE is
		 *              for the next lower level.
		 * @index:      which sub-page, at the lower level, the new SPTE maps.
		 *
		 * Returns the child SPTE, or 0 (after a one-time warning) if @huge_spte is
		 * not a present large SPTE.
		 */
		u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index)
		{
			u64 child_spte;
			int child_level;

			if (WARN_ON_ONCE(!is_shadow_present_pte(huge_spte)))
				return 0;

			if (WARN_ON_ONCE(!is_large_pte(huge_spte)))
				return 0;

			/* Start from the huge SPTE so the child keeps all of its other bits. */
			child_spte = huge_spte;
			child_level = huge_level - 1;

			/*
			 * The child_spte already has the base address of the huge page being
			 * split. So we just have to OR in the offset to the page at the next
			 * lower level for the given index.
			 */
			child_spte |= (index * KVM_PAGES_PER_HPAGE(child_level)) << PAGE_SHIFT;

			if (child_level == PG_LEVEL_4K) {
				/* A 4K mapping is a regular leaf: drop the page-size bit. */
				child_spte &= ~PT_PAGE_SIZE_MASK;

				/*
				 * When splitting to a 4K page, mark the page executable as the
				 * NX hugepage mitigation no longer applies.
				 */
				if (is_nx_huge_page_enabled())
					child_spte = make_spte_executable(child_spte);
			}

			return child_spte;
		}


		u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
		{
		u64 spte = SPTE_MMU_PRESENT_MASK;

+1 −0

Original line number	Diff line number	Diff line
		@@ -415,6 +415,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
		unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
		u64 old_spte, bool prefetch, bool can_unsync,
		bool host_writable, u64 *new_spte);
		/*
		 * Build the SPTE for sub-page @index of @huge_spte, one level below
		 * @huge_level. Returns 0 if @huge_spte is not a present large SPTE.
		 */
		u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index);
		u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled);
		u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access);
		u64 mark_spte_for_access_track(u64 spte);