Commit 825e34d3 authored by Paolo Bonzini

Merge commit 'kvm-tdp-fix-flushes' into kvm-master

parents 6fb3084a 33a31641
+5 −4
@@ -5884,6 +5884,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	struct kvm_mmu_page *sp;
 	unsigned int ratio;
 	LIST_HEAD(invalid_list);
+	bool flush = false;
 	ulong to_zap;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5905,19 +5906,19 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 				      lpage_disallowed_link);
 		WARN_ON_ONCE(!sp->lpage_disallowed);
 		if (is_tdp_mmu_page(sp)) {
-			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+			flush = kvm_tdp_mmu_zap_sp(kvm, sp);
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 			WARN_ON_ONCE(sp->lpage_disallowed);
 		}
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 			cond_resched_rwlock_write(&kvm->mmu_lock);
+			flush = false;
 		}
 	}
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
 	write_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
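
The hunk above makes kvm_recover_nx_lpages() track whether the TDP MMU zap left a TLB flush pending and perform it via kvm_mmu_remote_flush_or_zap() both before yielding mmu_lock and once the loop finishes, so the flush is not silently dropped. Below is a minimal standalone C sketch of that accumulate-then-flush-before-yield pattern; zap_one_page(), remote_flush_if_needed() and need_to_yield() are hypothetical stand-ins for illustration, not KVM APIs.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the KVM primitives; not kernel APIs. */
static bool zap_one_page(int page)
{
	printf("zap page %d\n", page);
	return true;			/* a TLB flush is now pending */
}

static void remote_flush_if_needed(bool flush)
{
	if (flush)
		printf("remote TLB flush\n");
}

static bool need_to_yield(int i)
{
	return (i % 4) == 3;		/* pretend the lock is contended every 4th step */
}

int main(void)
{
	bool flush = false;

	for (int i = 0; i < 10; i++) {
		/* Accumulate pending-flush state from each zap. */
		flush |= zap_one_page(i);

		if (need_to_yield(i)) {
			/* Flush before "dropping the lock", then clear the flag. */
			remote_flush_if_needed(flush);
			flush = false;
			/* a cond_resched()-style yield would happen here */
		}
	}

	/* Final flush covers zaps done since the last yield. */
	remote_flush_if_needed(flush);
	return 0;
}
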
+14 −12
@@ -86,7 +86,7 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
 
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-			  gfn_t start, gfn_t end, bool can_yield);
+			  gfn_t start, gfn_t end, bool can_yield, bool flush);
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
@@ -99,7 +99,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 
 	list_del(&root->link);
 
-	zap_gfn_range(kvm, root, 0, max_gfn, false);
+	zap_gfn_range(kvm, root, 0, max_gfn, false, false);
 
 	free_page((unsigned long)root->spt);
 	kmem_cache_free(mmu_page_header_cache, root);
@@ -668,20 +668,21 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
  * scheduler needs the CPU or there is contention on the MMU lock. If this
  * function cannot yield, it will not release the MMU lock or reschedule and
  * the caller must ensure it does not supply too large a GFN range, or the
- * operation can cause a soft lockup.
+ * operation can cause a soft lockup.  Note, in some use cases a flush may be
+ * required by prior actions.  Ensure the pending flush is performed prior to
+ * yielding.
  */
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-			  gfn_t start, gfn_t end, bool can_yield)
+			  gfn_t start, gfn_t end, bool can_yield, bool flush)
 {
 	struct tdp_iter iter;
-	bool flush_needed = false;
 
 	rcu_read_lock();
 
 	tdp_root_for_each_pte(iter, root, start, end) {
 		if (can_yield &&
-		    tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
-			flush_needed = false;
+		    tdp_mmu_iter_cond_resched(kvm, &iter, flush)) {
+			flush = false;
 			continue;
 		}
 
@@ -699,11 +700,11 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			continue;
 
 		tdp_mmu_set_spte(kvm, &iter, 0);
-		flush_needed = true;
+		flush = true;
 	}
 
 	rcu_read_unlock();
-	return flush_needed;
+	return flush;
 }
 
 /*
@@ -712,13 +713,14 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield)
 {
 	struct kvm_mmu_page *root;
 	bool flush = false;
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root)
-		flush |= zap_gfn_range(kvm, root, start, end, true);
+		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);
 
 	return flush;
 }
@@ -930,7 +932,7 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
 				     struct kvm_mmu_page *root, gfn_t start,
 				     gfn_t end, unsigned long unused)
 {
-	return zap_gfn_range(kvm, root, start, end, false);
+	return zap_gfn_range(kvm, root, start, end, false, false);
 }
 
 int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
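
In this file, zap_gfn_range() now accepts the caller's pending-flush state and returns the combined result, so a flush owed by earlier work is honored before the helper yields rather than being lost; the caller accordingly uses plain assignment instead of |= when chaining across roots. Here is a small standalone C sketch of threading the flag through such a helper; zap_range() is a hypothetical illustration, not the kernel function.

#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical helper mirroring the shape of zap_gfn_range() above: it takes
 * the caller's pending-flush state, flushes before any yield, and returns the
 * combined state so nothing is lost across roots.
 */
static bool zap_range(int root, int start, int end, bool can_yield, bool flush)
{
	for (int gfn = start; gfn < end; gfn++) {
		if (can_yield && (gfn % 8) == 7) {
			/* About to yield: honor any flush owed so far. */
			if (flush)
				printf("flush before yielding (root %d, gfn %d)\n", root, gfn);
			flush = false;
			continue;
		}
		/* "Zap" the entry; a flush is now required before the lock drops. */
		flush = true;
	}
	return flush;
}

int main(void)
{
	bool flush = false;

	/*
	 * Plain assignment, not |=: the helper already folds the incoming
	 * state into its return value.
	 */
	for (int root = 0; root < 3; root++)
		flush = zap_range(root, 0, 32, true, flush);

	if (flush)
		printf("final flush\n");
	return 0;
}
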
+23 −1
@@ -8,7 +8,29 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield);
+static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start,
+					     gfn_t end)
+{
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, start, end, true);
+}
+static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+
+	/*
+	 * Don't allow yielding, as the caller may have a flush pending.  Note,
+	 * if mmu_lock is held for write, zapping will never yield in this case,
+	 * but explicitly disallow it for safety.  The TDP MMU does not yield
+	 * until it has made forward progress (steps sideways), and when zapping
+	 * a single shadow page that it's guaranteed to see (thus the mmu_lock
+	 * requirement), its "step sideways" will always step beyond the bounds
+	 * of the shadow page's gfn range and stop iterating before yielding.
+	 */
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, end, false);
+}
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
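
The header change splits the old entry point into a __kvm_tdp_mmu_zap_gfn_range() worker that takes can_yield, plus thin static inline wrappers: the ranged wrapper keeps yielding enabled, while kvm_tdp_mmu_zap_sp() disables it so a caller's pending flush cannot be dropped across a yield, and returns whether the caller must flush. A rough standalone C sketch of that worker-plus-wrappers shape follows; all names here are hypothetical illustrations, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

/* Worker with the extra knob, analogous in shape to __kvm_tdp_mmu_zap_gfn_range(). */
static bool zap_range_worker(long start, long end, bool can_yield)
{
	printf("zap [%ld, %ld), can_yield=%d\n", start, end, can_yield);
	return end > start;		/* flush needed if anything was zapped */
}

/* Ranged wrapper keeps the old behavior: yielding allowed. */
static inline bool zap_range(long start, long end)
{
	return zap_range_worker(start, end, true);
}

/*
 * Single-page wrapper: yielding disallowed, so a caller's pending flush
 * cannot be dropped while the lock is released.
 */
static inline bool zap_single_page(long gfn, long pages_per_hpage)
{
	return zap_range_worker(gfn, gfn + pages_per_hpage, false);
}

int main(void)
{
	bool flush = zap_range(0, 512);

	flush |= zap_single_page(4096, 512);
	if (flush)
		printf("caller performs the TLB flush\n");
	return 0;
}
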