Commit ac11ca5a authored by Ben Gardon, committed by Yu Zhang

KVM: x86/mmu: Allow enabling/disabling dirty logging under MMU read lock

mainline inclusion
from mainline-v5.13-rc1
commit 24ae4cfa
category: feature
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I7S3VQ
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=24ae4cfaaaa22a4f293acd0c7d97804454b7e9fb



----------------------------------------------------------------------

To reduce lock contention and interference with page fault handlers,
allow the TDP MMU functions which enable and disable dirty logging
to operate under the MMU read lock.
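
In the mmu.c call sites this amounts to the following transformation,
condensed from the diff below (the rmap-based shadow MMU walk still
requires the exclusive write lock, so only the TDP MMU call moves):

	write_lock(&kvm->mmu_lock);
	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
				  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
	write_unlock(&kvm->mmu_lock);

	/*
	 * The TDP MMU now updates SPTEs with an atomic cmpxchg, so this
	 * walk can run concurrently with page fault handlers, which also
	 * take the MMU lock for read.
	 */
	if (is_tdp_mmu_enabled(kvm)) {
		read_lock(&kvm->mmu_lock);
		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
		read_unlock(&kvm->mmu_lock);
	}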

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210401233736.638171-12-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

conflict:
	arch/x86/kvm/mmu/tdp_mmu.c
	arch/x86/kvm/mmu/mmu.c

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
parent 274e0e4c
arch/x86/kvm/mmu/mmu.c (+24 −8)
@@ -5626,10 +5626,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
 				start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	/*
 	 * We can flush all the TLBs out of the mmu lock without TLB
 	 * corruption since we just change the spte from writable to
@@ -5732,10 +5736,14 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,

 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	/*
 	 * It's also safe to flush TLBs out of mmu lock here as currently this
 	 * function is only used for dirty logging, in which case flushing TLB
@@ -5755,10 +5763,14 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
 					false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	if (flush)
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
@@ -5771,10 +5783,14 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,

 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	if (flush)
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
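
The tdp_mmu.c changes below are what make the read-lock callers above safe:
every SPTE write in these paths is converted from a plain store to a
cmpxchg-based helper that fails if the SPTE changed underneath us, and each
loop gains a retry label. The common shape, condensed from the hunks that
follow (wrprot_gfn_range shown; clear_dirty_gfn_range and set_dirty_gfn_range
are analogous), is:

retry:
	if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
		continue;
	...
	new_spte = iter.old_spte & ~PT_WRITABLE_MASK;

	if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter, new_spte)) {
		/*
		 * Another thread raced and modified the SPTE; re-read it
		 * and retry with the fresh value.
		 */
		iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
		goto retry;
	}
	spte_set = true;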
arch/x86/kvm/mmu/tdp_mmu.c (+63 −17)
@@ -495,8 +495,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 }
 
 /*
- * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
- * associated bookkeeping
+ * tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically
+ * and handle the associated bookkeeping, but do not mark the page dirty
+ * in KVM's dirty bitmaps.
  *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
@@ -504,7 +505,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  * Returns: true if the SPTE was set, false if it was not. If false is returned,
  *	    this function will have no side-effects.
  */
-static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
 							struct tdp_iter *iter,
 							u64 new_spte)
 {
@@ -521,12 +522,25 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 		      new_spte) != iter->old_spte)
 		return false;
 
-	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
 			      new_spte, iter->level, true);
+	handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level);
 
 	return true;
 }
 
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+					   struct tdp_iter *iter,
+					   u64 new_spte)
+{
+	if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
+		return false;
+
+	handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
+				      iter->old_spte, new_spte, iter->level);
+	return true;
+}
+
 static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 					   struct tdp_iter *iter)
 {
@@ -1082,7 +1096,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,

 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
 				   min_level, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
@@ -1092,7 +1107,15 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,

 		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+		if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+							  new_spte)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 		spte_set = true;
 	}

@@ -1111,7 +1134,9 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
 			     slot->base_gfn + slot->npages, min_level);

@@ -1135,7 +1160,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 	rcu_read_lock();
 
 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
 		if (!is_shadow_present_pte(iter.old_spte))
@@ -1153,7 +1179,15 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 				continue;
 		}
 
-		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+		if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+							  new_spte)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 		spte_set = true;
 	}

@@ -1173,7 +1207,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
 				slot->base_gfn + slot->npages);

@@ -1257,8 +1293,9 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,

 	rcu_read_lock();
 
-	tdp_root_for_each_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+	tdp_root_for_each_leaf_pte(iter, root, start, end) {
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
@@ -1267,7 +1304,14 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,

 		new_spte = iter.old_spte | shadow_dirty_mask;
 
-		tdp_mmu_set_spte(kvm, &iter, new_spte);
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 		spte_set = true;
 	}

@@ -1285,7 +1329,9 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
 				slot->base_gfn + slot->npages);
 	return spte_set;
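
For reference, the correctness of the retry loops rests on the
compare-and-exchange inside tdp_mmu_set_spte_atomic_no_dirty_log(). Its core,
partially visible as context in the hunk at line 522 above, is approximately
(a sketch based on the mainline code of this era, not a verbatim excerpt from
this tree):

	/*
	 * If another thread changed the SPTE since iter->old_spte was read,
	 * the exchange fails, the function returns false with no side
	 * effects, and the caller re-reads the SPTE and retries.
	 */
	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
		      new_spte) != iter->old_spte)
		return false;

The new lockdep_assert_held_read() annotations document that the three
slot-wide entry points are now expected to be called with kvm->mmu_lock held
for read, matching the read_lock() callers added in mmu.c.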