Commit ac63cf0a authored by Sean Christopherson, committed by Yu Zhang
Browse files

KVM: Move MMU notifier's mmu_lock acquisition into common helper

mainline inclusion
from mainline-v5.13-rc1
commit f922bd9b
category: feature
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I7S3VQ
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f922bd9bf33bd5a8c6694927f010f32127810fbf



----------------------------------------------------------------------

Acquire and release mmu_lock in the __kvm_handle_hva_range() helper
instead of requiring the caller to do the same.  This paves the way for
future patches to take mmu_lock if and only if an overlapping memslot is
found, without also having to introduce the on_lock() shenanigans used
to manipulate the notifier count and sequence.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210402005658.3024832-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

conflict:
	virt/kvm/kvm_main.c

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
parent ef18d479
Loading
Loading
Loading
Loading
+71 −45
Original line number Diff line number Diff line
@@ -513,28 +513,57 @@ static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,

/*
 * Per-range callback carried in kvm_hva_range.handler.
 * NOTE(review): the call site is outside this view; presumably the bool
 * result reports whether the caller must flush TLBs - confirm.
 */
typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);

/*
 * Hook carried in kvm_hva_range.on_lock.  __kvm_handle_hva_range() calls it
 * with the range's start and end right after taking mmu_lock, before any
 * memslot is walked.
 */
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
			     unsigned long end);

/*
 * Describes one request handed to __kvm_handle_hva_range(): the range to
 * operate on plus the callbacks and flags that steer the processing.
 */
struct kvm_hva_range {
	/* Range bounds; forwarded unchanged to on_lock(). */
	unsigned long start;
	unsigned long end;
	/* NOTE(review): consumer not visible here; callers pass a pte or __pte(0). */
	pte_t pte;
	/* Per-range callback; may be the kvm_null_fn stub only if on_lock is set. */
	hva_handler_t handler;
	/* Invoked once mmu_lock is held; the kvm_null_fn stub means "no hook". */
	on_lock_fn_t on_lock;
	/* If set, remote TLBs are flushed when ret or kvm->tlbs_dirty is non-zero. */
	bool flush_on_ret;
	/*
	 * NOTE(review): consumer not visible here; set from
	 * mmu_notifier_range_blockable() by invalidate_range_start.
	 */
	bool may_block;
};

/*
 * Sentinel meaning "no callback/handler supplied".
 *
 * A dedicated stub is used rather than NULL: the compiler cannot prove that
 * a genuine function never lives at address zero, so a !NULL test would have
 * to be emitted as real code.  A comparison against this stub, by contrast,
 * can be resolved at compile time and dropped entirely (unless the compiler
 * is getting long in the tooth, e.g. gcc 4.9).
 */
static void kvm_null_fn(void)
{
	/* Deliberately empty: this function only serves as a sentinel. */
}
#define IS_KVM_NULL_FN(fn) ((void *)kvm_null_fn == (fn))

static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
						  const struct kvm_hva_range *range)
{
	struct kvm_gfn_range gfn_range;
	struct kvm_memory_slot *slot;
	struct kvm_memslots *slots;
	struct kvm_gfn_range gfn_range;
	bool ret = false;
	int i, idx;

	lockdep_assert_held_write(&kvm->mmu_lock);
	/* A null handler is allowed if and only if on_lock() is provided. */
	if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
			 IS_KVM_NULL_FN(range->handler)))
		return 0;

	KVM_MMU_LOCK(kvm);

	idx = srcu_read_lock(&kvm->srcu);

	if (!IS_KVM_NULL_FN(range->on_lock)) {
		range->on_lock(kvm, range->start, range->end);

		if (IS_KVM_NULL_FN(range->handler))
			goto out_unlock;
	}

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		slots = __kvm_memslots(kvm, i);
		kvm_for_each_memslot(slot, slots) {
@@ -570,6 +599,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
	if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
		kvm_flush_remote_tlbs(kvm);

out_unlock:
	KVM_MMU_UNLOCK(kvm);

	srcu_read_unlock(&kvm->srcu, idx);

	/* The notifiers are averse to booleans. :-( */
@@ -588,16 +620,12 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
		.end		= end,
		.pte		= pte,
		.handler	= handler,
		.on_lock	= (void *)kvm_null_fn,
		.flush_on_ret	= true,
		.may_block	= false,
	};
	int ret;

	KVM_MMU_LOCK(kvm);
	ret = __kvm_handle_hva_range(kvm, &range);
	KVM_MMU_UNLOCK(kvm);

	return ret;
	return __kvm_handle_hva_range(kvm, &range);
}

static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
@@ -611,16 +639,41 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
		.end		= end,
		.pte		= __pte(0),
		.handler	= handler,
		.on_lock	= (void *)kvm_null_fn,
		.flush_on_ret	= false,
		.may_block	= false,
	};
	int ret;

	KVM_MMU_LOCK(kvm);
	ret = __kvm_handle_hva_range(kvm, &range);
	KVM_MMU_UNLOCK(kvm);
	return __kvm_handle_hva_range(kvm, &range);
}

	return ret;
/*
 * on_lock() hook used by invalidate_range_start: record that one more
 * invalidation is in flight and fold [start, end) into the range tracked
 * in @kvm.
 */
static void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
				   unsigned long end)
{
	/*
	 * No spte can be established without taking mmu_lock, and the count
	 * is read inside the mmu_lock critical section as well, so the
	 * increment only has to become visible by unlock time.
	 */
	kvm->mmu_notifier_count++;

	/* First outstanding invalidation: the tracked range is simply ours. */
	if (likely(kvm->mmu_notifier_count == 1)) {
		kvm->mmu_notifier_range_start = start;
		kvm->mmu_notifier_range_end = end;
		return;
	}

	/*
	 * Tracking every concurrent range individually has diminishing
	 * returns, so just widen the tracked range to the smallest one that
	 * covers both the existing and the new range.  A range cannot be
	 * subtracted again once its invalidation completes, hence
	 * concurrently invalidated ranges accumulate and persist until all
	 * outstanding invalidations have finished.
	 */
	kvm->mmu_notifier_range_start =
		min(kvm->mmu_notifier_range_start, start);
	kvm->mmu_notifier_range_end =
		max(kvm->mmu_notifier_range_end, end);
}
#endif /* KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS */

@@ -663,6 +716,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
		.end		= range->end,
		.pte		= __pte(0),
		.handler	= kvm_unmap_gfn_range,
		.on_lock	= kvm_inc_notifier_count,
		.flush_on_ret	= true,
		.may_block	= mmu_notifier_range_blockable(range),
	};
@@ -672,47 +726,19 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,

	trace_kvm_unmap_hva_range(range->start, range->end);

#ifndef KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS
	idx = srcu_read_lock(&kvm->srcu);
#endif

	KVM_MMU_LOCK(kvm);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_notifier_count++;
	if (likely(kvm->mmu_notifier_count == 1)) {
		kvm->mmu_notifier_range_start = range->start;
		kvm->mmu_notifier_range_end = range->end;
	} else {
		/*
		 * Fully tracking multiple concurrent ranges has dimishing
		 * returns. Keep things simple and just find the minimal range
		 * which includes the current and new ranges. As there won't be
		 * enough information to subtract a range after its invalidate
		 * completes, any ranges invalidated concurrently will
		 * accumulate and persist until all outstanding invalidates
		 * complete.
		 */
		kvm->mmu_notifier_range_start =
			min(kvm->mmu_notifier_range_start, range->start);
		kvm->mmu_notifier_range_end =
			max(kvm->mmu_notifier_range_end, range->end);
	}

#ifdef KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS
	__kvm_handle_hva_range(kvm, &hva_range);
#else
	idx = srcu_read_lock(&kvm->srcu);
	KVM_MMU_LOCK(kvm);
	need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
					     range->flags);
	/* we've to flush the tlb before the pages can be freed */
	if (need_tlb_flush || kvm->tlbs_dirty)
		kvm_flush_remote_tlbs(kvm);
	KVM_MMU_UNLOCK(kvm);
#endif
	
	KVM_MMU_UNLOCK(kvm);
	kvm_arch_guest_memory_reclaimed(kvm);
#ifndef KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS
	srcu_read_unlock(&kvm->srcu, idx);