Commit 1577cb58 authored by Oliver Upton, committed by Marc Zyngier

KVM: arm64: Handle stage-2 faults in parallel



The stage-2 map walker has been made parallel-aware, and as such can be
called while only holding the read side of the MMU lock. Rip out the
conditional locking in user_mem_abort() and instead grab the read lock.
Continue to take the write lock from other callsites to
kvm_pgtable_stage2_map().

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107220033.1895655-1-oliver.upton@linux.dev
parent af87fc03
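
A minimal sketch of the locking contract this commit establishes. kvm_pgtable_stage2_map(), KVM_PGTABLE_WALK_SHARED, and the mmu_lock accessors are the real symbols touched by the diff below; the two wrapper functions are hypothetical illustrations (not kernel code) and assume the usual KVM/arm64 headers are in scope.

/*
 * Illustrative-only callers of kvm_pgtable_stage2_map() after this
 * commit. map_exclusive()/map_shared() are made-up names for this
 * sketch; the signature of kvm_pgtable_stage2_map() matches the diff.
 */

/* Serialized caller: holds the write lock and passes no walk flags. */
static int map_exclusive(struct kvm *kvm, struct kvm_pgtable *pgt,
			 u64 ipa, u64 size, u64 pa,
			 enum kvm_pgtable_prot prot, void *memcache)
{
	int ret;

	write_lock(&kvm->mmu_lock);
	ret = kvm_pgtable_stage2_map(pgt, ipa, size, pa, prot, memcache, 0);
	write_unlock(&kvm->mmu_lock);
	return ret;
}

/*
 * Parallel fault path: the map walker is parallel-aware, so the read
 * lock suffices when KVM_PGTABLE_WALK_SHARED is passed.
 */
static int map_shared(struct kvm *kvm, struct kvm_pgtable *pgt,
		      u64 ipa, u64 size, u64 pa,
		      enum kvm_pgtable_prot prot, void *memcache)
{
	int ret;

	read_lock(&kvm->mmu_lock);
	ret = kvm_pgtable_stage2_map(pgt, ipa, size, pa, prot, memcache,
				     KVM_PGTABLE_WALK_SHARED);
	read_unlock(&kvm->mmu_lock);
	return ret;
}

As the hunks below show, only user_mem_abort() opts into the shared walk; the other callers of kvm_pgtable_stage2_map() (the pKVM host stage-2 idmap, kvm_phys_addr_ioremap(), kvm_set_spte_gfn()) stay serialized on the write lock and pass 0.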
arch/arm64/include/asm/kvm_pgtable.h +2 −1

@@ -412,6 +412,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
  * @prot:	Permissions and attributes for the mapping.
  * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
  *		page-table pages.
+ * @flags:	Flags to control the page-table walk (ex. a shared walk)
  *
  * The offset of @addr within a page is ignored, @size is rounded-up to
  * the next page boundary and @phys is rounded-down to the previous page
@@ -433,7 +434,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
  */
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 			   u64 phys, enum kvm_pgtable_prot prot,
-			   void *mc);
+			   void *mc, enum kvm_pgtable_walk_flags flags);
 
 /**
  * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
arch/arm64/kvm/hyp/nvhe/mem_protect.c +1 −1

@@ -257,7 +257,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
 				      enum kvm_pgtable_prot prot)
 {
 	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
-				      prot, &host_s2_pool);
+				      prot, &host_s2_pool, 0);
 }
 
 /*
arch/arm64/kvm/hyp/pgtable.c +3 −2

@@ -912,7 +912,7 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 			   u64 phys, enum kvm_pgtable_prot prot,
-			   void *mc)
+			   void *mc, enum kvm_pgtable_walk_flags flags)
 {
 	int ret;
 	struct stage2_map_data map_data = {
@@ -923,7 +923,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
-		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
+		.flags		= flags |
+				  KVM_PGTABLE_WALK_TABLE_PRE |
 				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
arch/arm64/kvm/mmu.c +7 −24

@@ -861,7 +861,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 		write_lock(&kvm->mmu_lock);
 		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
-					     &cache);
+					     &cache, 0);
 		write_unlock(&kvm->mmu_lock);
 		if (ret)
 			break;
@@ -1156,7 +1156,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
-	bool use_read_lock = false;
 	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
 	unsigned long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1191,8 +1190,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
-		use_read_lock = (fault_status == FSC_PERM && write_fault &&
-				 fault_granule == PAGE_SIZE);
 	} else {
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
@@ -1291,15 +1288,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault && device)
 		return -ENOEXEC;
 
-	/*
-	 * To reduce MMU contentions and enhance concurrency during dirty
-	 * logging dirty logging, only acquire read lock for permission
-	 * relaxation.
-	 */
-	if (use_read_lock)
-		read_lock(&kvm->mmu_lock);
-	else
-		write_lock(&kvm->mmu_lock);
+	read_lock(&kvm->mmu_lock);
 	pgt = vcpu->arch.hw_mmu->pgt;
 	if (mmu_invalidate_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -1343,15 +1332,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
 	 * kvm_pgtable_stage2_map() should be called to change block size.
 	 */
-	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
+	if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
-	} else {
-		WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
-
+	else
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
 					     __pfn_to_phys(pfn), prot,
-					     memcache);
-	}
+					     memcache, KVM_PGTABLE_WALK_SHARED);
 
 	/* Mark the page dirty only if the fault is handled successfully */
 	if (writable && !ret) {
@@ -1360,10 +1346,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 out_unlock:
-	if (use_read_lock)
-		read_unlock(&kvm->mmu_lock);
-	else
-		write_unlock(&kvm->mmu_lock);
+	read_unlock(&kvm->mmu_lock);
 	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret != -EAGAIN ? ret : 0;
@@ -1569,7 +1552,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	 */
 	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
 			       PAGE_SIZE, __pfn_to_phys(pfn),
-			       KVM_PGTABLE_PROT_R, NULL);
+			       KVM_PGTABLE_PROT_R, NULL, 0);
 
 	return false;
 }