Commit 32ab5a5e authored by Marc Zyngier

Merge branch kvm-arm64/mmu/MMIO-block-mapping into kvmarm-master/next

MMIO block mapping support from Keqian Zhu, allowing larger
(and lazy) mappings for devices assigned to guests.

* kvm-arm64/mmu/MMIO-block-mapping:
  KVM: arm64: Try stage2 block mapping for host device MMIO
  KVM: arm64: Remove the creation time's mapping of MMIO regions
parents 32e92b71 2aa53d68
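
For context on the first patch: a stage 2 block mapping (PMD- or PUD-sized) is only attempted for a VM_PFNMAP region when the faulting host VA and the physical address it maps to share the same offset within the block, and the block fits inside the VMA; that is the rule the new get_vma_page_shift() helper in the diff below encodes. The following is a minimal userspace sketch of that rule, not kernel code: the can_use_block() helper, the block-size macros, the VMA bounds and the BAR address are all made-up example values, assuming 2MiB/1GiB block sizes as on a 4K-page arm64 configuration.

/*
 * Standalone illustration (not kernel code) of the alignment rule applied by
 * get_vma_page_shift(): a block of size 'block' is usable only when the host
 * VA and the PA are congruent modulo the block size and the naturally
 * aligned block around the VA lies entirely inside the VMA.
 */
#include <stdbool.h>
#include <stdio.h>

#define PMD_BLOCK (2UL << 20)	/* 2 MiB, assuming 4K pages */
#define PUD_BLOCK (1UL << 30)	/* 1 GiB, assuming 4K pages */

static bool can_use_block(unsigned long hva, unsigned long pa,
			  unsigned long vm_start, unsigned long vm_end,
			  unsigned long block)
{
	/* hva and pa must share the same offset within the block... */
	if ((hva & (block - 1)) != (pa & (block - 1)))
		return false;
	/* ...and the aligned block must not spill outside the VMA. */
	return (hva & ~(block - 1)) >= vm_start &&
	       (hva & ~(block - 1)) + block <= vm_end;
}

int main(void)
{
	/* Hypothetical 4MiB BAR mapped at a 2MiB-aligned VA and PA. */
	unsigned long vm_start = 0x7f0000200000UL;
	unsigned long vm_end   = vm_start + (4UL << 20);
	unsigned long pa_base  = 0xe0000000UL;
	unsigned long hva      = vm_start + 0x1000;	/* faulting address */
	unsigned long pa       = pa_base + (hva - vm_start);

	printf("2MiB block usable: %d\n",
	       can_use_block(hva, pa, vm_start, vm_end, PMD_BLOCK));
	printf("1GiB block usable: %d\n",
	       can_use_block(hva, pa, vm_start, vm_end, PUD_BLOCK));
	return 0;
}

Built with any C compiler this reports that the 2MiB block is usable while the 1GiB one is not, which mirrors how get_vma_page_shift() falls back from PUD_SHIFT to PMD_SHIFT to PAGE_SHIFT.
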
@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+	unsigned long pa;
+
+	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+		return huge_page_shift(hstate_vma(vma));
+
+	if (!(vma->vm_flags & VM_PFNMAP))
+		return PAGE_SHIFT;
+
+	VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+		return PUD_SHIFT;
+#endif
+
+	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+		return PMD_SHIFT;
+
+	return PAGE_SHIFT;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	/*
+	 * Let's check if we will get back a huge page backed by hugetlbfs, or
+	 * get block mapping for device MMIO region.
+	 */
 	mmap_read_lock(current->mm);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
 	if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma))
-		vma_shift = huge_page_shift(hstate_vma(vma));
-	else
-		vma_shift = PAGE_SHIFT;
-
-	if (logging_active ||
-	    (vma->vm_flags & VM_PFNMAP)) {
+	/*
+	 * logging_active is guaranteed to never be true for VM_PFNMAP
+	 * memslots.
+	 */
+	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
+	} else {
+		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
 	switch (vma_shift) {
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 
 	if (kvm_is_device_pfn(pfn)) {
+		/*
+		 * If the page was identified as device early by looking at
+		 * the VMA flags, vma_pagesize is already representing the
+		 * largest quantity we can map.  If instead it was mapped
+		 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+		 * and must not be upgraded.
+		 *
+		 * In both cases, we don't let transparent_hugepage_adjust()
+		 * change things at the last minute.
+		 */
 		device = true;
-		force_pte = true;
 	} else if (logging_active && !write_fault) {
 		/*
 		 * Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !force_pte)
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
 		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
 							   &pfn, &fault_ipa);
 	if (writable)
@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	hva_t hva = mem->userspace_addr;
 	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 
 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	mmap_read_lock(current->mm);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
+	 * between them, so iterate over all of them.
 	 *
 	 *     +--------------------------------------------+
 	 * +---------------+----------------+   +----------------+
@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 */
 	do {
 		struct vm_area_struct *vma;
-		hva_t vm_start, vm_end;
 
 		vma = find_vma_intersection(current->mm, hva, reg_end);
 		if (!vma)
 			break;
 
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
-				goto out;
-			}
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
 				break;
+			}
 		}
-		hva = vm_end;
+		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);
 
-	if (change == KVM_MR_FLAGS_ONLY)
-		goto out;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-out:
 	mmap_read_unlock(current->mm);
 	return ret;
 }