Commit 2d84f3ce authored by Marc Zyngier's avatar Marc Zyngier
Browse files

Merge branch kvm-arm64/mmu/mapping-levels into kvmarm-master/next



Revamp the KVM/arm64 THP code by parsing the userspace
page tables instead of relying on an infrastructure that
is about to disappear (we are the last user).

* kvm-arm64/mmu/mapping-levels:
  KVM: Get rid of kvm_get_pfn()
  KVM: arm64: Use get_page() instead of kvm_get_pfn()
  KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()
  KVM: arm64: Avoid mapping size adjustment on permission fault
  KVM: arm64: Walk userspace page tables to compute the THP mapping size
  KVM: arm64: Introduce helper to retrieve a PTE and its level

Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
parents a4516f32 36c3ce6c
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker);

/**
 * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
 *			    with its level.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
 *		or a similar initialiser.
 * @addr:	Input address for the start of the walk.
 * @ptep:	Pointer to storage for the retrieved PTE.
 * @level:	Pointer to storage for the level of the retrieved PTE.
 *
 * The offset of @addr within a page is ignored.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address specified, retrieving the leaf corresponding to this address.
 * Invalid entries are treated as leaf entries.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level);

/**
 * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
 *				     Addresses with compatible permission
+39 −0
Original line number Diff line number Diff line
@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
	return _kvm_pgtable_walk(&walk_data);
}

struct leaf_walk_data {
	kvm_pte_t	pte;
	u32		level;
};

static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
		       enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct leaf_walk_data *data = arg;

	data->pte   = *ptep;
	data->level = level;

	return 0;
}

int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level)
{
	struct leaf_walk_data data;
	struct kvm_pgtable_walker walker = {
		.cb	= leaf_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &data,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
			       PAGE_SIZE, &walker);
	if (!ret) {
		if (ptep)
			*ptep  = data.pte;
		if (level)
			*level = data.level;
	}

	return ret;
}

struct hyp_map_data {
	u64				phys;
	kvm_pte_t			attr;
+38 −7
Original line number Diff line number Diff line
@@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
	return 0;
}

static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
	/* We shouldn't need any other callback to walk the PT */
	.phys_to_virt		= kvm_host_va,
};

static int get_user_mapping_size(struct kvm *kvm, u64 addr)
{
	struct kvm_pgtable pgt = {
		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
		.ia_bits	= VA_BITS,
		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
				   CONFIG_PGTABLE_LEVELS),
		.mm_ops		= &kvm_user_mm_ops,
	};
	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
	u32 level = ~0;
	int ret;

	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
	VM_BUG_ON(ret);
	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
	VM_BUG_ON(!(pte & PTE_VALID));

	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
@@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
 * Returns the size of the mapping.
 */
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
@@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
	 * sure that the HVA and IPA are sufficiently aligned and that the
	 * block map is contained within the memslot.
	 */
	if (kvm_is_transparent_hugepage(pfn) &&
	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
@@ -814,7 +840,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		kvm_get_pfn(pfn);
		get_page(pfn_to_page(pfn));
		*pfnp = pfn;

		return PMD_SIZE;
@@ -1050,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
							   &pfn, &fault_ipa);
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
			vma_pagesize = fault_granule;
		else
			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
								   hva, &pfn,
								   &fault_ipa);
	}

	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
+0 −1
Original line number Diff line number Diff line
@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
void kvm_get_pfn(kvm_pfn_t pfn);

void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+0 −37
Original line number Diff line number Diff line
@@ -632,43 +632,6 @@ static inline int PageTransCompound(struct page *page)
	return PageCompound(page);
}

/*
 * PageTransCompoundMap is the same as PageTransCompound, but it also
 * guarantees the primary MMU has the entire compound page mapped
 * through pmd_trans_huge, which in turn guarantees the secondary MMUs
 * can also map the entire compound page. This allows the secondary
 * MMUs to call get_user_pages() only once for each compound page and
 * to immediately map the entire compound page with a single secondary
 * MMU fault. If there will be a pmd split later, the secondary MMUs
 * will get an update through the MMU notifier invalidation through
 * split_huge_pmd().
 *
 * Unlike PageTransCompound, this is safe to be called only while
 * split_huge_pmd() cannot run from under us, like if protected by the
 * MMU notifier, otherwise it may result in page->_mapcount check false
 * positives.
 *
 * We have to treat page cache THP differently since every subpage of it
 * would get _mapcount inc'ed once it is PMD mapped.  But, it may be PTE
 * mapped in the current process so comparing subpage's _mapcount to
 * compound_mapcount to filter out PTE mapped case.
 */
static inline int PageTransCompoundMap(struct page *page)
{
	struct page *head;

	if (!PageTransCompound(page))
		return 0;

	if (PageAnon(page))
		return atomic_read(&page->_mapcount) < 0;

	head = compound_head(page);
	/* File THP is PMD mapped and not PTE mapped */
	return atomic_read(&page->_mapcount) ==
	       atomic_read(compound_mapcount_ptr(head));
}

/*
 * PageTransTail returns true for both transparent huge pages
 * and hugetlbfs pages, so it should only be called when it's known
Loading