Merge branch kvm-arm64/mmu/mapping-levels into kvmarm-master/next (2d84f3ce) · Commits · EulixOS / Software / Kernel

arch/arm64/include/asm/kvm_pgtable.h

+20 −0

Original line number	Diff line number	Diff line
		@@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
		int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		struct kvm_pgtable_walker *walker);

		/**
		 * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
		 *			    with its level.
		 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
		 *		or a similar initialiser.
		 * @addr:	Input address for the start of the walk.
		 * @ptep:	Pointer to storage for the retrieved PTE. May be NULL if the
		 *		caller does not need the entry.
		 * @level:	Pointer to storage for the level of the retrieved PTE. May be
		 *		NULL if the caller does not need the level.
		 *
		 * The offset of @addr within a page is ignored.
		 *
		 * The walker will walk the page-table entries corresponding to the input
		 * address specified, retrieving the leaf corresponding to this address.
		 * Invalid entries are treated as leaf entries.
		 *
		 * @ptep and @level are only written when the walk succeeds.
		 *
		 * Return: 0 on success, negative error code on failure.
		 */
		int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
					 kvm_pte_t *ptep, u32 *level);

		/**
		* kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
		* Addresses with compatible permission

arch/arm64/kvm/hyp/pgtable.c

+39 −0

Original line number	Diff line number	Diff line
		@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		return _kvm_pgtable_walk(&walk_data);
		}

		/*
		 * Scratch storage handed to leaf_walker() through the walker's ->arg;
		 * kvm_pgtable_get_leaf() copies the results out once the walk succeeds.
		 */
		struct leaf_walk_data {
		/* Copy of the page-table entry the walker callback was invoked on. */
		kvm_pte_t pte;
		/* Level reported to the walker callback for that entry. */
		u32 level;
		};

		/*
		 * Walker callback used by kvm_pgtable_get_leaf(): snapshot the entry the
		 * walker is currently visiting, together with its level, into the
		 * struct leaf_walk_data passed in via @arg. @addr, @end and @flag are
		 * not used. Always returns 0.
		 */
		static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				       enum kvm_pgtable_walk_flags flag, void * const arg)
		{
			struct leaf_walk_data *result = arg;

			result->level = level;
			result->pte = *ptep;

			return 0;
		}

		/*
		 * Retrieve the leaf entry covering @addr, and the level it lives at.
		 *
		 * A leaf-only walk is run over the single page containing @addr, with
		 * leaf_walker() recording the visited entry. On success the recorded
		 * entry and level are copied to @ptep and @level; either pointer may be
		 * NULL when the caller is not interested in that value. On failure the
		 * walk's error code is returned and neither output is written.
		 */
		int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
					 kvm_pte_t *ptep, u32 *level)
		{
			struct leaf_walk_data data;
			struct kvm_pgtable_walker walker = {
				.cb	= leaf_walker,
				.flags	= KVM_PGTABLE_WALK_LEAF,
				.arg	= &data,
			};
			int ret;

			/* The offset of @addr within its page is irrelevant to the walk. */
			ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
					       PAGE_SIZE, &walker);
			if (ret)
				return ret;

			if (ptep)
				*ptep = data.pte;
			if (level)
				*level = data.level;

			return 0;
		}

		struct hyp_map_data {
		u64 phys;
		kvm_pte_t attr;

arch/arm64/kvm/mmu.c

+38 −7

Original line number	Diff line number	Diff line
		@@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
		return 0;
		}

		/*
		 * Minimal set of mm_ops used by get_user_mapping_size() for its
		 * page-table walk: only the phys_to_virt callback is populated, every
		 * other callback is left unset.
		 */
		static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
		/* We shouldn't need any other callback to walk the PT */
		.phys_to_virt = kvm_host_va,
		};

		/*
		 * Look up the leaf entry that maps @addr in the page-table rooted at
		 * kvm->mm->pgd and return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) for
		 * the level that entry was found at, which the caller compares against
		 * PMD_SIZE.
		 *
		 * A failed walk, an out-of-range level or a non-valid entry each trip
		 * a VM_BUG_ON().
		 */
		static int get_user_mapping_size(struct kvm *kvm, u64 addr)
		{
			kvm_pte_t leaf = 0;	/* Keep GCC quiet... */
			u32 leaf_level = ~0;
			struct kvm_pgtable user_pgt = {
				.mm_ops		= &kvm_user_mm_ops,
				.pgd		= (kvm_pte_t *)kvm->mm->pgd,
				.ia_bits	= VA_BITS,
				.start_level	= (KVM_PGTABLE_MAX_LEVELS -
						   CONFIG_PGTABLE_LEVELS),
			};
			int err;

			err = kvm_pgtable_get_leaf(&user_pgt, addr, &leaf, &leaf_level);

			VM_BUG_ON(err);
			VM_BUG_ON(leaf_level >= KVM_PGTABLE_MAX_LEVELS);
			VM_BUG_ON(!(leaf & PTE_VALID));

			return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(leaf_level));
		}

		static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
		.zalloc_page = stage2_memcache_zalloc_page,
		.zalloc_pages_exact = kvm_host_zalloc_pages_exact,
		@@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
		* Returns the size of the mapping.
		*/
		static unsigned long
		transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
		transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
		unsigned long hva, kvm_pfn_t *pfnp,
		phys_addr_t *ipap)
		{
		@@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
		* sure that the HVA and IPA are sufficiently aligned and that the
		* block map is contained within the memslot.
		*/
		if (kvm_is_transparent_hugepage(pfn) &&
		fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
		get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
		/*
		* The address we faulted on is backed by a transparent huge
		* page. However, because we map the compound huge page and
		@@ -814,7 +840,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		kvm_get_pfn(pfn);
		get_page(pfn_to_page(pfn));
		*pfnp = pfn;

		return PMD_SIZE;
		@@ -1050,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		* If we are not forced to use page mapping, check if we are
		* backed by a THP and thus use block mapping if possible.
		*/
		if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
		&pfn, &fault_ipa);
		if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
		vma_pagesize = fault_granule;
		else
		vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
		hva, &pfn,
		&fault_ipa);
		}

		if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new VM_SHARED VMA */

include/linux/kvm_host.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
		void kvm_release_pfn_dirty(kvm_pfn_t pfn);
		void kvm_set_pfn_dirty(kvm_pfn_t pfn);
		void kvm_set_pfn_accessed(kvm_pfn_t pfn);
		void kvm_get_pfn(kvm_pfn_t pfn);

		void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
		int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,

include/linux/page-flags.h

+0 −37

Original line number	Diff line number	Diff line
		@@ -632,43 +632,6 @@ static inline int PageTransCompound(struct page *page)
		return PageCompound(page);
		}

		/*
		 * PageTransCompoundMap is a stricter form of PageTransCompound: in
		 * addition, it guarantees that the primary MMU maps the entire compound
		 * page through pmd_trans_huge, which in turn guarantees that the
		 * secondary MMUs can map the entire compound page as well. Secondary
		 * MMUs can therefore call get_user_pages() only once per compound page
		 * and immediately map the entire compound page with a single secondary
		 * MMU fault. Should the pmd be split later, the secondary MMUs get
		 * updated through the MMU notifier invalidation issued from
		 * split_huge_pmd().
		 *
		 * Unlike PageTransCompound, this is only safe to call while
		 * split_huge_pmd() cannot run from under us, for instance when protected
		 * by the MMU notifier; otherwise the page->_mapcount check may yield
		 * false positives.
		 *
		 * Page cache THP has to be treated differently: every subpage gets its
		 * _mapcount incremented once the page is PMD mapped, yet the page may be
		 * PTE mapped in the current process. Comparing the subpage's _mapcount
		 * with compound_mapcount filters out the PTE mapped case.
		 */
		static inline int PageTransCompoundMap(struct page *page)
		{
			if (PageTransCompound(page)) {
				struct page *head;

				if (PageAnon(page))
					return atomic_read(&page->_mapcount) < 0;

				/* File THP is PMD mapped and not PTE mapped */
				head = compound_head(page);
				return atomic_read(&page->_mapcount) ==
				       atomic_read(compound_mapcount_ptr(head));
			}

			return 0;
		}

		/*
		* PageTransTail returns true for both transparent huge pages
		* and hugetlbfs pages, so it should only be called when it's known