Commit b88835a8 authored by Marc Zyngier

Merge branch kvm-arm64/mmu/stage2-cmos into kvmarm-master/next

Cache maintenance updates from Yanan Wang, moving the CMOs
down into the page-table code. This ensures that we only issue
them when actually performing a mapping rather than upfront.

* kvm-arm64/mmu/stage2-cmos:
  KVM: arm64: Move guest CMOs to the fault handlers
  KVM: arm64: Tweak parameters of guest cache maintenance functions
  KVM: arm64: Introduce mm_ops member for structure stage2_attr_data
  KVM: arm64: Introduce two cache maintenance callbacks
parents fbba7e69 25aa2869
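
In outline (a condensed paraphrase of the hunks below, not a verbatim quote of any single patch): struct kvm_pgtable_mm_ops grows two optional cache-maintenance callbacks, and the stage-2 map walker invokes them just before publishing a leaf PTE, so the CMOs are only issued for faults that actually install a mapping:

	/* New, optional callbacks in struct kvm_pgtable_mm_ops */
	void	(*dcache_clean_inval_poc)(void *addr, size_t size);
	void	(*icache_inval_pou)(void *addr, size_t size);

	/* In stage2_map_walker_try_leaf(), before the new PTE is published: */
	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), granule);
	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
	smp_store_release(ptep, new);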
arch/arm64/include/asm/kvm_mmu.h +2 −7
@@ -187,10 +187,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
	return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}

static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
static inline void __clean_dcache_guest_page(void *va, size_t size)
{
	void *va = page_address(pfn_to_page(pfn));

	/*
	 * With FWB, we ensure that the guest always accesses memory using
	 * cacheable attributes, and we don't have to clean to PoC when
@@ -203,16 +201,13 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
	kvm_flush_dcache_to_poc(va, size);
}

static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
						  unsigned long size)
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
	if (icache_is_aliasing()) {
		/* any kind of VIPT cache */
		__flush_icache_all();
	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
		void *va = page_address(pfn_to_page(pfn));

		invalidate_icache_range((unsigned long)va,
					(unsigned long)va + size);
	}
arch/arm64/include/asm/kvm_pgtable.h +25 −17
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;

/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:	Allocate a single zeroed memory page. The @arg parameter
 *			can be used by the walker to pass a memcache. The
 *			initial refcount of the page is 1.
 * @zalloc_pages_exact:	Allocate an exact number of zeroed memory pages. The
 *			@size parameter is in bytes, and is rounded-up to the
 *			next page boundary. The resulting allocation is
 *			physically contiguous.
 * @zalloc_page:		Allocate a single zeroed memory page.
 *				The @arg parameter can be used by the walker
 *				to pass a memcache. The initial refcount of
 *				the page is 1.
 * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
 *				The @size parameter is in bytes, and is rounded
 *				up to the next page boundary. The resulting
 *				allocation is physically contiguous.
 * @free_pages_exact:		Free an exact number of memory pages previously
 *				allocated by zalloc_pages_exact.
 * @get_page:			Increment the refcount on a page.
 * @put_page:		Decrement the refcount on a page. When the refcount
 *			reaches 0 the page is automatically freed.
 * @put_page:			Decrement the refcount on a page. When the
 *				refcount reaches 0 the page is automatically
 *				freed.
 * @page_count:			Return the refcount of a page.
 * @phys_to_virt:	Convert a physical address into a virtual address mapped
 *			in the current context.
 * @virt_to_phys:	Convert a virtual address mapped in the current context
 *			into a physical address.
 * @phys_to_virt:		Convert a physical address into a virtual
 *				address	mapped in the current context.
 * @virt_to_phys:		Convert a virtual address mapped in the current
 *				context into a physical address.
 * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
 *				for the	specified memory address range.
 * @icache_inval_pou:		Invalidate the instruction cache to the PoU
 *				for the specified memory address range.
 */
struct kvm_pgtable_mm_ops {
	void*		(*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
	int		(*page_count)(void *addr);
	void*		(*phys_to_virt)(phys_addr_t phys);
	phys_addr_t	(*virt_to_phys)(void *addr);
	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
	void		(*icache_inval_pou)(void *addr, size_t size);
};

/**
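
For illustration, a minimal sketch of how a page-table user could supply the new callbacks (the example_* names here are hypothetical; the real wiring for the guest stage-2 is the kvm_s2_mm_ops hunk further down in this merge, and the walker simply skips a callback that is left NULL):

	static void example_dcache_clean_inval_poc(void *addr, size_t size)
	{
		/* clean+invalidate [addr, addr + size) to the Point of Coherency */
	}

	static void example_icache_inval_pou(void *addr, size_t size)
	{
		/* invalidate the I-cache for [addr, addr + size) to the Point of Unification */
	}

	static struct kvm_pgtable_mm_ops example_mm_ops = {
		/* ... allocation, refcount and VA/PA conversion callbacks ... */
		.dcache_clean_inval_poc	= example_dcache_clean_inval_poc,
		.icache_inval_pou	= example_icache_inval_pou,
	};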
arch/arm64/kvm/hyp/pgtable.c +37 −11
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
	mm_ops->put_page(ptep);
}

static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}

static bool stage2_pte_executable(kvm_pte_t pte)
{
	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}

static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
	}

	/* Perform CMOs before installation of the guest stage-2 PTE */
	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
						granule);

	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);

	smp_store_release(ptep, new);
	if (stage2_pte_is_counted(new))
		mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
	return ret;
}

static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
@@ -865,6 +879,7 @@ struct stage2_attr_data {
	kvm_pte_t			attr_clr;
	kvm_pte_t			pte;
	u32				level;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -873,6 +888,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return 0;
@@ -887,8 +903,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
	if (data->pte != pte) {
		/*
		 * Invalidate instruction cache before updating the guest
		 * stage-2 PTE if we are going to add executable permission.
		 */
		if (mm_ops->icache_inval_pou &&
		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
						  kvm_granule_size(level));
		WRITE_ONCE(*ptep, pte);
	}

	return 0;
}
@@ -903,6 +928,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
		.mm_ops		= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
arch/arm64/kvm/mmu.c +17 −22
@@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
	return __va(phys);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(va, size);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(va, size);
}

/*
 * Unmapping vs dcache management:
 *
@@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.page_count		= kvm_host_page_count,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
	.dcache_clean_inval_poc	= clean_dcache_guest_page,
	.icache_inval_pou	= invalidate_icache_guest_page,
};

/**
@@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__clean_dcache_guest_page(pfn, size);
}

static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__invalidate_icache_guest_page(pfn, size);
}

static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -1012,13 +1014,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	if (writable)
		prot |= KVM_PGTABLE_PROT_W;

	if (fault_status != FSC_PERM && !device)
		clean_dcache_guest_page(pfn, vma_pagesize);

	if (exec_fault) {
	if (exec_fault)
		prot |= KVM_PGTABLE_PROT_X;
		invalidate_icache_guest_page(pfn, vma_pagesize);
	}

	if (device)
		prot |= KVM_PGTABLE_PROT_DEVICE;
@@ -1216,12 +1213,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
	WARN_ON(range->end - range->start != 1);

	/*
	 * We've moved a page around, probably through CoW, so let's treat it
	 * just like a translation fault and clean the cache to the PoC.
	 */
	clean_dcache_guest_page(pfn, PAGE_SIZE);

	/*
	 * We've moved a page around, probably through CoW, so let's treat
	 * it just like a translation fault and the map handler will clean
	 * the cache to the PoC.
	 *
	 * The MMU notifiers will have unmapped a huge PMD before calling
	 * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
	 * therefore we never need to clear out a huge PMD through this