Commit 3a93e403 authored by Linus Torvalds
Pull kvm fixes from Paolo Bonzini:
 "RISC-V:

   - Fix VM hang in case of timer delta being zero

 ARM:

   - MMU fixes:

      - Read the MMU notifier seq before dropping the mmap lock to guard
        against reading a potentially stale VMA

      - Disable interrupts when walking user page tables to protect
        against the page table being freed

      - Read the MTE permissions for the VMA within the mmap lock
        critical section, avoiding the use of a potentially stale VMA
        pointer

   - vPMU fixes:

      - Return the sum of the current perf event value and PMC snapshot
        for reads from userspace

      - Don't save the value of guest writes to PMCR_EL0.{C,P}, which
        could otherwise lead to userspace erroneously resetting the vPMU
        during VM save/restore"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  riscv/kvm: Fix VM hang in case of timer delta being zero.
  KVM: arm64: Check for kvm_vma_mte_allowed in the critical section
  KVM: arm64: Disable interrupts while walking userspace PTs
  KVM: arm64: Retry fault if vma_lookup() results become invalid
  KVM: arm64: PMU: Don't save PMCR_EL0.{C,P} for the vCPU
  KVM: arm64: PMU: Fix GET_ONE_REG for vPMC regs to return the current value
parents 91fe2045 9e347ba0
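
The ARM MMU fixes listed above all lean on KVM's mmu_invalidate_seq
protocol: sample the sequence count while the mmap lock is still held,
then re-check it under kvm->mmu_lock and replay the fault if an
invalidation ran in between. A minimal sketch of that idiom
(kernel-flavoured illustration only -- the names follow the kernel, but
the control flow is abbreviated and fault_sketch() itself is
hypothetical):

	static int fault_sketch(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
				gfn_t gfn, bool write_fault)
	{
		struct kvm *kvm = vcpu->kvm;
		unsigned long mmu_seq;
		bool writable;
		kvm_pfn_t pfn;
		int ret = 0;

		mmap_read_lock(current->mm);
		/* vma_lookup() and every per-VMA check happen here */

		/*
		 * Sample the sequence count before dropping mmap_lock;
		 * mmap_read_unlock() implies the smp_rmb() that pairs with
		 * the smp_wmb() in kvm_mmu_invalidate_end().
		 */
		mmu_seq = kvm->mmu_invalidate_seq;
		mmap_read_unlock(current->mm);

		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
					   write_fault, &writable, NULL);

		write_lock(&kvm->mmu_lock);
		if (mmu_invalidate_retry(kvm, mmu_seq)) {
			/* An invalidation raced with us: replay the fault */
			ret = -EAGAIN;
			goto out_unlock;
		}
		/* ... safe to install the stage-2 mapping here ... */
	out_unlock:
		write_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return ret;
	}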
arch/arm64/kvm/mmu.c  +64 −35
@@ -666,14 +666,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
 				   CONFIG_PGTABLE_LEVELS),
 		.mm_ops		= &kvm_user_mm_ops,
 	};
+	unsigned long flags;
 	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
 	u32 level = ~0;
 	int ret;
 
+	/*
+	 * Disable IRQs so that we hazard against a concurrent
+	 * teardown of the userspace page tables (which relies on
+	 * IPI-ing threads).
+	 */
+	local_irq_save(flags);
 	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
-	VM_BUG_ON(ret);
-	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
-	VM_BUG_ON(!(pte & PTE_VALID));
+	local_irq_restore(flags);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * Not seeing an error, but not updating level? Something went
+	 * deeply wrong...
+	 */
+	if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
+		return -EFAULT;
+
+	/* Oops, the userspace PTs are gone... Replay the fault */
+	if (!kvm_pte_valid(pte))
+		return -EAGAIN;
 
 	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
 }
@@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
  *
  * Returns the size of the mapping.
  */
-static unsigned long
+static long
 transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long hva, kvm_pfn_t *pfnp,
 			    phys_addr_t *ipap)
@@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	 * sure that the HVA and IPA are sufficiently aligned and that the
 	 * block map is contained within the memslot.
 	 */
-	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
-	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
+	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+		int sz = get_user_mapping_size(kvm, hva);
+
+		if (sz < 0)
+			return sz;
+
+		if (sz < PMD_SIZE)
+			return PAGE_SIZE;
+
 		/*
 		 * The address we faulted on is backed by a transparent huge
 		 * page.  However, because we map the compound huge page and
@@ -1192,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 {
 	int ret = 0;
 	bool write_fault, writable, force_pte = false;
-	bool exec_fault;
+	bool exec_fault, mte_allowed;
 	bool device = false;
 	unsigned long mmu_seq;
 	struct kvm *kvm = vcpu->kvm;
@@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
 	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
-	unsigned long vma_pagesize, fault_granule;
+	long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt;
 
@@ -1217,6 +1243,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
+	/*
+	 * Permission faults just need to update the existing leaf entry,
+	 * and so normally don't require allocations from the memcache. The
+	 * only exception to this is when dirty logging is enabled at runtime
+	 * and a write fault needs to collapse a block entry into a table.
+	 */
+	if (fault_status != ESR_ELx_FSC_PERM ||
+	    (logging_active && write_fault)) {
+		ret = kvm_mmu_topup_memory_cache(memcache,
+						 kvm_mmu_cache_min_pages(kvm));
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * Let's check if we will get back a huge page backed by hugetlbfs, or
 	 * get block mapping for device MMIO region.
@@ -1269,37 +1309,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		fault_ipa &= ~(vma_pagesize - 1);
 
 	gfn = fault_ipa >> PAGE_SHIFT;
-	mmap_read_unlock(current->mm);
+	mte_allowed = kvm_vma_mte_allowed(vma);
 
-	/*
-	 * Permission faults just need to update the existing leaf entry,
-	 * and so normally don't require allocations from the memcache. The
-	 * only exception to this is when dirty logging is enabled at runtime
-	 * and a write fault needs to collapse a block entry into a table.
-	 */
-	if (fault_status != ESR_ELx_FSC_PERM ||
-	    (logging_active && write_fault)) {
-		ret = kvm_mmu_topup_memory_cache(memcache,
-						 kvm_mmu_cache_min_pages(kvm));
-		if (ret)
-			return ret;
-	}
+	/* Don't use the VMA after the unlock -- it may have vanished */
+	vma = NULL;
 
-	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	/*
-	 * Ensure the read of mmu_invalidate_seq happens before we call
-	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
-	 * the page we just got a reference to gets unmapped before we have a
-	 * chance to grab the mmu_lock, which ensure that if the page gets
-	 * unmapped afterwards, the call to kvm_unmap_gfn will take it away
-	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
-	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
+	 * Read mmu_invalidate_seq so that KVM can detect if the results of
+	 * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
+	 * acquiring kvm->mmu_lock.
 	 *
-	 * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
-	 * used to avoid unnecessary overhead introduced to locate the memory
-	 * slot because it's always fixed even @gfn is adjusted for huge pages.
+	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
+	 * with the smp_wmb() in kvm_mmu_invalidate_end().
 	 */
-	smp_rmb();
+	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+	mmap_read_unlock(current->mm);
 
 	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
 				   write_fault, &writable, NULL);
@@ -1350,11 +1374,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
 								   hva, &pfn,
 								   &fault_ipa);
+
+		if (vma_pagesize < 0) {
+			ret = vma_pagesize;
+			goto out_unlock;
+		}
 	}
 
 	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new disallowed VMA */
-		if (kvm_vma_mte_allowed(vma)) {
+		if (mte_allowed) {
 			sanitise_mte_tags(kvm, pfn, vma_pagesize);
 		} else {
 			ret = -EFAULT;
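
A note on how the new error values travel: get_user_mapping_size() now
returns -EAGAIN once the userspace page tables have been torn down,
transparent_hugepage_adjust() forwards any negative value, and
user_mem_abort() bails out through out_unlock. What "replay the fault"
means at the call site, as a hypothetical sketch (the real dispatch
lives in kvm_handle_guest_abort(); abbreviated for illustration):

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == -EAGAIN)
		ret = 1;	/* back into the guest; the abort recurs with fresh state */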
arch/arm64/kvm/pmu-emul.c  +2 −1
@@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 	if (!kvm_pmu_is_3p5(vcpu))
 		val &= ~ARMV8_PMU_PMCR_LP;
 
-	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+	/* The reset bits don't indicate any state, and shouldn't be saved. */
+	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
 
 	if (val & ARMV8_PMU_PMCR_E) {
 		kvm_pmu_enable_counter_mask(vcpu,
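
The failure mode this avoids is easiest to see from the VMM side: had a
guest write with PMCR_EL0.{C,P} set been latched into the shadow
register, a save/restore cycle would replay the counter reset on the
destination. A hedged sketch of such a round-trip using the ONE_REG
ioctls (the PMCR_EL0 register ID is spelled out from its sys_reg
coordinates, and the vCPU fd plumbing is assumed):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	/* PMCR_EL0 is op0=3, op1=3, CRn=9, CRm=12, op2=0 */
	#define PMCR_EL0_ID	ARM64_SYS_REG(3, 3, 9, 12, 0)

	static void migrate_pmcr(int src_vcpu_fd, int dst_vcpu_fd)
	{
		uint64_t val;
		struct kvm_one_reg reg = {
			.id	= PMCR_EL0_ID,
			.addr	= (uint64_t)&val,
		};

		/*
		 * Before the fix, a stale C/P bit in the saved value would
		 * zero the cycle/event counters on KVM_SET_ONE_REG. With the
		 * fix, the saved value never carries the write-only reset
		 * bits.
		 */
		ioctl(src_vcpu_fd, KVM_GET_ONE_REG, &reg);
		ioctl(dst_vcpu_fd, KVM_SET_ONE_REG, &reg);
	}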
arch/arm64/kvm/sys_regs.c  +19 −2
@@ -856,6 +856,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
 	return true;
 }
 
+static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+			  u64 *val)
+{
+	u64 idx;
+
+	if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0)
+		/* PMCCNTR_EL0 */
+		idx = ARMV8_PMU_CYCLE_IDX;
+	else
+		/* PMEVCNTRn_EL0 */
+		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+
+	*val = kvm_pmu_get_counter_value(vcpu, idx);
+	return 0;
+}
+
 static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 			      struct sys_reg_params *p,
 			      const struct sys_reg_desc *r)
@@ -1072,7 +1088,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 /* Macro to expand the PMEVCNTRn_EL0 register */
 #define PMU_PMEVCNTR_EL0(n)						\
 	{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)),				\
-	  .reset = reset_pmevcntr,					\
+	  .reset = reset_pmevcntr, .get_user = get_pmu_evcntr,		\
 	  .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
 
 /* Macro to expand the PMEVTYPERn_EL0 register */
@@ -1982,7 +1998,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ PMU_SYS_REG(SYS_PMCEID1_EL0),
 	  .access = access_pmceid, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
-	  .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
+	  .access = access_pmu_evcntr, .reset = reset_unknown,
+	  .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
 	{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
 	  .access = access_pmu_evtyper, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
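
The index decode in get_pmu_evcntr() mirrors the architectural encoding
of PMEVCNTR<n>_EL0, where CRm carries bits [4:3] of n and Op2 carries
bits [2:0]. A quick worked example (assuming the standard sys_reg
coordinates):

	/*
	 * PMEVCNTR10_EL0 encodes as CRn=14, CRm=0b1001, Op2=0b010:
	 *	idx = ((CRm & 3) << 3) | (Op2 & 7)
	 *	    = ((9 & 3) << 3) | (2 & 7)
	 *	    = (1 << 3) | 2
	 *	    = 10
	 */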
arch/riscv/kvm/vcpu_timer.c  +2 −4
@@ -147,11 +147,9 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu)
 		return;
 
 	delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
-	if (delta_ns) {
-		hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
-		t->next_set = true;
-	}
+	hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+	t->next_set = true;
 }
 
 static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu)
 {
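
The RISC-V hunk fixes a hang rather than a crash: with delta_ns == 0
the old guard skipped arming the hrtimer entirely, so a vCPU blocking
on an already-expired timer never received a wakeup. hrtimer_start()
with a zero (or past) relative expiry is well-defined and simply fires
as soon as the timer subsystem runs, e.g. (illustrative):

	/*
	 * An expiry of 0 ns in REL mode is already in the past: the
	 * callback runs at the next hrtimer event, waking the vCPU.
	 */
	hrtimer_start(&t->hrt, ktime_set(0, 0), HRTIMER_MODE_REL);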