Commit cf0c7125 authored by Marc Zyngier
Browse files

Merge branch kvm-arm64/mmu/el2-tracking into kvmarm-master/next



* kvm-arm64/mmu/el2-tracking: (25 commits)
  : Enable tracking of page sharing between host EL1 and EL2
  KVM: arm64: Minor optimization of range_is_memory
  KVM: arm64: Make hyp_panic() more robust when protected mode is enabled
  KVM: arm64: Return -EPERM from __pkvm_host_share_hyp()
  KVM: arm64: Make __pkvm_create_mappings static
  KVM: arm64: Restrict EL2 stage-1 changes in protected mode
  KVM: arm64: Refactor protected nVHE stage-1 locking
  KVM: arm64: Remove __pkvm_mark_hyp
  KVM: arm64: Mark host bss and rodata section as shared
  KVM: arm64: Enable retrieving protections attributes of PTEs
  KVM: arm64: Introduce addr_is_memory()
  KVM: arm64: Expose pkvm_hyp_id
  KVM: arm64: Expose host stage-2 manipulation helpers
  KVM: arm64: Add helpers to tag shared pages in SW bits
  KVM: arm64: Allow populating software bits
  KVM: arm64: Enable forcing page-level stage-2 mappings
  KVM: arm64: Tolerate re-creating hyp mappings to set software bits
  KVM: arm64: Don't overwrite software bits with owner id
  KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED
  KVM: arm64: Optimize host memory aborts
  KVM: arm64: Expose page-table helpers
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>
parents 82f8d543 14ecf075
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -59,12 +59,11 @@
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs		13
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs		14
#define __KVM_HOST_SMCCC_FUNC___pkvm_init			15
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings		16
#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp		16
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping	17
#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp			20
#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			21
#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			20

#ifndef __ASSEMBLY__

+103 −47
Original line number Diff line number Diff line
@@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0)

typedef u64 kvm_pte_t;

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

static inline bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static inline u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static inline u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}

static inline bool kvm_level_supports_block_mapping(u32 level)
{
	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
}

/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:		Allocate a single zeroed memory page.
@@ -75,31 +115,16 @@ enum kvm_pgtable_stage2_flags {
	KVM_PGTABLE_S2_IDMAP			= BIT(1),
};

/**
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 */
struct kvm_pgtable {
	u32					ia_bits;
	u32					start_level;
	kvm_pte_t				*pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
};

/**
 * enum kvm_pgtable_prot - Page-table permissions and attributes.
 * @KVM_PGTABLE_PROT_X:		Execute permission.
 * @KVM_PGTABLE_PROT_W:		Write permission.
 * @KVM_PGTABLE_PROT_R:		Read permission.
 * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
 * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
 * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
 * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
 * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
 */
enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_X			= BIT(0),
@@ -107,21 +132,48 @@ enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_R			= BIT(2),

	KVM_PGTABLE_PROT_DEVICE			= BIT(3),

	KVM_PGTABLE_PROT_SW0			= BIT(55),
	KVM_PGTABLE_PROT_SW1			= BIT(56),
	KVM_PGTABLE_PROT_SW2			= BIT(57),
	KVM_PGTABLE_PROT_SW3			= BIT(58),
};

#define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)

#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW

#define PAGE_HYP		KVM_PGTABLE_PROT_RW
#define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)

typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
					   enum kvm_pgtable_prot prot);

/**
 * struct kvm_mem_range - Range of Intermediate Physical Addresses
 * @start:	Start of the range.
 * @end:	End of the range.
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 * @flags:		Stage-2 page-table flags.
 * @force_pte_cb:	Function that returns true if page level mappings must
 *			be used instead of block mappings.
 */
struct kvm_mem_range {
	u64 start;
	u64 end;
struct kvm_pgtable {
	u32					ia_bits;
	u32					start_level;
	kvm_pte_t				*pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
	kvm_pgtable_force_pte_cb_t		force_pte_cb;
};

/**
@@ -216,21 +268,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);

/**
 * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
 * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @arch:	Arch-specific KVM structure representing the guest virtual
 *		machine.
 * @mm_ops:	Memory management callbacks.
 * @flags:	Stage-2 configuration flags.
 * @force_pte_cb: Function that returns true if page level mappings must
 *		be used instead of block mappings.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
			      struct kvm_pgtable_mm_ops *mm_ops,
				  enum kvm_pgtable_stage2_flags flags);
			      enum kvm_pgtable_stage2_flags flags,
			      kvm_pgtable_force_pte_cb_t force_pte_cb);

#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
	__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)

/**
 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -374,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * relax the permissions in that entry according to the read, write and
 * execute permissions specified by @prot. No permissions are removed, and
 * TLB invalidation is performed after updating the entry.
 * TLB invalidation is performed after updating the entry. Software bits cannot
 * be set or cleared using kvm_pgtable_stage2_relax_perms().
 *
 * Return: 0 on success, negative error code on failure.
 */
@@ -453,22 +509,22 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level);

/**
 * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
 *				     Addresses with compatible permission
 *				     attributes.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Address that must be covered by the range.
 * @prot:	Protection attributes that the range must be compatible with.
 * @range:	Range structure used to limit the search space at call time and
 *		that will hold the result.
 * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
 *				   stage-2 Page-Table Entry.
 * @pte:	Page-table entry
 *
 * The offset of @addr within a page is ignored. An IPA is compatible with @prot
 * iff its corresponding stage-2 page-table entry has default ownership and, if
 * valid, is mapped with protection attributes identical to @prot.
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);

/**
 * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
 *				Page-Table Entry.
 * @pte:	Page-table entry
 *
 * Return: 0 on success, negative error code on failure.
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
				  enum kvm_pgtable_prot prot,
				  struct kvm_mem_range *range);
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
#endif	/* __ARM64_KVM_PGTABLE_H__ */
+9 −0
Original line number Diff line number Diff line
@@ -46,6 +46,15 @@ if KVM

source "virt/kvm/Kconfig"

config NVHE_EL2_DEBUG
	bool "Debug mode for non-VHE EL2 object"
	help
	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
	  Failure reports will BUG() in the hypervisor. This is intended for
	  local EL2 hypervisor development.

	  If unsure, say N.

endif # KVM

endif # VIRTUALIZATION
+8 −49
Original line number Diff line number Diff line
@@ -91,10 +91,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
		kvm->arch.return_nisv_io_abort_to_user = true;
		break;
	case KVM_CAP_ARM_MTE:
		if (!system_supports_mte() || kvm->created_vcpus)
			return -EINVAL;
		mutex_lock(&kvm->lock);
		if (!system_supports_mte() || kvm->created_vcpus) {
			r = -EINVAL;
		} else {
			r = 0;
			kvm->arch.mte_enabled = true;
		}
		mutex_unlock(&kvm->lock);
		break;
	default:
		r = -EINVAL;
@@ -1946,62 +1950,17 @@ static void _kvm_host_prot_finalize(void *discard)
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
}

static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
{
	return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
}

#define pkvm_mark_hyp_section(__section)		\
	pkvm_mark_hyp(__pa_symbol(__section##_start),	\
			__pa_symbol(__section##_end))

static int finalize_hyp_mode(void)
{
	int cpu, ret;

	if (!is_protected_kvm_enabled())
		return 0;

	ret = pkvm_mark_hyp_section(__hyp_idmap_text);
	if (ret)
		return ret;

	ret = pkvm_mark_hyp_section(__hyp_text);
	if (ret)
		return ret;

	ret = pkvm_mark_hyp_section(__hyp_rodata);
	if (ret)
		return ret;

	/*
	 * Exclude HYP BSS from kmemleak so that it doesn't get peeked
	 * at, which would end badly once the section is inaccessible.
	 * None of other sections should ever be introspected.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	ret = pkvm_mark_hyp_section(__hyp_bss);
	if (ret)
		return ret;

	ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
		phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());

		ret = pkvm_mark_hyp(start, end);
		if (ret)
			return ret;

		start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
		end = start + PAGE_SIZE;
		ret = pkvm_mark_hyp(start, end);
		if (ret)
			return ret;
	}

	/*
	 * Flip the static key upfront as that may no longer be possible
+14 −9
Original line number Diff line number Diff line
@@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
}

void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
					      u64 elr_virt, u64 elr_phys,
					      u64 par, uintptr_t vcpu,
					      u64 far, u64 hpfar) {
	u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
	u64 mode = spsr & PSR_MODE_MASK;

	/*
@@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
		kvm_err("Invalid host exception to nVHE hyp!\n");
	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
		   (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
		struct bug_entry *bug = find_bug(elr_in_kimg);
		const char *file = NULL;
		unsigned int line = 0;

		/* All hyp bugs, including warnings, are treated as fatal. */
		if (!is_protected_kvm_enabled() ||
		    IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
			struct bug_entry *bug = find_bug(elr_in_kimg);

			if (bug)
				bug_get_file_line(bug, &file, &line);
		}

		if (file)
			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
		else
			kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
			kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
	} else {
		kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
		kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
	}

	/*
@@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
	kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);

	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
	      spsr, elr, esr, far, hpfar, par, vcpu);
	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
}
Loading