Commit 1f0f4a2e authored by Oliver Upton's avatar Oliver Upton Committed by Marc Zyngier
Browse files

KVM: arm64: Infer the PA offset from IPA in stage-2 map walker



Until now, the page table walker counted increments to the PA and IPA
of a walk in two separate places. While the PA is incremented as soon as
a leaf PTE is installed in stage2_map_walker_try_leaf(), the IPA is
actually bumped in the generic table walker context. Critically,
__kvm_pgtable_visit() rereads the PTE after the LEAF callback returns
to work out if a table or leaf was installed, and only bumps the IPA for
a leaf PTE.

This arrangement worked fine when we handled faults behind the write lock,
as the walker had exclusive access to the stage-2 page tables. However,
commit 1577cb58 ("KVM: arm64: Handle stage-2 faults in parallel")
started handling all stage-2 faults behind the read lock, opening up a
race where a walker could increment the PA but not the IPA of a walk.
Nothing good ensues, as the walker starts mapping with the incorrect
IPA -> PA relationship.

For example, assume that two vCPUs took a data abort on the same IPA.
One observes that dirty logging is disabled, and the other observed that
it is enabled:

  vCPU attempting PMD mapping		  vCPU attempting PTE mapping
  ======================================  =====================================
  /* install PMD */
  stage2_make_pte(ctx, leaf);
  data->phys += granule;
  					  /* replace PMD with a table */
  					  stage2_try_break_pte(ctx, data->mmu);
					  stage2_make_pte(ctx, table);
  /* table is observed */
  ctx.old = READ_ONCE(*ptep);
  table = kvm_pte_table(ctx.old, level);

  /*
   * map walk continues w/o incrementing
   * IPA.
   */
   __kvm_pgtable_walk(..., level + 1);

Bring an end to the whole mess by using the IPA as the single source of
truth for how far along a walk has gotten. Work out the correct PA to
map by calculating the IPA offset from the beginning of the walk and add
that to the starting physical address.

Cc: stable@vger.kernel.org
Fixes: 1577cb58 ("KVM: arm64: Handle stage-2 faults in parallel")
Signed-off-by: default avatarOliver Upton <oliver.upton@linux.dev>
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230421071606.1603916-2-oliver.upton@linux.dev
parent 197b6b60
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
	kvm_pte_t				old;
	void					*arg;
	struct kvm_pgtable_mm_ops		*mm_ops;
	u64					start;
	u64					addr;
	u64					end;
	u32					level;
+28 −4
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@
struct kvm_pgtable_walk_data {
	struct kvm_pgtable_walker	*walker;

	u64				start;
	u64				addr;
	u64				end;
};
@@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
		.old	= READ_ONCE(*ptep),
		.arg	= data->walker->arg,
		.mm_ops	= mm_ops,
		.start	= data->start,
		.addr	= data->addr,
		.end	= data->end,
		.level	= level,
@@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.start	= ALIGN_DOWN(addr, PAGE_SIZE),
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
@@ -794,20 +797,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}

static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
				       const struct stage2_map_data *data)
{
	u64 phys = data->phys;

	/*
	 * Stage-2 walks to update ownership data are communicated to the map
	 * walker using an invalid PA. Avoid offsetting an already invalid PA,
	 * which could overflow and make the address valid again.
	 */
	if (!kvm_phys_is_valid(phys))
		return phys;

	/*
	 * Otherwise, work out the correct PA based on how far the walk has
	 * gotten.
	 */
	return phys + (ctx->addr - ctx->start);
}

static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
					struct stage2_map_data *data)
{
	u64 phys = stage2_map_walker_phys_addr(ctx, data);

	if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
		return false;

	return kvm_block_mapping_supported(ctx, data->phys);
	return kvm_block_mapping_supported(ctx, phys);
}

static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
				      struct stage2_map_data *data)
{
	kvm_pte_t new;
	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
	u64 phys = stage2_map_walker_phys_addr(ctx, data);
	u64 granule = kvm_granule_size(ctx->level);
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;

@@ -841,8 +867,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,

	stage2_make_pte(ctx, new);

	if (kvm_phys_is_valid(phys))
		data->phys += granule;
	return 0;
}