Commit 3fd33273 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-pasid-2022-03-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 PASID support from Thomas Gleixner:
 "Reenable ENQCMD/PASID support:

   - Simplify the PASID handling to allocate the PASID once, associate
     it to the mm of a process and free it on mm_exit().

     The previous attempt of refcounted PASIDs and dynamic
     alloc()/free() turned out to be error prone and too complex. The
     PASID space is 20bits, so the case of resource exhaustion is a pure
     academic concern.

   - Populate the PASID MSR on demand via #GP to avoid racy updates via
     IPIs.

   - Reenable ENQCMD and let objtool check for the forbidden usage of
     ENQCMD in the kernel.

   - Update the documentation for Shared Virtual Addressing accordingly"

* tag 'x86-pasid-2022-03-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Documentation/x86: Update documentation for SVA (Shared Virtual Addressing)
  tools/objtool: Check for use of the ENQCMD instruction in the kernel
  x86/cpufeatures: Re-enable ENQCMD
  x86/traps: Demand-populate PASID MSR via #GP
  sched: Define and initialize a flag to identify valid PASID in the task
  x86/fpu: Clear PASID when copying fpstate
  iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit
  kernel/fork: Initialize mm's PASID
  iommu/ioasid: Introduce a helper to check for valid PASIDs
  mm: Change CONFIG option for mm->pasid field
  iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA
parents eaa54b14 83aa52ff
Loading
Loading
Loading
Loading
+41 −12
Original line number Diff line number Diff line
@@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
thread can interact with a device. Threads that belong to the same
process share the same page tables, thus the same MSR value.

PASID is cleared when a process is created. The PASID allocation and MSR
programming may occur long after a process and its threads have been created.
One thread must call iommu_sva_bind_device() to allocate the PASID for the
process. If a thread uses ENQCMD without the MSR first being populated, a #GP
will be raised. The kernel will update the PASID MSR with the PASID for all
threads in the process. A single process PASID can be used simultaneously
with multiple devices since they all share the same address space.

One thread can call iommu_sva_unbind_device() to free the allocated PASID.
The kernel will clear the PASID MSR for all threads belonging to the process.

New threads inherit the MSR value from the parent.
PASID Life Cycle Management
===========================

PASID is initialized as INVALID_IOASID (-1) when a process is created.

Only processes that access SVA-capable devices need to have a PASID
allocated. This allocation happens when a process opens/binds an SVA-capable
device but finds no PASID for this process. Subsequent binds of the same, or
other devices will share the same PASID.

Although the PASID is allocated to the process by opening a device,
it is not active in any of the threads of that process. It's loaded to the
IA32_PASID MSR lazily when a thread tries to submit a work descriptor
to a device using the ENQCMD.

That first access will trigger a #GP fault because the IA32_PASID MSR
has not been initialized with the PASID value assigned to the process
when the device was opened. The Linux #GP handler notes that a PASID has
been allocated for the process, and so initializes the IA32_PASID MSR
and returns so that the ENQCMD instruction is re-executed.

On fork(2) or exec(2) the PASID is removed from the process as it no
longer has the same address space that it had when the device was opened.

On clone(2) the new task shares the same address space, so will be
able to use the PASID allocated to the process. The IA32_PASID is not
preemptively initialized as the PASID value might not be allocated yet or
the kernel does not know whether this thread is going to access the device
and the cleared IA32_PASID MSR reduces context switch overhead by xstate
init optimization. Since #GP faults have to be handled on any threads that
were created before the PASID was assigned to the mm of the process, newly
created threads might as well be treated in a consistent way.

Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
all threads in unbind, free the PASID lazily only on mm exit.

If a process does a close(2) of the device file descriptor and munmap(2)
of the device MMIO portal, then the driver will unbind the device. The
PASID is still marked VALID in the PASID_MSR for any threads in the
process that accessed the device. But this is harmless as without the
MMIO portal they cannot submit new work to the device.

Relationships
=============
+5 −2
Original line number Diff line number Diff line
@@ -56,8 +56,11 @@
# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
#endif

/* Force disable because it's broken beyond repair */
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD		0
#else
# define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
#endif

#ifdef CONFIG_X86_SGX
# define DISABLE_SGX	0
+7 −0
Original line number Diff line number Diff line
@@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
		fpu_inherit_perms(dst_fpu);
	fpregs_unlock();

	/*
	 * Children never inherit PASID state.
	 * Force it to have its init value:
	 */
	if (use_xsave())
		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;

	trace_x86_fpu_copy_src(src_fpu);
	trace_x86_fpu_copy_dst(dst_fpu);

+55 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <linux/io.h>
#include <linux/hardirq.h>
#include <linux/atomic.h>
#include <linux/ioasid.h>

#include <asm/stacktrace.h>
#include <asm/processor.h>
@@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
	return true;
}

/*
 * The unprivileged ENQCMD instruction generates #GPs if the
 * IA32_PASID MSR has not been populated.  If possible, populate
 * the MSR from a PASID previously allocated to the mm.
 */
static bool try_fixup_enqcmd_gp(void)
{
#ifdef CONFIG_IOMMU_SVA
	u32 pasid;

	/*
	 * MSR_IA32_PASID is managed using XSAVE.  Directly
	 * writing to the MSR is only possible when fpregs
	 * are valid and the fpstate is not.  This is
	 * guaranteed when handling a userspace exception
	 * in *before* interrupts are re-enabled.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Hardware without ENQCMD will not generate
	 * #GPs that can be fixed up here.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
		return false;

	pasid = current->mm->pasid;

	/*
	 * If the mm has not been allocated a
	 * PASID, the #GP can not be fixed up.
	 */
	if (!pasid_valid(pasid))
		return false;

	/*
	 * Did this thread already have its PASID activated?
	 * If so, the #GP must be from something else.
	 */
	if (current->pasid_activated)
		return false;

	wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
	current->pasid_activated = 1;

	return true;
#else
	return false;
#endif
}

DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
	unsigned long gp_addr;
	int ret;

	if (user_mode(regs) && try_fixup_enqcmd_gp())
		return;

	cond_local_irq_enable(regs);

	if (static_cpu_has(X86_FEATURE_UMIP)) {
+3 −3
Original line number Diff line number Diff line
@@ -144,8 +144,8 @@ config IOMMU_DMA
	select IRQ_MSI_IOMMU
	select NEED_SG_DMA_LENGTH

# Shared Virtual Addressing library
config IOMMU_SVA_LIB
# Shared Virtual Addressing
config IOMMU_SVA
	bool
	select IOASID

@@ -379,7 +379,7 @@ config ARM_SMMU_V3
config ARM_SMMU_V3_SVA
	bool "Shared Virtual Addressing support for the ARM SMMUv3"
	depends on ARM_SMMU_V3
	select IOMMU_SVA_LIB
	select IOMMU_SVA
	select MMU_NOTIFIER
	help
	  Support for sharing process address spaces with devices using the
Loading