Commit 75bc81d0 authored by Will Deacon's avatar Will Deacon
Browse files

Merge branch 'for-next/sve-state' into for-next/core

* for-next/sve-state:
  arm64/fp: Use a struct to pass data to fpsimd_bind_state_to_cpu()
  arm64/sve: Leave SVE enabled on syscall if we don't context switch
  arm64/fpsimd: SME no longer requires SVE register state
  arm64/fpsimd: Load FP state based on recorded data type
  arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM
  arm64/fpsimd: Have KVM explicitly say which FP registers to save
  arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
  KVM: arm64: Discard any SVE state when entering KVM guests
parents 595a121e 1192b93b
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -56,11 +56,20 @@ extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_kvm_prepare(void);

struct cpu_fp_state {
	struct user_fpsimd_state *st;
	void *sve_state;
	void *za_state;
	u64 *svcr;
	unsigned int sve_vl;
	unsigned int sme_vl;
	enum fp_type *fp_type;
	enum fp_type to_save;
};

extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
				     void *sve_state, unsigned int sve_vl,
				     void *za_state, unsigned int sme_vl,
				     u64 *svcr);
extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);

extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
+11 −1
Original line number Diff line number Diff line
@@ -306,8 +306,18 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
	struct kvm_cpu_context ctxt;

	/* Guest floating point state */
	/*
	 * Guest floating point state
	 *
	 * The architecture has two main floating point extensions,
	 * the original FPSIMD and SVE.  These have overlapping
	 * register views, with the FPSIMD V registers occupying the
	 * low 128 bits of the SVE Z registers.  When the core
	 * floating point code saves the register state of a task it
	 * records which view it saved in fp_type.
	 */
	void *sve_state;
	enum fp_type fp_type;
	unsigned int sve_max_vl;
	u64 svcr;

+7 −0
Original line number Diff line number Diff line
@@ -122,6 +122,12 @@ enum vec_type {
	ARM64_VEC_MAX,
};

enum fp_type {
	FP_STATE_CURRENT,	/* Save based on current task state. */
	FP_STATE_FPSIMD,
	FP_STATE_SVE,
};

struct cpu_context {
	unsigned long x19;
	unsigned long x20;
@@ -152,6 +158,7 @@ struct thread_struct {
		struct user_fpsimd_state fpsimd_state;
	} uw;

	enum fp_type		fp_type;	/* registers FPSIMD or SVE? */
	unsigned int		fpsimd_cpu;
	void			*sve_state;	/* SVE registers, if any */
	void			*za_state;	/* ZA register, if any */
+115 −50
Original line number Diff line number Diff line
@@ -118,16 +118,8 @@
 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */
struct fpsimd_last_state_struct {
	struct user_fpsimd_state *st;
	void *sve_state;
	void *za_state;
	u64 *svcr;
	unsigned int sve_vl;
	unsigned int sme_vl;
};

static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
@@ -330,15 +322,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
 *    The task can execute SVE instructions while in userspace without
 *    trapping to the kernel.
 *
 *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
 *    corresponding Zn), P0-P15 and FFR are encoded in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl or, if SVCR.SM is set,
 *    task->thread.sme_vl.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
@@ -348,7 +331,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
 *    do_sve_acc() to be called, which does some preparation and then
 *    sets TIF_SVE.
 *
 *    When stored, FPSIMD registers V0-V31 are encoded in
 * During any syscall, the kernel may optionally clear TIF_SVE and
 * discard the vector state except for the FPSIMD subset.
 *
 * The data will be stored in one of two formats:
 *
 *  * FPSIMD only - FP_STATE_FPSIMD:
 *
 *    When the FPSIMD only state stored task->thread.fp_type is set to
 *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
 *    logically zero but not stored anywhere; P0-P15 and FFR are not
 *    stored and have unspecified values from userspace's point of
@@ -356,7 +347,23 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
 *    but userspace is discouraged from relying on this.
 *
 *    task->thread.sve_state does not need to be non-NULL, valid or any
 *    particular size: it must not be dereferenced.
 *    particular size: it must not be dereferenced and any data stored
 *    there should be considered stale and not referenced.
 *
 *  * SVE state - FP_STATE_SVE:
 *
 *    When the full SVE state is stored task->thread.fp_type is set to
 *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
 *    corresponding Zn), P0-P15 and FFR are encoded in in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl or, if SVCR.SM is set,
 *    task->thread.sme_vl. The storage for the vector registers in
 *    task->thread.uw.fpsimd_state should be ignored.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size. The data stored in
 *    task->thread.uw.fpsimd_state.vregs should be considered stale
 *    and not referenced.
 *
 *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *    irrespective of whether TIF_SVE is clear or set, since these are
@@ -378,11 +385,37 @@ static void task_fpsimd_load(void)
	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!have_cpu_fpsimd_context());

	/* Check if we should restore SVE first */
	if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
	if (system_supports_sve()) {
		switch (current->thread.fp_type) {
		case FP_STATE_FPSIMD:
			/* Stop tracking SVE for this task until next use. */
			if (test_and_clear_thread_flag(TIF_SVE))
				sve_user_disable();
			break;
		case FP_STATE_SVE:
			if (!thread_sm_enabled(&current->thread) &&
			    !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
				sve_user_enable();

			if (test_thread_flag(TIF_SVE))
				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);

			restore_sve_regs = true;
			restore_ffr = true;
			break;
		default:
			/*
			 * This indicates either a bug in
			 * fpsimd_save() or memory corruption, we
			 * should always record an explicit format
			 * when we save. We always at least have the
			 * memory allocated for FPSMID registers so
			 * try that and hope for the best.
			 */
			WARN_ON_ONCE(1);
			clear_thread_flag(TIF_SVE);
			break;
		}
	}

	/* Restore SME, override SVE register configuration if needed */
@@ -398,19 +431,20 @@ static void task_fpsimd_load(void)
		if (thread_za_enabled(&current->thread))
			za_load_state(current->thread.za_state);

		if (thread_sm_enabled(&current->thread)) {
			restore_sve_regs = true;
		if (thread_sm_enabled(&current->thread))
			restore_ffr = system_supports_fa64();
	}
	}

	if (restore_sve_regs)
	if (restore_sve_regs) {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       restore_ffr);
	else
	} else {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
	}
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
@@ -419,12 +453,12 @@ static void task_fpsimd_load(void)
 * last, if KVM is involved this may be the guest VM context rather
 * than the host thread for the VM pointed to by current. This means
 * that we must always reference the state storage via last rather
 * than via current, other than the TIF_ flags which KVM will
 * carefully maintain for us.
 * than via current, if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
static void fpsimd_save(void)
{
	struct fpsimd_last_state_struct const *last =
	struct cpu_fp_state const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
	bool save_sve_regs = false;
@@ -437,7 +471,14 @@ static void fpsimd_save(void)
	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

	if (test_thread_flag(TIF_SVE)) {
	/*
	 * If a task is in a syscall the ABI allows us to only
	 * preserve the state shared with FPSIMD so don't bother
	 * saving the full SVE state in that case.
	 */
	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
	     !in_syscall(current_pt_regs())) ||
	    last->to_save == FP_STATE_SVE) {
		save_sve_regs = true;
		save_ffr = true;
		vl = last->sve_vl;
@@ -474,8 +515,10 @@ static void fpsimd_save(void)
		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(vl),
			       &last->st->fpsr, save_ffr);
		*last->fp_type = FP_STATE_SVE;
	} else {
		fpsimd_save_state(last->st);
		*last->fp_type = FP_STATE_FPSIMD;
	}
}

@@ -768,8 +811,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
 */
void sve_sync_to_fpsimd(struct task_struct *task)
{
	if (test_tsk_thread_flag(task, TIF_SVE) ||
	    thread_sm_enabled(&task->thread))
	if (task->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(task);
}

@@ -848,8 +890,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,

	fpsimd_flush_task_state(task);
	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
	    thread_sm_enabled(&task->thread))
	    thread_sm_enabled(&task->thread)) {
		sve_to_fpsimd(task);
		task->thread.fp_type = FP_STATE_FPSIMD;
	}

	if (system_supports_sme() && type == ARM64_VEC_SME) {
		task->thread.svcr &= ~(SVCR_SM_MASK |
@@ -1368,6 +1412,7 @@ static void sve_init_regs(void)
		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_to_sve(current);
		current->thread.fp_type = FP_STATE_SVE;
	}
}

@@ -1596,6 +1641,8 @@ void fpsimd_flush_thread(void)
		current->thread.svcr = 0;
	}

	current->thread.fp_type = FP_STATE_FPSIMD;

	put_cpu_fpsimd_context();
	kfree(sve_state);
	kfree(za_state);
@@ -1627,6 +1674,31 @@ void fpsimd_signal_preserve_current_state(void)
		sve_to_fpsimd(current);
}

/*
 * Called by KVM when entering the guest.
 */
void fpsimd_kvm_prepare(void)
{
	if (!system_supports_sve())
		return;

	/*
	 * KVM does not save host SVE state since we can only enter
	 * the guest from a syscall so the ABI means that only the
	 * non-saved SVE state needs to be saved.  If we have left
	 * SVE enabled for performance reasons then update the task
	 * state to be FPSIMD only.
	 */
	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_SVE)) {
		sve_to_fpsimd(current);
		current->thread.fp_type = FP_STATE_FPSIMD;
	}

	put_cpu_fpsimd_context();
}

/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
@@ -1634,8 +1706,7 @@ void fpsimd_signal_preserve_current_state(void)
 */
static void fpsimd_bind_task_to_cpu(void)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	last->st = &current->thread.uw.fpsimd_state;
@@ -1644,6 +1715,8 @@ static void fpsimd_bind_task_to_cpu(void)
	last->sve_vl = task_get_sve_vl(current);
	last->sme_vl = task_get_sme_vl(current);
	last->svcr = &current->thread.svcr;
	last->fp_type = &current->thread.fp_type;
	last->to_save = FP_STATE_CURRENT;
	current->thread.fpsimd_cpu = smp_processor_id();

	/*
@@ -1665,22 +1738,14 @@ static void fpsimd_bind_task_to_cpu(void)
	}
}

void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
			      unsigned int sve_vl, void *za_state,
			      unsigned int sme_vl, u64 *svcr)
void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!in_softirq() && !irqs_disabled());

	last->st = st;
	last->svcr = svcr;
	last->sve_state = sve_state;
	last->za_state = za_state;
	last->sve_vl = sve_vl;
	last->sme_vl = sme_vl;
	*last = *state;
}

/*
+2 −0
Original line number Diff line number Diff line
@@ -331,6 +331,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
		clear_tsk_thread_flag(dst, TIF_SME);
	}

	dst->thread.fp_type = FP_STATE_FPSIMD;

	/* clear any pending asynchronous tag fault raised by the parent */
	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);

Loading