Commit 11e969bc authored by Max Filippov
Browse files

xtensa: support coprocessors on SMP



Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make the coprocessor_owner array per-CPU and move it to struct exc_table
for easy access from the fast_coprocessor exception handler. Allow a task
to have live coprocessors on only a single CPU and record this CPU number
in struct thread_info::cp_owner_cpu. Change the meaning of struct
thread_info::cpenable to 'coprocessors live on cp_owner_cpu'.
Introduce a C-level coprocessor exception handler that flushes and
releases live coprocessors of the task taking the 'coprocessor disabled'
exception, and call it from the fast_coprocessor handler when the task
has live coprocessors on another CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending an IPI to the cp_owner_cpu. Add
function coprocessor_flush_release_all to do a flush followed by a
release atomically. Add function local_coprocessors_flush_release_all to
flush and release all coprocessors on the local CPU and use it to flush
coprocessor contexts from a CPU that goes offline.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
parent f29cab29
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -142,10 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
	__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));

extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
/*
 * Coprocessor state management API.
 *
 * A task may have live coprocessor state on only one CPU at a time; that
 * CPU is recorded in thread_info::cp_owner_cpu. The *_all functions taking
 * a thread_info may be called from any CPU: they send an IPI to the
 * cp_owner_cpu so that the work is done there.
 */

/* Forward declaration: only pointers to struct thread_info are used here. */
struct thread_info;

/*
 * Implemented in assembly (ENTRY(coprocessor_flush)).
 * NOTE(review): presumably saves the state of coprocessor cp_index into
 * ti's coprocessor save area -- confirm against the assembly body.
 */
void coprocessor_flush(struct thread_info *ti, int cp_index);

/* Release all coprocessors owned by ti; callable from any CPU. */
void coprocessor_release_all(struct thread_info *ti);

/* Flush all live coprocessors of ti; callable from any CPU. */
void coprocessor_flush_all(struct thread_info *ti);

/* Flush followed by release of ti's coprocessors, done atomically. */
void coprocessor_flush_release_all(struct thread_info *ti);

/*
 * Flush and release all coprocessors on the local CPU. Used to flush
 * coprocessor contexts from a CPU that goes offline.
 */
void local_coprocessors_flush_release_all(void);

#endif	/* XTENSA_HAVE_COPROCESSORS */

+6 −1
Original line number Diff line number Diff line
@@ -52,12 +52,17 @@ struct thread_info {
	__u32			cpu;		/* current CPU */
	__s32			preempt_count;	/* 0 => preemptable,< 0 => BUG*/

	unsigned long		cpenable;
#if XCHAL_HAVE_EXCLUSIVE
	/* result of the most recent exclusive store */
	unsigned long		atomctl8;
#endif

	/*
	 * If i-th bit is set then coprocessor state is loaded into the
	 * coprocessor i on CPU cp_owner_cpu.
	 */
	unsigned long		cpenable;
	u32			cp_owner_cpu;
	/* Allocate storage for extra user states and coprocessor states. */
#if XTENSA_HAVE_COPROCESSORS
	xtregs_coprocessor_t	xtregs_cp;
+6 −0
Original line number Diff line number Diff line
@@ -27,6 +27,10 @@ struct exc_table {
	void *fixup;
	/* For passing a parameter to fixup */
	void *fixup_param;
#if XTENSA_HAVE_COPROCESSORS
	/* Pointers to owner struct thread_info */
	struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
#endif
	/* Fast user exception handlers */
	void *fast_user_handler[EXCCAUSE_N];
	/* Fast kernel exception handlers */
@@ -35,6 +39,8 @@ struct exc_table {
	xtensa_exception_handler *default_handler[EXCCAUSE_N];
};

DECLARE_PER_CPU(struct exc_table, exc_table);

xtensa_exception_handler *
__init trap_set_handler(int cause, xtensa_exception_handler *handler);

+7 −1
Original line number Diff line number Diff line
@@ -91,10 +91,12 @@ int main(void)
	/* struct thread_info (offset from start_struct) */
	DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
	DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XCHAL_HAVE_EXCLUSIVE
	DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
#endif
	DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
	DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
	DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
#if XTENSA_HAVE_COPROCESSORS
	DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
	DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
	DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
	DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
	DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
#if XTENSA_HAVE_COPROCESSORS
	DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
	       offsetof(struct exc_table, coprocessor_owner));
#endif
	DEFINE(EXC_TABLE_FAST_USER,
	       offsetof(struct exc_table, fast_user_handler));
	DEFINE(EXC_TABLE_FAST_KERNEL,
+90 −32
Original line number Diff line number Diff line
@@ -19,6 +19,26 @@
#include <asm/current.h>
#include <asm/regs.h>

/*
 * Rules for coprocessor state manipulation on SMP:
 *
 * - a task may have live coprocessors only on one CPU.
 *
 * - whether coprocessor context of task T is live on some CPU is
 *   denoted by T's thread_info->cpenable.
 *
 * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
 *   is valid in T's thread_info. Zero thread_info->cpenable means that
 *   the coprocessor context is valid in T's thread_info.
 *
 * - if a coprocessor context of task T is live on CPU X, only CPU X changes
 *   T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
 *   This is ensured by keeping the cpenable SR at 0 whenever T, having a
 *   live coprocessor on CPU X, runs on any other CPU Y.
 *   When the fast_coprocessor exception is taken on CPU Y it goes to the
 *   C-level do_coprocessor that uses an IPI to make CPU X flush T's
 *   coprocessors.
 */

#if XTENSA_HAVE_COPROCESSORS

/*
@@ -101,9 +121,37 @@

ENTRY(fast_coprocessor)

	s32i	a3, a2, PT_AREG3

#ifdef CONFIG_SMP
	/*
	 * Check if any coprocessor context is live on another CPU
	 * and if so go through the C-level coprocessor exception handler
	 * to flush it to memory.
	 */
	GET_THREAD_INFO (a0, a2)
	l32i	a3, a0, THREAD_CPENABLE
	beqz	a3, .Lload_local

	/*
	 * Pairs with smp_wmb in local_coprocessor_release_all
	 * and with both memws below.
	 */
	memw
	l32i	a3, a0, THREAD_CPU
	l32i	a0, a0, THREAD_CP_OWNER_CPU
	beq	a0, a3, .Lload_local

	rsr	a0, ps
	l32i	a3, a2, PT_AREG3
	bbci.l	a0, PS_UM_BIT, 1f
	call0	user_exception
1:	call0	kernel_exception
#endif

	/* Save remaining registers a1-a3 and SAR */

	s32i	a3, a2, PT_AREG3
.Lload_local:
	rsr	a3, sar
	s32i	a1, a2, PT_AREG1
	s32i	a3, a2, PT_SAR
@@ -117,6 +165,9 @@ ENTRY(fast_coprocessor)
	s32i	a5, a1, PT_AREG5
	s32i	a6, a1, PT_AREG6
	s32i	a7, a1, PT_AREG7
	s32i	a8, a1, PT_AREG8
	s32i	a9, a1, PT_AREG9
	s32i	a10, a1, PT_AREG10

	/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */

@@ -139,51 +190,66 @@ ENTRY(fast_coprocessor)
	addx8	a7, a3, a7
	addx4	a7, a3, a7

	/* Retrieve previous owner. (a3 still holds CP number) */
	/* Retrieve previous owner (a8). */

	movi	a0, coprocessor_owner	# list of owners
	rsr	a0, excsave1		# exc_table
	addx4	a0, a3, a0		# entry for CP
	l32i	a4, a0, 0
	l32i	a8, a0, EXC_TABLE_COPROCESSOR_OWNER

	beqz	a4, 1f			# skip 'save' if no previous owner
	/* Set new owner (a9). */

	/* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
	GET_THREAD_INFO (a9, a1)
	l32i	a4, a9, THREAD_CPU
	s32i	a9, a0, EXC_TABLE_COPROCESSOR_OWNER
	s32i	a4, a9, THREAD_CP_OWNER_CPU

	l32i	a5, a4, THREAD_CPENABLE
	xor	a5, a5, a2		# (1 << cp-id) still in a2
	s32i	a5, a4, THREAD_CPENABLE
	/*
	 * Enable coprocessor for the new owner. (a2 = 1 << CP number)
	 * This can be done before loading context into the coprocessor.
	 */
	l32i	a4, a9, THREAD_CPENABLE
	or	a4, a4, a2

	/*
	 * Get context save area and call save routine.
	 * (a4 still holds previous owner (thread_info), a3 CP number)
	 * Make sure THREAD_CP_OWNER_CPU is in memory before updating
	 * THREAD_CPENABLE
	 */
	memw				# (2)
	s32i	a4, a9, THREAD_CPENABLE

	l32i	a2, a7, CP_REGS_TAB_OFFSET
	l32i	a3, a7, CP_REGS_TAB_SAVE
	add	a2, a2, a4
	callx0	a3
	beqz	a8, 1f			# skip 'save' if no previous owner

	/* Note that only a0 and a1 were preserved. */
	/* Disable coprocessor for previous owner. (a2 = 1 << CP number) */

	rsr	a3, exccause
	addi	a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
	movi	a0, coprocessor_owner
	addx4	a0, a3, a0
	l32i	a10, a8, THREAD_CPENABLE
	xor	a10, a10, a2

	/* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
	/* Get context save area and call save routine. */

1:	GET_THREAD_INFO (a4, a1)
	s32i	a4, a0, 0
	l32i	a2, a7, CP_REGS_TAB_OFFSET
	l32i	a3, a7, CP_REGS_TAB_SAVE
	add	a2, a2, a8
	callx0	a3

	/*
	 * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
	 * before updating THREAD_CPENABLE
	 */
	memw				# (3)
	s32i	a10, a8, THREAD_CPENABLE
1:
	/* Get context save area and call load routine. */

	l32i	a2, a7, CP_REGS_TAB_OFFSET
	l32i	a3, a7, CP_REGS_TAB_LOAD
	add	a2, a2, a4
	add	a2, a2, a9
	callx0	a3

	/* Restore all registers and return from exception handler. */

	l32i	a10, a1, PT_AREG10
	l32i	a9, a1, PT_AREG9
	l32i	a8, a1, PT_AREG8
	l32i	a7, a1, PT_AREG7
	l32i	a6, a1, PT_AREG6
	l32i	a5, a1, PT_AREG5
@@ -233,12 +299,4 @@ ENTRY(coprocessor_flush)

ENDPROC(coprocessor_flush)

	.data

ENTRY(coprocessor_owner)

	.fill XCHAL_CP_MAX, 4, 0

END(coprocessor_owner)

#endif /* XTENSA_HAVE_COPROCESSORS */
Loading