Commit 05096666 authored by Al Viro, committed by Matt Turner
Browse files

alpha: lazy FPU switching



	On each context switch we save the FPU registers on the stack
of the old process and restore the FPU registers from the stack of the new one.
That allows us to avoid doing that each time we enter/leave the
kernel mode; however, that can get suboptimal in some cases.

	For one thing, we don't need to bother saving anything
for kernel threads.  For another, if between entering and leaving
the kernel a thread gives CPU up more than once, it will do
useless work, saving the same values every time, only to discard
the saved copy as soon as it returns from switch_to().

	Alternative solution:

* move the array we save into from switch_stack to thread_info
* have a (thread-synchronous) flag set when we save them
* have another flag set when they should be restored on return to userland.
* do *NOT* save/restore them in do_switch_stack()/undo_switch_stack().
* restore on the exit to user mode if the restore flag had
been set.  Clear both flags.
* on context switch, entry to fork/clone/vfork, before entry into do_signal()
and on entry into a straced syscall, save the registers and set the 'saved' flag
unless it has already been set.
* on context switch set the 'restore' flag as well.
* have copy_thread() set both flags for child, so the registers would be
restored once the child returns to userland.
* use the saved data in setup_sigcontext(); have restore_sigcontext() set both flags
and copy from sigframe to save area.
* teach ptrace to look for FPU registers in thread_info instead of
switch_stack.
* teach isolated accesses to FPU registers (rdfpcr, wrfpcr, etc.)
to check the 'saved' flag (under preempt_disable()) and work with the save area
if it's been set; if 'saved' flag is found upon write access, set 'restore' flag
as well.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Matt Turner <mattst88@gmail.com>
parent a7acb188
Loading
Loading
Loading
Loading
+37 −24
Original line number Diff line number Diff line
@@ -15,6 +15,10 @@ rdfpcr(void)
{
	unsigned long tmp, ret;

	preempt_disable();
	if (current_thread_info()->status & TS_SAVED_FP) {
		ret = current_thread_info()->fp[31];
	} else {
#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
		__asm__ __volatile__ (
			"ftoit $f0,%0\n\t"
@@ -30,6 +34,8 @@ rdfpcr(void)
			"ldt $f0,%0"
			: "=m"(tmp), "=m"(ret));
#endif
	}
	preempt_enable();

	return ret;
}
@@ -39,6 +45,11 @@ wrfpcr(unsigned long val)
{
	unsigned long tmp;

	preempt_disable();
	if (current_thread_info()->status & TS_SAVED_FP) {
		current_thread_info()->status |= TS_RESTORE_FP;
		current_thread_info()->fp[31] = val;
	} else {
#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
		__asm__ __volatile__ (
			"ftoit $f0,%0\n\t"
@@ -55,6 +66,8 @@ wrfpcr(unsigned long val)
			: "=m"(tmp) : "m"(val));
#endif
	}
	preempt_enable();
}

static inline unsigned long
swcr_update_status(unsigned long swcr, unsigned long fpcr)
+16 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ struct thread_info {
	int bpt_nsaved;
	unsigned long bpt_addr[2];		/* breakpoint handling  */
	unsigned int bpt_insn[2];
	unsigned long fp[32];
};

/*
@@ -83,6 +84,9 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define TS_UAC_NOFIX		0x0002	/* ! flags as they match          */
#define TS_UAC_SIGBUS		0x0004	/* ! userspace part of 'osf_sysinfo' */

#define TS_SAVED_FP		0x0008
#define TS_RESTORE_FP		0x0010

#define SET_UNALIGN_CTL(task,value)	({				\
	__u32 status = task_thread_info(task)->status & ~UAC_BITMASK;	\
	if (value & PR_UNALIGN_NOPRINT)					\
@@ -106,5 +110,17 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
	put_user(res, (int __user *)(value));				\
	})

#ifndef __ASSEMBLY__
extern void __save_fpu(void);

static inline void save_fpu(void)
{
	if (!(current_thread_info()->status & TS_SAVED_FP)) {
		current_thread_info()->status |= TS_SAVED_FP;
		__save_fpu();
	}
}
#endif

#endif /* __KERNEL__ */
#endif /* _ALPHA_THREAD_INFO_H */
+2 −0
Original line number Diff line number Diff line
@@ -64,7 +64,9 @@ struct switch_stack {
	unsigned long r14;
	unsigned long r15;
	unsigned long r26;
#ifndef __KERNEL__
	unsigned long fp[32];	/* fp[31] is fpcr */
#endif
};


+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@ void foo(void)
	DEFINE(TI_TASK, offsetof(struct thread_info, task));
	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
	DEFINE(TI_FP, offsetof(struct thread_info, fp));
	DEFINE(TI_STATUS, offsetof(struct thread_info, status));
	BLANK();

        DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
+73 −71
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@

/* Stack offsets.  */
#define SP_OFF			184
#define SWITCH_STACK_SIZE	320
#define SWITCH_STACK_SIZE	64

.macro	CFI_START_OSF_FRAME	func
	.align	4
@@ -159,7 +159,6 @@
	.cfi_rel_offset	$13, 32
	.cfi_rel_offset	$14, 40
	.cfi_rel_offset	$15, 48
	/* We don't really care about the FP registers for debugging.  */
.endm

.macro	UNDO_SWITCH_STACK
@@ -498,6 +497,10 @@ ret_to_user:
	and	$17, _TIF_WORK_MASK, $2
	bne	$2, work_pending
restore_all:
	ldl	$2, TI_STATUS($8)
	and	$2, TS_SAVED_FP | TS_RESTORE_FP, $3
	bne	$3, restore_fpu
restore_other:
	.cfi_remember_state
	RESTORE_ALL
	call_pal PAL_rti
@@ -506,7 +509,7 @@ ret_to_kernel:
	.cfi_restore_state
	lda	$16, 7
	call_pal PAL_swpipl
	br restore_all
	br restore_other

	.align 3
$syscall_error:
@@ -570,6 +573,14 @@ $work_notifysig:
	.type	strace, @function
strace:
	/* set up signal stack, call syscall_trace */
	// NB: if anyone adds preemption, this block will need to be protected
	ldl	$1, TI_STATUS($8)
	and	$1, TS_SAVED_FP, $3
	or	$1, TS_SAVED_FP, $2
	bne	$3, 1f
	stl	$2, TI_STATUS($8)
	bsr	$26, __save_fpu
1:
	DO_SWITCH_STACK
	jsr	$26, syscall_trace_enter /* returns the syscall number */
	UNDO_SWITCH_STACK
@@ -649,40 +660,6 @@ do_switch_stack:
	stq	$14, 40($sp)
	stq	$15, 48($sp)
	stq	$26, 56($sp)
	stt	$f0, 64($sp)
	stt	$f1, 72($sp)
	stt	$f2, 80($sp)
	stt	$f3, 88($sp)
	stt	$f4, 96($sp)
	stt	$f5, 104($sp)
	stt	$f6, 112($sp)
	stt	$f7, 120($sp)
	stt	$f8, 128($sp)
	stt	$f9, 136($sp)
	stt	$f10, 144($sp)
	stt	$f11, 152($sp)
	stt	$f12, 160($sp)
	stt	$f13, 168($sp)
	stt	$f14, 176($sp)
	stt	$f15, 184($sp)
	stt	$f16, 192($sp)
	stt	$f17, 200($sp)
	stt	$f18, 208($sp)
	stt	$f19, 216($sp)
	stt	$f20, 224($sp)
	stt	$f21, 232($sp)
	stt	$f22, 240($sp)
	stt	$f23, 248($sp)
	stt	$f24, 256($sp)
	stt	$f25, 264($sp)
	stt	$f26, 272($sp)
	stt	$f27, 280($sp)
	mf_fpcr	$f0		# get fpcr
	stt	$f28, 288($sp)
	stt	$f29, 296($sp)
	stt	$f30, 304($sp)
	stt	$f0, 312($sp)	# save fpcr in slot of $f31
	ldt	$f0, 64($sp)	# dont let "do_switch_stack" change fp state.
	ret	$31, ($1), 1
	.cfi_endproc
	.size	do_switch_stack, .-do_switch_stack
@@ -701,54 +678,71 @@ undo_switch_stack:
	ldq	$14, 40($sp)
	ldq	$15, 48($sp)
	ldq	$26, 56($sp)
	ldt	$f30, 312($sp)	# get saved fpcr
	ldt	$f0, 64($sp)
	ldt	$f1, 72($sp)
	ldt	$f2, 80($sp)
	ldt	$f3, 88($sp)
	mt_fpcr	$f30		# install saved fpcr
	ldt	$f4, 96($sp)
	ldt	$f5, 104($sp)
	ldt	$f6, 112($sp)
	ldt	$f7, 120($sp)
	ldt	$f8, 128($sp)
	ldt	$f9, 136($sp)
	ldt	$f10, 144($sp)
	ldt	$f11, 152($sp)
	ldt	$f12, 160($sp)
	ldt	$f13, 168($sp)
	ldt	$f14, 176($sp)
	ldt	$f15, 184($sp)
	ldt	$f16, 192($sp)
	ldt	$f17, 200($sp)
	ldt	$f18, 208($sp)
	ldt	$f19, 216($sp)
	ldt	$f20, 224($sp)
	ldt	$f21, 232($sp)
	ldt	$f22, 240($sp)
	ldt	$f23, 248($sp)
	ldt	$f24, 256($sp)
	ldt	$f25, 264($sp)
	ldt	$f26, 272($sp)
	ldt	$f27, 280($sp)
	ldt	$f28, 288($sp)
	ldt	$f29, 296($sp)
	ldt	$f30, 304($sp)
	lda	$sp, SWITCH_STACK_SIZE($sp)
	ret	$31, ($1), 1
	.cfi_endproc
	.size	undo_switch_stack, .-undo_switch_stack

/* FR(n): byte offset of fp[n] in the thread_info that $8 points to. */
#define FR(n) n * 8 + TI_FP($8)
	.align	4
	.globl	__save_fpu
	.type	__save_fpu, @function
/*
 * Store $f0..$f30 plus the fpcr (kept in the fp[31] slot, since $f31
 * is not a real register) into current_thread_info()->fp[].
 * Callers set TS_SAVED_FP before invoking this (see save_fpu()).
 * $f0 is reloaded from the save area at the end, so the visible FP
 * state is unchanged by this routine.
 */
__save_fpu:
#define V(n) stt	$f##n, FR(n)
	V( 0); V( 1); V( 2); V( 3)
	V( 4); V( 5); V( 6); V( 7)
	V( 8); V( 9); V(10); V(11)
	V(12); V(13); V(14); V(15)
	V(16); V(17); V(18); V(19)
	V(20); V(21); V(22); V(23)
	V(24); V(25); V(26); V(27)
	mf_fpcr	$f0		# get fpcr
	V(28); V(29); V(30)
	stt	$f0, FR(31)	# save fpcr in slot of $f31
	ldt	$f0, FR(0)	# don't let "__save_fpu" change fp state.
	ret
#undef V
	.size	__save_fpu, .-__save_fpu

	.align	4
/*
 * Entered from restore_all when TS_SAVED_FP or TS_RESTORE_FP is set.
 * On entry: $2 = thread_info->status,
 *           $3 = $2 & (TS_SAVED_FP | TS_RESTORE_FP).
 * Reload $f0..$f30 and the fpcr from thread_info->fp[] only if
 * TS_RESTORE_FP was set; in either case clear both flags and rejoin
 * the normal return-to-user path at restore_other.
 */
restore_fpu:
	and	$3, TS_RESTORE_FP, $3	# was a reload actually requested?
	bic	$2, TS_SAVED_FP | TS_RESTORE_FP, $2	# clear both flags
	beq	$3, 1f			# saved but no restore needed
#define V(n) ldt	$f##n, FR(n)
	ldt	$f30, FR(31)	# get saved fpcr
	V( 0); V( 1); V( 2); V( 3)
	mt_fpcr	$f30		# install saved fpcr
	V( 4); V( 5); V( 6); V( 7)
	V( 8); V( 9); V(10); V(11)
	V(12); V(13); V(14); V(15)
	V(16); V(17); V(18); V(19)
	V(20); V(21); V(22); V(23)
	V(24); V(25); V(26); V(27)
	V(28); V(29); V(30)
1:	stl $2, TI_STATUS($8)	# write back status with flags cleared
	br restore_other
#undef V


/*
 * The meat of the context switch code.
 */

	.align	4
	.globl	alpha_switch_to
	.type	alpha_switch_to, @function
	.cfi_startproc
alpha_switch_to:
	DO_SWITCH_STACK
	ldl	$1, TI_STATUS($8)
	and	$1, TS_RESTORE_FP, $3
	bne	$3, 1f
	or	$1, TS_RESTORE_FP | TS_SAVED_FP, $2
	and	$1, TS_SAVED_FP, $3
	stl	$2, TI_STATUS($8)
	bne	$3, 1f
	bsr	$26, __save_fpu
1:
	call_pal PAL_swpctx
	lda	$8, 0x3fff
	UNDO_SWITCH_STACK
@@ -799,6 +793,14 @@ ret_from_kernel_thread:
alpha_\name:
	.prologue 0
	bsr	$1, do_switch_stack
	// NB: if anyone adds preemption, this block will need to be protected
	ldl	$1, TI_STATUS($8)
	and	$1, TS_SAVED_FP, $3
	or	$1, TS_SAVED_FP, $2
	bne	$3, 1f
	stl	$2, TI_STATUS($8)
	bsr	$26, __save_fpu
1:
	jsr	$26, sys_\name
	ldq	$26, 56($sp)
	lda	$sp, SWITCH_STACK_SIZE($sp)
Loading