Commit 50807460 authored by Ard Biesheuvel, committed by Russell King (Oracle)

ARM: 9195/1: entry: avoid explicit literal loads



ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into
registers without having to rely on literal loads that go via the
D-cache.  For older cores, we now support a similar arrangement, based
on PC-relative group relocations.
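
As an illustration (not part of the patch; the symbol foo and the
label .Lfoo are made up), compare the two ways of materializing an
address:

	ldr	r0, .Lfoo		@ literal load: fetch &foo from the
	...				@ pool below, via the D-cache
.Lfoo:	.word	foo

	movw	r0, #:lower16:foo	@ ARMv7: build the address from two
	movt	r0, #:upper16:foo	@ immediates, no data access at all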

This means we can elide most literal loads entirely from the entry path,
by switching to the ldr_va macro to emit the appropriate sequence
depending on the target architecture revision.
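
As a rough sketch (using the cr_alignment load from the patch below;
the exact output depends on the configuration), ldr_va r8, cr_alignment
assembles on ARMv7 to:

	movw	r8, #:lower16:cr_alignment
	movt	r8, #:upper16:cr_alignment
	ldr	r8, [r8]

and on older cores to a PC-relative sequence whose immediates the
linker fixes up via group relocations:

.L0:	sub	r8, pc, #8		@ R_ARM_ALU_PC_G0_NC
.L1:	sub	r8, r8, #4		@ R_ARM_ALU_PC_G1_NC
.L2:	ldr	r8, [r8, #0]		@ R_ARM_LDR_PC_G2

Either way, the address of cr_alignment is never itself loaded from
memory.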

While at it, switch to the bl_r macro for invoking the right PABT/DABT
helpers instead of setting the LR register explicitly, which does not
play well with cores that speculate across function returns.
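
For example, the old MULTI_PABORT sequence (in the diff below) crafted
the return address by hand:

	mov	lr, pc
	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]	@ return not predictable

whereas bl_r can emit a real branch-and-link on cores that have one
(a sketch; the exact expansion is in <asm/assembler.h>):

	blx	ip				@ pairs with the callee's
						@ return in the predictor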

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
parent 952f0331
arch/arm/include/asm/assembler.h  +9 −9
@@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
	__adldst_l	str, \src, \sym, \tmp, \cond
	.endm

-	.macro		__ldst_va, op, reg, tmp, sym, cond
+	.macro		__ldst_va, op, reg, tmp, sym, cond, offset
#if __LINUX_ARM_ARCH__ >= 7 || \
    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
	mov_l		\tmp, \sym, \cond
-	\op\cond	\reg, [\tmp]
#else
	/*
	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -683,20 +682,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond	\tmp, pc, #8
-.L1_\@: sub\cond	\tmp, \tmp, #4
-.L2_\@: \op\cond	\reg, [\tmp, #0]
+.L0_\@: sub\cond	\tmp, pc, #8 - \offset
+.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
+.L2_\@:
#endif
+	\op\cond	\reg, [\tmp, #\offset]
	.endm

	/*
	 * ldr_va - load a 32-bit word from the virtual address of \sym
	 */
-	.macro		ldr_va, rd:req, sym:req, cond, tmp
+	.macro		ldr_va, rd:req, sym:req, cond, tmp, offset=0
	.ifnb		\tmp
-	__ldst_va	ldr, \rd, \tmp, \sym, \cond
+	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
	.else
-	__ldst_va	ldr, \rd, \rd, \sym, \cond
+	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
	.endif
	.endm

@@ -704,7 +704,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
	 * str_va - store a 32-bit word to the virtual address of \sym
	 */
	.macro		str_va, rn:req, sym:req, tmp:req, cond
-	__ldst_va	str, \rn, \tmp, \sym, \cond
+	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
	.endm

	/*
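
How the new offset parameter is used, sketched with the PABT helper
change below: ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC fetches
the handler pointer in a single access, roughly this on ARMv7:

	movw	ip, #:lower16:processor
	movt	ip, #:upper16:processor
	ldr	ip, [ip, #PROCESSOR_PABT_FUNC]

On the group relocation path, \offset is folded into the immediates of
all three instructions before relocation, so the linker still resolves
them against \sym while the final load accesses \sym + \offset.
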
arch/arm/kernel/entry-armv.S  +7 −30
@@ -61,9 +61,8 @@
	.macro	pabt_helper
	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
#ifdef MULTI_PABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
+	bl_r	ip
#else
	bl	CPU_PABORT_HANDLER
#endif
@@ -82,9 +81,8 @@
	@ the fault status register in r1.  r9 must be preserved.
	@
#ifdef MULTI_DABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
+	bl_r	ip
#else
	bl	CPU_DABORT_HANDLER
#endif
@@ -302,16 +300,6 @@ __fiq_svc:
 UNWIND(.fnend		)
ENDPROC(__fiq_svc)

-	.align	5
-.LCcralign:
-	.word	cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
-	.word	processor
-#endif
-.LCfp:
-	.word	fp_enter
-
/*
 * Abort mode handlers
 */
@@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
 THUMB(	stmia	sp, {r0 - r12}	)

 ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
-ATRAP(	ldr	r8, .LCcralign)
+ATRAP(	ldr_va	r8, cr_alignment)

	ldmia	r0, {r3 - r5}
	add	r0, sp, #S_PC		@ here for interlock avoidance
@@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
	str	r3, [sp]		@ save the "real" r0 copied
					@ from the exception stack

-ATRAP(	ldr	r8, [r8, #0])
-
	@
	@ We are now ready to fill in the remaining blanks on the stack:
	@
@@ -505,9 +491,7 @@ __und_usr_thumb:
 */
#if __LINUX_ARM_ARCH__ < 7
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-	ldr	r5, .LCcpu_architecture
-	ldr	r5, [r5]
+	ldr_va	r5, cpu_architecture
	cmp	r5, #CPU_ARCH_ARMv7
	blo	__und_usr_fault_16		@ 16bit undefined instruction
/*
@@ -654,12 +638,6 @@ call_fpe:
	ret.w	lr				@ CP#14 (Debug)
	ret.w	lr				@ CP#15 (Control)

-#ifdef NEED_CPU_ARCHITECTURE
-	.align	2
-.LCcpu_architecture:
-	.word	__cpu_architecture
-#endif
-
#ifdef CONFIG_NEON
	.align	6

@@ -685,9 +663,8 @@ call_fpe:
#endif

do_fpe:
-	ldr	r4, .LCfp
	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
-	ldr	pc, [r4]			@ Call FP module USR entry point
+	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point

/*
 * The FP module is called with these registers set:
arch/arm/kernel/entry-common.S  +1 −9
@@ -198,7 +198,7 @@ ENTRY(vector_swi)
#endif
	reload_current r10, ip
	zero_fp
-	alignment_trap r10, ip, __cr_alignment
+	alignment_trap r10, ip, cr_alignment
	asm_trace_hardirqs_on save=0
	enable_irq_notrace
	ct_user_exit save=0
@@ -328,14 +328,6 @@ __sys_trace_return:
	bl	syscall_trace_exit
	b	ret_slow_syscall

-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
	.macro	syscall_table_start, sym
	.equ	__sys_nr, 0
	.type	\sym, #object
arch/arm/kernel/entry-header.S  +1 −2
@@ -48,8 +48,7 @@
	.macro	alignment_trap, rtmp1, rtmp2, label
#ifdef CONFIG_ALIGNMENT_TRAP
	mrc	p15, 0, \rtmp2, c1, c0, 0
-	ldr	\rtmp1, \label
-	ldr	\rtmp1, [\rtmp1]
+	ldr_va	\rtmp1, \label
	teq	\rtmp1, \rtmp2
	mcrne	p15, 0, \rtmp1, c1, c0, 0
#endif