Commit 7b9896c3 authored by Ard Biesheuvel

ARM: percpu: add SMP_ON_UP support



Permit the use of the TPIDRPRW system register for carrying the per-CPU
offset in generic SMP configurations that also target non-SMP capable
ARMv6 cores. This uses the SMP_ON_UP code patching framework to turn all
TPIDRPRW accesses into reads/writes of entry #0 in the __per_cpu_offset
array.
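
For illustration, the net behaviour is roughly the following C sketch;
read_tpidrprw() is a hypothetical wrapper for the mrc, and the if()
stands in for what is really a one-time boot decision made by code
patching, not a runtime branch:

  /* Illustrative only: the kernel patches code at boot instead of branching. */
  static inline unsigned long read_tpidrprw(void)	/* hypothetical helper */
  {
  	unsigned long off;

  	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off));
  	return off;
  }

  static unsigned long per_cpu_offset_sketch(void)
  {
  	extern unsigned long __per_cpu_offset[];
  	extern unsigned int smp_on_up;	/* reads 0 once UP patching has run */

  	if (smp_on_up)			/* SMP extensions present */
  		return read_tpidrprw();
  	return __per_cpu_offset[0];	/* UP: only entry #0 is populated */
  }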

While at it, switch over some existing direct TPIDRPRW accesses in asm
code to invocations of a new helper that is patched in the same way when
necessary.

Note that CPU_V6+SMP without SMP_ON_UP results in a kernel that does not
boot on v6 CPUs without SMP extensions, so add this dependency to
Kconfig as well.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
parent 4e918ab1
arch/arm/include/asm/assembler.h  +56 −3
@@ -220,9 +220,7 @@ THUMB( fpreg .req r7 )

 	.macro	reload_current, t1:req, t2:req
 #ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
-	adr_l	\t1, __entry_task		@ get __entry_task base address
-	mrc	p15, 0, \t2, c13, c0, 4		@ get per-CPU offset
-	ldr	\t1, [\t1, \t2]			@ load variable
+	ldr_this_cpu \t1, __entry_task, \t1, \t2
 	mcr	p15, 0, \t1, c13, c0, 3		@ store in TPIDRURO
 #endif
 	.endm
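
The three-instruction sequence removed above and its ldr_this_cpu
replacement both implement, roughly, the following C sketch
(write_tpidruro() is an illustrative wrapper, not an existing helper):

  #include <linux/percpu.h>
  #include <linux/sched.h>

  DECLARE_PER_CPU(struct task_struct *, __entry_task);

  static inline void write_tpidruro(unsigned long val)	/* hypothetical */
  {
  	asm volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (val));
  }

  static void reload_current_sketch(void)
  {
  	/* publish this CPU's __entry_task so get_current() can find it */
  	write_tpidruro((unsigned long)*this_cpu_ptr(&__entry_task));
  }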
@@ -312,6 +310,26 @@ THUMB( fpreg .req r7 )
 #define ALT_UP_B(label) b label
 #endif
 
+	/*
+	 * this_cpu_offset - load the per-CPU offset of this CPU into
+	 *		     register 'rd'
+	 */
+	.macro		this_cpu_offset, rd:req
+#ifdef CONFIG_SMP
+ALT_SMP(mrc		p15, 0, \rd, c13, c0, 4)
+#ifdef CONFIG_CPU_V6
+ALT_UP_B(.L1_\@)
+.L0_\@:
+	.subsection	1
+.L1_\@: ldr_va		\rd, __per_cpu_offset
+	b		.L0_\@
+	.previous
+#endif
+#else
+	mov		\rd, #0
+#endif
+	.endm
+
 /*
  * Instruction barrier
  */
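
The ALT_UP_B trick keeps the SMP fast path down to the single mrc: each
use emits one fixup record into the .alt.smp.init section, conceptually
shaped like the struct below (names are illustrative; the boot-time
patcher just walks raw words):

  #include <linux/types.h>

  /* Conceptual shape of one .alt.smp.init record; illustrative names only. */
  struct alt_smp_init_record {
  	s32 site_offset;	/* patch site, relative to this record */
  	u32 up_insn;		/* instruction written to the site on UP */
  };

On a UP system the boot code overwrites the mrc with up_insn, here a
branch to .L1_\@, so the slow path loads __per_cpu_offset[0] via ldr_va
out of line and branches straight back to .L0_\@.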
@@ -648,6 +666,41 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
 	__ldst_va	str, \rn, \tmp, \sym, \cond
 	.endm
 
+	/*
+	 * ldr_this_cpu_armv6 - Load a 32-bit word from the per-CPU variable 'sym',
+	 *			without using a temp register. Supported in ARM mode
+	 *			only.
+	 */
+	.macro		ldr_this_cpu_armv6, rd:req, sym:req
+	this_cpu_offset	\rd
+	.globl		\sym
+	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
+	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
+	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
+	add		\rd, \rd, pc
+.L0_\@: sub		\rd, \rd, #4
+.L1_\@: sub		\rd, \rd, #0
+.L2_\@: ldr		\rd, [\rd, #4]
+	.endm
+
+	/*
+	 * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
+	 *		  into register 'rd', which may be the stack pointer,
+	 *		  using 't1' and 't2' as general temp registers. These
+	 *		  are permitted to overlap with 'rd' if != sp
+	 */
+	.macro		ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
+#if __LINUX_ARM_ARCH__ >= 7 || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+    (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+	this_cpu_offset	\t1
+	mov_l		\t2, \sym
+	ldr		\rd, [\t1, \t2]
+#else
+	ldr_this_cpu_armv6 \rd, \sym
+#endif
+	.endm
+
 	/*
 	 * rev_l - byte-swap a 32-bit value
 	 *
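
Both variants compute the same load; in C terms, roughly the sketch
below, where per_cpu_offset_of_this_cpu() is a stand-in for the
this_cpu_offset macro rather than a real helper. The ARMv6 variant
exists because it needs no temporaries, which is what allows 'rd' to be
the stack pointer:

  /* Semantics of ldr_this_cpu, sketched in C. */
  static unsigned long ldr_this_cpu_sketch(unsigned long *sym)
  {
  	unsigned long off = per_cpu_offset_of_this_cpu();	/* stand-in */

  	return *(unsigned long *)((unsigned long)sym + off);
  }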
arch/arm/include/asm/insn.h  +24 −0
@@ -2,6 +2,30 @@
 #ifndef __ASM_ARM_INSN_H
 #define __ASM_ARM_INSN_H
 
+#include <linux/types.h>
+
+/*
+ * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
+ * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
+ * which should be sufficient for the core kernel as well as modules loaded
+ * into the module region. (Not supported by LLD before release 14)
+ */
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+#define LOAD_SYM_ARMV6(reg, sym)					\
+	"	.globl	" #sym "				\n\t"	\
+	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, " #sym "	\n\t"	\
+	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, " #sym "	\n\t"	\
+	"	.reloc	12f, R_ARM_LDR_PC_G2, " #sym "		\n\t"	\
+	"10:	sub	" #reg ", pc, #8			\n\t"	\
+	"11:	sub	" #reg ", " #reg ", #4			\n\t"	\
+	"12:	ldr	" #reg ", [" #reg ", #0]		\n\t"
+#else
+#define LOAD_SYM_ARMV6(reg, sym)					\
+	"	ldr	" #reg ", =" #sym "			\n\t"	\
+	"	ldr	" #reg ", [" #reg "]			\n\t"
+#endif
+
 static inline unsigned long
 arm_gen_nop(void)
 {
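
A hypothetical use of LOAD_SYM_ARMV6, mirroring how the percpu code
below invokes it; jiffies is picked purely for illustration:

  #include <asm/insn.h>

  /* Hypothetical example: read the 32-bit word at a kernel symbol
   * without going through a literal pool. */
  static inline unsigned long read_jiffies_sketch(void)
  {
  	unsigned long ret;

  	asm(LOAD_SYM_ARMV6(%0, jiffies) : "=r" (ret));
  	return ret;
  }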
arch/arm/include/asm/percpu.h  +22 −3
@@ -5,15 +5,22 @@
 #ifndef _ASM_ARM_PERCPU_H_
 #define _ASM_ARM_PERCPU_H_
 
+#include <asm/insn.h>
+
 register unsigned long current_stack_pointer asm ("sp");
 
 /*
  * Same as asm-generic/percpu.h, except that we store the per cpu offset
  * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
  */
-#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+#ifdef CONFIG_SMP
 static inline void set_my_cpu_offset(unsigned long off)
 {
+	extern unsigned int smp_on_up;
+
+	if (IS_ENABLED(CONFIG_CPU_V6) && !smp_on_up)
+		return;
+
 	/* Set TPIDRPRW */
 	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
 }
@@ -27,7 +34,19 @@ static inline unsigned long __my_cpu_offset(void)
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
-	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
+	asm("0:	mrc p15, 0, %0, c13, c0, 4			\n\t"
+#ifdef CONFIG_CPU_V6
+	    "1:							\n\t"
+	    "	.subsection 1					\n\t"
+	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
+	    "	b	1b					\n\t"
+	    "	.previous					\n\t"
+	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
+	    "	.long	0b - .					\n\t"
+	    "	b	. + (2b - 0b)				\n\t"
+	    "	.popsection					\n\t"
+#endif
+	     : "=r" (off)
 	     : "Q" (*(const unsigned long *)current_stack_pointer));
 
 	return off;
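
On SMP the mrc at 0: executes and falls through at 1:; on UP the boot
patcher consumes the .alt.smp.init record and overwrites that mrc with
a branch to 2:, which loads __per_cpu_offset[0] out of line and
branches back. A typical consumer, for illustration (demo_count is a
made-up per-CPU variable):

  #include <linux/percpu.h>

  static DEFINE_PER_CPU(unsigned long, demo_count);	/* made-up example */

  static void demo_bump(void)
  {
  	/* resolves to a load/store at &demo_count + __my_cpu_offset() */
  	__this_cpu_inc(demo_count);
  }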
arch/arm/kernel/entry-armv.S  +3 −13
@@ -35,15 +35,14 @@
 	.macro	irq_handler, from_user:req
 	mov	r0, sp
 #ifdef CONFIG_IRQSTACKS
-	mov_l	r2, irq_stack_ptr	@ Take base address
-	mrc	p15, 0, r3, c13, c0, 4	@ Get CPU offset
 #ifdef CONFIG_UNWINDER_ARM
 	mov	fpreg, sp		@ Preserve original SP
 #else
 	mov	r8, fp			@ Preserve original FP
 	mov	r9, sp			@ Preserve original SP
 #endif
-	ldr	sp, [r2, r3]		@ Load SP from per-CPU var
+	ldr_this_cpu sp, irq_stack_ptr, r2, r3
 
 	.if	\from_user == 0
 UNWIND(	.setfp	fpreg, sp		)
 	@
@@ -876,16 +875,7 @@ __bad_stack:
 THUMB(	bx	pc		)
 THUMB(	nop			)
 THUMB(	.arm			)
-	mrc	p15, 0, ip, c13, c0, 4		@ Get per-CPU offset
-
-	.globl	overflow_stack_ptr
-	.reloc	0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
-	.reloc	1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
-	.reloc	2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
-	add	ip, ip, pc
-0:	add	ip, ip, #-4
-1:	add	ip, ip, #0
-2:	ldr	ip, [ip, #4]
+	ldr_this_cpu_armv6 ip, overflow_stack_ptr
 
 	str	sp, [ip, #-4]!			@ Preserve original SP value
 	mov	sp, ip				@ Switch to overflow stack
arch/arm/kernel/sleep.S  +1 −3
@@ -71,9 +71,7 @@ ENTRY(__cpu_suspend)
 	@ Run the suspend code from the overflow stack so we don't have to rely
 	@ on vmalloc-to-phys conversions anywhere in the arch suspend code.
 	@ The original SP value captured in R5 will be restored on the way out.
-	mov_l	r6, overflow_stack_ptr	@ Base pointer
-	mrc	p15, 0, r7, c13, c0, 4	@ Get per-CPU offset
-	ldr	sp, [r6, r7]		@ Address of this CPU's overflow stack
+	ldr_this_cpu sp, overflow_stack_ptr, r6, r7
 #endif
 	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
 	sub	sp, sp, r4		@ allocate CPU state on stack