Commit c4da8e0d authored by Al Viro

sparc32: switch copy_user.S away from range exception table entries



Those were the last range exception table entries, which will allow
to get rid of a lot of weirdness.  Emits the same code into .text.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent cfd5fa70
Diffstat: +112 −203 (112 lines added, 203 lines removed)
Diff follows; the original/new line-number columns and +/- markers were lost in extraction.
@@ -21,98 +21,134 @@
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr

#define EX_ENTRY(l1, l2)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	l1, l2;				\
	.text;

/* NOTE(review): diff-extraction residue -- this span fuses the pre-patch EX
 * body (fixup in .fixup, result staged in %g3 via fixupretl) with the
 * post-patch body (inline "retl; a,b,%o0" registered via EX_ENTRY).  The two
 * variants share the "98: x,y" head; only one belongs, and the fused text is
 * not assemblable as-is (duplicate 99: label, broken continuation at .align). */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4
99:	retl;					\
	 a, b, %o0;				\
	EX_ENTRY(98b, 99b)

/* NOTE(review): same diff fusion as EX above -- old .fixup/%g3 variant and
 * new inline "c,d,e; retl; a,b,%o0" variant are interleaved; not valid as-is. */
#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4
	retl;					\
	 a, b, %o0;				\
	EX_ENTRY(98b, 99b)

/* NOTE(review): diff residue -- the old open-coded __ex_table emission
 * (98b -> 97f) and its one-line replacement EX_ENTRY(98b, 97f) are both
 * present; only one belongs in either version of the file. */
#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4
	EX_ENTRY(98b, 97f)

/* Old range-style exception entry (start, 0, end, handler) -- this is the
 * form the commit removes; it covered a whole insn range with one entry. */
#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4
/* New per-insn load wrapper: a faulting load branches to 'label' with the
 * attempted offset staged in %g5 (set in the branch delay slot), so the
 * shared fault handler can compute bytes-not-copied. */
#define LD(insn, src, offset, reg, label)	\
98:	insn [%src + (offset)], %reg;		\
	.section .fixup,ALLOC,EXECINSTR;	\
99:	ba	label;				\
	 mov	offset, %g5;			\
	EX_ENTRY(98b, 99b)

/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */
/* Store counterpart of LD: a faulting store branches to 'label' with the
 * attempted offset in %g5 (delay slot), and registers an __ex_table entry. */
#define ST(insn, dst, offset, reg, label)	\
98:	insn %reg, [%dst + (offset)];		\
	.section .fixup,ALLOC,EXECINSTR;	\
99:	ba	label;				\
	 mov	offset, %g5;			\
	EX_ENTRY(98b, 99b)

/* Both these macros have to start with exactly the same insn */
/* left: g7 + (g1 % 128) - offset */
/* NOTE(review): diff residue -- the plain ldd/st body (old) and the
 * LD()/ST() fault-annotated body routing to bigchunk_fault (new) are fused
 * below; the old body even kept the #define line while the new body lost it. */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
	ST(st, dst, offset + 0x00, t0, bigchunk_fault)	\
	ST(st, dst, offset + 0x04, t1, bigchunk_fault)	\
	ST(st, dst, offset + 0x08, t2, bigchunk_fault)	\
	ST(st, dst, offset + 0x0c, t3, bigchunk_fault)	\
	ST(st, dst, offset + 0x10, t4, bigchunk_fault)	\
	ST(st, dst, offset + 0x14, t5, bigchunk_fault)	\
	ST(st, dst, offset + 0x18, t6, bigchunk_fault)	\
	ST(st, dst, offset + 0x1c, t7, bigchunk_fault)

/* left: g7 + (g1 % 128) - offset */
/* NOTE(review): diff residue -- old ldd/std body and new LD()/ST() body
 * (doubleword stores, same bigchunk_fault handler) are fused below. */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];
	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
	ST(std, dst, offset + 0x00, t0, bigchunk_fault)	\
	ST(std, dst, offset + 0x08, t2, bigchunk_fault)	\
	ST(std, dst, offset + 0x10, t4, bigchunk_fault)	\
	ST(std, dst, offset + 0x18, t6, bigchunk_fault)

	.section .fixup,#alloc,#execinstr
/* Fault in a MOVE_BIGCHUNK/MOVE_BIGALIGNCHUNK: %g5 = faulting offset (set
 * by LD/ST), %g7 = bytes left in the 128-byte loop, %g1 = length.
 * Returns bytes not copied: (%g7 - %g5) + (%g1 % 128), per the "left:"
 * invariant documented on the macros above. */
bigchunk_fault:
	sub	%g7, %g5, %o0
	and	%g1, 127, %g1
	retl
	 add	%o0, %g1, %o0		/* delay slot: %o0 += %g1 % 128 */

/* left: offset + 16 + (g1 % 16) */
/* NOTE(review): diff residue -- old plain body and new LD()/ST() body
 * (negative offsets, lastchunk_fault handler) are fused below. */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];
	LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault)	\
	LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x10), t0, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x08), t2, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)

/* NOTE(review): pre-patch MOVE_HALFCHUNK (removed by this commit); the
 * replacement definition with the same name appears later in this file. */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];
	.section .fixup,#alloc,#execinstr
/* Fault in MOVE_LASTCHUNK: %g5 holds the (negative) faulting offset,
 * so (%g1 % 16) - %g5 matches the macro's "left: offset + 16 + (g1 % 16)". */
lastchunk_fault:
	and	%g1, 15, %g1
	retl
	 sub	%g1, %g5, %o0		/* delay slot: bytes not copied */

/* left: o3 + (o2 % 16) - offset */
/* Post-patch MOVE_HALFCHUNK: copies 8 bytes as four halfwords, each access
 * fault-annotated to halfchunk_fault with its offset in %g5. */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	LD(lduh, src, offset + 0x00, t0, halfchunk_fault)	\
	LD(lduh, src, offset + 0x02, t1, halfchunk_fault)	\
	LD(lduh, src, offset + 0x04, t2, halfchunk_fault)	\
	LD(lduh, src, offset + 0x06, t3, halfchunk_fault)	\
	ST(sth, dst, offset + 0x00, t0, halfchunk_fault)	\
	ST(sth, dst, offset + 0x02, t1, halfchunk_fault)	\
	ST(sth, dst, offset + 0x04, t2, halfchunk_fault)	\
	ST(sth, dst, offset + 0x06, t3, halfchunk_fault)

/* left: o3 + (o2 % 16) + offset + 2 */
/* NOTE(review): diff residue -- old plain ldub/stb body and new LD()/ST()
 * body (halfchunk_fault handler) are fused below. */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
	LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault)	\
	LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault)	\
	ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault)	\
	ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)

	.section .fixup,#alloc,#execinstr
/* Fault in MOVE_HALFCHUNK/MOVE_SHORTCHUNK: returns
 * (%o2 % 16) + (%o3 - %g5), matching both macros' "left:" invariants
 * (for SHORTCHUNK %g5 is negative, so -%g5 adds offset + 2). */
halfchunk_fault:
	and	%o2, 15, %o2
	sub	%o3, %g5, %o3
	retl
	 add	%o2, %o3, %o0		/* delay slot: bytes not copied */

/* left: offset + 2 + (o2 % 2) */
/* Byte-pair copy for the final short table; same shape as MOVE_SHORTCHUNK
 * but routed to its own fault handler because the live registers differ. */
#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \
	LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault)	\
	LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault)	\
	ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault)	\
	ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)

	.section .fixup,#alloc,#execinstr
/* Fault in MOVE_LAST_SHORTCHUNK: %g5 is the (negative) faulting offset;
 * returns (%o2 % 2) - %g5 = (o2 % 2) + offset + 2, per the macro comment. */
last_shortchunk_fault:
	and	%o2, 1, %o2
	retl
	 sub	%o2, %g5, %o0		/* delay slot: bytes not copied */

	.text
	.align	4
@@ -182,8 +218,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
@@ -201,7 +235,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
@@ -210,7 +243,6 @@ copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0

@@ -250,8 +282,6 @@ ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
@@ -290,8 +320,6 @@ cannot_optimize:
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
@@ -308,8 +336,6 @@ byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
@@ -325,16 +351,14 @@ short_end:
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
@@ -363,123 +387,8 @@ short_aligned_end:
	.section .fixup,#alloc,#execinstr
	.align	4
/* Pre-patch common fixup tail (removed by this commit): 97 staged the
 * untouched length %o2 into %g3; fixupretl returned the bytes-not-copied
 * count from %g3.  The new per-insn fixups return directly instead. */
97:
	mov	%o2, %g3
fixupretl:
	retl
	 mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
/* Removed range handler 50: covered the MOVE_BIGCHUNK loop via
 * EXT(5b, 80b, 50f); reconstructs bytes-left from the insn index in %g2. */
50:
/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
/* Removed range handler 51: covered copy_user_table..copy_user_table_end
 * (the MOVE_LASTCHUNK jump table) via EXT(..., 51f). */
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
 * o0 -= (i / 6) * 16 + 16;
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
/* Removed range handler 52: covered the MOVE_BIGALIGNCHUNK loop
 * (EXT(ldd_std, 81b, 52f)); tail-merges into handler 50's 60: epilogue. */
52:
/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
   o0 += (g2 / 8) * 32 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
/* Removed range handler 53: covered the halfword-chunk loop
 * (EXT(10b, 82b, 53f)). */
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
   o0 += (g2 & 8) */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
/* Removed range handler 54: covered the byte_chunk MOVE_SHORTCHUNK loop
 * (EXT(byte_chunk, 83b, 54f)). */
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
   o0 += (g2 / 4) * 2 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
/* Removed range handler 55: covered the final short table
 * (EXT(84b, short_table_end, 55f)). */
55:
/* i = 27 - g2;
   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
   o0 -= i / 4 * 2 + 1 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	1, %g2
	add	%o5, %o5, %o5
	be,a	1f
	 clr	%g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3
/* NOTE(review): the line below is a stray post-patch fragment (part of the
 * new 97: "retl; mov %o2, %o0" fixup) fused here by the diff extraction. */
	 mov	%o2, %o0

	.globl  __copy_user_end
__copy_user_end: