Commit b64be683 authored by Vineet Gupta's avatar Vineet Gupta
Browse files

ARC: atomics: implement relaxed variants

The current ARC fetch/return atomics provide fully ordered semantics
only with 2 full barriers around the operation.

Instead implement them as relaxed variants without any barriers and
rely on generic code to generate the fully-ordered, acquire and release
varaints by adding the appropriate full barriers.

This helps elide some extra barriers in case of acquire/release/relaxed
calls.

bloat-o-meter for hsdk defconfig shows codegen improvements, although
numbers below inflated due to unrelated inlining heuristic changes

| bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed
| add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154)
| Function                                     old     new   delta
| ..
| sys_renameat                                 462     476     +14
| ip_mc_inc_group                              424     436     +12
| do_read_cache_page                          1882    1894     +12
| ..
| refcount_dec_and_mutex_lock                  254     250      -4
| refcount_dec_and_lock_irqsave                258     254      -4
| refcount_dec_and_lock                        254     250      -4
| ..
| tcp_v6_route_req                             246     238      -8
| tcp_v4_destroy_sock                          286     278      -8
| tcp_twsk_unique                              352     344      -8

Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net


Suggested-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarVineet Gupta <vgupta@kernel.org>
parent 7e8f8cbb
Loading
Loading
Loading
Loading
+13 −19
Original line number Diff line number Diff line
@@ -22,16 +22,10 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \
}									\

#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
{									\
	unsigned int val;						\
									\
	/*								\
	 * Explicit full memory barrier needed before/after as		\
	 * LLOCK/SCOND themselves don't provide any such semantics	\
	 */								\
	smp_mb();							\
									\
	__asm__ __volatile__(						\
	"1:	llock   %[val], [%[ctr]]		\n"		\
	"	" #asm_op " %[val], %[val], %[i]	\n"		\
@@ -42,22 +36,17 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
	  [i]	"ir"	(i)						\
	: "cc");							\
									\
	smp_mb();							\
									\
	return val;							\
}

#define arch_atomic_add_return_relaxed		arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed		arch_atomic_sub_return_relaxed

#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
{									\
	unsigned int val, orig;						\
									\
	/*								\
	 * Explicit full memory barrier needed before/after as		\
	 * LLOCK/SCOND themselves don't provide any such semantics	\
	 */								\
	smp_mb();							\
									\
	__asm__ __volatile__(						\
	"1:	llock   %[orig], [%[ctr]]		\n"		\
	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
@@ -69,11 +58,17 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
	  [i]	"ir"	(i)						\
	: "cc");							\
									\
	smp_mb();							\
									\
	return orig;							\
}

#define arch_atomic_fetch_add_relaxed		arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed		arch_atomic_fetch_sub_relaxed

#define arch_atomic_fetch_and_relaxed		arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_andnot_relaxed	arch_atomic_fetch_andnot_relaxed
#define arch_atomic_fetch_or_relaxed		arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed		arch_atomic_fetch_xor_relaxed

#define ATOMIC_OPS(op, c_op, asm_op)					\
	ATOMIC_OP(op, c_op, asm_op)					\
	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
@@ -93,7 +88,6 @@ ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)

#define arch_atomic_andnot		arch_atomic_andnot
#define arch_atomic_fetch_andnot	arch_atomic_fetch_andnot

#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
+13 −11
Original line number Diff line number Diff line
@@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
}									\

#define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)	\
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)	\
{									\
	s64 val;							\
									\
	smp_mb();							\
									\
	__asm__ __volatile__(						\
	"1:				\n"				\
	"	llockd   %0, [%1]	\n"				\
@@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \
	: "r"(&v->counter), "ir"(a)					\
	: "cc");	/* memory clobber comes from smp_mb() */	\
									\
	smp_mb();							\
									\
	return val;							\
}

#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed

#define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)	\
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)	\
{									\
	s64 val, orig;							\
									\
	smp_mb();							\
									\
	__asm__ __volatile__(						\
	"1:				\n"				\
	"	llockd   %0, [%2]	\n"				\
@@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \
	: "r"(&v->counter), "ir"(a)					\
	: "cc");	/* memory clobber comes from smp_mb() */	\
									\
	smp_mb();							\
									\
	return orig;							\
}

#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed

#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed

#define ATOMIC64_OPS(op, op1, op2)					\
	ATOMIC64_OP(op, op1, op2)					\
	ATOMIC64_OP_RETURN(op, op1, op2)				\
@@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)

#define arch_atomic64_andnot		arch_atomic64_andnot
#define arch_atomic64_fetch_andnot	arch_atomic64_fetch_andnot

#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP