Commit e188f333 authored by Vineet Gupta

ARC: cmpxchg/xchg: rewrite as macros to make type safe



Existing code forces/assumes args to be of type "long", which won't work in an
LP64 regime, so prepare the code for that.

Interestingly, this should be a non-functional change, but I do see
some codegen changes:

| bloat-o-meter vmlinux-cmpxchg-A vmlinux-cmpxchg-B
| add/remove: 0/0 grow/shrink: 17/12 up/down: 218/-150 (68)
|
| Function                                     old     new   delta
| rwsem_optimistic_spin                        518     550     +32
| rwsem_down_write_slowpath                   1244    1274     +30
| __do_sys_perf_event_open                    2576    2600     +24
| down_read                                    192     200      +8
| __down_read                                  192     200      +8
...
| task_work_run                                168     148     -20
| dma_fence_chain_walk.part                    760     736     -24
| __genradix_ptr_alloc                         674     646     -28

Total: Before=6187409, After=6187477, chg +0.00%
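
To see why the typeof-based macros are safer, here is a minimal userspace
sketch (not the kernel code: cmpxchg_long() and cmpxchg_typed() are made-up
names, and the bodies are plain non-atomic stand-ins used only to show the
type plumbing; it relies on GNU C __typeof__ and statement expressions). The
old-style prototype funnels every operand through "unsigned long", so a 64-bit
operand on LP64 would be silently truncated; the __typeof__ version keeps the
pointee's own type and turns an unsupported size into a build error:

#include <stdio.h>

/* Old-style interface: the pointee type is lost, everything becomes long */
static unsigned long cmpxchg_long(volatile void *ptr, unsigned long expected,
				  unsigned long new)
{
	volatile unsigned int *p = ptr;	/* only 32-bit data really handled */
	unsigned long prev = *p;

	if (prev == expected)
		*p = (unsigned int)new;	/* silent truncation on LP64 */
	return prev;
}

/* typeof-style interface: the operand keeps the pointee's type; a size the
 * implementation can't handle fails at compile time instead */
#define cmpxchg_typed(ptr, old, new)					\
({									\
	__typeof__(*(ptr)) _o_ = (old);					\
	__typeof__(*(ptr)) _n_ = (new);					\
	__typeof__(*(ptr)) _prev_;					\
									\
	_Static_assert(sizeof(*(ptr)) == 4, "unsupported operand size");\
	_prev_ = *(ptr);						\
	if (_prev_ == _o_)						\
		*(ptr) = _n_;						\
	_prev_;								\
})

int main(void)
{
	unsigned int v = 1;

	printf("old: prev=%lu v=%u\n", cmpxchg_long(&v, 1, 2), v);
	v = 1;
	printf("new: prev=%u v=%u\n", cmpxchg_typed(&v, 1u, 2u), v);

	/* unsigned long w = 1;
	 * cmpxchg_typed(&w, 1UL, 2UL);   <- would not build on LP64,
	 * whereas cmpxchg_long(&w, 1, 2) would "work" and corrupt w. */
	return 0;
}

The macros in the diff below do the same thing with BUILD_BUG()/BUILD_BUG_ON()
and the real LLOCK/SCOND or EX sequences instead of the plain loads/stores
used in this sketch.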

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
parent ecf51c9f
arch/arc/include/asm/cmpxchg.h +117 −96
@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H

+#include <linux/build_bug.h>
#include <linux/types.h>

#include <asm/barrier.h>
@@ -13,62 +14,77 @@

#ifdef CONFIG_ARC_HAS_LLSC

-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long prev;
-
-	/*
-	 * Explicit full memory barrier needed before/after as
-	 * LLOCK/SCOND themselves don't provide any such semantics
-	 */
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llock   %0, [%1]	\n"
-	"	brne    %0, %2, 2f	\n"
-	"	scond   %3, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)	/* Early clobber, to prevent reg reuse */
-	: "r"(ptr),	/* Not "m": llock only supports reg direct addr mode */
-	  "ir"(expected),
-	  "r"(new)	/* can't be "ir". scond can't take LIMM for "b" */
-	: "cc", "memory"); /* so that gcc knows memory is being written here */
-
-	smp_mb();
-
-	return prev;
-}
+#define __cmpxchg(ptr, old, new)					\
+({									\
+	__typeof__(*(ptr)) _prev;					\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock  %0, [%1]	\n"					\
+	"	brne   %0, %2, 2f	\n"				\
+	"	scond  %3, [%1]	\n"					\
+	"	bnz     1b		\n"				\
+	"2:				\n"				\
+	: "=&r"(_prev)	/* Early clobber prevent reg reuse */		\
+	: "r"(ptr),	/* Not "m": llock only supports reg */		\
+	  "ir"(old),							\
+	  "r"(new)	/* Not "ir": scond can't take LIMM */		\
+	: "cc",								\
+	  "memory");	/* gcc knows memory is clobbered */		\
+									\
+	_prev;								\
+})

-#else /* !CONFIG_ARC_HAS_LLSC */
+#define arch_cmpxchg(ptr, old, new)				        \
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+									\
+	switch(sizeof((_p_))) {						\
+	case 4:								\
+	        /*							\
+		 * Explicit full memory barrier needed before/after	\
+	         */							\
+		smp_mb();						\
+		_prev_ = __cmpxchg(_p_, _o_, _n_);			\
+		smp_mb();						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_prev_;								\
+})

-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long flags;
-	int prev;
-	volatile unsigned long *p = ptr;
+#else

-	/*
-	 * spin lock/unlock provide the needed smp_mb() before/after
-	 */
-	atomic_ops_lock(flags);
-	prev = *p;
-	if (prev == expected)
-		*p = new;
-	atomic_ops_unlock(flags);
-	return prev;
-}
+#define arch_cmpxchg(ptr, old, new)				        \
+({									\
+	volatile __typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+	unsigned long __flags;						\
+									\
+	BUILD_BUG_ON(sizeof(_p_) != 4);					\
+									\
+	/*								\
+	 * spin lock/unlock provide the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(__flags);					\
+	_prev_ = *_p_;							\
+	if (_prev_ == _o_)						\
+		*_p_ = _n_;						\
+	atomic_ops_unlock(__flags);					\
+	_prev_;								\
+})

#endif

-#define arch_cmpxchg(ptr, o, n) ({			\
-	(typeof(*(ptr)))__cmpxchg((ptr),		\
-				  (unsigned long)(o),	\
-				  (unsigned long)(n));	\
-})

/*
 * atomic_cmpxchg is same as cmpxchg
 *   LLSC: only different in data-type, semantics are exactly same
@@ -77,61 +93,66 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 */
#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))


/*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * xchg
 */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
-{
-	extern unsigned long __xchg_bad_pointer(void);
-
-	switch (size) {
-	case 4:
-		smp_mb();
-
-		__asm__ __volatile__(
-		"	ex  %0, [%1]	\n"
-		: "+r"(val)
-		: "r"(ptr)
-		: "memory");
+#ifdef CONFIG_ARC_HAS_LLSC

-		smp_mb();
+#define __xchg(ptr, val)						\
+({									\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"	/* set new value */	        \
+	: "+r"(val)							\
+	: "r"(ptr)							\
+	: "memory");							\
+	_val_;		/* get old value */				\
+})

-		return val;
-	}
-	return __xchg_bad_pointer();
-}
+#define arch_xchg(ptr, val)						\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	switch(sizeof(*(_p_))) {					\
+	case 4:								\
+		smp_mb();						\
+		_val_ = __xchg(_p_, _val_);				\
+	        smp_mb();						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_val_;								\
+})

-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-						 sizeof(*(ptr))))
+#else  /* !CONFIG_ARC_HAS_LLSC */

/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- *    of  kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow same locking rules.
+ * EX instructions is baseline and present in !LLSC too. But in this
+ * regime it still needs use @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses spinlock in !LLSC
+ * (llist.h use xchg and cmpxchg on sama data)
 */

-#ifndef CONFIG_ARC_HAS_LLSC

-#define arch_xchg(ptr, with)		\
+#define arch_xchg(ptr, val)					        \
+({									\
-	unsigned long flags;		\
-	typeof(*(ptr)) old_val;		\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	unsigned long __flags;						\
+									\
+	atomic_ops_lock(__flags);					\
+									\
-	atomic_ops_lock(flags);		\
-	old_val = _xchg(ptr, with);	\
-	atomic_ops_unlock(flags);	\
-	old_val;			\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"					\
+	: "+r"(_val_)							\
+	: "r"(_p_)							\
+	: "memory");							\
+									\
+	atomic_ops_unlock(__flags);					\
+	_val_;								\
})

-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)

#endif

/*