/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements and variables which the compiler
 * will not remove or reorder during optimisation. This includes the __atomic
 * and __sync intrinsics, volatile asm statements marked with "memory" and
 * variables marked with volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However fetching a variable and adding to
 * it quite likely is not, so for (2) we need to ensure we use atomic
 * addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to the compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to bring a noticeable performance improvement, while at the
 * same time being a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between sources.
 */
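
/* As a rough sketch of how these helpers fit together (illustration only;
 * the counter below is hypothetical and real tests should normally use
 * tst_checkpoint.h or tst_fuzzy_sync.h as noted above):
 *
 *	static int counter;	// hypothetical flag shared between threads
 *
 *	// writer thread: atomic addition covers concern (2)
 *	tst_atomic_inc(&counter);
 *
 *	// reader thread: the load carries the barriers from (1) and (3)
 *	while (tst_atomic_load(&counter) < 1)
 *		usleep(100);
 */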

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
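/* Preferred implementation: the compiler's __atomic builtins (reported via
 * config.h) map directly onto the sequentially consistent model above.
 */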
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
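/* Fall back to the older __sync builtins: the addition is atomic and the
 * plain load/store below are wrapped in __sync_synchronize() full barriers.
 */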
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

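	/* xaddl leaves the old value of *v in __ret, so the new value is i + __ret. */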
	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
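	/* lwarx/stwcx. (load-reserve/store-conditional) retries until the
	 * store succeeds; the sync before and after acts as a full barrier.
	 */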
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add %0,%1,%0\n"
		"	stwcx.	%0,0,%2 \n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
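	/* cs (compare and swap) retries until *v is updated from old_val to
	 * old_val + i, which is also the value we return.
	 */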
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 provides the dmb instruction for SMP memory barriers; ARC700 falls
 * back to a plain compiler barrier.
 */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

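	/* llock/scond (load-locked/store-conditional) loop, retried until the
	 * conditional store succeeds; val then holds the new value.
	 */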
	asm volatile(
		"1:	llock   %[val], [%[ctr]]	\n"
		"	add     %[val], %[val], %[i]	\n"
		"	scond   %[val], [%[ctr]]	\n"
		"	bnz     1b			\n"
		: [val]	"=&r"	(val)
		: [ctr]	"r"	(v),
		  [i]	"ir"	(i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined (__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

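	/* ldaxr/stlxr exclusive pair, retried until the store-exclusive
	 * succeeds (tmp becomes 0); the trailing dmb ish is a full barrier.
	 */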
	__asm__ __volatile__(
"       prfm    pstl1strm, %2	\n"
"1:     ldaxr	%w0, %2		\n"
"       add	%w0, %w0, %w3	\n"
"       stlxr	%w1, %w0, %2	\n"
"       cbnz	%w1, 1b		\n"
"       dmb ish			\n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	return result;
}

/* We use load and store exclusive (ldaxr & stlxr) instructions to try to
 * prevent tst_atomic_load and, more likely, tst_atomic_store from
 * interfering with tst_atomic_add_return, which relies on exclusivity. It is
 * not clear whether this is a good idea, but it does mean that all three
 * functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm,  %[v]	\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr   %w[tmp], %w[i], %[v]	\n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
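	/* cas stores tmp only if *v still equals ret; once it has, the final
	 * add recomputes ret + i as the return value.
	 */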
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch or __sync_add_and_fetch \
        and an LTP implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
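/* Plain loads and stores bracketed by compiler barriers; no CPU fence is
 * emitted. The architectures that select this fallback are expected to keep
 * aligned 32-bit accesses atomic and sufficiently ordered on their own.
 */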
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */