/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using variables and statements which the compiler
 * will not remove or reorder during optimisation. This includes the __atomic
 * and __sync intrinsics, volatile asm statements marked with "memory" and
 * variables marked with volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching and adding to a variable
 * is quite likely not, so for (2) we need to ensure we use atomic addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to yield a noticeable performance improvement while at the
 * same time being a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * terminology may vary between sources.
 */
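/* As a minimal usage sketch (illustrative only; "counter" and "worker" below
 * are hypothetical names, not part of the LTP API), two threads could bump a
 * shared counter with tst_atomic_inc() while the main thread polls it with
 * tst_atomic_load(). The atomic add keeps increments from being lost and the
 * SEQ_CST ordering makes the updated value visible to the reader:
 *
 *	static int counter;
 *
 *	static void *worker(void *unused)
 *	{
 *		tst_atomic_inc(&counter);
 *		return NULL;
 *	}
 *
 *	// in the main thread, after starting two workers:
 *	while (tst_atomic_load(&counter) < 2)
 *		usleep(100);
 */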
#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add	%0,%1,%0\n"
		"	stwcx.	%0,0,%2\n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* Only ARCv2 provides the dmb SMP barrier instruction; on ARC700 fall back
 * to a plain compiler barrier.
 */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock	%[val], [%[ctr]]	\n"
		"	add	%[val], %[val], %[i]	\n"
		"	scond	%[val], [%[ctr]]	\n"
		"	bnz	1b			\n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined(__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		"	prfm	pstl1strm, %2		\n"
		"1:	ldaxr	%w0, %2			\n"
		"	add	%w0, %w0, %w3		\n"
		"	stlxr	%w1, %w0, %2		\n"
		"	cbnz	%w1, 1b			\n"
		"	dmb ish				\n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We use load and store exclusive (ldaxr & stlxr) instructions to try to
 * prevent tst_atomic_load and, more likely, tst_atomic_store from
 * interfering with tst_atomic_add_return, which takes advantage of
 * exclusivity. It is not clear whether this is a good idea, but it does
 * mean that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr	%w[tmp], %w[ret], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr	%w[tmp], %w[i], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	 nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch or __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
#endif
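/* Generic load/store fallback for the architectures that define
 * LTP_USE_GENERIC_LOAD_STORE_ASM above (x86, s390 and 64-bit SPARC). These
 * only wrap a plain aligned access in compiler barriers; the assumption is
 * that on these strongly ordered architectures no explicit runtime barrier
 * is needed for the usage described at the top of this file.
 */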
#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif /* TST_ATOMIC_H__ */