#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif
/* This is modeled after the atomics interface from C1x (now C11), according
 * to the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
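
/* A minimal usage sketch (illustrative only; 'example_flag' is a
 * hypothetical variable, not something defined by this header).  The
 * explicit forms take a _Py_memory_order argument, mirroring C11's
 * atomic_store_explicit() and atomic_load_explicit():
 *
 *     static _Py_atomic_int example_flag;   // hypothetical variable
 *
 *     _Py_atomic_store_explicit(&example_flag, 1, _Py_memory_order_release);
 *     int seen = _Py_atomic_load_explicit(&example_flag,
 *                                         _Py_memory_order_acquire);
 */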

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics), plus MSVC on x86/x64/ARM. */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
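/* For example (an illustrative expansion of the macros below, not an
 * additional API): a sequentially-consistent 32-bit store such as
 *
 *     _Py_atomic_store_explicit(&example_flag, 1, _Py_memory_order_seq_cst);
 *
 * falls through to a plain _InterlockedExchange(), whose implicit full
 * barrier supplies the seq_cst semantics, while acquire/release stores use
 * the _HLEAcquire/_HLERelease variants.  'example_flag' here is a
 * hypothetical _Py_atomic_int used only for illustration.
 */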
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    if (sizeof((ATOMIC_VAL)->_value) == 8) { \
        _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
        _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ( \
        sizeof((ATOMIC_VAL)->_value) == 8 ? \
        _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
        _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
    )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    if (sizeof((ATOMIC_VAL)->_value) == 8) { \
        _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
        _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ( \
        sizeof((ATOMIC_VAL)->_value) == 8 ? \
        _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
        _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
    )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
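
/* Usage sketch for the shortcuts above (illustrative only; 'example_counter'
 * is a hypothetical _Py_atomic_address, not defined by this header):
 *
 *     static _Py_atomic_address example_counter;
 *
 *     _Py_atomic_store_relaxed(&example_counter, 42);
 *     uintptr_t v = _Py_atomic_load_relaxed(&example_counter);
 *
 * The plain _Py_atomic_store()/_Py_atomic_load() forms perform the same
 * accesses with sequentially-consistent ordering instead.
 */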

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */