#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */

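/*  Rough usage sketch (the names below are hypothetical and only for
    illustration, not part of this header): a release store paired with an
    acquire load of one of the wrapper types looks like

        static _Py_atomic_int _example_flag;

        void example_signal(void)
        { _Py_atomic_store_explicit(&_example_flag, 1, _Py_memory_order_release); }

        int example_poll(void)
        { return _Py_atomic_load_explicit(&_example_flag, _Py_memory_order_acquire); }
*/
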
#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific
    and hard-to-measure scenarios.
*/
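/*  As a sketch of the dispatch below (the variable is hypothetical and only
    for illustration): on x64, a sequentially consistent store to a
    _Py_atomic_address such as

        _Py_atomic_store(&_example_addr, 0);

    reaches _Py_atomic_store_64bit() with _Py_memory_order_seq_cst and ends
    up in _InterlockedExchange64(), while the acquire and release orderings
    select the _HLEAcquire/_HLERelease variants instead.
*/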
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_MSC_VER */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)

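/*  Rough usage sketch (the variable is hypothetical, for illustration only):
    a flag that is polled on a hot path and needs atomicity but no ordering
    guarantees can use the relaxed shortcuts:

        static _Py_atomic_int _example_pending;

        if (_Py_atomic_load_relaxed(&_example_pending)) {
            _Py_atomic_store_relaxed(&_example_pending, 0);
            ... handle the request ...
        }
*/
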
#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */