1 /* Bit and bytes utilities.
2
3 Bytes swap functions, reverse order of bytes:
4
5 - _Py_bswap16(uint16_t)
6 - _Py_bswap32(uint32_t)
7 - _Py_bswap64(uint64_t)
8 */
9
10 #ifndef Py_INTERNAL_BITUTILS_H
11 #define Py_INTERNAL_BITUTILS_H
12 #ifdef __cplusplus
13 extern "C" {
14 #endif
15
16 #ifndef Py_BUILD_CORE
17 # error "this header requires Py_BUILD_CORE define"
18 #endif
19
20 #if defined(__GNUC__) \
21 && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22 /* __builtin_bswap16() is available since GCC 4.8,
23 __builtin_bswap32() is available since GCC 4.3,
24 __builtin_bswap64() is available since GCC 4.3. */
25 # define _PY_HAVE_BUILTIN_BSWAP
26 #endif
27
28 #ifdef _MSC_VER
29 /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
30 # include <intrin.h>
31 #endif
32
33 static inline uint16_t
_Py_bswap16(uint16_t word)34 _Py_bswap16(uint16_t word)
35 {
36 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
37 return __builtin_bswap16(word);
38 #elif defined(_MSC_VER)
39 Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40 return _byteswap_ushort(word);
41 #else
42 // Portable implementation which doesn't rely on circular bit shift
43 return ( ((word & UINT16_C(0x00FF)) << 8)
44 | ((word & UINT16_C(0xFF00)) >> 8));
45 #endif
46 }
47
48 static inline uint32_t
_Py_bswap32(uint32_t word)49 _Py_bswap32(uint32_t word)
50 {
51 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
52 return __builtin_bswap32(word);
53 #elif defined(_MSC_VER)
54 Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55 return _byteswap_ulong(word);
56 #else
57 // Portable implementation which doesn't rely on circular bit shift
58 return ( ((word & UINT32_C(0x000000FF)) << 24)
59 | ((word & UINT32_C(0x0000FF00)) << 8)
60 | ((word & UINT32_C(0x00FF0000)) >> 8)
61 | ((word & UINT32_C(0xFF000000)) >> 24));
62 #endif
63 }
64
65 static inline uint64_t
_Py_bswap64(uint64_t word)66 _Py_bswap64(uint64_t word)
67 {
68 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
69 return __builtin_bswap64(word);
70 #elif defined(_MSC_VER)
71 return _byteswap_uint64(word);
72 #else
73 // Portable implementation which doesn't rely on circular bit shift
74 return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
75 | ((word & UINT64_C(0x000000000000FF00)) << 40)
76 | ((word & UINT64_C(0x0000000000FF0000)) << 24)
77 | ((word & UINT64_C(0x00000000FF000000)) << 8)
78 | ((word & UINT64_C(0x000000FF00000000)) >> 8)
79 | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
80 | ((word & UINT64_C(0x00FF000000000000)) >> 40)
81 | ((word & UINT64_C(0xFF00000000000000)) >> 56));
82 #endif
83 }
84
85
86 // Population count: count the number of 1's in 'x'
87 // (number of bits set to 1), also known as the hamming weight.
88 //
89 // Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90 // can be used, to keep the implementation simple. For example, Visual Studio
91 // __popcnt() is not used this reason. The clang and GCC builtin function can
92 // use the x86 POPCNT instruction if the target architecture has SSE4a or
93 // newer.
94 static inline int
_Py_popcount32(uint32_t x)95 _Py_popcount32(uint32_t x)
96 {
97 #if (defined(__clang__) || defined(__GNUC__))
98
99 #if SIZEOF_INT >= 4
100 Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101 return __builtin_popcount(x);
102 #else
103 // The C standard guarantees that unsigned long will always be big enough
104 // to hold a uint32_t value without losing information.
105 Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106 return __builtin_popcountl(x);
107 #endif
108
109 #else
110 // 32-bit SWAR (SIMD Within A Register) popcount
111
112 // Binary: 0 1 0 1 ...
113 const uint32_t M1 = 0x55555555;
114 // Binary: 00 11 00 11. ..
115 const uint32_t M2 = 0x33333333;
116 // Binary: 0000 1111 0000 1111 ...
117 const uint32_t M4 = 0x0F0F0F0F;
118
119 // Put count of each 2 bits into those 2 bits
120 x = x - ((x >> 1) & M1);
121 // Put count of each 4 bits into those 4 bits
122 x = (x & M2) + ((x >> 2) & M2);
123 // Put count of each 8 bits into those 8 bits
124 x = (x + (x >> 4)) & M4;
125 // Sum of the 4 byte counts.
126 // Take care when considering changes to the next line. Portability and
127 // correctness are delicate here, thanks to C's "integer promotions" (C99
128 // §6.3.1.1p2). On machines where the `int` type has width greater than 32
129 // bits, `x` will be promoted to an `int`, and following C's "usual
130 // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
131 // performed as a multiplication of two `unsigned int` operands. In this
132 // case it's critical that we cast back to `uint32_t` in order to keep only
133 // the least significant 32 bits. On machines where the `int` type has
134 // width no greater than 32, the multiplication is of two 32-bit unsigned
135 // integer types, and the (uint32_t) cast is a no-op. In both cases, we
136 // avoid the risk of undefined behaviour due to overflow of a
137 // multiplication of signed integer types.
138 return (uint32_t)(x * 0x01010101U) >> 24;
139 #endif
140 }
141
142
143 // Return the index of the most significant 1 bit in 'x'. This is the smallest
144 // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
145 static inline int
_Py_bit_length(unsigned long x)146 _Py_bit_length(unsigned long x)
147 {
148 #if (defined(__clang__) || defined(__GNUC__))
149 if (x != 0) {
150 // __builtin_clzl() is available since GCC 3.4.
151 // Undefined behavior for x == 0.
152 return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
153 }
154 else {
155 return 0;
156 }
157 #elif defined(_MSC_VER)
158 // _BitScanReverse() is documented to search 32 bits.
159 Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
160 unsigned long msb;
161 if (_BitScanReverse(&msb, x)) {
162 return (int)msb + 1;
163 }
164 else {
165 return 0;
166 }
167 #else
168 const int BIT_LENGTH_TABLE[32] = {
169 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
170 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
171 };
172 int msb = 0;
173 while (x >= 32) {
174 msb += 6;
175 x >>= 6;
176 }
177 msb += BIT_LENGTH_TABLE[x];
178 return msb;
179 #endif
180 }
181
182
183 #ifdef __cplusplus
184 }
185 #endif
186 #endif /* !Py_INTERNAL_BITUTILS_H */
187