1 /* auto-generated on 2023-12-01 13:59:01 -0500. Do not edit! */
2 /* begin file src/simdutf.cpp */
3 #include "simdutf.h"
4 /* begin file src/implementation.cpp */
5 #include <initializer_list>
6 #include <climits>
7
8 // Useful for debugging purposes
9 namespace simdutf {
10 namespace {
11
/**
 * Renders the bits of an integral value as a string of '0'/'1' characters,
 * most significant bit first. The result holds sizeof(T) * CHAR_BIT characters.
 *
 * The bits are read by shifting the value itself. Walking a top-bit mask down
 * with a `mask > 0` test does not work for signed T: that mask is negative, so
 * such a loop would stop before emitting anything.
 * For negative values this relies on the arithmetic right shift that mainstream
 * compilers implement (and that C++20 guarantees).
 *
 * @tparam T an integral type.
 * @param b the value to render.
 * @return the binary representation of b.
 */
template <typename T>
std::string toBinaryString(T b) {
  const int bit_count = static_cast<int>(sizeof(T) * CHAR_BIT);
  std::string binary;
  binary.reserve(static_cast<std::string::size_type>(bit_count));
  for (int bit = bit_count - 1; bit >= 0; --bit) {
    binary += (((b >> bit) & 1) == 0) ? '0' : '1';
  }
  return binary;
}
22 }
23 }
24
25 // Implementations
26 // The best choice should always come first!
27 /* begin file src/simdutf/arm64.h */
28 #ifndef SIMDUTF_ARM64_H
29 #define SIMDUTF_ARM64_H
30
31 #ifdef SIMDUTF_FALLBACK_H
32 #error "arm64.h must be included before fallback.h"
33 #endif
34
35
36 #ifndef SIMDUTF_IMPLEMENTATION_ARM64
37 #define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64)
38 #endif
39 #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64
40
41
42
43 #if SIMDUTF_IMPLEMENTATION_ARM64
44
45 namespace simdutf {
46 /**
47 * Implementation for NEON (ARMv8).
48 */
49 namespace arm64 {
50 } // namespace arm64
51 } // namespace simdutf
52
53 /* begin file src/simdutf/arm64/implementation.h */
54 #ifndef SIMDUTF_ARM64_IMPLEMENTATION_H
55 #define SIMDUTF_ARM64_IMPLEMENTATION_H
56
57
58 namespace simdutf {
59 namespace arm64 {
60
61 namespace {
62 using namespace simdutf;
63 }
64
65 class implementation final : public simdutf::implementation {
66 public:
implementation()67 simdutf_really_inline implementation() : simdutf::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {}
68 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
69 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
70 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
71 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
72 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
73 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
74 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
75 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
76 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
77 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
78 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
79 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
80 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
81 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
82 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
83 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
84 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
85 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
86 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
87 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
88 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
89 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
90 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
91 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
92 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
93 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
94 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
95 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
96 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
97 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
98 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
99 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
100 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
101 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
102 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
103 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
104 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
105 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
106 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
107 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
108 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
109 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
110 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
111 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
112 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
113 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
114 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
115 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
116 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
117 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
118 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
119 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
120 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
121 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
122 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
123 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
124 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
125 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
126 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
127 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
128 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
129 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
130 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
131 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
132 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
133 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
134 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
135 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
136 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
137 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
138 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
139 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
140 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
141 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
142 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
143
144 };
145
146 } // namespace arm64
147 } // namespace simdutf
148
149 #endif // SIMDUTF_ARM64_IMPLEMENTATION_H
150 /* end file src/simdutf/arm64/implementation.h */
151
152 /* begin file src/simdutf/arm64/begin.h */
153 // redefining SIMDUTF_IMPLEMENTATION to "arm64"
154 // #define SIMDUTF_IMPLEMENTATION arm64
155 /* end file src/simdutf/arm64/begin.h */
156
157 // Declarations
158 /* begin file src/simdutf/arm64/intrinsics.h */
159 #ifndef SIMDUTF_ARM64_INTRINSICS_H
160 #define SIMDUTF_ARM64_INTRINSICS_H
161
162
163 // This should be the correct header whether
164 // you use visual studio or other compilers.
165 #include <arm_neon.h>
166
167 #endif // SIMDUTF_ARM64_INTRINSICS_H
168 /* end file src/simdutf/arm64/intrinsics.h */
169 /* begin file src/simdutf/arm64/bitmanipulation.h */
170 #ifndef SIMDUTF_ARM64_BITMANIPULATION_H
171 #define SIMDUTF_ARM64_BITMANIPULATION_H
172
173 namespace simdutf {
174 namespace arm64 {
175 namespace {
176
177 /* result might be undefined when input_num is zero */
count_ones(uint64_t input_num)178 simdutf_really_inline int count_ones(uint64_t input_num) {
179 return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
180 }
181
182 } // unnamed namespace
183 } // namespace arm64
184 } // namespace simdutf
185
186 #endif // SIMDUTF_ARM64_BITMANIPULATION_H
187 /* end file src/simdutf/arm64/bitmanipulation.h */
188 /* begin file src/simdutf/arm64/simd.h */
189 #ifndef SIMDUTF_ARM64_SIMD_H
190 #define SIMDUTF_ARM64_SIMD_H
191
192 #include <type_traits>
193
194
195 namespace simdutf {
196 namespace arm64 {
197 namespace {
198 namespace simd {
199
200 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
201 namespace {
202 // Start of private section with Visual Studio workaround
203
// Each simdutf_make_* macro expands to an immediately-invoked lambda that copies
// its arguments into a local array and loads that array into a NEON register with
// the matching vld1 intrinsic. The [=] capture allows the arguments to be local
// variables or function parameters. Every definition is wrapped in #ifndef so that
// a definition supplied earlier takes precedence.

// 16 x uint8_t -> uint8x16_t
#ifndef simdutf_make_uint8x16_t
#define simdutf_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \
                                x13, x14, x15, x16) \
  ([=]() { \
    uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \
                         x9, x10, x11, x12, x13, x14, x15, x16}; \
    return vld1q_u8(array); \
  }())
#endif
// 16 x int8_t -> int8x16_t
#ifndef simdutf_make_int8x16_t
#define simdutf_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \
                               x13, x14, x15, x16) \
  ([=]() { \
    int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \
                        x9, x10, x11, x12, x13, x14, x15, x16}; \
    return vld1q_s8(array); \
  }())
#endif

// 8 x uint8_t -> uint8x8_t
#ifndef simdutf_make_uint8x8_t
#define simdutf_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \
  ([=]() { \
    uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \
    return vld1_u8(array); \
  }())
#endif
// 8 x int8_t -> int8x8_t
#ifndef simdutf_make_int8x8_t
#define simdutf_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \
  ([=]() { \
    int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \
    return vld1_s8(array); \
  }())
#endif
// 8 x uint16_t -> uint16x8_t
#ifndef simdutf_make_uint16x8_t
#define simdutf_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \
  ([=]() { \
    uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \
    return vld1q_u16(array); \
  }())
#endif
// 8 x int16_t -> int16x8_t
#ifndef simdutf_make_int16x8_t
#define simdutf_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \
  ([=]() { \
    int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \
    return vld1q_s16(array); \
  }())
#endif
251
252
253 // End of private section with Visual Studio workaround
254 } // namespace
255 #endif // SIMDUTF_REGULAR_VISUAL_STUDIO
256
257
258 template<typename T>
259 struct simd8;
260
261 //
262 // Base class of simd8<uint8_t> and simd8<bool>, both of which use uint8x16_t internally.
263 //
264 template<typename T, typename Mask=simd8<bool>>
265 struct base_u8 {
266 uint8x16_t value;
267 static const int SIZE = sizeof(value);
268
269 // Conversion from/to SIMD register
base_u8simdutf::arm64::__anon13834::simd::base_u8270 simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
operator const uint8x16_t&simdutf::arm64::__anon13834::simd::base_u8271 simdutf_really_inline operator const uint8x16_t&() const { return this->value; }
operator uint8x16_t&simdutf::arm64::__anon13834::simd::base_u8272 simdutf_really_inline operator uint8x16_t&() { return this->value; }
firstsimdutf::arm64::__anon13834::simd::base_u8273 simdutf_really_inline T first() const { return vgetq_lane_u8(*this,0); }
lastsimdutf::arm64::__anon13834::simd::base_u8274 simdutf_really_inline T last() const { return vgetq_lane_u8(*this,15); }
275
276 // Bit operations
operator |simdutf::arm64::__anon13834::simd::base_u8277 simdutf_really_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); }
operator &simdutf::arm64::__anon13834::simd::base_u8278 simdutf_really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); }
operator ^simdutf::arm64::__anon13834::simd::base_u8279 simdutf_really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); }
bit_andnotsimdutf::arm64::__anon13834::simd::base_u8280 simdutf_really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); }
operator ~simdutf::arm64::__anon13834::simd::base_u8281 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
operator |=simdutf::arm64::__anon13834::simd::base_u8282 simdutf_really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast | other; return *this_cast; }
operator &=simdutf::arm64::__anon13834::simd::base_u8283 simdutf_really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast & other; return *this_cast; }
operator ^=simdutf::arm64::__anon13834::simd::base_u8284 simdutf_really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
285
operator ==simdutf::arm64::__anon13834::simd::base_u8286 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return vceqq_u8(lhs, rhs); }
287
288 template<int N=1>
prevsimdutf::arm64::__anon13834::simd::base_u8289 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
290 return vextq_u8(prev_chunk, *this, 16 - N);
291 }
292 };
293
294 // SIMD byte mask type (returned by things like eq and gt)
295 template<>
296 struct simd8<bool>: base_u8<bool> {
297 typedef uint16_t bitmask_t;
298 typedef uint32_t bitmask2_t;
299
splatsimdutf::arm64::__anon13834::simd::simd8300 static simdutf_really_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); }
301
simd8simdutf::arm64::__anon13834::simd::simd8302 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {}
303 // False constructor
simd8simdutf::arm64::__anon13834::simd::simd8304 simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
305 // Splat constructor
simd8simdutf::arm64::__anon13834::simd::simd8306 simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {}
storesimdutf::arm64::__anon13834::simd::simd8307 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
308
309 // We return uint32_t instead of uint16_t because that seems to be more efficient for most
310 // purposes (cutting it down to uint16_t costs performance in some compilers).
to_bitmasksimdutf::arm64::__anon13834::simd::simd8311 simdutf_really_inline uint32_t to_bitmask() const {
312 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
313 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
314 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
315 #else
316 const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
317 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
318 #endif
319 auto minput = *this & bit_mask;
320 uint8x16_t tmp = vpaddq_u8(minput, minput);
321 tmp = vpaddq_u8(tmp, tmp);
322 tmp = vpaddq_u8(tmp, tmp);
323 return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
324 }
325
326 // Returns 4-bit out of each byte, alternating between the high 4 bits and low bits
327 // result it is 64 bit.
328 // This method is expected to be faster than none() and is equivalent
329 // when the vector register is the result of a comparison, with byte
330 // values 0xff and 0x00.
to_bitmask64simdutf::arm64::__anon13834::simd::simd8331 simdutf_really_inline uint64_t to_bitmask64() const {
332 return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0);
333 }
334
anysimdutf::arm64::__anon13834::simd::simd8335 simdutf_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
nonesimdutf::arm64::__anon13834::simd::simd8336 simdutf_really_inline bool none() const { return vmaxvq_u8(*this) == 0; }
allsimdutf::arm64::__anon13834::simd::simd8337 simdutf_really_inline bool all() const { return vminvq_u8(*this) == 0xFF; }
338
339
340 };
341
342 // Unsigned bytes
343 template<>
344 struct simd8<uint8_t>: base_u8<uint8_t> {
splatsimdutf::arm64::__anon13834::simd::simd8345 static simdutf_really_inline simd8<uint8_t> splat(uint8_t _value) { return vmovq_n_u8(_value); }
zerosimdutf::arm64::__anon13834::simd::simd8346 static simdutf_really_inline simd8<uint8_t> zero() { return vdupq_n_u8(0); }
loadsimdutf::arm64::__anon13834::simd::simd8347 static simdutf_really_inline simd8<uint8_t> load(const uint8_t* values) { return vld1q_u8(values); }
simd8simdutf::arm64::__anon13834::simd::simd8348 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {}
349 // Zero constructor
simd8simdutf::arm64::__anon13834::simd::simd8350 simdutf_really_inline simd8() : simd8(zero()) {}
351 // Array constructor
simd8simdutf::arm64::__anon13834::simd::simd8352 simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
353 // Splat constructor
simd8simdutf::arm64::__anon13834::simd::simd8354 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
355 // Member-by-member initialization
356 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
simd8simdutf::arm64::__anon13834::simd::simd8357 simdutf_really_inline simd8(
358 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
359 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
360 ) : simd8(simdutf_make_uint8x16_t(
361 v0, v1, v2, v3, v4, v5, v6, v7,
362 v8, v9, v10,v11,v12,v13,v14,v15
363 )) {}
364 #else
simd8simdutf::arm64::__anon13834::simd::simd8365 simdutf_really_inline simd8(
366 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
367 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
368 ) : simd8(uint8x16_t{
369 v0, v1, v2, v3, v4, v5, v6, v7,
370 v8, v9, v10,v11,v12,v13,v14,v15
371 }) {}
372 #endif
373
374 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::arm64::__anon13834::simd::simd8375 simdutf_really_inline static simd8<uint8_t> repeat_16(
376 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
377 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
378 ) {
379 return simd8<uint8_t>(
380 v0, v1, v2, v3, v4, v5, v6, v7,
381 v8, v9, v10,v11,v12,v13,v14,v15
382 );
383 }
384
385 // Store to array
storesimdutf::arm64::__anon13834::simd::simd8386 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
387
388 // Saturated math
saturating_addsimdutf::arm64::__anon13834::simd::simd8389 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
saturating_subsimdutf::arm64::__anon13834::simd::simd8390 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
391
392 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::arm64::__anon13834::simd::simd8393 simdutf_really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); }
operator -simdutf::arm64::__anon13834::simd::simd8394 simdutf_really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); }
operator +=simdutf::arm64::__anon13834::simd::simd8395 simdutf_really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; }
operator -=simdutf::arm64::__anon13834::simd::simd8396 simdutf_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
397
398 // Order-specific operations
max_valsimdutf::arm64::__anon13834::simd::simd8399 simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
min_valsimdutf::arm64::__anon13834::simd::simd8400 simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
max_valsimdutf::arm64::__anon13834::simd::simd8401 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
min_valsimdutf::arm64::__anon13834::simd::simd8402 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
operator <=simdutf::arm64::__anon13834::simd::simd8403 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
operator >=simdutf::arm64::__anon13834::simd::simd8404 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
operator <simdutf::arm64::__anon13834::simd::simd8405 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
operator >simdutf::arm64::__anon13834::simd::simd8406 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); }
407 // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
gt_bitssimdutf::arm64::__anon13834::simd::simd8408 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
409 // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
lt_bitssimdutf::arm64::__anon13834::simd::simd8410 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }
411
412 // Bit-specific operations
any_bits_setsimdutf::arm64::__anon13834::simd::simd8413 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
is_asciisimdutf::arm64::__anon13834::simd::simd8414 simdutf_really_inline bool is_ascii() const { return this->max_val() < 0b10000000u; }
415
any_bits_set_anywheresimdutf::arm64::__anon13834::simd::simd8416 simdutf_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; }
any_bits_set_anywheresimdutf::arm64::__anon13834::simd::simd8417 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
418 template<int N>
shrsimdutf::arm64::__anon13834::simd::simd8419 simdutf_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
420 template<int N>
shlsimdutf::arm64::__anon13834::simd::simd8421 simdutf_really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }
422
423 // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
424 template<typename L>
lookup_16simdutf::arm64::__anon13834::simd::simd8425 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
426 return lookup_table.apply_lookup_16_to(*this);
427 }
428
429
430 template<typename L>
lookup_16simdutf::arm64::__anon13834::simd::simd8431 simdutf_really_inline simd8<L> lookup_16(
432 L replace0, L replace1, L replace2, L replace3,
433 L replace4, L replace5, L replace6, L replace7,
434 L replace8, L replace9, L replace10, L replace11,
435 L replace12, L replace13, L replace14, L replace15) const {
436 return lookup_16(simd8<L>::repeat_16(
437 replace0, replace1, replace2, replace3,
438 replace4, replace5, replace6, replace7,
439 replace8, replace9, replace10, replace11,
440 replace12, replace13, replace14, replace15
441 ));
442 }
443
444 template<typename T>
apply_lookup_16_tosimdutf::arm64::__anon13834::simd::simd8445 simdutf_really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) const {
446 return vqtbl1q_u8(*this, simd8<uint8_t>(original));
447 }
448 };
449
450 // Signed bytes
451 template<>
452 struct simd8<int8_t> {
453 int8x16_t value;
454
splatsimdutf::arm64::__anon13834::simd::simd8455 static simdutf_really_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); }
zerosimdutf::arm64::__anon13834::simd::simd8456 static simdutf_really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); }
loadsimdutf::arm64::__anon13834::simd::simd8457 static simdutf_really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); }
458
459 // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a USHLL #0,
460 // and shifting in NEON is actually quite slow.
461 //
462 // While this needs the registers to be in a specific order, bigger cores can interleave
463 // these with no overhead, and it still performs decently on little cores.
464 // movi v1.3d, #0
465 // mov v0.16b, value[0]
466 // st2 {v0.16b, v1.16b}, [ptr], #32
467 // mov v0.16b, value[1]
468 // st2 {v0.16b, v1.16b}, [ptr], #32
469 // ...
470 template <endianness big_endian>
store_ascii_as_utf16simdutf::arm64::__anon13834::simd::simd8471 simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
472 int8x16x2_t pair = match_system(big_endian)
473 ? int8x16x2_t{{this->value, vmovq_n_s8(0)}}
474 : int8x16x2_t{{vmovq_n_s8(0), this->value}};
475 vst2q_s8(reinterpret_cast<int8_t *>(p), pair);
476 }
477
478 // currently unused
479 // Technically this could be done with ST4 like in store_ascii_as_utf16, but it is
480 // very much not worth it, as explicitly mentioned in the ARM Cortex-X1 Core Software
481 // Optimization Guide:
482 // 4.18 Complex ASIMD instructions
483 // The bandwidth of [ST4 with element size less than 64b] is limited by decode
484 // constraints and it is advisable to avoid them when high performing code is desired.
485 // Instead, it is better to use ZIP1+ZIP2 and two ST2.
store_ascii_as_utf32simdutf::arm64::__anon13834::simd::simd8486 simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
487 const uint16x8_t low = vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0)));
488 const uint16x8_t high = vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0)));
489 const uint16x8x2_t low_pair{{ low, vmovq_n_u16(0) }};
490 vst2q_u16(reinterpret_cast<uint16_t *>(p), low_pair);
491 const uint16x8x2_t high_pair{{ high, vmovq_n_u16(0) }};
492 vst2q_u16(reinterpret_cast<uint16_t *>(p + 8), high_pair);
493 }
494
495 // In places where the table can be reused, which is most uses in simdutf, it is worth it to do
496 // 4 table lookups, as there is no direct zero extension from u8 to u32.
store_ascii_as_utf32_tblsimdutf::arm64::__anon13834::simd::simd8497 simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t * p) const {
498 const simd8<uint8_t> tb1{ 0,255,255,255, 1,255,255,255, 2,255,255,255, 3,255,255,255 };
499 const simd8<uint8_t> tb2{ 4,255,255,255, 5,255,255,255, 6,255,255,255, 7,255,255,255 };
500 const simd8<uint8_t> tb3{ 8,255,255,255, 9,255,255,255, 10,255,255,255, 11,255,255,255 };
501 const simd8<uint8_t> tb4{ 12,255,255,255, 13,255,255,255, 14,255,255,255, 15,255,255,255 };
502
503 // encourage store pairing and interleaving
504 const auto shuf1 = this->apply_lookup_16_to(tb1);
505 const auto shuf2 = this->apply_lookup_16_to(tb2);
506 shuf1.store(reinterpret_cast<int8_t *>(p));
507 shuf2.store(reinterpret_cast<int8_t *>(p + 4));
508
509 const auto shuf3 = this->apply_lookup_16_to(tb3);
510 const auto shuf4 = this->apply_lookup_16_to(tb4);
511 shuf3.store(reinterpret_cast<int8_t *>(p + 8));
512 shuf4.store(reinterpret_cast<int8_t *>(p + 12));
513 }
514 // Conversion from/to SIMD register
simd8simdutf::arm64::__anon13834::simd::simd8515 simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {}
operator const int8x16_t&simdutf::arm64::__anon13834::simd::simd8516 simdutf_really_inline operator const int8x16_t&() const { return this->value; }
517 #ifndef SIMDUTF_REGULAR_VISUAL_STUDIO
operator const uint8x16_tsimdutf::arm64::__anon13834::simd::simd8518 simdutf_really_inline operator const uint8x16_t() const { return vreinterpretq_u8_s8(this->value); }
519 #endif
operator int8x16_t&simdutf::arm64::__anon13834::simd::simd8520 simdutf_really_inline operator int8x16_t&() { return this->value; }
521
522 // Zero constructor
simd8simdutf::arm64::__anon13834::simd::simd8523 simdutf_really_inline simd8() : simd8(zero()) {}
524 // Splat constructor
simd8simdutf::arm64::__anon13834::simd::simd8525 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
526 // Array constructor
simd8simdutf::arm64::__anon13834::simd::simd8527 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
528 // Member-by-member initialization
529 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
simd8simdutf::arm64::__anon13834::simd::simd8530 simdutf_really_inline simd8(
531 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
532 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
533 ) : simd8(simdutf_make_int8x16_t(
534 v0, v1, v2, v3, v4, v5, v6, v7,
535 v8, v9, v10,v11,v12,v13,v14,v15
536 )) {}
537 #else
simd8simdutf::arm64::__anon13834::simd::simd8538 simdutf_really_inline simd8(
539 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
540 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
541 ) : simd8(int8x16_t{
542 v0, v1, v2, v3, v4, v5, v6, v7,
543 v8, v9, v10,v11,v12,v13,v14,v15
544 }) {}
545 #endif
546 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::arm64::__anon13834::simd::simd8547 simdutf_really_inline static simd8<int8_t> repeat_16(
548 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
549 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
550 ) {
551 return simd8<int8_t>(
552 v0, v1, v2, v3, v4, v5, v6, v7,
553 v8, v9, v10,v11,v12,v13,v14,v15
554 );
555 }
556
557 // Store to array
storesimdutf::arm64::__anon13834::simd::simd8558 simdutf_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, value); }
559 // Explicit conversion to/from unsigned
560 //
561 // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type.
562 // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14
563 // and relatively ugly and hard to read.
564 #ifndef SIMDUTF_REGULAR_VISUAL_STUDIO
simd8simdutf::arm64::__anon13834::simd::simd8565 simdutf_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
566 #endif
operator simd8<uint8_t>simdutf::arm64::__anon13834::simd::simd8567 simdutf_really_inline operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); }
568
operator |simdutf::arm64::__anon13834::simd::simd8569 simdutf_really_inline simd8<int8_t> operator|(const simd8<int8_t> other) const { return vorrq_s8(value, other.value); }
operator &simdutf::arm64::__anon13834::simd::simd8570 simdutf_really_inline simd8<int8_t> operator&(const simd8<int8_t> other) const { return vandq_s8(value, other.value); }
operator ^simdutf::arm64::__anon13834::simd::simd8571 simdutf_really_inline simd8<int8_t> operator^(const simd8<int8_t> other) const { return veorq_s8(value, other.value); }
bit_andnotsimdutf::arm64::__anon13834::simd::simd8572 simdutf_really_inline simd8<int8_t> bit_andnot(const simd8<int8_t> other) const { return vbicq_s8(value, other.value); }
573
574 // Math
operator +simdutf::arm64::__anon13834::simd::simd8575 simdutf_really_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(value, other.value); }
operator -simdutf::arm64::__anon13834::simd::simd8576 simdutf_really_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(value, other.value); }
operator +=simdutf::arm64::__anon13834::simd::simd8577 simdutf_really_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; }
operator -=simdutf::arm64::__anon13834::simd::simd8578 simdutf_really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; }
579
max_valsimdutf::arm64::__anon13834::simd::simd8580 simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); }
min_valsimdutf::arm64::__anon13834::simd::simd8581 simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); }
is_asciisimdutf::arm64::__anon13834::simd::simd8582 simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; }
583
584 // Order-sensitive comparisons
max_valsimdutf::arm64::__anon13834::simd::simd8585 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return vmaxq_s8(value, other.value); }
min_valsimdutf::arm64::__anon13834::simd::simd8586 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return vminq_s8(value, other.value); }
operator >simdutf::arm64::__anon13834::simd::simd8587 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(value, other.value); }
operator <simdutf::arm64::__anon13834::simd::simd8588 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(value, other.value); }
operator ==simdutf::arm64::__anon13834::simd::simd8589 simdutf_really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(value, other.value); }
590
591 template<int N=1>
prevsimdutf::arm64::__anon13834::simd::simd8592 simdutf_really_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const {
593 return vextq_s8(prev_chunk, *this, 16 - N);
594 }
595
596 // Perform a lookup assuming no value is larger than 16
597 template<typename L>
lookup_16simdutf::arm64::__anon13834::simd::simd8598 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
599 return lookup_table.apply_lookup_16_to(*this);
600 }
601 template<typename L>
lookup_16simdutf::arm64::__anon13834::simd::simd8602 simdutf_really_inline simd8<L> lookup_16(
603 L replace0, L replace1, L replace2, L replace3,
604 L replace4, L replace5, L replace6, L replace7,
605 L replace8, L replace9, L replace10, L replace11,
606 L replace12, L replace13, L replace14, L replace15) const {
607 return lookup_16(simd8<L>::repeat_16(
608 replace0, replace1, replace2, replace3,
609 replace4, replace5, replace6, replace7,
610 replace8, replace9, replace10, replace11,
611 replace12, replace13, replace14, replace15
612 ));
613 }
614
615 template<typename T>
apply_lookup_16_tosimdutf::arm64::__anon13834::simd::simd8616 simdutf_really_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) const {
617 return vqtbl1q_s8(*this, simd8<uint8_t>(original));
618 }
619 };
620
621 template<typename T>
622 struct simd8x64 {
623 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
624 static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block.");
625 simd8<T> chunks[NUM_CHUNKS];
626
627 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
628 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
629 simd8x64() = delete; // no default constructor allowed
630
simd8x64simdutf::arm64::__anon13834::simd::simd8x64631 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
simd8x64simdutf::arm64::__anon13834::simd::simd8x64632 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
633
storesimdutf::arm64::__anon13834::simd::simd8x64634 simdutf_really_inline void store(T* ptr) const {
635 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
636 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
637 this->chunks[2].store(ptr+sizeof(simd8<T>)*2/sizeof(T));
638 this->chunks[3].store(ptr+sizeof(simd8<T>)*3/sizeof(T));
639 }
640
641
operator |=simdutf::arm64::__anon13834::simd::simd8x64642 simdutf_really_inline simd8x64<T>& operator |=(const simd8x64<T> &other) {
643 this->chunks[0] |= other.chunks[0];
644 this->chunks[1] |= other.chunks[1];
645 this->chunks[2] |= other.chunks[2];
646 this->chunks[3] |= other.chunks[3];
647 return *this;
648 }
649
reduce_orsimdutf::arm64::__anon13834::simd::simd8x64650 simdutf_really_inline simd8<T> reduce_or() const {
651 return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
652 }
653
is_asciisimdutf::arm64::__anon13834::simd::simd8x64654 simdutf_really_inline bool is_ascii() const {
655 return reduce_or().is_ascii();
656 }
657
658 template <endianness endian>
store_ascii_as_utf16simdutf::arm64::__anon13834::simd::simd8x64659 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
660 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
661 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
662 this->chunks[2].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*2);
663 this->chunks[3].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*3);
664 }
665
store_ascii_as_utf32simdutf::arm64::__anon13834::simd::simd8x64666 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
667 this->chunks[0].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*0);
668 this->chunks[1].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*1);
669 this->chunks[2].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*2);
670 this->chunks[3].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*3);
671 }
672
to_bitmasksimdutf::arm64::__anon13834::simd::simd8x64673 simdutf_really_inline uint64_t to_bitmask() const {
674 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
675 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(
676 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
677 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
678 );
679 #else
680 const uint8x16_t bit_mask = {
681 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
682 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
683 };
684 #endif
685 // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one.
686 uint8x16_t sum0 = vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), vandq_u8(uint8x16_t(this->chunks[1]), bit_mask));
687 uint8x16_t sum1 = vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), vandq_u8(uint8x16_t(this->chunks[3]), bit_mask));
688 sum0 = vpaddq_u8(sum0, sum1);
689 sum0 = vpaddq_u8(sum0, sum0);
690 return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
691 }
692
eqsimdutf::arm64::__anon13834::simd::simd8x64693 simdutf_really_inline uint64_t eq(const T m) const {
694 const simd8<T> mask = simd8<T>::splat(m);
695 return simd8x64<bool>(
696 this->chunks[0] == mask,
697 this->chunks[1] == mask,
698 this->chunks[2] == mask,
699 this->chunks[3] == mask
700 ).to_bitmask();
701 }
702
lteqsimdutf::arm64::__anon13834::simd::simd8x64703 simdutf_really_inline uint64_t lteq(const T m) const {
704 const simd8<T> mask = simd8<T>::splat(m);
705 return simd8x64<bool>(
706 this->chunks[0] <= mask,
707 this->chunks[1] <= mask,
708 this->chunks[2] <= mask,
709 this->chunks[3] <= mask
710 ).to_bitmask();
711 }
712
in_rangesimdutf::arm64::__anon13834::simd::simd8x64713 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
714 const simd8<T> mask_low = simd8<T>::splat(low);
715 const simd8<T> mask_high = simd8<T>::splat(high);
716
717 return simd8x64<bool>(
718 (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
719 (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
720 (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
721 (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
722 ).to_bitmask();
723 }
not_in_rangesimdutf::arm64::__anon13834::simd::simd8x64724 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
725 const simd8<T> mask_low = simd8<T>::splat(low);
726 const simd8<T> mask_high = simd8<T>::splat(high);
727 return simd8x64<bool>(
728 (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
729 (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
730 (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
731 (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)
732 ).to_bitmask();
733 }
ltsimdutf::arm64::__anon13834::simd::simd8x64734 simdutf_really_inline uint64_t lt(const T m) const {
735 const simd8<T> mask = simd8<T>::splat(m);
736 return simd8x64<bool>(
737 this->chunks[0] < mask,
738 this->chunks[1] < mask,
739 this->chunks[2] < mask,
740 this->chunks[3] < mask
741 ).to_bitmask();
742 }
gtsimdutf::arm64::__anon13834::simd::simd8x64743 simdutf_really_inline uint64_t gt(const T m) const {
744 const simd8<T> mask = simd8<T>::splat(m);
745 return simd8x64<bool>(
746 this->chunks[0] > mask,
747 this->chunks[1] > mask,
748 this->chunks[2] > mask,
749 this->chunks[3] > mask
750 ).to_bitmask();
751 }
gteqsimdutf::arm64::__anon13834::simd::simd8x64752 simdutf_really_inline uint64_t gteq(const T m) const {
753 const simd8<T> mask = simd8<T>::splat(m);
754 return simd8x64<bool>(
755 this->chunks[0] >= mask,
756 this->chunks[1] >= mask,
757 this->chunks[2] >= mask,
758 this->chunks[3] >= mask
759 ).to_bitmask();
760 }
gteq_unsignedsimdutf::arm64::__anon13834::simd::simd8x64761 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
762 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
763 return simd8x64<bool>(
764 simd8<uint8_t>(uint8x16_t(this->chunks[0])) >= mask,
765 simd8<uint8_t>(uint8x16_t(this->chunks[1])) >= mask,
766 simd8<uint8_t>(uint8x16_t(this->chunks[2])) >= mask,
767 simd8<uint8_t>(uint8x16_t(this->chunks[3])) >= mask
768 ).to_bitmask();
769 }
770 }; // struct simd8x64<T>
771 /* begin file src/simdutf/arm64/simd16-inl.h */
772 template<typename T>
773 struct simd16;
774
775 template<typename T, typename Mask=simd16<bool>>
776 struct base_u16 {
777 uint16x8_t value;
778 static const int SIZE = sizeof(value);
779
780 // Conversion from/to SIMD register
781 simdutf_really_inline base_u16() = default;
base_u16simdutf::arm64::__anon13834::simd::base_u16782 simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {}
operator const uint16x8_t&simdutf::arm64::__anon13834::simd::base_u16783 simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
operator uint16x8_t&simdutf::arm64::__anon13834::simd::base_u16784 simdutf_really_inline operator uint16x8_t&() { return this->value; }
785 // Bit operations
operator |simdutf::arm64::__anon13834::simd::base_u16786 simdutf_really_inline simd16<T> operator|(const simd16<T> other) const { return vorrq_u16(*this, other); }
operator &simdutf::arm64::__anon13834::simd::base_u16787 simdutf_really_inline simd16<T> operator&(const simd16<T> other) const { return vandq_u16(*this, other); }
operator ^simdutf::arm64::__anon13834::simd::base_u16788 simdutf_really_inline simd16<T> operator^(const simd16<T> other) const { return veorq_u16(*this, other); }
bit_andnotsimdutf::arm64::__anon13834::simd::base_u16789 simdutf_really_inline simd16<T> bit_andnot(const simd16<T> other) const { return vbicq_u16(*this, other); }
operator ~simdutf::arm64::__anon13834::simd::base_u16790 simdutf_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
operator |=simdutf::arm64::__anon13834::simd::base_u16791 simdutf_really_inline simd16<T>& operator|=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast | other; return *this_cast; }
operator &=simdutf::arm64::__anon13834::simd::base_u16792 simdutf_really_inline simd16<T>& operator&=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast & other; return *this_cast; }
operator ^=simdutf::arm64::__anon13834::simd::base_u16793 simdutf_really_inline simd16<T>& operator^=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
794
operator ==simdutf::arm64::__anon13834::simd::base_u16795 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return vceqq_u16(lhs, rhs); }
796
797 template<int N=1>
prevsimdutf::arm64::__anon13834::simd::base_u16798 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
799 return vextq_u18(prev_chunk, *this, 8 - N);
800 }
801 };
802
803 template<typename T, typename Mask=simd16<bool>>
804 struct base16: base_u16<T> {
805 typedef uint16_t bitmask_t;
806 typedef uint32_t bitmask2_t;
807
base16simdutf::arm64::__anon13834::simd::base16808 simdutf_really_inline base16() : base_u16<T>() {}
base16simdutf::arm64::__anon13834::simd::base16809 simdutf_really_inline base16(const uint16x8_t _value) : base_u16<T>(_value) {}
810 template <typename Pointer>
base16simdutf::arm64::__anon13834::simd::base16811 simdutf_really_inline base16(const Pointer* ptr) : base16(vld1q_u16(ptr)) {}
812
813 static const int SIZE = sizeof(base_u16<T>::value);
814
815 template<int N=1>
prevsimdutf::arm64::__anon13834::simd::base16816 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
817 return vextq_u18(prev_chunk, *this, 8 - N);
818 }
819 };
820
821 // SIMD byte mask type (returned by things like eq and gt)
822 template<>
823 struct simd16<bool>: base16<bool> {
splatsimdutf::arm64::__anon13834::simd::simd16824 static simdutf_really_inline simd16<bool> splat(bool _value) { return vmovq_n_u16(uint16_t(-(!!_value))); }
825
simd16simdutf::arm64::__anon13834::simd::simd16826 simdutf_really_inline simd16<bool>() : base16() {}
simd16simdutf::arm64::__anon13834::simd::simd16827 simdutf_really_inline simd16<bool>(const uint16x8_t _value) : base16<bool>(_value) {}
828 // Splat constructor
simd16simdutf::arm64::__anon13834::simd::simd16829 simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
830
831 };
832
833 template<typename T>
834 struct base16_numeric: base16<T> {
splatsimdutf::arm64::__anon13834::simd::base16_numeric835 static simdutf_really_inline simd16<T> splat(T _value) { return vmovq_n_u16(_value); }
zerosimdutf::arm64::__anon13834::simd::base16_numeric836 static simdutf_really_inline simd16<T> zero() { return vdupq_n_u16(0); }
loadsimdutf::arm64::__anon13834::simd::base16_numeric837 static simdutf_really_inline simd16<T> load(const T values[8]) {
838 return vld1q_u16(reinterpret_cast<const uint16_t*>(values));
839 }
840
base16_numericsimdutf::arm64::__anon13834::simd::base16_numeric841 simdutf_really_inline base16_numeric() : base16<T>() {}
base16_numericsimdutf::arm64::__anon13834::simd::base16_numeric842 simdutf_really_inline base16_numeric(const uint16x8_t _value) : base16<T>(_value) {}
843
844 // Store to array
storesimdutf::arm64::__anon13834::simd::base16_numeric845 simdutf_really_inline void store(T dst[8]) const { return vst1q_u16(dst, *this); }
846
847 // Override to distinguish from bool version
operator ~simdutf::arm64::__anon13834::simd::base16_numeric848 simdutf_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
849
850 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::arm64::__anon13834::simd::base16_numeric851 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return vaddq_u8(*this, other); }
operator -simdutf::arm64::__anon13834::simd::base16_numeric852 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return vsubq_u8(*this, other); }
operator +=simdutf::arm64::__anon13834::simd::base16_numeric853 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
operator -=simdutf::arm64::__anon13834::simd::base16_numeric854 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
855 };
856
857 // Signed code units
858 template<>
859 struct simd16<int16_t> : base16_numeric<int16_t> {
simd16simdutf::arm64::__anon13834::simd::simd16860 simdutf_really_inline simd16() : base16_numeric<int16_t>() {}
861 #ifndef SIMDUTF_REGULAR_VISUAL_STUDIO
simd16simdutf::arm64::__anon13834::simd::simd16862 simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<int16_t>(_value) {}
863 #endif
simd16simdutf::arm64::__anon13834::simd::simd16864 simdutf_really_inline simd16(const int16x8_t _value) : base16_numeric<int16_t>(vreinterpretq_u16_s16(_value)) {}
865
866 // Splat constructor
simd16simdutf::arm64::__anon13834::simd::simd16867 simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
868 // Array constructor
simd16simdutf::arm64::__anon13834::simd::simd16869 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
simd16simdutf::arm64::__anon13834::simd::simd16870 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
871 simdutf_really_inline operator simd16<uint16_t>() const;
operator const uint16x8_t&simdutf::arm64::__anon13834::simd::simd16872 simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
operator const int16x8_tsimdutf::arm64::__anon13834::simd::simd16873 simdutf_really_inline operator const int16x8_t() const { return vreinterpretq_s16_u16(this->value); }
874
max_valsimdutf::arm64::__anon13834::simd::simd16875 simdutf_really_inline int16_t max_val() const { return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); }
min_valsimdutf::arm64::__anon13834::simd::simd16876 simdutf_really_inline int16_t min_val() const { return vminvq_s16(vreinterpretq_s16_u16(this->value)); }
877 // Order-sensitive comparisons
max_valsimdutf::arm64::__anon13834::simd::simd16878 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return vmaxq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
min_valsimdutf::arm64::__anon13834::simd::simd16879 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return vmaxq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
operator >simdutf::arm64::__anon13834::simd::simd16880 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return vcgtq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
operator <simdutf::arm64::__anon13834::simd::simd16881 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return vcltq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
882 };
883
884
885
886
887 // Unsigned code units
888 template<>
889 struct simd16<uint16_t>: base16_numeric<uint16_t> {
simd16simdutf::arm64::__anon13834::simd::simd16890 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
simd16simdutf::arm64::__anon13834::simd::simd16891 simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<uint16_t>(_value) {}
892
893 // Splat constructor
simd16simdutf::arm64::__anon13834::simd::simd16894 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
895 // Array constructor
simd16simdutf::arm64::__anon13834::simd::simd16896 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
simd16simdutf::arm64::__anon13834::simd::simd16897 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
898
899
max_valsimdutf::arm64::__anon13834::simd::simd16900 simdutf_really_inline int16_t max_val() const { return vmaxvq_u16(*this); }
min_valsimdutf::arm64::__anon13834::simd::simd16901 simdutf_really_inline int16_t min_val() const { return vminvq_u16(*this); }
902 // Saturated math
saturating_addsimdutf::arm64::__anon13834::simd::simd16903 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return vqaddq_u16(*this, other); }
saturating_subsimdutf::arm64::__anon13834::simd::simd16904 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return vqsubq_u16(*this, other); }
905
906 // Order-specific operations
max_valsimdutf::arm64::__anon13834::simd::simd16907 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return vmaxq_u16(*this, other); }
min_valsimdutf::arm64::__anon13834::simd::simd16908 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return vminq_u16(*this, other); }
909 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::arm64::__anon13834::simd::simd16910 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
911 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::arm64::__anon13834::simd::simd16912 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::arm64::__anon13834::simd::simd16913 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return vcleq_u16(*this, other); }
operator >=simdutf::arm64::__anon13834::simd::simd16914 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return vcgeq_u16(*this, other); }
operator >simdutf::arm64::__anon13834::simd::simd16915 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return vcgtq_u16(*this, other); }
operator <simdutf::arm64::__anon13834::simd::simd16916 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return vcltq_u16(*this, other); }
917
918 // Bit-specific operations
bits_not_setsimdutf::arm64::__anon13834::simd::simd16919 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
920 template<int N>
shrsimdutf::arm64::__anon13834::simd::simd16921 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(vshrq_n_u16(*this, N)); }
922 template<int N>
shlsimdutf::arm64::__anon13834::simd::simd16923 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(vshlq_n_u16(*this, N)); }
924
925 // logical operations
operator |simdutf::arm64::__anon13834::simd::simd16926 simdutf_really_inline simd16<uint16_t> operator|(const simd16<uint16_t> other) const { return vorrq_u16(*this, other); }
operator &simdutf::arm64::__anon13834::simd::simd16927 simdutf_really_inline simd16<uint16_t> operator&(const simd16<uint16_t> other) const { return vandq_u16(*this, other); }
operator ^simdutf::arm64::__anon13834::simd::simd16928 simdutf_really_inline simd16<uint16_t> operator^(const simd16<uint16_t> other) const { return veorq_u16(*this, other); }
929
930 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
packsimdutf::arm64::__anon13834::simd::simd16931 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
932 return vqmovn_high_u16(vqmovn_u16(v0), v1);
933 }
934
935 // Change the endianness
swap_bytessimdutf::arm64::__anon13834::simd::simd16936 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
937 return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this)));
938 }
939 };
operator simd16<uint16_t>() const940 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
941
942
943 template<typename T>
944 struct simd16x32 {
945 static constexpr int NUM_CHUNKS = 64 / sizeof(simd16<T>);
946 static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block.");
947 simd16<T> chunks[NUM_CHUNKS];
948
949 simd16x32(const simd16x32<T>& o) = delete; // no copy allowed
950 simd16x32<T>& operator=(const simd16<T> other) = delete; // no assignment allowed
951 simd16x32() = delete; // no default constructor allowed
952
simd16x32simdutf::arm64::__anon13834::simd::simd16x32953 simdutf_really_inline simd16x32(const simd16<T> chunk0, const simd16<T> chunk1, const simd16<T> chunk2, const simd16<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
simd16x32simdutf::arm64::__anon13834::simd::simd16x32954 simdutf_really_inline simd16x32(const T* ptr) : chunks{simd16<T>::load(ptr), simd16<T>::load(ptr+sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+2*sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+3*sizeof(simd16<T>)/sizeof(T))} {}
955
storesimdutf::arm64::__anon13834::simd::simd16x32956 simdutf_really_inline void store(T* ptr) const {
957 this->chunks[0].store(ptr+sizeof(simd16<T>)*0/sizeof(T));
958 this->chunks[1].store(ptr+sizeof(simd16<T>)*1/sizeof(T));
959 this->chunks[2].store(ptr+sizeof(simd16<T>)*2/sizeof(T));
960 this->chunks[3].store(ptr+sizeof(simd16<T>)*3/sizeof(T));
961 }
962
reduce_orsimdutf::arm64::__anon13834::simd::simd16x32963 simdutf_really_inline simd16<T> reduce_or() const {
964 return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
965 }
966
is_asciisimdutf::arm64::__anon13834::simd::simd16x32967 simdutf_really_inline bool is_ascii() const {
968 return reduce_or().is_ascii();
969 }
970
store_ascii_as_utf16simdutf::arm64::__anon13834::simd::simd16x32971 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
972 this->chunks[0].store_ascii_as_utf16(ptr+sizeof(simd16<T>)*0);
973 this->chunks[1].store_ascii_as_utf16(ptr+sizeof(simd16<T>)*1);
974 this->chunks[2].store_ascii_as_utf16(ptr+sizeof(simd16<T>)*2);
975 this->chunks[3].store_ascii_as_utf16(ptr+sizeof(simd16<T>)*3);
976 }
977
to_bitmasksimdutf::arm64::__anon13834::simd::simd16x32978 simdutf_really_inline uint64_t to_bitmask() const {
979 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
980 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(
981 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
982 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
983 );
984 #else
985 const uint8x16_t bit_mask = {
986 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
987 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
988 };
989 #endif
990 // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one.
991 uint8x16_t sum0 = vpaddq_u8(vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)), vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask)));
992 uint8x16_t sum1 = vpaddq_u8(vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)), vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask)));
993 sum0 = vpaddq_u8(sum0, sum1);
994 sum0 = vpaddq_u8(sum0, sum0);
995 return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
996 }
997
swap_bytessimdutf::arm64::__anon13834::simd::simd16x32998 simdutf_really_inline void swap_bytes() {
999 this->chunks[0] = this->chunks[0].swap_bytes();
1000 this->chunks[1] = this->chunks[1].swap_bytes();
1001 this->chunks[2] = this->chunks[2].swap_bytes();
1002 this->chunks[3] = this->chunks[3].swap_bytes();
1003 }
1004
eqsimdutf::arm64::__anon13834::simd::simd16x321005 simdutf_really_inline uint64_t eq(const T m) const {
1006 const simd16<T> mask = simd16<T>::splat(m);
1007 return simd16x32<bool>(
1008 this->chunks[0] == mask,
1009 this->chunks[1] == mask,
1010 this->chunks[2] == mask,
1011 this->chunks[3] == mask
1012 ).to_bitmask();
1013 }
1014
lteqsimdutf::arm64::__anon13834::simd::simd16x321015 simdutf_really_inline uint64_t lteq(const T m) const {
1016 const simd16<T> mask = simd16<T>::splat(m);
1017 return simd16x32<bool>(
1018 this->chunks[0] <= mask,
1019 this->chunks[1] <= mask,
1020 this->chunks[2] <= mask,
1021 this->chunks[3] <= mask
1022 ).to_bitmask();
1023 }
1024
in_rangesimdutf::arm64::__anon13834::simd::simd16x321025 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
1026 const simd16<T> mask_low = simd16<T>::splat(low);
1027 const simd16<T> mask_high = simd16<T>::splat(high);
1028
1029 return simd16x32<bool>(
1030 (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
1031 (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
1032 (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
1033 (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
1034 ).to_bitmask();
1035 }
not_in_rangesimdutf::arm64::__anon13834::simd::simd16x321036 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
1037 const simd16<T> mask_low = simd16<T>::splat(low);
1038 const simd16<T> mask_high = simd16<T>::splat(high);
1039 return simd16x32<bool>(
1040 (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
1041 (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
1042 (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
1043 (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)
1044 ).to_bitmask();
1045 }
ltsimdutf::arm64::__anon13834::simd::simd16x321046 simdutf_really_inline uint64_t lt(const T m) const {
1047 const simd16<T> mask = simd16<T>::splat(m);
1048 return simd16x32<bool>(
1049 this->chunks[0] < mask,
1050 this->chunks[1] < mask,
1051 this->chunks[2] < mask,
1052 this->chunks[3] < mask
1053 ).to_bitmask();
1054 }
1055
1056 }; // struct simd16x32<T>
1057 template<>
not_in_range(const uint16_t low, const uint16_t high) const1058 simdutf_really_inline uint64_t simd16x32<uint16_t>::not_in_range(const uint16_t low, const uint16_t high) const {
1059 const simd16<uint16_t> mask_low = simd16<uint16_t>::splat(low);
1060 const simd16<uint16_t> mask_high = simd16<uint16_t>::splat(high);
1061 simd16x32<uint16_t> x(
1062 simd16<uint16_t>((this->chunks[0] > mask_high) | (this->chunks[0] < mask_low)),
1063 simd16<uint16_t>((this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)),
1064 simd16<uint16_t>((this->chunks[2] > mask_high) | (this->chunks[2] < mask_low)),
1065 simd16<uint16_t>((this->chunks[3] > mask_high) | (this->chunks[3] < mask_low))
1066 );
1067 return x.to_bitmask();
1068 }
1069 /* end file src/simdutf/arm64/simd16-inl.h */
1070 } // namespace simd
1071 } // unnamed namespace
1072 } // namespace arm64
1073 } // namespace simdutf
1074
1075 #endif // SIMDUTF_ARM64_SIMD_H
1076 /* end file src/simdutf/arm64/simd.h */
1077
1078 /* begin file src/simdutf/arm64/end.h */
1079 /* end file src/simdutf/arm64/end.h */
1080
1081 #endif // SIMDUTF_IMPLEMENTATION_ARM64
1082
1083 #endif // SIMDUTF_ARM64_H
1084 /* end file src/simdutf/arm64.h */
1085 /* begin file src/simdutf/icelake.h */
1086 #ifndef SIMDUTF_ICELAKE_H
1087 #define SIMDUTF_ICELAKE_H
1088
1089
1090
1091 #ifdef __has_include
// Detecting VBMI2 support in the compiler: we treat the presence of the
// <avx512vbmi2intrin.h> header as sufficient evidence of support.
1094 #if __has_include(<avx512vbmi2intrin.h>)
1095 #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1
1096 #endif
1097 #endif
1098
1099 #ifdef _MSC_VER
1100 #if _MSC_VER >= 1930
1101 // Visual Studio 2022 and up support VBMI2 under x64 even if the header
1102 // avx512vbmi2intrin.h is not found.
1103 // Visual Studio 2019 technically supports VBMI2, but the implementation
1104 // might be unreliable. Search for visualstudio2019icelakeissue in our
1105 // tests.
1106 #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1
1107 #endif
1108 #endif
1109
1110 // We allow icelake on x64 as long as the compiler is known to support VBMI2.
1111 #ifndef SIMDUTF_IMPLEMENTATION_ICELAKE
1112 #define SIMDUTF_IMPLEMENTATION_ICELAKE ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2))
1113 #endif
1114
1115 // To see why (__BMI__) && (__LZCNT__) are not part of this next line, see
1116 // https://github.com/simdutf/simdutf/issues/1247
1117 #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && (SIMDUTF_HAS_AVX512F && \
1118 SIMDUTF_HAS_AVX512DQ && \
1119 SIMDUTF_HAS_AVX512VL && \
1120 SIMDUTF_HAS_AVX512VBMI2) && (!SIMDUTF_IS_32BITS))
1121
1122 #if SIMDUTF_IMPLEMENTATION_ICELAKE
1123 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
1124 #define SIMDUTF_TARGET_ICELAKE
1125 #else
1126 #define SIMDUTF_TARGET_ICELAKE SIMDUTF_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq")
1127 #endif
1128
1129 namespace simdutf {
1130 namespace icelake {
1131 } // namespace icelake
1132 } // namespace simdutf
1133
1134
1135
1136 //
1137 // These two need to be included outside SIMDUTF_TARGET_REGION
1138 //
1139 /* begin file src/simdutf/icelake/intrinsics.h */
1140 #ifndef SIMDUTF_ICELAKE_INTRINSICS_H
1141 #define SIMDUTF_ICELAKE_INTRINSICS_H
1142
1143
1144 #ifdef SIMDUTF_VISUAL_STUDIO
1145 // under clang within visual studio, this will include <x86intrin.h>
1146 #include <intrin.h> // visual studio or clang
1147 #include <immintrin.h>
1148 #else
1149
1150 #if SIMDUTF_GCC11ORMORE
1151 // We should not get warnings while including <x86intrin.h> yet we do
1152 // under some versions of GCC.
1153 // If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnings.
1155 SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
1156 #endif
1157
1158 #include <x86intrin.h> // elsewhere
1159
1160
1161 #if SIMDUTF_GCC11ORMORE
1162 // cancels the suppression of the -Wuninitialized
1163 SIMDUTF_POP_DISABLE_WARNINGS
1164 #endif
1165
1166 #ifndef _tzcnt_u64
1167 #define _tzcnt_u64(x) __tzcnt_u64(x)
1168 #endif // _tzcnt_u64
1169 #endif // SIMDUTF_VISUAL_STUDIO
1170
1171 #ifdef SIMDUTF_CLANG_VISUAL_STUDIO
1172 /**
1173 * You are not supposed, normally, to include these
1174 * headers directly. Instead you should either include intrin.h
1175 * or x86intrin.h. However, when compiling with clang
1176 * under Windows (i.e., when _MSC_VER is set), these headers
1177 * only get included *if* the corresponding features are detected
1178 * from macros:
1179 * e.g., if __AVX2__ is set... in turn, we normally set these
1180 * macros by compiling against the corresponding architecture
1181 * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
1182 * software with these advanced instructions. In simdutf, we
1183 * want to compile the whole program for a generic target,
1184 * and only target our specific kernels. As a workaround,
1185 * we directly include the needed headers. These headers would
1186 * normally guard against such usage, but we carefully included
1187 * <x86intrin.h> (or <intrin.h>) before, so the headers
1188 * are fooled.
1189 */
1190 #include <bmiintrin.h> // for _blsr_u64
1191 #include <bmi2intrin.h> // for _pext_u64, _pdep_u64
1192 #include <lzcntintrin.h> // for __lzcnt64
1193 #include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
1194 #include <smmintrin.h>
1195 #include <tmmintrin.h>
1196 #include <avxintrin.h>
1197 #include <avx2intrin.h>
1198 // Important: we need the AVX-512 headers:
1199 #include <avx512fintrin.h>
1200 #include <avx512dqintrin.h>
1201 #include <avx512cdintrin.h>
1202 #include <avx512bwintrin.h>
1203 #include <avx512vlintrin.h>
1204 #include <avx512vlbwintrin.h>
1205 #include <avx512vbmiintrin.h>
1206 #include <avx512vbmi2intrin.h>
1207 #include <avx512vpopcntdqintrin.h>
1208 #include <avx512vpopcntdqvlintrin.h>
1209 // unfortunately, we may not get _blsr_u64, but, thankfully, clang
1210 // has it as a macro.
#ifndef _blsr_u64
// Fallback used when the toolchain does not provide _blsr_u64 as a macro:
// computes n & (n - 1), i.e. n with its lowest set bit cleared.
// Each use of the argument is parenthesized so that compound arguments such
// as `a | b` expand with the intended precedence.
// NOTE: the argument is evaluated twice; do not pass an expression with side
// effects.
#define _blsr_u64(n) (((n) - 1) & (n))
#endif // _blsr_u64
1215 #endif // SIMDUTF_CLANG_VISUAL_STUDIO
1216
1217
1218
1219 #if defined(__GNUC__) && !defined(__clang__)
1220
1221 #if __GNUC__ == 8
1222 #define SIMDUTF_GCC8 1
1223 #elif __GNUC__ == 9
1224 #define SIMDUTF_GCC9 1
1225 #endif // __GNUC__ == 8 || __GNUC__ == 9
1226
1227 #endif // defined(__GNUC__) && !defined(__clang__)
1228
1229 #if SIMDUTF_GCC8
1230 #pragma GCC push_options
1231 #pragma GCC target("avx512f")
1232 /**
1233 * GCC 8 fails to provide _mm512_set_epi8. We roll our own.
1234 */
_mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63)1235 inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) {
1236 return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56),
1237 uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56),
1238 uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56),
1239 uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56),
1240 uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56),
1241 uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56),
1242 uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56),
1243 uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56));
1244 }
1245 #pragma GCC pop_options
1246 #endif // SIMDUTF_GCC8
1247
#endif // SIMDUTF_ICELAKE_INTRINSICS_H
1249 /* end file src/simdutf/icelake/intrinsics.h */
1250 /* begin file src/simdutf/icelake/implementation.h */
1251 #ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H
1252 #define SIMDUTF_ICELAKE_IMPLEMENTATION_H
1253
1254
1255 namespace simdutf {
1256 namespace icelake {
1257
1258 namespace {
1259 using namespace simdutf;
1260 }
1261
1262 class implementation final : public simdutf::implementation {
1263 public:
implementation()1264 simdutf_really_inline implementation() : simdutf::implementation(
1265 "icelake",
1266 "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 extensions)",
1267 internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 | internal::instruction_set::AVX512VPOPCNTDQ ) {}
1268 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
1269 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1270 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1271 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1272 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1273 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1274 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1275 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1276 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1277 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1278 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1279 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1280 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1281 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1282 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1283 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1284 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1285 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1286 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1287 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1288 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1289 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1290 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1291 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1292 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1293 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1294 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1295 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1296 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1297 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1298 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1299 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1300 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1301 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1302 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1303 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1304 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1305 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1306 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1307 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1308 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1309 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1310 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1311 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1312 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1313 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1314 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1315 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1316 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1317 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1318 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1319 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1320 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1321 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1322 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1323 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1324 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1325 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1326 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1327 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1328 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1329 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1330 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1331 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1332 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1333 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
1334 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1335 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1336 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
1337 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
1338 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
1339 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
1340 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
1341 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
1342 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
1343 };
1344
1345 } // namespace icelake
1346 } // namespace simdutf
1347
1348 #endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H
1349 /* end file src/simdutf/icelake/implementation.h */
1350
1351 //
1352 // The rest need to be inside the region
1353 //
1354 /* begin file src/simdutf/icelake/begin.h */
1355 // redefining SIMDUTF_IMPLEMENTATION to "icelake"
1356 // #define SIMDUTF_IMPLEMENTATION icelake
1357
1358 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
1359 // nothing needed.
1360 #else
1361 SIMDUTF_TARGET_ICELAKE
1362 #endif
1363
1364 #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
1365 SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
1366 #endif // end of workaround
1367 /* end file src/simdutf/icelake/begin.h */
1368 // Declarations
1369 /* begin file src/simdutf/icelake/bitmanipulation.h */
1370 #ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H
1371 #define SIMDUTF_ICELAKE_BITMANIPULATION_H
1372
1373 namespace simdutf {
1374 namespace icelake {
1375 namespace {
1376
1377 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
count_ones(uint64_t input_num)1378 simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) {
1379 // note: we do not support legacy 32-bit Windows
1380 return __popcnt64(input_num);// Visual Studio wants two underscores
1381 }
1382 #else
1383 simdutf_really_inline long long int count_ones(uint64_t input_num) {
1384 return _popcnt64(input_num);
1385 }
1386 #endif
1387
1388 } // unnamed namespace
1389 } // namespace icelake
1390 } // namespace simdutf
1391
1392 #endif // SIMDUTF_ICELAKE_BITMANIPULATION_H
1393 /* end file src/simdutf/icelake/bitmanipulation.h */
1394 /* begin file src/simdutf/icelake/end.h */
1395 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
1396 // nothing needed.
1397 #else
1398 SIMDUTF_UNTARGET_REGION
1399 #endif
1400
1401
1402 #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
1403 SIMDUTF_POP_DISABLE_WARNINGS
1404 #endif // end of workaround
1405 /* end file src/simdutf/icelake/end.h */
1406
1407
1408
1409 #endif // SIMDUTF_IMPLEMENTATION_ICELAKE
1410 #endif // SIMDUTF_ICELAKE_H
1411 /* end file src/simdutf/icelake.h */
1412 /* begin file src/simdutf/haswell.h */
1413 #ifndef SIMDUTF_HASWELL_H
1414 #define SIMDUTF_HASWELL_H
1415
1416 #ifdef SIMDUTF_WESTMERE_H
1417 #error "haswell.h must be included before westmere.h"
1418 #endif
1419 #ifdef SIMDUTF_FALLBACK_H
1420 #error "haswell.h must be included before fallback.h"
1421 #endif
1422
1423
1424 // Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected
1425 // at runtime.
1426 #ifndef SIMDUTF_IMPLEMENTATION_HASWELL
1427 //
1428 // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__
1429 // because we want to rely on *runtime dispatch*.
1430 //
1431 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
1432 #define SIMDUTF_IMPLEMENTATION_HASWELL 0
1433 #else
1434 #define SIMDUTF_IMPLEMENTATION_HASWELL (SIMDUTF_IS_X86_64)
1435 #endif
1436
1437 #endif
1438 // To see why (__BMI__) && (__LZCNT__) are not part of this next line, see
1439 // https://github.com/simdutf/simdutf/issues/1247
1440 #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL ((SIMDUTF_IMPLEMENTATION_HASWELL) && (SIMDUTF_IS_X86_64) && (__AVX2__))
1441
1442 #if SIMDUTF_IMPLEMENTATION_HASWELL
1443
1444 #define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,lzcnt,popcnt")
1445
1446 namespace simdutf {
1447 /**
1448 * Implementation for Haswell (Intel AVX2).
1449 */
1450 namespace haswell {
1451 } // namespace haswell
1452 } // namespace simdutf
1453
1454 //
1455 // These two need to be included outside SIMDUTF_TARGET_REGION
1456 //
1457 /* begin file src/simdutf/haswell/implementation.h */
1458 #ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H
1459 #define SIMDUTF_HASWELL_IMPLEMENTATION_H
1460
1461
1462 // The constructor may be executed on any host, so we take care not to use SIMDUTF_TARGET_REGION
1463 namespace simdutf {
1464 namespace haswell {
1465
1466 using namespace simdutf;
1467
1468 class implementation final : public simdutf::implementation {
1469 public:
implementation()1470 simdutf_really_inline implementation() : simdutf::implementation(
1471 "haswell",
1472 "Intel/AMD AVX2",
1473 internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2
1474 ) {}
1475 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
1476 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1477 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1478 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1479 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1480 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1481 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1482 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1483 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1484 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1485 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1486 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1487 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1488 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1489 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1490 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1491 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1492 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1493 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1494 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1495 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1496 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1497 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1498 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1499 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1500 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1501 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1502 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1503 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1504 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1505 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1506 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1507 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1508 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1509 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1510 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1511 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1512 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1513 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1514 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1515 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1516 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1517 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1518 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1519 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1520 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1521 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1522 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1523 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1524 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1525 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1526 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1527 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1528 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1529 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1530 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1531 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1532 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1533 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1534 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1535 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1536 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1537 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1538 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1539 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1540 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
1541 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1542 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1543 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
1544 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
1545 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
1546 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
1547 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
1548 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
1549 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
1550 };
1551
1552 } // namespace haswell
1553 } // namespace simdutf
1554
1555 #endif // SIMDUTF_HASWELL_IMPLEMENTATION_H
1556 /* end file src/simdutf/haswell/implementation.h */
1557 /* begin file src/simdutf/haswell/intrinsics.h */
1558 #ifndef SIMDUTF_HASWELL_INTRINSICS_H
1559 #define SIMDUTF_HASWELL_INTRINSICS_H
1560
1561
1562 #ifdef SIMDUTF_VISUAL_STUDIO
1563 // under clang within visual studio, this will include <x86intrin.h>
1564 #include <intrin.h> // visual studio or clang
1565 #else
1566
1567 #if SIMDUTF_GCC11ORMORE
1568 // We should not get warnings while including <x86intrin.h> yet we do
1569 // under some versions of GCC.
1570 // If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnings.
1572 SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
1573 #endif
1574
1575 #include <x86intrin.h> // elsewhere
1576
1577
1578 #if SIMDUTF_GCC11ORMORE
1579 // cancels the suppression of the -Wuninitialized
1580 SIMDUTF_POP_DISABLE_WARNINGS
1581 #endif
1582
1583 #endif // SIMDUTF_VISUAL_STUDIO
1584
1585 #ifdef SIMDUTF_CLANG_VISUAL_STUDIO
1586 /**
1587 * You are not supposed, normally, to include these
1588 * headers directly. Instead you should either include intrin.h
1589 * or x86intrin.h. However, when compiling with clang
1590 * under Windows (i.e., when _MSC_VER is set), these headers
1591 * only get included *if* the corresponding features are detected
1592 * from macros:
1593 * e.g., if __AVX2__ is set... in turn, we normally set these
1594 * macros by compiling against the corresponding architecture
1595 * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
1596 * software with these advanced instructions. In simdutf, we
1597 * want to compile the whole program for a generic target,
1598 * and only target our specific kernels. As a workaround,
1599 * we directly include the needed headers. These headers would
1600 * normally guard against such usage, but we carefully included
1601 * <x86intrin.h> (or <intrin.h>) before, so the headers
1602 * are fooled.
1603 */
1604 #include <bmiintrin.h> // for _blsr_u64
1605 #include <lzcntintrin.h> // for __lzcnt64
1606 #include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
1607 #include <smmintrin.h>
1608 #include <tmmintrin.h>
1609 #include <avxintrin.h>
1610 #include <avx2intrin.h>
1611 // unfortunately, we may not get _blsr_u64, but, thankfully, clang
1612 // has it as a macro.
#ifndef _blsr_u64
// we roll our own: reset the lowest set bit of n, i.e. n & (n - 1).
// Every use of the argument is parenthesized so that compound arguments
// such as `_blsr_u64(a | b)` expand with the intended precedence.
// The argument is evaluated twice, so it must not have side effects.
#define _blsr_u64(n) (((n) - 1) & (n))
#endif // _blsr_u64
1617 #endif // SIMDUTF_CLANG_VISUAL_STUDIO
1618
1619 #endif // SIMDUTF_HASWELL_INTRINSICS_H
1620 /* end file src/simdutf/haswell/intrinsics.h */
1621
1622 //
1623 // The rest need to be inside the region
1624 //
1625 /* begin file src/simdutf/haswell/begin.h */
1626 // redefining SIMDUTF_IMPLEMENTATION to "haswell"
1627 // #define SIMDUTF_IMPLEMENTATION haswell
1628
1629 #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL
1630 // nothing needed.
1631 #else
1632 SIMDUTF_TARGET_HASWELL
1633 #endif
1634
1635 #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
1636 SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
1637 #endif // end of workaround
1638 /* end file src/simdutf/haswell/begin.h */
1639 // Declarations
1640 /* begin file src/simdutf/haswell/bitmanipulation.h */
1641 #ifndef SIMDUTF_HASWELL_BITMANIPULATION_H
1642 #define SIMDUTF_HASWELL_BITMANIPULATION_H
1643
1644 namespace simdutf {
1645 namespace haswell {
1646 namespace {
1647
1648 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
count_ones(uint64_t input_num)1649 simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) {
1650 // note: we do not support legacy 32-bit Windows
1651 return __popcnt64(input_num);// Visual Studio wants two underscores
1652 }
1653 #else
1654 simdutf_really_inline long long int count_ones(uint64_t input_num) {
1655 return _popcnt64(input_num);
1656 }
1657 #endif
1658
1659 } // unnamed namespace
1660 } // namespace haswell
1661 } // namespace simdutf
1662
1663 #endif // SIMDUTF_HASWELL_BITMANIPULATION_H
1664 /* end file src/simdutf/haswell/bitmanipulation.h */
1665 /* begin file src/simdutf/haswell/simd.h */
1666 #ifndef SIMDUTF_HASWELL_SIMD_H
1667 #define SIMDUTF_HASWELL_SIMD_H
1668
1669
1670 namespace simdutf {
1671 namespace haswell {
1672 namespace {
1673 namespace simd {
1674
1675 // Forward-declared so they can be used by splat and friends.
1676 template<typename Child>
1677 struct base {
1678 __m256i value;
1679
1680 // Zero constructor
basesimdutf::haswell::__anon13839::simd::base1681 simdutf_really_inline base() : value{__m256i()} {}
1682
1683 // Conversion from SIMD register
basesimdutf::haswell::__anon13839::simd::base1684 simdutf_really_inline base(const __m256i _value) : value(_value) {}
1685 // Conversion to SIMD register
operator const __m256i&simdutf::haswell::__anon13839::simd::base1686 simdutf_really_inline operator const __m256i&() const { return this->value; }
operator __m256i&simdutf::haswell::__anon13839::simd::base1687 simdutf_really_inline operator __m256i&() { return this->value; }
1688 template <endianness big_endian>
store_ascii_as_utf16simdutf::haswell::__anon13839::simd::base1689 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
1690 __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this));
1691 __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this,1));
1692 if (big_endian) {
1693 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
1694 17, 16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
1695 first = _mm256_shuffle_epi8(first, swap);
1696 second = _mm256_shuffle_epi8(second, swap);
1697 }
1698 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first);
1699 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second);
1700 }
store_ascii_as_utf32simdutf::haswell::__anon13839::simd::base1701 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
1702 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this)));
1703 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr+8), _mm256_cvtepu8_epi32(_mm256_castsi256_si128(_mm256_srli_si256(*this,8))));
1704 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this,1)));
1705 _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24), _mm256_cvtepu8_epi32(_mm_srli_si128(_mm256_extractf128_si256(*this,1),8)));
1706 }
1707 // Bit operations
operator |simdutf::haswell::__anon13839::simd::base1708 simdutf_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); }
operator &simdutf::haswell::__anon13839::simd::base1709 simdutf_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
operator ^simdutf::haswell::__anon13839::simd::base1710 simdutf_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
bit_andnotsimdutf::haswell::__anon13839::simd::base1711 simdutf_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
operator |=simdutf::haswell::__anon13839::simd::base1712 simdutf_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; }
operator &=simdutf::haswell::__anon13839::simd::base1713 simdutf_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; }
operator ^=simdutf::haswell::__anon13839::simd::base1714 simdutf_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
1715 };
1716
1717 // Forward-declared so they can be used by splat and friends.
1718 template<typename T>
1719 struct simd8;
1720
1721 template<typename T, typename Mask=simd8<bool>>
1722 struct base8: base<simd8<T>> {
1723 typedef uint32_t bitmask_t;
1724 typedef uint64_t bitmask2_t;
1725
base8simdutf::haswell::__anon13839::simd::base81726 simdutf_really_inline base8() : base<simd8<T>>() {}
base8simdutf::haswell::__anon13839::simd::base81727 simdutf_really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {}
firstsimdutf::haswell::__anon13839::simd::base81728 simdutf_really_inline T first() const { return _mm256_extract_epi8(*this,0); }
lastsimdutf::haswell::__anon13839::simd::base81729 simdutf_really_inline T last() const { return _mm256_extract_epi8(*this,31); }
operator ==simdutf::haswell::__anon13839::simd::base81730 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm256_cmpeq_epi8(lhs, rhs); }
1731
1732 static const int SIZE = sizeof(base<T>::value);
1733
1734 template<int N=1>
prevsimdutf::haswell::__anon13839::simd::base81735 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
1736 return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N);
1737 }
1738 };
1739
1740 // SIMD byte mask type (returned by things like eq and gt)
1741 template<>
1742 struct simd8<bool>: base8<bool> {
splatsimdutf::haswell::__anon13839::simd::simd81743 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); }
1744
simd8simdutf::haswell::__anon13839::simd::simd81745 simdutf_really_inline simd8<bool>() : base8() {}
simd8simdutf::haswell::__anon13839::simd::simd81746 simdutf_really_inline simd8<bool>(const __m256i _value) : base8<bool>(_value) {}
1747 // Splat constructor
simd8simdutf::haswell::__anon13839::simd::simd81748 simdutf_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
1749
to_bitmasksimdutf::haswell::__anon13839::simd::simd81750 simdutf_really_inline uint32_t to_bitmask() const { return uint32_t(_mm256_movemask_epi8(*this)); }
anysimdutf::haswell::__anon13839::simd::simd81751 simdutf_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
nonesimdutf::haswell::__anon13839::simd::simd81752 simdutf_really_inline bool none() const { return _mm256_testz_si256(*this, *this); }
allsimdutf::haswell::__anon13839::simd::simd81753 simdutf_really_inline bool all() const { return static_cast<uint32_t>(_mm256_movemask_epi8(*this)) == 0xFFFFFFFF; }
operator ~simdutf::haswell::__anon13839::simd::simd81754 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
1755 };
1756
1757 template<typename T>
1758 struct base8_numeric: base8<T> {
splatsimdutf::haswell::__anon13839::simd::base8_numeric1759 static simdutf_really_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); }
zerosimdutf::haswell::__anon13839::simd::base8_numeric1760 static simdutf_really_inline simd8<T> zero() { return _mm256_setzero_si256(); }
loadsimdutf::haswell::__anon13839::simd::base8_numeric1761 static simdutf_really_inline simd8<T> load(const T values[32]) {
1762 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
1763 }
1764 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::haswell::__anon13839::simd::base8_numeric1765 static simdutf_really_inline simd8<T> repeat_16(
1766 T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
1767 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
1768 ) {
1769 return simd8<T>(
1770 v0, v1, v2, v3, v4, v5, v6, v7,
1771 v8, v9, v10,v11,v12,v13,v14,v15,
1772 v0, v1, v2, v3, v4, v5, v6, v7,
1773 v8, v9, v10,v11,v12,v13,v14,v15
1774 );
1775 }
1776
base8_numericsimdutf::haswell::__anon13839::simd::base8_numeric1777 simdutf_really_inline base8_numeric() : base8<T>() {}
base8_numericsimdutf::haswell::__anon13839::simd::base8_numeric1778 simdutf_really_inline base8_numeric(const __m256i _value) : base8<T>(_value) {}
1779
1780 // Store to array
storesimdutf::haswell::__anon13839::simd::base8_numeric1781 simdutf_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); }
1782
1783 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::haswell::__anon13839::simd::base8_numeric1784 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); }
operator -simdutf::haswell::__anon13839::simd::base8_numeric1785 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); }
operator +=simdutf::haswell::__anon13839::simd::base8_numeric1786 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
operator -=simdutf::haswell::__anon13839::simd::base8_numeric1787 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
1788
1789 // Override to distinguish from bool version
operator ~simdutf::haswell::__anon13839::simd::base8_numeric1790 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
1791
1792 // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
1793 template<typename L>
lookup_16simdutf::haswell::__anon13839::simd::base8_numeric1794 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
1795 return _mm256_shuffle_epi8(lookup_table, *this);
1796 }
1797
1798 template<typename L>
lookup_16simdutf::haswell::__anon13839::simd::base8_numeric1799 simdutf_really_inline simd8<L> lookup_16(
1800 L replace0, L replace1, L replace2, L replace3,
1801 L replace4, L replace5, L replace6, L replace7,
1802 L replace8, L replace9, L replace10, L replace11,
1803 L replace12, L replace13, L replace14, L replace15) const {
1804 return lookup_16(simd8<L>::repeat_16(
1805 replace0, replace1, replace2, replace3,
1806 replace4, replace5, replace6, replace7,
1807 replace8, replace9, replace10, replace11,
1808 replace12, replace13, replace14, replace15
1809 ));
1810 }
1811 };
1812
1813
1814 // Signed bytes
1815 template<>
1816 struct simd8<int8_t> : base8_numeric<int8_t> {
simd8simdutf::haswell::__anon13839::simd::simd81817 simdutf_really_inline simd8() : base8_numeric<int8_t>() {}
simd8simdutf::haswell::__anon13839::simd::simd81818 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {}
1819
1820 // Splat constructor
simd8simdutf::haswell::__anon13839::simd::simd81821 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
1822 // Array constructor
simd8simdutf::haswell::__anon13839::simd::simd81823 simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
1824 simdutf_really_inline operator simd8<uint8_t>() const;
1825 // Member-by-member initialization
simd8simdutf::haswell::__anon13839::simd::simd81826 simdutf_really_inline simd8(
1827 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
1828 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15,
1829 int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23,
1830 int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31
1831 ) : simd8(_mm256_setr_epi8(
1832 v0, v1, v2, v3, v4, v5, v6, v7,
1833 v8, v9, v10,v11,v12,v13,v14,v15,
1834 v16,v17,v18,v19,v20,v21,v22,v23,
1835 v24,v25,v26,v27,v28,v29,v30,v31
1836 )) {}
1837 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::haswell::__anon13839::simd::simd81838 simdutf_really_inline static simd8<int8_t> repeat_16(
1839 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
1840 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
1841 ) {
1842 return simd8<int8_t>(
1843 v0, v1, v2, v3, v4, v5, v6, v7,
1844 v8, v9, v10,v11,v12,v13,v14,v15,
1845 v0, v1, v2, v3, v4, v5, v6, v7,
1846 v8, v9, v10,v11,v12,v13,v14,v15
1847 );
1848 }
is_asciisimdutf::haswell::__anon13839::simd::simd81849 simdutf_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; }
1850 // Order-sensitive comparisons
max_valsimdutf::haswell::__anon13839::simd::simd81851 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); }
min_valsimdutf::haswell::__anon13839::simd::simd81852 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); }
operator >simdutf::haswell::__anon13839::simd::simd81853 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); }
operator <simdutf::haswell::__anon13839::simd::simd81854 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); }
1855 };
1856
1857 // Unsigned bytes
1858 template<>
1859 struct simd8<uint8_t>: base8_numeric<uint8_t> {
simd8simdutf::haswell::__anon13839::simd::simd81860 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
simd8simdutf::haswell::__anon13839::simd::simd81861 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {}
1862 // Splat constructor
simd8simdutf::haswell::__anon13839::simd::simd81863 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
1864 // Array constructor
simd8simdutf::haswell::__anon13839::simd::simd81865 simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
1866 // Member-by-member initialization
simd8simdutf::haswell::__anon13839::simd::simd81867 simdutf_really_inline simd8(
1868 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
1869 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15,
1870 uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23,
1871 uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31
1872 ) : simd8(_mm256_setr_epi8(
1873 v0, v1, v2, v3, v4, v5, v6, v7,
1874 v8, v9, v10,v11,v12,v13,v14,v15,
1875 v16,v17,v18,v19,v20,v21,v22,v23,
1876 v24,v25,v26,v27,v28,v29,v30,v31
1877 )) {}
1878 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::haswell::__anon13839::simd::simd81879 simdutf_really_inline static simd8<uint8_t> repeat_16(
1880 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
1881 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
1882 ) {
1883 return simd8<uint8_t>(
1884 v0, v1, v2, v3, v4, v5, v6, v7,
1885 v8, v9, v10,v11,v12,v13,v14,v15,
1886 v0, v1, v2, v3, v4, v5, v6, v7,
1887 v8, v9, v10,v11,v12,v13,v14,v15
1888 );
1889 }
1890
1891
1892 // Saturated math
saturating_addsimdutf::haswell::__anon13839::simd::simd81893 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); }
saturating_subsimdutf::haswell::__anon13839::simd::simd81894 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); }
1895
1896 // Order-specific operations
max_valsimdutf::haswell::__anon13839::simd::simd81897 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
min_valsimdutf::haswell::__anon13839::simd::simd81898 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); }
1899 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::haswell::__anon13839::simd::simd81900 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
1901 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::haswell::__anon13839::simd::simd81902 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::haswell::__anon13839::simd::simd81903 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
operator >=simdutf::haswell::__anon13839::simd::simd81904 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
operator >simdutf::haswell::__anon13839::simd::simd81905 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
operator <simdutf::haswell::__anon13839::simd::simd81906 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
1907
1908 // Bit-specific operations
bits_not_setsimdutf::haswell::__anon13839::simd::simd81909 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
bits_not_setsimdutf::haswell::__anon13839::simd::simd81910 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
any_bits_setsimdutf::haswell::__anon13839::simd::simd81911 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
any_bits_setsimdutf::haswell::__anon13839::simd::simd81912 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
is_asciisimdutf::haswell::__anon13839::simd::simd81913 simdutf_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; }
bits_not_set_anywheresimdutf::haswell::__anon13839::simd::simd81914 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); }
any_bits_set_anywheresimdutf::haswell::__anon13839::simd::simd81915 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
bits_not_set_anywheresimdutf::haswell::__anon13839::simd::simd81916 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); }
any_bits_set_anywheresimdutf::haswell::__anon13839::simd::simd81917 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
1918 template<int N>
shrsimdutf::haswell::__anon13839::simd::simd81919 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
1920 template<int N>
shlsimdutf::haswell::__anon13839::simd::simd81921 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
1922 // Get one of the bits and make a bitmask out of it.
1923 // e.g. value.get_bit<7>() gets the high bit
1924 template<int N>
get_bitsimdutf::haswell::__anon13839::simd::simd81925 simdutf_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); }
1926 };
operator simd8<uint8_t>() const1927 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
1928
1929
1930 template<typename T>
1931 struct simd8x64 {
1932 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
1933 static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block.");
1934 simd8<T> chunks[NUM_CHUNKS];
1935
1936 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
1937 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
1938 simd8x64() = delete; // no default constructor allowed
1939
simd8x64simdutf::haswell::__anon13839::simd::simd8x641940 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {}
simd8x64simdutf::haswell::__anon13839::simd::simd8x641941 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T))} {}
1942
storesimdutf::haswell::__anon13839::simd::simd8x641943 simdutf_really_inline void store(T* ptr) const {
1944 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
1945 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
1946 }
1947
to_bitmasksimdutf::haswell::__anon13839::simd::simd8x641948 simdutf_really_inline uint64_t to_bitmask() const {
1949 uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
1950 uint64_t r_hi = this->chunks[1].to_bitmask();
1951 return r_lo | (r_hi << 32);
1952 }
1953
operator |=simdutf::haswell::__anon13839::simd::simd8x641954 simdutf_really_inline simd8x64<T>& operator|=(const simd8x64<T> &other) {
1955 this->chunks[0] |= other.chunks[0];
1956 this->chunks[1] |= other.chunks[1];
1957 return *this;
1958 }
1959
reduce_orsimdutf::haswell::__anon13839::simd::simd8x641960 simdutf_really_inline simd8<T> reduce_or() const {
1961 return this->chunks[0] | this->chunks[1];
1962 }
1963
is_asciisimdutf::haswell::__anon13839::simd::simd8x641964 simdutf_really_inline bool is_ascii() const {
1965 return this->reduce_or().is_ascii();
1966 }
1967
1968 template <endianness endian>
store_ascii_as_utf16simdutf::haswell::__anon13839::simd::simd8x641969 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
1970 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
1971 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
1972 }
1973
store_ascii_as_utf32simdutf::haswell::__anon13839::simd::simd8x641974 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
1975 this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*0);
1976 this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*1);
1977 }
1978
bit_orsimdutf::haswell::__anon13839::simd::simd8x641979 simdutf_really_inline simd8x64<T> bit_or(const T m) const {
1980 const simd8<T> mask = simd8<T>::splat(m);
1981 return simd8x64<T>(
1982 this->chunks[0] | mask,
1983 this->chunks[1] | mask
1984 );
1985 }
1986
eqsimdutf::haswell::__anon13839::simd::simd8x641987 simdutf_really_inline uint64_t eq(const T m) const {
1988 const simd8<T> mask = simd8<T>::splat(m);
1989 return simd8x64<bool>(
1990 this->chunks[0] == mask,
1991 this->chunks[1] == mask
1992 ).to_bitmask();
1993 }
1994
eqsimdutf::haswell::__anon13839::simd::simd8x641995 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
1996 return simd8x64<bool>(
1997 this->chunks[0] == other.chunks[0],
1998 this->chunks[1] == other.chunks[1]
1999 ).to_bitmask();
2000 }
2001
lteqsimdutf::haswell::__anon13839::simd::simd8x642002 simdutf_really_inline uint64_t lteq(const T m) const {
2003 const simd8<T> mask = simd8<T>::splat(m);
2004 return simd8x64<bool>(
2005 this->chunks[0] <= mask,
2006 this->chunks[1] <= mask
2007 ).to_bitmask();
2008 }
2009
in_rangesimdutf::haswell::__anon13839::simd::simd8x642010 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2011 const simd8<T> mask_low = simd8<T>::splat(low);
2012 const simd8<T> mask_high = simd8<T>::splat(high);
2013
2014 return simd8x64<bool>(
2015 (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
2016 (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)
2017 ).to_bitmask();
2018 }
not_in_rangesimdutf::haswell::__anon13839::simd::simd8x642019 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2020 const simd8<T> mask_low = simd8<T>::splat(low);
2021 const simd8<T> mask_high = simd8<T>::splat(high);
2022 return simd8x64<bool>(
2023 (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
2024 (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)
2025 ).to_bitmask();
2026 }
ltsimdutf::haswell::__anon13839::simd::simd8x642027 simdutf_really_inline uint64_t lt(const T m) const {
2028 const simd8<T> mask = simd8<T>::splat(m);
2029 return simd8x64<bool>(
2030 this->chunks[0] < mask,
2031 this->chunks[1] < mask
2032 ).to_bitmask();
2033 }
2034
gtsimdutf::haswell::__anon13839::simd::simd8x642035 simdutf_really_inline uint64_t gt(const T m) const {
2036 const simd8<T> mask = simd8<T>::splat(m);
2037 return simd8x64<bool>(
2038 this->chunks[0] > mask,
2039 this->chunks[1] > mask
2040 ).to_bitmask();
2041 }
gteqsimdutf::haswell::__anon13839::simd::simd8x642042 simdutf_really_inline uint64_t gteq(const T m) const {
2043 const simd8<T> mask = simd8<T>::splat(m);
2044 return simd8x64<bool>(
2045 this->chunks[0] >= mask,
2046 this->chunks[1] >= mask
2047 ).to_bitmask();
2048 }
gteq_unsignedsimdutf::haswell::__anon13839::simd::simd8x642049 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
2050 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
2051 return simd8x64<bool>(
2052 (simd8<uint8_t>(__m256i(this->chunks[0])) >= mask),
2053 (simd8<uint8_t>(__m256i(this->chunks[1])) >= mask)
2054 ).to_bitmask();
2055 }
2056 }; // struct simd8x64<T>
2057
2058 /* begin file src/simdutf/haswell/simd16-inl.h */
// Fallback definitions of _mm256_set_m128i / _mm256_setr_m128i, provided
// only when __GNUC__ is defined and smaller than 8. Both build a 256-bit
// vector from two 128-bit halves with _mm256_permute2f128_si256.
#if defined(__GNUC__) && (__GNUC__ < 8)
#define _mm256_set_m128i(xmm1, xmm2) _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), _mm256_castsi128_si256(xmm2), 2)
#define _mm256_setr_m128i(xmm2, xmm1) _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), _mm256_castsi128_si256(xmm2), 2)
#endif
2065
2066 template<typename T>
2067 struct simd16;
2068
2069 template<typename T, typename Mask=simd16<bool>>
2070 struct base16: base<simd16<T>> {
2071 using bitmask_type = uint32_t;
2072
base16simdutf::haswell::__anon13839::simd::base162073 simdutf_really_inline base16() : base<simd16<T>>() {}
base16simdutf::haswell::__anon13839::simd::base162074 simdutf_really_inline base16(const __m256i _value) : base<simd16<T>>(_value) {}
2075 template <typename Pointer>
base16simdutf::haswell::__anon13839::simd::base162076 simdutf_really_inline base16(const Pointer* ptr) : base16(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr))) {}
operator ==simdutf::haswell::__anon13839::simd::base162077 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return _mm256_cmpeq_epi16(lhs, rhs); }
2078
2079 /// the size of vector in bytes
2080 static const int SIZE = sizeof(base<simd16<T>>::value);
2081
2082 /// the number of elements of type T a vector can hold
2083 static const int ELEMENTS = SIZE / sizeof(T);
2084
2085 template<int N=1>
prevsimdutf::haswell::__anon13839::simd::base162086 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
2087 return _mm256_alignr_epi8(*this, prev_chunk, 16 - N);
2088 }
2089 };
2090
2091 // SIMD byte mask type (returned by things like eq and gt)
2092 template<>
2093 struct simd16<bool>: base16<bool> {
splatsimdutf::haswell::__anon13839::simd::simd162094 static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm256_set1_epi16(uint16_t(-(!!_value))); }
2095
simd16simdutf::haswell::__anon13839::simd::simd162096 simdutf_really_inline simd16<bool>() : base16() {}
simd16simdutf::haswell::__anon13839::simd::simd162097 simdutf_really_inline simd16<bool>(const __m256i _value) : base16<bool>(_value) {}
2098 // Splat constructor
simd16simdutf::haswell::__anon13839::simd::simd162099 simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
2100
to_bitmasksimdutf::haswell::__anon13839::simd::simd162101 simdutf_really_inline bitmask_type to_bitmask() const { return _mm256_movemask_epi8(*this); }
anysimdutf::haswell::__anon13839::simd::simd162102 simdutf_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
operator ~simdutf::haswell::__anon13839::simd::simd162103 simdutf_really_inline simd16<bool> operator~() const { return *this ^ true; }
2104 };
2105
2106 template<typename T>
2107 struct base16_numeric: base16<T> {
splatsimdutf::haswell::__anon13839::simd::base16_numeric2108 static simdutf_really_inline simd16<T> splat(T _value) { return _mm256_set1_epi16(_value); }
zerosimdutf::haswell::__anon13839::simd::base16_numeric2109 static simdutf_really_inline simd16<T> zero() { return _mm256_setzero_si256(); }
loadsimdutf::haswell::__anon13839::simd::base16_numeric2110 static simdutf_really_inline simd16<T> load(const T values[8]) {
2111 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
2112 }
2113
base16_numericsimdutf::haswell::__anon13839::simd::base16_numeric2114 simdutf_really_inline base16_numeric() : base16<T>() {}
base16_numericsimdutf::haswell::__anon13839::simd::base16_numeric2115 simdutf_really_inline base16_numeric(const __m256i _value) : base16<T>(_value) {}
2116
2117 // Store to array
storesimdutf::haswell::__anon13839::simd::base16_numeric2118 simdutf_really_inline void store(T dst[8]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); }
2119
2120 // Override to distinguish from bool version
operator ~simdutf::haswell::__anon13839::simd::base16_numeric2121 simdutf_really_inline simd16<T> operator~() const { return *this ^ 0xFFFFu; }
2122
2123 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::haswell::__anon13839::simd::base16_numeric2124 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return _mm256_add_epi16(*this, other); }
operator -simdutf::haswell::__anon13839::simd::base16_numeric2125 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return _mm256_sub_epi16(*this, other); }
operator +=simdutf::haswell::__anon13839::simd::base16_numeric2126 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
operator -=simdutf::haswell::__anon13839::simd::base16_numeric2127 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
2128 };
2129
2130 // Signed code units
2131 template<>
2132 struct simd16<int16_t> : base16_numeric<int16_t> {
simd16simdutf::haswell::__anon13839::simd::simd162133 simdutf_really_inline simd16() : base16_numeric<int16_t>() {}
simd16simdutf::haswell::__anon13839::simd::simd162134 simdutf_really_inline simd16(const __m256i _value) : base16_numeric<int16_t>(_value) {}
2135 // Splat constructor
simd16simdutf::haswell::__anon13839::simd::simd162136 simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
2137 // Array constructor
simd16simdutf::haswell::__anon13839::simd::simd162138 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
simd16simdutf::haswell::__anon13839::simd::simd162139 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
2140 // Order-sensitive comparisons
max_valsimdutf::haswell::__anon13839::simd::simd162141 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return _mm256_max_epi16(*this, other); }
min_valsimdutf::haswell::__anon13839::simd::simd162142 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return _mm256_min_epi16(*this, other); }
operator >simdutf::haswell::__anon13839::simd::simd162143 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return _mm256_cmpgt_epi16(*this, other); }
operator <simdutf::haswell::__anon13839::simd::simd162144 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return _mm256_cmpgt_epi16(other, *this); }
2145 };
2146
2147 // Unsigned code units
2148 template<>
2149 struct simd16<uint16_t>: base16_numeric<uint16_t> {
simd16simdutf::haswell::__anon13839::simd::simd162150 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
simd16simdutf::haswell::__anon13839::simd::simd162151 simdutf_really_inline simd16(const __m256i _value) : base16_numeric<uint16_t>(_value) {}
2152
2153 // Splat constructor
simd16simdutf::haswell::__anon13839::simd::simd162154 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
2155 // Array constructor
simd16simdutf::haswell::__anon13839::simd::simd162156 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
simd16simdutf::haswell::__anon13839::simd::simd162157 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
2158
2159 // Saturated math
saturating_addsimdutf::haswell::__anon13839::simd::simd162160 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm256_adds_epu16(*this, other); }
saturating_subsimdutf::haswell::__anon13839::simd::simd162161 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm256_subs_epu16(*this, other); }
2162
2163 // Order-specific operations
max_valsimdutf::haswell::__anon13839::simd::simd162164 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm256_max_epu16(*this, other); }
min_valsimdutf::haswell::__anon13839::simd::simd162165 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm256_min_epu16(*this, other); }
2166 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::haswell::__anon13839::simd::simd162167 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
2168 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::haswell::__anon13839::simd::simd162169 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::haswell::__anon13839::simd::simd162170 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
operator >=simdutf::haswell::__anon13839::simd::simd162171 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
operator >simdutf::haswell::__anon13839::simd::simd162172 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
operator <simdutf::haswell::__anon13839::simd::simd162173 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
2174
2175 // Bit-specific operations
bits_not_setsimdutf::haswell::__anon13839::simd::simd162176 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
bits_not_setsimdutf::haswell::__anon13839::simd::simd162177 simdutf_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const { return (*this & bits).bits_not_set(); }
any_bits_setsimdutf::haswell::__anon13839::simd::simd162178 simdutf_really_inline simd16<bool> any_bits_set() const { return ~this->bits_not_set(); }
any_bits_setsimdutf::haswell::__anon13839::simd::simd162179 simdutf_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const { return ~this->bits_not_set(bits); }
2180
bits_not_set_anywheresimdutf::haswell::__anon13839::simd::simd162181 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); }
any_bits_set_anywheresimdutf::haswell::__anon13839::simd::simd162182 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
bits_not_set_anywheresimdutf::haswell::__anon13839::simd::simd162183 simdutf_really_inline bool bits_not_set_anywhere(simd16<uint16_t> bits) const { return _mm256_testz_si256(*this, bits); }
any_bits_set_anywheresimdutf::haswell::__anon13839::simd::simd162184 simdutf_really_inline bool any_bits_set_anywhere(simd16<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
2185 template<int N>
shrsimdutf::haswell::__anon13839::simd::simd162186 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(_mm256_srli_epi16(*this, N)); }
2187 template<int N>
shlsimdutf::haswell::__anon13839::simd::simd162188 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(_mm256_slli_epi16(*this, N)); }
2189 // Get one of the bits and make a bitmask out of it.
2190 // e.g. value.get_bit<7>() gets the high bit
2191 template<int N>
get_bitsimdutf::haswell::__anon13839::simd::simd162192 simdutf_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 15-N)); }
2193
2194 // Change the endianness
swap_bytessimdutf::haswell::__anon13839::simd::simd162195 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
2196 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
2197 17, 16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
2198 return _mm256_shuffle_epi8(*this, swap);
2199 }
2200
2201 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
packsimdutf::haswell::__anon13839::simd::simd162202 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
2203 // Note: the AVX2 variant of pack operates on 128-bit lanes, thus
2204 // we have to shuffle lanes in order to produce bytes in the
2205 // correct order.
2206
2207 // get the 0th lanes
2208 const __m128i lo_0 = _mm256_extracti128_si256(v0, 0);
2209 const __m128i lo_1 = _mm256_extracti128_si256(v1, 0);
2210
2211 // get the 1st lanes
2212 const __m128i hi_0 = _mm256_extracti128_si256(v0, 1);
2213 const __m128i hi_1 = _mm256_extracti128_si256(v1, 1);
2214
2215 // build new vectors (shuffle lanes)
2216 const __m256i t0 = _mm256_set_m128i(lo_1, lo_0);
2217 const __m256i t1 = _mm256_set_m128i(hi_1, hi_0);
2218
2219 // pack code units in linear order from v0 and v1
2220 return _mm256_packus_epi16(t0, t1);
2221 }
2222 };
2223
2224
2225 template<typename T>
2226 struct simd16x32 {
2227 static constexpr int NUM_CHUNKS = 64 / sizeof(simd16<T>);
2228 static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block.");
2229 simd16<T> chunks[NUM_CHUNKS];
2230
2231 simd16x32(const simd16x32<T>& o) = delete; // no copy allowed
2232 simd16x32<T>& operator=(const simd16<T> other) = delete; // no assignment allowed
2233 simd16x32() = delete; // no default constructor allowed
2234
simd16x32simdutf::haswell::__anon13839::simd::simd16x322235 simdutf_really_inline simd16x32(const simd16<T> chunk0, const simd16<T> chunk1) : chunks{chunk0, chunk1} {}
simd16x32simdutf::haswell::__anon13839::simd::simd16x322236 simdutf_really_inline simd16x32(const T* ptr) : chunks{simd16<T>::load(ptr), simd16<T>::load(ptr+sizeof(simd16<T>)/sizeof(T))} {}
2237
storesimdutf::haswell::__anon13839::simd::simd16x322238 simdutf_really_inline void store(T* ptr) const {
2239 this->chunks[0].store(ptr+sizeof(simd16<T>)*0/sizeof(T));
2240 this->chunks[1].store(ptr+sizeof(simd16<T>)*1/sizeof(T));
2241 }
2242
to_bitmasksimdutf::haswell::__anon13839::simd::simd16x322243 simdutf_really_inline uint64_t to_bitmask() const {
2244 uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
2245 uint64_t r_hi = this->chunks[1].to_bitmask();
2246 return r_lo | (r_hi << 32);
2247 }
2248
reduce_orsimdutf::haswell::__anon13839::simd::simd16x322249 simdutf_really_inline simd16<T> reduce_or() const {
2250 return this->chunks[0] | this->chunks[1];
2251 }
2252
is_asciisimdutf::haswell::__anon13839::simd::simd16x322253 simdutf_really_inline bool is_ascii() const {
2254 return this->reduce_or().is_ascii();
2255 }
2256
store_ascii_as_utf16simdutf::haswell::__anon13839::simd::simd16x322257 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
2258 this->chunks[0].store_ascii_as_utf16(ptr+sizeof(simd16<T>)*0);
2259 this->chunks[1].store_ascii_as_utf16(ptr+sizeof(simd16<T>));
2260 }
2261
bit_orsimdutf::haswell::__anon13839::simd::simd16x322262 simdutf_really_inline simd16x32<T> bit_or(const T m) const {
2263 const simd16<T> mask = simd16<T>::splat(m);
2264 return simd16x32<T>(
2265 this->chunks[0] | mask,
2266 this->chunks[1] | mask
2267 );
2268 }
2269
swap_bytessimdutf::haswell::__anon13839::simd::simd16x322270 simdutf_really_inline void swap_bytes() {
2271 this->chunks[0] = this->chunks[0].swap_bytes();
2272 this->chunks[1] = this->chunks[1].swap_bytes();
2273 }
2274
eqsimdutf::haswell::__anon13839::simd::simd16x322275 simdutf_really_inline uint64_t eq(const T m) const {
2276 const simd16<T> mask = simd16<T>::splat(m);
2277 return simd16x32<bool>(
2278 this->chunks[0] == mask,
2279 this->chunks[1] == mask
2280 ).to_bitmask();
2281 }
2282
eqsimdutf::haswell::__anon13839::simd::simd16x322283 simdutf_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
2284 return simd16x32<bool>(
2285 this->chunks[0] == other.chunks[0],
2286 this->chunks[1] == other.chunks[1]
2287 ).to_bitmask();
2288 }
2289
lteqsimdutf::haswell::__anon13839::simd::simd16x322290 simdutf_really_inline uint64_t lteq(const T m) const {
2291 const simd16<T> mask = simd16<T>::splat(m);
2292 return simd16x32<bool>(
2293 this->chunks[0] <= mask,
2294 this->chunks[1] <= mask
2295 ).to_bitmask();
2296 }
2297
in_rangesimdutf::haswell::__anon13839::simd::simd16x322298 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2299 const simd16<T> mask_low = simd16<T>::splat(low);
2300 const simd16<T> mask_high = simd16<T>::splat(high);
2301
2302 return simd16x32<bool>(
2303 (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
2304 (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)
2305 ).to_bitmask();
2306 }
not_in_rangesimdutf::haswell::__anon13839::simd::simd16x322307 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2308 const simd16<T> mask_low = simd16<T>::splat(static_cast<T>(low-1));
2309 const simd16<T> mask_high = simd16<T>::splat(static_cast<T>(high+1));
2310 return simd16x32<bool>(
2311 (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low),
2312 (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)
2313 ).to_bitmask();
2314 }
ltsimdutf::haswell::__anon13839::simd::simd16x322315 simdutf_really_inline uint64_t lt(const T m) const {
2316 const simd16<T> mask = simd16<T>::splat(m);
2317 return simd16x32<bool>(
2318 this->chunks[0] < mask,
2319 this->chunks[1] < mask
2320 ).to_bitmask();
2321 }
2322 }; // struct simd16x32<T>
2323 /* end file src/simdutf/haswell/simd16-inl.h */
2324
2325 } // namespace simd
2326
2327 } // unnamed namespace
2328 } // namespace haswell
2329 } // namespace simdutf
2330
2331 #endif // SIMDUTF_HASWELL_SIMD_H
2332 /* end file src/simdutf/haswell/simd.h */
2333
2334 /* begin file src/simdutf/haswell/end.h */
2335 #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL
2336 // nothing needed.
2337 #else
2338 SIMDUTF_UNTARGET_REGION
2339 #endif
2340
2341
2342 #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
2343 SIMDUTF_POP_DISABLE_WARNINGS
2344 #endif // end of workaround
2345 /* end file src/simdutf/haswell/end.h */
2346
2347 #endif // SIMDUTF_IMPLEMENTATION_HASWELL
2348 #endif // SIMDUTF_HASWELL_COMMON_H
2349 /* end file src/simdutf/haswell.h */
2350 /* begin file src/simdutf/westmere.h */
2351 #ifndef SIMDUTF_WESTMERE_H
2352 #define SIMDUTF_WESTMERE_H
2353
2354 #ifdef SIMDUTF_FALLBACK_H
2355 #error "westmere.h must be included before fallback.h"
2356 #endif
2357
2358
// Unless the build already chose a value, enable Westmere on x86-64, but turn
// it off when Icelake or Haswell can always run.
#if !defined(SIMDUTF_IMPLEMENTATION_WESTMERE)
//
// Do not default this to (SIMDUTF_IS_X86_64 && !SIMDUTF_REQUIRES_HASWELL):
// the choice is meant to be left to runtime dispatch.
//
#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL
#define SIMDUTF_IMPLEMENTATION_WESTMERE 0
#else
#define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64)
#endif

#endif
2372
2373 #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__)
2374
2375 #if SIMDUTF_IMPLEMENTATION_WESTMERE
2376
2377 #define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,popcnt")
2378
namespace simdutf {
/**
 * Namespace of the Westmere (Intel SSE4.2) implementation.
 * Declared empty here so the documentation comment has something to attach to.
 */
namespace westmere {
} // namespace westmere
} // namespace simdutf
2386
2387 //
2388 // These two need to be included outside SIMDUTF_TARGET_REGION
2389 //
2390 /* begin file src/simdutf/westmere/implementation.h */
2391 #ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H
2392 #define SIMDUTF_WESTMERE_IMPLEMENTATION_H
2393
2394
2395 // The constructor may be executed on any host, so we take care not to use SIMDUTF_TARGET_REGION
2396 namespace simdutf {
2397 namespace westmere {
2398
2399 namespace {
2400 using namespace simdutf;
2401 }
2402
2403 class implementation final : public simdutf::implementation {
2404 public:
implementation()2405 simdutf_really_inline implementation() : simdutf::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42) {}
2406 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
2407 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
2408 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
2409 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
2410 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
2411 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
2412 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
2413 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
2414 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
2415 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
2416 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
2417 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
2418 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2419 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2420 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2421 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2422 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
2423 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2424 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2425 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2426 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2427 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2428 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2429 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2430 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2431 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2432 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2433 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2434 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2435 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2436 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2437 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2438 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2439 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2440 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2441 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2442 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2443 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2444 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2445 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2446 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2447 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2448 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2449 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2450 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2451 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2452 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2453 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2454 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2455 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2456 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2457 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2458 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2459 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2460 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2461 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2462 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2463 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
2464 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
2465 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
2466 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
2467 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
2468 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
2469 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
2470 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
2471 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
2472 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
2473 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
2474 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
2475 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
2476 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
2477 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
2478 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
2479 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
2480 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
2481 };
2482
2483 } // namespace westmere
2484 } // namespace simdutf
2485
2486 #endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H
2487 /* end file src/simdutf/westmere/implementation.h */
2488 /* begin file src/simdutf/westmere/intrinsics.h */
2489 #ifndef SIMDUTF_WESTMERE_INTRINSICS_H
2490 #define SIMDUTF_WESTMERE_INTRINSICS_H
2491
2492 #ifdef SIMDUTF_VISUAL_STUDIO
2493 // under clang within visual studio, this will include <x86intrin.h>
2494 #include <intrin.h> // visual studio or clang
2495 #else
2496
2497 #if SIMDUTF_GCC11ORMORE
2498 // We should not get warnings while including <x86intrin.h> yet we do
2499 // under some versions of GCC.
2500 // If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnings.
2502 SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
2503 #endif
2504
2505 #include <x86intrin.h> // elsewhere
2506
2507
2508 #if SIMDUTF_GCC11ORMORE
2509 // cancels the suppression of the -Wuninitialized
2510 SIMDUTF_POP_DISABLE_WARNINGS
2511 #endif
2512
2513 #endif // SIMDUTF_VISUAL_STUDIO
2514
2515
2516 #ifdef SIMDUTF_CLANG_VISUAL_STUDIO
2517 /**
2518 * You are not supposed, normally, to include these
2519 * headers directly. Instead you should either include intrin.h
2520 * or x86intrin.h. However, when compiling with clang
2521 * under Windows (i.e., when _MSC_VER is set), these headers
2522 * only get included *if* the corresponding features are detected
2523 * from macros:
2524 */
2525 #include <smmintrin.h> // for _mm_alignr_epi8
2526 #endif
2527
2528
2529
2530 #endif // SIMDUTF_WESTMERE_INTRINSICS_H
2531 /* end file src/simdutf/westmere/intrinsics.h */
2532
2533 //
2534 // The rest need to be inside the region
2535 //
2536 /* begin file src/simdutf/westmere/begin.h */
2537 // redefining SIMDUTF_IMPLEMENTATION to "westmere"
2538 // #define SIMDUTF_IMPLEMENTATION westmere
2539
2540 #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE
2541 // nothing needed.
2542 #else
2543 SIMDUTF_TARGET_WESTMERE
2544 #endif
2545 /* end file src/simdutf/westmere/begin.h */
2546
2547 // Declarations
2548 /* begin file src/simdutf/westmere/bitmanipulation.h */
2549 #ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H
2550 #define SIMDUTF_WESTMERE_BITMANIPULATION_H
2551
2552 namespace simdutf {
2553 namespace westmere {
2554 namespace {
2555
2556 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
count_ones(uint64_t input_num)2557 simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) {
2558 // note: we do not support legacy 32-bit Windows
2559 return __popcnt64(input_num);// Visual Studio wants two underscores
2560 }
2561 #else
2562 simdutf_really_inline long long int count_ones(uint64_t input_num) {
2563 return _popcnt64(input_num);
2564 }
2565 #endif
2566
2567 } // unnamed namespace
2568 } // namespace westmere
2569 } // namespace simdutf
2570
2571 #endif // SIMDUTF_WESTMERE_BITMANIPULATION_H
2572 /* end file src/simdutf/westmere/bitmanipulation.h */
2573 /* begin file src/simdutf/westmere/simd.h */
2574 #ifndef SIMDUTF_WESTMERE_SIMD_H
2575 #define SIMDUTF_WESTMERE_SIMD_H
2576
2577 namespace simdutf {
2578 namespace westmere {
2579 namespace {
2580 namespace simd {
2581
2582 template<typename Child>
2583 struct base {
2584 __m128i value;
2585
2586 // Zero constructor
basesimdutf::westmere::__anon13842::simd::base2587 simdutf_really_inline base() : value{__m128i()} {}
2588
2589 // Conversion from SIMD register
basesimdutf::westmere::__anon13842::simd::base2590 simdutf_really_inline base(const __m128i _value) : value(_value) {}
2591 // Conversion to SIMD register
operator const __m128i&simdutf::westmere::__anon13842::simd::base2592 simdutf_really_inline operator const __m128i&() const { return this->value; }
operator __m128i&simdutf::westmere::__anon13842::simd::base2593 simdutf_really_inline operator __m128i&() { return this->value; }
2594 template <endianness big_endian>
store_ascii_as_utf16simdutf::westmere::__anon13842::simd::base2595 simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
2596 __m128i first = _mm_cvtepu8_epi16(*this);
2597 __m128i second = _mm_cvtepu8_epi16(_mm_srli_si128(*this,8));
2598 if (big_endian) {
2599 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
2600 first = _mm_shuffle_epi8(first, swap);
2601 second = _mm_shuffle_epi8(second, swap);
2602 }
2603 _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first);
2604 _mm_storeu_si128(reinterpret_cast<__m128i *>(p+8), second);
2605 }
store_ascii_as_utf32simdutf::westmere::__anon13842::simd::base2606 simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
2607 _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this));
2608 _mm_storeu_si128(reinterpret_cast<__m128i *>(p+4), _mm_cvtepu8_epi32(_mm_srli_si128(*this,4)));
2609 _mm_storeu_si128(reinterpret_cast<__m128i *>(p+8), _mm_cvtepu8_epi32(_mm_srli_si128(*this,8)));
2610 _mm_storeu_si128(reinterpret_cast<__m128i *>(p+12), _mm_cvtepu8_epi32(_mm_srli_si128(*this,12)));
2611 }
2612 // Bit operations
operator |simdutf::westmere::__anon13842::simd::base2613 simdutf_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); }
operator &simdutf::westmere::__anon13842::simd::base2614 simdutf_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
operator ^simdutf::westmere::__anon13842::simd::base2615 simdutf_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
bit_andnotsimdutf::westmere::__anon13842::simd::base2616 simdutf_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); }
operator |=simdutf::westmere::__anon13842::simd::base2617 simdutf_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; }
operator &=simdutf::westmere::__anon13842::simd::base2618 simdutf_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; }
operator ^=simdutf::westmere::__anon13842::simd::base2619 simdutf_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
2620 };
2621
2622 // Forward-declared so they can be used by splat and friends.
2623 template<typename T>
2624 struct simd8;
2625
2626 template<typename T, typename Mask=simd8<bool>>
2627 struct base8: base<simd8<T>> {
2628 typedef uint16_t bitmask_t;
2629 typedef uint32_t bitmask2_t;
2630
firstsimdutf::westmere::__anon13842::simd::base82631 simdutf_really_inline T first() const { return _mm_extract_epi8(*this,0); }
lastsimdutf::westmere::__anon13842::simd::base82632 simdutf_really_inline T last() const { return _mm_extract_epi8(*this,15); }
base8simdutf::westmere::__anon13842::simd::base82633 simdutf_really_inline base8() : base<simd8<T>>() {}
base8simdutf::westmere::__anon13842::simd::base82634 simdutf_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
2635
operator ==simdutf::westmere::__anon13842::simd::base82636 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm_cmpeq_epi8(lhs, rhs); }
2637
2638 static const int SIZE = sizeof(base<simd8<T>>::value);
2639
2640 template<int N=1>
prevsimdutf::westmere::__anon13842::simd::base82641 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
2642 return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
2643 }
2644 };
2645
2646 // SIMD byte mask type (returned by things like eq and gt)
2647 template<>
2648 struct simd8<bool>: base8<bool> {
splatsimdutf::westmere::__anon13842::simd::simd82649 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); }
2650
simd8simdutf::westmere::__anon13842::simd::simd82651 simdutf_really_inline simd8<bool>() : base8() {}
simd8simdutf::westmere::__anon13842::simd::simd82652 simdutf_really_inline simd8<bool>(const __m128i _value) : base8<bool>(_value) {}
2653 // Splat constructor
simd8simdutf::westmere::__anon13842::simd::simd82654 simdutf_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
2655
to_bitmasksimdutf::westmere::__anon13842::simd::simd82656 simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
anysimdutf::westmere::__anon13842::simd::simd82657 simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
nonesimdutf::westmere::__anon13842::simd::simd82658 simdutf_really_inline bool none() const { return _mm_testz_si128(*this, *this); }
allsimdutf::westmere::__anon13842::simd::simd82659 simdutf_really_inline bool all() const { return _mm_movemask_epi8(*this) == 0xFFFF; }
operator ~simdutf::westmere::__anon13842::simd::simd82660 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
2661 };
2662
2663 template<typename T>
2664 struct base8_numeric: base8<T> {
splatsimdutf::westmere::__anon13842::simd::base8_numeric2665 static simdutf_really_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); }
zerosimdutf::westmere::__anon13842::simd::base8_numeric2666 static simdutf_really_inline simd8<T> zero() { return _mm_setzero_si128(); }
loadsimdutf::westmere::__anon13842::simd::base8_numeric2667 static simdutf_really_inline simd8<T> load(const T values[16]) {
2668 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
2669 }
2670 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::westmere::__anon13842::simd::base8_numeric2671 static simdutf_really_inline simd8<T> repeat_16(
2672 T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
2673 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
2674 ) {
2675 return simd8<T>(
2676 v0, v1, v2, v3, v4, v5, v6, v7,
2677 v8, v9, v10,v11,v12,v13,v14,v15
2678 );
2679 }
2680
base8_numericsimdutf::westmere::__anon13842::simd::base8_numeric2681 simdutf_really_inline base8_numeric() : base8<T>() {}
base8_numericsimdutf::westmere::__anon13842::simd::base8_numeric2682 simdutf_really_inline base8_numeric(const __m128i _value) : base8<T>(_value) {}
2683
2684 // Store to array
storesimdutf::westmere::__anon13842::simd::base8_numeric2685 simdutf_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
2686
2687 // Override to distinguish from bool version
operator ~simdutf::westmere::__anon13842::simd::base8_numeric2688 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
2689
2690 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::westmere::__anon13842::simd::base8_numeric2691 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
operator -simdutf::westmere::__anon13842::simd::base8_numeric2692 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
operator +=simdutf::westmere::__anon13842::simd::base8_numeric2693 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
operator -=simdutf::westmere::__anon13842::simd::base8_numeric2694 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
2695
2696 // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
2697 template<typename L>
lookup_16simdutf::westmere::__anon13842::simd::base8_numeric2698 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
2699 return _mm_shuffle_epi8(lookup_table, *this);
2700 }
2701
2702 template<typename L>
lookup_16simdutf::westmere::__anon13842::simd::base8_numeric2703 simdutf_really_inline simd8<L> lookup_16(
2704 L replace0, L replace1, L replace2, L replace3,
2705 L replace4, L replace5, L replace6, L replace7,
2706 L replace8, L replace9, L replace10, L replace11,
2707 L replace12, L replace13, L replace14, L replace15) const {
2708 return lookup_16(simd8<L>::repeat_16(
2709 replace0, replace1, replace2, replace3,
2710 replace4, replace5, replace6, replace7,
2711 replace8, replace9, replace10, replace11,
2712 replace12, replace13, replace14, replace15
2713 ));
2714 }
2715 };
2716
2717 // Signed bytes
2718 template<>
2719 struct simd8<int8_t> : base8_numeric<int8_t> {
simd8simdutf::westmere::__anon13842::simd::simd82720 simdutf_really_inline simd8() : base8_numeric<int8_t>() {}
simd8simdutf::westmere::__anon13842::simd::simd82721 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {}
2722 // Splat constructor
simd8simdutf::westmere::__anon13842::simd::simd82723 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
2724 // Array constructor
simd8simdutf::westmere::__anon13842::simd::simd82725 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
2726 // Member-by-member initialization
simd8simdutf::westmere::__anon13842::simd::simd82727 simdutf_really_inline simd8(
2728 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
2729 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
2730 ) : simd8(_mm_setr_epi8(
2731 v0, v1, v2, v3, v4, v5, v6, v7,
2732 v8, v9, v10,v11,v12,v13,v14,v15
2733 )) {}
2734 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::westmere::__anon13842::simd::simd82735 simdutf_really_inline static simd8<int8_t> repeat_16(
2736 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
2737 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
2738 ) {
2739 return simd8<int8_t>(
2740 v0, v1, v2, v3, v4, v5, v6, v7,
2741 v8, v9, v10,v11,v12,v13,v14,v15
2742 );
2743 }
2744 simdutf_really_inline operator simd8<uint8_t>() const;
is_asciisimdutf::westmere::__anon13842::simd::simd82745 simdutf_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
2746
2747 // Order-sensitive comparisons
max_valsimdutf::westmere::__anon13842::simd::simd82748 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); }
min_valsimdutf::westmere::__anon13842::simd::simd82749 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); }
operator >simdutf::westmere::__anon13842::simd::simd82750 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); }
operator <simdutf::westmere::__anon13842::simd::simd82751 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); }
2752 };
2753
2754 // Unsigned bytes
2755 template<>
2756 struct simd8<uint8_t>: base8_numeric<uint8_t> {
simd8simdutf::westmere::__anon13842::simd::simd82757 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
simd8simdutf::westmere::__anon13842::simd::simd82758 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {}
2759
2760 // Splat constructor
simd8simdutf::westmere::__anon13842::simd::simd82761 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
2762 // Array constructor
simd8simdutf::westmere::__anon13842::simd::simd82763 simdutf_really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
2764 // Member-by-member initialization
simd8simdutf::westmere::__anon13842::simd::simd82765 simdutf_really_inline simd8(
2766 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
2767 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
2768 ) : simd8(_mm_setr_epi8(
2769 v0, v1, v2, v3, v4, v5, v6, v7,
2770 v8, v9, v10,v11,v12,v13,v14,v15
2771 )) {}
2772 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::westmere::__anon13842::simd::simd82773 simdutf_really_inline static simd8<uint8_t> repeat_16(
2774 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
2775 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
2776 ) {
2777 return simd8<uint8_t>(
2778 v0, v1, v2, v3, v4, v5, v6, v7,
2779 v8, v9, v10,v11,v12,v13,v14,v15
2780 );
2781 }
2782
2783 // Saturated math
saturating_addsimdutf::westmere::__anon13842::simd::simd82784 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); }
saturating_subsimdutf::westmere::__anon13842::simd::simd82785 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); }
2786
2787 // Order-specific operations
max_valsimdutf::westmere::__anon13842::simd::simd82788 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
min_valsimdutf::westmere::__anon13842::simd::simd82789 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
2790 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::westmere::__anon13842::simd::simd82791 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
2792 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::westmere::__anon13842::simd::simd82793 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::westmere::__anon13842::simd::simd82794 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
operator >=simdutf::westmere::__anon13842::simd::simd82795 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
operator >simdutf::westmere::__anon13842::simd::simd82796 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
operator <simdutf::westmere::__anon13842::simd::simd82797 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
2798
2799 // Bit-specific operations
bits_not_setsimdutf::westmere::__anon13842::simd::simd82800 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
bits_not_setsimdutf::westmere::__anon13842::simd::simd82801 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
any_bits_setsimdutf::westmere::__anon13842::simd::simd82802 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
any_bits_setsimdutf::westmere::__anon13842::simd::simd82803 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
is_asciisimdutf::westmere::__anon13842::simd::simd82804 simdutf_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
2805
bits_not_set_anywheresimdutf::westmere::__anon13842::simd::simd82806 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
any_bits_set_anywheresimdutf::westmere::__anon13842::simd::simd82807 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
bits_not_set_anywheresimdutf::westmere::__anon13842::simd::simd82808 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); }
any_bits_set_anywheresimdutf::westmere::__anon13842::simd::simd82809 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
2810 template<int N>
shrsimdutf::westmere::__anon13842::simd::simd82811 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
2812 template<int N>
shlsimdutf::westmere::__anon13842::simd::simd82813 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
2814 // Get one of the bits and make a bitmask out of it.
2815 // e.g. value.get_bit<7>() gets the high bit
2816 template<int N>
get_bitsimdutf::westmere::__anon13842::simd::simd82817 simdutf_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); }
2818 };
operator simd8<uint8_t>() const2819 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
2820
2821 // Unsigned bytes
2822 template<>
2823 struct simd8<uint16_t>: base<uint16_t> {
splatsimdutf::westmere::__anon13842::simd::simd82824 static simdutf_really_inline simd8<uint16_t> splat(uint16_t _value) { return _mm_set1_epi16(_value); }
loadsimdutf::westmere::__anon13842::simd::simd82825 static simdutf_really_inline simd8<uint16_t> load(const uint16_t values[8]) {
2826 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
2827 }
2828
simd8simdutf::westmere::__anon13842::simd::simd82829 simdutf_really_inline simd8() : base<uint16_t>() {}
simd8simdutf::westmere::__anon13842::simd::simd82830 simdutf_really_inline simd8(const __m128i _value) : base<uint16_t>(_value) {}
2831 // Splat constructor
simd8simdutf::westmere::__anon13842::simd::simd82832 simdutf_really_inline simd8(uint16_t _value) : simd8(splat(_value)) {}
2833 // Array constructor
simd8simdutf::westmere::__anon13842::simd::simd82834 simdutf_really_inline simd8(const uint16_t* values) : simd8(load(values)) {}
2835 // Member-by-member initialization
simd8simdutf::westmere::__anon13842::simd::simd82836 simdutf_really_inline simd8(
2837 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7
2838 ) : simd8(_mm_setr_epi16(
2839 v0, v1, v2, v3, v4, v5, v6, v7
2840 )) {}
2841
2842 // Saturated math
saturating_addsimdutf::westmere::__anon13842::simd::simd82843 simdutf_really_inline simd8<uint16_t> saturating_add(const simd8<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
saturating_subsimdutf::westmere::__anon13842::simd::simd82844 simdutf_really_inline simd8<uint16_t> saturating_sub(const simd8<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
2845
2846 // Order-specific operations
max_valsimdutf::westmere::__anon13842::simd::simd82847 simdutf_really_inline simd8<uint16_t> max_val(const simd8<uint16_t> other) const { return _mm_max_epu16(*this, other); }
min_valsimdutf::westmere::__anon13842::simd::simd82848 simdutf_really_inline simd8<uint16_t> min_val(const simd8<uint16_t> other) const { return _mm_min_epu16(*this, other); }
2849 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::westmere::__anon13842::simd::simd82850 simdutf_really_inline simd8<uint16_t> gt_bits(const simd8<uint16_t> other) const { return this->saturating_sub(other); }
2851 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::westmere::__anon13842::simd::simd82852 simdutf_really_inline simd8<uint16_t> lt_bits(const simd8<uint16_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::westmere::__anon13842::simd::simd82853 simdutf_really_inline simd8<bool> operator<=(const simd8<uint16_t> other) const { return other.max_val(*this) == other; }
operator >=simdutf::westmere::__anon13842::simd::simd82854 simdutf_really_inline simd8<bool> operator>=(const simd8<uint16_t> other) const { return other.min_val(*this) == other; }
operator ==simdutf::westmere::__anon13842::simd::simd82855 simdutf_really_inline simd8<bool> operator==(const simd8<uint16_t> other) const { return _mm_cmpeq_epi16(*this, other); }
operator &simdutf::westmere::__anon13842::simd::simd82856 simdutf_really_inline simd8<bool> operator&(const simd8<uint16_t> other) const { return _mm_and_si128(*this, other); }
operator |simdutf::westmere::__anon13842::simd::simd82857 simdutf_really_inline simd8<bool> operator|(const simd8<uint16_t> other) const { return _mm_or_si128(*this, other); }
2858
2859 // Bit-specific operations
bits_not_setsimdutf::westmere::__anon13842::simd::simd82860 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint16_t(0); }
any_bits_setsimdutf::westmere::__anon13842::simd::simd82861 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
2862
bits_not_set_anywheresimdutf::westmere::__anon13842::simd::simd82863 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
any_bits_set_anywheresimdutf::westmere::__anon13842::simd::simd82864 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
bits_not_set_anywheresimdutf::westmere::__anon13842::simd::simd82865 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint16_t> bits) const { return _mm_testz_si128(*this, bits); }
any_bits_set_anywheresimdutf::westmere::__anon13842::simd::simd82866 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
2867 };
2868 template<typename T>
2869 struct simd8x64 {
2870 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
2871 static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block.");
2872 simd8<T> chunks[NUM_CHUNKS];
2873
2874 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
2875 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
2876 simd8x64() = delete; // no default constructor allowed
2877
simd8x64simdutf::westmere::__anon13842::simd::simd8x642878 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
simd8x64simdutf::westmere::__anon13842::simd::simd8x642879 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
2880
storesimdutf::westmere::__anon13842::simd::simd8x642881 simdutf_really_inline void store(T* ptr) const {
2882 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
2883 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
2884 this->chunks[2].store(ptr+sizeof(simd8<T>)*2/sizeof(T));
2885 this->chunks[3].store(ptr+sizeof(simd8<T>)*3/sizeof(T));
2886 }
2887
operator |=simdutf::westmere::__anon13842::simd::simd8x642888 simdutf_really_inline simd8x64<T>& operator |=(const simd8x64<T> &other) {
2889 this->chunks[0] |= other.chunks[0];
2890 this->chunks[1] |= other.chunks[1];
2891 this->chunks[2] |= other.chunks[2];
2892 this->chunks[3] |= other.chunks[3];
2893 return *this;
2894 }
2895
reduce_orsimdutf::westmere::__anon13842::simd::simd8x642896 simdutf_really_inline simd8<T> reduce_or() const {
2897 return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
2898 }
2899
is_asciisimdutf::westmere::__anon13842::simd::simd8x642900 simdutf_really_inline bool is_ascii() const {
2901 return this->reduce_or().is_ascii();
2902 }
2903
2904 template <endianness endian>
store_ascii_as_utf16simdutf::westmere::__anon13842::simd::simd8x642905 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
2906 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
2907 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
2908 this->chunks[2].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*2);
2909 this->chunks[3].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*3);
2910 }
2911
store_ascii_as_utf32simdutf::westmere::__anon13842::simd::simd8x642912 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
2913 this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*0);
2914 this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*1);
2915 this->chunks[2].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*2);
2916 this->chunks[3].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*3);
2917 }
2918
to_bitmasksimdutf::westmere::__anon13842::simd::simd8x642919 simdutf_really_inline uint64_t to_bitmask() const {
2920 uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
2921 uint64_t r1 = this->chunks[1].to_bitmask();
2922 uint64_t r2 = this->chunks[2].to_bitmask();
2923 uint64_t r3 = this->chunks[3].to_bitmask();
2924 return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
2925 }
2926
eqsimdutf::westmere::__anon13842::simd::simd8x642927 simdutf_really_inline uint64_t eq(const T m) const {
2928 const simd8<T> mask = simd8<T>::splat(m);
2929 return simd8x64<bool>(
2930 this->chunks[0] == mask,
2931 this->chunks[1] == mask,
2932 this->chunks[2] == mask,
2933 this->chunks[3] == mask
2934 ).to_bitmask();
2935 }
2936
eqsimdutf::westmere::__anon13842::simd::simd8x642937 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
2938 return simd8x64<bool>(
2939 this->chunks[0] == other.chunks[0],
2940 this->chunks[1] == other.chunks[1],
2941 this->chunks[2] == other.chunks[2],
2942 this->chunks[3] == other.chunks[3]
2943 ).to_bitmask();
2944 }
2945
lteqsimdutf::westmere::__anon13842::simd::simd8x642946 simdutf_really_inline uint64_t lteq(const T m) const {
2947 const simd8<T> mask = simd8<T>::splat(m);
2948 return simd8x64<bool>(
2949 this->chunks[0] <= mask,
2950 this->chunks[1] <= mask,
2951 this->chunks[2] <= mask,
2952 this->chunks[3] <= mask
2953 ).to_bitmask();
2954 }
2955
in_rangesimdutf::westmere::__anon13842::simd::simd8x642956 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2957 const simd8<T> mask_low = simd8<T>::splat(low);
2958 const simd8<T> mask_high = simd8<T>::splat(high);
2959
2960 return simd8x64<bool>(
2961 (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
2962 (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
2963 (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
2964 (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
2965 ).to_bitmask();
2966 }
not_in_rangesimdutf::westmere::__anon13842::simd::simd8x642967 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2968 const simd8<T> mask_low = simd8<T>::splat(low-1);
2969 const simd8<T> mask_high = simd8<T>::splat(high+1);
2970 return simd8x64<bool>(
2971 (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low),
2972 (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low),
2973 (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low),
2974 (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)
2975 ).to_bitmask();
2976 }
ltsimdutf::westmere::__anon13842::simd::simd8x642977 simdutf_really_inline uint64_t lt(const T m) const {
2978 const simd8<T> mask = simd8<T>::splat(m);
2979 return simd8x64<bool>(
2980 this->chunks[0] < mask,
2981 this->chunks[1] < mask,
2982 this->chunks[2] < mask,
2983 this->chunks[3] < mask
2984 ).to_bitmask();
2985 }
2986
gtsimdutf::westmere::__anon13842::simd::simd8x642987 simdutf_really_inline uint64_t gt(const T m) const {
2988 const simd8<T> mask = simd8<T>::splat(m);
2989 return simd8x64<bool>(
2990 this->chunks[0] > mask,
2991 this->chunks[1] > mask,
2992 this->chunks[2] > mask,
2993 this->chunks[3] > mask
2994 ).to_bitmask();
2995 }
gteqsimdutf::westmere::__anon13842::simd::simd8x642996 simdutf_really_inline uint64_t gteq(const T m) const {
2997 const simd8<T> mask = simd8<T>::splat(m);
2998 return simd8x64<bool>(
2999 this->chunks[0] >= mask,
3000 this->chunks[1] >= mask,
3001 this->chunks[2] >= mask,
3002 this->chunks[3] >= mask
3003 ).to_bitmask();
3004 }
gteq_unsignedsimdutf::westmere::__anon13842::simd::simd8x643005 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
3006 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3007 return simd8x64<bool>(
3008 simd8<uint8_t>(__m128i(this->chunks[0])) >= mask,
3009 simd8<uint8_t>(__m128i(this->chunks[1])) >= mask,
3010 simd8<uint8_t>(__m128i(this->chunks[2])) >= mask,
3011 simd8<uint8_t>(__m128i(this->chunks[3])) >= mask
3012 ).to_bitmask();
3013 }
3014 }; // struct simd8x64<T>
3015
3016 /* begin file src/simdutf/westmere/simd16-inl.h */
3017 template<typename T>
3018 struct simd16;
3019
3020 template<typename T, typename Mask=simd16<bool>>
3021 struct base16: base<simd16<T>> {
3022 typedef uint16_t bitmask_t;
3023 typedef uint32_t bitmask2_t;
3024
base16simdutf::westmere::__anon13842::simd::base163025 simdutf_really_inline base16() : base<simd16<T>>() {}
base16simdutf::westmere::__anon13842::simd::base163026 simdutf_really_inline base16(const __m128i _value) : base<simd16<T>>(_value) {}
3027 template <typename Pointer>
base16simdutf::westmere::__anon13842::simd::base163028 simdutf_really_inline base16(const Pointer* ptr) : base16(_mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr))) {}
3029
operator ==simdutf::westmere::__anon13842::simd::base163030 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return _mm_cmpeq_epi16(lhs, rhs); }
3031
3032 static const int SIZE = sizeof(base<simd16<T>>::value);
3033
3034 template<int N=1>
prevsimdutf::westmere::__anon13842::simd::base163035 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
3036 return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
3037 }
3038 };
3039
3040 // SIMD byte mask type (returned by things like eq and gt)
3041 template<>
3042 struct simd16<bool>: base16<bool> {
splatsimdutf::westmere::__anon13842::simd::simd163043 static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm_set1_epi16(uint16_t(-(!!_value))); }
3044
simd16simdutf::westmere::__anon13842::simd::simd163045 simdutf_really_inline simd16<bool>() : base16() {}
simd16simdutf::westmere::__anon13842::simd::simd163046 simdutf_really_inline simd16<bool>(const __m128i _value) : base16<bool>(_value) {}
3047 // Splat constructor
simd16simdutf::westmere::__anon13842::simd::simd163048 simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
3049
to_bitmasksimdutf::westmere::__anon13842::simd::simd163050 simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
anysimdutf::westmere::__anon13842::simd::simd163051 simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
operator ~simdutf::westmere::__anon13842::simd::simd163052 simdutf_really_inline simd16<bool> operator~() const { return *this ^ true; }
3053 };
3054
3055 template<typename T>
3056 struct base16_numeric: base16<T> {
splatsimdutf::westmere::__anon13842::simd::base16_numeric3057 static simdutf_really_inline simd16<T> splat(T _value) { return _mm_set1_epi16(_value); }
zerosimdutf::westmere::__anon13842::simd::base16_numeric3058 static simdutf_really_inline simd16<T> zero() { return _mm_setzero_si128(); }
loadsimdutf::westmere::__anon13842::simd::base16_numeric3059 static simdutf_really_inline simd16<T> load(const T values[8]) {
3060 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
3061 }
3062
base16_numericsimdutf::westmere::__anon13842::simd::base16_numeric3063 simdutf_really_inline base16_numeric() : base16<T>() {}
base16_numericsimdutf::westmere::__anon13842::simd::base16_numeric3064 simdutf_really_inline base16_numeric(const __m128i _value) : base16<T>(_value) {}
3065
3066 // Store to array
storesimdutf::westmere::__anon13842::simd::base16_numeric3067 simdutf_really_inline void store(T dst[8]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
3068
3069 // Override to distinguish from bool version
operator ~simdutf::westmere::__anon13842::simd::base16_numeric3070 simdutf_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
3071
3072 // Addition/subtraction are the same for signed and unsigned
operator +simdutf::westmere::__anon13842::simd::base16_numeric3073 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return _mm_add_epi16(*this, other); }
operator -simdutf::westmere::__anon13842::simd::base16_numeric3074 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return _mm_sub_epi16(*this, other); }
operator +=simdutf::westmere::__anon13842::simd::base16_numeric3075 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
operator -=simdutf::westmere::__anon13842::simd::base16_numeric3076 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
3077 };
3078
3079 // Signed code units
3080 template<>
3081 struct simd16<int16_t> : base16_numeric<int16_t> {
simd16simdutf::westmere::__anon13842::simd::simd163082 simdutf_really_inline simd16() : base16_numeric<int16_t>() {}
simd16simdutf::westmere::__anon13842::simd::simd163083 simdutf_really_inline simd16(const __m128i _value) : base16_numeric<int16_t>(_value) {}
3084 // Splat constructor
simd16simdutf::westmere::__anon13842::simd::simd163085 simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
3086 // Array constructor
simd16simdutf::westmere::__anon13842::simd::simd163087 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
simd16simdutf::westmere::__anon13842::simd::simd163088 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
3089 // Member-by-member initialization
simd16simdutf::westmere::__anon13842::simd::simd163090 simdutf_really_inline simd16(
3091 int16_t v0, int16_t v1, int16_t v2, int16_t v3, int16_t v4, int16_t v5, int16_t v6, int16_t v7)
3092 : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {}
3093 simdutf_really_inline operator simd16<uint16_t>() const;
3094
3095 // Order-sensitive comparisons
max_valsimdutf::westmere::__anon13842::simd::simd163096 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return _mm_max_epi16(*this, other); }
min_valsimdutf::westmere::__anon13842::simd::simd163097 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return _mm_min_epi16(*this, other); }
operator >simdutf::westmere::__anon13842::simd::simd163098 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return _mm_cmpgt_epi16(*this, other); }
operator <simdutf::westmere::__anon13842::simd::simd163099 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return _mm_cmpgt_epi16(other, *this); }
3100 };
3101
3102 // Unsigned code units
3103 template<>
3104 struct simd16<uint16_t>: base16_numeric<uint16_t> {
simd16simdutf::westmere::__anon13842::simd::simd163105 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
simd16simdutf::westmere::__anon13842::simd::simd163106 simdutf_really_inline simd16(const __m128i _value) : base16_numeric<uint16_t>(_value) {}
3107
3108 // Splat constructor
simd16simdutf::westmere::__anon13842::simd::simd163109 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
3110 // Array constructor
simd16simdutf::westmere::__anon13842::simd::simd163111 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
simd16simdutf::westmere::__anon13842::simd::simd163112 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
3113 // Member-by-member initialization
simd16simdutf::westmere::__anon13842::simd::simd163114 simdutf_really_inline simd16(
3115 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7)
3116 : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {}
3117 // Repeat 16 values as many times as necessary (usually for lookup tables)
repeat_16simdutf::westmere::__anon13842::simd::simd163118 simdutf_really_inline static simd16<uint16_t> repeat_16(
3119 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7
3120 ) {
3121 return simd16<uint16_t>(v0, v1, v2, v3, v4, v5, v6, v7);
3122 }
3123
3124 // Saturated math
saturating_addsimdutf::westmere::__anon13842::simd::simd163125 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
saturating_subsimdutf::westmere::__anon13842::simd::simd163126 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
3127
3128 // Order-specific operations
max_valsimdutf::westmere::__anon13842::simd::simd163129 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm_max_epu16(*this, other); }
min_valsimdutf::westmere::__anon13842::simd::simd163130 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm_min_epu16(*this, other); }
3131 // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
gt_bitssimdutf::westmere::__anon13842::simd::simd163132 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
3133 // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
lt_bitssimdutf::westmere::__anon13842::simd::simd163134 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
operator <=simdutf::westmere::__anon13842::simd::simd163135 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
operator >=simdutf::westmere::__anon13842::simd::simd163136 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
operator >simdutf::westmere::__anon13842::simd::simd163137