Lines Matching defs:const

68   simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
69 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
70 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
71 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
72 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
73 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
74 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
75 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
76 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
77 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
78 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
79 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
80 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
81 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
82 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
83 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
84 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
85 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
86 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
87 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
88 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
89 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
90 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
91 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
92 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
93 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
94 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
95 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
96 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
97 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
98 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
99 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
100 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
101 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
102 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
103 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
104 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
105 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
106 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
107 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
108 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
109 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
110 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
111 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
112 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
113 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
114 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
115 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
116 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
117 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
118 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
119 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
120 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
121 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
122 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
123 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
124 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
125 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
126 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
127 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
128 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
129 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
130 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
131 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
132 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
133 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
134 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
135 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
136 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
137 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
138 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
139 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
140 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
141 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
142 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
267 static const int SIZE = sizeof(value);
270 simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
271 simdutf_really_inline operator const uint8x16_t&() const { return this->value; }
273 simdutf_really_inline T first() const { return vgetq_lane_u8(*this,0); }
274 simdutf_really_inline T last() const { return vgetq_lane_u8(*this,15); }
277 simdutf_really_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); }
278 simdutf_really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); }
279 simdutf_really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); }
280 simdutf_really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); }
281 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
282 simdutf_really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast | other; return *this_cast; }
283 simdutf_really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast & other; return *this_cast; }
284 simdutf_really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
286 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return vceqq_u8(lhs, rhs); }
289 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
302 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {}
307 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
311 simdutf_really_inline uint32_t to_bitmask() const {
313 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
316 const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
331 simdutf_really_inline uint64_t to_bitmask64() const {
335 simdutf_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
336 simdutf_really_inline bool none() const { return vmaxvq_u8(*this) == 0; }
337 simdutf_really_inline bool all() const { return vminvq_u8(*this) == 0xFF; }
347 static simdutf_really_inline simd8<uint8_t> load(const uint8_t* values) { return vld1q_u8(values); }
348 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {}
352 simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
386 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
389 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
390 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
393 simdutf_really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); }
394 simdutf_really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); }
395 simdutf_really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; }
396 simdutf_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
399 simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
400 simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
401 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
402 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
403 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
404 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
405 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
406 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); }
408 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
410 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }
413 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
414 simdutf_really_inline bool is_ascii() const { return this->max_val() < 0b10000000u; }
416 simdutf_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; }
417 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
419 simdutf_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
421 simdutf_really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }
425 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
435 L replace12, L replace13, L replace14, L replace15) const {
445 simdutf_really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) const {
457 static simdutf_really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); }
471 simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
486 simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
487 const uint16x8_t low = vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0)));
488 const uint16x8_t high = vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0)));
489 const uint16x8x2_t low_pair{{ low, vmovq_n_u16(0) }};
491 const uint16x8x2_t high_pair{{ high, vmovq_n_u16(0) }};
497 simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t * p) const {
498 const simd8<uint8_t> tb1{ 0,255,255,255, 1,255,255,255, 2,255,255,255, 3,255,255,255 };
499 const simd8<uint8_t> tb2{ 4,255,255,255, 5,255,255,255, 6,255,255,255, 7,255,255,255 };
500 const simd8<uint8_t> tb3{ 8,255,255,255, 9,255,255,255, 10,255,255,255, 11,255,255,255 };
501 const simd8<uint8_t> tb4{ 12,255,255,255, 13,255,255,255, 14,255,255,255, 15,255,255,255 };
504 const auto shuf1 = this->apply_lookup_16_to(tb1);
505 const auto shuf2 = this->apply_lookup_16_to(tb2);
509 const auto shuf3 = this->apply_lookup_16_to(tb3);
510 const auto shuf4 = this->apply_lookup_16_to(tb4);
515 simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {}
516 simdutf_really_inline operator const int8x16_t&() const { return this->value; }
518 simdutf_really_inline operator const uint8x16_t() const { return vreinterpretq_u8_s8(this->value); }
527 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
558 simdutf_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, value); }
565 simdutf_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
567 simdutf_really_inline operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); }
569 simdutf_really_inline simd8<int8_t> operator|(const simd8<int8_t> other) const { return vorrq_s8(value, other.value); }
570 simdutf_really_inline simd8<int8_t> operator&(const simd8<int8_t> other) const { return vandq_s8(value, other.value); }
571 simdutf_really_inline simd8<int8_t> operator^(const simd8<int8_t> other) const { return veorq_s8(value, other.value); }
572 simdutf_really_inline simd8<int8_t> bit_andnot(const simd8<int8_t> other) const { return vbicq_s8(value, other.value); }
575 simdutf_really_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(value, other.value); }
576 simdutf_really_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(value, other.value); }
577 simdutf_really_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; }
578 simdutf_really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; }
580 simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); }
581 simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); }
582 simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; }
585 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return vmaxq_s8(value, other.value); }
586 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return vminq_s8(value, other.value); }
587 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(value, other.value); }
588 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(value, other.value); }
589 simdutf_really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(value, other.value); }
592 simdutf_really_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const {
598 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
606 L replace12, L replace13, L replace14, L replace15) const {
616 simdutf_really_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) const {
627 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
628 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed -- NOTE(review): the deleted overload takes simd8<T>, not simd8x64<T>; confirm that copy-assignment from simd8x64<T> is also meant to be disabled
631 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
632 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
634 simdutf_really_inline void store(T* ptr) const {
642 simdutf_really_inline simd8x64<T>& operator |=(const simd8x64<T> &other) {
650 simdutf_really_inline simd8<T> reduce_or() const {
654 simdutf_really_inline bool is_ascii() const {
659 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
666 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
673 simdutf_really_inline uint64_t to_bitmask() const {
675 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(
680 const uint8x16_t bit_mask = {
693 simdutf_really_inline uint64_t eq(const T m) const {
694 const simd8<T> mask = simd8<T>::splat(m);
703 simdutf_really_inline uint64_t lteq(const T m) const {
704 const simd8<T> mask = simd8<T>::splat(m);
713 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
714 const simd8<T> mask_low = simd8<T>::splat(low);
715 const simd8<T> mask_high = simd8<T>::splat(high);
724 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
725 const simd8<T> mask_low = simd8<T>::splat(low);
726 const simd8<T> mask_high = simd8<T>::splat(high);
734 simdutf_really_inline uint64_t lt(const T m) const {
735 const simd8<T> mask = simd8<T>::splat(m);
743 simdutf_really_inline uint64_t gt(const T m) const {
744 const simd8<T> mask = simd8<T>::splat(m);
752 simdutf_really_inline uint64_t gteq(const T m) const {
753 const simd8<T> mask = simd8<T>::splat(m);
761 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
762 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
778 static const int SIZE = sizeof(value);
782 simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {}
783 simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
786 simdutf_really_inline simd16<T> operator|(const simd16<T> other) const { return vorrq_u16(*this, other); }
787 simdutf_really_inline simd16<T> operator&(const simd16<T> other) const { return vandq_u16(*this, other); }
788 simdutf_really_inline simd16<T> operator^(const simd16<T> other) const { return veorq_u16(*this, other); }
789 simdutf_really_inline simd16<T> bit_andnot(const simd16<T> other) const { return vbicq_u16(*this, other); }
790 simdutf_really_inline simd16<T> operator~() const { return vmvnq_u16(*this); } // bitwise NOT of every 16-bit lane (XOR with 0xFFu cannot express a full 16-bit complement)
791 simdutf_really_inline simd16<T>& operator|=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast | other; return *this_cast; }
792 simdutf_really_inline simd16<T>& operator&=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast & other; return *this_cast; }
793 simdutf_really_inline simd16<T>& operator^=(const simd16<T> other) { auto this_cast = static_cast<simd16<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
795 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return vceqq_u16(lhs, rhs); }
798 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
809 simdutf_really_inline base16(const uint16x8_t _value) : base_u16<T>(_value) {}
811 simdutf_really_inline base16(const Pointer* ptr) : base16(vld1q_u16(ptr)) {}
813 static const int SIZE = sizeof(base_u16<T>::value);
816 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
827 simdutf_really_inline simd16<bool>(const uint16x8_t _value) : base16<bool>(_value) {}
837 static simdutf_really_inline simd16<T> load(const T values[8]) {
838 return vld1q_u16(reinterpret_cast<const uint16_t*>(values));
842 simdutf_really_inline base16_numeric(const uint16x8_t _value) : base16<T>(_value) {}
845 simdutf_really_inline void store(T dst[8]) const { return vst1q_u16(reinterpret_cast<uint16_t*>(dst), *this); } // writes the 8 lanes to dst; cast mirrors load() so non-uint16_t element types are accepted
848 simdutf_really_inline simd16<T> operator~() const { return vmvnq_u16(*this); } // bitwise NOT of every 16-bit lane (XOR with 0xFFu cannot express a full 16-bit complement)
851 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return vaddq_u16(*this, other); } // lane-wise 16-bit wrapping add (the 8-bit intrinsic would drop carries between bytes)
852 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return vsubq_u16(*this, other); } // lane-wise 16-bit wrapping subtract (the 8-bit intrinsic would drop borrows between bytes)
853 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
854 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
862 simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<int16_t>(_value) {}
864 simdutf_really_inline simd16(const int16x8_t _value) : base16_numeric<int16_t>(vreinterpretq_u16_s16(_value)) {}
869 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
870 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
871 simdutf_really_inline operator simd16<uint16_t>() const;
872 simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
873 simdutf_really_inline operator const int16x8_t() const { return vreinterpretq_s16_u16(this->value); }
875 simdutf_really_inline int16_t max_val() const { return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); }
876 simdutf_really_inline int16_t min_val() const { return vminvq_s16(vreinterpretq_s16_u16(this->value)); }
878 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return vmaxq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
879 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return vminq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); } // lane-wise signed minimum of *this and other
880 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return vcgtq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
881 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return vcltq_s16(vreinterpretq_s16_u16(this->value), vreinterpretq_s16_u16(other.value)); }
891 simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<uint16_t>(_value) {}
896 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
897 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
900 simdutf_really_inline uint16_t max_val() const { return vmaxvq_u16(*this); } // largest lane as unsigned; returning int16_t would turn values >= 0x8000 negative
901 simdutf_really_inline uint16_t min_val() const { return vminvq_u16(*this); } // smallest lane as unsigned; returning int16_t would turn values >= 0x8000 negative
903 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return vqaddq_u16(*this, other); }
904 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return vqsubq_u16(*this, other); }
907 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return vmaxq_u16(*this, other); }
908 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return vminq_u16(*this, other); }
910 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
912 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
913 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return vcleq_u16(*this, other); }
914 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return vcgeq_u16(*this, other); }
915 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return vcgtq_u16(*this, other); }
916 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return vcltq_u16(*this, other); }
919 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
921 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(vshrq_n_u16(*this, N)); }
923 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(vshlq_n_u16(*this, N)); }
926 simdutf_really_inline simd16<uint16_t> operator|(const simd16<uint16_t> other) const { return vorrq_u16(*this, other); }
927 simdutf_really_inline simd16<uint16_t> operator&(const simd16<uint16_t> other) const { return vandq_u16(*this, other); }
928 simdutf_really_inline simd16<uint16_t> operator^(const simd16<uint16_t> other) const { return veorq_u16(*this, other); }
931 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
936 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
940 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
949 simd16x32(const simd16x32<T>& o) = delete; // no copy allowed
950 simd16x32<T>& operator=(const simd16<T> other) = delete; // no assignment allowed -- NOTE(review): the deleted overload takes simd16<T>, not simd16x32<T>; confirm that copy-assignment from simd16x32<T> is also meant to be disabled
953 simdutf_really_inline simd16x32(const simd16<T> chunk0, const simd16<T> chunk1, const simd16<T> chunk2, const simd16<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
954 simdutf_really_inline simd16x32(const T* ptr) : chunks{simd16<T>::load(ptr), simd16<T>::load(ptr+sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+2*sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+3*sizeof(simd16<T>)/sizeof(T))} {}
956 simdutf_really_inline void store(T* ptr) const {
963 simdutf_really_inline simd16<T> reduce_or() const {
967 simdutf_really_inline bool is_ascii() const {
971 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
978 simdutf_really_inline uint64_t to_bitmask() const {
980 const uint8x16_t bit_mask = simdutf_make_uint8x16_t(
985 const uint8x16_t bit_mask = {
1005 simdutf_really_inline uint64_t eq(const T m) const {
1006 const simd16<T> mask = simd16<T>::splat(m);
1015 simdutf_really_inline uint64_t lteq(const T m) const {
1016 const simd16<T> mask = simd16<T>::splat(m);
1025 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
1026 const simd16<T> mask_low = simd16<T>::splat(low);
1027 const simd16<T> mask_high = simd16<T>::splat(high);
1036 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
1037 const simd16<T> mask_low = simd16<T>::splat(low);
1038 const simd16<T> mask_high = simd16<T>::splat(high);
1046 simdutf_really_inline uint64_t lt(const T m) const {
1047 const simd16<T> mask = simd16<T>::splat(m);
1058 simdutf_really_inline uint64_t simd16x32<uint16_t>::not_in_range(const uint16_t low, const uint16_t high) const {
1059 const simd16<uint16_t> mask_low = simd16<uint16_t>::splat(low);
1060 const simd16<uint16_t> mask_high = simd16<uint16_t>::splat(high);
1268 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
1269 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1270 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1271 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1272 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1273 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1274 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1275 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1276 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1277 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1278 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1279 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1280 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1281 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1282 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1283 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1284 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1285 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1286 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1287 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1288 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1289 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1290 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1291 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1292 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1293 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1294 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1295 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1296 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1297 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1298 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1299 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1300 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1301 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1302 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1303 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1304 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1305 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1306 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1307 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1308 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1309 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1310 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1311 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1312 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1313 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1314 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1315 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1316 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1317 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1318 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1319 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1320 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1321 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1322 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1323 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1324 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1325 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1326 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1327 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1328 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1329 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1330 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1331 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1332 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1333 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
1334 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1335 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1336 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
1337 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
1338 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
1339 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
1340 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
1341 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
1342 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
1475 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
1476 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1477 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1478 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1479 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1480 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1481 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1482 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1483 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1484 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1485 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1486 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1487 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1488 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1489 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1490 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1491 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1492 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1493 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1494 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1495 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1496 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1497 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1498 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1499 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1500 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1501 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1502 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1503 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1504 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1505 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1506 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1507 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1508 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1509 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1510 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1511 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1512 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1513 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1514 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1515 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1516 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1517 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1518 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1519 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1520 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1521 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1522 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1523 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1524 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1525 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1526 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1527 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1528 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1529 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1530 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1531 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1532 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1533 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1534 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1535 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1536 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1537 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1538 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
1539 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
1540 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
1541 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1542 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
1543 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
1544 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
1545 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
1546 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
1547 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
1548 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
1549 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
1684 simdutf_really_inline base(const __m256i _value) : value(_value) {}
1686 simdutf_really_inline operator const __m256i&() const { return this->value; }
1689 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
1693 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
1701 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
1708 simdutf_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); }
1709 simdutf_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
1710 simdutf_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
1711 simdutf_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
1712 simdutf_really_inline Child& operator|=(const Child other) { Child& self = *static_cast<Child*>(this); self = self | other; return self; } // in-place OR: updates the derived Child object and returns it
1713 simdutf_really_inline Child& operator&=(const Child other) { Child& self = *static_cast<Child*>(this); self = self & other; return self; } // in-place AND: updates the derived Child object and returns it
1714 simdutf_really_inline Child& operator^=(const Child other) { Child& self = *static_cast<Child*>(this); self = self ^ other; return self; } // in-place XOR: updates the derived Child object and returns it
1727 simdutf_really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {}
1728 simdutf_really_inline T first() const { return _mm256_extract_epi8(*this,0); }
1729 simdutf_really_inline T last() const { return _mm256_extract_epi8(*this,31); }
1730 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm256_cmpeq_epi8(lhs, rhs); }
1732 static const int SIZE = sizeof(base<simd8<T>>::value); // size in bytes of the wrapped register, taken from the base class this type actually derives from
1735 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
1746 simdutf_really_inline simd8<bool>(const __m256i _value) : base8<bool>(_value) {}
1750 simdutf_really_inline uint32_t to_bitmask() const { return uint32_t(_mm256_movemask_epi8(*this)); }
1751 simdutf_really_inline bool any() const { return _mm256_testz_si256(*this, *this) == 0; } // true when at least one bit of the mask is set
1752 simdutf_really_inline bool none() const { return _mm256_testz_si256(*this, *this) != 0; } // true when no bit of the mask is set
1753 simdutf_really_inline bool all() const { return this->to_bitmask() == 0xFFFFFFFFu; } // true when the top bit of each of the 32 byte lanes is set
1754 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
1761 static simdutf_really_inline simd8<T> load(const T values[32]) {
1762 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
1778 simdutf_really_inline base8_numeric(const __m256i _value) : base8<T>(_value) {}
1781 simdutf_really_inline void store(T dst[32]) const { __m256i * const out = reinterpret_cast<__m256i *>(dst); _mm256_storeu_si256(out, *this); } // unaligned store of the whole 256-bit vector into dst
1784 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); }
1785 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); }
1786 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
1787 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
1790 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
1794 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
1803 L replace12, L replace13, L replace14, L replace15) const {
1818 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {}
1823 simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
1824 simdutf_really_inline operator simd8<uint8_t>() const;
1849 simdutf_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; }
1851 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); }
1852 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); }
1853 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); }
1854 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); }
1861 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {}
1865 simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
1893 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); }
1894 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); }
1897 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
1898 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); }
1900 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
1902 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
1903 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
1904 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
1905 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
1906 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
1909 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
1910 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
1911 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
1912 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
1913 simdutf_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; }
1914 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); }
1915 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
1916 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); }
1917 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
1919 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
1921 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
1925 simdutf_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); }
1927 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
1936 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
1937 simd8x64<T>& operator=(const simd8<T> other) = delete; // assigning a single simd8<T> chunk is disallowed. NOTE(review): this overload takes simd8<T>, not simd8x64<T> - confirm that is the intended signature
1940 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {}
1941 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T))} {}
1943 simdutf_really_inline void store(T* ptr) const {
1948 simdutf_really_inline uint64_t to_bitmask() const {
1954 simdutf_really_inline simd8x64<T>& operator|=(const simd8x64<T> &other) {
1960 simdutf_really_inline simd8<T> reduce_or() const {
1964 simdutf_really_inline bool is_ascii() const {
1969 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
1974 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
1979 simdutf_really_inline simd8x64<T> bit_or(const T m) const {
1980 const simd8<T> mask = simd8<T>::splat(m);
1987 simdutf_really_inline uint64_t eq(const T m) const {
1988 const simd8<T> mask = simd8<T>::splat(m);
1995 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
2002 simdutf_really_inline uint64_t lteq(const T m) const {
2003 const simd8<T> mask = simd8<T>::splat(m);
2010 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2011 const simd8<T> mask_low = simd8<T>::splat(low);
2012 const simd8<T> mask_high = simd8<T>::splat(high);
2019 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2020 const simd8<T> mask_low = simd8<T>::splat(low);
2021 const simd8<T> mask_high = simd8<T>::splat(high);
2027 simdutf_really_inline uint64_t lt(const T m) const {
2028 const simd8<T> mask = simd8<T>::splat(m);
2035 simdutf_really_inline uint64_t gt(const T m) const {
2036 const simd8<T> mask = simd8<T>::splat(m);
2042 simdutf_really_inline uint64_t gteq(const T m) const {
2043 const simd8<T> mask = simd8<T>::splat(m);
2049 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
2050 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
2074 simdutf_really_inline base16(const __m256i _value) : base<simd16<T>>(_value) {}
2076 simdutf_really_inline base16(const Pointer* ptr) : base16(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr))) {}
2077 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return _mm256_cmpeq_epi16(lhs, rhs); }
2080 static const int SIZE = sizeof(base<simd16<T>>::value);
2083 static const int ELEMENTS = SIZE / sizeof(T);
2086 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
2097 simdutf_really_inline simd16<bool>(const __m256i _value) : base16<bool>(_value) {}
2101 simdutf_really_inline bitmask_type to_bitmask() const { return _mm256_movemask_epi8(*this); }
2102 simdutf_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
2103 simdutf_really_inline simd16<bool> operator~() const { return *this ^ true; }
2110 static simdutf_really_inline simd16<T> load(const T values[8]) {
2111 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
2115 simdutf_really_inline base16_numeric(const __m256i _value) : base16<T>(_value) {}
2118 simdutf_really_inline void store(T dst[16]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } // unaligned store of the whole 256-bit vector: 32 bytes, i.e. 16 lanes when T is 16 bits wide
2121 simdutf_really_inline simd16<T> operator~() const { return *this ^ 0xFFFFu; }
2124 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return _mm256_add_epi16(*this, other); }
2125 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return _mm256_sub_epi16(*this, other); }
2126 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
2127 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
2134 simdutf_really_inline simd16(const __m256i _value) : base16_numeric<int16_t>(_value) {}
2138 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
2139 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
2141 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return _mm256_max_epi16(*this, other); }
2142 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return _mm256_min_epi16(*this, other); }
2143 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return _mm256_cmpgt_epi16(*this, other); }
2144 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return _mm256_cmpgt_epi16(other, *this); }
2151 simdutf_really_inline simd16(const __m256i _value) : base16_numeric<uint16_t>(_value) {}
2156 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
2157 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
2160 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm256_adds_epu16(*this, other); }
2161 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm256_subs_epu16(*this, other); }
2164 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm256_max_epu16(*this, other); }
2165 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm256_min_epu16(*this, other); }
2167 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
2169 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
2170 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
2171 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
2172 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
2173 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return this->lt_bits(other).any_bits_set(); } // lane-wise unsigned less-than: lt_bits is other.saturating_sub(*this), which is nonzero exactly when *this < other
2176 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
2177 simdutf_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const { return (*this & bits).bits_not_set(); }
2178 simdutf_really_inline simd16<bool> any_bits_set() const { return ~this->bits_not_set(); }
2179 simdutf_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const { return ~this->bits_not_set(bits); }
2181 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); }
2182 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
2183 simdutf_really_inline bool bits_not_set_anywhere(simd16<uint16_t> bits) const { return _mm256_testz_si256(*this, bits); }
2184 simdutf_really_inline bool any_bits_set_anywhere(simd16<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
2186 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(_mm256_srli_epi16(*this, N)); }
2188 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(_mm256_slli_epi16(*this, N)); }
2192 simdutf_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 15-N)); }
2195 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
2196 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
2202 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
2208 const __m128i lo_0 = _mm256_extracti128_si256(v0, 0);
2209 const __m128i lo_1 = _mm256_extracti128_si256(v1, 0);
2212 const __m128i hi_0 = _mm256_extracti128_si256(v0, 1);
2213 const __m128i hi_1 = _mm256_extracti128_si256(v1, 1);
2216 const __m256i t0 = _mm256_set_m128i(lo_1, lo_0);
2217 const __m256i t1 = _mm256_set_m128i(hi_1, hi_0);
2231 simd16x32(const simd16x32<T>& o) = delete; // no copy allowed
2232 simd16x32<T>& operator=(const simd16<T> other) = delete; // assigning a single simd16<T> chunk is disallowed. NOTE(review): this overload takes simd16<T>, not simd16x32<T> - confirm that is the intended signature
2235 simdutf_really_inline simd16x32(const simd16<T> chunk0, const simd16<T> chunk1) : chunks{chunk0, chunk1} {}
2236 simdutf_really_inline simd16x32(const T* ptr) : chunks{simd16<T>::load(ptr), simd16<T>::load(ptr+sizeof(simd16<T>)/sizeof(T))} {}
2238 simdutf_really_inline void store(T* ptr) const {
2243 simdutf_really_inline uint64_t to_bitmask() const {
2249 simdutf_really_inline simd16<T> reduce_or() const {
2253 simdutf_really_inline bool is_ascii() const {
2257 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
2262 simdutf_really_inline simd16x32<T> bit_or(const T m) const {
2263 const simd16<T> mask = simd16<T>::splat(m);
2275 simdutf_really_inline uint64_t eq(const T m) const {
2276 const simd16<T> mask = simd16<T>::splat(m);
2283 simdutf_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
2290 simdutf_really_inline uint64_t lteq(const T m) const {
2291 const simd16<T> mask = simd16<T>::splat(m);
2298 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2299 const simd16<T> mask_low = simd16<T>::splat(low);
2300 const simd16<T> mask_high = simd16<T>::splat(high);
2307 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2308 const simd16<T> mask_low = simd16<T>::splat(static_cast<T>(low-1));
2309 const simd16<T> mask_high = simd16<T>::splat(static_cast<T>(high+1));
2315 simdutf_really_inline uint64_t lt(const T m) const {
2316 const simd16<T> mask = simd16<T>::splat(m);
2406 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
2407 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
2408 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
2409 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
2410 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
2411 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
2412 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
2413 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
2414 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
2415 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
2416 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
2417 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
2418 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2419 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2420 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2421 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2422 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
2423 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2424 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2425 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2426 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2427 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2428 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2429 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2430 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2431 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2432 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2433 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2434 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2435 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2436 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2437 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2438 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2439 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2440 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2441 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2442 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2443 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2444 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2445 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2446 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2447 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2448 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2449 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2450 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2451 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2452 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2453 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2454 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2455 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2456 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2457 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2458 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2459 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2460 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2461 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2462 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2463 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
2464 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
2465 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
2466 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
2467 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
2468 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
2469 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
2470 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
2471 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
2472 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
2473 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
2474 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
2475 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
2476 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
2477 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
2478 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
2479 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
2480 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
2590 simdutf_really_inline base(const __m128i _value) : value(_value) {}
2592 simdutf_really_inline operator const __m128i&() const { return this->value; }
2595 simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
2599 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
2606 simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
2613 simdutf_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); }
2614 simdutf_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
2615 simdutf_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
2616 simdutf_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); }
2617 simdutf_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; }
2618 simdutf_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; }
2619 simdutf_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
2631 simdutf_really_inline T first() const { return _mm_extract_epi8(*this,0); }
2632 simdutf_really_inline T last() const { return _mm_extract_epi8(*this,15); }
2634 simdutf_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
2636 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm_cmpeq_epi8(lhs, rhs); }
2638 static const int SIZE = sizeof(base<simd8<T>>::value);
2641 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
2652 simdutf_really_inline simd8<bool>(const __m128i _value) : base8<bool>(_value) {}
2656 simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
2657 simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
2658 simdutf_really_inline bool none() const { return _mm_testz_si128(*this, *this); }
2659 simdutf_really_inline bool all() const { return _mm_movemask_epi8(*this) == 0xFFFF; }
2660 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
2667 static simdutf_really_inline simd8<T> load(const T values[16]) {
2668 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
2682 simdutf_really_inline base8_numeric(const __m128i _value) : base8<T>(_value) {}
2685 simdutf_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
2688 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
2691 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
2692 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
2693 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
2694 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
2698 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
2707 L replace12, L replace13, L replace14, L replace15) const {
2721 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {}
2725 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
2744 simdutf_really_inline operator simd8<uint8_t>() const;
2745 simdutf_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
2748 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); }
2749 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); }
2750 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); }
2751 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); }
2758 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {}
2763 simdutf_really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
2784 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); }
2785 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); }
2788 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
2789 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
2791 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
2793 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
2794 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
2795 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
2796 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
// Lane-wise unsigned "less than" for 8-bit lanes.
// lt_bits(other) is other.saturating_sub(*this), which is non-zero exactly in the
// lanes where *this < other; any_bits_set() converts those non-zero lanes to a mask.
// Using gt_bits here (as operator> does) would answer *this > other instead.
2797 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
2800 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
2801 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
2802 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
2803 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
2804 simdutf_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
2806 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
2807 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
2808 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); }
2809 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
2811 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
2813 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
2817 simdutf_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); }
2819 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
2825 static simdutf_really_inline simd8<uint16_t> load(const uint16_t values[8]) {
2826 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
2830 simdutf_really_inline simd8(const __m128i _value) : base<uint16_t>(_value) {}
2834 simdutf_really_inline simd8(const uint16_t* values) : simd8(load(values)) {}
2843 simdutf_really_inline simd8<uint16_t> saturating_add(const simd8<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
2844 simdutf_really_inline simd8<uint16_t> saturating_sub(const simd8<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
2847 simdutf_really_inline simd8<uint16_t> max_val(const simd8<uint16_t> other) const { return _mm_max_epu16(*this, other); }
2848 simdutf_really_inline simd8<uint16_t> min_val(const simd8<uint16_t> other) const { return _mm_min_epu16(*this, other); }
2850 simdutf_really_inline simd8<uint16_t> gt_bits(const simd8<uint16_t> other) const { return this->saturating_sub(other); }
2852 simdutf_really_inline simd8<uint16_t> lt_bits(const simd8<uint16_t> other) const { return other.saturating_sub(*this); }
2853 simdutf_really_inline simd8<bool> operator<=(const simd8<uint16_t> other) const { return other.max_val(*this) == other; }
2854 simdutf_really_inline simd8<bool> operator>=(const simd8<uint16_t> other) const { return other.min_val(*this) == other; }
2855 simdutf_really_inline simd8<bool> operator==(const simd8<uint16_t> other) const { return _mm_cmpeq_epi16(*this, other); }
2856 simdutf_really_inline simd8<bool> operator&(const simd8<uint16_t> other) const { return _mm_and_si128(*this, other); }
2857 simdutf_really_inline simd8<bool> operator|(const simd8<uint16_t> other) const { return _mm_or_si128(*this, other); }
2860 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint16_t(0); }
2861 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
2863 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
2864 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
2865 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint16_t> bits) const { return _mm_testz_si128(*this, bits); }
2866 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
2874 simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
2875 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
2878 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
2879 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
2881 simdutf_really_inline void store(T* ptr) const {
2888 simdutf_really_inline simd8x64<T>& operator |=(const simd8x64<T> &other) {
2896 simdutf_really_inline simd8<T> reduce_or() const {
2900 simdutf_really_inline bool is_ascii() const {
2905 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
2912 simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
2919 simdutf_really_inline uint64_t to_bitmask() const {
2927 simdutf_really_inline uint64_t eq(const T m) const {
2928 const simd8<T> mask = simd8<T>::splat(m);
2937 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
2946 simdutf_really_inline uint64_t lteq(const T m) const {
2947 const simd8<T> mask = simd8<T>::splat(m);
2956 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
2957 const simd8<T> mask_low = simd8<T>::splat(low);
2958 const simd8<T> mask_high = simd8<T>::splat(high);
2967 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
2968 const simd8<T> mask_low = simd8<T>::splat(low-1);
2969 const simd8<T> mask_high = simd8<T>::splat(high+1);
2977 simdutf_really_inline uint64_t lt(const T m) const {
2978 const simd8<T> mask = simd8<T>::splat(m);
2987 simdutf_really_inline uint64_t gt(const T m) const {
2988 const simd8<T> mask = simd8<T>::splat(m);
2996 simdutf_really_inline uint64_t gteq(const T m) const {
2997 const simd8<T> mask = simd8<T>::splat(m);
3005 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
3006 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3026 simdutf_really_inline base16(const __m128i _value) : base<simd16<T>>(_value) {}
3028 simdutf_really_inline base16(const Pointer* ptr) : base16(_mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr))) {}
3030 friend simdutf_really_inline Mask operator==(const simd16<T> lhs, const simd16<T> rhs) { return _mm_cmpeq_epi16(lhs, rhs); }
3032 static const int SIZE = sizeof(base<simd16<T>>::value);
3035 simdutf_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
3046 simdutf_really_inline simd16<bool>(const __m128i _value) : base16<bool>(_value) {}
3050 simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
3051 simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
3052 simdutf_really_inline simd16<bool> operator~() const { return *this ^ true; }
3059 static simdutf_really_inline simd16<T> load(const T values[8]) {
3060 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
3064 simdutf_really_inline base16_numeric(const __m128i _value) : base16<T>(_value) {}
3067 simdutf_really_inline void store(T dst[8]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }
// Lane-wise bitwise NOT for 16-bit lanes: XOR every lane with a value whose 16 bits are all set.
// The mask must be 0xFFFF; the 8-bit mask 0xFF would flip only the low byte of each lane.
// NOTE(review): assumes a scalar T operand is broadcast to every lane, as the
// `*this == uint16_t(0)` comparisons elsewhere in this file rely on - confirm.
3070 simdutf_really_inline simd16<T> operator~() const { return *this ^ static_cast<T>(0xFFFFu); }
3073 simdutf_really_inline simd16<T> operator+(const simd16<T> other) const { return _mm_add_epi16(*this, other); }
3074 simdutf_really_inline simd16<T> operator-(const simd16<T> other) const { return _mm_sub_epi16(*this, other); }
3075 simdutf_really_inline simd16<T>& operator+=(const simd16<T> other) { *this = *this + other; return *static_cast<simd16<T>*>(this); }
3076 simdutf_really_inline simd16<T>& operator-=(const simd16<T> other) { *this = *this - other; return *static_cast<simd16<T>*>(this); }
3083 simdutf_really_inline simd16(const __m128i _value) : base16_numeric<int16_t>(_value) {}
3087 simdutf_really_inline simd16(const int16_t* values) : simd16(load(values)) {}
3088 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const int16_t*>(values))) {}
3093 simdutf_really_inline operator simd16<uint16_t>() const;
3096 simdutf_really_inline simd16<int16_t> max_val(const simd16<int16_t> other) const { return _mm_max_epi16(*this, other); }
3097 simdutf_really_inline simd16<int16_t> min_val(const simd16<int16_t> other) const { return _mm_min_epi16(*this, other); }
3098 simdutf_really_inline simd16<bool> operator>(const simd16<int16_t> other) const { return _mm_cmpgt_epi16(*this, other); }
3099 simdutf_really_inline simd16<bool> operator<(const simd16<int16_t> other) const { return _mm_cmpgt_epi16(other, *this); }
3106 simdutf_really_inline simd16(const __m128i _value) : base16_numeric<uint16_t>(_value) {}
3111 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
3112 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
3125 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
3126 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
3129 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm_max_epu16(*this, other); }
3130 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm_min_epu16(*this, other); }
3132 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
3134 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
3135 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
3136 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
3137 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
// Lane-wise unsigned "less than" for 16-bit lanes.
// lt_bits(other) is other.saturating_sub(*this), which is non-zero exactly in the
// lanes where *this < other; any_bits_set() converts those non-zero lanes to a mask.
// Using gt_bits here (as operator> does) would answer *this > other instead.
3138 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return this->lt_bits(other).any_bits_set(); }
3141 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
3142 simdutf_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const { return (*this & bits).bits_not_set(); }
3143 simdutf_really_inline simd16<bool> any_bits_set() const { return ~this->bits_not_set(); }
3144 simdutf_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const { return ~this->bits_not_set(bits); }
3146 simdutf_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
3147 simdutf_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
3148 simdutf_really_inline bool bits_not_set_anywhere(simd16<uint16_t> bits) const { return _mm_testz_si128(*this, bits); }
3149 simdutf_really_inline bool any_bits_set_anywhere(simd16<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
3151 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(_mm_srli_epi16(*this, N)); }
3153 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(_mm_slli_epi16(*this, N)); }
// Shifts every 16-bit lane left by 7-N, then collects the top bit of each byte
// into an integer via _mm_movemask_epi8. N is not declared on this line
// (presumably a template parameter - confirm).
// NOTE(review): the 256-bit simd16<uint16_t>::get_bit in this file shifts by 15-N,
// which places bit N in the top bit of the 16-bit lane; 7-N places it in bit 7
// (top bit of the lane's low byte). Confirm which shift is intended for 16-bit lanes.
3157 simdutf_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); }
3160 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
3161 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
3166 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
3170 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
3178 simd16x32(const simd16x32<T>& o) = delete; // no copy allowed
3179 simd16x32<T>& operator=(const simd16<T> other) = delete; // no assignment allowed
3182 simdutf_really_inline simd16x32(const simd16<T> chunk0, const simd16<T> chunk1, const simd16<T> chunk2, const simd16<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
3183 simdutf_really_inline simd16x32(const T* ptr) : chunks{simd16<T>::load(ptr), simd16<T>::load(ptr+sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+2*sizeof(simd16<T>)/sizeof(T)), simd16<T>::load(ptr+3*sizeof(simd16<T>)/sizeof(T))} {}
3185 simdutf_really_inline void store(T* ptr) const {
3192 simdutf_really_inline simd16<T> reduce_or() const {
3196 simdutf_really_inline bool is_ascii() const {
3200 simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
3207 simdutf_really_inline uint64_t to_bitmask() const {
3222 simdutf_really_inline uint64_t eq(const T m) const {
3223 const simd16<T> mask = simd16<T>::splat(m);
3232 simdutf_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
3241 simdutf_really_inline uint64_t lteq(const T m) const {
3242 const simd16<T> mask = simd16<T>::splat(m);
3251 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
3252 const simd16<T> mask_low = simd16<T>::splat(low);
3253 const simd16<T> mask_high = simd16<T>::splat(high);
3262 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
3263 const simd16<T> mask_low = simd16<T>::splat(static_cast<T>(low-1));
3264 const simd16<T> mask_high = simd16<T>::splat(static_cast<T>(high+1));
3272 simdutf_really_inline uint64_t lt(const T m) const {
3273 const simd16<T> mask = simd16<T>::splat(m);
3347 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
3348 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
3349 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
3350 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
3351 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
3352 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
3353 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
3354 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
3355 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
3356 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
3357 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
3358 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3359 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3360 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3361 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3362 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3363 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3364 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3365 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3366 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3367 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3368 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3369 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3370 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3371 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3372 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3373 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3374 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3375 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3376 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3377 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3378 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3379 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3380 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3381 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3382 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3383 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3384 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3385 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3386 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3387 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3388 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
3389 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
3390 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
3391 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
3392 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
3393 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
3394 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
3395 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
3396 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
3397 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
3398 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
3399 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
3479 simdutf_really_inline base(const __m128i _value) : value(_value) {}
3482 simdutf_really_inline operator const __m128i &() const {
3488 simdutf_really_inline Child operator|(const Child other) const {
3491 simdutf_really_inline Child operator&(const Child other) const {
3494 simdutf_really_inline Child operator^(const Child other) const {
3497 simdutf_really_inline Child bit_andnot(const Child other) const {
3500 simdutf_really_inline Child &operator|=(const Child other) {
3505 simdutf_really_inline Child &operator&=(const Child other) {
3510 simdutf_really_inline Child &operator^=(const Child other) {
3526 simdutf_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
3528 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) {
3532 static const int SIZE = sizeof(base<simd8<T>>::value);
3535 simdutf_really_inline simd8<T> prev(simd8<T> prev_chunk) const {
3556 simdutf_really_inline simd8<bool>(const __m128i _value)
3562 simdutf_really_inline int to_bitmask() const {
3564 const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
3575 simdutf_really_inline bool any() const {
3578 simdutf_really_inline simd8<bool> operator~() const {
3589 static simdutf_really_inline simd8<T> load(const T values[16]) {
3590 return (__m128i)(vec_vsx_ld(0, reinterpret_cast<const uint8_t *>(values)));
3602 simdutf_really_inline base8_numeric(const __m128i _value)
3606 simdutf_really_inline void store(T dst[16]) const {
3611 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
3614 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const {
3617 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const {
3620 simdutf_really_inline simd8<T> &operator+=(const simd8<T> other) {
3624 simdutf_really_inline simd8<T> &operator-=(const simd8<T> other) {
3632 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
3641 L replace15) const {
3652 simdutf_really_inline simd8(const __m128i _value)
3658 simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
3678 max_val(const simd8<int8_t> other) const {
3683 min_val(const simd8<int8_t> other) const {
3688 operator>(const simd8<int8_t> other) const {
3693 operator<(const simd8<int8_t> other) const {
3702 simdutf_really_inline simd8(const __m128i _value)
3707 simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {}
3727 saturating_add(const simd8<uint8_t> other) const {
3731 saturating_sub(const simd8<uint8_t> other) const {
3737 max_val(const simd8<uint8_t> other) const {
3741 min_val(const simd8<uint8_t> other) const {
3746 gt_bits(const simd8<uint8_t> other) const {
3751 lt_bits(const simd8<uint8_t> other) const {
3755 operator<=(const simd8<uint8_t> other) const {
3759 operator>=(const simd8<uint8_t> other) const {
3763 operator>(const simd8<uint8_t> other) const {
3767 operator<(const simd8<uint8_t> other) const {
3772 simdutf_really_inline simd8<bool> bits_not_set() const {
3775 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
3778 simdutf_really_inline simd8<bool> any_bits_set() const {
3781 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
3785 simdutf_really_inline bool is_ascii() const {
3789 simdutf_really_inline bool bits_not_set_anywhere() const {
3792 simdutf_really_inline bool any_bits_set_anywhere() const {
3795 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
3799 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
3802 template <int N> simdutf_really_inline simd8<uint8_t> shr() const {
3806 template <int N> simdutf_really_inline simd8<uint8_t> shl() const {
3818 simd8x64(const simd8x64<T> &o) = delete; // no copy allowed
3820 operator=(const simd8<T> other) = delete; // no assignment allowed
3823 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1,
3824 const simd8<T> chunk2, const simd8<T> chunk3)
3827 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
3829 simdutf_really_inline void store(T* ptr) const {
3837 simdutf_really_inline simd8x64<T>& operator |=(const simd8x64<T> &other) {
3845 simdutf_really_inline simd8<T> reduce_or() const {
3851 simdutf_really_inline bool is_ascii() const {
3855 simdutf_really_inline uint64_t to_bitmask() const {
3863 simdutf_really_inline uint64_t eq(const T m) const {
3864 const simd8<T> mask = simd8<T>::splat(m);
3870 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
3878 simdutf_really_inline uint64_t lteq(const T m) const {
3879 const simd8<T> mask = simd8<T>::splat(m);
3885 simdutf_really_inline uint64_t in_range(const T low, const T high) const {
3886 const simd8<T> mask_low = simd8<T>::splat(low);
3887 const simd8<T> mask_high = simd8<T>::splat(high);
3896 simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
3897 const simd8<T> mask_low = simd8<T>::splat(low);
3898 const simd8<T> mask_high = simd8<T>::splat(high);
3906 simdutf_really_inline uint64_t lt(const T m) const {
3907 const simd8<T> mask = simd8<T>::splat(m);
3913 simdutf_really_inline uint64_t gt(const T m) const {
3914 const simd8<T> mask = simd8<T>::splat(m);
3922 simdutf_really_inline uint64_t gteq(const T m) const {
3923 const simd8<T> mask = simd8<T>::splat(m);
3931 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
3932 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
4004 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
4005 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
4006 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
4007 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
4008 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
4009 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
4010 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
4011 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
4012 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
4013 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
4014 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
4015 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
4016 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4017 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4018 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4019 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
4020 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
4021 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
4022 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4023 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4024 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4025 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4026 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4027 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4028 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4029 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4030 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4031 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4032 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4033 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4034 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4035 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4036 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4037 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4038 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4039 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4040 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4041 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4042 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4043 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4044 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4045 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4046 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4047 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4048 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4049 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4050 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4051 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4052 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4053 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4054 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4055 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4056 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4057 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4058 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4059 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4060 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4061 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
4062 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
4063 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
4064 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
4065 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
4066 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
4067 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
4068 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
4069 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
4070 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
4071 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
4072 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
4073 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
4074 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
4075 simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
4076 simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
4077 simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
4078 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;};
4127 inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
4128 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4193 inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
4194 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4262 inline simdutf_warn_unused result rewind_and_validate_with_errors(const char *start, const char *buf, size_t len) noexcept {
4284 inline size_t count_code_points(const char* buf, size_t len) {
4285 const int8_t * p = reinterpret_cast<const int8_t *>(buf);
4294 inline size_t utf16_length_from_utf8(const char* buf, size_t len) {
4295 const int8_t * p = reinterpret_cast<const int8_t *>(buf);
4304 simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, size_t length) {
4340 inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
4345 inline simdutf_warn_unused bool validate(const char16_t *buf, size_t len) noexcept {
4346 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4366 inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, size_t len) noexcept {
4367 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4387 inline size_t count_code_points(const char16_t* buf, size_t len) {
4389 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4399 inline size_t utf8_length_from_utf16(const char16_t* buf, size_t len) {
4401 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4413 inline size_t utf32_length_from_utf16(const char16_t* buf, size_t len) {
4415 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4429 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* out) {
4430 const uint16_t * input = reinterpret_cast<const uint16_t *>(in);
4439 simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t* input, size_t length) {
4458 bool implementation::supported_by_runtime_system() const {
4464 simdutf_warn_unused encoding_type implementation::autodetect_encoding(const char * input, size_t length) const noexcept {
4480 if(validate_utf16le(reinterpret_cast<const char16_t*>(input), length/2)) { return encoding_type::UTF16_LE; }
4483 if(validate_utf32(reinterpret_cast<const char32_t*>(input), length/4)) { return encoding_type::UTF32_LE; }
4495 static const icelake::implementation* get_icelake_singleton() {
4496 static const icelake::implementation icelake_singleton{};
4501 static const haswell::implementation* get_haswell_singleton() {
4502 static const haswell::implementation haswell_singleton{};
4507 static const westmere::implementation* get_westmere_singleton() {
4508 static const westmere::implementation westmere_singleton{};
4513 static const arm64::implementation* get_arm64_singleton() {
4514 static const arm64::implementation arm64_singleton{};
4519 static const ppc64::implementation* get_ppc64_singleton() {
4520 static const ppc64::implementation ppc64_singleton{};
4525 static const fallback::implementation* get_fallback_singleton() {
4526 static const fallback::implementation fallback_singleton{};
4536 const std::string &name() const noexcept final { return set_best()->name(); }
4537 const std::string &description() const noexcept final { return set_best()->description(); }
4538 uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
4540 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
4544 simdutf_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override {
4548 simdutf_warn_unused result validate_utf8_with_errors(const char * buf, size_t len) const noexcept final override {
4552 simdutf_warn_unused bool validate_ascii(const char * buf, size_t len) const noexcept final override {
4556 simdutf_warn_unused result validate_ascii_with_errors(const char * buf, size_t len) const noexcept final override {
4560 simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) const noexcept final override {
4564 simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) const noexcept final override {
4568 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) const noexcept final override {
4572 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t * buf, size_t len) const noexcept final override {
4576 simdutf_warn_unused bool validate_utf32(const char32_t * buf, size_t len) const noexcept final override {
4580 simdutf_warn_unused result validate_utf32_with_errors(const char32_t * buf, size_t len) const noexcept final override {
4584 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final override {
4588 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4592 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4596 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) const noexcept final override {
4600 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
4604 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept final override {
4608 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
4612 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4616 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4620 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4624 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4628 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4632 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4636 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4640 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4644 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4648 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4652 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4656 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4660 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4664 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4668 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4672 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4676 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4680 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4684 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4688 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4692 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4696 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4700 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4704 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4708 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4712 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4716 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4720 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4724 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4728 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4732 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4736 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4740 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4744 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4748 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4752 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4756 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4760 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4764 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4768 void change_endianness_utf16(const char16_t * buf, size_t len, char16_t * output) const noexcept final override {
4772 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t len) const noexcept final override {
4776 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t len) const noexcept final override {
4780 simdutf_warn_unused size_t count_utf8(const char * buf, size_t len) const noexcept final override {
4784 simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) const noexcept override {
4788 simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) const noexcept override {
4792 simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) const noexcept override {
4796 simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) const noexcept override {
4800 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
4804 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * buf, size_t len) const noexcept override {
4808 simdutf_warn_unused size_t utf16_length_from_latin1(size_t len) const noexcept override {
4812 simdutf_warn_unused size_t utf32_length_from_latin1(size_t len) const noexcept override {
4816 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
4820 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * buf, size_t len) const noexcept override {
4824 simdutf_warn_unused size_t utf16_length_from_utf8(const char * buf, size_t len) const noexcept override {
4828 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * buf, size_t len) const noexcept override {
4832 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * buf, size_t len) const noexcept override {
4836 simdutf_warn_unused size_t utf32_length_from_utf8(const char * buf, size_t len) const noexcept override {
4843 const implementation *set_best() const noexcept;
4846 static const std::initializer_list<const implementation *>& get_available_implementation_pointers() {
4847 static const std::initializer_list<const implementation *> available_implementation_pointers {
4873 simdutf_warn_unused int detect_encodings(const char *, size_t) const noexcept override {
4877 simdutf_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override {
4888 simdutf_warn_unused result validate_utf8_with_errors(const char *, size_t) const noexcept final override {
4892 simdutf_warn_unused bool validate_ascii(const char *, size_t) const noexcept final override {
4896 simdutf_warn_unused result validate_ascii_with_errors(const char *, size_t) const noexcept final override {
4900 simdutf_warn_unused bool validate_utf16le(const char16_t*, size_t) const noexcept final override {
4904 simdutf_warn_unused bool validate_utf16be(const char16_t*, size_t) const noexcept final override {
4908 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t*, size_t) const noexcept final override {
4912 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t*, size_t) const noexcept final override {
4916 simdutf_warn_unused bool validate_utf32(const char32_t*, size_t) const noexcept final override {
4920 simdutf_warn_unused result validate_utf32_with_errors(const char32_t*, size_t) const noexcept final override {
4924 simdutf_warn_unused size_t convert_latin1_to_utf8(const char*, size_t, char*) const noexcept final override {
4928 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char*, size_t, char16_t*) const noexcept final override {
4932 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char*, size_t, char16_t*) const noexcept final override {
4936 simdutf_warn_unused size_t convert_latin1_to_utf32(const char*, size_t, char32_t*) const noexcept final override {
4940 simdutf_warn_unused size_t convert_utf8_to_latin1(const char*, size_t, char*) const noexcept final override {
4944 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char*, size_t, char*) const noexcept final override {
4948 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char*, size_t, char*) const noexcept final override {
4952 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char*, size_t, char16_t*) const noexcept final override {
4956 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char*, size_t, char16_t*) const noexcept final override {
4960 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char*, size_t, char16_t*) const noexcept final override {
4964 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char*, size_t, char16_t*) const noexcept final override {
4968 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char*, size_t, char16_t*) const noexcept final override {
4972 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char*, size_t, char16_t*) const noexcept final override {
4976 simdutf_warn_unused size_t convert_utf8_to_utf32(const char*, size_t, char32_t*) const noexcept final override {
4980 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char*, size_t, char32_t*) const noexcept final override {
4984 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char*, size_t, char32_t*) const noexcept final override {
4988 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
4992 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
4996 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t*, size_t, char*) const noexcept final override {
5000 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t*, size_t, char*) const noexcept final override {
5004 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
5008 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
5012 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t*, size_t, char*) const noexcept final override {
5016 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t*, size_t, char*) const noexcept final override {
5020 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t*, size_t, char*) const noexcept final override {
5024 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t*, size_t, char*) const noexcept final override {
5028 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t*, size_t, char*) const noexcept final override {
5032 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t*, size_t, char*) const noexcept final override {
5036 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *, size_t, char* ) const noexcept final override {
5040 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *, size_t, char* ) const noexcept final override {
5044 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *, size_t, char* ) const noexcept final override {
5048 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t*, size_t, char*) const noexcept final override {
5052 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t*, size_t, char*) const noexcept final override {
5056 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t*, size_t, char*) const noexcept final override {
5060 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t*, size_t, char16_t*) const noexcept final override {
5064 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t*, size_t, char16_t*) const noexcept final override {
5068 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t*, size_t, char16_t*) const noexcept final override {
5072 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t*, size_t, char16_t*) const noexcept final override {
5076 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t*, size_t, char16_t*) const noexcept final override {
5080 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t*, size_t, char16_t*) const noexcept final override {
5084 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t*, size_t, char32_t*) const noexcept final override {
5088 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t*, size_t, char32_t*) const noexcept final override {
5092 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t*, size_t, char32_t*) const noexcept final override {
5096 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t*, size_t, char32_t*) const noexcept final override {
5100 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t*, size_t, char32_t*) const noexcept final override {
5104 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t*, size_t, char32_t*) const noexcept final override {
5108 void change_endianness_utf16(const char16_t *, size_t, char16_t *) const noexcept final override {
5112 simdutf_warn_unused size_t count_utf16le(const char16_t *, size_t) const noexcept final override {
5116 simdutf_warn_unused size_t count_utf16be(const char16_t *, size_t) const noexcept final override {
5120 simdutf_warn_unused size_t count_utf8(const char *, size_t) const noexcept final override {
5124 simdutf_warn_unused size_t latin1_length_from_utf8(const char *, size_t) const noexcept override {
5128 simdutf_warn_unused size_t latin1_length_from_utf16(size_t) const noexcept override {
5132 simdutf_warn_unused size_t latin1_length_from_utf32(size_t) const noexcept override {
5135 simdutf_warn_unused size_t utf8_length_from_latin1(const char *, size_t) const noexcept override {
5139 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override {
5143 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override {
5147 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override {
5151 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override {
5155 simdutf_warn_unused size_t utf32_length_from_latin1(size_t) const noexcept override {
5159 simdutf_warn_unused size_t utf16_length_from_utf8(const char *, size_t) const noexcept override {
5162 simdutf_warn_unused size_t utf16_length_from_latin1(size_t) const noexcept override {
5165 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *, size_t) const noexcept override {
5169 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *, size_t) const noexcept override {
5173 simdutf_warn_unused size_t utf32_length_from_utf8(const char *, size_t) const noexcept override {
5180 const unsupported_implementation unsupported_singleton{};
5182 size_t available_implementation_list::size() const noexcept {
5185 const implementation * const *available_implementation_list::begin() const noexcept {
5188 const implementation * const *available_implementation_list::end() const noexcept {
5191 const implementation *available_implementation_list::detect_best_supported() const noexcept {
5194 for (const implementation *impl : internal::get_available_implementation_pointers()) {
5201 const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
5226 SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() {
5227 static const internal::available_implementation_list available_implementations{};
5234 SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
5235 static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton;
5236 static internal::atomic_ptr<const implementation> active_implementation{&detect_best_supported_implementation_on_first_use_singleton};
5240 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
5243 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept {
5246 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept {
5249 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept {
5252 simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept {
5259 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) noexcept {
5262 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) noexcept {
5265 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) noexcept{
5268 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) noexcept {
5271 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
5274 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) noexcept {
5277 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
5280 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept {
5283 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept {
5286 simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept {
5293 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept {
5296 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept {
5299 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept {
5302 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept {
5305 simdutf_warn_unused bool validate_utf16(const char16_t * buf, size_t len) noexcept {
5312 simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) noexcept {
5315 simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) noexcept {
5318 simdutf_warn_unused result validate_utf16_with_errors(const char16_t * buf, size_t len) noexcept {
5325 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) noexcept {
5328 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t * buf, size_t len) noexcept {
5331 simdutf_warn_unused bool validate_utf32(const char32_t * buf, size_t len) noexcept {
5334 simdutf_warn_unused result validate_utf32_with_errors(const char32_t * buf, size_t len) noexcept {
5337 simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept {
5344 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) noexcept {
5347 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept {
5350 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept {
5353 simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5360 simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5367 simdutf_warn_unused size_t convert_latin1_to_utf16(const char * buf, size_t len, char16_t* utf16_output) noexcept {
5374 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5377 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5380 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5383 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5386 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5389 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5392 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5395 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5398 simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5405 simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5412 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5415 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5418 simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5425 simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5432 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5435 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5438 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5441 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5444 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5447 simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5454 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_output) noexcept {
5457 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5460 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5463 simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5470 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5473 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5476 simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5483 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5486 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5489 simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5496 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5499 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5502 simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5509 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5512 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5515 simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5522 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5525 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5528 void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept {
5531 simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept {
5538 simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept {
5541 simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept {
5544 simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept {
5547 simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) noexcept {
5556 simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) noexcept {
5559 simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept {
5566 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept {
5569 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept {
5572 simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept {
5579 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept {
5582 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept {
5585 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept {
5591 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept {
5594 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept {
5597 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept {
5600 simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * buf, size_t length) noexcept {
5603 simdutf_warn_unused int detect_encodings(const char * buf, size_t length) noexcept {
5606 const implementation * builtin_implementation() {
5607 static const implementation * builtin_impl = get_available_implementations()[SIMDUTF_STRINGIFY(SIMDUTF_BUILTIN_IMPLEMENTATION)];
5611 simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) {
5615 simdutf_warn_unused size_t trim_partial_utf16be(const char16_t* input, size_t length) {
5619 simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t length) {
5623 simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length) {
5659 encoding_type check_bom(const uint8_t* byte, size_t length) {
5676 encoding_type check_bom(const char* byte, size_t length) {
5677 return check_bom(reinterpret_cast<const uint8_t*>(byte), length);
5727 const uint8_t shufutf8[209][16] =
5940 const uint8_t utf8bigindex[4096][2] =
10055 const uint8_t pack_1_2_utf8_bytes[256][17] = {
10315 const uint8_t pack_1_2_3_utf8_bytes[256][17] = {
10594 inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
10595 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10614 inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
10615 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10653 inline simdutf_warn_unused bool validate(const char32_t *buf, size_t len) noexcept {
10654 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10665 inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, size_t len) noexcept {
10666 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10680 inline size_t utf8_length_from_utf32(const char32_t* buf, size_t len) {
10682 const uint32_t * p = reinterpret_cast<const uint32_t *>(buf);
10694 inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) {
10696 const uint32_t * p = reinterpret_cast<const uint32_t *>(buf);
10733 inline size_t utf8_length_from_latin1(const char *buf, size_t len) {
10734 const uint8_t * c = reinterpret_cast<const uint8_t *>(buf);
10765 inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output) {
10766 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10829 inline size_t convert(const char32_t* buf, size_t len, char* utf8_output) {
10830 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10878 inline result convert_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
10879 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10945 inline size_t convert_valid(const char32_t* buf, size_t len, char16_t* utf16_output) {
10946 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10989 inline size_t convert(const char32_t* buf, size_t len, char16_t* utf16_output) {
10990 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
11018 inline result convert_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
11019 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
11064 inline size_t convert_valid(const char16_t* buf, size_t len, char* utf8_output) {
11065 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11138 inline size_t convert(const char16_t* buf, size_t len, char* utf8_output) {
11139 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11197 inline result convert_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
11198 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11273 inline size_t convert_valid(const char16_t* buf, size_t len, char32_t* utf32_output) {
11274 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11314 inline size_t convert(const char16_t* buf, size_t len, char32_t* utf32_output) {
11315 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11341 inline result convert_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
11342 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11385 inline size_t convert_valid(const char* buf, size_t len, char16_t* utf16_output) {
11386 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11469 inline size_t convert(const char* buf, size_t len, char16_t* utf16_output) {
11470 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11559 inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) {
11560 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11662 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) {
11717 inline size_t convert_valid(const char* buf, size_t len, char32_t* utf32_output) {
11718 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11782 inline size_t convert(const char* buf, size_t len, char32_t* utf32_output) {
11783 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11854 inline result convert_with_errors(const char* buf, size_t len, char32_t* utf32_output) {
11855 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11940 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char32_t* utf32_output) {
11994 inline size_t convert(const char* buf, size_t len, char* utf8_output) {
11995 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12048 inline size_t convert(const char* buf, size_t len, char16_t* utf16_output) {
12049 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12063 inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) {
12064 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12094 inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) {
12095 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12121 inline size_t convert(const char* buf, size_t len, char* latin_output) {
12122 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12170 inline result convert_with_errors(const char* buf, size_t len, char* latin_output) {
12171 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12234 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char* latin1_output) {
12292 inline size_t convert(const char16_t* buf, size_t len, char* latin_output) {
12293 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12315 inline result convert_with_errors(const char16_t* buf, size_t len, char* latin_output) {
12316 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12369 inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) {
12370 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12386 inline result convert_with_errors(const char32_t *buf, size_t len, char *latin1_output) {
12387 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12427 inline size_t convert_valid(const char* buf, size_t len, char* latin_output) {
12428 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12490 inline size_t convert_valid(const char16_t* buf, size_t len, char* latin_output) {
12491 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12521 inline size_t convert_valid(const char32_t *buf, size_t len, char *latin1_output) {
12522 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12576 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
12581 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12593 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12604 const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10);
12606 const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10};
12654 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]));
12676 int arm_detect_encodings(const char * buf, size_t len) {
12677 const char* start = buf;
12678 const char* end = buf + len;
12686 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12687 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12694 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t*>(buf));
12695 uint16x8_t secondin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + simd16<uint16_t>::SIZE / sizeof(char16_t));
12696 uint16x8_t thirdin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 2*simd16<uint16_t>::SIZE / sizeof(char16_t));
12697 uint16x8_t fourthin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 3*simd16<uint16_t>::SIZE / sizeof(char16_t));
12699 const auto u0 = simd16<uint16_t>(in);
12700 const auto u1 = simd16<uint16_t>(secondin);
12701 const auto u2 = simd16<uint16_t>(thirdin);
12702 const auto u3 = simd16<uint16_t>(fourthin);
12704 const auto v0 = u0.shr<8>();
12705 const auto v1 = u1.shr<8>();
12706 const auto v2 = u2.shr<8>();
12707 const auto v3 = u3.shr<8>();
12709 const auto in16 = simd16<uint16_t>::pack(v0, v1);
12710 const auto nextin16 = simd16<uint16_t>::pack(v2, v3);
12712 const uint64_t surrogates_wordmask0 = ((in16 & v_f8) == v_d8).to_bitmask64();
12713 const uint64_t surrogates_wordmask1 = ((nextin16 & v_f8) == v_d8).to_bitmask64();
12729 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
12730 const char16_t* end16 = reinterpret_cast<const char16_t*>(start) + len/2;
12732 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12733 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12735 const uint64_t V0 = ~surrogates_wordmask0;
12737 const auto vH0 = ((in16 & v_fc) == v_dc);
12738 const uint64_t H0 = vH0.to_bitmask64();
12740 const uint64_t L0 = ~H0 & surrogates_wordmask0;
12742 const uint64_t a0 = L0 & (H0 >> 4);
12744 const uint64_t b0 = a0 << 4;
12746 const uint64_t c0 = V0 | a0 | b0;
12757 const auto in0 = simd16<uint16_t>(input);
12758 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
12759 const auto t0 = in0.shr<8>();
12760 const auto t1 = in1.shr<8>();
12761 const simd8<uint8_t> in_16 = simd16<uint16_t>::pack(t0, t1);
12763 const uint64_t surrogates_wordmask = ((in_16 & v_f8) == v_d8).to_bitmask64();
12767 const uint64_t V = ~surrogates_wordmask;
12769 const auto vH = ((in_16 & v_fc) == v_dc);
12770 const uint64_t H = vH.to_bitmask64();
12772 const uint64_t L = ~H & surrogates_wordmask;
12774 const uint64_t a = L & (H >> 4);
12776 const uint64_t b = a << 4;
12778 const uint64_t c = V | a | b;
12793 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
12794 const char32_t* end32 = reinterpret_cast<const char32_t*>(start) + len/4;
12798 const uint32x4_t offset = vmovq_n_u32(0xffff2000);
12799 const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
12801 const uint32x4_t in32 = vreinterpretq_u32_u16(in);
12802 const uint32x4_t secondin32 = vreinterpretq_u32_u16(secondin);
12803 const uint32x4_t thirdin32 = vreinterpretq_u32_u16(thirdin);
12804 const uint32x4_t fourthin32 = vreinterpretq_u32_u16(fourthin);
12817 const uint32x4_t in_32 = vld1q_u32(reinterpret_cast<const uint32_t*>(input));
12864 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
12869 const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
12871 if (vmaxvq_u32(is_zero) == 0 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
12882 const char16_t* arm_validate_utf16(const char16_t* input, size_t size) {
12883 const char16_t* end = input + size;
12884 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12885 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12886 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12887 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12898 const auto t0 = in0.shr<8>();
12899 const auto t1 = in1.shr<8>();
12900 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
12902 const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
12914 const uint64_t V = ~surrogates_wordmask;
12917 const auto vH = ((in & v_fc) == v_dc);
12918 const uint64_t H = vH.to_bitmask64();
12922 const uint64_t L = ~H & surrogates_wordmask;
12924 const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
12927 const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
12929 const uint64_t c = V | a | b; // Combine all the masks into the final one.
12950 const result arm_validate_utf16_with_errors(const char16_t* input, size_t size) {
12951 const char16_t* start = input;
12952 const char16_t* end = input + size;
12954 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12955 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12956 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12957 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12969 const auto t0 = in0.shr<8>();
12970 const auto t1 = in1.shr<8>();
12971 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
12973 const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
12985 const uint64_t V = ~surrogates_wordmask;
12988 const auto vH = ((in & v_fc) == v_dc);
12989 const uint64_t H = vH.to_bitmask64();
12993 const uint64_t L = ~H & surrogates_wordmask;
12995 const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
12998 const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
13000 const uint64_t c = V | a | b; // Combine all the masks into the final one.
13021 const char32_t* arm_validate_utf32le(const char32_t* input, size_t size) {
13022 const char32_t* end = input + size;
13024 const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
13025 const uint32x4_t offset = vmovq_n_u32(0xffff2000);
13026 const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
13031 const uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t*>(input));
13051 const result arm_validate_utf32le_with_errors(const char32_t* input, size_t size) {
13052 const char32_t* start = input;
13053 const char32_t* end = input + size;
13055 const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
13056 const uint32x4_t offset = vmovq_n_u32(0xffff2000);
13057 const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
13062 const uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t*>(input));
13088 std::pair<const char *, char *>
13089 arm_convert_latin1_to_utf8(const char *latin1_input, size_t len,
13092 const char *end = latin1_input + len;
13093 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13097 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(latin1_input));
13111 const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
13112 const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
13115 const uint16x8_t t0 = vshlq_n_u16(in16, 2);
13117 const uint16x8_t t1 = vandq_u16(t0, v_1f00);
13119 const uint16x8_t t2 = vandq_u16(in16, v_003f);
13121 const uint16x8_t t3 = vorrq_u16(t1, t2);
13123 const uint16x8_t t4 = vorrq_u16(t3, v_c080);
13125 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13126 const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f);
13127 const uint8x16_t utf8_unpacked =
13131 const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004, 0x0010, 0x0040,
13134 const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040,
13139 const uint8_t *row =
13141 const uint8x16_t shuffle = vld1q_u8(row + 1);
13142 const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
13157 std::pair<const char*, char16_t*> arm_convert_latin1_to_utf16(const char* buf, size_t len, char16_t* utf16_output) {
13158 const char* end = buf + len;
13161 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13176 std::pair<const char*, char32_t*> arm_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
13177 const char* end = buf + len;
13180 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13206 size_t convert_masked_utf8_to_utf16(const char *input,
13213 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13214 const uint16_t input_utf8_end_of_code_point_mask =
13261 const uint8_t idx =
13264 const uint8_t consumed =
13282 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13377 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13440 const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : 0x00800000;
13465 size_t convert_masked_utf8_to_utf32(const char *input,
13473 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13474 const uint16_t input_utf8_end_of_code_point_mask =
13515 const uint8_t idx =
13517 const uint8_t consumed =
13533 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13595 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13641 size_t convert_masked_utf8_to_latin1(const char *input,
13648 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13649 const uint16_t input_utf8_end_of_code_point_mask =
13665 const uint8_t idx =
13668 const uint8_t consumed =
13681 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13709 std::pair<const char16_t*, char*> arm_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
13710 const char16_t* end = buf + len;
13712 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13730 std::pair<result, char*> arm_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
13731 const char16_t* start = buf;
13732 const char16_t* end = buf + len;
13734 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13813 std::pair<const char16_t*, char*> arm_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_out) {
13815 const char16_t* end = buf + len;
13817 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
13818 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
13819 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13820 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
13822 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13826 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
13855 const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
13856 const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
13859 const uint16x8_t t0 = vshlq_n_u16(in, 2);
13861 const uint16x8_t t1 = vandq_u16(t0, v_1f00);
13863 const uint16x8_t t2 = vandq_u16(in, v_003f);
13865 const uint16x8_t t3 = vorrq_u16(t1, t2);
13867 const uint16x8_t t4 = vorrq_u16(t3, v_c080);
13869 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13870 const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
13871 const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
13874 const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
13879 const uint16x8_t mask = { 0x0001, 0x0004,
13886 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
13887 const uint8x16_t shuffle = vld1q_u8(row + 1);
13888 const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
13899 const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
13905 const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
13908 const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
13937 const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
13939 const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
13941 const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
13944 const uint16x8_t s0 = vshrq_n_u16(in, 12);
13946 const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
13948 const uint16x8_t s1s = vshlq_n_u16(s1, 2);
13950 const uint16x8_t s2 = vorrq_u16(s0, s1s);
13952 const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
13953 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
13954 const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
13955 const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
13956 const uint16x8_t s4 = veorq_u16(s3, m0);
13960 const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
13961 const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
13964 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13965 const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
13967 const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
13971 const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
13976 const uint16x8_t onemask = { 0x0001, 0x0004,
13980 const uint16x8_t twomask = { 0x0002, 0x0008,
13985 const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
13986 const uint16_t mask = vaddvq_u16(combined);
13990 const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
13991 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
13992 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
14000 const uint8_t mask0 = uint8_t(mask);
14002 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14003 const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
14004 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
14006 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14007 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14008 const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
14009 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
14065 std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_out) {
14067 const char16_t* start = buf;
14068 const char16_t* end = buf + len;
14070 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14071 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14072 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14073 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
14076 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14080 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
14109 const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14110 const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14113 const uint16x8_t t0 = vshlq_n_u16(in, 2);
14115 const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14117 const uint16x8_t t2 = vandq_u16(in, v_003f);
14119 const uint16x8_t t3 = vorrq_u16(t1, t2);
14121 const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14123 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14124 const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
14125 const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
14128 const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14133 const uint16x8_t mask = { 0x0001, 0x0004,
14140 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14141 const uint8x16_t shuffle = vld1q_u8(row + 1);
14142 const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
14153 const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14159 const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14162 const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14191 const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
14193 const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14195 const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14198 const uint16x8_t s0 = vshrq_n_u16(in, 12);
14200 const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
14202 const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14204 const uint16x8_t s2 = vorrq_u16(s0, s1s);
14206 const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14207 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14208 const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
14209 const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14210 const uint16x8_t s4 = veorq_u16(s3, m0);
14214 const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
14215 const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
14218 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14219 const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
14221 const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14225 const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14230 const uint16x8_t onemask = { 0x0001, 0x0004,
14234 const uint16x8_t twomask = { 0x0002, 0x0008,
14239 const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14240 const uint16_t mask = vaddvq_u16(combined);
14244 const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
14245 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
14246 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
14254 const uint8_t mask0 = uint8_t(mask);
14256 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14257 const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
14258 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
14260 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14261 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14262 const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
14263 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
14365 std::pair<const char16_t*, char32_t*> arm_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_out) {
14367 const char16_t* end = buf + len;
14369 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14370 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14373 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14376 const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14422 std::pair<result, char32_t*> arm_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_out) {
14424 const char16_t* start = buf;
14425 const char16_t* end = buf + len;
14427 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14428 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14431 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14434 const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14474 std::pair<const char32_t*, char*> arm_convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) {
14475 const char32_t* end = buf + len;
14477 uint32x4_t in1 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14478 uint32x4_t in2 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14497 std::pair<result, char*> arm_convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) {
14498 const char32_t* start = buf;
14499 const char32_t* end = buf + len;
14502 uint32x4_t in1 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14503 uint32x4_t in2 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14531 std::pair<const char32_t*, char*> arm_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_out) {
14533 const char32_t* end = buf + len;
14535 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14540 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14541 uint32x4_t nextin = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14564 const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14565 const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14568 const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
14570 const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14572 const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
14574 const uint16x8_t t3 = vorrq_u16(t1, t2);
14576 const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14578 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14579 const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14580 const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
14583 const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14588 const uint16x8_t mask = { 0x0001, 0x0004,
14595 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14596 const uint8x16_t shuffle = vld1q_u8(row + 1);
14597 const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
14608 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14609 const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14613 const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14616 const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14645 const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
14647 const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14649 const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14652 const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
14654 const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
14656 const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14658 const uint16x8_t s2 = vorrq_u16(s0, s1s);
14660 const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14661 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14662 const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
14663 const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14664 const uint16x8_t s4 = veorq_u16(s3, m0);
14668 const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
14669 const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
14672 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14673 const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14675 const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14679 const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14684 const uint16x8_t onemask = { 0x0001, 0x0004,
14688 const uint16x8_t twomask = { 0x0002, 0x0008,
14693 const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14694 const uint16_t mask = vaddvq_u16(combined);
14698 const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
14699 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
14700 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
14708 const uint8_t mask0 = uint8_t(mask);
14709 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14710 const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
14711 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
14713 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14714 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14715 const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
14716 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
14765 std::pair<result, char*> arm_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_out) {
14767 const char32_t* start = buf;
14768 const char32_t* end = buf + len;
14770 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14773 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14774 uint32x4_t nextin = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14797 const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14798 const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14801 const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
14803 const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14805 const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
14807 const uint16x8_t t3 = vorrq_u16(t1, t2);
14809 const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14811 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14812 const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14813 const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
14816 const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14821 const uint16x8_t mask = { 0x0001, 0x0004,
14828 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14829 const uint8x16_t shuffle = vld1q_u8(row + 1);
14830 const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
14843 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14844 const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14845 const uint16x8_t forbidden_bytemask = vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800));
14851 const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14854 const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14883 const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
14885 const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14887 const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14890 const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
14892 const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
14894 const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14896 const uint16x8_t s2 = vorrq_u16(s0, s1s);
14898 const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14899 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14900 const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
14901 const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14902 const uint16x8_t s4 = veorq_u16(s3, m0);
14906 const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
14907 const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
14910 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14911 const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14913 const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14917 const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14922 const uint16x8_t onemask = { 0x0001, 0x0004,
14926 const uint16x8_t twomask = { 0x0002, 0x0008,
14931 const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14932 const uint16_t mask = vaddvq_u16(combined);
14936 const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
14937 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
14938 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
14946 const uint8_t mask0 = uint8_t(mask);
14948 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14949 const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
14950 const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
14952 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14953 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14954 const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
14955 const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
15001 std::pair<const char32_t*, char16_t*> arm_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_out) {
15003 const char32_t* end = buf + len;
15008 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
15014 const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
15015 const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
15060 std::pair<result, char16_t*> arm_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_out) {
15062 const char32_t* start = buf;
15063 const char32_t* end = buf + len;
15066 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
15072 const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
15073 const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
15074 const uint16x4_t forbidden_bytemask = vand_u16(vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800));
15126 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
15128 simdutf_really_inline bool has_full_block() const;
15129 simdutf_really_inline const uint8_t *full_block() const;
15139 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
15142 const uint8_t *buf;
15143 const size_t len;
15144 const size_t lenminusstep;
15149 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
15159 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
15179 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
15185 simdutf_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
15190 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
15195 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
15219 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15226 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15228 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15229 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15230 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15231 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15232 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15233 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15241 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15245 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15247 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15262 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
15263 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15291 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15309 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15310 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15322 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
15325 static const uint8_t max_array[32] = {
15331 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
15346 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15363 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
15386 simdutf_really_inline bool errors() const {
15409 bool generic_validate_utf8(const uint8_t * input, size_t length) {
15426 bool generic_validate_utf8(const char * input, size_t length) {
15427 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15434 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
15443 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
15458 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
15466 result generic_validate_utf8_with_errors(const char * input, size_t length) {
15467 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15471 bool generic_validate_ascii(const uint8_t * input, size_t length) {
15487 bool generic_validate_ascii(const char * input, size_t length) {
15488 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15492 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
15498 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
15509 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
15516 result generic_validate_ascii_with_errors(const char * input, size_t length) {
15517 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15537 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
15542 const size_t safety_margin = 16; // to avoid overruns!
15546 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15610 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15617 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15619 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15620 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15621 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15622 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15623 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15624 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15632 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15636 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15638 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15653 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
15654 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15682 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15700 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15701 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15718 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15728 simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
15743 const size_t safety_margin = size - margin + 1; // to avoid overruns!
15745 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15803 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
15818 const size_t safety_margin = size - margin + 1; // to avoid overruns!
15820 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15896 simdutf_really_inline bool errors() const {
15917 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
15921 const size_t safety_margin = 16; // to avoid overruns!
15923 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15962 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15969 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15971 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15972 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15973 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15974 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15975 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15976 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15984 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15988 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15990 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16005 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
16006 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16034 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16052 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
16053 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
16070 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16080 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
16095 const size_t safety_margin = size - margin + 1; // to avoid overruns!
16097 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16154 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
16169 const size_t safety_margin = size - margin + 1; // to avoid overruns!
16171 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16241 simdutf_really_inline bool errors() const {
16261 simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
16265 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16272 simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
16277 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16299 simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
16303 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16312 simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
16317 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16333 simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
16337 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
16341 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16367 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
16377 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
16379 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
16380 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
16381 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
16382 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
16383 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
16384 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
16392 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
16396 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
16397 constexpr const uint8_t FORBIDDEN = 0xff;
16399 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16414 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
16415 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16443 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16470 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16478 simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
16493 const size_t safety_margin = size - margin + 1; // to avoid overruns!
16495 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16552 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
16567 const size_t safety_margin = size - margin + 1; // to avoid overruns!
16569 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16645 simdutf_really_inline bool errors() const {
16665 simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
16680 const size_t safety_margin = size - margin + 1; // to avoid overruns!
16682 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16740 simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
16755 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
16759 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
16763 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
16767 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
16771 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
16772 const char16_t* tail = arm_validate_utf16<endianness::LITTLE>(buf, len);
16780 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
16781 const char16_t* tail = arm_validate_utf16<endianness::BIG>(buf, len);
16789 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
16799 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
16809 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
16810 const char32_t* tail = arm_validate_utf32le(buf, len);
16818 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
16828 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
16829 std::pair<const char*, char*> ret = arm_convert_latin1_to_utf8(buf, len, utf8_output);
16833 const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
16840 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16841 std::pair<const char*, char16_t*> ret = arm_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
16844 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::LITTLE>(
16851 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16852 std::pair<const char*, char16_t*> ret = arm_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
16855 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::BIG>(
16862 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16863 std::pair<const char*, char32_t*> ret = arm_convert_latin1_to_utf32(buf, len, utf32_output);
16866 const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
16873 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
16878 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
16883 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
16887 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16892 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16897 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16902 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16907 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* input, size_t size,
16908 char16_t* utf16_output) const noexcept {
16912 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* input, size_t size,
16913 char16_t* utf16_output) const noexcept {
16917 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16922 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16927 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
16928 char32_t* utf32_output) const noexcept {
16932 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16933 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
16938 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::LITTLE>(
16946 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16947 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
16952 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::BIG>(
16960 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16977 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16994 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16999 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17004 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17005 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
17009 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::LITTLE>(
17017 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17018 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
17022 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::BIG>(
17030 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17048 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17066 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17070 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17074 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17075 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_utf8(buf, len, utf8_output);
17079 const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
17087 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17104 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17105 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
17109 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::LITTLE>(
17117 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17118 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
17122 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::BIG>(
17130 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17148 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17166 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17167 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_latin1(buf, len, latin1_output);
17172 const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
17180 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17197 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17198 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_latin1(buf, len, latin1_output);
17203 const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid(
17210 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17215 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17216 std::pair<const char32_t*, char16_t*> ret = arm_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
17220 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::LITTLE>(
17228 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17229 std::pair<const char32_t*, char16_t*> ret = arm_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
17233 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::BIG>(
17241 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17258 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17275 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17279 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17283 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17287 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17291 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
17295 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
17299 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
17303 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
17307 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
17311 simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
17315 simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
17319 simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
17322 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
17324 const int lanes = sizeof(uint8x16_t);
17326 const uint8_t *simd_end = data + (length / lanes) * lanes;
17327 const uint8x16_t threshold = vdupq_n_u8(0x80);
17336 return result + (length / lanes) * lanes + scalar::latin1::utf8_length_from_latin1((const char*)simd_end, rem);
17339 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
17343 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
17348 simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
17353 simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
17359 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
17363 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
17367 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
17371 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
17372 const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f);
17373 const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff);
17374 const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
17375 const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
17379 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17380 const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f);
17381 const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff);
17382 const uint32x4_t two_bytes_bytemask = veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask);
17383 const uint32x4_t three_bytes_bytemask = veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask);
17385 const uint16x8_t reduced_ascii_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1));
17386 const uint16x8_t reduced_two_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1));
17387 const uint16x8_t reduced_three_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1));
17389 const uint16x8_t compressed_bytemask0 = vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask);
17390 const uint16x8_t compressed_bytemask1 = vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask);
17401 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
17402 const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
17403 const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
17407 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17408 const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff);
17409 const uint16x8_t reduced_bytemask = vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1));
17410 const uint16x8_t compressed_bytemask = vpaddq_u16(reduced_bytemask, reduced_bytemask);
17417 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
17446 simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
17453 if(validate_utf16le(reinterpret_cast<const char16_t*>(input), length/2)) { out |= encoding_type::UTF16_LE; }
17456 if(validate_utf32(reinterpret_cast<const char32_t*>(input), length/4)) { out |= encoding_type::UTF32_LE; }
17462 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
17466 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
17470 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
17474 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
17478 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
17482 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
17486 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
17490 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
17494 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
17498 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
17502 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
17506 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17510 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17514 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept {
17518 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
17522 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
17526 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
17530 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17534 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17538 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17542 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17546 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17550 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17554 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17558 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17562 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
17563 char32_t* utf32_output) const noexcept {
17567 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17571 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17575 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17579 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17583 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17587 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17591 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17595 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17599 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17603 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17607 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17611 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17615 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17619 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17623 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17627 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17631 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17635 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17639 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17643 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17647 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17651 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17655 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17659 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17663 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17667 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17671 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17675 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17679 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17683 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17687 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
17691 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
17695 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
17699 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
17703 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
17707 simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
17711 simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
17715 simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
17719 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
17723 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
17727 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
17731 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
17735 simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
17739 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
17743 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
17747 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
17751 simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
17755 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
17794 using utf8_to_utf16_result = std::pair<const char*, char16_t*>;
17795 using utf8_to_utf32_result = std::pair<const char*, uint32_t*>;
17809 simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) {
18127 simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, __m512i utf32, unsigned int count, char16_t* output) {
18129 const __mmask16 valid = uint16_t((1 << count) - 1);
18131 const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff);
18132 const __mmask16 sp_mask = _mm512_mask_cmpgt_epu32_mask(valid, utf32, v_0000_ffff);
18148 const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000);
18149 const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000);
18152 const __m512i t1 = _mm512_slli_epi32(t0, 6);
18156 const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000);
18157 const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4);
18161 const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00);
18162 const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00);
18163 const __m512i t3 = _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba);
18164 const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3);
18169 const __mmask32 nonzero = _kor_mask32(0xaaaaaaaa,_mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512()));
18170 const __mmask32 nonzero_masked = _kand_mask32(nonzero, __mmask32((uint64_t(1) << (2*count)) - 1));
18202 simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, __m512i utf32, unsigned int count, char16_t* output) {
18204 const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff);
18205 const __mmask16 sp_mask = _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff);
18221 const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000);
18222 const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000);
18225 const __m512i t1 = _mm512_slli_epi32(t0, 6);
18229 const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000);
18230 const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4);
18234 const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00);
18235 const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00);
18236 const __m512i t3 = _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba);
18237 const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3);
18239 const __mmask32 nonzero = _kor_mask32(0xaaaaaaaa,_mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512()));
18256 const __m512i movemask = _mm512_setr_epi32(28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11);
18257 const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous);
18286 __m512i rotate_by_N_epi8(const __m512i input) {
18289 const __m512i permuted = _mm512_shuffle_i32x4(input, input, 0x39);
18330 const __m512i v_3f3f_3f7f = _mm512_set1_epi32(0x3f3f3f7f);
18339 const __m512i v_0140_0140 = _mm512_set1_epi32(0x01400140);
18348 const __m512i v_0001_1000 = _mm512_set1_epi32(0x00011000);
18383 const __m512i shift_left_v3 = _mm512_setr_epi64(
18394 const __m512i shift = _mm512_shuffle_epi8(shift_left_v3, char_class);
18405 const __m512i shift_right = _mm512_setr_epi64(
18416 const __m512i shift = _mm512_shuffle_epi8(shift_right, char_class);
18425 const __m512i merged = _mm512_mask_mov_epi32(lane0, 0x1000, lane1);
18426 const __m512i expand_ver2 = _mm512_setr_epi64(
18436 const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2);
18437 const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0);
18438 const __m512i t0 = _mm512_and_si512(input, v_0000_00c0);
18439 const __m512i v_0000_0080 = _mm512_set1_epi32(0x80);
18440 const __mmask16 leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080);
18448 const __m512i v_0000_000f = _mm512_set1_epi32(0x0f);
18449 const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000);
18499 const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \
18500 const __m512i expand_ver2 = _mm512_setr_epi64( \
18510 const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \
18513 const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \
18514 const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \
18515 const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \
18521 const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \
18522 const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \
18525 const int valid_count = static_cast<int>(count_ones(leading_bytes)); \
18526 const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \
18528 const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), leading_bytes, utf32); \
18532 const __mmask16 valid = uint16_t((1 << valid_count) - 1); \
18551 const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \
18569 const __m128i t0 = _mm512_castsi512_si128(utf8); \
18570 const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \
18571 const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \
18572 const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \
18578 const __m256i h0 = _mm512_castsi512_si256(utf8); \
18579 const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \
18610 std::pair<const char*, OUTPUT*> valid_utf8_to_fixed_length(const char* str, size_t len, OUTPUT* dwords) {
18626 const char* ptr = str;
18627 const char* end = ptr + len;
18637 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18638 const __m512i v_80 = _mm512_set1_epi8(char(0x80));
18639 const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80);
18647 const __m512i lane0 = broadcast_epi128<0>(utf8);
18648 const __m512i lane1 = broadcast_epi128<1>(utf8);
18651 const __m512i lane2 = broadcast_epi128<2>(utf8);
18665 const __m512i lane3 = broadcast_epi128<3>(utf8);
18670 const __m512i lane4 = _mm512_set1_epi32(tmp1);
18688 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18689 const __m512i v_80 = _mm512_set1_epi8(char(0x80));
18690 const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80);
18696 const __m512i lane0 = broadcast_epi128<0>(utf8);
18697 const __m512i lane1 = broadcast_epi128<1>(utf8);
18700 const __m512i lane2 = broadcast_epi128<2>(utf8);
18715 const __m512i lane3 = broadcast_epi128<3>(utf8);
18725 using utf8_to_utf16_result = std::pair<const char*, char16_t*>;
18731 simdutf_really_inline __m512i check_special_cases(__m512i input, const __m512i prev1) {
18741 const __m512i v_0f = _mm512_set1_epi8(0x0f);
18772 simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input,
18773 const __m512i prev_input, const __m512i sc) {
18779 const __m512i v_7f = _mm512_set1_epi8(char(0x7f));
18782 const __m512i v_80 = _mm512_set1_epi8(char(0x80));
18791 simdutf_really_inline __m512i is_incomplete(const __m512i input) {
18818 simdutf_really_inline void check_utf8_bytes(const __m512i input, const __m512i prev_input) {
18836 simdutf_really_inline bool check_next_input(const __m512i input) {
18837 const __m512i v_80 = _mm512_set1_epi8(char(0x80));
18838 const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80);
18850 simdutf_really_inline bool errors() const {
18869 utf8_to_utf16_result fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) {
18870 const char *const final_in = in + len;
18884 simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, size_t len, char16_t *out) {
18885 const char *const init_in = in;
18886 const char16_t *const init_out = out;
18887 const char *const final_in = in + len;
18909 std::pair<const char*, OUTPUT*> validating_utf8_to_fixed_length(const char* str, size_t len, OUTPUT* dwords) {
18915 const char* ptr = str;
18916 const char* end = ptr + len;
18936 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18943 const __m512i lane0 = broadcast_epi128<0>(utf8);
18944 const __m512i lane1 = broadcast_epi128<1>(utf8);
18947 const __m512i lane2 = broadcast_epi128<2>(utf8);
18961 const __m512i lane3 = broadcast_epi128<3>(utf8);
18966 const __m512i lane4 = _mm512_set1_epi32(tmp1);
18982 const char* validatedptr = ptr; // validated up to ptr
18987 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18993 const __m512i lane0 = broadcast_epi128<0>(utf8);
18994 const __m512i lane1 = broadcast_epi128<1>(utf8);
18997 const __m512i lane2 = broadcast_epi128<2>(utf8);
19012 const __m512i lane3 = broadcast_epi128<3>(utf8);
19020 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
19032 std::tuple<const char*, OUTPUT*, bool> validating_utf8_to_fixed_length_with_constant_checks(const char* str, size_t len, OUTPUT* dwords) {
19038 const char* ptr = str;
19039 const char* end = ptr + len;
19059 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
19069 const __m512i lane0 = broadcast_epi128<0>(utf8);
19070 const __m512i lane1 = broadcast_epi128<1>(utf8);
19073 const __m512i lane2 = broadcast_epi128<2>(utf8);
19087 const __m512i lane3 = broadcast_epi128<3>(utf8);
19092 const __m512i lane4 = _mm512_set1_epi32(tmp1);
19108 const char* validatedptr = ptr; // validated up to ptr
19113 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
19121 const __m512i lane0 = broadcast_epi128<0>(utf8);
19122 const __m512i lane1 = broadcast_epi128<1>(utf8);
19125 const __m512i lane2 = broadcast_epi128<2>(utf8);
19140 const __m512i lane3 = broadcast_epi128<3>(utf8);
19148 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
19165 simdutf_really_inline size_t process_block_from_utf8_to_latin1(const char *buf, size_t len,
19224 size_t utf8_to_latin1_avx512(const char *buf, size_t len, char *latin_output) {
19262 simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1(const char *buf, size_t len,
19299 size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len,
19329 size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len,
19331 const char16_t *end = buf + len;
19373 icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
19375 const char16_t *end = buf + len;
19376 const char16_t *start = buf;
19441 size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen,
19455 const char16_t * const inbuf_orig = inbuf;
19456 const unsigned char * const outbuf_orig = outbuf;
19468 const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask(
19484 const __mmask32 is12byte =
19490 const __m512i twobytes = _mm512_ternarylogic_epi32(
19495 const __m512i cmpmask =
19498 const __mmask64 smoosh = _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT);
19499 const __m512i out = _mm512_maskz_compress_epi8(smoosh, in);
19518 const __m512i fc00masked = _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00)));
19519 const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask(
19521 const __mmask32 losurr = _mm512_cmp_epu16_mask(
19531 const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16);
19546 const uint32_t h = _cvtmask32_u32(hisurr);
19547 const uint32_t l = _cvtmask32_u32(losurr);
19550 const uint32_t lonohi = l & ~(h + h + carry);
19551 const uint32_t hinolo = h & ~(l >> 1);
19570 const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask));
19571 const __mmask64 outmhi = _kshiftri_mask64(outmask, 16);
19573 const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte));
19574 const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16);
19575 const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16);
19600 const __mmask64 wantlo = _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT);
19601 const __mmask64 wanthi = _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT);
19602 const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo);
19603 const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi);
19604 const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo);
19605 const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi);
19638 std::tuple<const char16_t*, char32_t*, bool> convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
19639 const char16_t* end = buf + len;
19640 const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00);
19641 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
19642 const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00);
19644 const __m512i byteflip = _mm512_setr_epi64(
19660 const __mmask32 H = _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800);
19662 const __mmask32 L = _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00);
19666 const __mmask32 V = (L ^ (carry | (H << 1))); // A high surrogate must be followed by low one and a low one must be preceded by a high one.
19679 const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
19680 const __m512i second = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in,1));
19686 const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1);
19687 const __m512i shifted_second = _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1);
19692 const __m512i aligned_first = _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10);
19693 const __m512i aligned_second = _mm512_mask_slli_epi32(second, (__mmask16)(H>>16), second, 10);
19700 const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400);
19701 const __m512i added_first = _mm512_mask_add_epi32(aligned_first, (__mmask16)H, aligned_first, shifted_first);
19702 const __m512i utf32_first = _mm512_mask_add_epi32(added_first, (__mmask16)H, added_first, constant);
19704 const __m512i added_second = _mm512_mask_add_epi32(aligned_second, (__mmask16)(H>>16), aligned_second, shifted_second);
19705 const __m512i utf32_second = _mm512_mask_add_epi32(added_second, (__mmask16)(H>>16), added_second, constant);
19708 const __mmask32 valid = ~L & 0x7fffffff;
19711 const __m512i compressed_first = _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first);
19712 const size_t howmany1 = count_ones((uint16_t)(valid));
19715 const __m512i compressed_second = _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second);
19716 const size_t howmany2 = count_ones((uint16_t)(valid >> 16));
19743 size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len,
19745 const char32_t *end = buf + len;
19775 icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
19777 const char32_t *end = buf + len;
19778 const char32_t *start = buf;
19819 std::pair<const char32_t*, char*> avx512_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
19820 const char32_t* end = buf + len;
19821 const __m256i v_0000 = _mm256_setzero_si256();
19822 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
19823 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
19824 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
19825 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
19826 const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
19830 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
19845 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in_16),_mm256_extractf128_si256(in_16,1));
19854 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
19855 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
19858 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
19859 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
19864 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
19865 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
19868 const __m256i t0 = _mm256_slli_epi16(in_16, 2);
19870 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
19872 const __m256i t2 = _mm256_and_si256(in_16, v_003f);
19874 const __m256i t3 = _mm256_or_si256(t1, t2);
19876 const __m256i t4 = _mm256_or_si256(t3, v_c080);
19879 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
19882 const uint32_t M0 = one_byte_bitmask & 0x55555555;
19883 const uint32_t M1 = M0 >> 7;
19884 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
19887 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
19888 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
19890 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
19891 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
19893 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
19905 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
19906 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
19909 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
19912 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
19943 const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
19945 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
19947 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
19950 const __m256i s0 = _mm256_srli_epi16(in_16, 4);
19952 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
19954 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
19956 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
19957 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
19958 const __m256i s4 = _mm256_xor_si256(s3, m0);
19962 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
19963 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
19966 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
19971 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
19972 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
19973 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
19985 const uint8_t mask0 = uint8_t(mask);
19986 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
19987 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
19988 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
19990 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
19991 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
19992 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
19993 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
19995 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
19996 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
19997 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
19998 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
20001 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
20002 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
20003 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
20004 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
20048 const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
20059 std::pair<result, char*> avx512_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
20060 const char32_t* end = buf + len;
20061 const char32_t* start = buf;
20063 const __m256i v_0000 = _mm256_setzero_si256();
20064 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
20065 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
20066 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
20067 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
20068 const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
20069 const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
20071 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
20077 const __m256i max_input = _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff);
20090 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in_16),_mm256_extractf128_si256(in_16,1));
20099 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
20100 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
20103 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
20104 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
20109 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
20110 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
20113 const __m256i t0 = _mm256_slli_epi16(in_16, 2);
20115 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
20117 const __m256i t2 = _mm256_and_si256(in_16, v_003f);
20119 const __m256i t3 = _mm256_or_si256(t1, t2);
20121 const __m256i t4 = _mm256_or_si256(t3, v_c080);
20124 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
20127 const uint32_t M0 = one_byte_bitmask & 0x55555555;
20128 const uint32_t M1 = M0 >> 7;
20129 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
20132 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
20133 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
20135 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
20136 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
20138 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
20150 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
20151 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
20156 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
20157 const __m256i forbidden_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800);
20162 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
20193 const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
20195 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
20197 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
20200 const __m256i s0 = _mm256_srli_epi16(in_16, 4);
20202 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
20204 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
20206 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
20207 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
20208 const __m256i s4 = _mm256_xor_si256(s3, m0);
20212 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
20213 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
20216 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
20221 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
20222 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
20223 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
20235 const uint8_t mask0 = uint8_t(mask);
20236 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
20237 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
20238 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
20240 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
20241 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
20242 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
20243 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
20245 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
20246 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
20247 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
20248 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
20251 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
20252 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
20253 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
20254 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
20305 std::pair<const char32_t*, char16_t*> avx512_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
20306 const char32_t* end = buf + len;
20308 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
20315 const __m256i v_00000000 = _mm256_setzero_si256();
20316 const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
20319 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
20320 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
20323 const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
20324 const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
20329 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
20371 std::pair<result, char16_t*> avx512_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
20372 const char32_t* start = buf;
20373 const char32_t* end = buf + len;
20375 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
20380 const __m256i v_00000000 = _mm256_setzero_si256();
20381 const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
20384 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
20385 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
20388 const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
20389 const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
20390 const __m256i forbidden_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800);
20397 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
20437 bool validate_ascii(const char* buf, size_t len) {
20438 const char* end = buf + len;
20439 const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80);
20442 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)buf);
20446 const __m512i utf8 = _mm512_maskz_loadu_epi8((uint64_t(1) << (end-buf)) - 1,(const __m512i*)buf);
20455 const char32_t* validate_utf32(const char32_t* buf, size_t len) {
20456 const char32_t* end = len >= 16 ? buf + len - 16 : nullptr;
20458 const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000);
20463 __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf);
20469 const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff);
20470 const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff);
20494 const uint64_t alternate_bits = UINT64_C(0x5555555555555555);
20567 size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, char *utf8_output) {
20593 size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len,
20629 std::pair<const char*, char32_t*> avx512_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
20659 implementation::detect_encodings(const char *input,
20660 size_t length) const noexcept {
20665 const char *buf = input;
20667 const char *start = buf;
20668 const char *end = input + length;
20713 is_utf16 = validate_utf16le(reinterpret_cast<const char16_t *>(buf),
20726 const char32_t *input32 = reinterpret_cast<const char32_t *>(buf);
20727 const char32_t *end32 =
20728 reinterpret_cast<const char32_t *>(start) + length / 4;
20752 const __m512i utf8 = _mm512_maskz_loadu_epi8(
20753 (1ULL << (length - current_length)) - 1, (const __m512i *)buf);
20763 reinterpret_cast<const char16_t *>(buf),
20772 (const __m512i *)buf),
20789 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
20791 const char* ptr = buf;
20792 const char* end = ptr + len;
20794 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
20798 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - ptr))-1, (const __m512i*)ptr);
20805 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
20807 const char* ptr = buf;
20808 const char* end = ptr + len;
20811 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
20815 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
20822 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - ptr))-1, (const __m512i*)ptr);
20826 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
20835 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
20839 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
20840 const char* buf_orig = buf;
20841 const char* end = buf + len;
20842 const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80);
20844 const __m512i input = _mm512_loadu_si512((const __m512i*)buf);
20851 const __m512i input = _mm512_maskz_loadu_epi8((1ULL<<(end - buf))-1, (const __m512i*)buf);
20860 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
20861 const char16_t *end = buf + len;
20900 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
20901 const char16_t *end = buf + len;
20902 const __m512i byteflip = _mm512_setr_epi64(
20949 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
20950 const char16_t *start_buf = buf;
20951 const char16_t *end = buf + len;
20993 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
20994 const char16_t *start_buf = buf;
20995 const char16_t *end = buf + len;
20996 const __m512i byteflip = _mm512_setr_epi64(
21047 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
21048 const char32_t * tail = icelake::validate_utf32(buf, len);
21056 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
21058 const char32_t* end = len >= 16 ? buf + len - 16 : nullptr;
21059 const char32_t* buf_orig = buf;
21061 __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf);
21078 __m512i utf32 = _mm512_maskz_loadu_epi32(__mmask16((1<<(buf_orig + len - buf))-1),(const __m512i*)buf);
21096 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
21100 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21104 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21108 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
21109 std::pair<const char*, char32_t*> ret = avx512_convert_latin1_to_utf32(buf, len, utf32_output);
21113 const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
21121 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
21126 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
21149 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
21153 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21161 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21169 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21173 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21177 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21180 const char* end = buf + len;
21195 const size_t scalar_saved_bytes = scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
21204 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21207 const char* end = buf + len;
21222 const size_t scalar_saved_bytes = scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
21232 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_out) const noexcept {
21239 const char* end = buf + len;
21254 const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert(
21263 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32) const noexcept {
21275 const char* end = buf + len;
21304 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_out) const noexcept {
21308 const char* end = buf + len;
21323 const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid(
21333 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21337 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21341 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21345 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21349 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21354 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21359 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21366 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21373 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21384 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21395 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21399 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21403 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21407 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21411 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21416 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21417 std::pair<const char32_t*, char*> ret = avx512_convert_utf32_to_utf8(buf, len, utf8_output);
21421 const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
21429 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21446 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21450 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21451 std::pair<const char32_t*, char16_t*> ret = avx512_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
21455 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::LITTLE>(
21463 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21464 std::pair<const char32_t*, char16_t*> ret = avx512_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
21468 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::BIG>(
21476 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21493 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21510 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21514 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21518 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21519 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21523 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::LITTLE>(
21531 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21532 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21536 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::BIG>(
21544 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21545 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21567 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21568 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21590 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21591 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21595 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::LITTLE>(
21603 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21604 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21608 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::BIG>(
21616 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
21618 const __m512i byteflip = _mm512_setr_epi64(
21629 __m512i utf16 = _mm512_loadu_si512((const __m512i*)(input + pos));
21636 __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i*)(input + pos));
21643 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
21644 const char16_t* end = length >= 32 ? input + length - 32 : nullptr;
21645 const char16_t* ptr = input;
21647 const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
21648 const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
21653 __m512i utf16 = _mm512_loadu_si512((const __m512i*)ptr);
21662 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
21663 const char16_t* end = length >= 32 ? input + length - 32 : nullptr;
21664 const char16_t* ptr = input;
21666 const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
21667 const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
21670 const __m512i byteflip = _mm512_setr_epi64(
21691 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
21692 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
21697 const __m512i continuation = _mm512_set1_epi8(char(0b10111111));
21704 __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
21705 __m512i input2 = _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
21706 __m512i input3 = _mm512_loadu_si512((const __m512i *)(str + i + 2*sizeof(__m512i)));
21707 __m512i input4 = _mm512_loadu_si512((const __m512i *)(str + i + 3*sizeof(__m512i)));
21708 __m512i input5 = _mm512_loadu_si512((const __m512i *)(str + i + 4*sizeof(__m512i)));
21709 __m512i input6 = _mm512_loadu_si512((const __m512i *)(str + i + 5*sizeof(__m512i)));
21710 __m512i input7 = _mm512_loadu_si512((const __m512i *)(str + i + 6*sizeof(__m512i)));
21711 __m512i input8 = _mm512_loadu_si512((const __m512i *)(str + i + 7*sizeof(__m512i)));
21730 __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
21747 return answer + scalar::utf8::count_code_points(reinterpret_cast<const char *>(str + i), length - i);
21750 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
21754 simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
21758 simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
21762 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
21763 const char16_t* end = length >= 32 ? input + length - 32 : nullptr;
21764 const char16_t* ptr = input;
21766 const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f);
21767 const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff);
21768 const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff);
21769 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
21774 __m512i utf16 = _mm512_loadu_si512((const __m512i*)ptr);
21792 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
21793 const char16_t* end = length >= 32 ? input + length - 32 : nullptr;
21794 const char16_t* ptr = input;
21796 const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f);
21797 const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff);
21798 const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff);
21799 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
21802 const __m512i byteflip = _mm512_setr_epi64(
21813 __m512i utf16 = _mm512_loadu_si512((const __m512i*)ptr);
21831 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
21835 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
21839 simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
21844 simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
21848 simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
21849 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
21863 __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
21864 __m512i input2 = _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
21865 __m512i input3 = _mm512_loadu_si512((const __m512i *)(str + i + 2*sizeof(__m512i)));
21866 __m512i input4 = _mm512_loadu_si512((const __m512i *)(str + i + 3*sizeof(__m512i)));
21886 __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
21906 return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast<const char *>(str + i), length - i);
21909 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
21914 __m512i utf8 = _mm512_loadu_si512((const __m512i*)(input+pos));
21925 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
21926 const char32_t* end = length >= 16 ? input + length - 16 : nullptr;
21927 const char32_t* ptr = input;
21929 const __m512i v_0000_007f = _mm512_set1_epi32((uint32_t)0x7f);
21930 const __m512i v_0000_07ff = _mm512_set1_epi32((uint32_t)0x7ff);
21931 const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff);
21936 __m512i utf32 = _mm512_loadu_si512((const __m512i*)ptr);
21952 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
21953 const char32_t* end = length >= 16 ? input + length - 16 : nullptr;
21954 const char32_t* ptr = input;
21956 const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff);
21961 __m512i utf32 = _mm512_loadu_si512((const __m512i*)ptr);
21971 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
22018 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
22022 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22030 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22040 int avx2_detect_encodings(const char * buf, size_t len) {
22041 const char* start = buf;
22042 const char* end = buf + len;
22050 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22051 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22061 const auto u0 = simd16<uint16_t>(in);
22062 const auto u1 = simd16<uint16_t>(nextin);
22064 const auto v0 = u0.shr<8>();
22065 const auto v1 = u1.shr<8>();
22067 const auto in16 = simd16<uint16_t>::pack(v0, v1);
22069 const auto surrogates_wordmask0 = (in16 & v_f8) == v_d8;
22085 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
22086 const char16_t* end16 = reinterpret_cast<const char16_t*>(start) + len/2;
22088 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22089 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22091 const uint32_t V0 = ~surrogates_bitmask0;
22093 const auto vH0 = (in16 & v_fc) == v_dc;
22094 const uint32_t H0 = vH0.to_bitmask();
22096 const uint32_t L0 = ~H0 & surrogates_bitmask0;
22098 const uint32_t a0 = L0 & (H0 >> 1);
22099 const uint32_t b0 = a0 << 1;
22100 const uint32_t c0 = V0 | a0 | b0;
22111 const auto in0 = simd16<uint16_t>(input);
22112 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::ELEMENTS);
22114 const auto t0 = in0.shr<8>();
22115 const auto t1 = in1.shr<8>();
22117 const auto in_16 = simd16<uint16_t>::pack(t0, t1);
22119 const auto surrogates_wordmask = (in_16 & v_f8) == v_d8;
22120 const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask();
22124 const uint32_t V = ~surrogates_bitmask;
22126 const auto vH = (in_16 & v_fc) == v_dc;
22127 const uint32_t H = vH.to_bitmask();
22129 const uint32_t L = ~H & surrogates_bitmask;
22131 const uint32_t a = L & (H >> 1);
22133 const uint32_t b = a << 1;
22135 const uint32_t c = V | a | b;
22150 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
22151 const char32_t* end32 = reinterpret_cast<const char32_t*>(start) + len/4;
22155 const __m256i offset = _mm256_set1_epi32(0xffff2000);
22156 const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff);
22165 const __m256i in32 = _mm256_loadu_si256((__m256i *)input);
22210 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
22215 const __m256i standardmax = _mm256_set1_epi32(0x10ffff);
22217 if (_mm256_testz_si256(is_zero, is_zero) == 1 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
22273 const char16_t* avx2_validate_utf16(const char16_t* input, size_t size) {
22274 const char16_t* end = input + size;
22276 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22277 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22278 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22279 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22293 const auto t0 = in0.shr<8>();
22294 const auto t1 = in1.shr<8>();
22296 const auto in = simd16<uint16_t>::pack(t0, t1);
22299 const auto surrogates_wordmask = (in & v_f8) == v_d8;
22300 const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask();
22312 const uint32_t V = ~surrogates_bitmask;
22315 const auto vH = (in & v_fc) == v_dc;
22316 const uint32_t H = vH.to_bitmask();
22320 const uint32_t L = ~H & surrogates_bitmask;
22322 const uint32_t a = L & (H >> 1); // A low surrogate must be followed by high one.
22325 const uint32_t b = a << 1; // Just mark that the opposite fact is hold,
22327 const uint32_t c = V | a | b; // Combine all the masks into the final one.
22350 const result avx2_validate_utf16_with_errors(const char16_t* input, size_t size) {
22351 const char16_t* start = input;
22352 const char16_t* end = input + size;
22354 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22355 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22356 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22357 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22371 const auto t0 = in0.shr<8>();
22372 const auto t1 = in1.shr<8>();
22374 const auto in = simd16<uint16_t>::pack(t0, t1);
22377 const auto surrogates_wordmask = (in & v_f8) == v_d8;
22378 const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask();
22390 const uint32_t V = ~surrogates_bitmask;
22393 const auto vH = (in & v_fc) == v_dc;
22394 const uint32_t H = vH.to_bitmask();
22398 const uint32_t L = ~H & surrogates_bitmask;
22400 const uint32_t a = L & (H >> 1); // A low surrogate must be followed by high one.
22403 const uint32_t b = a << 1; // Just mark that the opposite fact is hold,
22405 const uint32_t c = V | a | b; // Combine all the masks into the final one.
22431 const char32_t* avx2_validate_utf32le(const char32_t* input, size_t size) {
22432 const char32_t* end = input + size;
22434 const __m256i standardmax = _mm256_set1_epi32(0x10ffff);
22435 const __m256i offset = _mm256_set1_epi32(0xffff2000);
22436 const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff);
22441 const __m256i in = _mm256_loadu_si256((__m256i *)input);
22460 const result avx2_validate_utf32le_with_errors(const char32_t* input, size_t size) {
22461 const char32_t* start = input;
22462 const char32_t* end = input + size;
22464 const __m256i standardmax = _mm256_set1_epi32(0x10ffff);
22465 const __m256i offset = _mm256_set1_epi32(0xffff2000);
22466 const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff);
22471 const __m256i in = _mm256_loadu_si256((__m256i *)input);
22492 std::pair<const char *, char *> avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len,
22494 const char *end = latin1_input + len;
22495 const __m256i v_0000 = _mm256_setzero_si256();
22496 const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080);
22497 const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80);
22498 const size_t safety_margin = 12;
22503 const __m128i v_80 = _mm_set1_epi8((char)0x80);
22513 const __m256i in = _mm256_cvtepu8_epi16((in8));
22518 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
22519 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
22522 const __m256i t0 = _mm256_slli_epi16(in, 2);
22524 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
22526 const __m256i t2 = _mm256_and_si256(in, v_003f);
22528 const __m256i t3 = _mm256_or_si256(t1, t2);
22530 const __m256i t4 = _mm256_or_si256(t3, v_c080);
22535 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000);
22536 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
22538 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask);
22541 const uint32_t M0 = one_byte_bitmask & 0x55555555;
22542 const uint32_t M1 = M0 >> 7;
22543 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
22546 const uint8_t *row =
22548 const uint8_t *row_2 =
22552 const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
22553 const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1));
22555 const __m256i utf8_packed = _mm256_shuffle_epi8(
22575 std::pair<const char*, char16_t*> avx2_convert_latin1_to_utf16(const char* latin1_input, size_t len, char16_t* utf16_output) {
22581 __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(latin1_input + i));
22593 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
22610 std::pair<const char*, char32_t*> avx2_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
22639 size_t convert_masked_utf8_to_utf16(const char *input,
22652 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
22653 const __m128i in = _mm_loadu_si128((__m128i *)input);
22654 const uint16_t input_utf8_end_of_code_point_mask =
22660 const __m256i swap256 = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
22671 const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
22672 const __m128i perm = _mm_shuffle_epi8(in, sh);
22673 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
22674 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
22684 const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
22685 const __m128i perm = _mm_shuffle_epi8(in, sh);
22686 const __m128i ascii =
22688 const __m128i middlebyte =
22690 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22691 const __m128i highbyte =
22693 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
22694 const __m128i composed =
22703 const uint8_t idx =
22705 const uint8_t consumed =
22713 const __m128i sh =
22714 _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
22715 const __m128i perm = _mm_shuffle_epi8(in, sh);
22716 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
22717 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
22724 const __m128i sh =
22725 _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
22726 const __m128i perm = _mm_shuffle_epi8(in, sh);
22727 const __m128i ascii =
22729 const __m128i middlebyte =
22731 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22732 const __m128i highbyte =
22734 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
22735 const __m128i composed =
22751 const __m128i sh =
22752 _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
22753 const __m128i perm = _mm_shuffle_epi8(in, sh);
22754 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f));
22755 const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00));
22756 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22759 const __m128i correct =
22762 const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
22765 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000));
22766 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
22771 const __m128i composed =
22774 const __m128i composedminus =
22776 const __m128i lowtenbits =
22779 const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff));
22780 const __m128i lowtenbitsadd =
22782 const __m128i hightenbitsadd =
22784 const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16);
22820 size_t convert_masked_utf8_to_utf32(const char *input,
22833 const __m128i in = _mm_loadu_si128((__m128i *)input);
22834 const uint16_t input_utf8_end_of_code_point_mask =
22846 const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
22847 const __m128i perm = _mm_shuffle_epi8(in, sh);
22848 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
22849 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
22850 const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
22858 const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
22859 const __m128i perm = _mm_shuffle_epi8(in, sh);
22860 const __m128i ascii =
22862 const __m128i middlebyte =
22864 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22865 const __m128i highbyte =
22867 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
22868 const __m128i composed =
22876 const uint8_t idx =
22878 const uint8_t consumed =
22886 const __m128i sh =
22887 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
22888 const __m128i perm = _mm_shuffle_epi8(in, sh);
22889 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
22890 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
22891 const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
22897 const __m128i sh =
22898 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
22899 const __m128i perm = _mm_shuffle_epi8(in, sh);
22900 const __m128i ascii =
22902 const __m128i middlebyte =
22904 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22905 const __m128i highbyte =
22907 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
22908 const __m128i composed =
22914 const __m128i sh =
22915 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
22916 const __m128i perm = _mm_shuffle_epi8(in, sh);
22917 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f));
22918 const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00));
22919 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
22922 const __m128i correct =
22925 const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
22926 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000));
22927 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
22928 const __m128i composed =
22942 std::pair<const char16_t *, char *>
22943 avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len,
22945 const char16_t *end = buf + len;
22948 __m256i in = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf));
22951 const __m256i swap = _mm256_setr_epi8(
22980 avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
22982 const char16_t *start = buf;
22983 const char16_t *end = buf + len;
22985 __m256i in = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf));
22988 const __m256i swap = _mm256_setr_epi8(
23083 std::pair<const char16_t*, char*> avx2_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_output) {
23084 const char16_t* end = buf + len;
23085 const __m256i v_0000 = _mm256_setzero_si256();
23086 const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800);
23087 const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800);
23088 const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080);
23089 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
23094 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
23099 const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80);
23102 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in),_mm256_extractf128_si256(in,1));
23111 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000);
23112 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
23115 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000);
23116 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
23122 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
23123 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
23126 const __m256i t0 = _mm256_slli_epi16(in, 2);
23128 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
23130 const __m256i t2 = _mm256_and_si256(in, v_003f);
23132 const __m256i t3 = _mm256_or_si256(t1, t2);
23134 const __m256i t4 = _mm256_or_si256(t3, v_c080);
23137 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask);
23140 const uint32_t M0 = one_byte_bitmask & 0x55555555;
23141 const uint32_t M1 = M0 >> 7;
23142 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
23145 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23146 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
23148 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
23149 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
23151 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
23165 const __m256i surrogates_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800);
23169 const uint32_t surrogates_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(surrogates_bytemask));
23174 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
23205 const __m256i t0 = _mm256_shuffle_epi8(in, dup_even);
23207 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
23209 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
23212 const __m256i s0 = _mm256_srli_epi16(in, 4);
23214 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
23216 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
23218 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
23219 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
23220 const __m256i s4 = _mm256_xor_si256(s3, m0);
23224 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
23225 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
23228 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
23233 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
23234 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
23235 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
23247 const uint8_t mask0 = uint8_t(mask);
23248 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
23249 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
23250 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
23252 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
23253 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
23254 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
23255 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
23257 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
23258 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
23259 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
23260 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
23263 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
23264 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
23265 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
23266 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
23324 std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
23325 const char16_t* start = buf;
23326 const char16_t* end = buf + len;
23328 const __m256i v_0000 = _mm256_setzero_si256();
23329 const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800);
23330 const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800);
23331 const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080);
23332 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
23337 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
23342 const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80);
23345 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in),_mm256_extractf128_si256(in,1));
23354 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000);
23355 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
23358 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000);
23359 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
23365 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
23366 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
23369 const __m256i t0 = _mm256_slli_epi16(in, 2);
23371 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
23373 const __m256i t2 = _mm256_and_si256(in, v_003f);
23375 const __m256i t3 = _mm256_or_si256(t1, t2);
23377 const __m256i t4 = _mm256_or_si256(t3, v_c080);
23380 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask);
23383 const uint32_t M0 = one_byte_bitmask & 0x55555555;
23384 const uint32_t M1 = M0 >> 7;
23385 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
23388 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23389 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
23391 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
23392 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
23394 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
23408 const __m256i surrogates_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800);
23412 const uint32_t surrogates_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(surrogates_bytemask));
23417 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
23448 const __m256i t0 = _mm256_shuffle_epi8(in, dup_even);
23450 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
23452 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
23455 const __m256i s0 = _mm256_srli_epi16(in, 4);
23457 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
23459 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
23461 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
23462 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
23463 const __m256i s4 = _mm256_xor_si256(s3, m0);
23467 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
23468 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
23471 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
23476 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
23477 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
23478 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
23490 const uint8_t mask0 = uint8_t(mask);
23491 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
23492 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
23493 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
23495 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
23496 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
23497 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
23498 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
23500 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
23501 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
23502 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
23503 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
23506 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
23507 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
23508 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
23509 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
23615 std::pair<const char16_t*, char32_t*> avx2_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
23616 const char16_t* end = buf + len;
23617 const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800);
23618 const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800);
23623 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
23631 const __m256i surrogates_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800);
23635 const uint32_t surrogates_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(surrogates_bytemask));
23682 std::pair<result, char32_t*> avx2_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
23683 const char16_t* start = buf;
23684 const char16_t* end = buf + len;
23685 const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800);
23686 const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800);
23691 const __m256i swap = _mm256_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
23699 const __m256i surrogates_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800);
23703 const uint32_t surrogates_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(surrogates_bytemask));
23744 std::pair<const char32_t *, char *>
23745 avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len,
23747 const size_t rounded_len =
23787 avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
23789 const size_t rounded_len =
23797 const char32_t *start = buf;
23838 std::pair<const char32_t*, char*> avx2_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
23839 const char32_t* end = buf + len;
23840 const __m256i v_0000 = _mm256_setzero_si256();
23841 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
23842 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
23843 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
23844 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
23845 const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
23849 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
23864 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in_16),_mm256_extractf128_si256(in_16,1));
23873 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
23874 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
23877 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
23878 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
23883 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
23884 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
23887 const __m256i t0 = _mm256_slli_epi16(in_16, 2);
23889 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
23891 const __m256i t2 = _mm256_and_si256(in_16, v_003f);
23893 const __m256i t3 = _mm256_or_si256(t1, t2);
23895 const __m256i t4 = _mm256_or_si256(t3, v_c080);
23898 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
23901 const uint32_t M0 = one_byte_bitmask & 0x55555555;
23902 const uint32_t M1 = M0 >> 7;
23903 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
23906 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23907 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
23909 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
23910 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
23912 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
23924 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
23925 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
23928 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
23931 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
23962 const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
23964 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
23966 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
23969 const __m256i s0 = _mm256_srli_epi16(in_16, 4);
23971 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
23973 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
23975 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
23976 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
23977 const __m256i s4 = _mm256_xor_si256(s3, m0);
23981 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
23982 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
23985 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
23990 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
23991 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
23992 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
24004 const uint8_t mask0 = uint8_t(mask);
24005 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
24006 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
24007 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
24009 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
24010 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
24011 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
24012 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
24014 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
24015 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
24016 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
24017 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
24020 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
24021 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
24022 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
24023 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
24067 const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
24078 std::pair<result, char*> avx2_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
24079 const char32_t* end = buf + len;
24080 const char32_t* start = buf;
24082 const __m256i v_0000 = _mm256_setzero_si256();
24083 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
24084 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
24085 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
24086 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
24087 const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
24088 const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
24090 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
24096 const __m256i max_input = _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff);
24109 const __m128i utf8_packed = _mm_packus_epi16(_mm256_castsi256_si128(in_16),_mm256_extractf128_si256(in_16,1));
24118 const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
24119 const uint32_t one_byte_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
24122 const __m256i one_or_two_bytes_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
24123 const uint32_t one_or_two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
24128 const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
24129 const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
24132 const __m256i t0 = _mm256_slli_epi16(in_16, 2);
24134 const __m256i t1 = _mm256_and_si256(t0, v_1f00);
24136 const __m256i t2 = _mm256_and_si256(in_16, v_003f);
24138 const __m256i t3 = _mm256_or_si256(t1, t2);
24140 const __m256i t4 = _mm256_or_si256(t3, v_c080);
24143 const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
24146 const uint32_t M0 = one_byte_bitmask & 0x55555555;
24147 const uint32_t M1 = M0 >> 7;
24148 const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
24151 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
24152 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
24154 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
24155 const __m128i shuffle_2 = _mm_loadu_si128((__m128i*)(row_2 + 1));
24157 const __m256i utf8_packed = _mm256_shuffle_epi8(utf8_unpacked, _mm256_setr_m128i(shuffle,shuffle_2));
24169 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
24170 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
24175 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
24176 const __m256i forbidden_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800);
24181 const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
24212 const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
24214 const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
24216 const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000));
24219 const __m256i s0 = _mm256_srli_epi16(in_16, 4);
24221 const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
24223 const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
24225 const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
24226 const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
24227 const __m256i s4 = _mm256_xor_si256(s3, m0);
24231 const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
24232 const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
24235 const uint32_t mask = (one_byte_bitmask & 0x55555555) |
24240 const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
24241 const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
24242 const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
24254 const uint8_t mask0 = uint8_t(mask);
24255 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
24256 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
24257 const __m128i utf8_0 = _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
24259 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
24260 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
24261 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
24262 const __m128i utf8_1 = _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
24264 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
24265 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
24266 const __m128i shuffle2 = _mm_loadu_si128((__m128i*)(row2 + 1));
24267 const __m128i utf8_2 = _mm_shuffle_epi8(_mm256_extractf128_si256(out0,1), shuffle2);
24270 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
24271 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
24272 const __m128i shuffle3 = _mm_loadu_si128((__m128i*)(row3 + 1));
24273 const __m128i utf8_3 = _mm_shuffle_epi8(_mm256_extractf128_si256(out1,1), shuffle3);
24321 std::pair<const char32_t*, char16_t*> avx2_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
24322 const char32_t* end = buf + len;
24324 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
24331 const __m256i v_00000000 = _mm256_setzero_si256();
24332 const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
24335 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
24336 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
24339 const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
24340 const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
24345 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
24387 std::pair<result, char16_t*> avx2_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
24388 const char32_t* start = buf;
24389 const char32_t* end = buf + len;
24391 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
24396 const __m256i v_00000000 = _mm256_setzero_si256();
24397 const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
24400 const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
24401 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
24404 const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
24405 const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
24406 const __m256i forbidden_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800);
24413 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
24458 size_t convert_masked_utf8_to_latin1(const char *input,
24470 const __m128i in = _mm_loadu_si128((__m128i *)input);
24471 const __m128i in_second_half = _mm_loadu_si128((__m128i *)(input + 16));
24473 const uint16_t input_utf8_end_of_code_point_mask =
24497 const uint8_t idx =
24499 const uint8_t consumed =
24509 const __m128i sh =
24510 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
24511 const __m128i perm = _mm_shuffle_epi8(in, sh);
24512 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
24513 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
24515 const __m128i latin1_packed = _mm_packus_epi16(composed,composed);
24537 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
24539 simdutf_really_inline bool has_full_block() const;
24540 simdutf_really_inline const uint8_t *full_block() const;
24550 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
24553 const uint8_t *buf;
24554 const size_t len;
24555 const size_t lenminusstep;
24560 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
24570 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
24590 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
24596 simdutf_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
24601 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
24606 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
24630 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
24637 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
24639 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
24640 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
24641 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
24642 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
24643 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
24644 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
24652 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
24656 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
24658 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
24673 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
24674 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
24702 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
24720 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
24721 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
24733 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
24736 static const uint8_t max_array[32] = {
24742 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
24757 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
24774 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
24797 simdutf_really_inline bool errors() const {
24820 bool generic_validate_utf8(const uint8_t * input, size_t length) {
24837 bool generic_validate_utf8(const char * input, size_t length) {
24838 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24845 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
24854 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
24869 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
24877 result generic_validate_utf8_with_errors(const char * input, size_t length) {
24878 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24882 bool generic_validate_ascii(const uint8_t * input, size_t length) {
24898 bool generic_validate_ascii(const char * input, size_t length) {
24899 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24903 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
24909 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
24920 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
24927 result generic_validate_ascii_with_errors(const char * input, size_t length) {
24928 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24948 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
24953 const size_t safety_margin = 16; // to avoid overruns!
24957 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
25021 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25028 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25030 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25031 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25032 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25033 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25034 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25035 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25043 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25047 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25049 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25064 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25065 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25093 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25111 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25112 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25139 simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
25154 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25156 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25214 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
25229 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25231 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25307 simdutf_really_inline bool errors() const {
25328 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
25332 const size_t safety_margin = 16; // to avoid overruns!
25334 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
25373 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25380 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25382 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25383 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25384 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25385 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25386 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25387 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25395 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25399 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25401 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25416 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25417 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25445 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25463 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25464 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25481 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25491 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
25506 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25508 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25565 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
25580 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25582 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25652 simdutf_really_inline bool errors() const {
25672 simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
25676 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25683 simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
25688 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25710 simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
25714 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25723 simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
25728 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25744 simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
25748 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
25752 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25780 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25790 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25792 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25793 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25794 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25795 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25796 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25797 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25805 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25809 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25810 constexpr const uint8_t FORBIDDEN = 0xff;
25812 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25827 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25828 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25856 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25883 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25891 simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
25906 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25908 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25965 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
25980 const size_t safety_margin = size - margin + 1; // to avoid overruns!
25982 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
26058 simdutf_really_inline bool errors() const {
26078 simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
26093 const size_t safety_margin = size - margin + 1; // to avoid overruns!
26095 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
26148 simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
26163 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
26167 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
26171 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
26175 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
26179 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
26180 const char16_t* tail = avx2_validate_utf16<endianness::LITTLE>(buf, len);
26188 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
26189 const char16_t* tail = avx2_validate_utf16<endianness::BIG>(buf, len);
26197 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
26207 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
26217 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
26218 const char32_t* tail = avx2_validate_utf32le(buf, len);
26226 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
26236 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
26237 std::pair<const char*, char*> ret = avx2_convert_latin1_to_utf8(buf, len, utf8_output);
26241 const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
26249 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26250 std::pair<const char*, char16_t*> ret = avx2_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
26254 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::LITTLE>(
26262 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26263 std::pair<const char*, char16_t*> ret = avx2_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
26267 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::BIG>(
26275 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26276 std::pair<const char*, char32_t*> ret = avx2_convert_latin1_to_utf32(buf, len, utf32_output);
26280 const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
26288 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
26293 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
26298 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* input, size_t size,
26299 char* latin1_output) const noexcept {
26303 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26308 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26313 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26318 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26323 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* input, size_t size,
26324 char16_t* utf16_output) const noexcept {
26328 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* input, size_t size,
26329 char16_t* utf16_output) const noexcept {
26333 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26338 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26343 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
26344 char32_t* utf32_output) const noexcept {
26349 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26350 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
26354 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::LITTLE>(
26362 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26363 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
26367 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::BIG>(
26375 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26392 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26409 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26414 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26419 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26420 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
26424 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::LITTLE>(
26432 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26433 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
26437 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::BIG>(
26445 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26463 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26481 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26485 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26489 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26490 std::pair<const char32_t*, char*> ret = avx2_convert_utf32_to_utf8(buf, len, utf8_output);
26494 const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
26502 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26503 std::pair<const char32_t*, char*> ret = avx2_convert_utf32_to_latin1(buf, len, latin1_output);
26507 const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
26515 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26532 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26536 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26553 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26554 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
26558 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::LITTLE>(
26566 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26567 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
26571 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::BIG>(
26579 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26597 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26615 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26619 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26620 std::pair<const char32_t*, char16_t*> ret = avx2_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
26624 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::LITTLE>(
26632 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26633 std::pair<const char32_t*, char16_t*> ret = avx2_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
26637 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::BIG>(
26645 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26662 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26679 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26683 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26687 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26691 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26695 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
26699 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
26703 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
26707 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
26711 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
26715 simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
26719 simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
26723 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
26727 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
26731 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
26735 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
26740 simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
26744 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
26749 simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
26753 simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char *input, size_t len) const noexcept {
26754 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
26767 __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i));
26768 __m256i input2 = _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i)));
26769 __m256i input3 = _mm256_loadu_si256((const __m256i *)(data + i + 2*sizeof(__m256i)));
26770 __m256i input4 = _mm256_loadu_si256((const __m256i *)(data + i + 3*sizeof(__m256i)));
26780 __m256i input_256_chunk = _mm256_loadu_si256((const __m256i *)(data + i));
26791 return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast<const char *>(data + i), len - i);
26794 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
26795 const __m256i v_00000000 = _mm256_setzero_si256();
26796 const __m256i v_ffffff80 = _mm256_set1_epi32((uint32_t)0xffffff80);
26797 const __m256i v_fffff800 = _mm256_set1_epi32((uint32_t)0xfffff800);
26798 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
26803 const __m256i ascii_bytes_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffffff80), v_00000000);
26804 const __m256i one_two_bytes_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_fffff800), v_00000000);
26805 const __m256i two_bytes_bytemask = _mm256_xor_si256(one_two_bytes_bytemask, ascii_bytes_bytemask);
26806 const __m256i one_two_three_bytes_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
26807 const __m256i three_bytes_bytemask = _mm256_xor_si256(one_two_three_bytes_bytemask, one_two_bytes_bytemask);
26808 const uint32_t ascii_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(ascii_bytes_bytemask));
26809 const uint32_t two_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(two_bytes_bytemask));
26810 const uint32_t three_bytes_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(three_bytes_bytemask));
26820 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
26821 const __m256i v_00000000 = _mm256_setzero_si256();
26822 const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
26827 const __m256i surrogate_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
26828 const uint32_t surrogate_bitmask = static_cast<uint32_t>(_mm256_movemask_epi8(surrogate_bytemask));
26835 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
26876 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
26881 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26889 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26909 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
26911 simdutf_really_inline bool has_full_block() const;
26912 simdutf_really_inline const uint8_t *full_block() const;
26922 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
26925 const uint8_t *buf;
26926 const size_t len;
26927 const size_t lenminusstep;
26932 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
26942 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
26962 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
26968 simdutf_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
26973 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
26978 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
27002 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27009 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27011 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27012 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27013 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27014 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27015 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27016 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27024 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27028 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27030 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27045 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27046 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27074 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27092 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27093 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27105 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
27108 static const uint8_t max_array[32] = {
27114 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
27129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27146 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
27169 simdutf_really_inline bool errors() const {
27192 bool generic_validate_utf8(const uint8_t * input, size_t length) {
27209 bool generic_validate_utf8(const char * input, size_t length) {
27210 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27217 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
27226 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
27241 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
27249 result generic_validate_utf8_with_errors(const char * input, size_t length) {
27250 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27254 bool generic_validate_ascii(const uint8_t * input, size_t length) {
27270 bool generic_validate_ascii(const char * input, size_t length) {
27271 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27275 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
27281 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
27292 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
27299 result generic_validate_ascii_with_errors(const char * input, size_t length) {
27300 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27320 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
27325 const size_t safety_margin = 16; // to avoid overruns!
27329 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27393 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27400 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27402 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27403 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27404 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27405 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27406 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27407 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27415 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27419 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27421 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27436 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27437 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27465 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27483 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27484 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27501 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27511 simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
27526 const size_t safety_margin = size - margin + 1; // to avoid overruns!
27528 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27586 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
27601 const size_t safety_margin = size - margin + 1; // to avoid overruns!
27603 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27679 simdutf_really_inline bool errors() const {
27700 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
27704 const size_t safety_margin = 16; // to avoid overruns!
27706 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27745 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27752 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27754 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27755 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27756 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27757 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27758 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27759 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27767 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27771 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27773 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27788 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27789 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27817 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27835 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27836 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27853 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27863 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
27878 const size_t safety_margin = size - margin + 1; // to avoid overruns!
27880 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27937 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
27952 const size_t safety_margin = size - margin + 1; // to avoid overruns!
27954 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28024 simdutf_really_inline bool errors() const {
28044 simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
28048 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28055 simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
28060 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28082 simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
28086 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28095 simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
28100 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28116 simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
28120 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
28124 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28146 simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
28153 if(validate_utf16(reinterpret_cast<const char16_t*>(input), length/2)) { out |= encoding_type::UTF16_LE; }
28156 if(validate_utf32(reinterpret_cast<const char32_t*>(input), length/4)) { out |= encoding_type::UTF32_LE; }
28162 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
28166 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
28170 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
28174 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
28178 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
28182 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
28186 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
28190 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
28194 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
28198 simdutf_warn_unused bool implementation::validate_utf32(const char16_t *buf, size_t len) const noexcept {
28202 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28206 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28210 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28214 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28218 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28222 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28226 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28230 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28234 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28238 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28242 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28246 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28250 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28254 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28258 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28262 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28266 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28270 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28274 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28278 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28282 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28286 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28290 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28294 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28298 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28302 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28306 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28310 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28314 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28318 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28322 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
28326 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
28330 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
28334 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
28338 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
28342 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
28346 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
28350 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
28354 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
28358 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
28362 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
28366 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
28397 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
28401 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28409 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28429 const __m128i v_u16,
28431 const __m128i one_byte_bytemask,
28432 const uint16_t one_byte_bitmask
28435 const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080);
28437 const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
28439 const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
28446 const __m128i t0 = _mm_slli_epi16(v_u16, 2);
28448 const __m128i t1 = _mm_and_si128(t0, v_1f00);
28450 const __m128i t2 = _mm_and_si128(v_u16, v_003f);
28452 const __m128i t3 = _mm_or_si128(t1, t2);
28454 const __m128i t4 = _mm_or_si128(t3, v_c080);
28457 const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask);
28461 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
28462 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
28463 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
28465 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
28466 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
28467 const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
28477 const __m128i v_u16,
28479 const __m128i v_0000,
28480 const __m128i v_ff80
28483 const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000);
28484 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
28497 int sse_detect_encodings(const char * buf, size_t len) {
28498 const char* start = buf;
28499 const char* end = buf + len;
28507 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28508 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28520 const auto u0 = simd16<uint16_t>(in);
28521 const auto u1 = simd16<uint16_t>(secondin);
28522 const auto u2 = simd16<uint16_t>(thirdin);
28523 const auto u3 = simd16<uint16_t>(fourthin);
28525 const auto v0 = u0.shr<8>();
28526 const auto v1 = u1.shr<8>();
28527 const auto v2 = u2.shr<8>();
28528 const auto v3 = u3.shr<8>();
28530 const auto in16 = simd16<uint16_t>::pack(v0, v1);
28531 const auto nextin16 = simd16<uint16_t>::pack(v2, v3);
28533 const auto surrogates_wordmask0 = (in16 & v_f8) == v_d8;
28534 const auto surrogates_wordmask1 = (nextin16 & v_f8) == v_d8;
28552 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
28553 const char16_t* end16 = reinterpret_cast<const char16_t*>(start) + len/2;
28555 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28556 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28558 const uint16_t V0 = static_cast<uint16_t>(~surrogates_bitmask0);
28560 const auto vH0 = (in16 & v_fc) == v_dc;
28561 const uint16_t H0 = static_cast<uint16_t>(vH0.to_bitmask());
28563 const uint16_t L0 = static_cast<uint16_t>(~H0 & surrogates_bitmask0);
28565 const uint16_t a0 = static_cast<uint16_t>(L0 & (H0 >> 1));
28567 const uint16_t b0 = static_cast<uint16_t>(a0 << 1);
28569 const uint16_t c0 = static_cast<uint16_t>(V0 | a0 | b0);
28581 const auto in0 = simd16<uint16_t>(input);
28582 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
28584 const auto t0 = in0.shr<8>();
28585 const auto t1 = in1.shr<8>();
28587 const auto in_16 = simd16<uint16_t>::pack(t0, t1);
28589 const auto surrogates_wordmask = (in_16 & v_f8) == v_d8;
28590 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28594 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28596 const auto vH = (in_16 & v_fc) == v_dc;
28597 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28599 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28601 const uint16_t a = static_cast<uint16_t>(L & (H >> 1));
28603 const uint16_t b = static_cast<uint16_t>(a << 1);
28605 const uint16_t c = static_cast<uint16_t>(V | a | b);
28621 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
28622 const char32_t* end32 = reinterpret_cast<const char32_t*>(start) + len/4;
28626 const __m128i offset = _mm_set1_epi32(0xffff2000);
28627 const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff);
28640 const __m128i in32 = _mm_loadu_si128((__m128i *)input);
28687 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
28692 const __m128i standardmax = _mm_set1_epi32(0x10ffff);
28694 if (_mm_testz_si128(is_zero, is_zero) == 1 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
28750 const char16_t* sse_validate_utf16(const char16_t* input, size_t size) {
28751 const char16_t* end = input + size;
28753 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28754 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28755 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28756 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28769 const auto t0 = in0.shr<8>();
28770 const auto t1 = in1.shr<8>();
28772 const auto in = simd16<uint16_t>::pack(t0, t1);
28775 const auto surrogates_wordmask = (in & v_f8) == v_d8;
28776 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28788 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28791 const auto vH = (in & v_fc) == v_dc;
28792 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28796 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28798 const uint16_t a = static_cast<uint16_t>(L & (H >> 1)); // A low surrogate must be followed by high one.
28801 const uint16_t b = static_cast<uint16_t>(a << 1); // Just mark that the opposite fact holds,
28803 const uint16_t c = static_cast<uint16_t>(V | a | b); // Combine all the masks into the final one.
28826 const result sse_validate_utf16_with_errors(const char16_t* input, size_t size) {
28827 const char16_t* start = input;
28828 const char16_t* end = input + size;
28830 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28831 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28832 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28833 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28847 const auto t0 = in0.shr<8>();
28848 const auto t1 = in1.shr<8>();
28850 const auto in = simd16<uint16_t>::pack(t0, t1);
28853 const auto surrogates_wordmask = (in & v_f8) == v_d8;
28854 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28866 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28869 const auto vH = (in & v_fc) == v_dc;
28870 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28874 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28876 const uint16_t a = static_cast<uint16_t>(L & (H >> 1)); // A low surrogate must be followed by high one.
28879 const uint16_t b = static_cast<uint16_t>(a << 1); // Just mark that the opposite fact holds,
28881 const uint16_t c = static_cast<uint16_t>(V | a | b); // Combine all the masks into the final one.
28907 const char32_t* sse_validate_utf32le(const char32_t* input, size_t size) {
28908 const char32_t* end = input + size;
28910 const __m128i standardmax = _mm_set1_epi32(0x10ffff);
28911 const __m128i offset = _mm_set1_epi32(0xffff2000);
28912 const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff);
28917 const __m128i in = _mm_loadu_si128((__m128i *)input);
28936 const result sse_validate_utf32le_with_errors(const char32_t* input, size_t size) {
28937 const char32_t* start = input;
28938 const char32_t* end = input + size;
28940 const __m128i standardmax = _mm_set1_epi32(0x10ffff);
28941 const __m128i offset = _mm_set1_epi32(0xffff2000);
28942 const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff);
28947 const __m128i in = _mm_loadu_si128((__m128i *)input);
28968 std::pair<const char* const, char* const> sse_convert_latin1_to_utf8(
28969 const char* latin_input,
28970 const size_t latin_input_length,
28972 const char* end = latin_input + latin_input_length;
28974 const __m128i v_0000 = _mm_setzero_si128();
28976 const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80);
28978 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
28980 const __m128i latin_1_half_into_u16_byte_mask = _mm_setr_epi8(
28991 const __m128i latin_2_half_into_u16_byte_mask = _mm_setr_epi8(
29053 std::pair<const char*, char16_t*> sse_convert_latin1_to_utf16(const char *latin1_input, size_t len,
29058 __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&latin1_input[i]));
29072 std::pair<const char*, char32_t*> sse_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
29073 const char* end = buf + len;
29114 size_t convert_masked_utf8_to_utf16(const char *input,
29127 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29128 const __m128i in = _mm_loadu_si128((__m128i *)input);
29129 const uint16_t input_utf8_end_of_code_point_mask =
29147 const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29148 const __m128i perm = _mm_shuffle_epi8(in, sh);
29149 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
29150 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
29160 const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
29161 const __m128i perm = _mm_shuffle_epi8(in, sh);
29162 const __m128i ascii =
29164 const __m128i middlebyte =
29166 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29167 const __m128i highbyte =
29169 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
29170 const __m128i composed =
29180 const uint8_t idx =
29182 const uint8_t consumed =
29190 const __m128i sh =
29191 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29192 const __m128i perm = _mm_shuffle_epi8(in, sh);
29193 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
29194 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
29201 const __m128i sh =
29202 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29203 const __m128i perm = _mm_shuffle_epi8(in, sh);
29204 const __m128i ascii =
29206 const __m128i middlebyte =
29208 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29209 const __m128i highbyte =
29211 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
29212 const __m128i composed =
29228 const __m128i sh =
29229 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29230 const __m128i perm = _mm_shuffle_epi8(in, sh);
29231 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f));
29232 const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00));
29233 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29236 const __m128i correct =
29239 const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
29242 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000));
29243 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
29248 const __m128i composed =
29251 const __m128i composedminus =
29253 const __m128i lowtenbits =
29256 const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff));
29257 const __m128i lowtenbitsadd =
29259 const __m128i hightenbitsadd =
29261 const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16);
29297 size_t convert_masked_utf8_to_utf32(const char *input,
29310 const __m128i in = _mm_loadu_si128((__m128i *)input);
29311 const uint16_t input_utf8_end_of_code_point_mask =
29325 const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29326 const __m128i perm = _mm_shuffle_epi8(in, sh);
29327 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
29328 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
29329 const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
29338 const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
29339 const __m128i perm = _mm_shuffle_epi8(in, sh);
29340 const __m128i ascii =
29342 const __m128i middlebyte =
29344 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29345 const __m128i highbyte =
29347 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
29348 const __m128i composed =
29356 const uint8_t idx =
29358 const uint8_t consumed =
29366 const __m128i sh =
29367 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29368 const __m128i perm = _mm_shuffle_epi8(in, sh);
29369 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
29370 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
29371 const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
29377 const __m128i sh =
29378 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29379 const __m128i perm = _mm_shuffle_epi8(in, sh);
29380 const __m128i ascii =
29382 const __m128i middlebyte =
29384 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29385 const __m128i highbyte =
29387 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4);
29388 const __m128i composed =
29394 const __m128i sh =
29395 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29396 const __m128i perm = _mm_shuffle_epi8(in, sh);
29397 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f));
29398 const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00));
29399 const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2);
29402 const __m128i correct =
29405 const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
29406 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000));
29407 const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
29408 const __m128i composed =
29427 size_t convert_masked_utf8_to_latin1(const char *input,
29439 const __m128i in = _mm_loadu_si128((__m128i *)input);
29440 const uint16_t input_utf8_end_of_code_point_mask =
29449 const uint8_t idx =
29451 const uint8_t consumed =
29461 const __m128i sh =
29462 _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
29463 const __m128i perm = _mm_shuffle_epi8(in, sh);
29464 const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
29465 const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
29467 const __m128i latin1_packed = _mm_packus_epi16(composed,composed);
29478 std::pair<const char16_t*, char*> sse_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
29479 const char16_t* end = buf + len;
29482 __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buf));
29485 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29505 std::pair<result, char*> sse_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
29506 const char16_t* start = buf;
29507 const char16_t* end = buf + len;
29509 __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buf));
29512 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29593 std::pair<const char16_t*, char*> sse_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_output) {
29595 const char16_t* end = buf + len;
29597 const __m128i v_0000 = _mm_setzero_si128();
29598 const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
29599 const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
29600 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
29605 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29609 const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80);
29613 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29619 const __m128i utf8_packed = _mm_packus_epi16(in,in);
29629 const __m128i utf8_packed = _mm_packus_epi16(in,nextin);
29640 const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000);
29641 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
29644 const __m128i one_or_two_bytes_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000);
29645 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
29656 const __m128i surrogates_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800);
29660 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
29665 const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
29694 const __m128i t0 = _mm_shuffle_epi8(in, dup_even);
29696 const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
29698 const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000));
29701 const __m128i s0 = _mm_srli_epi16(in, 4);
29703 const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
29705 const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
29707 const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
29708 const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
29709 const __m128i s4 = _mm_xor_si128(s3, m0);
29713 const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
29714 const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
29717 const uint16_t mask = (one_byte_bitmask & 0x5555) |
29721 const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
29722 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
29723 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
29731 const uint8_t mask0 = uint8_t(mask);
29733 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
29734 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
29735 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
29737 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
29739 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
29740 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
29741 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
29797 std::pair<result, char*> sse_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
29798 const char16_t* start = buf;
29799 const char16_t* end = buf + len;
29801 const __m128i v_0000 = _mm_setzero_si128();
29802 const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
29803 const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
29804 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
29809 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29813 const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80);
29817 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
29823 const __m128i utf8_packed = _mm_packus_epi16(in,in);
29833 const __m128i utf8_packed = _mm_packus_epi16(in,nextin);
29844 const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000);
29845 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
29848 const __m128i one_or_two_bytes_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000);
29849 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
29860 const __m128i surrogates_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800);
29864 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
29869 const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
29898 const __m128i t0 = _mm_shuffle_epi8(in, dup_even);
29900 const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
29902 const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000));
29905 const __m128i s0 = _mm_srli_epi16(in, 4);
29907 const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
29909 const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
29911 const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
29912 const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
29913 const __m128i s4 = _mm_xor_si128(s3, m0);
29917 const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
29918 const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
29921 const uint16_t mask = (one_byte_bitmask & 0x5555) |
29925 const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
29926 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
29927 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
29935 const uint8_t mask0 = uint8_t(mask);
29937 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
29938 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
29939 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
29941 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
29943 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
29944 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
29945 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
30048 std::pair<const char16_t*, char32_t*> sse_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
30049 const char16_t* end = buf + len;
30051 const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
30052 const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
30058 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
30065 const __m128i surrogates_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800);
30069 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
30115 std::pair<result, char32_t*> sse_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
30116 const char16_t* start = buf;
30117 const char16_t* end = buf + len;
30119 const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
30120 const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
30126 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
30133 const __m128i surrogates_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800);
30137 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
30177 std::pair<const char32_t *, char *>
30178 sse_convert_utf32_to_latin1(const char32_t *buf, size_t len,
30180 const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16
30211 sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
30213 const char32_t *start = buf;
30214 const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16
30257 std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
30258 const char32_t* end = buf + len;
30260 const __m128i v_0000 = _mm_setzero_si128();//__m128 = 128 bits
30261 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); //1111 1000 0000 0000
30262 const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); //1100 0000 1000 0000
30263 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); //1111 1111 1000 0000
30264 const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); //1111 1111 1111 1111 0000 0000 0000 0000
30265 const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); //0111 1111 1111 1111 1111 1111 1111 1111
30268 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
30303 const __m128i utf8_packed = _mm_packus_epi16(in_16,in_16); // creates two copies of in_16 in one vector
30316 const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16);
30327 const __m128i one_byte_bytemask = _mm_cmpeq_epi16( // this takes four bytes at a time and compares:
30332 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask)); // collect the MSBs from the previous vector and put them into a uint16_t mask
30335 const __m128i one_or_two_bytes_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000);
30336 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
30343 const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000
30344 const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111
30347 const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two
30349 const __m128i t1 = _mm_and_si128(t0, v_1f00); // potential first utf8 byte
30351 const __m128i t2 = _mm_and_si128(in_16, v_003f);// potential second utf8 byte
30353 const __m128i t3 = _mm_or_si128(t1, t2); // first and second potential utf8 byte together
30355 const __m128i t4 = _mm_or_si128(t3, v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit
30358 const __m128i utf8_unpacked = _mm_blendv_epi8(t4, in_16, one_byte_bytemask);
30362 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
30363 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
30364 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
30366 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
30367 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
30368 const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
30381 const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
30382 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
30385 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30388 const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
30417 const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even);
30419 const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
30421 const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000));
30424 const __m128i s0 = _mm_srli_epi16(in_16, 4);
30426 const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
30428 const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
30430 const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
30431 const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
30432 const __m128i s4 = _mm_xor_si128(s3, m0);
30436 const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
30437 const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
30440 const uint16_t mask = (one_byte_bitmask & 0x5555) |
30444 const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
30445 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
30446 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
30454 const uint8_t mask0 = uint8_t(mask);
30456 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
30457 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
30458 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
30460 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
30462 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
30463 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
30464 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
30505 const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff);
30516 std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
30518 const char32_t* end = buf + len;
30519 const char32_t* start = buf;
30521 const __m128i v_0000 = _mm_setzero_si128();
30522 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30523 const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080);
30524 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
30525 const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
30526 const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff);
30527 const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff);
30529 const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
30559 const __m128i utf8_packed = _mm_packus_epi16(in_16,in_16);
30576 const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16);
30587 const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000);
30588 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
30591 const __m128i one_or_two_bytes_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000);
30592 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
30599 const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
30600 const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
30603 const __m128i t0 = _mm_slli_epi16(in_16, 2);
30605 const __m128i t1 = _mm_and_si128(t0, v_1f00);
30607 const __m128i t2 = _mm_and_si128(in_16, v_003f);
30609 const __m128i t3 = _mm_or_si128(t1, t2);
30611 const __m128i t4 = _mm_or_si128(t3, v_c080);
30614 const __m128i utf8_unpacked = _mm_blendv_epi8(t4, in_16, one_byte_bytemask);
30618 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
30619 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
30620 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
30622 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
30623 const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
30624 const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
30637 const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
30638 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
30644 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30645 const __m128i forbidden_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800);
30650 const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
30679 const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even);
30681 const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
30683 const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000));
30686 const __m128i s0 = _mm_srli_epi16(in_16, 4);
30688 const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
30690 const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
30692 const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
30693 const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000));
30694 const __m128i s4 = _mm_xor_si128(s3, m0);
30698 const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
30699 const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
30702 const uint16_t mask = (one_byte_bitmask & 0x5555) |
30706 const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
30707 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
30708 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
30716 const uint8_t mask0 = uint8_t(mask);
30718 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
30719 const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
30720 const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
30722 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
30724 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
30725 const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
30726 const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
30771 std::pair<const char32_t*, char16_t*> sse_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
30773 const char32_t* end = buf + len;
30775 const __m128i v_0000 = _mm_setzero_si128();
30776 const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
30782 const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
30783 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
30790 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30791 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30795 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
30838 std::pair<result, char16_t*> sse_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
30839 const char32_t* start = buf;
30840 const char32_t* end = buf + len;
30842 const __m128i v_0000 = _mm_setzero_si128();
30843 const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
30848 const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
30849 const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
30856 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30857 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30858 const __m128i forbidden_bytemask = _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800);
30864 const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
30916 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
30918 simdutf_really_inline bool has_full_block() const;
30919 simdutf_really_inline const uint8_t *full_block() const;
30929 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
30932 const uint8_t *buf;
30933 const size_t len;
30934 const size_t lenminusstep;
30939 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
30949 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
30969 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
30975 simdutf_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
30980 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
30985 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
31009 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31016 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31018 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31019 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31020 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31021 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31022 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31023 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31031 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31035 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31037 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31052 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31053 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31081 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31099 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31100 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31112 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
31115 static const uint8_t max_array[32] = {
31121 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
31136 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31153 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
31176 simdutf_really_inline bool errors() const {
31199 bool generic_validate_utf8(const uint8_t * input, size_t length) {
31216 bool generic_validate_utf8(const char * input, size_t length) {
31217 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31224 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
31233 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
31248 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
31256 result generic_validate_utf8_with_errors(const char * input, size_t length) {
31257 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31261 bool generic_validate_ascii(const uint8_t * input, size_t length) {
31277 bool generic_validate_ascii(const char * input, size_t length) {
31278 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31282 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
31288 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
31299 result res = scalar::ascii::validate_with_errors(reinterpret_cast<const char*>(input + count), length - count);
31306 result generic_validate_ascii_with_errors(const char * input, size_t length) {
31307 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31327 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
31332 const size_t safety_margin = 16; // to avoid overruns!
31336 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31400 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31407 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31409 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31410 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31411 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31412 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31413 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31414 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31422 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31426 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31428 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31443 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31444 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31472 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31490 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31491 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31508 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31518 simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
31533 const size_t safety_margin = size - margin + 1; // to avoid overruns!
31535 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31593 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
31608 const size_t safety_margin = size - margin + 1; // to avoid overruns!
31610 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31686 simdutf_really_inline bool errors() const {
31707 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
31711 const size_t safety_margin = 16; // to avoid overruns!
31713 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31752 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31759 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31761 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31762 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31763 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31764 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31765 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31766 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31774 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31778 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31780 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31795 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31796 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31824 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31842 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31843 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31860 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31870 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
31885 const size_t safety_margin = size - margin + 1; // to avoid overruns!
31887 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31944 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
31959 const size_t safety_margin = size - margin + 1; // to avoid overruns!
31961 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32031 simdutf_really_inline bool errors() const {
32051 simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
32055 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32062 simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
32067 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32089 simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
32093 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32102 simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
32107 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32123 simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
32127 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
32131 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32157 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
32167 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
32169 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
32170 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
32171 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
32172 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
32173 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
32174 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
32182 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
32186 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
32187 constexpr const uint8_t FORBIDDEN = 0xff;
32189 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
32204 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
32205 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
32233 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
32260 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
32268 simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
32283 const size_t safety_margin = size - margin + 1; // to avoid overruns!
32285 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32342 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
32357 const size_t safety_margin = size - margin + 1; // to avoid overruns!
32359 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32435 simdutf_really_inline bool errors() const {
32455 simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
32470 const size_t safety_margin = size - margin + 1; // to avoid overruns!
32472 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32530 simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
32545 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
32549 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
32553 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
32557 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
32561 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
32562 const char16_t* tail = sse_validate_utf16<endianness::LITTLE>(buf, len);
32570 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
32571 const char16_t* tail = sse_validate_utf16<endianness::BIG>(buf, len);
32579 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
32589 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
32599 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
32600 const char32_t* tail = sse_validate_utf32le(buf, len);
32608 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
32618 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
32620 std::pair<const char*, char*> ret = sse_convert_latin1_to_utf8(buf, len, utf8_output);
32624 const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
32632 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32633 std::pair<const char*, char16_t*> ret = sse_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
32637 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::LITTLE>(
32645 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32646 std::pair<const char*, char16_t*> ret = sse_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
32650 const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert<endianness::BIG>(
32658 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32659 std::pair<const char*, char32_t*> ret = sse_convert_latin1_to_utf32(buf, len, utf32_output);
32663 const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
32672 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
32677 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
32682 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
32686 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32691 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32696 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32701 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32707 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* input, size_t size,
32708 char16_t* utf16_output) const noexcept {
32712 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* input, size_t size,
32713 char16_t* utf16_output) const noexcept {
32717 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32722 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32727 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
32728 char32_t* utf32_output) const noexcept {
32732 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32733 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
32738 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::LITTLE>(
32746 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32747 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
32752 const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert<endianness::BIG>(
32760 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32777 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32795 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32800 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32805 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32806 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
32810 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::LITTLE>(
32818 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32819 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
32823 const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert<endianness::BIG>(
32831 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32849 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32867 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32871 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32875 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32876 std::pair<const char32_t*, char*> ret = sse_convert_utf32_to_latin1(buf, len, latin1_output);
32881 const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
32890 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32907 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32912 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
32913 std::pair<const char32_t*, char*> ret = sse_convert_utf32_to_utf8(buf, len, utf8_output);
32917 const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
32925 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
32942 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32943 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
32947 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::LITTLE>(
32955 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32956 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
32960 const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert<endianness::BIG>(
32968 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32986 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33004 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
33008 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33009 std::pair<const char32_t*, char16_t*> ret = sse_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
33013 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::LITTLE>(
33021 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33022 std::pair<const char32_t*, char16_t*> ret = sse_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
33026 const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert<endianness::BIG>(
33034 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33051 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33068 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33072 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33076 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33080 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33084 void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
33088 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
33092 simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
33096 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
33100 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
33104 simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
33108 simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
33112 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
33116 simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
33120 simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
33124 simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
33128 simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t len) const noexcept {
33129 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
33141 __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i));
33142 __m128i input2 = _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i)));
33143 __m128i input3 = _mm_loadu_si128((const __m128i *)(str + i + 2*sizeof(__m128i)));
33144 __m128i input4 = _mm_loadu_si128((const __m128i *)(str + i + 3*sizeof(__m128i)));
33163 __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i));
33172 return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast<const char *>(str + i), len - i);
33175 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
33179 simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
33183 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
33187 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
33188 const __m128i v_00000000 = _mm_setzero_si128();
33189 const __m128i v_ffffff80 = _mm_set1_epi32((uint32_t)0xffffff80);
33190 const __m128i v_fffff800 = _mm_set1_epi32((uint32_t)0xfffff800);
33191 const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
33196 const __m128i ascii_bytes_bytemask = _mm_cmpeq_epi32(_mm_and_si128(in, v_ffffff80), v_00000000);
33197 const __m128i one_two_bytes_bytemask = _mm_cmpeq_epi32(_mm_and_si128(in, v_fffff800), v_00000000);
33198 const __m128i two_bytes_bytemask = _mm_xor_si128(one_two_bytes_bytemask, ascii_bytes_bytemask);
33199 const __m128i one_two_three_bytes_bytemask = _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000);
33200 const __m128i three_bytes_bytemask = _mm_xor_si128(one_two_three_bytes_bytemask, one_two_bytes_bytemask);
33201 const uint16_t ascii_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(ascii_bytes_bytemask));
33202 const uint16_t two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(two_bytes_bytemask));
33203 const uint16_t three_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(three_bytes_bytemask));
33213 simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
33214 const __m128i v_00000000 = _mm_setzero_si128();
33215 const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
33220 const __m128i surrogate_bytemask = _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000);
33221 const uint16_t surrogate_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogate_bytemask));
33228 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {