Lines Matching defs:uint16_t

240      uint16_t array[8] = {x1, x2,  x3,  x4,  x5,  x6,  x7,  x8};               \
297 typedef uint16_t bitmask_t;
309 // We return uint32_t instead of uint16_t because that seems to be more efficient for most
310 // purposes (cutting it down to uint16_t costs performance in some compilers).
490 vst2q_u16(reinterpret_cast<uint16_t *>(p), low_pair);
492 vst2q_u16(reinterpret_cast<uint16_t *>(p + 8), high_pair);
805 typedef uint16_t bitmask_t;
824 static simdutf_really_inline simd16<bool> splat(bool _value) { return vmovq_n_u16(uint16_t(-(!!_value))); }
838 return vld1q_u16(reinterpret_cast<const uint16_t*>(values));
871 simdutf_really_inline operator simd16<uint16_t>() const;
889 struct simd16<uint16_t>: base16_numeric<uint16_t> {
890 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
891 simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<uint16_t>(_value) {}
894 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
896 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
897 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
903 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return vqaddq_u16(*this, other); }
904 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return vqsubq_u16(*this, other); }
907 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return vmaxq_u16(*this, other); }
908 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return vminq_u16(*this, other); }
910 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
912 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
913 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return vcleq_u16(*this, other); }
914 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return vcgeq_u16(*this, other); }
915 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return vcgtq_u16(*this, other); }
916 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return vcltq_u16(*this, other); }
919 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
921 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(vshrq_n_u16(*this, N)); }
923 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(vshlq_n_u16(*this, N)); }
926 simdutf_really_inline simd16<uint16_t> operator|(const simd16<uint16_t> other) const { return vorrq_u16(*this, other); }
927 simdutf_really_inline simd16<uint16_t> operator&(const simd16<uint16_t> other) const { return vandq_u16(*this, other); }
928 simdutf_really_inline simd16<uint16_t> operator^(const simd16<uint16_t> other) const { return veorq_u16(*this, other); }
930 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
931 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
936 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
940 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
1058 simdutf_really_inline uint64_t simd16x32<uint16_t>::not_in_range(const uint16_t low, const uint16_t high) const {
1059 const simd16<uint16_t> mask_low = simd16<uint16_t>::splat(low);
1060 const simd16<uint16_t> mask_high = simd16<uint16_t>::splat(high);
1061 simd16x32<uint16_t> x(
1062 simd16<uint16_t>((this->chunks[0] > mask_high) | (this->chunks[0] < mask_low)),
1063 simd16<uint16_t>((this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)),
1064 simd16<uint16_t>((this->chunks[2] > mask_high) | (this->chunks[2] < mask_low)),
1065 simd16<uint16_t>((this->chunks[3] > mask_high) | (this->chunks[3] < mask_low))
2094 static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm256_set1_epi16(uint16_t(-(!!_value))); }
2149 struct simd16<uint16_t>: base16_numeric<uint16_t> {
2150 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
2151 simdutf_really_inline simd16(const __m256i _value) : base16_numeric<uint16_t>(_value) {}
2154 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
2156 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
2157 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
2160 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm256_adds_epu16(*this, other); }
2161 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm256_subs_epu16(*this, other); }
2164 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm256_max_epu16(*this, other); }
2165 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm256_min_epu16(*this, other); }
2167 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
2169 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
2170 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
2171 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
2172 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
2173 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
2176 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
2177 simdutf_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const { return (*this & bits).bits_not_set(); }
2179 simdutf_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const { return ~this->bits_not_set(bits); }
2183 simdutf_really_inline bool bits_not_set_anywhere(simd16<uint16_t> bits) const { return _mm256_testz_si256(*this, bits); }
2184 simdutf_really_inline bool any_bits_set_anywhere(simd16<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
2186 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(_mm256_srli_epi16(*this, N)); }
2188 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(_mm256_slli_epi16(*this, N)); }
2195 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
2201 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
2202 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
2283 simdutf_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
2628 typedef uint16_t bitmask_t;
2823 struct simd8<uint16_t>: base<uint16_t> {
2824 static simdutf_really_inline simd8<uint16_t> splat(uint16_t _value) { return _mm_set1_epi16(_value); }
2825 static simdutf_really_inline simd8<uint16_t> load(const uint16_t values[8]) {
2829 simdutf_really_inline simd8() : base<uint16_t>() {}
2830 simdutf_really_inline simd8(const __m128i _value) : base<uint16_t>(_value) {}
2832 simdutf_really_inline simd8(uint16_t _value) : simd8(splat(_value)) {}
2834 simdutf_really_inline simd8(const uint16_t* values) : simd8(load(values)) {}
2837 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7
2843 simdutf_really_inline simd8<uint16_t> saturating_add(const simd8<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
2844 simdutf_really_inline simd8<uint16_t> saturating_sub(const simd8<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
2847 simdutf_really_inline simd8<uint16_t> max_val(const simd8<uint16_t> other) const { return _mm_max_epu16(*this, other); }
2848 simdutf_really_inline simd8<uint16_t> min_val(const simd8<uint16_t> other) const { return _mm_min_epu16(*this, other); }
2850 simdutf_really_inline simd8<uint16_t> gt_bits(const simd8<uint16_t> other) const { return this->saturating_sub(other); }
2852 simdutf_really_inline simd8<uint16_t> lt_bits(const simd8<uint16_t> other) const { return other.saturating_sub(*this); }
2853 simdutf_really_inline simd8<bool> operator<=(const simd8<uint16_t> other) const { return other.max_val(*this) == other; }
2854 simdutf_really_inline simd8<bool> operator>=(const simd8<uint16_t> other) const { return other.min_val(*this) == other; }
2855 simdutf_really_inline simd8<bool> operator==(const simd8<uint16_t> other) const { return _mm_cmpeq_epi16(*this, other); }
2856 simdutf_really_inline simd8<bool> operator&(const simd8<uint16_t> other) const { return _mm_and_si128(*this, other); }
2857 simdutf_really_inline simd8<bool> operator|(const simd8<uint16_t> other) const { return _mm_or_si128(*this, other); }
2860 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint16_t(0); }
2865 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint16_t> bits) const { return _mm_testz_si128(*this, bits); }
2866 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
3022 typedef uint16_t bitmask_t;
3043 static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm_set1_epi16(uint16_t(-(!!_value))); }
3093 simdutf_really_inline operator simd16<uint16_t>() const;
3104 struct simd16<uint16_t>: base16_numeric<uint16_t> {
3105 simdutf_really_inline simd16() : base16_numeric<uint16_t>() {}
3106 simdutf_really_inline simd16(const __m128i _value) : base16_numeric<uint16_t>(_value) {}
3109 simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
3111 simdutf_really_inline simd16(const uint16_t* values) : simd16(load(values)) {}
3112 simdutf_really_inline simd16(const char16_t* values) : simd16(load(reinterpret_cast<const uint16_t*>(values))) {}
3115 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7)
3118 simdutf_really_inline static simd16<uint16_t> repeat_16(
3119 uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, uint16_t v5, uint16_t v6, uint16_t v7
3121 return simd16<uint16_t>(v0, v1, v2, v3, v4, v5, v6, v7);
3125 simdutf_really_inline simd16<uint16_t> saturating_add(const simd16<uint16_t> other) const { return _mm_adds_epu16(*this, other); }
3126 simdutf_really_inline simd16<uint16_t> saturating_sub(const simd16<uint16_t> other) const { return _mm_subs_epu16(*this, other); }
3129 simdutf_really_inline simd16<uint16_t> max_val(const simd16<uint16_t> other) const { return _mm_max_epu16(*this, other); }
3130 simdutf_really_inline simd16<uint16_t> min_val(const simd16<uint16_t> other) const { return _mm_min_epu16(*this, other); }
3132 simdutf_really_inline simd16<uint16_t> gt_bits(const simd16<uint16_t> other) const { return this->saturating_sub(other); }
3134 simdutf_really_inline simd16<uint16_t> lt_bits(const simd16<uint16_t> other) const { return other.saturating_sub(*this); }
3135 simdutf_really_inline simd16<bool> operator<=(const simd16<uint16_t> other) const { return other.max_val(*this) == other; }
3136 simdutf_really_inline simd16<bool> operator>=(const simd16<uint16_t> other) const { return other.min_val(*this) == other; }
3137 simdutf_really_inline simd16<bool> operator>(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
3138 simdutf_really_inline simd16<bool> operator<(const simd16<uint16_t> other) const { return this->gt_bits(other).any_bits_set(); }
3141 simdutf_really_inline simd16<bool> bits_not_set() const { return *this == uint16_t(0); }
3142 simdutf_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const { return (*this & bits).bits_not_set(); }
3144 simdutf_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const { return ~this->bits_not_set(bits); }
3148 simdutf_really_inline bool bits_not_set_anywhere(simd16<uint16_t> bits) const { return _mm_testz_si128(*this, bits); }
3149 simdutf_really_inline bool any_bits_set_anywhere(simd16<uint16_t> bits) const { return !bits_not_set_anywhere(bits); }
3151 simdutf_really_inline simd16<uint16_t> shr() const { return simd16<uint16_t>(_mm_srli_epi16(*this, N)); }
3153 simdutf_really_inline simd16<uint16_t> shl() const { return simd16<uint16_t>(_mm_slli_epi16(*this, N)); }
3160 simdutf_really_inline simd16<uint16_t> swap_bytes() const {
3165 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
3166 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
3170 simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { return this->value; }
3232 simdutf_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
3522 typedef uint16_t bitmask_t;
4340 inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
4341 return uint16_t((word >> 8) | (word << 8));
4346 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4349 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4352 uint16_t diff = uint16_t(word - 0xD800);
4354 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4355 uint16_t diff2 = uint16_t(next_word - 0xDC00);
4367 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4370 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4373 uint16_t diff = uint16_t(word - 0xD800);
4375 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4376 uint16_t diff2 = uint16_t(next_word - 0xDC00);
4389 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4392 uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
4401 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4404 uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
4415 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4418 uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
4430 const uint16_t * input = reinterpret_cast<const uint16_t *>(in);
4431 uint16_t * output = reinterpret_cast<uint16_t *>(out);
4433 *output++ = uint16_t(input[i] >> 8 | input[i] << 8);
4443 uint16_t last_word = uint16_t(input[length-1]);
10953 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(uint16_t(word))) : char16_t(word);
10958 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
10959 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
10998 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(uint16_t(word))) : char16_t(word);
11003 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
11004 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
11027 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(uint16_t(word))) : char16_t(word);
11032 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
11033 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
11065 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11084 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11104 uint16_t diff = uint16_t(word - 0xD800);
11106 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11107 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11139 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11157 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11178 uint16_t diff = uint16_t(word - 0xD800);
11180 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11181 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11198 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11216 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11237 uint16_t diff = uint16_t(word - 0xD800);
11239 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11240 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11274 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11278 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11285 uint16_t diff = uint16_t(word - 0xD800);
11287 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11288 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11315 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11319 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11326 uint16_t diff = uint16_t(word - 0xD800);
11329 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11330 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11342 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11346 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11353 uint16_t diff = uint16_t(word - 0xD800);
11356 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11357 uint16_t diff2 = uint16_t(next_word - 0xDC00);
11412 uint16_t code_point = uint16_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11414 code_point = utf16::swap_bytes(uint16_t(code_point));
11422 uint16_t code_point = uint16_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11424 code_point = utf16::swap_bytes(uint16_t(code_point));
11434 uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10));
11435 uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF));
11505 code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point)));
11525 code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point)));
11542 uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10));
11543 uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF));
11594 code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point)));
11612 code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point)));
11630 uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10));
11631 uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF));
12054 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12069 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12293 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12297 uint16_t word = 0;
12298 uint16_t too_large = 0;
12316 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12319 uint16_t word;
12491 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12494 uint16_t word = 0;
12694 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t*>(buf));
12695 uint16x8_t secondin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + simd16<uint16_t>::SIZE / sizeof(char16_t));
12696 uint16x8_t thirdin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 2*simd16<uint16_t>::SIZE / sizeof(char16_t));
12697 uint16x8_t fourthin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 3*simd16<uint16_t>::SIZE / sizeof(char16_t));
12699 const auto u0 = simd16<uint16_t>(in);
12700 const auto u1 = simd16<uint16_t>(secondin);
12701 const auto u2 = simd16<uint16_t>(thirdin);
12702 const auto u3 = simd16<uint16_t>(fourthin);
12709 const auto in16 = simd16<uint16_t>::pack(v0, v1);
12710 const auto nextin16 = simd16<uint16_t>::pack(v2, v3);
12757 const auto in0 = simd16<uint16_t>(input);
12758 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
12761 const simd8<uint8_t> in_16 = simd16<uint16_t>::pack(t0, t1);
12892 auto in0 = simd16<uint16_t>(input);
12893 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
12900 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
12962 auto in0 = simd16<uint16_t>(input);
12963 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
12971 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
13093 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13125 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13137 uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
13164 vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output), inlow);
13167 vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output+8), inhigh);
13214 const uint16_t input_utf8_end_of_code_point_mask =
13241 vst1_u16(reinterpret_cast<uint16_t*>(utf16_output), composed);
13254 vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output), composed);
13276 vst1q_u16(reinterpret_cast<uint16_t*>(utf16_output), composed);
13297 uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00)));
13320 vst1_u16(reinterpret_cast<uint16_t*>(utf16_output), composed);
13372 vst1q_u16(reinterpret_cast<uint16_t *>(utf16_output), composed);
13444 utf16_output[0] = uint16_t(buffer[i] >> 16);
13445 utf16_output[1] = uint16_t(buffer[i] & 0xFFFF);
13448 utf16_output[0] = uint16_t(buffer[i] & 0xFFFF);
13474 const uint16_t input_utf8_end_of_code_point_mask =
13497 vst2_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13509 vst2q_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13527 vst2q_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13549 vreinterpretq_u32_u16(vbicq_u16(vreinterpretq_u16_u32(perm), vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits
13649 const uint16_t input_utf8_end_of_code_point_mask =
13712 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13734 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13747 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
13817 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
13818 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
13819 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13822 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13826 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
13869 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13884 uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
13935 #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x))
13953 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
13964 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13986 const uint16_t mask = vaddvq_u16(combined);
14026 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14038 uint16_t diff = uint16_t(word - 0xD800);
14039 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14041 uint16_t diff2 = uint16_t(next_word - 0xDC00);
14070 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14071 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14072 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14076 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14080 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
14123 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14138 uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
14189 #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x))
14207 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14218 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14240 const uint16_t mask = vaddvq_u16(combined);
14280 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14292 uint16_t diff = uint16_t(word - 0xD800);
14293 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14295 uint16_t diff2 = uint16_t(next_word - 0xDC00);
14369 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14370 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14373 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14394 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14399 uint16_t diff = uint16_t(word - 0xD800);
14400 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14402 uint16_t diff2 = uint16_t(next_word - 0xDC00);
14427 const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14428 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14431 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14452 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14457 uint16_t diff = uint16_t(word - 0xD800);
14458 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14460 uint16_t diff2 = uint16_t(next_word - 0xDC00);
14535 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14578 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14593 uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
14608 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14609 const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14643 #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x))
14661 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14672 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14694 const uint16_t mask = vaddvq_u16(combined);
14770 const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14811 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14826 uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
14843 const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14844 const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14881 #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x))
14899 const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14910 const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14932 const uint16_t mask = vaddvq_u16(combined);
15002 uint16_t * utf16_output = reinterpret_cast<uint16_t*>(utf16_out);
15014 const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
15015 const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
15036 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
15037 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
15039 high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
15040 low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
15061 uint16_t * utf16_output = reinterpret_cast<uint16_t*>(utf16_out);
15072 const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
15073 const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
15097 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
15098 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
15100 high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
15101 low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
16303 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16317 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16341 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16343 input.store(reinterpret_cast<uint16_t *>(output));
18129 const __mmask16 valid = uint16_t((1 << count) - 1);
18532 const __mmask16 valid = uint16_t((1 << valid_count) - 1); \
18551 const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \
19392 uint16_t word;
19393 while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
19394 : uint16_t(*buf))) <= 0xff) {
19415 uint16_t word;
19416 while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
19417 : uint16_t(*buf))) <= 0xff) {
19640 const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00);
19641 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
19642 const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00);
19712 const size_t howmany1 = count_ones((uint16_t)(valid));
19716 const size_t howmany2 = count_ones((uint16_t)(valid >> 16));
19762 uint16_t mask = uint16_t((1 << (end - buf)) - 1);
19799 uint16_t mask = uint16_t((1 << (end - buf)) - 1);
19823 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
19824 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
19825 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
19909 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
19941 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
20065 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
20066 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
20067 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
20156 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
20191 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
20344 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
20349 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
20350 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
20352 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
20353 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
20412 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
20417 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
20418 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
20420 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
20421 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
20680 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20682 _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20697 diff, _mm512_set1_epi16(uint16_t(0x0400)));
20865 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20866 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20868 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
20886 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20887 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20889 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
20914 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20915 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20917 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
20935 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20936 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20938 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
20954 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20955 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20957 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
20977 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
20978 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20980 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
21008 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
21009 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
21011 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
21031 __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
21032 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
21034 __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
21647 const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
21648 const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
21666 const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
21667 const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
21766 const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f);
21767 const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff);
21768 const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff);
21769 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
21796 const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f);
21797 const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff);
21798 const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff);
21799 const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
22061 const auto u0 = simd16<uint16_t>(in);
22062 const auto u1 = simd16<uint16_t>(nextin);
22067 const auto in16 = simd16<uint16_t>::pack(v0, v1);
22103 input += simd16<uint16_t>::ELEMENTS * 2;
22105 input += simd16<uint16_t>::ELEMENTS * 2 - 1;
22110 while (input + simd16<uint16_t>::ELEMENTS * 2 < end16) {
22111 const auto in0 = simd16<uint16_t>(input);
22112 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::ELEMENTS);
22117 const auto in_16 = simd16<uint16_t>::pack(t0, t1);
22122 input += simd16<uint16_t>::ELEMENTS * 2;
22138 input += simd16<uint16_t>::ELEMENTS * 2;
22140 input += simd16<uint16_t>::ELEMENTS * 2 - 1;
22281 while (input + simd16<uint16_t>::ELEMENTS * 2 < end) {
22285 auto in0 = simd16<uint16_t>(input);
22286 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::ELEMENTS);
22296 const auto in = simd16<uint16_t>::pack(t0, t1);
22302 input += simd16<uint16_t>::ELEMENTS * 2;
22332 input += simd16<uint16_t>::ELEMENTS * 2;
22338 input += simd16<uint16_t>::ELEMENTS * 2 - 1;
22359 while (input + simd16<uint16_t>::ELEMENTS * 2 < end) {
22363 auto in0 = simd16<uint16_t>(input);
22364 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::ELEMENTS);
22374 const auto in = simd16<uint16_t>::pack(t0, t1);
22380 input += simd16<uint16_t>::ELEMENTS * 2;
22410 input += simd16<uint16_t>::ELEMENTS * 2;
22416 input += simd16<uint16_t>::ELEMENTS * 2 - 1;
22654 const uint16_t input_utf8_end_of_code_point_mask =
22798 utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
22799 utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
22802 utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
22834 const uint16_t input_utf8_end_of_code_point_mask =
23009 uint16_t word = !match_system(big_endian)
23203 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
23286 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23298 uint16_t diff = uint16_t(word - 0xD800);
23299 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23301 uint16_t diff2 = uint16_t(next_word - 0xDC00);
23446 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
23529 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23541 uint16_t diff = uint16_t(word - 0xD800);
23542 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23544 uint16_t diff2 = uint16_t(next_word - 0xDC00);
23653 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23659 uint16_t diff = uint16_t(word - 0xD800);
23660 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23662 uint16_t diff2 = uint16_t(next_word - 0xDC00);
23721 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23727 uint16_t diff = uint16_t(word - 0xD800);
23728 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23730 uint16_t diff2 = uint16_t(next_word - 0xDC00);
23842 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
23843 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
23844 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
23928 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
23960 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
24084 const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
24085 const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
24086 const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
24175 const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
24210 #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
24360 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
24365 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
24366 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
24368 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
24369 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
24428 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
24433 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
24434 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
24436 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
24437 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
24473 const uint16_t input_utf8_end_of_code_point_mask =
25714 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25728 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25752 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25754 input.store(reinterpret_cast<uint16_t *>(output));
28086 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28100 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28124 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28126 input.store(reinterpret_cast<uint16_t *>(output));
28432 const uint16_t one_byte_bitmask
28461 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
28462 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
28484 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
28520 const auto u0 = simd16<uint16_t>(in);
28521 const auto u1 = simd16<uint16_t>(secondin);
28522 const auto u2 = simd16<uint16_t>(thirdin);
28523 const auto u3 = simd16<uint16_t>(fourthin);
28530 const auto in16 = simd16<uint16_t>::pack(v0, v1);
28531 const auto nextin16 = simd16<uint16_t>::pack(v2, v3);
28535 uint16_t surrogates_bitmask0 = static_cast<uint16_t>(surrogates_wordmask0.to_bitmask());
28536 uint16_t surrogates_bitmask1 = static_cast<uint16_t>(surrogates_wordmask1.to_bitmask());
28558 const uint16_t V0 = static_cast<uint16_t>(~surrogates_bitmask0);
28561 const uint16_t H0 = static_cast<uint16_t>(vH0.to_bitmask());
28563 const uint16_t L0 = static_cast<uint16_t>(~H0 & surrogates_bitmask0);
28565 const uint16_t a0 = static_cast<uint16_t>(L0 & (H0 >> 1));
28567 const uint16_t b0 = static_cast<uint16_t>(a0 << 1);
28569 const uint16_t c0 = static_cast<uint16_t>(V0 | a0 | b0);
28580 while (input + simd16<uint16_t>::SIZE * 2 < end16) {
28581 const auto in0 = simd16<uint16_t>(input);
28582 const auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
28587 const auto in_16 = simd16<uint16_t>::pack(t0, t1);
28590 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28594 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28597 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28599 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28601 const uint16_t a = static_cast<uint16_t>(L & (H >> 1));
28603 const uint16_t b = static_cast<uint16_t>(a << 1);
28605 const uint16_t c = static_cast<uint16_t>(V | a | b);
28758 while (input + simd16<uint16_t>::SIZE * 2 < end) {
28762 auto in0 = simd16<uint16_t>(input);
28763 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
28772 const auto in = simd16<uint16_t>::pack(t0, t1);
28776 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28788 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28792 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28796 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28798 const uint16_t a = static_cast<uint16_t>(L & (H >> 1)); // A low surrogate must be followed by high one.
28801 const uint16_t b = static_cast<uint16_t>(a << 1); // Just mark that the opposite fact holds,
28803 const uint16_t c = static_cast<uint16_t>(V | a | b); // Combine all the masks into the final one.
28835 while (input + simd16<uint16_t>::SIZE * 2 < end) {
28839 auto in0 = simd16<uint16_t>(input);
28840 auto in1 = simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
28850 const auto in = simd16<uint16_t>::pack(t0, t1);
28854 const uint16_t surrogates_bitmask = static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
28866 const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
28870 const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
28874 const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
28876 const uint16_t a = static_cast<uint16_t>(L & (H >> 1)); // A low surrogate must be followed by high one.
28879 const uint16_t b = static_cast<uint16_t>(a << 1); // Just mark that the opposite fact holds,
28881 const uint16_t c = static_cast<uint16_t>(V | a | b); // Combine all the masks into the final one.
28978 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
29129 const uint16_t input_utf8_end_of_code_point_mask =
29275 utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
29276 utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
29279 utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
29311 const uint16_t input_utf8_end_of_code_point_mask =
29440 const uint16_t input_utf8_end_of_code_point_mask =
29525 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29641 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
29645 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
29660 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
29692 #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
29717 const uint16_t mask = (one_byte_bitmask & 0x5555) |
29758 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29770 uint16_t diff = uint16_t(word - 0xD800);
29771 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
29773 uint16_t diff2 = uint16_t(next_word - 0xDC00);
29845 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
29849 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
29864 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
29896 #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
29921 const uint16_t mask = (one_byte_bitmask & 0x5555) |
29962 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29974 uint16_t diff = uint16_t(word - 0xD800);
29975 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
29977 uint16_t diff2 = uint16_t(next_word - 0xDC00);
30069 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
30087 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
30092 uint16_t diff = uint16_t(word - 0xD800);
30093 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
30095 uint16_t diff2 = uint16_t(next_word - 0xDC00);
30137 const uint16_t surrogates_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogates_bytemask));
30155 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
30160 uint16_t diff = uint16_t(word - 0xD800);
30161 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
30163 uint16_t diff2 = uint16_t(next_word - 0xDC00);
30261 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); //1111 1000 0000 0000
30262 const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); //1100 0000 1000 0000
30263 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); //1111 1111 1000 0000
30332 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask)); // collect the MSB from previous vector and put them into uint16_t mask
30336 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
30362 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
30363 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
30385 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30415 #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
30440 const uint16_t mask = (one_byte_bitmask & 0x5555) |
30506 if(static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi32(_mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) {
30522 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30523 const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080);
30524 const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
30538 if(static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) {
30568 if(static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi32(next_max_input, v_10ffff))) != 0xffff) {
30588 const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
30592 const uint16_t one_or_two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
30618 const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
30619 const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
30644 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30677 #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
30702 const uint16_t mask = (one_byte_bitmask & 0x5555) |
30790 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30791 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30811 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
30816 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
30817 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
30819 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
30820 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
30856 const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
30857 const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
30880 *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
30885 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
30886 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
30888 high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
30889 low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
32093 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32107 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32131 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32133 input.store(reinterpret_cast<uint16_t *>(output));
33201 const uint16_t ascii_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(ascii_bytes_bytemask));
33202 const uint16_t two_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(two_bytes_bytemask));
33203 const uint16_t three_bytes_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(three_bytes_bytemask));
33221 const uint16_t surrogate_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(surrogate_bytemask));