Lines Matching defs:uint8_t
208 uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \
226 uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \
262 // Base class of simd8<uint8_t> and simd8<bool>, both of which use uint8x16_t internally.
300 static simdutf_really_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); }
307 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
344 struct simd8<uint8_t>: base_u8<uint8_t> {
345 static simdutf_really_inline simd8<uint8_t> splat(uint8_t _value) { return vmovq_n_u8(_value); }
346 static simdutf_really_inline simd8<uint8_t> zero() { return vdupq_n_u8(0); }
347 static simdutf_really_inline simd8<uint8_t> load(const uint8_t* values) { return vld1q_u8(values); }
348 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {}
352 simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
354 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
358 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
359 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
366 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
367 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
375 simdutf_really_inline static simd8<uint8_t> repeat_16(
376 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
377 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
379 return simd8<uint8_t>(
386 simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
389 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
390 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
393 simdutf_really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); }
394 simdutf_really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); }
395 simdutf_really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; }
396 simdutf_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
399 simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
400 simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
401 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
402 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
403 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
404 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
405 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
406 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); }
408 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
410 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }
413 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
417 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
419 simdutf_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
421 simdutf_really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }
445 simdutf_really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) const {
446 return vqtbl1q_u8(*this, simd8<uint8_t>(original));
498 const simd8<uint8_t> tb1{ 0,255,255,255, 1,255,255,255, 2,255,255,255, 3,255,255,255 };
499 const simd8<uint8_t> tb2{ 4,255,255,255, 5,255,255,255, 6,255,255,255, 7,255,255,255 };
500 const simd8<uint8_t> tb3{ 8,255,255,255, 9,255,255,255, 10,255,255,255, 11,255,255,255 };
501 const simd8<uint8_t> tb4{ 12,255,255,255, 13,255,255,255, 14,255,255,255, 15,255,255,255 };
567 simdutf_really_inline operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); }
617 return vqtbl1q_s8(*this, simd8<uint8_t>(original));
761 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
762 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
764 simd8<uint8_t>(uint8x16_t(this->chunks[0])) >= mask,
765 simd8<uint8_t>(uint8x16_t(this->chunks[1])) >= mask,
766 simd8<uint8_t>(uint8x16_t(this->chunks[2])) >= mask,
767 simd8<uint8_t>(uint8x16_t(this->chunks[3])) >= mask
930 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
931 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
1235 inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) {
1743 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); }
1824 simdutf_really_inline operator simd8<uint8_t>() const;
1859 struct simd8<uint8_t>: base8_numeric<uint8_t> {
1860 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
1861 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {}
1863 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
1865 simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
1868 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
1869 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15,
1870 uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23,
1871 uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31
1879 simdutf_really_inline static simd8<uint8_t> repeat_16(
1880 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
1881 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
1883 return simd8<uint8_t>(
1893 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); }
1894 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); }
1897 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
1898 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); }
1900 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
1902 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
1903 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
1904 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
1905 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
1906 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
1909 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
1910 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
1912 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
1916 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); }
1917 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
1919 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
1921 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
1927 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
1995 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
2049 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
2050 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
2052 (simd8<uint8_t>(__m256i(this->chunks[0])) >= mask),
2053 (simd8<uint8_t>(__m256i(this->chunks[1])) >= mask)
2201 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
2202 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
2649 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); }
2744 simdutf_really_inline operator simd8<uint8_t>() const;
2756 struct simd8<uint8_t>: base8_numeric<uint8_t> {
2757 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
2758 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {}
2761 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
2763 simdutf_really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
2766 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
2767 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
2773 simdutf_really_inline static simd8<uint8_t> repeat_16(
2774 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
2775 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
2777 return simd8<uint8_t>(
2784 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); }
2785 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); }
2788 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); }
2789 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); }
2791 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
2793 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
2794 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
2795 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
2796 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
2797 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
2800 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
2801 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
2803 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
2808 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); }
2809 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
2811 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
2813 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
2819 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
2937 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
3005 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
3006 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3008 simd8<uint8_t>(__m128i(this->chunks[0])) >= mask,
3009 simd8<uint8_t>(__m128i(this->chunks[1])) >= mask,
3010 simd8<uint8_t>(__m128i(this->chunks[2])) >= mask,
3011 simd8<uint8_t>(__m128i(this->chunks[3])) >= mask
3165 // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
3166 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
3590 return (__m128i)(vec_vsx_ld(0, reinterpret_cast<const uint8_t *>(values)));
3700 template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
3701 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
3703 : base8_numeric<uint8_t>(_value) {}
3705 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
3707 simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {}
3710 simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
3711 uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
3712 uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
3716 simdutf_really_inline static simd8<uint8_t>
3717 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
3718 uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
3719 uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
3720 uint8_t v15) {
3721 return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
3726 simdutf_really_inline simd8<uint8_t>
3727 saturating_add(const simd8<uint8_t> other) const {
3730 simdutf_really_inline simd8<uint8_t>
3731 saturating_sub(const simd8<uint8_t> other) const {
3736 simdutf_really_inline simd8<uint8_t>
3737 max_val(const simd8<uint8_t> other) const {
3740 simdutf_really_inline simd8<uint8_t>
3741 min_val(const simd8<uint8_t> other) const {
3745 simdutf_really_inline simd8<uint8_t>
3746 gt_bits(const simd8<uint8_t> other) const {
3750 simdutf_really_inline simd8<uint8_t>
3751 lt_bits(const simd8<uint8_t> other) const {
3755 operator<=(const simd8<uint8_t> other) const {
3759 operator>=(const simd8<uint8_t> other) const {
3763 operator>(const simd8<uint8_t> other) const {
3767 operator<(const simd8<uint8_t> other) const {
3773 return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0)));
3775 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
3781 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
3795 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
3799 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
3802 template <int N> simdutf_really_inline simd8<uint8_t> shr() const {
3803 return simd8<uint8_t>(
3806 template <int N> simdutf_really_inline simd8<uint8_t> shl() const {
3807 return simd8<uint8_t>(
3870 simdutf_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
3931 simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
3932 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3934 simd8<uint8_t>(this->chunks[0]) >= mask,
3935 simd8<uint8_t>(this->chunks[1]) >= mask,
3936 simd8<uint8_t>(this->chunks[2]) >= mask,
3937 simd8<uint8_t>(this->chunks[3]) >= mask
4128 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4194 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4299 if(uint8_t(p[i]) >= 240) { counter++; }
4308 if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
4309 if (uint8_t(input[length-2]) >= 0xe0) { return length-2; } // 3- and 4-byte characters with only 2 bytes left
4312 if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
4318 if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
4319 if (uint8_t(input[length-2]) >= 0xe0) { return length-2; } // 3- and 4-byte characters with only 1 byte left
4320 if (uint8_t(input[length-3]) >= 0xf0) { return length-3; } // 4-byte characters with only 3 bytes left
5659 encoding_type check_bom(const uint8_t* byte, size_t length) {
5677 return check_bom(reinterpret_cast<const uint8_t*>(byte), length);
5727 const uint8_t shufutf8[209][16] =
5940 const uint8_t utf8bigindex[4096][2] =
10055 const uint8_t pack_1_2_utf8_bytes[256][17] = {
10315 const uint8_t pack_1_2_3_utf8_bytes[256][17] = {
10595 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10615 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10734 const uint8_t * c = reinterpret_cast<const uint8_t *>(buf);
11386 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11403 uint8_t leading_byte = data[pos]; // leading byte
11470 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11491 uint8_t leading_byte = data[pos]; // leading byte
11560 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11580 uint8_t leading_byte = data[pos]; // leading byte
11718 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11735 uint8_t leading_byte = data[pos]; // leading byte
11783 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11803 uint8_t leading_byte = data[pos]; // leading byte
11855 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11875 uint8_t leading_byte = data[pos]; // leading byte
12049 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12064 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12122 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12145 uint8_t leading_byte = data[pos]; // leading byte
12171 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12193 uint8_t leading_byte = data[pos]; // leading byte
12428 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12452 uint8_t leading_byte = data[pos]; // leading byte
12576 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
12577 simd8<uint8_t> bits = input.reduce_or();
12581 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12582 simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u);
12583 simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
12584 simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
12593 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12594 simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
12595 simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
12654 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]));
12686 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12687 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12732 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12733 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12761 const simd8<uint8_t> in_16 = simd16<uint16_t>::pack(t0, t1);
12843 simd::simd8x64<uint8_t> in8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(secondin), vreinterpretq_u8_u16(thirdin), vreinterpretq_u8_u16(fourthin));
12853 uint8_t block[64]{};
12856 simd::simd8x64<uint8_t> in(block);
12884 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12885 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12886 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12887 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12900 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
12954 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12955 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12956 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12957 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12971 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
13091 uint8_t *utf8_output = reinterpret_cast<uint8_t *>(utf8_out);
13097 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(latin1_input));
13139 const uint8_t *row =
13161 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13180 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13213 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13261 const uint8_t idx =
13264 const uint8_t consumed =
13282 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13377 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13473 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13515 const uint8_t idx =
13517 const uint8_t consumed =
13533 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13595 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13648 uint8x16_t in = vld1q_u8(reinterpret_cast<const uint8_t*>(input));
13660 vst1q_u8(reinterpret_cast<uint8_t*>(latin1_output), in);
13665 const uint8_t idx =
13668 const uint8_t consumed =
13681 uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
13699 vst1_u8(reinterpret_cast<uint8_t*>(latin1_output), latin1_packed);
13718 vst1_u8(reinterpret_cast<uint8_t*>(latin1_output), latin1_packed);
13740 vst1_u8(reinterpret_cast<uint8_t*>(latin1_output), latin1_packed);
13814 uint8_t * utf8_output = reinterpret_cast<uint8_t*>(utf8_out);
13886 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14000 const uint8_t mask0 = uint8_t(mask);
14002 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14006 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14007 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14066 uint8_t * utf8_output = reinterpret_cast<uint8_t*>(utf8_out);
14140 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14254 const uint8_t mask0 = uint8_t(mask);
14256 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14260 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14261 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14485 vst1_u8(reinterpret_cast<uint8_t*>(latin1_output), latin1_packed);
14511 vst1_u8(reinterpret_cast<uint8_t*>(latin1_output), latin1_packed);
14532 uint8_t * utf8_output = reinterpret_cast<uint8_t*>(utf8_out);
14595 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14708 const uint8_t mask0 = uint8_t(mask);
14709 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14713 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14714 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
14766 uint8_t * utf8_output = reinterpret_cast<uint8_t*>(utf8_out);
14828 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
14946 const uint8_t mask0 = uint8_t(mask);
14948 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
14952 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
14953 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
15126 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
15129 simdutf_really_inline const uint8_t *full_block() const;
15139 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
15142 const uint8_t *buf;
15149 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
15150 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
15151 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
15154 buf[sizeof(simd8x64<uint8_t>)] = '\0';
15159 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
15160 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
15161 in.store(reinterpret_cast<uint8_t*>(buf));
15162 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
15165 buf[sizeof(simd8x64<uint8_t>)] = '\0';
15179 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
15190 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
15195 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
15219 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15226 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15228 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15229 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15230 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15231 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15232 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15233 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15241 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15245 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15247 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15262 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
15263 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15291 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15309 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15310 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15311 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
15312 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
15313 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
15314 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
15322 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
15325 static const uint8_t max_array[32] = {
15331 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
15337 simd8<uint8_t> error;
15339 simd8<uint8_t> prev_input_block;
15341 simd8<uint8_t> prev_incomplete;
15346 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15349 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
15350 simd8<uint8_t> sc = check_special_cases(input, prev1);
15363 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
15368 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
15370 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
15373 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
15379 this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
15380 this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
15409 bool generic_validate_utf8(const uint8_t * input, size_t length) {
15413 simd::simd8x64<uint8_t> in(reader.full_block());
15417 uint8_t block[64]{};
15419 simd::simd8x64<uint8_t> in(block);
15427 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15434 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
15439 simd::simd8x64<uint8_t> in(reader.full_block());
15450 uint8_t block[64]{};
15452 simd::simd8x64<uint8_t> in(block);
15467 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15471 bool generic_validate_ascii(const uint8_t * input, size_t length) {
15473 uint8_t blocks[64]{};
15474 simd::simd8x64<uint8_t> running_or(blocks);
15476 simd::simd8x64<uint8_t> in(reader.full_block());
15480 uint8_t block[64]{};
15482 simd::simd8x64<uint8_t> in(block);
15488 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15492 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
15496 simd::simd8x64<uint8_t> in(reader.full_block());
15505 uint8_t block[64]{};
15507 simd::simd8x64<uint8_t> in(block);
15517 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
15610 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15617 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15619 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15620 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15621 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15622 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15623 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15624 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15632 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15636 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15638 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15653 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
15654 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15682 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15700 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15701 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15702 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
15703 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
15704 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
15705 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
15712 simd8<uint8_t> error;
15714 validating_transcoder() : error(uint8_t(0)) {}
15718 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15721 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
15722 simd8<uint8_t> sc = check_special_cases(input, prev1);
15752 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
15754 auto zero = simd8<uint8_t>{uint8_t(0)};
15755 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
15758 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
15827 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
15829 auto zero = simd8<uint8_t>{uint8_t(0)};
15830 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
15833 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
15962 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15969 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
15971 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
15972 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
15973 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
15974 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
15975 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
15976 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
15984 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
15988 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
15990 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16005 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
16006 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16034 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16052 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
16053 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
16054 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
16055 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
16056 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
16057 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
16064 simd8<uint8_t> error;
16066 validating_transcoder() : error(uint8_t(0)) {}
16070 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16073 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
16074 simd8<uint8_t> sc = check_special_cases(input, prev1);
16104 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
16106 auto zero = simd8<uint8_t>{uint8_t(0)};
16107 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
16110 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
16178 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
16180 auto zero = simd8<uint8_t>{uint8_t(0)};
16181 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
16184 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
16367 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
16377 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
16379 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
16380 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
16381 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
16382 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
16383 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
16384 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
16392 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
16396 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
16397 constexpr const uint8_t FORBIDDEN = 0xff;
16399 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16414 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
16415 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16443 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16464 simd8<uint8_t> error;
16466 validating_transcoder() : error(uint8_t(0)) {}
16470 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16473 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
16502 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
16504 auto zero = simd8<uint8_t>{uint8_t(0)};
16505 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
16508 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
16576 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
16578 auto zero = simd8<uint8_t>{uint8_t(0)};
16579 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
16582 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
17322 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
17325 uint8_t rem = length % lanes;
17326 const uint8_t *simd_end = data + (length / lanes) * lanes;
19395 *latin1_output++ = uint8_t(word);
19418 *latin1_output++ = uint8_t(word);
19788 *latin1_output++ = uint8_t(*buf++);
19803 *latin1_output++ = uint8_t(*buf++);
19887 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
19888 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
19985 const uint8_t mask0 = uint8_t(mask);
19986 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
19990 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
19991 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
19995 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
19996 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
20001 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
20002 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
20132 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
20133 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
20235 const uint8_t mask0 = uint8_t(mask);
20236 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
20240 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
20241 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
20245 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
20246 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
20251 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
20252 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
20439 const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80);
20842 const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80);
21190 while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
21217 while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
21249 while (ret.first != end and ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
21285 while (std::get<0>(ret) != end and ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) {
21318 while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
21692 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
21849 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
22018 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
22022 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22023 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
22024 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
22025 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
22030 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22031 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
22032 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
22050 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22051 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22088 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22089 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22189 simd::simd8x64<uint8_t> in8(in, nextin);
22199 uint8_t block[64]{};
22202 simd::simd8x64<uint8_t> in(block);
22276 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22277 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22278 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22279 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22354 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22355 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22356 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22357 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22546 const uint8_t *row =
22547 &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
22548 const uint8_t *row_2 =
22549 &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)]
22703 const uint8_t idx =
22705 const uint8_t consumed =
22876 const uint8_t idx =
22878 const uint8_t consumed =
23145 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23146 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
23247 const uint8_t mask0 = uint8_t(mask);
23248 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
23252 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
23253 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
23257 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
23258 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
23263 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
23264 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
23388 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23389 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
23490 const uint8_t mask0 = uint8_t(mask);
23491 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
23495 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
23496 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
23500 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
23501 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
23506 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
23507 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
23906 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
23907 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
24004 const uint8_t mask0 = uint8_t(mask);
24005 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
24009 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
24010 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
24014 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
24015 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
24020 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
24021 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
24151 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
24152 const uint8_t* row_2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2>>16)][0];
24254 const uint8_t mask0 = uint8_t(mask);
24255 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
24259 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
24260 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
24264 const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
24265 const uint8_t* row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
24270 const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
24271 const uint8_t* row3 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
24497 const uint8_t idx =
24499 const uint8_t consumed =
24537 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
24540 simdutf_really_inline const uint8_t *full_block() const;
24550 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
24553 const uint8_t *buf;
24560 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
24561 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
24562 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
24565 buf[sizeof(simd8x64<uint8_t>)] = '\0';
24570 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
24571 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
24572 in.store(reinterpret_cast<uint8_t*>(buf));
24573 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
24576 buf[sizeof(simd8x64<uint8_t>)] = '\0';
24590 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
24601 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
24606 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
24630 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
24637 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
24639 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
24640 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
24641 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
24642 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
24643 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
24644 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
24652 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
24656 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
24658 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
24673 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
24674 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
24702 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
24720 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
24721 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
24722 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
24723 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
24724 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
24725 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
24733 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
24736 static const uint8_t max_array[32] = {
24742 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
24748 simd8<uint8_t> error;
24750 simd8<uint8_t> prev_input_block;
24752 simd8<uint8_t> prev_incomplete;
24757 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
24760 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
24761 simd8<uint8_t> sc = check_special_cases(input, prev1);
24774 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
24779 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
24781 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
24784 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
24790 this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
24791 this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
24820 bool generic_validate_utf8(const uint8_t * input, size_t length) {
24824 simd::simd8x64<uint8_t> in(reader.full_block());
24828 uint8_t block[64]{};
24830 simd::simd8x64<uint8_t> in(block);
24838 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24845 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
24850 simd::simd8x64<uint8_t> in(reader.full_block());
24861 uint8_t block[64]{};
24863 simd::simd8x64<uint8_t> in(block);
24878 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24882 bool generic_validate_ascii(const uint8_t * input, size_t length) {
24884 uint8_t blocks[64]{};
24885 simd::simd8x64<uint8_t> running_or(blocks);
24887 simd::simd8x64<uint8_t> in(reader.full_block());
24891 uint8_t block[64]{};
24893 simd::simd8x64<uint8_t> in(block);
24899 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
24903 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
24907 simd::simd8x64<uint8_t> in(reader.full_block());
24916 uint8_t block[64]{};
24918 simd::simd8x64<uint8_t> in(block);
24928 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
25021 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25028 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25030 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25031 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25032 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25033 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25034 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25035 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25043 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25047 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25049 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25064 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25065 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25093 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25111 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25112 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25113 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
25114 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
25115 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
25116 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
25123 simd8<uint8_t> error;
25125 validating_transcoder() : error(uint8_t(0)) {}
25129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25132 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25133 simd8<uint8_t> sc = check_special_cases(input, prev1);
25163 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25165 auto zero = simd8<uint8_t>{uint8_t(0)};
25166 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25169 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
25238 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25240 auto zero = simd8<uint8_t>{uint8_t(0)};
25241 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25244 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
25373 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25380 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25382 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25383 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25384 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25385 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25386 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25387 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25395 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25399 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25401 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25416 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25417 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25445 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25463 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25464 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25465 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
25466 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
25467 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
25468 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
25475 simd8<uint8_t> error;
25477 validating_transcoder() : error(uint8_t(0)) {}
25481 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25484 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25485 simd8<uint8_t> sc = check_special_cases(input, prev1);
25515 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25517 auto zero = simd8<uint8_t>{uint8_t(0)};
25518 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25521 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
25589 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25591 auto zero = simd8<uint8_t>{uint8_t(0)};
25592 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25595 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
25780 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25790 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
25792 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
25793 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
25794 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
25795 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
25796 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
25797 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
25805 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
25809 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
25810 constexpr const uint8_t FORBIDDEN = 0xff;
25812 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25827 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
25828 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25856 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25877 simd8<uint8_t> error;
25879 validating_transcoder() : error(uint8_t(0)) {}
25883 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25886 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25915 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25917 auto zero = simd8<uint8_t>{uint8_t(0)};
25918 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25921 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
25989 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
25991 auto zero = simd8<uint8_t>{uint8_t(0)};
25992 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
25995 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
26754 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
26876 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
26881 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26882 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
26883 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
26884 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
26889 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26890 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
26891 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
26909 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
26912 simdutf_really_inline const uint8_t *full_block() const;
26922 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
26925 const uint8_t *buf;
26932 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
26933 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
26934 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
26937 buf[sizeof(simd8x64<uint8_t>)] = '\0';
26942 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
26943 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
26944 in.store(reinterpret_cast<uint8_t*>(buf));
26945 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
26948 buf[sizeof(simd8x64<uint8_t>)] = '\0';
26962 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
26973 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
26978 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
27002 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27009 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27011 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27012 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27013 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27014 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27015 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27016 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27024 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27028 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27030 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27045 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27046 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27074 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27092 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27093 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27094 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27095 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27096 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27097 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27105 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
27108 static const uint8_t max_array[32] = {
27114 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
27120 simd8<uint8_t> error;
27122 simd8<uint8_t> prev_input_block;
27124 simd8<uint8_t> prev_incomplete;
27129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27132 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27133 simd8<uint8_t> sc = check_special_cases(input, prev1);
27146 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
27151 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
27153 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
27156 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
27162 this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
27163 this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
27192 bool generic_validate_utf8(const uint8_t * input, size_t length) {
27196 simd::simd8x64<uint8_t> in(reader.full_block());
27200 uint8_t block[64]{};
27202 simd::simd8x64<uint8_t> in(block);
27210 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27217 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
27222 simd::simd8x64<uint8_t> in(reader.full_block());
27233 uint8_t block[64]{};
27235 simd::simd8x64<uint8_t> in(block);
27250 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27254 bool generic_validate_ascii(const uint8_t * input, size_t length) {
27256 uint8_t blocks[64]{};
27257 simd::simd8x64<uint8_t> running_or(blocks);
27259 simd::simd8x64<uint8_t> in(reader.full_block());
27263 uint8_t block[64]{};
27265 simd::simd8x64<uint8_t> in(block);
27271 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27275 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
27279 simd::simd8x64<uint8_t> in(reader.full_block());
27288 uint8_t block[64]{};
27290 simd::simd8x64<uint8_t> in(block);
27300 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
27393 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27400 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27402 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27403 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27404 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27405 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27406 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27407 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27415 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27419 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27421 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27436 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27437 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27465 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27483 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27484 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27485 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27486 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27487 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27488 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27495 simd8<uint8_t> error;
27497 validating_transcoder() : error(uint8_t(0)) {}
27501 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27504 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27505 simd8<uint8_t> sc = check_special_cases(input, prev1);
27535 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
27537 auto zero = simd8<uint8_t>{uint8_t(0)};
27538 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
27541 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
27610 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
27612 auto zero = simd8<uint8_t>{uint8_t(0)};
27613 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
27616 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
27745 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27752 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
27754 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
27755 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
27756 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
27757 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
27758 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
27759 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
27767 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
27771 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
27773 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27788 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
27789 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27817 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27835 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27836 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27837 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27838 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27839 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27840 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27847 simd8<uint8_t> error;
27849 validating_transcoder() : error(uint8_t(0)) {}
27853 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27856 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27857 simd8<uint8_t> sc = check_special_cases(input, prev1);
27887 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
27889 auto zero = simd8<uint8_t>{uint8_t(0)};
27890 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
27893 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
27961 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
27963 auto zero = simd8<uint8_t>{uint8_t(0)};
27964 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
27967 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
28397 simdutf_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
28401 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28402 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
28403 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
28404 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
28409 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28410 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
28411 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
28463 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
28465 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
28507 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28508 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28555 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28556 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28666 simd::simd8x64<uint8_t> in8(in, secondin, thirdin, fourthin);
28676 uint8_t block[64]{};
28679 simd::simd8x64<uint8_t> in(block);
28753 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28754 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28755 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28756 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28830 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28831 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28832 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28833 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28976 const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80);
29180 const uint8_t idx =
29182 const uint8_t consumed =
29356 const uint8_t idx =
29358 const uint8_t consumed =
29449 const uint8_t idx =
29451 const uint8_t consumed =
29731 const uint8_t mask0 = uint8_t(mask);
29733 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
29737 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
29739 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
29935 const uint8_t mask0 = uint8_t(mask);
29937 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
29941 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
29943 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
30364 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
30366 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
30454 const uint8_t mask0 = uint8_t(mask);
30456 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
30460 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
30462 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
30620 const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
30622 const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
30716 const uint8_t mask0 = uint8_t(mask);
30718 const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
30722 const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
30724 const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
30916 simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
30919 simdutf_really_inline const uint8_t *full_block() const;
30929 simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
30932 const uint8_t *buf;
30939 simdutf_unused static char * format_input_text_64(const uint8_t *text) {
30940 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
30941 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
30944 buf[sizeof(simd8x64<uint8_t>)] = '\0';
30949 simdutf_unused static char * format_input_text(const simd8x64<uint8_t>& in) {
30950 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
30951 in.store(reinterpret_cast<uint8_t*>(buf));
30952 for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
30955 buf[sizeof(simd8x64<uint8_t>)] = '\0';
30969 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
30980 simdutf_really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
30985 simdutf_really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
31009 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31016 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31018 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31019 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31020 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31021 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31022 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31023 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31031 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31035 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31037 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31052 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31053 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31081 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31099 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31100 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31101 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31102 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31103 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31104 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31112 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
31115 static const uint8_t max_array[32] = {
31121 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
31127 simd8<uint8_t> error;
31129 simd8<uint8_t> prev_input_block;
31131 simd8<uint8_t> prev_incomplete;
31136 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31139 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31140 simd8<uint8_t> sc = check_special_cases(input, prev1);
31153 simdutf_really_inline void check_next_input(const simd8x64<uint8_t>& input) {
31158 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31160 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31163 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31169 this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
31170 this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
31199 bool generic_validate_utf8(const uint8_t * input, size_t length) {
31203 simd::simd8x64<uint8_t> in(reader.full_block());
31207 uint8_t block[64]{};
31209 simd::simd8x64<uint8_t> in(block);
31217 return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31224 result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
31229 simd::simd8x64<uint8_t> in(reader.full_block());
31240 uint8_t block[64]{};
31242 simd::simd8x64<uint8_t> in(block);
31257 return generic_validate_utf8_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31261 bool generic_validate_ascii(const uint8_t * input, size_t length) {
31263 uint8_t blocks[64]{};
31264 simd::simd8x64<uint8_t> running_or(blocks);
31266 simd::simd8x64<uint8_t> in(reader.full_block());
31270 uint8_t block[64]{};
31272 simd::simd8x64<uint8_t> in(block);
31278 return generic_validate_ascii<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31282 result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
31286 simd::simd8x64<uint8_t> in(reader.full_block());
31295 uint8_t block[64]{};
31297 simd::simd8x64<uint8_t> in(block);
31307 return generic_validate_ascii_with_errors<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length);
31400 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31407 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31409 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31410 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31411 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31412 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31413 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31414 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31422 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31426 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31428 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31443 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31444 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31472 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31490 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31491 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31492 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31493 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31494 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31495 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31502 simd8<uint8_t> error;
31504 validating_transcoder() : error(uint8_t(0)) {}
31508 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31511 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31512 simd8<uint8_t> sc = check_special_cases(input, prev1);
31542 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31544 auto zero = simd8<uint8_t>{uint8_t(0)};
31545 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31548 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31617 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31619 auto zero = simd8<uint8_t>{uint8_t(0)};
31620 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31623 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31752 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31759 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
31761 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
31762 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
31763 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
31764 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
31765 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
31766 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
31774 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
31778 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
31780 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31795 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
31796 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31824 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31842 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31843 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31844 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31845 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31846 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31847 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31854 simd8<uint8_t> error;
31856 validating_transcoder() : error(uint8_t(0)) {}
31860 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31863 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31864 simd8<uint8_t> sc = check_special_cases(input, prev1);
31894 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31896 auto zero = simd8<uint8_t>{uint8_t(0)};
31897 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31900 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31968 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31970 auto zero = simd8<uint8_t>{uint8_t(0)};
31971 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31974 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
32157 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
32167 constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
32169 constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
32170 constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
32171 constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____
32172 constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
32173 constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
32174 constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
32182 constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
32186 constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
32187 constexpr const uint8_t FORBIDDEN = 0xff;
32189 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
32204 constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
32205 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
32233 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
32254 simd8<uint8_t> error;
32256 validating_transcoder() : error(uint8_t(0)) {}
32260 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
32263 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
32292 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
32294 auto zero = simd8<uint8_t>{uint8_t(0)};
32295 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
32298 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
32366 static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
32368 auto zero = simd8<uint8_t>{uint8_t(0)};
32369 if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
32372 } else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
33129 const uint8_t *str = reinterpret_cast<const uint8_t *>(input);