Lines Matching defs:simd8
259 struct simd8;
262 // Base class of simd8<uint8_t> and simd8<bool>, both of which use uint8x16_t internally.
264 template<typename T, typename Mask=simd8<bool>>
277 simdutf_really_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); }
278 simdutf_really_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); }
279 simdutf_really_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); }
280 simdutf_really_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); }
281 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
282 simdutf_really_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast | other; return *this_cast; }
283 simdutf_really_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast & other; return *this_cast; }
284 simdutf_really_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
286 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return vceqq_u8(lhs, rhs); }
289 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
296 struct simd8<bool>: base_u8<bool> {
300 static simdutf_really_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); }
302 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {}
304 simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
306 simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {}
344 struct simd8<uint8_t>: base_u8<uint8_t> {
345 static simdutf_really_inline simd8<uint8_t> splat(uint8_t _value) { return vmovq_n_u8(_value); }
346 static simdutf_really_inline simd8<uint8_t> zero() { return vdupq_n_u8(0); }
347 static simdutf_really_inline simd8<uint8_t> load(const uint8_t* values) { return vld1q_u8(values); }
348 simdutf_really_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {}
350 simdutf_really_inline simd8() : simd8(zero()) {}
352 simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
354 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
357 simdutf_really_inline simd8(
360 ) : simd8(simdutf_make_uint8x16_t(
365 simdutf_really_inline simd8(
368 ) : simd8(uint8x16_t{
375 simdutf_really_inline static simd8<uint8_t> repeat_16(
379 return simd8<uint8_t>(
389 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
390 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); }
393 simdutf_really_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); }
394 simdutf_really_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); }
395 simdutf_really_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; }
396 simdutf_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
401 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
402 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
403 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
404 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
405 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
406 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); }
408 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
410 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }
413 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
417 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
419 simdutf_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
421 simdutf_really_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); }
425 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
431 simdutf_really_inline simd8<L> lookup_16(
436 return lookup_16(simd8<L>::repeat_16(
445 simdutf_really_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) const {
446 return vqtbl1q_u8(*this, simd8<uint8_t>(original));
452 struct simd8<int8_t> {
455 static simdutf_really_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); }
456 static simdutf_really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); }
457 static simdutf_really_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); }
498 const simd8<uint8_t> tb1{ 0,255,255,255, 1,255,255,255, 2,255,255,255, 3,255,255,255 };
499 const simd8<uint8_t> tb2{ 4,255,255,255, 5,255,255,255, 6,255,255,255, 7,255,255,255 };
500 const simd8<uint8_t> tb3{ 8,255,255,255, 9,255,255,255, 10,255,255,255, 11,255,255,255 };
501 const simd8<uint8_t> tb4{ 12,255,255,255, 13,255,255,255, 14,255,255,255, 15,255,255,255 };
515 simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {}
523 simdutf_really_inline simd8() : simd8(zero()) {}
525 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
527 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
530 simdutf_really_inline simd8(
533 ) : simd8(simdutf_make_int8x16_t(
538 simdutf_really_inline simd8(
541 ) : simd8(int8x16_t{
547 simdutf_really_inline static simd8<int8_t> repeat_16(
551 return simd8<int8_t>(
565 simdutf_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
567 simdutf_really_inline operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); }
569 simdutf_really_inline simd8<int8_t> operator|(const simd8<int8_t> other) const { return vorrq_s8(value, other.value); }
570 simdutf_really_inline simd8<int8_t> operator&(const simd8<int8_t> other) const { return vandq_s8(value, other.value); }
571 simdutf_really_inline simd8<int8_t> operator^(const simd8<int8_t> other) const { return veorq_s8(value, other.value); }
572 simdutf_really_inline simd8<int8_t> bit_andnot(const simd8<int8_t> other) const { return vbicq_s8(value, other.value); }
575 simdutf_really_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(value, other.value); }
576 simdutf_really_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(value, other.value); }
577 simdutf_really_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; }
578 simdutf_really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; }
585 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return vmaxq_s8(value, other.value); }
586 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return vminq_s8(value, other.value); }
587 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(value, other.value); }
588 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(value, other.value); }
589 simdutf_really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(value, other.value); }
592 simdutf_really_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const {
598 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
602 simdutf_really_inline simd8<L> lookup_16(
607 return lookup_16(simd8<L>::repeat_16(
616 simdutf_really_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) const {
617 return vqtbl1q_s8(*this, simd8<uint8_t>(original));
623 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
625 simd8<T> chunks[NUM_CHUNKS];
628 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
631 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
632 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
635 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
636 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
637 this->chunks[2].store(ptr+sizeof(simd8<T>)*2/sizeof(T));
638 this->chunks[3].store(ptr+sizeof(simd8<T>)*3/sizeof(T));
650 simdutf_really_inline simd8<T> reduce_or() const {
660 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
661 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
662 this->chunks[2].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*2);
663 this->chunks[3].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*3);
667 this->chunks[0].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*0);
668 this->chunks[1].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*1);
669 this->chunks[2].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*2);
670 this->chunks[3].store_ascii_as_utf32_tbl(ptr+sizeof(simd8<T>)*3);
694 const simd8<T> mask = simd8<T>::splat(m);
704 const simd8<T> mask = simd8<T>::splat(m);
714 const simd8<T> mask_low = simd8<T>::splat(low);
715 const simd8<T> mask_high = simd8<T>::splat(high);
725 const simd8<T> mask_low = simd8<T>::splat(low);
726 const simd8<T> mask_high = simd8<T>::splat(high);
735 const simd8<T> mask = simd8<T>::splat(m);
744 const simd8<T> mask = simd8<T>::splat(m);
753 const simd8<T> mask = simd8<T>::splat(m);
762 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
764 simd8<uint8_t>(uint8x16_t(this->chunks[0])) >= mask,
765 simd8<uint8_t>(uint8x16_t(this->chunks[1])) >= mask,
766 simd8<uint8_t>(uint8x16_t(this->chunks[2])) >= mask,
767 simd8<uint8_t>(uint8x16_t(this->chunks[3])) >= mask
931 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
1719 struct simd8;
1721 template<typename T, typename Mask=simd8<bool>>
1722 struct base8: base<simd8<T>> {
1726 simdutf_really_inline base8() : base<simd8<T>>() {}
1727 simdutf_really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {}
1730 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm256_cmpeq_epi8(lhs, rhs); }
1735 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
1742 struct simd8<bool>: base8<bool> {
1743 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); }
1745 simdutf_really_inline simd8<bool>() : base8() {}
1746 simdutf_really_inline simd8<bool>(const __m256i _value) : base8<bool>(_value) {}
1748 simdutf_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
1754 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
1759 static simdutf_really_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); }
1760 static simdutf_really_inline simd8<T> zero() { return _mm256_setzero_si256(); }
1761 static simdutf_really_inline simd8<T> load(const T values[32]) {
1765 static simdutf_really_inline simd8<T> repeat_16(
1769 return simd8<T>(
1784 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); }
1785 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); }
1786 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
1787 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
1790 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
1794 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
1799 simdutf_really_inline simd8<L> lookup_16(
1804 return lookup_16(simd8<L>::repeat_16(
1816 struct simd8<int8_t> : base8_numeric<int8_t> {
1817 simdutf_really_inline simd8() : base8_numeric<int8_t>() {}
1818 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {}
1821 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
1823 simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
1824 simdutf_really_inline operator simd8<uint8_t>() const;
1826 simdutf_really_inline simd8(
1831 ) : simd8(_mm256_setr_epi8(
1838 simdutf_really_inline static simd8<int8_t> repeat_16(
1842 return simd8<int8_t>(
1851 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); }
1852 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); }
1853 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); }
1854 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); }
1859 struct simd8<uint8_t>: base8_numeric<uint8_t> {
1860 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
1861 simdutf_really_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {}
1863 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
1865 simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
1867 simdutf_really_inline simd8(
1872 ) : simd8(_mm256_setr_epi8(
1879 simdutf_really_inline static simd8<uint8_t> repeat_16(
1883 return simd8<uint8_t>(
1893 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); }
1894 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); }
1897 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); }
1898 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); }
1900 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); }
1902 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
1903 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
1904 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
1905 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
1906 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
1909 simdutf_really_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
1910 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
1911 simdutf_really_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
1912 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
1916 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); }
1917 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
1919 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
1921 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
1927 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
1932 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
1934 simd8<T> chunks[NUM_CHUNKS];
1937 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
1940 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {}
1941 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T))} {}
1944 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
1945 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
1960 simdutf_really_inline simd8<T> reduce_or() const {
1970 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
1971 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
1975 this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*0);
1976 this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*1);
1980 const simd8<T> mask = simd8<T>::splat(m);
1988 const simd8<T> mask = simd8<T>::splat(m);
2003 const simd8<T> mask = simd8<T>::splat(m);
2011 const simd8<T> mask_low = simd8<T>::splat(low);
2012 const simd8<T> mask_high = simd8<T>::splat(high);
2020 const simd8<T> mask_low = simd8<T>::splat(low);
2021 const simd8<T> mask_high = simd8<T>::splat(high);
2028 const simd8<T> mask = simd8<T>::splat(m);
2036 const simd8<T> mask = simd8<T>::splat(m);
2043 const simd8<T> mask = simd8<T>::splat(m);
2050 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
2052 (simd8<uint8_t>(__m256i(this->chunks[0])) >= mask),
2053 (simd8<uint8_t>(__m256i(this->chunks[1])) >= mask)
2202 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
2624 struct simd8;
2626 template<typename T, typename Mask=simd8<bool>>
2627 struct base8: base<simd8<T>> {
2633 simdutf_really_inline base8() : base<simd8<T>>() {}
2634 simdutf_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
2636 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm_cmpeq_epi8(lhs, rhs); }
2638 static const int SIZE = sizeof(base<simd8<T>>::value);
2641 simdutf_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
2648 struct simd8<bool>: base8<bool> {
2649 static simdutf_really_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); }
2651 simdutf_really_inline simd8<bool>() : base8() {}
2652 simdutf_really_inline simd8<bool>(const __m128i _value) : base8<bool>(_value) {}
2654 simdutf_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
2660 simdutf_really_inline simd8<bool> operator~() const { return *this ^ true; }
2665 static simdutf_really_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); }
2666 static simdutf_really_inline simd8<T> zero() { return _mm_setzero_si128(); }
2667 static simdutf_really_inline simd8<T> load(const T values[16]) {
2671 static simdutf_really_inline simd8<T> repeat_16(
2675 return simd8<T>(
2688 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
2691 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); }
2692 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); }
2693 simdutf_really_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); }
2694 simdutf_really_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); }
2698 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
2703 simdutf_really_inline simd8<L> lookup_16(
2708 return lookup_16(simd8<L>::repeat_16(
2719 struct simd8<int8_t> : base8_numeric<int8_t> {
2720 simdutf_really_inline simd8() : base8_numeric<int8_t>() {}
2721 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {}
2723 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
2725 simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
2727 simdutf_really_inline simd8(
2730 ) : simd8(_mm_setr_epi8(
2735 simdutf_really_inline static simd8<int8_t> repeat_16(
2739 return simd8<int8_t>(
2744 simdutf_really_inline operator simd8<uint8_t>() const;
2748 simdutf_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); }
2749 simdutf_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); }
2750 simdutf_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); }
2751 simdutf_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); }
2756 struct simd8<uint8_t>: base8_numeric<uint8_t> {
2757 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
2758 simdutf_really_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {}
2761 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
2763 simdutf_really_inline simd8(const uint8_t* values) : simd8(load(values)) {}
2765 simdutf_really_inline simd8(
2768 ) : simd8(_mm_setr_epi8(
2773 simdutf_really_inline static simd8<uint8_t> repeat_16(
2777 return simd8<uint8_t>(
// Lane-wise unsigned 8-bit addition that clamps at 255 instead of wrapping around.
2784 simdutf_really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { const __m128i clamped_sum = _mm_adds_epu8(*this, other); return clamped_sum; }
// Lane-wise unsigned 8-bit subtraction (this - other) that clamps at 0 instead of wrapping around.
2785 simdutf_really_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { const __m128i clamped_diff = _mm_subs_epu8(*this, other); return clamped_diff; }
// Lane-wise unsigned 8-bit maximum of this vector and `other`.
2788 simdutf_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { const __m128i larger = _mm_max_epu8(*this, other); return larger; }
// Lane-wise unsigned 8-bit minimum of this vector and `other`.
2789 simdutf_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { const __m128i smaller = _mm_min_epu8(*this, other); return smaller; }
// Returns the saturated difference this - other: a lane is non-zero exactly where this > other (unsigned).
// The result is NOT an all-ones mask; only "zero vs. non-zero" is meaningful per lane.
2791 simdutf_really_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { const simd8<uint8_t> excess = this->saturating_sub(other); return excess; }
// Returns the saturated difference other - this: a lane is non-zero exactly where this < other (unsigned).
// The result is NOT an all-ones mask; only "zero vs. non-zero" is meaningful per lane.
2793 simdutf_really_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { const simd8<uint8_t> shortfall = other.saturating_sub(*this); return shortfall; }
// Lane-wise unsigned this <= other, expressed as max(other, this) == other.
2794 simdutf_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { const simd8<bool> le_mask = (other.max_val(*this) == other); return le_mask; }
// Lane-wise unsigned this >= other, expressed as min(other, this) == other.
2795 simdutf_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { const simd8<bool> ge_mask = (other.min_val(*this) == other); return ge_mask; }
// Lane-wise unsigned this > other: true in the lanes where the saturated difference this - other is non-zero.
2796 simdutf_really_inline simd8<bool> operator>(const simd8<uint8_t> other) const { const simd8<uint8_t> excess = this->gt_bits(other); return excess.any_bits_set(); }
// Lane-wise unsigned this < other: true in the lanes where the saturated difference other - this is non-zero.
// Must be built on lt_bits (not gt_bits); with gt_bits this operator would return the same mask as operator>.
2797 simdutf_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
// Per-lane mask: true in every 8-bit lane whose value is exactly 0.
2800 simdutf_really_inline simd8<bool> bits_not_set() const { const simd8<bool> is_zero = (*this == uint8_t(0)); return is_zero; }
// Per-lane mask: true in every lane where none of the bits selected by `bits` are set in this vector.
2801 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { const auto selected = (*this & bits); return selected.bits_not_set(); }
// Per-lane mask: true in every 8-bit lane that is non-zero (the complement of bits_not_set()).
2802 simdutf_really_inline simd8<bool> any_bits_set() const { const simd8<bool> none_set = this->bits_not_set(); return ~none_set; }
// Per-lane mask: true in every lane where at least one bit selected by `bits` is set in this vector.
2803 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { const simd8<bool> none_set = this->bits_not_set(bits); return ~none_set; }
// Whole-register test: true when (this AND bits) is zero across all 128 bits.
2808 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits) != 0; }
// Whole-register test: true when at least one bit selected by `bits` is set somewhere in this vector.
2809 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { const bool all_clear = bits_not_set_anywhere(bits); return !all_clear; }
2811 simdutf_really_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
2813 simdutf_really_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
2819 simdutf_really_inline simd8<int8_t>::operator simd8<uint8_t>() const { return this->value; }
2823 struct simd8<uint16_t>: base<uint16_t> {
2824 static simdutf_really_inline simd8<uint16_t> splat(uint16_t _value) { return _mm_set1_epi16(_value); }
2825 static simdutf_really_inline simd8<uint16_t> load(const uint16_t values[8]) {
2829 simdutf_really_inline simd8() : base<uint16_t>() {}
2830 simdutf_really_inline simd8(const __m128i _value) : base<uint16_t>(_value) {}
2832 simdutf_really_inline simd8(uint16_t _value) : simd8(splat(_value)) {}
2834 simdutf_really_inline simd8(const uint16_t* values) : simd8(load(values)) {}
2836 simdutf_really_inline simd8(
2838 ) : simd8(_mm_setr_epi16(
// Lane-wise unsigned 16-bit addition that clamps at 65535 instead of wrapping around.
2843 simdutf_really_inline simd8<uint16_t> saturating_add(const simd8<uint16_t> other) const { const __m128i clamped_sum = _mm_adds_epu16(*this, other); return clamped_sum; }
// Lane-wise unsigned 16-bit subtraction (this - other) that clamps at 0 instead of wrapping around.
2844 simdutf_really_inline simd8<uint16_t> saturating_sub(const simd8<uint16_t> other) const { const __m128i clamped_diff = _mm_subs_epu16(*this, other); return clamped_diff; }
// Lane-wise unsigned 16-bit maximum of this vector and `other`.
2847 simdutf_really_inline simd8<uint16_t> max_val(const simd8<uint16_t> other) const { const __m128i larger = _mm_max_epu16(*this, other); return larger; }
// Lane-wise unsigned 16-bit minimum of this vector and `other`.
2848 simdutf_really_inline simd8<uint16_t> min_val(const simd8<uint16_t> other) const { const __m128i smaller = _mm_min_epu16(*this, other); return smaller; }
// Returns the saturated difference this - other: a 16-bit lane is non-zero exactly where this > other (unsigned).
// The result is NOT an all-ones mask; only "zero vs. non-zero" is meaningful per lane.
2850 simdutf_really_inline simd8<uint16_t> gt_bits(const simd8<uint16_t> other) const { const simd8<uint16_t> excess = this->saturating_sub(other); return excess; }
// Returns the saturated difference other - this: a 16-bit lane is non-zero exactly where this < other (unsigned).
// The result is NOT an all-ones mask; only "zero vs. non-zero" is meaningful per lane.
2852 simdutf_really_inline simd8<uint16_t> lt_bits(const simd8<uint16_t> other) const { const simd8<uint16_t> shortfall = other.saturating_sub(*this); return shortfall; }
// Lane-wise unsigned 16-bit this <= other, expressed as max(other, this) == other.
2853 simdutf_really_inline simd8<bool> operator<=(const simd8<uint16_t> other) const { const simd8<bool> le_mask = (other.max_val(*this) == other); return le_mask; }
// Lane-wise unsigned 16-bit this >= other, expressed as min(other, this) == other.
2854 simdutf_really_inline simd8<bool> operator>=(const simd8<uint16_t> other) const { const simd8<bool> ge_mask = (other.min_val(*this) == other); return ge_mask; }
// Lane-wise 16-bit equality: each 16-bit lane of the result is all ones where the lanes match, all zeros otherwise.
2855 simdutf_really_inline simd8<bool> operator==(const simd8<uint16_t> other) const { const __m128i eq_lanes = _mm_cmpeq_epi16(*this, other); return eq_lanes; }
// Bitwise AND of the two 128-bit registers.
// NOTE(review): the result is wrapped as simd8<bool> rather than simd8<uint16_t>, so lanes are not guaranteed to be all-ones/all-zeros — confirm this return type is intended.
2856 simdutf_really_inline simd8<bool> operator&(const simd8<uint16_t> other) const { const __m128i anded = _mm_and_si128(*this, other); return anded; }
// Bitwise OR of the two 128-bit registers.
// NOTE(review): the result is wrapped as simd8<bool> rather than simd8<uint16_t>, so lanes are not guaranteed to be all-ones/all-zeros — confirm this return type is intended.
2857 simdutf_really_inline simd8<bool> operator|(const simd8<uint16_t> other) const { const __m128i ored = _mm_or_si128(*this, other); return ored; }
// Per-lane mask: true in every 16-bit lane whose value is exactly 0.
2860 simdutf_really_inline simd8<bool> bits_not_set() const { const simd8<bool> is_zero = (*this == uint16_t(0)); return is_zero; }
// Per-lane mask: true in every 16-bit lane that is non-zero (the complement of bits_not_set()).
2861 simdutf_really_inline simd8<bool> any_bits_set() const { const simd8<bool> none_set = this->bits_not_set(); return ~none_set; }
// Whole-register test: true when (this AND bits) is zero across all 128 bits.
2865 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint16_t> bits) const { return _mm_testz_si128(*this, bits) != 0; }
// Whole-register test: true when at least one bit selected by `bits` is set somewhere in this vector.
2866 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint16_t> bits) const { const bool all_clear = bits_not_set_anywhere(bits); return !all_clear; }
2870 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
2872 simd8<T> chunks[NUM_CHUNKS];
2875 simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
2878 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
2879 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
2882 this->chunks[0].store(ptr+sizeof(simd8<T>)*0/sizeof(T));
2883 this->chunks[1].store(ptr+sizeof(simd8<T>)*1/sizeof(T));
2884 this->chunks[2].store(ptr+sizeof(simd8<T>)*2/sizeof(T));
2885 this->chunks[3].store(ptr+sizeof(simd8<T>)*3/sizeof(T));
2896 simdutf_really_inline simd8<T> reduce_or() const {
2906 this->chunks[0].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*0);
2907 this->chunks[1].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*1);
2908 this->chunks[2].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*2);
2909 this->chunks[3].template store_ascii_as_utf16<endian>(ptr+sizeof(simd8<T>)*3);
2913 this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*0);
2914 this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*1);
2915 this->chunks[2].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*2);
2916 this->chunks[3].store_ascii_as_utf32(ptr+sizeof(simd8<T>)*3);
2928 const simd8<T> mask = simd8<T>::splat(m);
2947 const simd8<T> mask = simd8<T>::splat(m);
2957 const simd8<T> mask_low = simd8<T>::splat(low);
2958 const simd8<T> mask_high = simd8<T>::splat(high);
2968 const simd8<T> mask_low = simd8<T>::splat(low-1);
2969 const simd8<T> mask_high = simd8<T>::splat(high+1);
2978 const simd8<T> mask = simd8<T>::splat(m);
2988 const simd8<T> mask = simd8<T>::splat(m);
2997 const simd8<T> mask = simd8<T>::splat(m);
3006 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3008 simd8<uint8_t>(__m128i(this->chunks[0])) >= mask,
3009 simd8<uint8_t>(__m128i(this->chunks[1])) >= mask,
3010 simd8<uint8_t>(__m128i(this->chunks[2])) >= mask,
3011 simd8<uint8_t>(__m128i(this->chunks[3])) >= mask
3166 static simdutf_really_inline simd8<uint8_t> pack(const simd16<uint16_t>& v0, const simd16<uint16_t>& v1) {
3518 template <typename T> struct simd8;
3520 template <typename T, typename Mask = simd8<bool>>
3521 struct base8 : base<simd8<T>> {
3525 simdutf_really_inline base8() : base<simd8<T>>() {}
3526 simdutf_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
3528 friend simdutf_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) {
3532 static const int SIZE = sizeof(base<simd8<T>>::value);
3535 simdutf_really_inline simd8<T> prev(simd8<T> prev_chunk) const {
3550 template <> struct simd8<bool> : base8<bool> {
3551 static simdutf_really_inline simd8<bool> splat(bool _value) {
3555 simdutf_really_inline simd8<bool>() : base8() {}
3556 simdutf_really_inline simd8<bool>(const __m128i _value)
3559 simdutf_really_inline simd8<bool>(bool _value)
3578 simdutf_really_inline simd8<bool> operator~() const {
3584 static simdutf_really_inline simd8<T> splat(T value) {
3588 static simdutf_really_inline simd8<T> zero() { return splat(0); }
3589 static simdutf_really_inline simd8<T> load(const T values[16]) {
3593 static simdutf_really_inline simd8<T> repeat_16(T v0, T v1, T v2, T v3, T v4,
3597 return simd8<T>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
3611 simdutf_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
3614 simdutf_really_inline simd8<T> operator+(const simd8<T> other) const {
3617 simdutf_really_inline simd8<T> operator-(const simd8<T> other) const {
3620 simdutf_really_inline simd8<T> &operator+=(const simd8<T> other) {
3622 return *static_cast<simd8<T> *>(this);
3624 simdutf_really_inline simd8<T> &operator-=(const simd8<T> other) {
3626 return *static_cast<simd8<T> *>(this);
3632 simdutf_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
3637 simdutf_really_inline simd8<L>
3642 return lookup_16(simd8<L>::repeat_16(
3650 template <> struct simd8<int8_t> : base8_numeric<int8_t> {
3651 simdutf_really_inline simd8() : base8_numeric<int8_t>() {}
3652 simdutf_really_inline simd8(const __m128i _value)
3656 simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
3658 simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
3660 simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
3664 : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7,
3668 simdutf_really_inline static simd8<int8_t>
3672 return simd8<int8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
3677 simdutf_really_inline simd8<int8_t>
3678 max_val(const simd8<int8_t> other) const {
3682 simdutf_really_inline simd8<int8_t>
3683 min_val(const simd8<int8_t> other) const {
3687 simdutf_really_inline simd8<bool>
3688 operator>(const simd8<int8_t> other) const {
3692 simdutf_really_inline simd8<bool>
3693 operator<(const simd8<int8_t> other) const {
3700 template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
3701 simdutf_really_inline simd8() : base8_numeric<uint8_t>() {}
3702 simdutf_really_inline simd8(const __m128i _value)
3705 simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
3707 simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {}
3710 simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
3713 : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
3716 simdutf_really_inline static simd8<uint8_t>
3721 return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
3726 simdutf_really_inline simd8<uint8_t>
3727 saturating_add(const simd8<uint8_t> other) const {
3730 simdutf_really_inline simd8<uint8_t>
3731 saturating_sub(const simd8<uint8_t> other) const {
3736 simdutf_really_inline simd8<uint8_t>
3737 max_val(const simd8<uint8_t> other) const {
3740 simdutf_really_inline simd8<uint8_t>
3741 min_val(const simd8<uint8_t> other) const {
3745 simdutf_really_inline simd8<uint8_t>
3746 gt_bits(const simd8<uint8_t> other) const {
3750 simdutf_really_inline simd8<uint8_t>
3751 lt_bits(const simd8<uint8_t> other) const {
3754 simdutf_really_inline simd8<bool>
3755 operator<=(const simd8<uint8_t> other) const {
3758 simdutf_really_inline simd8<bool>
3759 operator>=(const simd8<uint8_t> other) const {
3762 simdutf_really_inline simd8<bool>
3763 operator>(const simd8<uint8_t> other) const {
3766 simdutf_really_inline simd8<bool>
3767 operator<(const simd8<uint8_t> other) const {
3772 simdutf_really_inline simd8<bool> bits_not_set() const {
3775 simdutf_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
3778 simdutf_really_inline simd8<bool> any_bits_set() const {
3781 simdutf_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
3795 simdutf_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
3799 simdutf_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
3802 template <int N> simdutf_really_inline simd8<uint8_t> shr() const {
3803 return simd8<uint8_t>(
3806 template <int N> simdutf_really_inline simd8<uint8_t> shl() const {
3807 return simd8<uint8_t>(
3813 static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
3816 simd8<T> chunks[NUM_CHUNKS];
3820 operator=(const simd8<T> other) = delete; // no assignment allowed
3823 simdutf_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1,
3824 const simd8<T> chunk2, const simd8<T> chunk3)
3827 simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+2*sizeof(simd8<T>)/sizeof(T)), simd8<T>::load(ptr+3*sizeof(simd8<T>)/sizeof(T))} {}
3830 this->chunks[0].store(ptr + sizeof(simd8<T>) * 0/sizeof(T));
3831 this->chunks[1].store(ptr + sizeof(simd8<T>) * 1/sizeof(T));
3832 this->chunks[2].store(ptr + sizeof(simd8<T>) * 2/sizeof(T));
3833 this->chunks[3].store(ptr + sizeof(simd8<T>) * 3/sizeof(T));
3845 simdutf_really_inline simd8<T> reduce_or() const {
3864 const simd8<T> mask = simd8<T>::splat(m);
3879 const simd8<T> mask = simd8<T>::splat(m);
3886 const simd8<T> mask_low = simd8<T>::splat(low);
3887 const simd8<T> mask_high = simd8<T>::splat(high);
3897 const simd8<T> mask_low = simd8<T>::splat(low);
3898 const simd8<T> mask_high = simd8<T>::splat(high);
3907 const simd8<T> mask = simd8<T>::splat(m);
3914 const simd8<T> mask = simd8<T>::splat(m);
3923 const simd8<T> mask = simd8<T>::splat(m);
3932 const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
3934 simd8<uint8_t>(this->chunks[0]) >= mask,
3935 simd8<uint8_t>(this->chunks[1]) >= mask,
3936 simd8<uint8_t>(this->chunks[2]) >= mask,
3937 simd8<uint8_t>(this->chunks[3]) >= mask
12577 simd8<uint8_t> bits = input.reduce_or();
12581 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12582 simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u);
12583 simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
12584 simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
12593 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
12594 simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
12595 simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
12686 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12687 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12732 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12733 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12761 const simd8<uint8_t> in_16 = simd16<uint16_t>::pack(t0, t1);
12884 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12885 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12886 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12887 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12900 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
12954 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
12955 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
12956 const auto v_fc = simd8<uint8_t>::splat(0xfc);
12957 const auto v_dc = simd8<uint8_t>::splat(0xdc);
12971 const simd8<uint8_t> in = simd16<uint16_t>::pack(t0, t1);
13226 simd8<int8_t> temp{vreinterpretq_s8_u8(in)};
13486 simd8<int8_t> temp{vreinterpretq_s8_u8(in)};
15219 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15247 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15263 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15291 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15309 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15310 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15311 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
15312 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
15313 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
15314 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
15322 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
15331 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
15337 simd8<uint8_t> error;
15339 simd8<uint8_t> prev_input_block;
15341 simd8<uint8_t> prev_incomplete;
15346 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15349 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
15350 simd8<uint8_t> sc = check_special_cases(input, prev1);
15610 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15638 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
15654 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
15682 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
15700 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
15701 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
15702 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
15703 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
15704 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
15705 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
15712 simd8<uint8_t> error;
15718 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
15721 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
15722 simd8<uint8_t> sc = check_special_cases(input, prev1);
15754 auto zero = simd8<uint8_t>{uint8_t(0)};
15829 auto zero = simd8<uint8_t>{uint8_t(0)};
15962 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
15990 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16006 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16034 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16052 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
16053 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
16054 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
16055 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
16056 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
16057 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
16064 simd8<uint8_t> error;
16070 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16073 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
16074 simd8<uint8_t> sc = check_special_cases(input, prev1);
16106 auto zero = simd8<uint8_t>{uint8_t(0)};
16180 auto zero = simd8<uint8_t>{uint8_t(0)};
16367 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
16399 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
16415 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
16443 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
16464 simd8<uint8_t> error;
16470 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
16473 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
16504 auto zero = simd8<uint8_t>{uint8_t(0)};
16578 auto zero = simd8<uint8_t>{uint8_t(0)};
22022 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22023 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
22024 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
22025 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
22027 return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
22030 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
22031 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
22032 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
22034 return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
22050 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22051 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22088 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22089 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22276 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22277 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22278 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22279 const auto v_dc = simd8<uint8_t>::splat(0xdc);
22354 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
22355 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
22356 const auto v_fc = simd8<uint8_t>::splat(0xfc);
22357 const auto v_dc = simd8<uint8_t>::splat(0xdc);
24630 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
24658 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
24674 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
24702 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
24720 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
24721 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
24722 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
24723 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
24724 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
24725 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
24733 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
24742 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
24748 simd8<uint8_t> error;
24750 simd8<uint8_t> prev_input_block;
24752 simd8<uint8_t> prev_incomplete;
24757 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
24760 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
24761 simd8<uint8_t> sc = check_special_cases(input, prev1);
25021 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25049 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25065 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25093 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25111 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25112 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25113 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
25114 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
25115 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
25116 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
25123 simd8<uint8_t> error;
25129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25132 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25133 simd8<uint8_t> sc = check_special_cases(input, prev1);
25165 auto zero = simd8<uint8_t>{uint8_t(0)};
25240 auto zero = simd8<uint8_t>{uint8_t(0)};
25373 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25401 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25417 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25445 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25463 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
25464 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
25465 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
25466 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
25467 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
25468 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
25475 simd8<uint8_t> error;
25481 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25484 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25485 simd8<uint8_t> sc = check_special_cases(input, prev1);
25517 auto zero = simd8<uint8_t>{uint8_t(0)};
25591 auto zero = simd8<uint8_t>{uint8_t(0)};
25780 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
25812 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
25828 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
25856 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
25877 simd8<uint8_t> error;
25883 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
25886 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
25917 auto zero = simd8<uint8_t>{uint8_t(0)};
25991 auto zero = simd8<uint8_t>{uint8_t(0)};
26881 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26882 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
26883 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
26884 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
26886 return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
26889 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
26890 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
26891 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
26893 return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
27002 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27030 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27046 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27074 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27092 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27093 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27094 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27095 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27096 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27097 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27105 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
27114 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
27120 simd8<uint8_t> error;
27122 simd8<uint8_t> prev_input_block;
27124 simd8<uint8_t> prev_incomplete;
27129 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27132 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27133 simd8<uint8_t> sc = check_special_cases(input, prev1);
27393 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27421 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27437 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27465 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27483 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27484 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27485 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27486 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27487 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27488 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27495 simd8<uint8_t> error;
27501 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27504 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27505 simd8<uint8_t> sc = check_special_cases(input, prev1);
27537 auto zero = simd8<uint8_t>{uint8_t(0)};
27612 auto zero = simd8<uint8_t>{uint8_t(0)};
27745 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
27773 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
27789 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
27817 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
27835 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
27836 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
27837 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
27838 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
27839 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
27840 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
27847 simd8<uint8_t> error;
27853 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
27856 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
27857 simd8<uint8_t> sc = check_special_cases(input, prev1);
27889 auto zero = simd8<uint8_t>{uint8_t(0)};
27963 auto zero = simd8<uint8_t>{uint8_t(0)};
28401 simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28402 simd8<uint8_t> is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0
28403 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
28404 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
28406 return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
28409 simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
28410 simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
28411 simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
28413 return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
28507 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28508 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28555 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28556 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28753 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28754 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28755 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28756 const auto v_dc = simd8<uint8_t>::splat(0xdc);
28830 const auto v_d8 = simd8<uint8_t>::splat(0xd8);
28831 const auto v_f8 = simd8<uint8_t>::splat(0xf8);
28832 const auto v_fc = simd8<uint8_t>::splat(0xfc);
28833 const auto v_dc = simd8<uint8_t>::splat(0xdc);
31009 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31037 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31053 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31081 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31099 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31100 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31101 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31102 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31103 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31104 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31112 simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
31121 const simd8<uint8_t> max_value(&max_array[sizeof(max_array)-sizeof(simd8<uint8_t>)]);
31127 simd8<uint8_t> error;
31129 simd8<uint8_t> prev_input_block;
31131 simd8<uint8_t> prev_incomplete;
31136 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31139 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31140 simd8<uint8_t> sc = check_special_cases(input, prev1);
31400 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31428 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31444 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31472 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31490 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31491 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31492 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31493 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31494 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31495 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31502 simd8<uint8_t> error;
31508 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31511 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31512 simd8<uint8_t> sc = check_special_cases(input, prev1);
31544 auto zero = simd8<uint8_t>{uint8_t(0)};
31619 auto zero = simd8<uint8_t>{uint8_t(0)};
31752 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
31780 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
31796 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
31824 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
31842 simdutf_really_inline simd8<uint8_t> check_multibyte_lengths(const simd8<uint8_t> input,
31843 const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
31844 simd8<uint8_t> prev2 = input.prev<2>(prev_input);
31845 simd8<uint8_t> prev3 = input.prev<3>(prev_input);
31846 simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
31847 simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
31854 simd8<uint8_t> error;
31860 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
31863 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
31864 simd8<uint8_t> sc = check_special_cases(input, prev1);
31896 auto zero = simd8<uint8_t>{uint8_t(0)};
31970 auto zero = simd8<uint8_t>{uint8_t(0)};
32157 simdutf_really_inline simd8<uint8_t> check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
32189 const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
32205 const simd8<uint8_t> byte_1_low = (prev1 & 0x0F).lookup_16<uint8_t>(
32233 const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
32254 simd8<uint8_t> error;
32260 simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input, const simd8<uint8_t> prev_input) {
32263 simd8<uint8_t> prev1 = input.prev<1>(prev_input);
32294 auto zero = simd8<uint8_t>{uint8_t(0)};
32368 auto zero = simd8<uint8_t>{uint8_t(0)};