simdutf.cpp - OpenGrok cross reference for /third_party/node/deps/simdutf/simdutf.cpp

Lines Matching defs:uint16x8_t
487       const uint16x8_t low = vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0)));
488       const uint16x8_t high = vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0)));
777     uint16x8_t value;
782     simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {}
783     simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
784     simdutf_really_inline operator uint16x8_t&() { return this->value; }
809   simdutf_really_inline base16(const uint16x8_t _value) : base_u16<T>(_value) {}
827   simdutf_really_inline simd16<bool>(const uint16x8_t _value) : base16<bool>(_value) {}
842   simdutf_really_inline base16_numeric(const uint16x8_t _value) : base16<T>(_value) {}
862   simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<int16_t>(_value) {}
872   simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
891   simdutf_really_inline simd16(const uint16x8_t _value) : base16_numeric<uint16_t>(_value) {}
12630 simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) {
12637   uint16x8_t upper = vreinterpretq_u16_u8(in);
12640   uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in));
12642   uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F));
12645   uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6);
12649 simdutf_really_inline uint16x8_t convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) {
12658   uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh));
12662   uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits
12665   uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits
12669   uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2);
12694         uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t*>(buf));
12695         uint16x8_t secondin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + simd16<uint16_t>::SIZE / sizeof(char16_t));
12696         uint16x8_t thirdin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 2*simd16<uint16_t>::SIZE / sizeof(char16_t));
12697         uint16x8_t fourthin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 3*simd16<uint16_t>::SIZE / sizeof(char16_t));
13093   const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13107     uint16x8_t in16 = vmovl_u8(vget_low_u8(in8));
13111     const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
13112     const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
13115     const uint16x8_t t0 = vshlq_n_u16(in16, 2);
13117     const uint16x8_t t1 = vandq_u16(t0, v_1f00);
13119     const uint16x8_t t2 = vandq_u16(in16, v_003f);
13121     const uint16x8_t t3 = vorrq_u16(t1, t2);
13123     const uint16x8_t t4 = vorrq_u16(t3, v_c080);
13125     const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13126     const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f);
13131     const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004, 0x0010, 0x0040,
13134     const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040,
13162         uint16x8_t inlow = vmovl_u8(vget_low_u8(in8));
13165         uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8));
13181         uint16x8_t in8low = vmovl_u8(vget_low_u8(in8));
13184         uint16x8_t in8high = vmovl_u8(vget_high_u8(in8));
13249     uint16x8_t composed = convert_utf8_2_byte_to_utf16(in);
13270     uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx);
13355       uint16x8_t magic_with_low_2 = vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30));
13364       uint16x8_t blend = vreinterpretq_u16_u32(vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead));
13367       uint16x8_t composed = vaddq_u16(blend, magic_with_low_2);
13411     uint16x8_t masked_pair =
13422     uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00));
13506     uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in);
13524     uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx);
13573       uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in));
13576       uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6);
13685   uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh));
13689   uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits
13692   uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits
13696   uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2);
13712     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13734     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13817   const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
13818   const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
13819   const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
13822     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13826         uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
13855           const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
13856           const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
13859           const uint16x8_t t0 = vshlq_n_u16(in, 2);
13861           const uint16x8_t t1 = vandq_u16(t0, v_1f00);
13863           const uint16x8_t t2 = vandq_u16(in, v_003f);
13865           const uint16x8_t t3 = vorrq_u16(t1, t2);
13867           const uint16x8_t t4 = vorrq_u16(t3, v_c080);
13869           const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13870           const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
13874           const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
13879           const uint16x8_t mask = { 0x0001, 0x0004,
13899     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
13905         const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
13908         const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
13937         const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
13939         const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
13941         const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
13944         const uint16x8_t s0 = vshrq_n_u16(in, 12);
13946         const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
13948         const uint16x8_t s1s = vshlq_n_u16(s1, 2);
13950         const uint16x8_t s2 = vorrq_u16(s0, s1s);
13952         const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
13953         const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
13954         const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
13955         const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
13956         const uint16x8_t s4 = veorq_u16(s3, m0);
13964         const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
13965         const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
13967         const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
13971         const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
13976         const uint16x8_t onemask = { 0x0001, 0x0004,
13980         const uint16x8_t twomask = { 0x0002, 0x0008,
13985         const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14070   const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14071   const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14072   const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14076     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14080         uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
14109           const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14110           const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14113           const uint16x8_t t0 = vshlq_n_u16(in, 2);
14115           const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14117           const uint16x8_t t2 = vandq_u16(in, v_003f);
14119           const uint16x8_t t3 = vorrq_u16(t1, t2);
14121           const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14123           const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14124           const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
14128           const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14133           const uint16x8_t mask = { 0x0001, 0x0004,
14153     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14159         const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14162         const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14191         const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
14193         const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14195         const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14198         const uint16x8_t s0 = vshrq_n_u16(in, 12);
14200         const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
14202         const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14204         const uint16x8_t s2 = vorrq_u16(s0, s1s);
14206         const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14207         const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14208         const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
14209         const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14210         const uint16x8_t s4 = veorq_u16(s3, m0);
14218         const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14219         const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
14221         const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14225         const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14230         const uint16x8_t onemask = { 0x0001, 0x0004,
14234         const uint16x8_t twomask = { 0x0002, 0x0008,
14239         const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14369   const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14370   const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14373     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14376     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14427   const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
14428   const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14431     uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14434     const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
14480     uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2));
14505     uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2));
14535   const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14537   uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0);
14547       uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
14564         const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14565         const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14568         const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
14570         const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14572         const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
14574         const uint16x8_t t3 = vorrq_u16(t1, t2);
14576         const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14578         const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14579         const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14583         const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14588         const uint16x8_t mask = { 0x0001, 0x0004,
14608         const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14609         const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14613           const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14616           const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14645           const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
14647           const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14649           const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14652           const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
14654           const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
14656           const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14658           const uint16x8_t s2 = vorrq_u16(s0, s1s);
14660           const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14661           const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14662           const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
14663           const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14664           const uint16x8_t s4 = veorq_u16(s3, m0);
14672           const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14673           const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14675           const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14679           const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14684           const uint16x8_t onemask = { 0x0001, 0x0004,
14688           const uint16x8_t twomask = { 0x0002, 0x0008,
14693           const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
14770   const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
14780       uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
14797         const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
14798         const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
14801         const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
14803         const uint16x8_t t1 = vandq_u16(t0, v_1f00);
14805         const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
14807         const uint16x8_t t3 = vorrq_u16(t1, t2);
14809         const uint16x8_t t4 = vorrq_u16(t3, v_c080);
14811         const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14812         const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14816         const uint16x8_t mask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14821         const uint16x8_t mask = { 0x0001, 0x0004,
14843         const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
14844         const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
14845         const uint16x8_t forbidden_bytemask = vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800));
14851           const uint16x8_t dup_even = simdutf_make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
14854           const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
14883           const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
14885           const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
14887           const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
14890           const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
14892           const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
14894           const uint16x8_t s1s = vshlq_n_u16(s1, 2);
14896           const uint16x8_t s2 = vorrq_u16(s0, s1s);
14898           const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
14899           const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
14900           const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
14901           const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
14902           const uint16x8_t s4 = veorq_u16(s3, m0);
14910           const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
14911           const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
14913           const uint16x8_t onemask = simdutf_make_uint16x8_t(0x0001, 0x0004,
14917           const uint16x8_t twomask = simdutf_make_uint16x8_t(0x0002, 0x0008,
14922           const uint16x8_t onemask = { 0x0001, 0x0004,
14926           const uint16x8_t twomask = { 0x0002, 0x0008,
14931           const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
17385     const uint16x8_t reduced_ascii_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1));
17386     const uint16x8_t reduced_two_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1));
17387     const uint16x8_t reduced_three_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1));
17389     const uint16x8_t compressed_bytemask0 = vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask);
17390     const uint16x8_t compressed_bytemask1 = vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask);
17409     const uint16x8_t reduced_bytemask = vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1));
17410     const uint16x8_t compressed_bytemask = vpaddq_u16(reduced_bytemask, reduced_bytemask);