simdutf.cpp - OpenGrok cross reference for /third_party/node/deps/simdutf/simdutf.cpp

Lines Matching defs:pos
4129   uint64_t pos = 0;
4131   while (pos < len) {
4133     uint64_t next_pos = pos + 16;
4136       std::memcpy(&v1, data + pos, sizeof(uint64_t));
4138       std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4141         pos = next_pos;
4145     unsigned char byte = data[pos];
4148       if (++pos == len) { return true; }
4149       byte = data[pos];
4153       next_pos = pos + 2;
4155       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4157       code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4160       next_pos = pos + 3;
4162       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4163       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4166                    (data[pos + 1] & 0b00111111) << 6 |
4167                    (data[pos + 2] & 0b00111111);
4173       next_pos = pos + 4;
4175       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4176       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4177       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
4180           (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4181           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4187     pos = next_pos;
4195   size_t pos = 0;
4197   while (pos < len) {
4199     size_t next_pos = pos + 16;
4202       std::memcpy(&v1, data + pos, sizeof(uint64_t));
4204       std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4207         pos = next_pos;
4211     unsigned char byte = data[pos];
4214       if (++pos == len) { return result(error_code::SUCCESS, len); }
4215       byte = data[pos];
4219       next_pos = pos + 2;
4220       if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4221       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4223       code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4224       if ((code_point < 0x80) || (0x7ff < code_point)) { return result(error_code::OVERLONG, pos); }
4226       next_pos = pos + 3;
4227       if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4228       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4229       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4232                    (data[pos + 1] & 0b00111111) << 6 |
4233                    (data[pos + 2] & 0b00111111);
4234       if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
4235       if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
4237       next_pos = pos + 4;
4238       if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4239       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4240       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4241       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4244           (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4245           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4246       if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
4247       if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
4250       if ((byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
4251       else { return result(error_code::HEADER_BITS, pos); }
4253     pos = next_pos;
4347   uint64_t pos = 0;
4348   while (pos < len) {
4349     uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4351         if(pos + 1 >= len) { return false; }
4354         uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4357         pos += 2;
4359         pos++;
4368   size_t pos = 0;
4369   while (pos < len) {
4370     uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4372         if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
4374         if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
4375         uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4377         if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
4378         pos += 2;
4380         pos++;
4383   return result(error_code::SUCCESS, pos);
10596     uint64_t pos = 0;
10598     for (;pos + 16 <= len; pos += 16) {
10600         std::memcpy(&v1, data + pos, sizeof(uint64_t));
10602         std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10607     for (;pos < len; pos ++) {
10608         if (data[pos] >= 0b10000000) { return false; }
10616     size_t pos = 0;
10618     for (;pos + 16 <= len; pos += 16) {
10620         std::memcpy(&v1, data + pos, sizeof(uint64_t));
10622         std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10625             for (;pos < len; pos ++) {
10626                 if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10631     for (;pos < len; pos ++) {
10632         if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10634     return result(error_code::SUCCESS, pos);
10655   uint64_t pos = 0;
10656   for(;pos < len; pos++) {
10657     uint32_t word = data[pos];
10667   size_t pos = 0;
10668   for(;pos < len; pos++) {
10669     uint32_t word = data[pos];
10671         return result(error_code::TOO_LARGE, pos);
10674         return result(error_code::SURROGATE, pos);
10677   return result(error_code::SUCCESS, pos);
10767   size_t pos = 0;
10769   while (pos < len) {
10771     if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10773       ::memcpy(&v, data + pos, sizeof(uint64_t));
10775         *utf8_output++ = char(buf[pos]);
10776 				*utf8_output++ = char(buf[pos+1]);
10777         pos += 2;
10781     uint32_t word = data[pos];
10785       pos++;
10791       pos++;
10798       pos++;
10806       pos ++;
10831   size_t pos = 0;
10833   while (pos < len) {
10835     if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10837       ::memcpy(&v, data + pos, sizeof(uint64_t));
10839         *utf8_output++ = char(buf[pos]);
10840 				*utf8_output++ = char(buf[pos+1]);
10841         pos += 2;
10845     uint32_t word = data[pos];
10849       pos++;
10855       pos++;
10863       pos++;
10872       pos ++;
10880   size_t pos = 0;
10882   while (pos < len) {
10884     if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10886       ::memcpy(&v, data + pos, sizeof(uint64_t));
10888         *utf8_output++ = char(buf[pos]);
10889 				*utf8_output++ = char(buf[pos+1]);
10890         pos += 2;
10894     uint32_t word = data[pos];
10898       pos++;
10904       pos++;
10908 			if (word >= 0xD800 && word <= 0xDFFF) { return result(error_code::SURROGATE, pos); }
10912       pos++;
10916 			if (word > 0x10FFFF) { return result(error_code::TOO_LARGE, pos); }
10921       pos ++;
10947   size_t pos = 0;
10949   while (pos < len) {
10950     uint32_t word = data[pos];
10954       pos++;
10966       pos++;
10991   size_t pos = 0;
10993   while (pos < len) {
10994     uint32_t word = data[pos];
11012     pos++;
11020   size_t pos = 0;
11022   while (pos < len) {
11023     uint32_t word = data[pos];
11025       if (word >= 0xD800 && word <= 0xDFFF) { return result(error_code::SURROGATE, pos); }
11030       if (word > 0x10FFFF) { return result(error_code::TOO_LARGE, pos); }
11041     pos++;
11066   size_t pos = 0;
11068   while (pos < len) {
11070     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11072       ::memcpy(&v, data + pos, sizeof(uint64_t));
11075         size_t final_pos = pos + 4;
11076         while(pos < final_pos) {
11077           *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11078           pos++;
11084     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11088       pos++;
11094       pos++;
11101       pos++;
11105       if(pos + 1 >= len) { return 0; } // minimal bound checking
11106       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11115       pos += 2;
11140   size_t pos = 0;
11142   while (pos < len) {
11144     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11146       ::memcpy(&v, data + pos, sizeof(uint64_t));
11149         size_t final_pos = pos + 4;
11150         while(pos < final_pos) {
11151           *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11152           pos++;
11157     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11161       pos++;
11167       pos++;
11174       pos++;
11177       if(pos + 1 >= len) { return 0; }
11180       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11190       pos += 2;
11199   size_t pos = 0;
11201   while (pos < len) {
11203     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11205       ::memcpy(&v, data + pos, sizeof(uint64_t));
11208         size_t final_pos = pos + 4;
11209         while(pos < final_pos) {
11210           *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11211           pos++;
11216     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11220       pos++;
11226       pos++;
11233       pos++;
11236       if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
11238       if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
11239       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11241       if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
11249       pos += 2;
11275   size_t pos = 0;
11277   while (pos < len) {
11278     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11282       pos++;
11286       if(pos + 1 >= len) { return 0; } // minimal bound checking
11287       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11291       pos += 2;
11316   size_t pos = 0;
11318   while (pos < len) {
11319     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11323       pos++;
11328       if(pos + 1 >= len) { return 0; } // minimal bound checking
11329       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11334       pos += 2;
11343   size_t pos = 0;
11345   while (pos < len) {
11346     uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11350       pos++;
11354       if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
11355       if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); } // minimal bound checking
11356       uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11358       if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
11361       pos += 2;
11387   size_t pos = 0;
11389   while (pos < len) {
11391     if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11393       ::memcpy(&v, data + pos, sizeof(uint64_t));
11395         size_t final_pos = pos + 8;
11396         while(pos < final_pos) {
11397           *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11398           pos++;
11403     uint8_t leading_byte = data[pos]; // leading byte
11407       pos++;
11411       if(pos + 1 >= len) { break; } // minimal bound checking
11412       uint16_t code_point = uint16_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11417       pos += 2;
11421       if(pos + 2 >= len) { break; } // minimal bound checking
11422       uint16_t code_point = uint16_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11427       pos += 3;
11430       if(pos + 3 >= len) { break; } // minimal bound checking
11431       uint32_t code_point = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11432                            | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11442       pos += 4;
11471   size_t pos = 0;
11473   while (pos < len) {
11475     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11477       ::memcpy(&v1, data + pos, sizeof(uint64_t));
11479       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11482         size_t final_pos = pos + 16;
11483         while(pos < final_pos) {
11484           *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11485           pos++;
11491     uint8_t leading_byte = data[pos]; // leading byte
11495       pos++;
11499       if(pos + 1 >= len) { return 0; } // minimal bound checking
11500       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11502       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11508       pos += 2;
11512       if(pos + 2 >= len) { return 0; } // minimal bound checking
11514       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11515       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11518                    (data[pos + 1] & 0b00111111) << 6 |
11519                    (data[pos + 2] & 0b00111111);
11528       pos += 3;
11531       if(pos + 3 >= len) { return 0; } // minimal bound checking
11532       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11533       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11534       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11538           (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11539           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11550       pos += 4;
11561   size_t pos = 0;
11563   while (pos < len) {
11565     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11567       ::memcpy(&v1, data + pos, sizeof(uint64_t));
11569       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11572         size_t final_pos = pos + 16;
11573         while(pos < final_pos) {
11574           *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11575           pos++;
11580     uint8_t leading_byte = data[pos]; // leading byte
11584       pos++;
11588       if(pos + 1 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11589       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11591       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11592       if (code_point < 0x80 || 0x7ff < code_point) { return result(error_code::OVERLONG, pos); }
11597       pos += 2;
11601       if(pos + 2 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11603       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11604       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11607                    (data[pos + 1] & 0b00111111) << 6 |
11608                    (data[pos + 2] & 0b00111111);
11609       if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
11610       if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
11615       pos += 3;
11618       if(pos + 3 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11619       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11620       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11621       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11625           (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11626           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11627       if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
11628       if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
11638       pos += 4;
11641       if ((leading_byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
11642       else { return result(error_code::HEADER_BITS, pos); }
11719   size_t pos = 0;
11721   while (pos < len) {
11723     if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11725       ::memcpy(&v, data + pos, sizeof(uint64_t));
11727         size_t final_pos = pos + 8;
11728         while(pos < final_pos) {
11729           *utf32_output++ = char32_t(buf[pos]);
11730           pos++;
11735     uint8_t leading_byte = data[pos]; // leading byte
11739       pos++;
11742       if(pos + 1 >= len) { break; } // minimal bound checking
11743       *utf32_output++ = char32_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11744       pos += 2;
11747       if(pos + 2 >= len) { break; } // minimal bound checking
11748       *utf32_output++ = char32_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11749       pos += 3;
11752       if(pos + 3 >= len) { break; } // minimal bound checking
11753       uint32_t code_word = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11754                            | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11756       pos += 4;
11784   size_t pos = 0;
11786   while (pos < len) {
11788     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11790       ::memcpy(&v1, data + pos, sizeof(uint64_t));
11792       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11795         size_t final_pos = pos + 16;
11796         while(pos < final_pos) {
11797           *utf32_output++ = char32_t(buf[pos]);
11798           pos++;
11803     uint8_t leading_byte = data[pos]; // leading byte
11807       pos++;
11810       if(pos + 1 >= len) { return 0; } // minimal bound checking
11811       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11813       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11816       pos += 2;
11819       if(pos + 2 >= len) { return 0; } // minimal bound checking
11821       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11822       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11825                    (data[pos + 1] & 0b00111111) << 6 |
11826                    (data[pos + 2] & 0b00111111);
11832       pos += 3;
11835       if(pos + 3 >= len) { return 0; } // minimal bound checking
11836       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11837       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11838       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11842           (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11843           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11846       pos += 4;
11856   size_t pos = 0;
11858   while (pos < len) {
11860     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11862       ::memcpy(&v1, data + pos, sizeof(uint64_t));
11864       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11867         size_t final_pos = pos + 16;
11868         while(pos < final_pos) {
11869           *utf32_output++ = char32_t(buf[pos]);
11870           pos++;
11875     uint8_t leading_byte = data[pos]; // leading byte
11879       pos++;
11882       if(pos + 1 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11883       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11885       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11886       if (code_point < 0x80 || 0x7ff < code_point) { return result(error_code::OVERLONG, pos); }
11888       pos += 2;
11891       if(pos + 2 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11893       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11894       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11897                    (data[pos + 1] & 0b00111111) << 6 |
11898                    (data[pos + 2] & 0b00111111);
11899       if (code_point < 0x800 || 0xffff < code_point) { return result(error_code::OVERLONG, pos); }
11900       if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
11902       pos += 3;
11905       if(pos + 3 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11906       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos);}
11907       if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11908       if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11912           (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11913           (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11914       if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
11915       if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
11917       pos += 4;
11920       if ((leading_byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
11921       else { return result(error_code::HEADER_BITS, pos); }
11996   size_t pos = 0;
11998   while (pos < len) {
12000     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12002       ::memcpy(&v1, data + pos, sizeof(uint64_t));
12004       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12007         size_t final_pos = pos + 16;
12008         while(pos < final_pos) {
12009           *utf8_output++ = char(buf[pos]);
12010           pos++;
12016     unsigned char byte = data[pos];
12020       pos++;
12025       pos++;
12050   size_t pos = 0;
12053   while (pos < len) {
12054     uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12056     pos++;
12065   size_t pos = 0;
12068   while (pos < len) {
12069     uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12071     pos++;
12123   size_t pos = 0;
12126   while (pos < len) {
12128     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12130       ::memcpy(&v1, data + pos, sizeof(uint64_t));
12132       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12135         size_t final_pos = pos + 16;
12136         while(pos < final_pos) {
12137           *latin_output++ = char(buf[pos]);
12138           pos++;
12145     uint8_t leading_byte = data[pos]; // leading byte
12149       pos++;
12152       if(pos + 1 >= len) {
12155       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12157       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12162       pos += 2;
12172   size_t pos = 0;
12175   while (pos < len) {
12177     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12179       ::memcpy(&v1, data + pos, sizeof(uint64_t));
12181       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12184         size_t final_pos = pos + 16;
12185         while(pos < final_pos) {
12186           *latin_output++ = char(buf[pos]);
12187           pos++;
12193     uint8_t leading_byte = data[pos]; // leading byte
12197       pos++;
12200       if(pos + 1 >= len) {
12201         return result(error_code::TOO_SHORT, pos); } // minimal bound checking
12202       if ((data[pos + 1] & 0b11000000) != 0b10000000) {
12203         return result(error_code::TOO_SHORT, pos); } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12205       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12207         return result(error_code::OVERLONG, pos);
12210         return result(error_code::TOO_LARGE, pos);
12213       pos += 2;
12216       return result(error_code::TOO_LARGE, pos);
12219       return result(error_code::TOO_LARGE, pos);
12223         return result(error_code::TOO_LONG, pos);
12226       return result(error_code::HEADER_BITS, pos);
12294   size_t pos = 0;
12300   while (pos < len) {
12301     word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12304     pos++;
12317   size_t pos = 0;
12321   while (pos < len) {
12322     if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that they are Latin1
12324       ::memcpy(&v1, data + pos, sizeof(uint64_t));
12325       ::memcpy(&v2, data + pos + 4, sizeof(uint64_t));
12326       ::memcpy(&v3, data + pos + 8, sizeof(uint64_t));
12327       ::memcpy(&v4, data + pos  + 12, sizeof(uint64_t));
12335         size_t final_pos = pos + 16;
12336         while(pos < final_pos) {
12337           *latin_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(data[pos])) : char(data[pos]);
12338           pos++;
12343     word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12346         pos++;
12347     } else { return result(error_code::TOO_LARGE, pos); }
12373   size_t pos = 0;
12376   while (pos < len) {
12377     utf32_char = (uint32_t)data[pos];
12380     pos++;
12389   size_t pos = 0;
12390   while (pos < len) {
12391     if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
12393       ::memcpy(&v, data + pos, sizeof(uint64_t));
12395         *latin1_output++ = char(buf[pos]);
12396         *latin1_output++ = char(buf[pos+1]);
12397         pos += 2;
12401     uint32_t utf32_char = data[pos];
12404       pos++;
12405     } else { return result(error_code::TOO_LARGE, pos); };
12430   size_t pos = 0;
12433   while (pos < len) {
12435     if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12437       ::memcpy(&v1, data + pos, sizeof(uint64_t));
12439       ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12442         size_t final_pos = pos + 16;
12443         while(pos < final_pos) {
12444           *latin_output++ = char(buf[pos]);
12445           pos++;
12452     uint8_t leading_byte = data[pos]; // leading byte
12456       pos++;
12459       if(pos + 1 >= len) { break; } // minimal bound checking
12460       if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12462       uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12464       pos += 2;
12492   size_t pos = 0;
12496   while (pos < len) {
12497     word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12499     pos++;
12525   size_t pos = 0;
12527   while (pos < len) {
12528   utf32_char = (uint32_t)data[pos];
12530   if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
12532       ::memcpy(&v, data + pos, sizeof(uint64_t));
12534       *latin1_output++ = char(buf[pos]);
12535       *latin1_output++ = char(buf[pos+1]);
12536       pos += 2;
12541   pos++;
15540   size_t pos = 0;
15543   while(pos + 64 + safety_margin <= size) {
15546     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15550       pos += 64;
15563       size_t max_starting_point = (pos + 64) - 12;
15566       while(pos < max_starting_point) {
15580         size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
15582         pos += consumed;
15591   utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
15729       size_t pos = 0;
15744       while(pos + 64 + safety_margin <= size) {
15745         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15749           pos += 64;
15770           size_t max_starting_point = (pos + 64) - 12;
15772           while(pos < max_starting_point) {
15782             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
15784             pos += consumed;
15794       if(pos < size) {
15795         size_t howmany  = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
15804       size_t pos = 0;
15819       while(pos + 64 + safety_margin <= size) {
15820         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15824           pos += 64;
15840             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15841             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15842             result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15843             res.count += pos;
15852           size_t max_starting_point = (pos + 64) - 12;
15854           while(pos < max_starting_point) {
15864             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
15866             pos += consumed;
15876         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15877         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15878         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15879         res.count += pos;
15882       if(pos < size) {
15883         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15884         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15885         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15887           res.count += pos;
15919   size_t pos = 0;
15922   while(pos + 64 + safety_margin <= size) {
15923     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15927       pos += 64;
15933     size_t max_starting_point = (pos + 64) - 12;
15934     while(pos < max_starting_point) {
15935       size_t consumed = convert_masked_utf8_to_utf32(input + pos,
15937       pos += consumed;
15942   utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
16081       size_t pos = 0;
16096       while(pos + 64 + safety_margin <= size) {
16097         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16101           pos += 64;
16122           size_t max_starting_point = (pos + 64) - 12;
16124           while(pos < max_starting_point) {
16134             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
16136             pos += consumed;
16146       if(pos < size) {
16147         size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
16155       size_t pos = 0;
16170       while(pos + 64 + safety_margin <= size) {
16171         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16175           pos += 64;
16191             result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16192             res.count += pos;
16201           size_t max_starting_point = (pos + 64) - 12;
16203           while(pos < max_starting_point) {
16213             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
16215             pos += consumed;
16225         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16226         res.count += pos;
16229       if(pos < size) {
16230         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16232           res.count += pos;
16262     size_t pos = 0;
16264     for(;pos + 64 <= size; pos += 64) {
16265       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16269     return count + scalar::utf8::count_code_points(in + pos, size - pos);
16273     size_t pos = 0;
16276     for(;pos + 64 <= size; pos += 64) {
16277       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16285     return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
16300     size_t pos = 0;
16302     for(;pos < size/32*32; pos += 32) {
16303       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16308     return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
16313     size_t pos = 0;
16316     for(;pos < size/32*32; pos += 32) {
16317       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16329     return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
16338   size_t pos = 0;
16340   while (pos < size/32*32) {
16341     simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16344     pos += 32;
16348   scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
16479       size_t pos = 0;
16494       while(pos + 64 + safety_margin <= size) {
16495         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16499           pos += 64;
16520           size_t max_starting_point = (pos + 64) - 12;
16522           while(pos < max_starting_point) {
16532             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16534             pos += consumed;
16544       if(pos < size) {
16545         size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
16553       size_t pos = 0;
16568       while(pos + 64 + safety_margin <= size) {
16569         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16573           pos += 64;
16589             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16590             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16591             result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16592             res.count += pos;
16601           size_t max_starting_point = (pos + 64) - 12;
16603           while(pos < max_starting_point) {
16613             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16615             pos += consumed;
16625         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16626         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16627         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16628         res.count += pos;
16631       if(pos < size) {
16632         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16633         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16634         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16636           res.count += pos;
16666       size_t pos = 0;
16681       while(pos + 64 + safety_margin <= size) {
16682         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16686           pos += 64;
16695           size_t max_starting_point = (pos + 64) - 12;
16697           while(pos < max_starting_point) {
16707             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16709             pos += consumed;
16718       if(pos < size) {
16719         size_t howmany  = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
17376   size_t pos = 0;
17378   for(;pos + 4 <= length; pos += 4) {
17379     uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17398   return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
17404   size_t pos = 0;
17406   for(;pos + 4 <= length; pos += 4) {
17407     uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17414   return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
19226   size_t pos = 0;
19232   while (pos + 64 <= len) {
19233     size_t written = process_block_from_utf8_to_latin1<false>(buf + pos, 64, latin_output, minus64,
19239     pos += 64;
19242   if (pos < len) {
19243     size_t remaining = len - pos;
19245         process_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64, one,
19302   size_t pos = 0;
19308   while (pos + 64 <= len) {
19310         buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6);
19312     pos += 64;
19315   if (pos < len) {
19316     size_t remaining = len - pos;
19318         process_valid_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64,
20569   size_t pos = 0;
20571   for (; pos + 128 <= len; pos += 64) {
20572     __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20576   if (pos + 64 <= len) {
20577     __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20579     pos += 64;
20582   if (pos < len) {
20583     __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos));
20584     __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos));
20585     utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1);
21617   size_t pos = 0;
21628   while (pos + 32 <= length) {
21629     __m512i utf16 = _mm512_loadu_si512((const __m512i*)(input + pos));
21631     _mm512_storeu_si512(output + pos, utf16);
21632     pos += 32;
21634   if(pos < length) {
21635     __mmask32 m((1<< (length - pos))-1);
21636     __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i*)(input + pos));
21638     _mm512_mask_storeu_epi16(output + pos, m, utf16);
21910     size_t pos = 0;
21913     for(;pos + 64 <= length; pos += 64) {
21914       __m512i utf8 = _mm512_loadu_si512((const __m512i*)(input+pos));
21922     return count + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos);
24951   size_t pos = 0;
24954   while(pos + 64 + safety_margin <= size) {
24957     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
24961       pos += 64;
24974       size_t max_starting_point = (pos + 64) - 12;
24977       while(pos < max_starting_point) {
24991         size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
24993         pos += consumed;
25002   utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
25140       size_t pos = 0;
25155       while(pos + 64 + safety_margin <= size) {
25156         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25160           pos += 64;
25181           size_t max_starting_point = (pos + 64) - 12;
25183           while(pos < max_starting_point) {
25193             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
25195             pos += consumed;
25205       if(pos < size) {
25206         size_t howmany  = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
25215       size_t pos = 0;
25230       while(pos + 64 + safety_margin <= size) {
25231         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25235           pos += 64;
25251             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25252             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25253             result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25254             res.count += pos;
25263           size_t max_starting_point = (pos + 64) - 12;
25265           while(pos < max_starting_point) {
25275             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
25277             pos += consumed;
25287         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25288         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25289         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25290         res.count += pos;
25293       if(pos < size) {
25294         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25295         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25296         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25298           res.count += pos;
25330   size_t pos = 0;
25333   while(pos + 64 + safety_margin <= size) {
25334     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
25338       pos += 64;
25344     size_t max_starting_point = (pos + 64) - 12;
25345     while(pos < max_starting_point) {
25346       size_t consumed = convert_masked_utf8_to_utf32(input + pos,
25348       pos += consumed;
25353   utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
25492       size_t pos = 0;
25507       while(pos + 64 + safety_margin <= size) {
25508         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25512           pos += 64;
25533           size_t max_starting_point = (pos + 64) - 12;
25535           while(pos < max_starting_point) {
25545             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
25547             pos += consumed;
25557       if(pos < size) {
25558         size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
25566       size_t pos = 0;
25581       while(pos + 64 + safety_margin <= size) {
25582         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25586           pos += 64;
25602             result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25603             res.count += pos;
25612           size_t max_starting_point = (pos + 64) - 12;
25614           while(pos < max_starting_point) {
25624             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
25626             pos += consumed;
25636         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25637         res.count += pos;
25640       if(pos < size) {
25641         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25643           res.count += pos;
25673     size_t pos = 0;
25675     for(;pos + 64 <= size; pos += 64) {
25676       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25680     return count + scalar::utf8::count_code_points(in + pos, size - pos);
25684     size_t pos = 0;
25687     for(;pos + 64 <= size; pos += 64) {
25688       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25696     return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
25711     size_t pos = 0;
25713     for(;pos < size/32*32; pos += 32) {
25714       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25719     return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
25724     size_t pos = 0;
25727     for(;pos < size/32*32; pos += 32) {
25728       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25740     return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
25749   size_t pos = 0;
25751   while (pos < size/32*32) {
25752     simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25755     pos += 32;
25759   scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
25892       size_t pos = 0;
25907       while(pos + 64 + safety_margin <= size) {
25908         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25912           pos += 64;
25933           size_t max_starting_point = (pos + 64) - 12;
25935           while(pos < max_starting_point) {
25945             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
25947             pos += consumed;
25957       if(pos < size) {
25958         size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
25966       size_t pos = 0;
25981       while(pos + 64 + safety_margin <= size) {
25982         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25986           pos += 64;
26002             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26003             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26004             result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26005             res.count += pos;
26014           size_t max_starting_point = (pos + 64) - 12;
26016           while(pos < max_starting_point) {
26026             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
26028             pos += consumed;
26038         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26039         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26040         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26041         res.count += pos;
26044       if(pos < size) {
26045         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26046         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26047         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26049           res.count += pos;
26079       size_t pos = 0;
26094       while(pos + 64 + safety_margin <= size) {
26095         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
26099           pos += 64;
26108           size_t max_starting_point = (pos + 64) - 12;
26110           while(pos < max_starting_point) {
26120             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
26122             pos += consumed;
26131       if(pos < size) {
26132         size_t howmany  = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
26799   size_t pos = 0;
26801   for(;pos + 8 <= length; pos += 8) {
26802     __m256i in = _mm256_loadu_si256((__m256i*)(input + pos));
26817   return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
26823   size_t pos = 0;
26825   for(;pos + 8 <= length; pos += 8) {
26826     __m256i in = _mm256_loadu_si256((__m256i*)(input + pos));
26832   return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
27323   size_t pos = 0;
27326   while(pos + 64 + safety_margin <= size) {
27329     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27333       pos += 64;
27346       size_t max_starting_point = (pos + 64) - 12;
27349       while(pos < max_starting_point) {
27363         size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
27365         pos += consumed;
27374   utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
27512       size_t pos = 0;
27527       while(pos + 64 + safety_margin <= size) {
27528         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27532           pos += 64;
27553           size_t max_starting_point = (pos + 64) - 12;
27555           while(pos < max_starting_point) {
27565             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
27567             pos += consumed;
27577       if(pos < size) {
27578         size_t howmany  = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
27587       size_t pos = 0;
27602       while(pos + 64 + safety_margin <= size) {
27603         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27607           pos += 64;
27623             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27624             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27625             result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27626             res.count += pos;
27635           size_t max_starting_point = (pos + 64) - 12;
27637           while(pos < max_starting_point) {
27647             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
27649             pos += consumed;
27659         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27660         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27661         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27662         res.count += pos;
27665       if(pos < size) {
27666         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27667         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27668         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27670           res.count += pos;
27702   size_t pos = 0;
27705   while(pos + 64 + safety_margin <= size) {
27706     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27710       pos += 64;
27716     size_t max_starting_point = (pos + 64) - 12;
27717     while(pos < max_starting_point) {
27718       size_t consumed = convert_masked_utf8_to_utf32(input + pos,
27720       pos += consumed;
27725   utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
27864       size_t pos = 0;
27879       while(pos + 64 + safety_margin <= size) {
27880         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27884           pos += 64;
27905           size_t max_starting_point = (pos + 64) - 12;
27907           while(pos < max_starting_point) {
27917             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
27919             pos += consumed;
27929       if(pos < size) {
27930         size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
27938       size_t pos = 0;
27953       while(pos + 64 + safety_margin <= size) {
27954         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27958           pos += 64;
27974             result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
27975             res.count += pos;
27984           size_t max_starting_point = (pos + 64) - 12;
27986           while(pos < max_starting_point) {
27996             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
27998             pos += consumed;
28008         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28009         res.count += pos;
28012       if(pos < size) {
28013         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28015           res.count += pos;
28045     size_t pos = 0;
28047     for(;pos + 64 <= size; pos += 64) {
28048       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28052     return count + scalar::utf8::count_code_points(in + pos, size - pos);
28056     size_t pos = 0;
28059     for(;pos + 64 <= size; pos += 64) {
28060       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28068     return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
28083     size_t pos = 0;
28085     for(;pos < size/32*32; pos += 32) {
28086       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28091     return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
28096     size_t pos = 0;
28099     for(;pos < size/32*32; pos += 32) {
28100       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28112     return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
28121   size_t pos = 0;
28123   while (pos < size/32*32) {
28124     simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28127     pos += 32;
28131   scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
31330   size_t pos = 0;
31333   while(pos + 64 + safety_margin <= size) {
31336     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31340       pos += 64;
31353       size_t max_starting_point = (pos + 64) - 12;
31356       while(pos < max_starting_point) {
31370         size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
31372         pos += consumed;
31381   utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
31519       size_t pos = 0;
31534       while(pos + 64 + safety_margin <= size) {
31535         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31539           pos += 64;
31560           size_t max_starting_point = (pos + 64) - 12;
31562           while(pos < max_starting_point) {
31572             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
31574             pos += consumed;
31584       if(pos < size) {
31585         size_t howmany  = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
31594       size_t pos = 0;
31609       while(pos + 64 + safety_margin <= size) {
31610         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31614           pos += 64;
31630             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31631             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31632             result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31633             res.count += pos;
31642           size_t max_starting_point = (pos + 64) - 12;
31644           while(pos < max_starting_point) {
31654             size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
31656             pos += consumed;
31666         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31667         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31668         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31669         res.count += pos;
31672       if(pos < size) {
31673         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31674         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31675         result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31677           res.count += pos;
31709   size_t pos = 0;
31712   while(pos + 64 + safety_margin <= size) {
31713     simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31717       pos += 64;
31723     size_t max_starting_point = (pos + 64) - 12;
31724     while(pos < max_starting_point) {
31725       size_t consumed = convert_masked_utf8_to_utf32(input + pos,
31727       pos += consumed;
31732   utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
31871       size_t pos = 0;
31886       while(pos + 64 + safety_margin <= size) {
31887         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31891           pos += 64;
31912           size_t max_starting_point = (pos + 64) - 12;
31914           while(pos < max_starting_point) {
31924             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
31926             pos += consumed;
31936       if(pos < size) {
31937         size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
31945       size_t pos = 0;
31960       while(pos + 64 + safety_margin <= size) {
31961         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31965           pos += 64;
31981             result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
31982             res.count += pos;
31991           size_t max_starting_point = (pos + 64) - 12;
31993           while(pos < max_starting_point) {
32003             size_t consumed = convert_masked_utf8_to_utf32(in + pos,
32005             pos += consumed;
32015         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32016         res.count += pos;
32019       if(pos < size) {
32020         result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32022           res.count += pos;
32052     size_t pos = 0;
32054     for(;pos + 64 <= size; pos += 64) {
32055       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32059     return count + scalar::utf8::count_code_points(in + pos, size - pos);
32063     size_t pos = 0;
32066     for(;pos + 64 <= size; pos += 64) {
32067       simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32075     return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
32090     size_t pos = 0;
32092     for(;pos < size/32*32; pos += 32) {
32093       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32098     return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
32103     size_t pos = 0;
32106     for(;pos < size/32*32; pos += 32) {
32107       simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32119     return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
32128   size_t pos = 0;
32130   while (pos < size/32*32) {
32131     simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32134     pos += 32;
32138   scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
32269       size_t pos = 0;
32284       while(pos + 64 + safety_margin <= size) {
32285         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32289           pos += 64;
32310           size_t max_starting_point = (pos + 64) - 12;
32312           while(pos < max_starting_point) {
32322             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32324             pos += consumed;
32334       if(pos < size) {
32335         size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
32343       size_t pos = 0;
32358       while(pos + 64 + safety_margin <= size) {
32359         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32363           pos += 64;
32379             // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32380             // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32381             result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32382             res.count += pos;
32391           size_t max_starting_point = (pos + 64) - 12;
32393           while(pos < max_starting_point) {
32403             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32405             pos += consumed;
32415         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32416         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32417         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32418         res.count += pos;
32421       if(pos < size) {
32422         // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32423         // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32424         result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32426           res.count += pos;
32456       size_t pos = 0;
32471       while(pos + 64 + safety_margin <= size) {
32472         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32476           pos += 64;
32485           size_t max_starting_point = (pos + 64) - 12;
32487           while(pos < max_starting_point) {
32497             size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32499             pos += consumed;
32508       if(pos < size) {
32509         size_t howmany  = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
33192   size_t pos = 0;
33194   for(;pos + 4 <= length; pos += 4) {
33195     __m128i in = _mm_loadu_si128((__m128i*)(input + pos));
33210   return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
33216   size_t pos = 0;
33218   for(;pos + 4 <= length; pos += 4) {
33219     __m128i in = _mm_loadu_si128((__m128i*)(input + pos));
33225   return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);