Lines Matching defs:pos

4129   uint64_t pos = 0;
4131 while (pos < len) {
4133 uint64_t next_pos = pos + 16;
4136 std::memcpy(&v1, data + pos, sizeof(uint64_t));
4138 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4141 pos = next_pos;
4145 unsigned char byte = data[pos];
4148 if (++pos == len) { return true; }
4149 byte = data[pos];
4153 next_pos = pos + 2;
4155 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4157 code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4160 next_pos = pos + 3;
4162 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4163 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4166 (data[pos + 1] & 0b00111111) << 6 |
4167 (data[pos + 2] & 0b00111111);
4173 next_pos = pos + 4;
4175 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4176 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4177 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
4180 (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4181 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4187 pos = next_pos;
4195 size_t pos = 0;
4197 while (pos < len) {
4199 size_t next_pos = pos + 16;
4202 std::memcpy(&v1, data + pos, sizeof(uint64_t));
4204 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4207 pos = next_pos;
4211 unsigned char byte = data[pos];
4214 if (++pos == len) { return result(error_code::SUCCESS, len); }
4215 byte = data[pos];
4219 next_pos = pos + 2;
4220 if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4221 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4223 code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4224 if ((code_point < 0x80) || (0x7ff < code_point)) { return result(error_code::OVERLONG, pos); }
4226 next_pos = pos + 3;
4227 if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4228 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4229 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4232 (data[pos + 1] & 0b00111111) << 6 |
4233 (data[pos + 2] & 0b00111111);
4234 if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
4235 if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
4237 next_pos = pos + 4;
4238 if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
4239 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4240 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4241 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4244 (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4245 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4246 if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
4247 if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
4250 if ((byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
4251 else { return result(error_code::HEADER_BITS, pos); }
4253 pos = next_pos;
4347 uint64_t pos = 0;
4348 while (pos < len) {
4349 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4351 if(pos + 1 >= len) { return false; }
4354 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4357 pos += 2;
4359 pos++;
4368 size_t pos = 0;
4369 while (pos < len) {
4370 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4372 if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
4374 if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
4375 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4377 if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
4378 pos += 2;
4380 pos++;
4383 return result(error_code::SUCCESS, pos);
10596 uint64_t pos = 0;
10598 for (;pos + 16 <= len; pos += 16) {
10600 std::memcpy(&v1, data + pos, sizeof(uint64_t));
10602 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10607 for (;pos < len; pos ++) {
10608 if (data[pos] >= 0b10000000) { return false; }
10616 size_t pos = 0;
10618 for (;pos + 16 <= len; pos += 16) {
10620 std::memcpy(&v1, data + pos, sizeof(uint64_t));
10622 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10625 for (;pos < len; pos ++) {
10626 if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10631 for (;pos < len; pos ++) {
10632 if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10634 return result(error_code::SUCCESS, pos);
10655 uint64_t pos = 0;
10656 for(;pos < len; pos++) {
10657 uint32_t word = data[pos];
10667 size_t pos = 0;
10668 for(;pos < len; pos++) {
10669 uint32_t word = data[pos];
10671 return result(error_code::TOO_LARGE, pos);
10674 return result(error_code::SURROGATE, pos);
10677 return result(error_code::SUCCESS, pos);
10767 size_t pos = 0;
10769 while (pos < len) {
10771 if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10773 ::memcpy(&v, data + pos, sizeof(uint64_t));
10775 *utf8_output++ = char(buf[pos]);
10776 *utf8_output++ = char(buf[pos+1]);
10777 pos += 2;
10781 uint32_t word = data[pos];
10785 pos++;
10791 pos++;
10798 pos++;
10806 pos ++;
10831 size_t pos = 0;
10833 while (pos < len) {
10835 if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10837 ::memcpy(&v, data + pos, sizeof(uint64_t));
10839 *utf8_output++ = char(buf[pos]);
10840 *utf8_output++ = char(buf[pos+1]);
10841 pos += 2;
10845 uint32_t word = data[pos];
10849 pos++;
10855 pos++;
10863 pos++;
10872 pos ++;
10880 size_t pos = 0;
10882 while (pos < len) {
10884 if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
10886 ::memcpy(&v, data + pos, sizeof(uint64_t));
10888 *utf8_output++ = char(buf[pos]);
10889 *utf8_output++ = char(buf[pos+1]);
10890 pos += 2;
10894 uint32_t word = data[pos];
10898 pos++;
10904 pos++;
10908 if (word >= 0xD800 && word <= 0xDFFF) { return result(error_code::SURROGATE, pos); }
10912 pos++;
10916 if (word > 0x10FFFF) { return result(error_code::TOO_LARGE, pos); }
10921 pos ++;
10947 size_t pos = 0;
10949 while (pos < len) {
10950 uint32_t word = data[pos];
10954 pos++;
10966 pos++;
10991 size_t pos = 0;
10993 while (pos < len) {
10994 uint32_t word = data[pos];
11012 pos++;
11020 size_t pos = 0;
11022 while (pos < len) {
11023 uint32_t word = data[pos];
11025 if (word >= 0xD800 && word <= 0xDFFF) { return result(error_code::SURROGATE, pos); }
11030 if (word > 0x10FFFF) { return result(error_code::TOO_LARGE, pos); }
11041 pos++;
11066 size_t pos = 0;
11068 while (pos < len) {
11070 if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11072 ::memcpy(&v, data + pos, sizeof(uint64_t));
11075 size_t final_pos = pos + 4;
11076 while(pos < final_pos) {
11077 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11078 pos++;
11084 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11088 pos++;
11094 pos++;
11101 pos++;
11105 if(pos + 1 >= len) { return 0; } // minimal bound checking
11106 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11115 pos += 2;
11140 size_t pos = 0;
11142 while (pos < len) {
11144 if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11146 ::memcpy(&v, data + pos, sizeof(uint64_t));
11149 size_t final_pos = pos + 4;
11150 while(pos < final_pos) {
11151 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11152 pos++;
11157 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11161 pos++;
11167 pos++;
11174 pos++;
11177 if(pos + 1 >= len) { return 0; }
11180 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11190 pos += 2;
11199 size_t pos = 0;
11201 while (pos < len) {
11203 if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11205 ::memcpy(&v, data + pos, sizeof(uint64_t));
11208 size_t final_pos = pos + 4;
11209 while(pos < final_pos) {
11210 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11211 pos++;
11216 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11220 pos++;
11226 pos++;
11233 pos++;
11236 if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
11238 if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
11239 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11241 if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
11249 pos += 2;
11275 size_t pos = 0;
11277 while (pos < len) {
11278 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11282 pos++;
11286 if(pos + 1 >= len) { return 0; } // minimal bound checking
11287 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11291 pos += 2;
11316 size_t pos = 0;
11318 while (pos < len) {
11319 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11323 pos++;
11328 if(pos + 1 >= len) { return 0; } // minimal bound checking
11329 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11334 pos += 2;
11343 size_t pos = 0;
11345 while (pos < len) {
11346 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11350 pos++;
11354 if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
11355 if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); } // minimal bound checking
11356 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11358 if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
11361 pos += 2;
11387 size_t pos = 0;
11389 while (pos < len) {
11391 if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11393 ::memcpy(&v, data + pos, sizeof(uint64_t));
11395 size_t final_pos = pos + 8;
11396 while(pos < final_pos) {
11397 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11398 pos++;
11403 uint8_t leading_byte = data[pos]; // leading byte
11407 pos++;
11411 if(pos + 1 >= len) { break; } // minimal bound checking
11412 uint16_t code_point = uint16_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11417 pos += 2;
11421 if(pos + 2 >= len) { break; } // minimal bound checking
11422 uint16_t code_point = uint16_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11427 pos += 3;
11430 if(pos + 3 >= len) { break; } // minimal bound checking
11431 uint32_t code_point = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11432 | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11442 pos += 4;
11471 size_t pos = 0;
11473 while (pos < len) {
11475 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11477 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11479 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11482 size_t final_pos = pos + 16;
11483 while(pos < final_pos) {
11484 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11485 pos++;
11491 uint8_t leading_byte = data[pos]; // leading byte
11495 pos++;
11499 if(pos + 1 >= len) { return 0; } // minimal bound checking
11500 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11502 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11508 pos += 2;
11512 if(pos + 2 >= len) { return 0; } // minimal bound checking
11514 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11515 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11518 (data[pos + 1] & 0b00111111) << 6 |
11519 (data[pos + 2] & 0b00111111);
11528 pos += 3;
11531 if(pos + 3 >= len) { return 0; } // minimal bound checking
11532 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11533 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11534 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11538 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11539 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11550 pos += 4;
11561 size_t pos = 0;
11563 while (pos < len) {
11565 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11567 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11569 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11572 size_t final_pos = pos + 16;
11573 while(pos < final_pos) {
11574 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11575 pos++;
11580 uint8_t leading_byte = data[pos]; // leading byte
11584 pos++;
11588 if(pos + 1 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11589 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11591 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11592 if (code_point < 0x80 || 0x7ff < code_point) { return result(error_code::OVERLONG, pos); }
11597 pos += 2;
11601 if(pos + 2 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11603 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11604 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11607 (data[pos + 1] & 0b00111111) << 6 |
11608 (data[pos + 2] & 0b00111111);
11609 if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
11610 if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
11615 pos += 3;
11618 if(pos + 3 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11619 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11620 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11621 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11625 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11626 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11627 if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
11628 if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
11638 pos += 4;
11641 if ((leading_byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
11642 else { return result(error_code::HEADER_BITS, pos); }
11719 size_t pos = 0;
11721 while (pos < len) {
11723 if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
11725 ::memcpy(&v, data + pos, sizeof(uint64_t));
11727 size_t final_pos = pos + 8;
11728 while(pos < final_pos) {
11729 *utf32_output++ = char32_t(buf[pos]);
11730 pos++;
11735 uint8_t leading_byte = data[pos]; // leading byte
11739 pos++;
11742 if(pos + 1 >= len) { break; } // minimal bound checking
11743 *utf32_output++ = char32_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11744 pos += 2;
11747 if(pos + 2 >= len) { break; } // minimal bound checking
11748 *utf32_output++ = char32_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11749 pos += 3;
11752 if(pos + 3 >= len) { break; } // minimal bound checking
11753 uint32_t code_word = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11754 | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11756 pos += 4;
11784 size_t pos = 0;
11786 while (pos < len) {
11788 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11790 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11792 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11795 size_t final_pos = pos + 16;
11796 while(pos < final_pos) {
11797 *utf32_output++ = char32_t(buf[pos]);
11798 pos++;
11803 uint8_t leading_byte = data[pos]; // leading byte
11807 pos++;
11810 if(pos + 1 >= len) { return 0; } // minimal bound checking
11811 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11813 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11816 pos += 2;
11819 if(pos + 2 >= len) { return 0; } // minimal bound checking
11821 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11822 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11825 (data[pos + 1] & 0b00111111) << 6 |
11826 (data[pos + 2] & 0b00111111);
11832 pos += 3;
11835 if(pos + 3 >= len) { return 0; } // minimal bound checking
11836 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11837 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11838 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11842 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11843 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11846 pos += 4;
11856 size_t pos = 0;
11858 while (pos < len) {
11860 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
11862 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11864 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11867 size_t final_pos = pos + 16;
11868 while(pos < final_pos) {
11869 *utf32_output++ = char32_t(buf[pos]);
11870 pos++;
11875 uint8_t leading_byte = data[pos]; // leading byte
11879 pos++;
11882 if(pos + 1 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11883 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11885 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11886 if (code_point < 0x80 || 0x7ff < code_point) { return result(error_code::OVERLONG, pos); }
11888 pos += 2;
11891 if(pos + 2 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11893 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11894 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11897 (data[pos + 1] & 0b00111111) << 6 |
11898 (data[pos + 2] & 0b00111111);
11899 if (code_point < 0x800 || 0xffff < code_point) { return result(error_code::OVERLONG, pos); }
11900 if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
11902 pos += 3;
11905 if(pos + 3 >= len) { return result(error_code::TOO_SHORT, pos); } // minimal bound checking
11906 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos);}
11907 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11908 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11912 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11913 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11914 if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
11915 if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
11917 pos += 4;
11920 if ((leading_byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
11921 else { return result(error_code::HEADER_BITS, pos); }
11996 size_t pos = 0;
11998 while (pos < len) {
12000 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12002 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12004 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12007 size_t final_pos = pos + 16;
12008 while(pos < final_pos) {
12009 *utf8_output++ = char(buf[pos]);
12010 pos++;
12016 unsigned char byte = data[pos];
12020 pos++;
12025 pos++;
12050 size_t pos = 0;
12053 while (pos < len) {
12054 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12056 pos++;
12065 size_t pos = 0;
12068 while (pos < len) {
12069 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12071 pos++;
12123 size_t pos = 0;
12126 while (pos < len) {
12128 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12130 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12132 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12135 size_t final_pos = pos + 16;
12136 while(pos < final_pos) {
12137 *latin_output++ = char(buf[pos]);
12138 pos++;
12145 uint8_t leading_byte = data[pos]; // leading byte
12149 pos++;
12152 if(pos + 1 >= len) {
12155 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12157 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12162 pos += 2;
12172 size_t pos = 0;
12175 while (pos < len) {
12177 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12179 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12181 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12184 size_t final_pos = pos + 16;
12185 while(pos < final_pos) {
12186 *latin_output++ = char(buf[pos]);
12187 pos++;
12193 uint8_t leading_byte = data[pos]; // leading byte
12197 pos++;
12200 if(pos + 1 >= len) {
12201 return result(error_code::TOO_SHORT, pos); } // minimal bound checking
12202 if ((data[pos + 1] & 0b11000000) != 0b10000000) {
12203 return result(error_code::TOO_SHORT, pos); } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12205 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12207 return result(error_code::OVERLONG, pos);
12210 return result(error_code::TOO_LARGE, pos);
12213 pos += 2;
12216 return result(error_code::TOO_LARGE, pos);
12219 return result(error_code::TOO_LARGE, pos);
12223 return result(error_code::TOO_LONG, pos);
12226 return result(error_code::HEADER_BITS, pos);
12294 size_t pos = 0;
12300 while (pos < len) {
12301 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12304 pos++;
12317 size_t pos = 0;
12321 while (pos < len) {
12322 if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that they are Latin1
12324 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12325 ::memcpy(&v2, data + pos + 4, sizeof(uint64_t));
12326 ::memcpy(&v3, data + pos + 8, sizeof(uint64_t));
12327 ::memcpy(&v4, data + pos + 12, sizeof(uint64_t));
12335 size_t final_pos = pos + 16;
12336 while(pos < final_pos) {
12337 *latin_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(data[pos])) : char(data[pos]);
12338 pos++;
12343 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12346 pos++;
12347 } else { return result(error_code::TOO_LARGE, pos); }
12373 size_t pos = 0;
12376 while (pos < len) {
12377 utf32_char = (uint32_t)data[pos];
12380 pos++;
12389 size_t pos = 0;
12390 while (pos < len) {
12391 if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
12393 ::memcpy(&v, data + pos, sizeof(uint64_t));
12395 *latin1_output++ = char(buf[pos]);
12396 *latin1_output++ = char(buf[pos+1]);
12397 pos += 2;
12401 uint32_t utf32_char = data[pos];
12404 pos++;
12405 } else { return result(error_code::TOO_LARGE, pos); };
12430 size_t pos = 0;
12433 while (pos < len) {
12435 if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
12437 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12439 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12442 size_t final_pos = pos + 16;
12443 while(pos < final_pos) {
12444 *latin_output++ = char(buf[pos]);
12445 pos++;
12452 uint8_t leading_byte = data[pos]; // leading byte
12456 pos++;
12459 if(pos + 1 >= len) { break; } // minimal bound checking
12460 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12462 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12464 pos += 2;
12492 size_t pos = 0;
12496 while (pos < len) {
12497 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12499 pos++;
12525 size_t pos = 0;
12527 while (pos < len) {
12528 utf32_char = (uint32_t)data[pos];
12530 if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
12532 ::memcpy(&v, data + pos, sizeof(uint64_t));
12534 *latin1_output++ = char(buf[pos]);
12535 *latin1_output++ = char(buf[pos+1]);
12536 pos += 2;
12541 pos++;
15540 size_t pos = 0;
15543 while(pos + 64 + safety_margin <= size) {
15546 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15550 pos += 64;
15563 size_t max_starting_point = (pos + 64) - 12;
15566 while(pos < max_starting_point) {
15580 size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
15582 pos += consumed;
15591 utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
15729 size_t pos = 0;
15744 while(pos + 64 + safety_margin <= size) {
15745 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15749 pos += 64;
15770 size_t max_starting_point = (pos + 64) - 12;
15772 while(pos < max_starting_point) {
15782 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
15784 pos += consumed;
15794 if(pos < size) {
15795 size_t howmany = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
15804 size_t pos = 0;
15819 while(pos + 64 + safety_margin <= size) {
15820 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
15824 pos += 64;
15840 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15841 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15842 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15843 res.count += pos;
15852 size_t max_starting_point = (pos + 64) - 12;
15854 while(pos < max_starting_point) {
15864 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
15866 pos += consumed;
15876 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15877 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15878 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15879 res.count += pos;
15882 if(pos < size) {
15883 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
15884 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
15885 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
15887 res.count += pos;
15919 size_t pos = 0;
15922 while(pos + 64 + safety_margin <= size) {
15923 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
15927 pos += 64;
15933 size_t max_starting_point = (pos + 64) - 12;
15934 while(pos < max_starting_point) {
15935 size_t consumed = convert_masked_utf8_to_utf32(input + pos,
15937 pos += consumed;
15942 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
16081 size_t pos = 0;
16096 while(pos + 64 + safety_margin <= size) {
16097 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16101 pos += 64;
16122 size_t max_starting_point = (pos + 64) - 12;
16124 while(pos < max_starting_point) {
16134 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
16136 pos += consumed;
16146 if(pos < size) {
16147 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
16155 size_t pos = 0;
16170 while(pos + 64 + safety_margin <= size) {
16171 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16175 pos += 64;
16191 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16192 res.count += pos;
16201 size_t max_starting_point = (pos + 64) - 12;
16203 while(pos < max_starting_point) {
16213 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
16215 pos += consumed;
16225 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16226 res.count += pos;
16229 if(pos < size) {
16230 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16232 res.count += pos;
16262 size_t pos = 0;
16264 for(;pos + 64 <= size; pos += 64) {
16265 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16269 return count + scalar::utf8::count_code_points(in + pos, size - pos);
16273 size_t pos = 0;
16276 for(;pos + 64 <= size; pos += 64) {
16277 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16285 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
16300 size_t pos = 0;
16302 for(;pos < size/32*32; pos += 32) {
16303 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16308 return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
16313 size_t pos = 0;
16316 for(;pos < size/32*32; pos += 32) {
16317 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16329 return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
16338 size_t pos = 0;
16340 while (pos < size/32*32) {
16341 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
16344 pos += 32;
16348 scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
16479 size_t pos = 0;
16494 while(pos + 64 + safety_margin <= size) {
16495 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16499 pos += 64;
16520 size_t max_starting_point = (pos + 64) - 12;
16522 while(pos < max_starting_point) {
16532 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16534 pos += consumed;
16544 if(pos < size) {
16545 size_t howmany = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
16553 size_t pos = 0;
16568 while(pos + 64 + safety_margin <= size) {
16569 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16573 pos += 64;
16589 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16590 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16591 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16592 res.count += pos;
16601 size_t max_starting_point = (pos + 64) - 12;
16603 while(pos < max_starting_point) {
16613 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16615 pos += consumed;
16625 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16626 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16627 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16628 res.count += pos;
16631 if(pos < size) {
16632 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
16633 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
16634 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
16636 res.count += pos;
16666 size_t pos = 0;
16681 while(pos + 64 + safety_margin <= size) {
16682 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
16686 pos += 64;
16695 size_t max_starting_point = (pos + 64) - 12;
16697 while(pos < max_starting_point) {
16707 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
16709 pos += consumed;
16718 if(pos < size) {
16719 size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
17376 size_t pos = 0;
17378 for(;pos + 4 <= length; pos += 4) {
17379 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17398 return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
17404 size_t pos = 0;
17406 for(;pos + 4 <= length; pos += 4) {
17407 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(input + pos));
17414 return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
19226 size_t pos = 0;
19232 while (pos + 64 <= len) {
19233 size_t written = process_block_from_utf8_to_latin1<false>(buf + pos, 64, latin_output, minus64,
19239 pos += 64;
19242 if (pos < len) {
19243 size_t remaining = len - pos;
19245 process_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64, one,
19302 size_t pos = 0;
19308 while (pos + 64 <= len) {
19310 buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6);
19312 pos += 64;
19315 if (pos < len) {
19316 size_t remaining = len - pos;
19318 process_valid_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64,
20569 size_t pos = 0;
20571 for (; pos + 128 <= len; pos += 64) {
20572 __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20576 if (pos + 64 <= len) {
20577 __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20579 pos += 64;
20582 if (pos < len) {
20583 __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos));
20584 __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos));
20585 utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1);
21617 size_t pos = 0;
21628 while (pos + 32 <= length) {
21629 __m512i utf16 = _mm512_loadu_si512((const __m512i*)(input + pos));
21631 _mm512_storeu_si512(output + pos, utf16);
21632 pos += 32;
21634 if(pos < length) {
21635 __mmask32 m((1<< (length - pos))-1);
21636 __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i*)(input + pos));
21638 _mm512_mask_storeu_epi16(output + pos, m, utf16);
21910 size_t pos = 0;
21913 for(;pos + 64 <= length; pos += 64) {
21914 __m512i utf8 = _mm512_loadu_si512((const __m512i*)(input+pos));
21922 return count + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos);
24951 size_t pos = 0;
24954 while(pos + 64 + safety_margin <= size) {
24957 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
24961 pos += 64;
24974 size_t max_starting_point = (pos + 64) - 12;
24977 while(pos < max_starting_point) {
24991 size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
24993 pos += consumed;
25002 utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
25140 size_t pos = 0;
25155 while(pos + 64 + safety_margin <= size) {
25156 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25160 pos += 64;
25181 size_t max_starting_point = (pos + 64) - 12;
25183 while(pos < max_starting_point) {
25193 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
25195 pos += consumed;
25205 if(pos < size) {
25206 size_t howmany = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
25215 size_t pos = 0;
25230 while(pos + 64 + safety_margin <= size) {
25231 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25235 pos += 64;
25251 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25252 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25253 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25254 res.count += pos;
25263 size_t max_starting_point = (pos + 64) - 12;
25265 while(pos < max_starting_point) {
25275 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
25277 pos += consumed;
25287 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25288 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25289 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25290 res.count += pos;
25293 if(pos < size) {
25294 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
25295 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
25296 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
25298 res.count += pos;
25330 size_t pos = 0;
25333 while(pos + 64 + safety_margin <= size) {
25334 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
25338 pos += 64;
25344 size_t max_starting_point = (pos + 64) - 12;
25345 while(pos < max_starting_point) {
25346 size_t consumed = convert_masked_utf8_to_utf32(input + pos,
25348 pos += consumed;
25353 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
25492 size_t pos = 0;
25507 while(pos + 64 + safety_margin <= size) {
25508 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25512 pos += 64;
25533 size_t max_starting_point = (pos + 64) - 12;
25535 while(pos < max_starting_point) {
25545 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
25547 pos += consumed;
25557 if(pos < size) {
25558 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
25566 size_t pos = 0;
25581 while(pos + 64 + safety_margin <= size) {
25582 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25586 pos += 64;
25602 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25603 res.count += pos;
25612 size_t max_starting_point = (pos + 64) - 12;
25614 while(pos < max_starting_point) {
25624 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
25626 pos += consumed;
25636 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25637 res.count += pos;
25640 if(pos < size) {
25641 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25643 res.count += pos;
25673 size_t pos = 0;
25675 for(;pos + 64 <= size; pos += 64) {
25676 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25680 return count + scalar::utf8::count_code_points(in + pos, size - pos);
25684 size_t pos = 0;
25687 for(;pos + 64 <= size; pos += 64) {
25688 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25696 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
25711 size_t pos = 0;
25713 for(;pos < size/32*32; pos += 32) {
25714 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25719 return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
25724 size_t pos = 0;
25727 for(;pos < size/32*32; pos += 32) {
25728 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25740 return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
25749 size_t pos = 0;
25751 while (pos < size/32*32) {
25752 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
25755 pos += 32;
25759 scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
25892 size_t pos = 0;
25907 while(pos + 64 + safety_margin <= size) {
25908 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25912 pos += 64;
25933 size_t max_starting_point = (pos + 64) - 12;
25935 while(pos < max_starting_point) {
25945 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
25947 pos += consumed;
25957 if(pos < size) {
25958 size_t howmany = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
25966 size_t pos = 0;
25981 while(pos + 64 + safety_margin <= size) {
25982 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
25986 pos += 64;
26002 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26003 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26004 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26005 res.count += pos;
26014 size_t max_starting_point = (pos + 64) - 12;
26016 while(pos < max_starting_point) {
26026 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
26028 pos += consumed;
26038 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26039 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26040 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26041 res.count += pos;
26044 if(pos < size) {
26045 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
26046 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
26047 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
26049 res.count += pos;
26079 size_t pos = 0;
26094 while(pos + 64 + safety_margin <= size) {
26095 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
26099 pos += 64;
26108 size_t max_starting_point = (pos + 64) - 12;
26110 while(pos < max_starting_point) {
26120 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
26122 pos += consumed;
26131 if(pos < size) {
26132 size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
26799 size_t pos = 0;
26801 for(;pos + 8 <= length; pos += 8) {
26802 __m256i in = _mm256_loadu_si256((__m256i*)(input + pos));
26817 return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
26823 size_t pos = 0;
26825 for(;pos + 8 <= length; pos += 8) {
26826 __m256i in = _mm256_loadu_si256((__m256i*)(input + pos));
26832 return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
27323 size_t pos = 0;
27326 while(pos + 64 + safety_margin <= size) {
27329 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27333 pos += 64;
27346 size_t max_starting_point = (pos + 64) - 12;
27349 while(pos < max_starting_point) {
27363 size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
27365 pos += consumed;
27374 utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
27512 size_t pos = 0;
27527 while(pos + 64 + safety_margin <= size) {
27528 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27532 pos += 64;
27553 size_t max_starting_point = (pos + 64) - 12;
27555 while(pos < max_starting_point) {
27565 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
27567 pos += consumed;
27577 if(pos < size) {
27578 size_t howmany = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
27587 size_t pos = 0;
27602 while(pos + 64 + safety_margin <= size) {
27603 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27607 pos += 64;
27623 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27624 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27625 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27626 res.count += pos;
27635 size_t max_starting_point = (pos + 64) - 12;
27637 while(pos < max_starting_point) {
27647 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
27649 pos += consumed;
27659 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27660 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27661 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27662 res.count += pos;
27665 if(pos < size) {
27666 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
27667 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
27668 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
27670 res.count += pos;
27702 size_t pos = 0;
27705 while(pos + 64 + safety_margin <= size) {
27706 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
27710 pos += 64;
27716 size_t max_starting_point = (pos + 64) - 12;
27717 while(pos < max_starting_point) {
27718 size_t consumed = convert_masked_utf8_to_utf32(input + pos,
27720 pos += consumed;
27725 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
27864 size_t pos = 0;
27879 while(pos + 64 + safety_margin <= size) {
27880 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27884 pos += 64;
27905 size_t max_starting_point = (pos + 64) - 12;
27907 while(pos < max_starting_point) {
27917 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
27919 pos += consumed;
27929 if(pos < size) {
27930 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
27938 size_t pos = 0;
27953 while(pos + 64 + safety_margin <= size) {
27954 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
27958 pos += 64;
27974 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
27975 res.count += pos;
27984 size_t max_starting_point = (pos + 64) - 12;
27986 while(pos < max_starting_point) {
27996 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
27998 pos += consumed;
28008 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28009 res.count += pos;
28012 if(pos < size) {
28013 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28015 res.count += pos;
28045 size_t pos = 0;
28047 for(;pos + 64 <= size; pos += 64) {
28048 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28052 return count + scalar::utf8::count_code_points(in + pos, size - pos);
28056 size_t pos = 0;
28059 for(;pos + 64 <= size; pos += 64) {
28060 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
28068 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
28083 size_t pos = 0;
28085 for(;pos < size/32*32; pos += 32) {
28086 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28091 return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
28096 size_t pos = 0;
28099 for(;pos < size/32*32; pos += 32) {
28100 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28112 return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
28121 size_t pos = 0;
28123 while (pos < size/32*32) {
28124 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
28127 pos += 32;
28131 scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
31330 size_t pos = 0;
31333 while(pos + 64 + safety_margin <= size) {
31336 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31340 pos += 64;
31353 size_t max_starting_point = (pos + 64) - 12;
31356 while(pos < max_starting_point) {
31370 size_t consumed = convert_masked_utf8_to_utf16<endian>(input + pos,
31372 pos += consumed;
31381 utf16_output += scalar::utf8_to_utf16::convert_valid<endian>(input + pos, size - pos, utf16_output);
31519 size_t pos = 0;
31534 while(pos + 64 + safety_margin <= size) {
31535 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31539 pos += 64;
31560 size_t max_starting_point = (pos + 64) - 12;
31562 while(pos < max_starting_point) {
31572 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
31574 pos += consumed;
31584 if(pos < size) {
31585 size_t howmany = scalar::utf8_to_utf16::convert<endian>(in + pos, size - pos, utf16_output);
31594 size_t pos = 0;
31609 while(pos + 64 + safety_margin <= size) {
31610 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31614 pos += 64;
31630 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31631 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31632 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31633 res.count += pos;
31642 size_t max_starting_point = (pos + 64) - 12;
31644 while(pos < max_starting_point) {
31654 size_t consumed = convert_masked_utf8_to_utf16<endian>(in + pos,
31656 pos += consumed;
31666 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31667 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31668 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31669 res.count += pos;
31672 if(pos < size) {
31673 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
31674 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
31675 result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors<endian>(pos, in + pos, size - pos, utf16_output);
31677 res.count += pos;
31709 size_t pos = 0;
31712 while(pos + 64 + safety_margin <= size) {
31713 simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));
31717 pos += 64;
31723 size_t max_starting_point = (pos + 64) - 12;
31724 while(pos < max_starting_point) {
31725 size_t consumed = convert_masked_utf8_to_utf32(input + pos,
31727 pos += consumed;
31732 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
31871 size_t pos = 0;
31886 while(pos + 64 + safety_margin <= size) {
31887 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31891 pos += 64;
31912 size_t max_starting_point = (pos + 64) - 12;
31914 while(pos < max_starting_point) {
31924 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
31926 pos += consumed;
31936 if(pos < size) {
31937 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
31945 size_t pos = 0;
31960 while(pos + 64 + safety_margin <= size) {
31961 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
31965 pos += 64;
31981 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
31982 res.count += pos;
31991 size_t max_starting_point = (pos + 64) - 12;
31993 while(pos < max_starting_point) {
32003 size_t consumed = convert_masked_utf8_to_utf32(in + pos,
32005 pos += consumed;
32015 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32016 res.count += pos;
32019 if(pos < size) {
32020 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32022 res.count += pos;
32052 size_t pos = 0;
32054 for(;pos + 64 <= size; pos += 64) {
32055 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32059 return count + scalar::utf8::count_code_points(in + pos, size - pos);
32063 size_t pos = 0;
32066 for(;pos + 64 <= size; pos += 64) {
32067 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32075 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
32090 size_t pos = 0;
32092 for(;pos < size/32*32; pos += 32) {
32093 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32098 return count + scalar::utf16::count_code_points<big_endian>(in + pos, size - pos);
32103 size_t pos = 0;
32106 for(;pos < size/32*32; pos += 32) {
32107 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32119 return count + scalar::utf16::utf8_length_from_utf16<big_endian>(in + pos, size - pos);
32128 size_t pos = 0;
32130 while (pos < size/32*32) {
32131 simd16x32<uint16_t> input(reinterpret_cast<const uint16_t *>(in + pos));
32134 pos += 32;
32138 scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
32269 size_t pos = 0;
32284 while(pos + 64 + safety_margin <= size) {
32285 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32289 pos += 64;
32310 size_t max_starting_point = (pos + 64) - 12;
32312 while(pos < max_starting_point) {
32322 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32324 pos += consumed;
32334 if(pos < size) {
32335 size_t howmany = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
32343 size_t pos = 0;
32358 while(pos + 64 + safety_margin <= size) {
32359 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32363 pos += 64;
32379 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32380 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32381 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32382 res.count += pos;
32391 size_t max_starting_point = (pos + 64) - 12;
32393 while(pos < max_starting_point) {
32403 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32405 pos += consumed;
32415 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32416 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32417 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32418 res.count += pos;
32421 if(pos < size) {
32422 // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
32423 // with the ability to go back up to pos bytes, and read size-pos bytes forward.
32424 result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
32426 res.count += pos;
32456 size_t pos = 0;
32471 while(pos + 64 + safety_margin <= size) {
32472 simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
32476 pos += 64;
32485 size_t max_starting_point = (pos + 64) - 12;
32487 while(pos < max_starting_point) {
32497 size_t consumed = convert_masked_utf8_to_latin1(in + pos,
32499 pos += consumed;
32508 if(pos < size) {
32509 size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, latin1_output);
33192 size_t pos = 0;
33194 for(;pos + 4 <= length; pos += 4) {
33195 __m128i in = _mm_loadu_si128((__m128i*)(input + pos));
33210 return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
33216 size_t pos = 0;
33218 for(;pos + 4 <= length; pos += 4) {
33219 __m128i in = _mm_loadu_si128((__m128i*)(input + pos));
33225 return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);