Lines Matching defs:data
4128 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4136 std::memcpy(&v1, data + pos, sizeof(uint64_t));
4138 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4145 unsigned char byte = data[pos];
4149 byte = data[pos];
4155 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4157 code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4162 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4163 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4166 (data[pos + 1] & 0b00111111) << 6 |
4167 (data[pos + 2] & 0b00111111);
4175 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
4176 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
4177 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
4180 (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4181 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4194 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4202 std::memcpy(&v1, data + pos, sizeof(uint64_t));
4204 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
4211 unsigned char byte = data[pos];
4215 byte = data[pos];
4221 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4223 code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
4228 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4229 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4232 (data[pos + 1] & 0b00111111) << 6 |
4233 (data[pos + 2] & 0b00111111);
4239 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4240 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4241 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
4244 (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
4245 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
4346 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4349 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4354 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4367 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4370 uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
4375 uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
4471 // data without a BOM, it could pass as UTF-8.
10595 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10600 std::memcpy(&v1, data + pos, sizeof(uint64_t));
10602 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10608 if (data[pos] >= 0b10000000) { return false; }
10615 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10620 std::memcpy(&v1, data + pos, sizeof(uint64_t));
10622 std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
10626 if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10632 if (data[pos] >= 0b10000000) { return result(error_code::TOO_LARGE, pos); }
10654 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10657 uint32_t word = data[pos];
10666 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10669 uint32_t word = data[pos];
10766 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10773 ::memcpy(&v, data + pos, sizeof(uint64_t));
10781 uint32_t word = data[pos];
10830 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10837 ::memcpy(&v, data + pos, sizeof(uint64_t));
10845 uint32_t word = data[pos];
10879 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10886 ::memcpy(&v, data + pos, sizeof(uint64_t));
10894 uint32_t word = data[pos];
10946 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10950 uint32_t word = data[pos];
10990 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10994 uint32_t word = data[pos];
11019 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
11023 uint32_t word = data[pos];
11065 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11072 ::memcpy(&v, data + pos, sizeof(uint64_t));
11084 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11106 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11139 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11146 ::memcpy(&v, data + pos, sizeof(uint64_t));
11157 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11180 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11198 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11205 ::memcpy(&v, data + pos, sizeof(uint64_t));
11216 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11239 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11274 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11278 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11287 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11315 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11319 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11329 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11342 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11346 uint16_t word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
11356 uint16_t next_word = !match_system(big_endian) ? utf16::swap_bytes(data[pos + 1]) : data[pos + 1];
11386 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11393 ::memcpy(&v, data + pos, sizeof(uint64_t));
11403 uint8_t leading_byte = data[pos]; // leading byte
11412 uint16_t code_point = uint16_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11422 uint16_t code_point = uint16_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11431 uint32_t code_point = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11432 | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11470 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11477 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11479 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11491 uint8_t leading_byte = data[pos]; // leading byte
11500 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11502 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11514 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11515 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11518 (data[pos + 1] & 0b00111111) << 6 |
11519 (data[pos + 2] & 0b00111111);
11532 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11533 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11534 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11538 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11539 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11560 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11567 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11569 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11580 uint8_t leading_byte = data[pos]; // leading byte
11589 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11591 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11603 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11604 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11607 (data[pos + 1] & 0b00111111) << 6 |
11608 (data[pos + 2] & 0b00111111);
11619 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11620 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11621 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11625 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11626 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11718 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11725 ::memcpy(&v, data + pos, sizeof(uint64_t));
11735 uint8_t leading_byte = data[pos]; // leading byte
11743 *utf32_output++ = char32_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11748 *utf32_output++ = char32_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11753 uint32_t code_word = ((leading_byte & 0b00000111) << 18 )| ((data[pos + 1] &0b00111111) << 12)
11754 | ((data[pos + 2] &0b00111111) << 6) | (data[pos + 3] &0b00111111);
11783 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11790 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11792 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11803 uint8_t leading_byte = data[pos]; // leading byte
11811 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11813 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11821 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11822 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11825 (data[pos + 1] & 0b00111111) << 6 |
11826 (data[pos + 2] & 0b00111111);
11836 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; }
11837 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; }
11838 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; }
11842 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11843 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11855 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11862 ::memcpy(&v1, data + pos, sizeof(uint64_t));
11864 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
11875 uint8_t leading_byte = data[pos]; // leading byte
11883 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11885 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
11893 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11894 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11897 (data[pos + 1] & 0b00111111) << 6 |
11898 (data[pos + 2] & 0b00111111);
11906 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos);}
11907 if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11908 if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
11912 (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
11913 (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
11995 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12002 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12004 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12016 unsigned char byte = data[pos];
12049 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12054 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12064 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12069 uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
12095 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12098 *utf32_output++ = (char32_t)data[i];
12122 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12130 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12132 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12145 uint8_t leading_byte = data[pos]; // leading byte
12155 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12157 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12171 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12179 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12181 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12193 uint8_t leading_byte = data[pos]; // leading byte
12202 if ((data[pos + 1] & 0b11000000) != 0b10000000) {
12205 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12293 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12296 char* current_write = temp_output.data();
12301 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12309 std::memcpy(latin_output, temp_output.data(), len);
12311 return current_write - temp_output.data();
12316 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12324 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12325 ::memcpy(&v2, data + pos + 4, sizeof(uint64_t));
12326 ::memcpy(&v3, data + pos + 8, sizeof(uint64_t));
12327 ::memcpy(&v4, data + pos + 12, sizeof(uint64_t));
12337 *latin_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(data[pos])) : char(data[pos]);
12343 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12370 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12377 utf32_char = (uint32_t)data[pos];
12387 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12393 ::memcpy(&v, data + pos, sizeof(uint64_t));
12401 uint32_t utf32_char = data[pos];
12428 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12437 ::memcpy(&v1, data + pos, sizeof(uint64_t));
12439 ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
12452 uint8_t leading_byte = data[pos]; // leading byte
12460 if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
12462 uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
12491 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12497 word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
12522 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12528 utf32_char = (uint32_t)data[pos];
12532 ::memcpy(&v, data + pos, sizeof(uint64_t));
12889 // 0. Load data: since the validation takes into account only higher
12959 // 0. Load data: since the validation takes into account only higher
13791 We prepare data for all these three cases in two registers.
14343 We prepare data for all these three cases in two registers.
15733 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
15808 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
16085 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
16159 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
16483 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
16557 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
16670 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
17322 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
17326 const uint8_t *simd_end = data + (length / lanes) * lanes;
17328 for (; data < simd_end; data += lanes) {
17330 uint8x16_t input_vec = vld1q_u8(data);
18080 __m512i lead = _mm512_maskz_compress_epi8(leading, leading2byte); // will contain zero for ascii, and the data
22282 // 0. Load data: since the validation takes into account only higher
22360 // 0. Load data: since the validation takes into account only higher
22657 // We process the data in chunks of 16 bytes.
22837 // We process the data in chunks of 16 bytes.
23059 We prepare data for all these three cases in two registers.
23591 We prepare data for all these three cases in two registers.
24482 // We process the data in chunks of 32 bytes.
24491 // We process the data in chunks of 16 bytes.
25144 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
25219 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
25496 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
25570 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
25896 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
25970 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
26083 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
26754 const uint8_t *data = reinterpret_cast<const uint8_t *>(input);
26767 __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i));
26768 __m256i input2 = _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i)));
26769 __m256i input3 = _mm256_loadu_si256((const __m256i *)(data + i + 2*sizeof(__m256i)));
26770 __m256i input4 = _mm256_loadu_si256((const __m256i *)(data + i + 3*sizeof(__m256i)));
26780 __m256i input_256_chunk = _mm256_loadu_si256((const __m256i *)(data + i));
26791 return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast<const char *>(data + i), len - i);
27516 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
27591 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
27868 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
27942 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
28759 // 0. Load data: since the validation takes into account only higher
28836 // 0. Load data: since the validation takes into account only higher
29132 // We process the data in chunks of 16 bytes.
29314 // We process the data in chunks of 16 bytes.
29443 // We process the data in chunks of 16 bytes.
29570 We prepare data for all these three cases in two registers.
30025 We prepare data for all these three cases in two registers.
31523 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
31598 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
31875 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
31949 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
32273 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
32347 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
32460 // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate