Lines Matching defs:utf8

4116 /* begin file src/scalar/utf8.h */
4123 namespace utf8 {
4324 } // utf8 namespace
4330 /* end file src/scalar/utf8.h */
5612 return scalar::utf8::trim_partial_utf8(input, length);
12599 // common functions for utf8 conversions
13201 // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the
13461 // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
13637 // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the
15443 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
15458 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
16252 /* begin file src/generic/utf8.h */
16257 namespace utf8 {
16269 return count + scalar::utf8::count_code_points(in + pos, size - pos);
16285 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
16287 } // utf8 namespace
16291 /* end file src/generic/utf8.h */
17304 return utf8::count_code_points(input, length);
17368 return utf8::utf16_length_from_utf8(input, length);
17418 return utf8::count_code_points(input, length);
17463 return scalar::utf8::validate(buf, len);
17467 return scalar::utf8::validate_with_errors(buf, len);
17700 return scalar::utf8::count_code_points(input, length);
17704 return scalar::utf8::count_code_points(buf,len);
17740 return scalar::utf8::utf16_length_from_utf8(input, length);
17756 return scalar::utf8::count_code_points(input, length);
18295 expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`)
18302 simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, __m512i utf8) {
18305 - utf8: bytes stored at separate 32-bit code units
18331 values = _mm512_and_si512(utf8, v_3f3f_3f7f);
18567 #define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \
18569 const __m128i t0 = _mm512_castsi512_si128(utf8); \
18570 const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \
18571 const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \
18572 const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \
18578 const __m256i h0 = _mm512_castsi512_si256(utf8); \
18579 const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \
18637 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18639 const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80);
18641 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
18647 const __m512i lane0 = broadcast_epi128<0>(utf8);
18648 const __m512i lane1 = broadcast_epi128<1>(utf8);
18651 const __m512i lane2 = broadcast_epi128<2>(utf8);
18665 const __m512i lane3 = broadcast_epi128<3>(utf8);
18688 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18690 const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80);
18692 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
18696 const __m512i lane0 = broadcast_epi128<0>(utf8);
18697 const __m512i lane1 = broadcast_epi128<1>(utf8);
18700 const __m512i lane2 = broadcast_epi128<2>(utf8);
18715 const __m512i lane3 = broadcast_epi128<3>(utf8);
18936 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18937 if(checker.check_next_input(utf8)) {
18938 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
18943 const __m512i lane0 = broadcast_epi128<0>(utf8);
18944 const __m512i lane1 = broadcast_epi128<1>(utf8);
18947 const __m512i lane2 = broadcast_epi128<2>(utf8);
18961 const __m512i lane3 = broadcast_epi128<3>(utf8);
18987 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
18988 if(checker.check_next_input(utf8)) {
18989 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
18993 const __m512i lane0 = broadcast_epi128<0>(utf8);
18994 const __m512i lane1 = broadcast_epi128<1>(utf8);
18997 const __m512i lane2 = broadcast_epi128<2>(utf8);
19012 const __m512i lane3 = broadcast_epi128<3>(utf8);
19020 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
19021 checker.check_next_input(utf8);
19059 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
19060 if(checker.check_next_input(utf8)) {
19061 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
19069 const __m512i lane0 = broadcast_epi128<0>(utf8);
19070 const __m512i lane1 = broadcast_epi128<1>(utf8);
19073 const __m512i lane2 = broadcast_epi128<2>(utf8);
19087 const __m512i lane3 = broadcast_epi128<3>(utf8);
19113 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
19114 if(checker.check_next_input(utf8)) {
19115 SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output)
19121 const __m512i lane0 = broadcast_epi128<0>(utf8);
19122 const __m512i lane1 = broadcast_epi128<1>(utf8);
19125 const __m512i lane2 = broadcast_epi128<2>(utf8);
19140 const __m512i lane3 = broadcast_epi128<3>(utf8);
19148 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
19149 checker.check_next_input(utf8);
20442 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)buf);
20443 running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, 0xf8); // running_or | (utf8 & ascii)
20446 const __m512i utf8 = _mm512_maskz_loadu_epi8((uint64_t(1) << (end-buf)) - 1,(const __m512i*)buf);
20447 running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, 0xf8); // running_or | (utf8 & ascii)
20752 const __m512i utf8 = _mm512_maskz_loadu_epi8(
20754 checker.check_next_input(utf8);
20794 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
20795 checker.check_next_input(utf8);
20798 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - ptr))-1, (const __m512i*)ptr);
20799 checker.check_next_input(utf8);
20811 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
20812 checker.check_next_input(utf8);
20815 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
20822 const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - ptr))-1, (const __m512i*)ptr);
20823 checker.check_next_input(utf8);
20826 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
21747 return answer + scalar::utf8::count_code_points(reinterpret_cast<const char *>(str + i), length - i);
21914 __m512i utf8 = _mm512_loadu_si512((const __m512i*)(input+pos));
21915 uint64_t utf8_continuation_mask = _mm512_cmple_epi8_mask(utf8, _mm512_set1_epi8(-65+1));
21919 uint64_t utf8_4byte = _mm512_cmpge_epu8_mask(utf8, _mm512_set1_epi8(int8_t(240)));
21922 return count + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos);
22634 // Convert up to 12 bytes from utf8 to utf16 using a mask indicating the
22816 // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
24454 // Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
24854 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
24869 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
25663 /* begin file src/generic/utf8.h */
25668 namespace utf8 {
25680 return count + scalar::utf8::count_code_points(in + pos, size - pos);
25696 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
25698 } // utf8 namespace
25702 /* end file src/generic/utf8.h */
26708 return utf8::count_code_points(input, length);
26745 return utf8::utf16_length_from_utf8(input, length);
26836 return utf8::count_code_points(input, length);
27226 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
27241 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
28035 /* begin file src/generic/utf8.h */
28040 namespace utf8 {
28052 return count + scalar::utf8::count_code_points(in + pos, size - pos);
28068 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
28070 } // utf8 namespace
28074 /* end file src/generic/utf8.h */
28335 return utf8::count_code_points(input, length);
28355 return scalar::utf8::utf16_length_from_utf8(input, length);
28367 return scalar::utf8::count_code_points(input, length);
28424 * first 11 bits are encoded into utf8
29002 // each latin1 takes 1-2 utf8 bytes
29109 // Convert up to 12 bytes from utf8 to utf16 using a mask indicating the
29293 // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
29423 // Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
30349 const __m128i t1 = _mm_and_si128(t0, v_1f00); // potentital first utf8 byte
30351 const __m128i t2 = _mm_and_si128(in_16, v_003f);// potential second utf8 byte
30353 const __m128i t3 = _mm_or_si128(t1, t2); // first and second potential utf8 byte together
30355 const __m128i t4 = _mm_or_si128(t3, v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit
31233 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input + count), length - count);
31248 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(input), reinterpret_cast<const char*>(input) + count, length - count);
32042 /* begin file src/generic/utf8.h */
32047 namespace utf8 {
32059 return count + scalar::utf8::count_code_points(in + pos, size - pos);
32075 return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
32077 } // utf8 namespace
32081 /* end file src/generic/utf8.h */
33097 return utf8::count_code_points(input, length);
33184 return utf8::utf16_length_from_utf8(input, length);
33229 return utf8::count_code_points(input, length);