Lines Matching refs:surrogates
4406 counter += static_cast<size_t>(word > 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes
12715 // Check for surrogates
12719 // Can still be either UTF-16LE or UTF-32 depending on the positions of the surrogates
12796 // Must start checking for surrogates
12906 // 2. We have some surrogates that have to be distinguished:
12907 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
12908 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
12916 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
12920 // L - word mask for low surrogates
12977 // 2. We have some surrogates that have to be distinguished:
12978 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
12979 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
12987 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
12991 // L - word mask for low surrogates
13387 // Because the surrogates need more work, the high surrogate is computed first.
13404 // After this is for surrogates
13405 // Blend the low and high surrogates
13424 uint32x4_t surrogates = vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic));
13431 uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed);
13594 // surrogates no longer being involved.
13765 1. an input register contains no surrogates and each value
13767 2. an input register contains no surrogates and values are
13769 3. an input register contains surrogates --- i.e. codepoints
14317 1. an input register contains no surrogates and each value
14319 2. an input register contains no surrogates and values are
14321 3. an input register contains surrogates --- i.e. codepoints
17927 // surrogates)
17989 // surrogates)
19526 // handle surrogates
19548 // check for mismatched surrogates
19659 // H - bitmask for high surrogates
19661 // L - bitmask for low surrogates
19682 /* 2. Shift by one 16-bit word to align low surrogates with high surrogates
19689 /* 3. Align all high surrogates in first and second by shifting to the left by 10 bits
19729 // no surrogates
20681 __mmask32 surrogates =
20683 if (surrogates) {
20687 // of the surrogates. To be valid UTF-32, a surrogate cannot be in the
20694 if ((surrogates & 0xaaaaaaaa) != 0) {
20698 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20866 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20867 if(surrogates) {
20869 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20887 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20888 if(surrogates) {
20890 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20915 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20916 if(surrogates) {
20918 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20936 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20937 if(surrogates) {
20939 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20955 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20956 if(surrogates) {
20958 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
20978 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
20979 if(surrogates) {
20981 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
21009 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
21010 if(surrogates) {
21012 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
21032 __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
21033 if(surrogates) {
21035 __mmask32 lowsurrogates = surrogates ^ highsurrogates;
22072 // Check for surrogates
22076 // Can still be either UTF-16LE or UTF-32 depending on the positions of the surrogates
22153 // Must start checking for surrogates
22243 - there must not be two consecutive low surrogates (0xd800 .. 0xdbff)
22244 - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff)
22304 // 2. We have some surrogates that have to be distinguished:
22305 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
22306 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
22314 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
22318 // L - word mask for low surrogates
22382 // 2. We have some surrogates that have to be distinguished:
22383 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
22384 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
22392 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
22396 // L - word mask for low surrogates
22785 __m128i surrogates =
22791 surrogates = _mm_shuffle_epi8(surrogates, swap);
22795 _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates);
23033 1. an input register contains no surrogates and each value
23035 2. an input register contains no surrogates and values are
23037 3. an input register contains surrogates --- i.e. codepoints
23167 // bitmask = 0x0000 if there are no surrogates
23410 // bitmask = 0x0000 if there are no surrogates
23565 1. an input register contains no surrogates and each value
23567 2. an input register contains no surrogates and values are
23569 3. an input register contains surrogates --- i.e. codepoints
23633 // bitmask = 0x0000 if there are no surrogates
23701 // bitmask = 0x0000 if there are no surrogates
28538 // Check for surrogates
28542 // Can still be either UTF-16LE or UTF-32 depending on the positions of the surrogates
28624 // Must start checking for surrogates
28720 - there must not be two consecutive low surrogates (0xd800 .. 0xdbff)
28721 - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff)
28780 // 2. We have some surrogates that have to be distinguished:
28781 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
28782 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
28790 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
28794 // L - word mask for low surrogates
28858 // 2. We have some surrogates that have to be distinguished:
28859 // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
28860 // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
28868 // H - word-mask for high surrogates: the six highest bits are 0b1101'11
28872 // L - word mask for low surrogates
29262 __m128i surrogates =
29268 surrogates = _mm_shuffle_epi8(surrogates, swap);
29272 _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates);
29544 1. an input register contains no surrogates and each value
29546 2. an input register contains no surrogates and values are
29548 3. an input register contains surrogates --- i.e. codepoints
29658 // bitmask = 0x0000 if there are no surrogates
29862 // bitmask = 0x0000 if there are no surrogates
29999 1. an input register contains no surrogates and each value
30001 2. an input register contains no surrogates and values are
30003 3. an input register contains surrogates --- i.e. codepoints
30067 // bitmask = 0x0000 if there are no surrogates
30135 // bitmask = 0x0000 if there are no surrogates