Lines Matching refs:utf32_output
82 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
92 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
93 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1282 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1292 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1293 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1489 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1499 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1500 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2420 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2430 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2431 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3364 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3365 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4018 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4028 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4029 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4636 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4637 return set_best()->convert_utf8_to_utf32(buf, len, utf32_output);
4640 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4641 return set_best()->convert_utf8_to_utf32_with_errors(buf, len, utf32_output);
4644 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4645 return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output);
4744 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4745 return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output);
4748 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4749 return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output);
4752 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4753 return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, utf32_output);
4756 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4757 return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, utf32_output);
4760 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4761 return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output);
4764 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4765 return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output);
5299 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept {
5300 return get_active_implementation()->convert_utf8_to_utf32(input, length, utf32_output);
5302 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept {
5303 return get_active_implementation()->convert_utf8_to_utf32_with_errors(input, length, utf32_output);
11273 inline size_t convert_valid(const char16_t* buf, size_t len, char32_t* utf32_output) {
11276 char32_t* start{utf32_output};
11281 *utf32_output++ = char32_t(word);
11290 *utf32_output++ = char32_t(value);
11294 return utf32_output - start;
11314 inline size_t convert(const char16_t* buf, size_t len, char32_t* utf32_output) {
11317 char32_t* start{utf32_output};
11322 *utf32_output++ = char32_t(word);
11333 *utf32_output++ = char32_t(value);
11337 return utf32_output - start;
11341 inline result convert_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
11344 char32_t* start{utf32_output};
11349 *utf32_output++ = char32_t(word);
11360 *utf32_output++ = char32_t(value);
11364 return result(error_code::SUCCESS, utf32_output - start);
11717 inline size_t convert_valid(const char* buf, size_t len, char32_t* utf32_output) {
11720 char32_t* start{utf32_output};
11729 *utf32_output++ = char32_t(buf[pos]);
11738 *utf32_output++ = char32_t(leading_byte);
11743 *utf32_output++ = char32_t(((leading_byte &0b00011111) << 6) | (data[pos + 1] &0b00111111));
11748 *utf32_output++ = char32_t(((leading_byte &0b00001111) << 12) | ((data[pos + 1] &0b00111111) << 6) | (data[pos + 2] &0b00111111));
11755 *utf32_output++ = char32_t(code_word);
11762 return utf32_output - start;
11782 inline size_t convert(const char* buf, size_t len, char32_t* utf32_output) {
11785 char32_t* start{utf32_output};
11797 *utf32_output++ = char32_t(buf[pos]);
11806 *utf32_output++ = char32_t(leading_byte);
11815 *utf32_output++ = char32_t(code_point);
11831 *utf32_output++ = char32_t(code_point);
11845 *utf32_output++ = char32_t(code_point);
11851 return utf32_output - start;
11854 inline result convert_with_errors(const char* buf, size_t len, char32_t* utf32_output) {
11857 char32_t* start{utf32_output};
11869 *utf32_output++ = char32_t(buf[pos]);
11878 *utf32_output++ = char32_t(leading_byte);
11887 *utf32_output++ = char32_t(code_point);
11901 *utf32_output++ = char32_t(code_point);
11916 *utf32_output++ = char32_t(code_point);
11924 return result(error_code::SUCCESS, utf32_output - start);
11940 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char32_t* utf32_output) {
11970 result res = convert_with_errors(buf, len + extra_len, utf32_output);
12094 inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) {
12096 char32_t* start{utf32_output};
12098 *utf32_output++ = (char32_t)data[i];
12100 return utf32_output - start;
13176 std::pair<const char*, char32_t*> arm_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
13187 vst1q_u32(reinterpret_cast<uint32_t *>(utf32_output), in16lowlow);
13188 vst1q_u32(reinterpret_cast<uint32_t *>(utf32_output+4), in16lowhigh);
13189 vst1q_u32(reinterpret_cast<uint32_t *>(utf32_output+8), in8highlow);
13190 vst1q_u32(reinterpret_cast<uint32_t *>(utf32_output+12), in8highhigh);
13192 utf32_output += 16;
13196 return std::make_pair(buf, utf32_output);
13472 uint32_t*& utf32_output = reinterpret_cast<uint32_t*&>(utf32_out);
13488 utf32_output += 16; // We wrote 16 32-bit characters.
13497 vst2_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13498 utf32_output += 4; // We wrote 4 32-bit characters.
13509 vst2q_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13510 utf32_output += 6; // We wrote 6 32-bit characters.
13527 vst2q_u16(reinterpret_cast<uint16_t *>(utf32_output), interleaver);
13528 utf32_output += 6; // We wrote 6 32-bit characters.
13556 vst1q_u32(utf32_output, composed);
13557 utf32_output += 4; // We wrote 4 32-bit characters.
13588 vst1q_u32(utf32_output, composed);
13590 utf32_output += 3; // We wrote 3 32-bit characters.
13627 vst1q_u32(utf32_output, composed);
13628 utf32_output += 3; // We wrote 3 32-bit characters.
14366 uint32_t * utf32_output = reinterpret_cast<uint32_t*>(utf32_out);
14381 vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in)));
14382 vst1q_u32(utf32_output+4, vmovl_high_u16(in));
14383 utf32_output += 8;
14396 *utf32_output++ = char32_t(word);
14403 if((diff | diff2) > 0x3FF) { return std::make_pair(nullptr, reinterpret_cast<char32_t*>(utf32_output)); }
14405 *utf32_output++ = char32_t(value);
14411 return std::make_pair(buf, reinterpret_cast<char32_t*>(utf32_output));
14423 uint32_t * utf32_output = reinterpret_cast<uint32_t*>(utf32_out);
14439 vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in)));
14440 vst1q_u32(utf32_output+4, vmovl_high_u16(in));
14441 utf32_output += 8;
14454 *utf32_output++ = char32_t(word);
14461 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast<char32_t*>(utf32_output)); }
14463 *utf32_output++ = char32_t(value);
14469 return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char32_t*>(utf32_output));
15918 char32_t* utf32_output) noexcept {
15920 char32_t* start{utf32_output};
15925 in.store_ascii_as_utf32(utf32_output);
15926 utf32_output += 64;
15936 utf8_end_of_code_point_mask, utf32_output);
15942 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
15943 return utf32_output - start;
16080 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
16082 char32_t* start{utf32_output};
16099 input.store_ascii_as_utf32(utf32_output);
16100 utf32_output += 64;
16135 utf8_end_of_code_point_mask, utf32_output);
16147 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
16149 utf32_output += howmany;
16151 return utf32_output - start;
16154 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
16156 char32_t* start{utf32_output};
16173 input.store_ascii_as_utf32(utf32_output);
16174 utf32_output += 64;
16191 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16214 utf8_end_of_code_point_mask, utf32_output);
16225 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16230 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
16235 utf32_output += res.count;
16238 return result(error_code::SUCCESS, utf32_output - start);
16862 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16863 std::pair<const char*, char32_t*> ret = arm_convert_latin1_to_utf32(buf, len, utf32_output);
16864 size_t converted_chars = ret.second - utf32_output;
16917 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16919 return converter.convert(buf, len, utf32_output);
16922 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16924 return converter.convert_with_errors(buf, len, utf32_output);
16928 char32_t* utf32_output) const noexcept {
16929 return utf8_to_utf32::convert_valid(input, size, utf32_output);
17104 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17105 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
17107 size_t saved_bytes = ret.second - utf32_output;
17117 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17118 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
17120 size_t saved_bytes = ret.second - utf32_output;
17130 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17132 std::pair<result, char32_t*> ret = arm_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
17144 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
17148 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17150 std::pair<result, char32_t*> ret = arm_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
17162 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
17283 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17284 return convert_utf16le_to_utf32(buf, len, utf32_output);
17287 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17288 return convert_utf16be_to_utf32(buf, len, utf32_output);
17514 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept {
17515 return scalar::latin1_to_utf32::convert(buf,len,utf32_output);
17554 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17555 return scalar::utf8_to_utf32::convert(buf, len, utf32_output);
17558 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17559 return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output);
17563 char32_t* utf32_output) const noexcept {
17564 return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output);
17663 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17664 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(buf, len, utf32_output);
17667 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17668 return scalar::utf16_to_utf32::convert<endianness::BIG>(buf, len, utf32_output);
17671 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17672 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(buf, len, utf32_output);
17675 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17676 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(buf, len, utf32_output);
17679 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17680 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(buf, len, utf32_output);
17683 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17684 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(buf, len, utf32_output);
19634 Returns a tuple: the first unprocessed code unit from buf, utf32_output, and a bool flag
19638 std::tuple<const char16_t*, char32_t*, bool> convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
19713 _mm512_storeu_si512((__m512i *) utf32_output, compressed_first);
19714 utf32_output += howmany1;
19718 //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second);
19719 _mm512_mask_storeu_epi32((__m512i *) utf32_output, __mmask16((1<<howmany2)-1), compressed_second);
19720 utf32_output += howmany2;
19726 return std::make_tuple(buf+carry, utf32_output, false);
19731 _mm512_storeu_si512((__m512i *)(utf32_output), _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)));
19732 _mm512_storeu_si512((__m512i *)(utf32_output) + 1, _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in,1)));
19733 utf32_output += 32;
19738 return std::make_tuple(buf+carry, utf32_output, true);
20629 std::pair<const char*, char32_t*> avx512_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
20640 _mm512_storeu_si512((__m512i*)&utf32_output[i], out);
20644 return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
21108 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
21109 std::pair<const char*, char32_t*> ret = avx512_convert_latin1_to_utf32(buf, len, utf32_output);
21111 size_t converted_chars = ret.second - utf32_output;
21233 uint32_t * utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
21234 utf8_to_utf32_result ret = icelake::validating_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21238 size_t saved_bytes = ret.second - utf32_output;
21264 uint32_t * utf32_output = reinterpret_cast<uint32_t *>(utf32);
21265 auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21274 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21291 std::get<0>(ret), len - (std::get<0>(ret) - buf), reinterpret_cast<char32_t *>(utf32_output) + saved_bytes);
21300 return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)};
21305 uint32_t * utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
21306 utf8_to_utf32_result ret = icelake::valid_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21307 size_t saved_bytes = ret.second - utf32_output;
21518 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21519 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21521 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21531 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21532 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21534 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21544 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21545 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21552 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21567 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21568 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21575 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21590 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21591 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21593 size_t saved_bytes = std::get<1>(ret) - utf32_output;
21603 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21604 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21606 size_t saved_bytes = std::get<1>(ret) - utf32_output;
22610 std::pair<const char*, char32_t*> avx2_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
22621 _mm256_storeu_si256((__m256i*)&utf32_output[i], out);
22625 return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
22822 char32_t *&utf32_output) {
22838 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), _mm256_cvtepu8_epi32(in));
22839 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output+8), _mm256_cvtepu8_epi32(_mm_srli_si128(in,8)));
22840 utf32_output += 16; // We wrote 16 32-bit characters.
22851 _mm256_storeu_si256((__m256i *)utf32_output, _mm256_cvtepu16_epi32(composed));
22852 utf32_output += 8; // We wrote 32 bytes, 8 code points.
22870 _mm_storeu_si128((__m128i *)utf32_output, composed);
22871 utf32_output += 4;
22892 _mm256_storeu_si256((__m256i *)utf32_output, _mm256_cvtepu16_epi32(composed));
22893 utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential
22910 _mm_storeu_si128((__m128i *)utf32_output, composed);
22911 utf32_output += 4;
22931 _mm_storeu_si128((__m128i *)utf32_output, composed);
22932 utf32_output += 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes.
23611 Returns a pair: the first unprocessed byte from buf and utf32_output
23615 std::pair<const char16_t*, char32_t*> avx2_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
23640 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in)));
23641 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in,1)));
23642 utf32_output += 16;
23656 *utf32_output++ = char32_t(word);
23663 if((diff | diff2) > 0x3FF) { return std::make_pair(nullptr, utf32_output); }
23665 *utf32_output++ = char32_t(value);
23671 return std::make_pair(buf, utf32_output);
23682 std::pair<result, char32_t*> avx2_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
23708 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in)));
23709 _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in,1)));
23710 utf32_output += 16;
23724 *utf32_output++ = char32_t(word);
23731 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf32_output); }
23733 *utf32_output++ = char32_t(value);
23739 return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output);
25329 char32_t* utf32_output) noexcept {
25331 char32_t* start{utf32_output};
25336 in.store_ascii_as_utf32(utf32_output);
25337 utf32_output += 64;
25347 utf8_end_of_code_point_mask, utf32_output);
25353 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
25354 return utf32_output - start;
25491 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
25493 char32_t* start{utf32_output};
25510 input.store_ascii_as_utf32(utf32_output);
25511 utf32_output += 64;
25546 utf8_end_of_code_point_mask, utf32_output);
25558 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
25560 utf32_output += howmany;
25562 return utf32_output - start;
25565 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
25567 char32_t* start{utf32_output};
25584 input.store_ascii_as_utf32(utf32_output);
25585 utf32_output += 64;
25602 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25625 utf8_end_of_code_point_mask, utf32_output);
25636 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25641 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
25646 utf32_output += res.count;
25649 return result(error_code::SUCCESS, utf32_output - start);
26275 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26276 std::pair<const char*, char32_t*> ret = avx2_convert_latin1_to_utf32(buf, len, utf32_output);
26278 size_t converted_chars = ret.second - utf32_output;
26333 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26335 return converter.convert(buf, len, utf32_output);
26338 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26340 return converter.convert_with_errors(buf, len, utf32_output);
26344 char32_t* utf32_output) const noexcept {
26345 return utf8_to_utf32::convert_valid(input, size, utf32_output);
26553 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26554 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
26556 size_t saved_bytes = ret.second - utf32_output;
26566 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26567 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
26569 size_t saved_bytes = ret.second - utf32_output;
26579 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26581 std::pair<result, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
26593 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
26597 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26599 std::pair<result, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
26611 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
26687 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26688 return convert_utf16le_to_utf32(buf, len, utf32_output);
26691 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26692 return convert_utf16be_to_utf32(buf, len, utf32_output);
27701 char32_t* utf32_output) noexcept {
27703 char32_t* start{utf32_output};
27708 in.store_ascii_as_utf32(utf32_output);
27709 utf32_output += 64;
27719 utf8_end_of_code_point_mask, utf32_output);
27725 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
27726 return utf32_output - start;
27863 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
27865 char32_t* start{utf32_output};
27882 input.store_ascii_as_utf32(utf32_output);
27883 utf32_output += 64;
27918 utf8_end_of_code_point_mask, utf32_output);
27930 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
27932 utf32_output += howmany;
27934 return utf32_output - start;
27937 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
27939 char32_t* start{utf32_output};
27956 input.store_ascii_as_utf32(utf32_output);
27957 utf32_output += 64;
27974 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
27997 utf8_end_of_code_point_mask, utf32_output);
28008 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28013 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
28018 utf32_output += res.count;
28021 return result(error_code::SUCCESS, utf32_output - start);
28298 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28299 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(buf, len, utf32_output);
28302 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28303 return scalar::utf16_to_utf32::convert<endianness::BIG>(buf, len, utf32_output);
28306 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28307 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(buf, len, utf32_output);
28310 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28311 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(buf, len, utf32_output);
28314 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28315 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(buf, len, utf32_output);
28318 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28319 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(buf, len, utf32_output);
29072 std::pair<const char*, char32_t*> sse_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
29090 _mm_storeu_si128((__m128i*)utf32_output, out1);
29091 _mm_storeu_si128((__m128i*)(utf32_output + 4), out2);
29092 _mm_storeu_si128((__m128i*)(utf32_output + 8), out3);
29093 _mm_storeu_si128((__m128i*)(utf32_output + 12), out4);
29095 utf32_output += 16;
29099 return std::make_pair(buf, utf32_output);
29299 char32_t *&utf32_output) {
29315 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu8_epi32(in));
29316 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu8_epi32(_mm_srli_si128(in,4)));
29317 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+8), _mm_cvtepu8_epi32(_mm_srli_si128(in,8)));
29318 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+12), _mm_cvtepu8_epi32(_mm_srli_si128(in,12)));
29319 utf32_output += 16; // We wrote 16 32-bit characters.
29330 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(composed));
29331 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(composed,8)));
29332 utf32_output += 8; // We wrote 32 bytes, 8 code points.
29350 _mm_storeu_si128((__m128i *)utf32_output, composed);
29351 utf32_output += 4;
29372 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(composed));
29373 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(composed,8)));
29374 utf32_output += 6; // We keep 6 code points (24 bytes) of the 32 bytes stored.
29390 _mm_storeu_si128((__m128i *)utf32_output, composed);
29391 utf32_output += 4;
29411 _mm_storeu_si128((__m128i *)utf32_output, composed);
29412 utf32_output += 3;
30048 std::pair<const char16_t*, char32_t*> sse_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
30074 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(in));
30075 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(in,8)));
30076 utf32_output += 8;
30089 *utf32_output++ = char32_t(word);
30096 if((diff | diff2) > 0x3FF) { return std::make_pair(nullptr, utf32_output); }
30098 *utf32_output++ = char32_t(value);
30104 return std::make_pair(buf, utf32_output);
30115 std::pair<result, char32_t*> sse_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
30142 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(in));
30143 _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(in,8)));
30144 utf32_output += 8;
30157 *utf32_output++ = char32_t(word);
30164 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf32_output); }
30166 *utf32_output++ = char32_t(value);
30172 return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output);
31708 char32_t* utf32_output) noexcept {
31710 char32_t* start{utf32_output};
31715 in.store_ascii_as_utf32(utf32_output);
31716 utf32_output += 64;
31726 utf8_end_of_code_point_mask, utf32_output);
31732 utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
31733 return utf32_output - start;
31870 simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
31872 char32_t* start{utf32_output};
31889 input.store_ascii_as_utf32(utf32_output);
31890 utf32_output += 64;
31925 utf8_end_of_code_point_mask, utf32_output);
31937 size_t howmany = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
31939 utf32_output += howmany;
31941 return utf32_output - start;
31944 simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
31946 char32_t* start{utf32_output};
31963 input.store_ascii_as_utf32(utf32_output);
31964 utf32_output += 64;
31981 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32004 utf8_end_of_code_point_mask, utf32_output);
32015 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32020 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
32025 utf32_output += res.count;
32028 return result(error_code::SUCCESS, utf32_output - start);
32658 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32659 std::pair<const char*, char32_t*> ret = sse_convert_latin1_to_utf32(buf, len, utf32_output);
32661 size_t converted_chars = ret.second - utf32_output;
32717 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32719 return converter.convert(buf, len, utf32_output);
32722 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32724 return converter.convert_with_errors(buf, len, utf32_output);
32728 char32_t* utf32_output) const noexcept {
32729 return utf8_to_utf32::convert_valid(input, size, utf32_output);
32942 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32943 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
32945 size_t saved_bytes = ret.second - utf32_output;
32955 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32956 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
32958 size_t saved_bytes = ret.second - utf32_output;
32968 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32970 std::pair<result, char32_t*> ret = westmere::sse_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
32982 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
32986 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32988 std::pair<result, char32_t*> ret = westmere::sse_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
33000 ret.first.count = ret.second - utf32_output; // Set count to the number of 32-bit code units written
33076 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33077 return convert_utf16le_to_utf32(buf, len, utf32_output);
33080 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33081 return convert_utf16be_to_utf32(buf, len, utf32_output);