Lines Matching defs:buf

69   simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
70 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
71 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
72 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
73 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
74 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
75 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
76 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
77 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
78 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
79 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
80 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
81 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
82 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
83 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
84 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
85 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
86 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
87 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
88 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
89 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
90 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
91 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
92 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
93 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
94 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
95 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
96 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
97 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
98 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
99 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
100 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
101 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
102 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
103 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
104 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
105 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
106 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
107 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
108 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
109 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
110 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
111 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
112 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
113 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
114 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
115 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
116 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
117 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
118 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
119 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
120 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
121 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
122 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
123 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
124 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
125 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
126 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
127 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
128 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1269 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1270 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1271 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1272 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1273 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1274 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1275 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1276 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1277 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1278 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1279 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1280 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1281 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1282 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1283 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1284 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1285 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1286 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1287 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1288 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1289 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1290 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1291 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1292 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1293 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1294 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1295 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1296 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1297 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1298 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1299 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1300 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1301 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1302 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1303 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1304 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1305 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1306 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1307 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1308 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1309 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1310 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1311 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1312 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1313 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1314 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1315 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1316 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1317 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1318 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1319 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1320 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1321 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1322 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1323 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1324 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1325 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1326 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1327 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1328 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
1476 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
1477 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
1478 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
1479 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
1480 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
1481 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
1482 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
1483 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
1484 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
1485 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
1486 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
1487 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1488 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1489 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1490 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1491 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
1492 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
1493 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1494 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1495 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1496 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
1497 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1498 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1499 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1500 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
1501 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1502 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1503 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1504 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1505 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1506 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1507 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
1508 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1509 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1510 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1511 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1512 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1513 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1514 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1515 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1516 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
1517 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1518 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1519 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
1520 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1521 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1522 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1523 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1524 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1525 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
1526 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1527 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1528 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1529 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1530 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1531 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
1532 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
1533 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
1534 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
1535 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
2407 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
2408 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
2409 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
2410 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
2411 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
2412 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
2413 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
2414 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
2415 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
2416 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
2417 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
2418 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2419 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2420 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2421 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2422 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
2423 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
2424 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2425 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2426 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2427 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
2428 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2429 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2430 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2431 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
2432 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2433 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2434 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2435 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2436 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2437 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2438 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
2439 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2440 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2441 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2442 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2443 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2444 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2445 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2446 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2447 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
2448 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2449 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2450 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
2451 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2452 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2453 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2454 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2455 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2456 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
2457 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2458 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2459 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2460 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2461 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2462 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
2463 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
2464 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
2465 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
2466 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
3348 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
3349 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
3350 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
3351 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
3352 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
3353 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
3354 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
3355 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
3356 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
3357 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
3358 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3359 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3360 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3361 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
3362 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3363 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3364 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3365 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
3366 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3367 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3368 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3369 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3370 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3371 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3372 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3373 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3374 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3375 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
3376 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3377 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3378 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3379 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3380 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3381 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
3382 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3383 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3384 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3385 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3386 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3387 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
3388 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
3389 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
3390 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
3391 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
4005 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
4006 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
4007 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
4008 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
4009 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
4010 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
4011 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
4012 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
4013 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
4014 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
4015 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
4016 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4017 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4018 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4019 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
4020 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
4021 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
4022 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4023 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4024 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4025 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
4026 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4027 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4028 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4029 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
4030 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4031 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4032 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4033 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4034 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4035 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4036 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
4037 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4038 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4039 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4040 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4041 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4042 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4043 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4044 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4045 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
4046 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4047 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4048 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
4049 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4050 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4051 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4052 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4053 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4054 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
4055 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4056 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4057 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4058 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4059 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4060 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
4061 void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
4062 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
4063 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
4064 simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
4127 inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
4128 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4193 inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
4194 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
4258 // Finds the previous leading byte starting backward from buf and validates with errors from there
4262 inline simdutf_warn_unused result rewind_and_validate_with_errors(const char *start, const char *buf, size_t len) noexcept {
4270 unsigned char byte = *buf;
4274 buf--;
4279 result res = validate_with_errors(buf, len + extra_len);
4284 inline size_t count_code_points(const char* buf, size_t len) {
4285 const int8_t * p = reinterpret_cast<const int8_t *>(buf);
4294 inline size_t utf16_length_from_utf8(const char* buf, size_t len) {
4295 const int8_t * p = reinterpret_cast<const int8_t *>(buf);
4345 inline simdutf_warn_unused bool validate(const char16_t *buf, size_t len) noexcept {
4346 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4366 inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, size_t len) noexcept {
4367 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
4387 inline size_t count_code_points(const char16_t* buf, size_t len) {
4389 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4399 inline size_t utf8_length_from_utf16(const char16_t* buf, size_t len) {
4401 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4413 inline size_t utf32_length_from_utf16(const char16_t* buf, size_t len) {
4415 const uint16_t * p = reinterpret_cast<const uint16_t *>(buf);
4544 simdutf_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override {
4545 return set_best()->validate_utf8(buf, len);
4548 simdutf_warn_unused result validate_utf8_with_errors(const char * buf, size_t len) const noexcept final override {
4549 return set_best()->validate_utf8_with_errors(buf, len);
4552 simdutf_warn_unused bool validate_ascii(const char * buf, size_t len) const noexcept final override {
4553 return set_best()->validate_ascii(buf, len);
4556 simdutf_warn_unused result validate_ascii_with_errors(const char * buf, size_t len) const noexcept final override {
4557 return set_best()->validate_ascii_with_errors(buf, len);
4560 simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) const noexcept final override {
4561 return set_best()->validate_utf16le(buf, len);
4564 simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) const noexcept final override {
4565 return set_best()->validate_utf16be(buf, len);
4568 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) const noexcept final override {
4569 return set_best()->validate_utf16le_with_errors(buf, len);
4572 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t * buf, size_t len) const noexcept final override {
4573 return set_best()->validate_utf16be_with_errors(buf, len);
4576 simdutf_warn_unused bool validate_utf32(const char32_t * buf, size_t len) const noexcept final override {
4577 return set_best()->validate_utf32(buf, len);
4580 simdutf_warn_unused result validate_utf32_with_errors(const char32_t * buf, size_t len) const noexcept final override {
4581 return set_best()->validate_utf32_with_errors(buf, len);
4584 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final override {
4585 return set_best()->convert_latin1_to_utf8(buf, len,utf8_output);
4588 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4589 return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output);
4592 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4593 return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output);
4596 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) const noexcept final override {
4597 return set_best()->convert_latin1_to_utf32(buf, len,latin1_output);
4600 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
4601 return set_best()->convert_utf8_to_latin1(buf, len,latin1_output);
4604 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept final override {
4605 return set_best()->convert_utf8_to_latin1_with_errors(buf, len, latin1_output);
4608 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
4609 return set_best()->convert_valid_utf8_to_latin1(buf, len,latin1_output);
4612 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4613 return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output);
4616 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4617 return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output);
4620 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4621 return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, utf16_output);
4624 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4625 return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, utf16_output);
4628 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4629 return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output);
4632 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4633 return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output);
4636 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4637 return set_best()->convert_utf8_to_utf32(buf, len, utf32_output);
4640 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4641 return set_best()->convert_utf8_to_utf32_with_errors(buf, len, utf32_output);
4644 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4645 return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output);
4648 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4649 return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output);
4652 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4653 return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output);
4656 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4657 return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, latin1_output);
4660 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4661 return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, latin1_output);
4664 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4665 return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output);
4668 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
4669 return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output);
4672 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4673 return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output);
4676 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4677 return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output);
4680 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4681 return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, utf8_output);
4684 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4685 return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, utf8_output);
4688 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4689 return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output);
4692 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
4693 return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output);
4696 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4697 return set_best()->convert_utf32_to_latin1(buf, len,latin1_output);
4700 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4701 return set_best()->convert_utf32_to_latin1_with_errors(buf, len,latin1_output);
4704 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
4705 return set_best()->convert_utf32_to_latin1(buf, len,latin1_output);
4708 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4709 return set_best()->convert_utf32_to_utf8(buf, len, utf8_output);
4712 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4713 return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
4716 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
4717 return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output);
4720 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4721 return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output);
4724 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4725 return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output);
4728 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4729 return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, utf16_output);
4732 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4733 return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, utf16_output);
4736 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4737 return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output);
4740 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_output) const noexcept final override {
4741 return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output);
4744 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4745 return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output);
4748 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4749 return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output);
4752 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4753 return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, utf32_output);
4756 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4757 return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, utf32_output);
4760 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4761 return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output);
4764 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_output) const noexcept final override {
4765 return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output);
4768 void change_endianness_utf16(const char16_t * buf, size_t len, char16_t * output) const noexcept final override {
4769 set_best()->change_endianness_utf16(buf, len, output);
4772 simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t len) const noexcept final override {
4773 return set_best()->count_utf16le(buf, len);
4776 simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t len) const noexcept final override {
4777 return set_best()->count_utf16be(buf, len);
4780 simdutf_warn_unused size_t count_utf8(const char * buf, size_t len) const noexcept final override {
4781 return set_best()->count_utf8(buf, len);
4784 simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) const noexcept override {
4785 return set_best()->latin1_length_from_utf8(buf, len);
4796 simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) const noexcept override {
4797 return set_best()->utf8_length_from_latin1(buf, len);
4800 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
4801 return set_best()->utf8_length_from_utf16le(buf, len);
4804 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * buf, size_t len) const noexcept override {
4805 return set_best()->utf8_length_from_utf16be(buf, len);
4816 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
4817 return set_best()->utf32_length_from_utf16le(buf, len);
4820 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * buf, size_t len) const noexcept override {
4821 return set_best()->utf32_length_from_utf16be(buf, len);
4824 simdutf_warn_unused size_t utf16_length_from_utf8(const char * buf, size_t len) const noexcept override {
4825 return set_best()->utf16_length_from_utf8(buf, len);
4828 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * buf, size_t len) const noexcept override {
4829 return set_best()->utf8_length_from_utf32(buf, len);
4832 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * buf, size_t len) const noexcept override {
4833 return set_best()->utf16_length_from_utf32(buf, len);
4836 simdutf_warn_unused size_t utf32_length_from_utf8(const char * buf, size_t len) const noexcept override {
4837 return set_best()->utf32_length_from_utf8(buf, len);
5240 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
5241 return get_active_implementation()->validate_utf8(buf, len);
5243 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept {
5244 return get_active_implementation()->validate_utf8_with_errors(buf, len);
5246 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept {
5247 return get_active_implementation()->validate_ascii(buf, len);
5249 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept {
5250 return get_active_implementation()->validate_ascii_with_errors(buf, len);
5259 simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) noexcept {
5260 return get_active_implementation()->convert_latin1_to_utf8(buf, len,utf8_output);
5262 simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) noexcept {
5263 return get_active_implementation()->convert_latin1_to_utf16le(buf, len, utf16_output);
5265 simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) noexcept{
5266 return get_active_implementation()->convert_latin1_to_utf16be(buf, len, utf16_output);
5268 simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) noexcept {
5269 return get_active_implementation()->convert_latin1_to_utf32(buf, len,latin1_output);
5271 simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
5272 return get_active_implementation()->convert_utf8_to_latin1(buf, len,latin1_output);
5274 simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) noexcept {
5275 return get_active_implementation()->convert_utf8_to_latin1_with_errors(buf, len, latin1_output);
5277 simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
5278 return get_active_implementation()->convert_valid_utf8_to_latin1(buf, len,latin1_output);
5305 simdutf_warn_unused bool validate_utf16(const char16_t * buf, size_t len) noexcept {
5307 return validate_utf16be(buf, len);
5309 return validate_utf16le(buf, len);
5312 simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) noexcept {
5313 return get_active_implementation()->validate_utf16le(buf, len);
5315 simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) noexcept {
5316 return get_active_implementation()->validate_utf16be(buf, len);
5318 simdutf_warn_unused result validate_utf16_with_errors(const char16_t * buf, size_t len) noexcept {
5320 return validate_utf16be_with_errors(buf, len);
5322 return validate_utf16le_with_errors(buf, len);
5325 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) noexcept {
5326 return get_active_implementation()->validate_utf16le_with_errors(buf, len);
5328 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t * buf, size_t len) noexcept {
5329 return get_active_implementation()->validate_utf16be_with_errors(buf, len);
5331 simdutf_warn_unused bool validate_utf32(const char32_t * buf, size_t len) noexcept {
5332 return get_active_implementation()->validate_utf32(buf, len);
5334 simdutf_warn_unused result validate_utf32_with_errors(const char32_t * buf, size_t len) noexcept {
5335 return get_active_implementation()->validate_utf32_with_errors(buf, len);
5353 simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5355 return convert_utf16be_to_utf8(buf, len, utf8_buffer);
5357 return convert_utf16le_to_utf8(buf, len, utf8_buffer);
5360 simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5362 return convert_utf16be_to_latin1(buf, len, latin1_buffer);
5364 return convert_utf16le_to_latin1(buf, len, latin1_buffer);
5367 simdutf_warn_unused size_t convert_latin1_to_utf16(const char * buf, size_t len, char16_t* utf16_output) noexcept {
5369 return convert_latin1_to_utf16be(buf, len, utf16_output);
5371 return convert_latin1_to_utf16le(buf, len, utf16_output);
5374 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5375 return get_active_implementation()->convert_utf16be_to_latin1(buf, len, latin1_buffer);
5377 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5378 return get_active_implementation()->convert_utf16le_to_latin1(buf, len, latin1_buffer);
5380 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5381 return get_active_implementation()->convert_valid_utf16be_to_latin1(buf, len, latin1_buffer);
5383 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5384 return get_active_implementation()->convert_valid_utf16le_to_latin1(buf, len, latin1_buffer);
5386 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5387 return get_active_implementation()->convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer);
5389 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5390 return get_active_implementation()->convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer);
5392 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5393 return get_active_implementation()->convert_utf16le_to_utf8(buf, len, utf8_buffer);
5395 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5396 return get_active_implementation()->convert_utf16be_to_utf8(buf, len, utf8_buffer);
5398 simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5400 return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer);
5402 return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
5405 simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5407 return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer);
5409 return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer);
5412 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5413 return get_active_implementation()->convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
5415 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5416 return get_active_implementation()->convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer);
5418 simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5420 return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer);
5422 return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
5425 simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
5427 return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer);
5429 return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer);
5432 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5433 return get_active_implementation()->convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
5435 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
5436 return get_active_implementation()->convert_valid_utf16be_to_utf8(buf, len, utf8_buffer);
5438 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5439 return get_active_implementation()->convert_utf32_to_utf8(buf, len, utf8_buffer);
5441 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5442 return get_active_implementation()->convert_utf32_to_utf8_with_errors(buf, len, utf8_buffer);
5444 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) noexcept {
5445 return get_active_implementation()->convert_valid_utf32_to_utf8(buf, len, utf8_buffer);
5447 simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5449 return convert_utf32_to_utf16be(buf, len, utf16_buffer);
5451 return convert_utf32_to_utf16le(buf, len, utf16_buffer);
5457 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5458 return get_active_implementation()->convert_utf32_to_utf16le(buf, len, utf16_buffer);
5460 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5461 return get_active_implementation()->convert_utf32_to_utf16be(buf, len, utf16_buffer);
5463 simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5465 return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer);
5467 return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer);
5470 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5471 return get_active_implementation()->convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer);
5473 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5474 return get_active_implementation()->convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer);
5476 simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5478 return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer);
5480 return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer);
5483 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5484 return get_active_implementation()->convert_valid_utf32_to_utf16le(buf, len, utf16_buffer);
5486 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
5487 return get_active_implementation()->convert_valid_utf32_to_utf16be(buf, len, utf16_buffer);
5489 simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5491 return convert_utf16be_to_utf32(buf, len, utf32_buffer);
5493 return convert_utf16le_to_utf32(buf, len, utf32_buffer);
5496 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5497 return get_active_implementation()->convert_utf16le_to_utf32(buf, len, utf32_buffer);
5499 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5500 return get_active_implementation()->convert_utf16be_to_utf32(buf, len, utf32_buffer);
5502 simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5504 return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer);
5506 return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer);
5509 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5510 return get_active_implementation()->convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer);
5512 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5513 return get_active_implementation()->convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer);
5515 simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5517 return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer);
5519 return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer);
5522 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5523 return get_active_implementation()->convert_valid_utf16le_to_utf32(buf, len, utf32_buffer);
5525 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) noexcept {
5526 return get_active_implementation()->convert_valid_utf16be_to_utf32(buf, len, utf32_buffer);
5547 simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) noexcept {
5548 return get_active_implementation()->latin1_length_from_utf8(buf, len);
5556 simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) noexcept {
5557 return get_active_implementation()->utf8_length_from_latin1(buf, len);
5600 simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * buf, size_t length) noexcept {
5601 return get_active_implementation()->autodetect_encoding(buf, length);
5603 simdutf_warn_unused int detect_encodings(const char * buf, size_t length) noexcept {
5604 return get_active_implementation()->detect_encodings(buf, length);
10594 inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
10595 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10614 inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
10615 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
10653 inline simdutf_warn_unused bool validate(const char32_t *buf, size_t len) noexcept {
10654 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10665 inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, size_t len) noexcept {
10666 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10680 inline size_t utf8_length_from_utf32(const char32_t* buf, size_t len) {
10682 const uint32_t * p = reinterpret_cast<const uint32_t *>(buf);
10694 inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) {
10696 const uint32_t * p = reinterpret_cast<const uint32_t *>(buf);
10733 inline size_t utf8_length_from_latin1(const char *buf, size_t len) {
10734 const uint8_t * c = reinterpret_cast<const uint8_t *>(buf);
10765 inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output) {
10766 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10775 *utf8_output++ = char(buf[pos]);
10776 *utf8_output++ = char(buf[pos+1]);
10829 inline size_t convert(const char32_t* buf, size_t len, char* utf8_output) {
10830 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10839 *utf8_output++ = char(buf[pos]);
10840 *utf8_output++ = char(buf[pos+1]);
10878 inline result convert_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
10879 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10888 *utf8_output++ = char(buf[pos]);
10889 *utf8_output++ = char(buf[pos+1]);
10945 inline size_t convert_valid(const char32_t* buf, size_t len, char16_t* utf16_output) {
10946 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
10989 inline size_t convert(const char32_t* buf, size_t len, char16_t* utf16_output) {
10990 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
11018 inline result convert_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
11019 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
11064 inline size_t convert_valid(const char16_t* buf, size_t len, char* utf8_output) {
11065 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11077 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11138 inline size_t convert(const char16_t* buf, size_t len, char* utf8_output) {
11139 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11151 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11197 inline result convert_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
11198 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11210 *utf8_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(buf[pos])) : char(buf[pos]);
11273 inline size_t convert_valid(const char16_t* buf, size_t len, char32_t* utf32_output) {
11274 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11314 inline size_t convert(const char16_t* buf, size_t len, char32_t* utf32_output) {
11315 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11341 inline result convert_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
11342 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
11385 inline size_t convert_valid(const char* buf, size_t len, char16_t* utf16_output) {
11386 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11397 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11469 inline size_t convert(const char* buf, size_t len, char16_t* utf16_output) {
11470 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11484 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11559 inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) {
11560 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11574 *utf16_output++ = !match_system(big_endian) ? char16_t(utf16::swap_bytes(buf[pos])) : char16_t(buf[pos]);
11649 * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and we have
11651 * the error is at 'buf' exactly, but it could also be in the previous bytes (up to 3 bytes back).
11653 * prior_bytes indicates how many bytes, prior to 'buf' may belong to the current memory section
11654 * and can be safely accessed. We use prior_bytes to safely access up to three bytes before 'buf'.
11658 * If the error is believed to have occurred prior to 'buf', the count value contained in the result
11662 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) {
11672 unsigned char byte = buf[0-i];
11675 buf -= i;
11689 // [....] [continuation] [continuation] [continuation] | [buf is continuation]
11693 result res = convert_with_errors<endian>(buf, len + extra_len, utf16_output);
11717 inline size_t convert_valid(const char* buf, size_t len, char32_t* utf32_output) {
11718 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11729 *utf32_output++ = char32_t(buf[pos]);
11782 inline size_t convert(const char* buf, size_t len, char32_t* utf32_output) {
11783 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11797 *utf32_output++ = char32_t(buf[pos]);
11854 inline result convert_with_errors(const char* buf, size_t len, char32_t* utf32_output) {
11855 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
11869 *utf32_output++ = char32_t(buf[pos]);
11928 * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and we have
11930 * the error is at 'buf' exactly, but it could also be in the previous bytes (up to 3 bytes back).
11932 * prior_bytes indicates how many bytes, prior to 'buf' may belong to the current memory section
11933 * and can be safely accessed. We use prior_bytes to safely access up to three bytes before 'buf'.
11937 * If the error is believed to have occurred prior to 'buf', the count value contained in the result
11940 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char32_t* utf32_output) {
11948 unsigned char byte = buf[0-i];
11951 buf -= i;
11965 // [....] [continuation] [continuation] [continuation] | [buf is continuation]
11970 result res = convert_with_errors(buf, len + extra_len, utf32_output);
11994 inline size_t convert(const char* buf, size_t len, char* utf8_output) {
11995 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12009 *utf8_output++ = char(buf[pos]);
12048 inline size_t convert(const char* buf, size_t len, char16_t* utf16_output) {
12049 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12063 inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) {
12064 const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
12094 inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) {
12095 const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
12121 inline size_t convert(const char* buf, size_t len, char* latin_output) {
12122 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12137 *latin_output++ = char(buf[pos]);
12170 inline result convert_with_errors(const char* buf, size_t len, char* latin_output) {
12171 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12186 *latin_output++ = char(buf[pos]);
12234 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char* latin1_output) {
12244 unsigned char byte = buf[0-i];
12247 buf -= i;
12261 // [....] [continuation] [continuation] [continuation] | [buf is continuation]
12265 result res = convert_with_errors(buf, len + extra_len, latin1_output);
12292 inline size_t convert(const char16_t* buf, size_t len, char* latin_output) {
12293 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12315 inline result convert_with_errors(const char16_t* buf, size_t len, char* latin_output) {
12316 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12369 inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) {
12370 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12386 inline result convert_with_errors(const char32_t *buf, size_t len, char *latin1_output) {
12387 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12395 *latin1_output++ = char(buf[pos]);
12396 *latin1_output++ = char(buf[pos+1]);
12427 inline size_t convert_valid(const char* buf, size_t len, char* latin_output) {
12428 const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
12444 *latin_output++ = char(buf[pos]);
12490 inline size_t convert_valid(const char16_t* buf, size_t len, char* latin_output) {
12491 const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
12521 inline size_t convert_valid(const char32_t *buf, size_t len, char *latin1_output) {
12522 const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12534 *latin1_output++ = char(buf[pos]);
12535 *latin1_output++ = char(buf[pos+1]);
12676 int arm_detect_encodings(const char * buf, size_t len) {
12677 const char* start = buf;
12678 const char* end = buf + len;
12693 while(buf + 64 <= end) {
12694 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t*>(buf));
12695 uint16x8_t secondin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + simd16<uint16_t>::SIZE / sizeof(char16_t));
12696 uint16x8_t thirdin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 2*simd16<uint16_t>::SIZE / sizeof(char16_t));
12697 uint16x8_t fourthin = vld1q_u16(reinterpret_cast<const uint16_t*>(buf) + 3*simd16<uint16_t>::SIZE / sizeof(char16_t));
12729 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
12793 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
12846 buf += 64;
12852 if (static_cast<size_t>(buf - start) != len) {
12855 std::memcpy(block, buf, len - (buf - start));
12864 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
12871 if (vmaxvq_u32(is_zero) == 0 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
13085 Returns a pair: the first unprocessed byte from buf and utf8_output
13157 std::pair<const char*, char16_t*> arm_convert_latin1_to_utf16(const char* buf, size_t len, char16_t* utf16_output) {
13158 const char* end = buf + len;
13160 while (buf + 16 <= end) {
13161 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13169 buf += 16;
13172 return std::make_pair(buf, utf16_output);
13176 std::pair<const char*, char32_t*> arm_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
13177 const char* end = buf + len;
13179 while (buf + 16 <= end) {
13180 uint8x16_t in8 = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));
13193 buf += 16;
13196 return std::make_pair(buf, utf32_output);
13709 std::pair<const char16_t*, char*> arm_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
13710 const char16_t* end = buf + len;
13711 while (buf + 8 <= end) {
13712 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13720 buf += 8;
13726 return std::make_pair(buf, latin1_output);
13730 std::pair<result, char*> arm_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
13731 const char16_t* start = buf;
13732 const char16_t* end = buf + len;
13733 while (buf + 8 <= end) {
13734 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13742 buf += 8;
13747 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
13751 return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
13756 return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
13809 Returns a pair: the first unprocessed byte from buf and utf8_output
13813 std::pair<const char16_t*, char*> arm_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_out) {
13815 const char16_t* end = buf + len;
13821 while (buf + 16 + safety_margin <= end) {
13822 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
13826 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
13835 buf += 8;
13845 buf += 16;
13894 buf += 8;
13997 buf += 8;
14016 buf += 8;
14024 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14026 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14039 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14050 buf += k;
14054 return std::make_pair(buf, reinterpret_cast<char*>(utf8_output));
14061 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
14065 std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_out) {
14067 const char16_t* start = buf;
14068 const char16_t* end = buf + len;
14075 while (buf + 16 + safety_margin <= end) {
14076 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14080 uint16x8_t nextin = vld1q_u16(reinterpret_cast<const uint16_t *>(buf) + 8);
14089 buf += 8;
14099 buf += 16;
14148 buf += 8;
14251 buf += 8;
14270 buf += 8;
14278 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14280 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14293 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14296 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast<char*>(utf8_output)); }
14304 buf += k;
14308 return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char*>(utf8_output));
14361 Returns a pair: the first unprocessed byte from buf and utf32_output
14365 std::pair<const char16_t*, char32_t*> arm_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_out) {
14367 const char16_t* end = buf + len;
14372 while (buf + 8 <= end) {
14373 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14384 buf += 8;
14392 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14394 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14400 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14408 buf += k;
14411 return std::make_pair(buf, reinterpret_cast<char32_t*>(utf32_output));
14418 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
14422 std::pair<result, char32_t*> arm_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_out) {
14424 const char16_t* start = buf;
14425 const char16_t* end = buf + len;
14430 while (buf + 8 <= end) {
14431 uint16x8_t in = vld1q_u16(reinterpret_cast<const uint16_t *>(buf));
14442 buf += 8;
14450 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14452 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
14458 uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
14461 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast<char32_t*>(utf32_output)); }
14466 buf += k;
14469 return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char32_t*>(utf32_output));
14474 std::pair<const char32_t*, char*> arm_convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) {
14475 const char32_t* end = buf + len;
14476 while (buf + 8 <= end) {
14477 uint32x4_t in1 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14478 uint32x4_t in2 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14487 buf += 8;
14493 return std::make_pair(buf, latin1_output);
14497 std::pair<result, char*> arm_convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) {
14498 const char32_t* start = buf;
14499 const char32_t* end = buf + len;
14501 while (buf + 8 <= end) {
14502 uint32x4_t in1 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14503 uint32x4_t in2 = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14513 buf += 8;
14518 uint32_t word = buf[k];
14522 return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
14527 return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
14531 std::pair<const char32_t*, char*> arm_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_out) {
14533 const char32_t* end = buf + len;
14539 while (buf + 8 < end) {
14540 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14541 uint32x4_t nextin = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14555 buf += 8;
14603 buf += 8;
14705 buf += 8;
14723 buf += 8;
14732 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14734 uint32_t word = buf[k];
14753 buf += k;
14761 return std::make_pair(buf, reinterpret_cast<char*>(utf8_output));
14765 std::pair<result, char*> arm_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_out) {
14767 const char32_t* start = buf;
14768 const char32_t* end = buf + len;
14772 while (buf + 8 < end) {
14773 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
14774 uint32x4_t nextin = vld1q_u32(reinterpret_cast<const uint32_t *>(buf+4));
14788 buf += 8;
14836 buf += 8;
14847 return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast<char*>(utf8_output));
14943 buf += 8;
14962 buf += 8;
14971 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
14973 uint32_t word = buf[k];
14980 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast<char*>(utf8_output)); }
14985 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast<char*>(utf8_output)); }
14992 buf += k;
14996 return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char*>(utf8_output));
15001 std::pair<const char32_t*, char16_t*> arm_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_out) {
15003 const char32_t* end = buf + len;
15007 while(buf + 4 <= end) {
15008 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
15021 buf += 4;
15025 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
15027 uint32_t word = buf[k];
15046 buf += k;
15055 return std::make_pair(buf, reinterpret_cast<char16_t*>(utf16_output));
15060 std::pair<result, char16_t*> arm_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_out) {
15062 const char32_t* start = buf;
15063 const char32_t* end = buf + len;
15065 while(buf + 4 <= end) {
15066 uint32x4_t in = vld1q_u32(reinterpret_cast<const uint32_t *>(buf));
15076 return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast<char16_t*>(utf16_output));
15082 buf += 4;
15086 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
15088 uint32_t word = buf[k];
15091 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast<char16_t*>(utf16_output)); }
15095 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast<char16_t*>(utf16_output)); }
15107 buf += k;
15111 return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char16_t*>(utf16_output));
15142 const uint8_t *buf;
15150 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
15152 buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
15154 buf[sizeof(simd8x64<uint8_t>)] = '\0';
15155 return buf;
15160 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
15161 in.store(reinterpret_cast<uint8_t*>(buf));
15163 if (buf[i] < ' ') { buf[i] = '_'; }
15165 buf[sizeof(simd8x64<uint8_t>)] = '\0';
15166 return buf;
15170 static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
15172 buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
15174 buf[64] = '\0';
15175 return buf;
15179 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
15191 return &buf[idx];
15198 std::memcpy(dst, buf + idx, len - idx);
16755 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
16756 return arm64::utf8_validation::generic_validate_utf8(buf,len);
16759 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
16760 return arm64::utf8_validation::generic_validate_utf8_with_errors(buf,len);
16763 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
16764 return arm64::utf8_validation::generic_validate_ascii(buf,len);
16767 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
16768 return arm64::utf8_validation::generic_validate_ascii_with_errors(buf,len);
16771 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
16772 const char16_t* tail = arm_validate_utf16<endianness::LITTLE>(buf, len);
16774 return scalar::utf16::validate<endianness::LITTLE>(tail, len - (tail - buf));
16780 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
16781 const char16_t* tail = arm_validate_utf16<endianness::BIG>(buf, len);
16783 return scalar::utf16::validate<endianness::BIG>(tail, len - (tail - buf));
16789 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
16790 result res = arm_validate_utf16_with_errors<endianness::LITTLE>(buf, len);
16792 result scalar_res = scalar::utf16::validate_with_errors<endianness::LITTLE>(buf + res.count, len - res.count);
16799 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
16800 result res = arm_validate_utf16_with_errors<endianness::BIG>(buf, len);
16802 result scalar_res = scalar::utf16::validate_with_errors<endianness::BIG>(buf + res.count, len - res.count);
16809 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
16810 const char32_t* tail = arm_validate_utf32le(buf, len);
16812 return scalar::utf32::validate(tail, len - (tail - buf));
16818 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
16819 result res = arm_validate_utf32le_with_errors(buf, len);
16821 result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count);
16828 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
16829 std::pair<const char*, char*> ret = arm_convert_latin1_to_utf8(buf, len, utf8_output);
16832 if (ret.first != buf + len) {
16834 ret.first, len - (ret.first - buf), ret.second);
16840 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16841 std::pair<const char*, char16_t*> ret = arm_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
16843 if (ret.first != buf + len) {
16845 ret.first, len - (ret.first - buf), ret.second);
16851 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16852 std::pair<const char*, char16_t*> ret = arm_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
16854 if (ret.first != buf + len) {
16856 ret.first, len - (ret.first - buf), ret.second);
16862 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16863 std::pair<const char*, char32_t*> ret = arm_convert_latin1_to_utf32(buf, len, utf32_output);
16865 if (ret.first != buf + len) {
16867 ret.first, len - (ret.first - buf), ret.second);
16873 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
16875 return converter.convert(buf, len, latin1_output);
16878 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
16880 return converter.convert_with_errors(buf, len, latin1_output);
16883 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
16884 return arm64::utf8_to_latin1::convert_valid(buf,len,latin1_output);
16887 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16889 return converter.convert<endianness::LITTLE>(buf, len, utf16_output);
16892 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16894 return converter.convert<endianness::BIG>(buf, len, utf16_output);
16897 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16899 return converter.convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
16902 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
16904 return converter.convert_with_errors<endianness::BIG>(buf, len, utf16_output);
16917 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16919 return converter.convert(buf, len, utf32_output);
16922 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
16924 return converter.convert_with_errors(buf, len, utf32_output);
16932 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16933 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
16937 if (ret.first != buf + len) {
16939 ret.first, len - (ret.first - buf), ret.second);
16946 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16947 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
16951 if (ret.first != buf + len) {
16953 ret.first, len - (ret.first - buf), ret.second);
16960 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16961 std::pair<result, char*> ret = arm_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(buf, len, latin1_output);
16965 buf + ret.first.count, len - ret.first.count, ret.second);
16977 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16978 std::pair<result, char*> ret = arm_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf, len, latin1_output);
16982 buf + ret.first.count, len - ret.first.count, ret.second);
16994 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
16996 return convert_utf16be_to_latin1(buf, len, latin1_output);
16999 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17001 return convert_utf16le_to_latin1(buf, len, latin1_output);
17004 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17005 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
17008 if (ret.first != buf + len) {
17010 ret.first, len - (ret.first - buf), ret.second);
17017 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17018 std::pair<const char16_t*, char*> ret = arm_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
17021 if (ret.first != buf + len) {
17023 ret.first, len - (ret.first - buf), ret.second);
17030 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17032 std::pair<result, char*> ret = arm_convert_utf16_to_utf8_with_errors<endianness::LITTLE>(buf, len, utf8_output);
17036 buf + ret.first.count, len - ret.first.count, ret.second);
17048 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17050 std::pair<result, char*> ret = arm_convert_utf16_to_utf8_with_errors<endianness::BIG>(buf, len, utf8_output);
17054 buf + ret.first.count, len - ret.first.count, ret.second);
17066 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17067 return convert_utf16le_to_utf8(buf, len, utf8_output);
17070 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17071 return convert_utf16be_to_utf8(buf, len, utf8_output);
17074 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17075 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_utf8(buf, len, utf8_output);
17078 if (ret.first != buf + len) {
17080 ret.first, len - (ret.first - buf), ret.second);
17087 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17089 std::pair<result, char*> ret = arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
17092 buf + ret.first.count, len - ret.first.count, ret.second);
17104 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17105 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
17108 if (ret.first != buf + len) {
17110 ret.first, len - (ret.first - buf), ret.second);
17117 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17118 std::pair<const char16_t*, char32_t*> ret = arm_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
17121 if (ret.first != buf + len) {
17123 ret.first, len - (ret.first - buf), ret.second);
17130 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17132 std::pair<result, char32_t*> ret = arm_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
17136 buf + ret.first.count, len - ret.first.count, ret.second);
17148 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17150 std::pair<result, char32_t*> ret = arm_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
17154 buf + ret.first.count, len - ret.first.count, ret.second);
17166 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17167 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_latin1(buf, len, latin1_output);
17171 if (ret.first != buf + len) {
17173 ret.first, len - (ret.first - buf), ret.second);
17180 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17181 std::pair<result, char*> ret = arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
17185 buf + ret.first.count, len - ret.first.count, ret.second);
17197 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17198 std::pair<const char32_t*, char*> ret = arm_convert_utf32_to_latin1(buf, len, latin1_output);
17202 if (ret.first != buf + len) {
17204 ret.first, len - (ret.first - buf), ret.second);
17210 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17212 return convert_utf32_to_utf8(buf, len, utf8_output);
17215 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17216 std::pair<const char32_t*, char16_t*> ret = arm_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
17219 if (ret.first != buf + len) {
17221 ret.first, len - (ret.first - buf), ret.second);
17228 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17229 std::pair<const char32_t*, char16_t*> ret = arm_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
17232 if (ret.first != buf + len) {
17234 ret.first, len - (ret.first - buf), ret.second);
17241 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17243 std::pair<result, char16_t*> ret = arm_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(buf, len, utf16_output);
17246 buf + ret.first.count, len - ret.first.count, ret.second);
17258 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17260 std::pair<result, char16_t*> ret = arm_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len, utf16_output);
17263 buf + ret.first.count, len - ret.first.count, ret.second);
17275 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17276 return convert_utf32_to_utf16le(buf, len, utf16_output);
17279 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17280 return convert_utf32_to_utf16be(buf, len, utf16_output);
17283 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17284 return convert_utf16le_to_utf32(buf, len, utf32_output);
17287 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17288 return convert_utf16be_to_utf32(buf, len, utf32_output);
17307 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
17308 return count_utf8(buf,len);
17462 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
17463 return scalar::utf8::validate(buf, len);
17466 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
17467 return scalar::utf8::validate_with_errors(buf, len);
17470 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
17471 return scalar::ascii::validate(buf, len);
17474 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
17475 return scalar::ascii::validate_with_errors(buf, len);
17478 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
17479 return scalar::utf16::validate<endianness::LITTLE>(buf, len);
17482 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
17483 return scalar::utf16::validate<endianness::BIG>(buf, len);
17486 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
17487 return scalar::utf16::validate_with_errors<endianness::LITTLE>(buf, len);
17490 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
17491 return scalar::utf16::validate_with_errors<endianness::BIG>(buf, len);
17494 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
17495 return scalar::utf32::validate(buf, len);
17498 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
17499 return scalar::utf32::validate_with_errors(buf, len);
17502 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
17503 return scalar::latin1_to_utf8::convert(buf,len,utf8_output);
17506 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17507 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(buf, len, utf16_output);
17510 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17511 return scalar::latin1_to_utf16::convert<endianness::BIG>(buf, len, utf16_output);
17514 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept {
17515 return scalar::latin1_to_utf32::convert(buf,len,utf32_output);
17518 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
17519 return scalar::utf8_to_latin1::convert(buf, len, latin1_output);
17522 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
17523 return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output);
17526 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
17527 return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output);
17530 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17531 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(buf, len, utf16_output);
17534 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17535 return scalar::utf8_to_utf16::convert<endianness::BIG>(buf, len, utf16_output);
17538 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17539 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
17542 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17543 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(buf, len, utf16_output);
17546 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17547 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(buf, len, utf16_output);
17550 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
17551 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(buf, len, utf16_output);
17554 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17555 return scalar::utf8_to_utf32::convert(buf, len, utf32_output);
17558 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
17559 return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output);
17567 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17568 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(buf, len, latin1_output);
17571 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17572 return scalar::utf16_to_latin1::convert<endianness::BIG>(buf, len, latin1_output);
17575 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17576 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(buf, len, latin1_output);
17579 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17580 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(buf, len, latin1_output);
17583 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17584 return scalar::utf16_to_latin1::convert_valid<endianness::LITTLE>(buf, len, latin1_output);
17587 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
17588 return scalar::utf16_to_latin1::convert_valid<endianness::BIG>(buf, len, latin1_output);
17591 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17592 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(buf, len, utf8_output);
17595 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17596 return scalar::utf16_to_utf8::convert<endianness::BIG>(buf, len, utf8_output);
17599 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17600 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(buf, len, utf8_output);
17603 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17604 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(buf, len, utf8_output);
17607 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17608 return scalar::utf16_to_utf8::convert_valid<endianness::LITTLE>(buf, len, utf8_output);
17611 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
17612 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(buf, len, utf8_output);
17615 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17616 return scalar::utf32_to_latin1::convert(buf, len, latin1_output);
17619 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17620 return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output);
17623 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
17624 return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output);
17627 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17628 return scalar::utf32_to_utf8::convert(buf, len, utf8_output);
17631 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17632 return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output);
17635 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
17636 return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output);
17639 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17640 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(buf, len, utf16_output);
17643 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17644 return scalar::utf32_to_utf16::convert<endianness::BIG>(buf, len, utf16_output);
17647 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17648 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
17651 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17652 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(buf, len, utf16_output);
17655 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17656 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(buf, len, utf16_output);
17659 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
17660 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(buf, len, utf16_output);
17663 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17664 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(buf, len, utf32_output);
17667 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17668 return scalar::utf16_to_utf32::convert<endianness::BIG>(buf, len, utf32_output);
17671 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17672 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(buf, len, utf32_output);
17675 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17676 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(buf, len, utf32_output);
17679 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17680 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(buf, len, utf32_output);
17683 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
17684 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(buf, len, utf32_output);
17703 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
17704 return scalar::utf8::count_code_points(buf,len);
19165 simdutf_really_inline size_t process_block_from_utf8_to_latin1(const char *buf, size_t len,
19172 __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf);
19224 size_t utf8_to_latin1_avx512(const char *buf, size_t len, char *latin_output) {
19233 size_t written = process_block_from_utf8_to_latin1<false>(buf + pos, 64, latin_output, minus64,
19245 process_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64, one,
19262 simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1(const char *buf, size_t len,
19269 __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf);
19299 size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len,
19310 buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6);
19318 process_valid_block_from_utf8_to_latin1<true>(buf + pos, remaining, latin_output, minus64,
19329 size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len,
19331 const char16_t *end = buf + len;
19341 while (buf + 32 <= end) {
19342 __m512i in = _mm512_loadu_si512((__m512i *)buf);
19353 buf += 32;
19355 if (buf < end) {
19356 uint32_t mask(uint32_t(1 << (end - buf)) - 1);
19357 __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
19373 icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
19375 const char16_t *end = buf + len;
19376 const char16_t *start = buf;
19386 while (buf + 32 <= end) {
19387 __m512i in = _mm512_loadu_si512((__m512i *)buf);
19393 while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
19394 : uint16_t(*buf))) <= 0xff) {
19396 buf++;
19398 return std::make_pair(result(error_code::TOO_LARGE, buf - start),
19405 buf += 32;
19407 if (buf < end) {
19408 uint32_t mask(uint32_t(1 << (end - buf)) - 1);
19409 __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
19416 while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
19417 : uint16_t(*buf))) <= 0xff) {
19419 buf++;
19421 return std::make_pair(result(error_code::TOO_LARGE, buf - start),
19634 Returns a pair: the first unprocessed byte from buf and utf32_output
19638 std::tuple<const char16_t*, char32_t*, bool> convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
19639 const char16_t* end = buf + len;
19654 while (std::distance(buf,end) >= 32) {
19655 // Always safe because buf + 32 <= end so that end - buf >= 32 bytes:
19656 __m512i in = _mm512_loadu_si512((__m512i*)buf);
19722 buf += 31;
19726 return std::make_tuple(buf+carry, utf32_output, false);
19734 buf += 32;
19738 return std::make_tuple(buf+carry, utf32_output, true);
19743 size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len,
19745 const char32_t *end = buf + len;
19751 while (buf + 16 <= end) {
19752 __m512i in = _mm512_loadu_si512((__m512i *)buf);
19759 buf += 16;
19761 if (buf < end) {
19762 uint16_t mask = uint16_t((1 << (end - buf)) - 1);
19763 __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
19775 icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
19777 const char32_t *end = buf + len;
19778 const char32_t *start = buf;
19784 while (buf + 16 <= end) {
19785 __m512i in = _mm512_loadu_si512((__m512i *)buf);
19787 while (uint32_t(*buf) <= 0xff) {
19788 *latin1_output++ = uint8_t(*buf++);
19790 return std::make_pair(result(error_code::TOO_LARGE, buf - start),
19796 buf += 16;
19798 if (buf < end) {
19799 uint16_t mask = uint16_t((1 << (end - buf)) - 1);
19800 __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
19802 while (uint32_t(*buf) <= 0xff) {
19803 *latin1_output++ = uint8_t(*buf++);
19805 return std::make_pair(result(error_code::TOO_LARGE, buf - start),
19819 std::pair<const char32_t*, char*> avx512_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
19820 const char32_t* end = buf + len;
19832 while (buf + 16 + safety_margin <= end) {
19833 __m256i in = _mm256_loadu_si256((__m256i*)buf);
19834 __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
19849 buf += 16;
19901 buf += 16;
19982 buf += 16;
20014 buf += 16;
20022 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
20024 uint32_t word = buf[k];
20043 buf += k;
20055 return std::make_pair(buf, utf8_output);
20059 std::pair<result, char*> avx512_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
20060 const char32_t* end = buf + len;
20061 const char32_t* start = buf;
20073 while (buf + 16 + safety_margin <= end) {
20074 __m256i in = _mm256_loadu_si256((__m256i*)buf);
20075 __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
20079 return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
20094 buf += 16;
20146 buf += 16;
20159 return std::make_pair(result(error_code::SURROGATE, buf - start), utf8_output);
20232 buf += 16;
20264 buf += 16;
20272 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
20274 uint32_t word = buf[k];
20281 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf8_output); }
20286 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf8_output); }
20293 buf += k;
20297 return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
20305 std::pair<const char32_t*, char16_t*> avx512_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
20306 const char32_t* end = buf + len;
20312 while (buf + 8 + safety_margin <= end) {
20313 __m256i in = _mm256_loadu_si256((__m256i*)buf);
20334 buf += 8;
20338 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
20340 uint32_t word = buf[k];
20359 buf += k;
20366 return std::make_pair(buf, utf16_output);
20371 std::pair<result, char16_t*> avx512_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
20372 const char32_t* start = buf;
20373 const char32_t* end = buf + len;
20377 while (buf + 8 + safety_margin <= end) {
20378 __m256i in = _mm256_loadu_si256((__m256i*)buf);
20392 return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
20402 buf += 8;
20406 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
20408 uint32_t word = buf[k];
20411 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
20415 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
20427 buf += k;
20431 return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
20437 bool validate_ascii(const char* buf, size_t len) {
20438 const char* end = buf + len;
20441 for (; buf + 64 <= end; buf += 64) {
20442 const __m512i utf8 = _mm512_loadu_si512((const __m512i*)buf);
20445 if(buf < end) {
20446 const __m512i utf8 = _mm512_maskz_loadu_epi8((uint64_t(1) << (end-buf)) - 1,(const __m512i*)buf);
20455 const char32_t* validate_utf32(const char32_t* buf, size_t len) {
20456 const char32_t* end = len >= 16 ? buf + len - 16 : nullptr;
20462 while (buf <= end) {
20463 __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf);
20464 buf += 16;
20480 return buf;
20567 size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, char *utf8_output) {
20572 __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20577 __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
20584 __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos));
20629 std::pair<const char*, char32_t*> avx512_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
20634 __m128i in = _mm_loadu_si128((__m128i*)&buf[i]);
20644 return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
20665 const char *buf = input;
20667 const char *start = buf;
20678 while (buf + 64 <= end) {
20679 __m512i in = _mm512_loadu_si512((__m512i *)buf);
20706 buf +=
20711 buf += 32 * sizeof(char16_t);
20713 is_utf16 = validate_utf16le(reinterpret_cast<const char16_t *>(buf),
20714 (end - buf) / sizeof(char16_t));
20726 const char32_t *input32 = reinterpret_cast<const char32_t *>(buf);
20744 buf += 64;
20750 size_t current_length = static_cast<size_t>(buf - start);
20753 (1ULL << (length - current_length)) - 1, (const __m512i *)buf);
20763 reinterpret_cast<const char16_t *>(buf),
20764 (length - (buf - start)) / 2)) {
20771 (1ULL << (length - static_cast<size_t>(buf - start))) - 1,
20772 (const __m512i *)buf),
20789 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
20791 const char* ptr = buf;
20805 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
20807 const char* ptr = buf;
20815 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
20826 result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast<const char*>(buf), reinterpret_cast<const char*>(buf + count), len - count);
20835 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
20836 return icelake::validate_ascii(buf, len);
20839 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
20840 const char* buf_orig = buf;
20841 const char* end = buf + len;
20843 for (; buf + 64 <= end; buf += 64) {
20844 const __m512i input = _mm512_loadu_si512((const __m512i*)buf);
20847 return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u64(notascii));
20851 const __m512i input = _mm512_maskz_loadu_epi8((1ULL<<(end - buf))-1, (const __m512i*)buf);
20854 return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u64(notascii));
20860 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
20861 const char16_t *end = buf + len;
20863 for(;buf + 32 <= end; ) {
20864 __m512i in = _mm512_loadu_si512((__m512i*)buf);
20876 buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
20878 buf += 32;
20881 buf += 32;
20884 if(buf < end) {
20885 __m512i in = _mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf);
20900 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
20901 const char16_t *end = buf + len;
20912 for(;buf + 32 <= end; ) {
20913 __m512i in = _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i*)buf), byteflip);
20925 buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
20927 buf += 32;
20930 buf += 32;
20933 if(buf < end) {
20934 __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf), byteflip);
20949 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
20950 const char16_t *start_buf = buf;
20951 const char16_t *end = buf + len;
20952 for(;buf + 32 <= end; ) {
20953 __m512i in = _mm512_loadu_si512((__m512i*)buf);
20963 return result(error_code::SURROGATE, (buf - start_buf) + (extra_low < extra_high ? extra_low : extra_high));
20967 buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
20969 buf += 32;
20972 buf += 32;
20975 if(buf < end) {
20976 __m512i in = _mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf);
20986 return result(error_code::SURROGATE, (buf - start_buf) + (extra_low < extra_high ? extra_low : extra_high));
20993 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
20994 const char16_t *start_buf = buf;
20995 const char16_t *end = buf + len;
21006 for(;buf + 32 <= end; ) {
21007 __m512i in = _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i*)buf), byteflip);
21017 return result(error_code::SURROGATE, (buf - start_buf) + (extra_low < extra_high ? extra_low : extra_high));
21021 buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
21023 buf += 32;
21026 buf += 32;
21029 if(buf < end) {
21030 __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf), byteflip);
21040 return result(error_code::SURROGATE, (buf - start_buf) + (extra_low < extra_high ? extra_low : extra_high));
21047 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
21048 const char32_t * tail = icelake::validate_utf32(buf, len);
21050 return scalar::utf32::validate(tail, len - (tail - buf));
21056 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
21058 const char32_t* end = len >= 16 ? buf + len - 16 : nullptr;
21059 const char32_t* buf_orig = buf;
21060 while (buf <= end) {
21061 __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf);
21065 return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u32(outside_range));
21073 return result(error_code::SURROGATE, buf - buf_orig + _tzcnt_u32(surrogate_range));
21075 buf += 16;
21077 if(buf < buf_orig + len) {
21078 __m512i utf32 = _mm512_maskz_loadu_epi32(__mmask16((1<<(buf_orig + len - buf))-1),(const __m512i*)buf);
21082 return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u32(outside_range));
21089 return result(error_code::SURROGATE, buf - buf_orig + _tzcnt_u32(surrogate_range));
21096 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
21097 return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output);
21100 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21101 return icelake_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
21104 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21105 return icelake_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
21108 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
21109 std::pair<const char*, char32_t*> ret = avx512_convert_latin1_to_utf32(buf, len, utf32_output);
21112 if (ret.first != buf + len) {
21114 ret.first, len - (ret.first - buf), ret.second);
21121 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
21122 return icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
21126 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
21131 inlen = icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
21142 result res = scalar::utf8_to_latin1::convert_with_errors(buf + inlen, len - inlen, latin1_output + inlen);
21149 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
21150 return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output);
21153 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21154 utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
21161 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21162 utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16<endianness::BIG>(buf, len, utf16_output);
21169 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21170 return fast_avx512_convert_utf8_to_utf16_with_errors<endianness::LITTLE>(buf, len, utf16_output);
21173 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21174 return fast_avx512_convert_utf8_to_utf16_with_errors<endianness::BIG>(buf, len, utf16_output);
21177 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21178 utf8_to_utf16_result ret = icelake::valid_utf8_to_fixed_length<endianness::LITTLE, char16_t>(buf, len, utf16_output);
21180 const char* end = buf + len;
21196 ret.first, len - (ret.first - buf), ret.second);
21204 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
21205 utf8_to_utf16_result ret = icelake::valid_utf8_to_fixed_length<endianness::BIG, char16_t>(buf, len, utf16_output);
21207 const char* end = buf + len;
21223 ret.first, len - (ret.first - buf), ret.second);
21232 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_out) const noexcept {
21234 utf8_to_utf32_result ret = icelake::validating_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21239 const char* end = buf + len;
21255 ret.first, len - (ret.first - buf), utf32_out + saved_bytes);
21263 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32) const noexcept {
21265 auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21269 // with the ability to go back up to new_buf - buf bytes, and read len - (new_buf - buf) bytes forward.
21270 result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(new_buf - buf, new_buf, len - (new_buf - buf), reinterpret_cast<char32_t *>(std::get<1>(ret)));
21271 res.count += (std::get<0>(ret) - buf);
21275 const char* end = buf + len;
21291 std::get<0>(ret), len - (std::get<0>(ret) - buf), reinterpret_cast<char32_t *>(utf32_output) + saved_bytes);
21293 scalar_result.count += (std::get<0>(ret) - buf);
21304 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_out) const noexcept {
21306 utf8_to_utf32_result ret = icelake::valid_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(buf, len, utf32_output);
21308 const char* end = buf + len;
21324 ret.first, len - (ret.first - buf), utf32_out + saved_bytes);
21333 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21334 return icelake_convert_utf16_to_latin1<endianness::LITTLE>(buf,len,latin1_output);
21337 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21338 return icelake_convert_utf16_to_latin1<endianness::BIG>(buf,len,latin1_output);
21341 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21342 return icelake_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(buf,len,latin1_output).first;
21345 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21346 return icelake_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf,len,latin1_output).first;
21349 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21351 return convert_utf16be_to_latin1(buf, len, latin1_output);
21354 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
21356 return convert_utf16le_to_latin1(buf, len, latin1_output);
21359 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21361 size_t inlen = utf16_to_utf8_avx512i<endianness::LITTLE>(buf, len, (unsigned char*)utf8_output, &outlen);
21366 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21368 size_t inlen = utf16_to_utf8_avx512i<endianness::BIG>(buf, len, (unsigned char*)utf8_output, &outlen);
21373 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21375 size_t inlen = utf16_to_utf8_avx512i<endianness::LITTLE>(buf, len, (unsigned char*)utf8_output, &outlen);
21377 result res = scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(buf + inlen, len - outlen, utf8_output + outlen);
21384 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21386 size_t inlen = utf16_to_utf8_avx512i<endianness::BIG>(buf, len, (unsigned char*)utf8_output, &outlen);
21388 result res = scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(buf + inlen, len - outlen, utf8_output + outlen);
21395 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21396 return convert_utf16le_to_utf8(buf, len, utf8_output);
21399 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
21400 return convert_utf16be_to_utf8(buf, len, utf8_output);
21403 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21404 return icelake_convert_utf32_to_latin1(buf,len,latin1_output);
21407 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21408 return icelake_convert_utf32_to_latin1_with_errors(buf,len,latin1_output).first;
21411 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
21412 return icelake_convert_utf32_to_latin1(buf,len,latin1_output);
21416 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21417 std::pair<const char32_t*, char*> ret = avx512_convert_utf32_to_utf8(buf, len, utf8_output);
21420 if (ret.first != buf + len) {
21422 ret.first, len - (ret.first - buf), ret.second);
21429 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21431 std::pair<result, char*> ret = icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
21434 buf + ret.first.count, len - ret.first.count, ret.second);
21446 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
21447 return convert_utf32_to_utf8(buf, len, utf8_output);
21450 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21451 std::pair<const char32_t*, char16_t*> ret = avx512_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
21454 if (ret.first != buf + len) {
21456 ret.first, len - (ret.first - buf), ret.second);
21463 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21464 std::pair<const char32_t*, char16_t*> ret = avx512_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
21467 if (ret.first != buf + len) {
21469 ret.first, len - (ret.first - buf), ret.second);
21476 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21478 std::pair<result, char16_t*> ret = avx512_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(buf, len, utf16_output);
21481 buf + ret.first.count, len - ret.first.count, ret.second);
21493 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21495 std::pair<result, char16_t*> ret = avx512_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len, utf16_output);
21498 buf + ret.first.count, len - ret.first.count, ret.second);
21510 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21511 return convert_utf32_to_utf16le(buf, len, utf16_output);
21514 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
21515 return convert_utf32_to_utf16be(buf, len, utf16_output);
21518 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21519 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21522 if (std::get<0>(ret) != buf + len) {
21524 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21531 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21532 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21535 if (std::get<0>(ret) != buf + len) {
21537 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21544 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21545 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21548 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21549 scalar_res.count += (std::get<0>(ret) - buf);
21553 if (std::get<0>(ret) != buf + len) {
21555 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21557 scalar_res.count += (std::get<0>(ret) - buf);
21567 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21568 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21571 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21572 scalar_res.count += (std::get<0>(ret) - buf);
21576 if (std::get<0>(ret) != buf + len) {
21578 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21580 scalar_res.count += (std::get<0>(ret) - buf);
21590 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21591 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
21594 if (std::get<0>(ret) != buf + len) {
21596 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21603 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
21604 std::tuple<const char16_t*, char32_t*, bool> ret = icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
21607 if (std::get<0>(ret) != buf + len) {
21609 std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
21750 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
21751 return count_utf8(buf,len);
22040 int avx2_detect_encodings(const char * buf, size_t len) {
22041 const char* start = buf;
22042 const char* end = buf + len;
22057 while(buf + 64 <= end) {
22058 __m256i in = _mm256_loadu_si256((__m256i*)buf);
22059 __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
22085 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
22150 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
22192 buf += 64;
22198 if (static_cast<size_t>(buf - start) != len) {
22201 std::memcpy(block, buf, len - (buf - start));
22210 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
22217 if (_mm256_testz_si256(is_zero, is_zero) == 1 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
22610 std::pair<const char*, char32_t*> avx2_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
22615 __m128i in = _mm_loadl_epi64((__m128i*)&buf[i]);
22625 return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
22943 avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len,
22945 const char16_t *end = buf + len;
22946 while (buf + 16 <= end) {
22948 __m256i in = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf));
22969 buf += 16;
22975 return std::make_pair(buf, latin1_output);
22980 avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
22982 const char16_t *start = buf;
22983 const char16_t *end = buf + len;
22984 while (buf + 16 <= end) {
22985 __m256i in = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf));
23004 buf += 16;
23010 ? scalar::utf16::swap_bytes(buf[k])
23011 : buf[k];
23016 result{error_code::TOO_LARGE, (size_t)(buf - start + k)},
23020 buf += 16;
23023 return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)},
23079 Returns a pair: the first unprocessed byte from buf and utf8_output
23083 std::pair<const char16_t*, char*> avx2_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_output) {
23084 const char16_t* end = buf + len;
23091 while (buf + 16 + safety_margin <= end) {
23092 __m256i in = _mm256_loadu_si256((__m256i*)buf);
23106 buf += 16;
23159 buf += 16;
23244 buf += 16;
23276 buf += 16;
23284 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
23286 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23299 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23310 buf += k;
23313 return std::make_pair(buf, utf8_output);
23320 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
23324 std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
23325 const char16_t* start = buf;
23326 const char16_t* end = buf + len;
23334 while (buf + 16 + safety_margin <= end) {
23335 __m256i in = _mm256_loadu_si256((__m256i*)buf);
23349 buf += 16;
23402 buf += 16;
23487 buf += 16;
23519 buf += 16;
23527 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
23529 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23542 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23545 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf8_output); }
23553 buf += k;
23556 return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
23611 Returns a pair: the first unprocessed byte from buf and utf32_output
23615 std::pair<const char16_t*, char32_t*> avx2_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
23616 const char16_t* end = buf + len;
23620 while (buf + 16 <= end) {
23621 __m256i in = _mm256_loadu_si256((__m256i*)buf);
23643 buf += 16;
23651 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
23653 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23660 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23668 buf += k;
23671 return std::make_pair(buf, utf32_output);
23678 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
23682 std::pair<result, char32_t*> avx2_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
23683 const char16_t* start = buf;
23684 const char16_t* end = buf + len;
23688 while (buf + 16 <= end) {
23689 __m256i in = _mm256_loadu_si256((__m256i*)buf);
23711 buf += 16;
23719 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
23721 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
23728 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
23731 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf32_output); }
23736 buf += k;
23739 return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output);
23745 avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len,
23757 __m256i in1 = _mm256_loadu_si256((__m256i *)buf);
23758 __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8));
23781 buf += 16;
23784 return std::make_pair(buf, latin1_output);
23787 avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
23797 const char32_t *start = buf;
23800 __m256i in1 = _mm256_loadu_si256((__m256i *)buf);
23801 __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8));
23808 char32_t codepoint = buf[k];
23812 return std::make_pair(result(error_code::TOO_LARGE, buf - start + k),
23816 buf += 8;
23830 buf += 16;
23834 return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
23838 std::pair<const char32_t*, char*> avx2_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
23839 const char32_t* end = buf + len;
23851 while (buf + 16 + safety_margin <= end) {
23852 __m256i in = _mm256_loadu_si256((__m256i*)buf);
23853 __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
23868 buf += 16;
23920 buf += 16;
24001 buf += 16;
24033 buf += 16;
24041 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
24043 uint32_t word = buf[k];
24062 buf += k;
24074 return std::make_pair(buf, utf8_output);
24078 std::pair<result, char*> avx2_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
24079 const char32_t* end = buf + len;
24080 const char32_t* start = buf;
24092 while (buf + 16 + safety_margin <= end) {
24093 __m256i in = _mm256_loadu_si256((__m256i*)buf);
24094 __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
24098 return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
24113 buf += 16;
24165 buf += 16;
24178 return std::make_pair(result(error_code::SURROGATE, buf - start), utf8_output);
24251 buf += 16;
24283 buf += 16;
24291 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
24293 uint32_t word = buf[k];
24300 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf8_output); }
24305 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf8_output); }
24312 buf += k;
24316 return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
24321 std::pair<const char32_t*, char16_t*> avx2_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
24322 const char32_t* end = buf + len;
24328 while (buf + 8 + safety_margin <= end) {
24329 __m256i in = _mm256_loadu_si256((__m256i*)buf);
24350 buf += 8;
24354 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
24356 uint32_t word = buf[k];
24375 buf += k;
24382 return std::make_pair(buf, utf16_output);
24387 std::pair<result, char16_t*> avx2_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
24388 const char32_t* start = buf;
24389 const char32_t* end = buf + len;
24393 while (buf + 8 + safety_margin <= end) {
24394 __m256i in = _mm256_loadu_si256((__m256i*)buf);
24408 return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
24418 buf += 8;
24422 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
24424 uint32_t word = buf[k];
24427 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
24431 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
24443 buf += k;
24447 return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
24553 const uint8_t *buf;
24561 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
24563 buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
24565 buf[sizeof(simd8x64<uint8_t>)] = '\0';
24566 return buf;
24571 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
24572 in.store(reinterpret_cast<uint8_t*>(buf));
24574 if (buf[i] < ' ') { buf[i] = '_'; }
24576 buf[sizeof(simd8x64<uint8_t>)] = '\0';
24577 return buf;
24581 static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
24583 buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
24585 buf[64] = '\0';
24586 return buf;
24590 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
24602 return &buf[idx];
24609 std::memcpy(dst, buf + idx, len - idx);
26163 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
26164 return haswell::utf8_validation::generic_validate_utf8(buf,len);
26167 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
26168 return haswell::utf8_validation::generic_validate_utf8_with_errors(buf,len);
26171 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
26172 return haswell::utf8_validation::generic_validate_ascii(buf,len);
26175 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
26176 return haswell::utf8_validation::generic_validate_ascii_with_errors(buf,len);
26179 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
26180 const char16_t* tail = avx2_validate_utf16<endianness::LITTLE>(buf, len);
26182 return scalar::utf16::validate<endianness::LITTLE>(tail, len - (tail - buf));
26188 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
26189 const char16_t* tail = avx2_validate_utf16<endianness::BIG>(buf, len);
26191 return scalar::utf16::validate<endianness::BIG>(tail, len - (tail - buf));
26197 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
26198 result res = avx2_validate_utf16_with_errors<endianness::LITTLE>(buf, len);
26200 result scalar_res = scalar::utf16::validate_with_errors<endianness::LITTLE>(buf + res.count, len - res.count);
26207 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
26208 result res = avx2_validate_utf16_with_errors<endianness::BIG>(buf, len);
26210 result scalar_res = scalar::utf16::validate_with_errors<endianness::BIG>(buf + res.count, len - res.count);
26217 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
26218 const char32_t* tail = avx2_validate_utf32le(buf, len);
26220 return scalar::utf32::validate(tail, len - (tail - buf));
26226 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
26227 result res = avx2_validate_utf32le_with_errors(buf, len);
26229 result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count);
26236 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
26237 std::pair<const char*, char*> ret = avx2_convert_latin1_to_utf8(buf, len, utf8_output);
26240 if (ret.first != buf + len) {
26242 ret.first, len - (ret.first - buf), ret.second);
26249 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26250 std::pair<const char*, char16_t*> ret = avx2_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
26253 if (ret.first != buf + len) {
26255 ret.first, len - (ret.first - buf), ret.second);
26262 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26263 std::pair<const char*, char16_t*> ret = avx2_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
26266 if (ret.first != buf + len) {
26268 ret.first, len - (ret.first - buf), ret.second);
26275 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26276 std::pair<const char*, char32_t*> ret = avx2_convert_latin1_to_utf32(buf, len, utf32_output);
26279 if (ret.first != buf + len) {
26281 ret.first, len - (ret.first - buf), ret.second);
26288 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
26290 return converter.convert(buf, len, latin1_output);
26293 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
26295 return converter.convert_with_errors(buf, len, latin1_output);
26303 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26305 return converter.convert<endianness::LITTLE>(buf, len, utf16_output);
26308 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26310 return converter.convert<endianness::BIG>(buf, len, utf16_output);
26313 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26315 return converter.convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
26318 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
26320 return converter.convert_with_errors<endianness::BIG>(buf, len, utf16_output);
26333 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26335 return converter.convert(buf, len, utf32_output);
26338 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
26340 return converter.convert_with_errors(buf, len, utf32_output);
26349 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26350 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
26353 if (ret.first != buf + len) {
26355 ret.first, len - (ret.first - buf), ret.second);
26362 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26363 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
26366 if (ret.first != buf + len) {
26368 ret.first, len - (ret.first - buf), ret.second);
26375 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26376 std::pair<result, char*> ret = avx2_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(buf, len, latin1_output);
26380 buf + ret.first.count, len - ret.first.count, ret.second);
26392 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26393 std::pair<result, char*> ret = avx2_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf, len, latin1_output);
26397 buf + ret.first.count, len - ret.first.count, ret.second);
26409 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26411 return convert_utf16be_to_latin1(buf, len, latin1_output);
26414 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
26416 return convert_utf16le_to_latin1(buf, len, latin1_output);
26419 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26420 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
26423 if (ret.first != buf + len) {
26425 ret.first, len - (ret.first - buf), ret.second);
26432 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26433 std::pair<const char16_t*, char*> ret = haswell::avx2_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
26436 if (ret.first != buf + len) {
26438 ret.first, len - (ret.first - buf), ret.second);
26445 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26447 std::pair<result, char*> ret = haswell::avx2_convert_utf16_to_utf8_with_errors<endianness::LITTLE>(buf, len, utf8_output);
26451 buf + ret.first.count, len - ret.first.count, ret.second);
26463 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26465 std::pair<result, char*> ret = haswell::avx2_convert_utf16_to_utf8_with_errors<endianness::BIG>(buf, len, utf8_output);
26469 buf + ret.first.count, len - ret.first.count, ret.second);
26481 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26482 return convert_utf16le_to_utf8(buf, len, utf8_output);
26485 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
26486 return convert_utf16be_to_utf8(buf, len, utf8_output);
26489 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26490 std::pair<const char32_t*, char*> ret = avx2_convert_utf32_to_utf8(buf, len, utf8_output);
26493 if (ret.first != buf + len) {
26495 ret.first, len - (ret.first - buf), ret.second);
26502 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26503 std::pair<const char32_t*, char*> ret = avx2_convert_utf32_to_latin1(buf, len, latin1_output);
26506 if (ret.first != buf + len) {
26508 ret.first, len - (ret.first - buf), ret.second);
26515 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26517 std::pair<result, char*> ret = avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
26520 buf + ret.first.count, len - ret.first.count, ret.second);
26532 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
26533 return convert_utf32_to_latin1(buf,len,latin1_output);
26536 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26538 std::pair<result, char*> ret = haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
26541 buf + ret.first.count, len - ret.first.count, ret.second);
26553 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26554 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
26557 if (ret.first != buf + len) {
26559 ret.first, len - (ret.first - buf), ret.second);
26566 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26567 std::pair<const char16_t*, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
26570 if (ret.first != buf + len) {
26572 ret.first, len - (ret.first - buf), ret.second);
26579 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26581 std::pair<result, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
26585 buf + ret.first.count, len - ret.first.count, ret.second);
26597 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26599 std::pair<result, char32_t*> ret = haswell::avx2_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
26603 buf + ret.first.count, len - ret.first.count, ret.second);
26615 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
26616 return convert_utf32_to_utf8(buf, len, utf8_output);
26619 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26620 std::pair<const char32_t*, char16_t*> ret = avx2_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
26623 if (ret.first != buf + len) {
26625 ret.first, len - (ret.first - buf), ret.second);
26632 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26633 std::pair<const char32_t*, char16_t*> ret = avx2_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
26636 if (ret.first != buf + len) {
26638 ret.first, len - (ret.first - buf), ret.second);
26645 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26647 std::pair<result, char16_t*> ret = haswell::avx2_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(buf, len, utf16_output);
26650 buf + ret.first.count, len - ret.first.count, ret.second);
26662 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26664 std::pair<result, char16_t*> ret = haswell::avx2_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len, utf16_output);
26667 buf + ret.first.count, len - ret.first.count, ret.second);
26679 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26680 return convert_utf32_to_utf16le(buf, len, utf16_output);
26683 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
26684 return convert_utf32_to_utf16be(buf, len, utf16_output);
26687 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26688 return convert_utf16le_to_utf32(buf, len, utf32_output);
26691 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
26692 return convert_utf16be_to_utf32(buf, len, utf32_output);
26711 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
26712 return count_utf8(buf,len);
26925 const uint8_t *buf;
26933 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
26935 buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
26937 buf[sizeof(simd8x64<uint8_t>)] = '\0';
26938 return buf;
26943 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
26944 in.store(reinterpret_cast<uint8_t*>(buf));
26946 if (buf[i] < ' ') { buf[i] = '_'; }
26948 buf[sizeof(simd8x64<uint8_t>)] = '\0';
26949 return buf;
26953 static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
26955 buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
26957 buf[64] = '\0';
26958 return buf;
26962 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
26974 return &buf[idx];
26981 std::memcpy(dst, buf + idx, len - idx);
28162 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
28163 return ppc64::utf8_validation::generic_validate_utf8(buf,len);
28166 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
28167 return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf,len);
28170 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
28171 return ppc64::utf8_validation::generic_validate_ascii(buf,len);
28174 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
28175 return ppc64::utf8_validation::generic_validate_ascii_with_errors(buf,len);
28178 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
28179 return scalar::utf16::validate<endianness::LITTLE>(buf, len);
28182 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
28183 return scalar::utf16::validate<endianness::BIG>(buf, len);
28186 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
28187 return scalar::utf16::validate_with_errors<endianness::LITTLE>(buf, len);
28190 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
28191 return scalar::utf16::validate_with_errors<endianness::BIG>(buf, len);
28194 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
28195 return scalar::utf32::validate_with_errors(buf, len);
28198 simdutf_warn_unused bool implementation::validate_utf32(const char16_t *buf, size_t len) const noexcept {
28199 return scalar::utf32::validate(buf, len);
28202 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28206 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28210 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28214 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28218 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28222 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* /*buf*/, size_t /*len*/, char16_t* /*utf16_output*/) const noexcept {
28226 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28230 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28234 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* /*buf*/, size_t /*len*/, char32_t* /*utf16_output*/) const noexcept {
28238 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28239 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(buf, len, utf8_output);
28242 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28243 return scalar::utf16_to_utf8::convert<endianness::BIG>(buf, len, utf8_output);
28246 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28247 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(buf, len, utf8_output);
28250 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28251 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(buf, len, utf8_output);
28254 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28255 return scalar::utf16_to_utf8::convert_valid<endianness::LITTLE>(buf, len, utf8_output);
28258 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
28259 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(buf, len, utf8_output);
28262 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28263 return scalar::utf32_to_utf8::convert(buf, len, utf8_output);
28266 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28267 return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output);
28270 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
28271 return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output);
28274 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28275 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(buf, len, utf16_output);
28278 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28279 return scalar::utf32_to_utf16::convert<endianness::BIG>(buf, len, utf16_output);
28282 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28283 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
28286 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28287 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(buf, len, utf16_output);
28290 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28291 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(buf, len, utf16_output);
28294 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
28295 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(buf, len, utf16_output);
28298 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28299 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(buf, len, utf32_output);
28302 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28303 return scalar::utf16_to_utf32::convert<endianness::BIG>(buf, len, utf32_output);
28306 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28307 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(buf, len, utf32_output);
28310 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28311 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(buf, len, utf32_output);
28314 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28315 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(buf, len, utf32_output);
28318 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
28319 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(buf, len, utf32_output);
28497 int sse_detect_encodings(const char * buf, size_t len) {
28498 const char* start = buf;
28499 const char* end = buf + len;
28514 while(buf + 64 <= end) {
28515 __m128i in = _mm_loadu_si128((__m128i*)buf);
28516 __m128i secondin = _mm_loadu_si128((__m128i*)buf+1);
28517 __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2);
28518 __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3);
28552 const char16_t * input = reinterpret_cast<const char16_t*>(buf);
28621 const char32_t * input = reinterpret_cast<const char32_t*>(buf);
28669 buf += 64;
28675 if (static_cast<size_t>(buf - start) != len) {
28678 std::memcpy(block, buf, len - (buf - start));
28687 if (is_utf16 && scalar::utf16::validate<endianness::LITTLE>(reinterpret_cast<const char16_t*>(buf), (len - (buf - start))/2)) {
28694 if (_mm_testz_si128(is_zero, is_zero) == 1 && scalar::utf32::validate(reinterpret_cast<const char32_t*>(buf), (len - (buf - start))/4)) {
29072 std::pair<const char*, char32_t*> sse_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
29073 const char* end = buf + len;
29075 while (buf + 16 <= end) {
29077 __m128i in = _mm_loadu_si128((__m128i*)buf);
29096 buf += 16;
29099 return std::make_pair(buf, utf32_output);
29478 std::pair<const char16_t*, char*> sse_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
29479 const char16_t* end = buf + len;
29480 while (buf + 8 <= end) {
29482 __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buf));
29495 buf += 8;
29501 return std::make_pair(buf, latin1_output);
29505 std::pair<result, char*> sse_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
29506 const char16_t* start = buf;
29507 const char16_t* end = buf + len;
29508 while (buf + 8 <= end) {
29509 __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buf));
29520 buf += 8;
29525 uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29529 return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
29532 buf += 8;
29535 return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
29589 Returns a pair: the first unprocessed byte from buf and utf8_output
29593 std::pair<const char16_t*, char*> sse_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_output) {
29595 const char16_t* end = buf + len;
29602 while (buf + 16 + safety_margin <= end) {
29603 __m128i in = _mm_loadu_si128((__m128i*)buf);
29611 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
29623 buf += 8;
29633 buf += 16;
29649 buf += 8;
29728 buf += 8;
29748 buf += 8;
29756 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
29758 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29771 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
29782 buf += k;
29786 return std::make_pair(buf, utf8_output);
29793 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
29797 std::pair<result, char*> sse_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) {
29798 const char16_t* start = buf;
29799 const char16_t* end = buf + len;
29806 while (buf + 16 + safety_margin <= end) {
29807 __m128i in = _mm_loadu_si128((__m128i*)buf);
29815 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
29827 buf += 8;
29837 buf += 16;
29853 buf += 8;
29932 buf += 8;
29952 buf += 8;
29960 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
29962 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
29975 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
29978 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf8_output); }
29986 buf += k;
29990 return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
30044 Returns a pair: the first unprocessed byte from buf and utf8_output
30048 std::pair<const char16_t*, char32_t*> sse_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) {
30049 const char16_t* end = buf + len;
30054 while (buf + 8 <= end) {
30055 __m128i in = _mm_loadu_si128((__m128i*)buf);
30077 buf += 8;
30085 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30087 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
30093 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
30101 buf += k;
30104 return std::make_pair(buf, utf32_output);
30111 Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
30115 std::pair<result, char32_t*> sse_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) {
30116 const char16_t* start = buf;
30117 const char16_t* end = buf + len;
30122 while (buf + 8 <= end) {
30123 __m128i in = _mm_loadu_si128((__m128i*)buf);
30145 buf += 8;
30153 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30155 uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
30161 uint16_t next_word = big_endian ? scalar::utf16::swap_bytes(buf[k+1]) : buf[k+1];
30164 if((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), utf32_output); }
30169 buf += k;
30172 return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output);
30178 sse_convert_utf32_to_latin1(const char32_t *buf, size_t len,
30187 __m128i in1 = _mm_loadu_si128((__m128i *)buf);
30188 __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4));
30189 __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8));
30190 __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12));
30204 buf += 16;
30207 return std::make_pair(buf, latin1_output);
30211 sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
30213 const char32_t *start = buf;
30221 __m128i in1 = _mm_loadu_si128((__m128i *)buf);
30222 __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4));
30223 __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8));
30224 __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12));
30233 char32_t codepoint = buf[k];
30237 return std::make_pair(result(error_code::TOO_LARGE, buf - start + k),
30241 buf += 16;
30249 buf += 16;
30252 return std::make_pair(result(error_code::SUCCESS, buf - start),
30257 std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
30258 const char32_t* end = buf + len;
30270 while (buf + 16 + safety_margin <= end) { //buf is a char32_t pointer, each char32_t has 4 bytes or 32 bits, thus buf + 16 * char_32t = 512 bits = 64 bytes
30272 __m128i in = _mm_loadu_si128((__m128i*)buf);
30273 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);//These two values can hold only 8 UTF32 chars
30296 __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2);
30297 __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3);
30307 buf += 8; //the char32_t buffer pointer goes up 8 char32_t chars* 32 bits = 256 bits
30320 buf += 16;
30374 buf += 8;
30451 buf += 8;
30471 buf += 8;
30479 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30481 uint32_t word = buf[k];
30500 buf += k;
30512 return std::make_pair(buf, utf8_output);
30516 std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) {
30518 const char32_t* end = buf + len;
30519 const char32_t* start = buf;
30531 while (buf + 16 + safety_margin <= end) {
30533 __m128i in = _mm_loadu_si128((__m128i*)buf);
30534 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
30539 return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
30553 __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2);
30554 __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3);
30563 buf += 8;
30569 return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
30580 buf += 16;
30630 buf += 8;
30647 return std::make_pair(result(error_code::SURROGATE, buf - start), utf8_output);
30713 buf += 8;
30733 buf += 8;
30741 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30743 uint32_t word = buf[k];
30750 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf8_output); }
30755 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf- start + k), utf8_output); }
30762 buf += k;
30766 return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
30771 std::pair<const char32_t*, char16_t*> sse_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
30773 const char32_t* end = buf + len;
30779 while (buf + 8 <= end) {
30780 __m128i in = _mm_loadu_si128((__m128i*)buf);
30781 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
30801 buf += 8;
30805 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30807 uint32_t word = buf[k];
30826 buf += k;
30833 return std::make_pair(buf, utf16_output);
30838 std::pair<result, char16_t*> sse_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
30839 const char32_t* start = buf;
30840 const char32_t* end = buf + len;
30845 while (buf + 8 <= end) {
30846 __m128i in = _mm_loadu_si128((__m128i*)buf);
30847 __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
30860 return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
30870 buf += 8;
30874 if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
30876 uint32_t word = buf[k];
30879 if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
30883 if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
30895 buf += k;
30899 return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
30932 const uint8_t *buf;
30940 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
30942 buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
30944 buf[sizeof(simd8x64<uint8_t>)] = '\0';
30945 return buf;
30950 static char *buf = reinterpret_cast<char*>(malloc(sizeof(simd8x64<uint8_t>) + 1));
30951 in.store(reinterpret_cast<uint8_t*>(buf));
30953 if (buf[i] < ' ') { buf[i] = '_'; }
30955 buf[sizeof(simd8x64<uint8_t>)] = '\0';
30956 return buf;
30960 static char *buf = reinterpret_cast<char*>(malloc(64 + 1));
30962 buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
30964 buf[64] = '\0';
30965 return buf;
30969 simdutf_really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
30981 return &buf[idx];
30988 std::memcpy(dst, buf + idx, len - idx);
32545 simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
32546 return westmere::utf8_validation::generic_validate_utf8(buf, len);
32549 simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
32550 return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len);
32553 simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
32554 return westmere::utf8_validation::generic_validate_ascii(buf, len);
32557 simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
32558 return westmere::utf8_validation::generic_validate_ascii_with_errors(buf,len);
32561 simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
32562 const char16_t* tail = sse_validate_utf16<endianness::LITTLE>(buf, len);
32564 return scalar::utf16::validate<endianness::LITTLE>(tail, len - (tail - buf));
32570 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
32571 const char16_t* tail = sse_validate_utf16<endianness::BIG>(buf, len);
32573 return scalar::utf16::validate<endianness::BIG>(tail, len - (tail - buf));
32579 simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
32580 result res = sse_validate_utf16_with_errors<endianness::LITTLE>(buf, len);
32582 result scalar_res = scalar::utf16::validate_with_errors<endianness::LITTLE>(buf + res.count, len - res.count);
32589 simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
32590 result res = sse_validate_utf16_with_errors<endianness::BIG>(buf, len);
32592 result scalar_res = scalar::utf16::validate_with_errors<endianness::BIG>(buf + res.count, len - res.count);
32599 simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
32600 const char32_t* tail = sse_validate_utf32le(buf, len);
32602 return scalar::utf32::validate(tail, len - (tail - buf));
32608 simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
32609 result res = sse_validate_utf32le_with_errors(buf, len);
32611 result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count);
32618 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
32620 std::pair<const char*, char*> ret = sse_convert_latin1_to_utf8(buf, len, utf8_output);
32623 if (ret.first != buf + len) {
32625 ret.first, len - (ret.first - buf), ret.second);
32632 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32633 std::pair<const char*, char16_t*> ret = sse_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
32636 if (ret.first != buf + len) {
32638 ret.first, len - (ret.first - buf), ret.second);
32645 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32646 std::pair<const char*, char16_t*> ret = sse_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
32649 if (ret.first != buf + len) {
32651 ret.first, len - (ret.first - buf), ret.second);
32658 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32659 std::pair<const char*, char32_t*> ret = sse_convert_latin1_to_utf32(buf, len, utf32_output);
32662 if (ret.first != buf + len) {
32664 ret.first, len - (ret.first - buf), ret.second);
32672 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
32674 return converter.convert(buf, len, latin1_output);
32677 simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
32679 return converter.convert_with_errors(buf, len, latin1_output);
32682 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
32683 return westmere::utf8_to_latin1::convert_valid(buf,len,latin1_output);
32686 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32688 return converter.convert<endianness::LITTLE>(buf, len, utf16_output);
32691 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32693 return converter.convert<endianness::BIG>(buf, len, utf16_output);
32696 simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32698 return converter.convert_with_errors<endianness::LITTLE>(buf, len, utf16_output);
32701 simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
32703 return converter.convert_with_errors<endianness::BIG>(buf, len, utf16_output);
32717 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32719 return converter.convert(buf, len, utf32_output);
32722 simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
32724 return converter.convert_with_errors(buf, len, utf32_output);
32732 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32733 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
32737 if (ret.first != buf + len) {
32739 ret.first, len - (ret.first - buf), ret.second);
32746 simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32747 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
32751 if (ret.first != buf + len) {
32753 ret.first, len - (ret.first - buf), ret.second);
32760 simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32761 std::pair<result, char*> ret = sse_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(buf, len, latin1_output);
32765 buf + ret.first.count, len - ret.first.count, ret.second);
32777 simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32778 std::pair<result, char*> ret = sse_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf, len, latin1_output);
32782 buf + ret.first.count, len - ret.first.count, ret.second);
32795 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32797 return convert_utf16be_to_latin1(buf, len, latin1_output);
32800 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
32802 return convert_utf16le_to_latin1(buf, len, latin1_output);
32805 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32806 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
32809 if (ret.first != buf + len) {
32811 ret.first, len - (ret.first - buf), ret.second);
32818 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32819 std::pair<const char16_t*, char*> ret = sse_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
32822 if (ret.first != buf + len) {
32824 ret.first, len - (ret.first - buf), ret.second);
32831 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32833 std::pair<result, char*> ret = westmere::sse_convert_utf16_to_utf8_with_errors<endianness::LITTLE>(buf, len, utf8_output);
32837 buf + ret.first.count, len - ret.first.count, ret.second);
32849 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32851 std::pair<result, char*> ret = westmere::sse_convert_utf16_to_utf8_with_errors<endianness::BIG>(buf, len, utf8_output);
32855 buf + ret.first.count, len - ret.first.count, ret.second);
32867 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32868 return convert_utf16le_to_utf8(buf, len, utf8_output);
32871 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
32872 return convert_utf16be_to_utf8(buf, len, utf8_output);
32875 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32876 std::pair<const char32_t*, char*> ret = sse_convert_utf32_to_latin1(buf, len, latin1_output);
32879 // if (ret.first != buf + len) {
32880 if (ret.first < buf + len) {
32882 ret.first, len - (ret.first - buf), ret.second);
32890 simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32892 std::pair<result, char*> ret = westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
32895 buf + ret.first.count, len - ret.first.count, ret.second);
32907 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
32909 return convert_utf32_to_latin1(buf,len,latin1_output);
32912 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
32913 std::pair<const char32_t*, char*> ret = sse_convert_utf32_to_utf8(buf, len, utf8_output);
32916 if (ret.first != buf + len) {
32918 ret.first, len - (ret.first - buf), ret.second);
32925 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
32927 std::pair<result, char*> ret = westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
32930 buf + ret.first.count, len - ret.first.count, ret.second);
32942 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32943 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
32946 if (ret.first != buf + len) {
32948 ret.first, len - (ret.first - buf), ret.second);
32955 simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32956 std::pair<const char16_t*, char32_t*> ret = sse_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
32959 if (ret.first != buf + len) {
32961 ret.first, len - (ret.first - buf), ret.second);
32968 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32970 std::pair<result, char32_t*> ret = westmere::sse_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(buf, len, utf32_output);
32974 buf + ret.first.count, len - ret.first.count, ret.second);
32986 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
32988 std::pair<result, char32_t*> ret = westmere::sse_convert_utf16_to_utf32_with_errors<endianness::BIG>(buf, len, utf32_output);
32992 buf + ret.first.count, len - ret.first.count, ret.second);
33004 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
33005 return convert_utf32_to_utf8(buf, len, utf8_output);
33008 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33009 std::pair<const char32_t*, char16_t*> ret = sse_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
33012 if (ret.first != buf + len) {
33014 ret.first, len - (ret.first - buf), ret.second);
33021 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33022 std::pair<const char32_t*, char16_t*> ret = sse_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
33025 if (ret.first != buf + len) {
33027 ret.first, len - (ret.first - buf), ret.second);
33034 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33036 std::pair<result, char16_t*> ret = westmere::sse_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(buf, len, utf16_output);
33039 buf + ret.first.count, len - ret.first.count, ret.second);
33051 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33053 std::pair<result, char16_t*> ret = westmere::sse_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len, utf16_output);
33056 buf + ret.first.count, len - ret.first.count, ret.second);
33068 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33069 return convert_utf32_to_utf16le(buf, len, utf16_output);
33072 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
33073 return convert_utf32_to_utf16be(buf, len, utf16_output);
33076 simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33077 return convert_utf16le_to_utf32(buf, len, utf32_output);
33080 simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
33081 return convert_utf16be_to_utf32(buf, len, utf32_output);
33100 simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
33101 return count_utf8(buf,len);