1 /* auto-generated on 2023-12-01 13:59:01 -0500. Do not edit! */
2 /* begin file include/simdutf.h */
3 #ifndef SIMDUTF_H
4 #define SIMDUTF_H
5 #include <cstring>
6 
7 /* begin file include/simdutf/compiler_check.h */
8 #ifndef SIMDUTF_COMPILER_CHECK_H
9 #define SIMDUTF_COMPILER_CHECK_H
10 
// simdutf is a C++ library: refuse to build with a C compiler.
#if !defined(__cplusplus)
#error simdutf requires a C++ compiler
#endif

// SIMDUTF_CPLUSPLUS holds the language-standard value used for the checks
// below. A value supplied by the user is kept as is. Otherwise, regular
// Visual Studio (not clang) takes it from _MSVC_LANG, with _MSC_VER == 1900
// mapped to C++11, and every other compiler uses __cplusplus.
#if !defined(SIMDUTF_CPLUSPLUS)
#if !defined(_MSVC_LANG) || defined(__clang__)
#define SIMDUTF_CPLUSPLUS __cplusplus
#else
#define SIMDUTF_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
#endif
#endif

// One flag per supported standard level; each flag is set to 1 when the
// compiler is at least at that level.

// C++ 11
#if (SIMDUTF_CPLUSPLUS >= 201103L) && !defined(SIMDUTF_CPLUSPLUS11)
#define SIMDUTF_CPLUSPLUS11 1
#endif

// C++ 14
#if (SIMDUTF_CPLUSPLUS >= 201402L) && !defined(SIMDUTF_CPLUSPLUS14)
#define SIMDUTF_CPLUSPLUS14 1
#endif

// C++ 17
#if (SIMDUTF_CPLUSPLUS >= 201703L) && !defined(SIMDUTF_CPLUSPLUS17)
#define SIMDUTF_CPLUSPLUS17 1
#endif

// C++11 is the minimum.
#if !defined(SIMDUTF_CPLUSPLUS11)
#error simdutf requires a compiler compliant with the C++11 standard
#endif
41 
42 #endif // SIMDUTF_COMPILER_CHECK_H
43 /* end file include/simdutf/compiler_check.h */
44 /* begin file include/simdutf/common_defs.h */
45 #ifndef SIMDUTF_COMMON_DEFS_H
46 #define SIMDUTF_COMMON_DEFS_H
47 
48 #include <cassert>
49 /* begin file include/simdutf/portability.h */
50 #ifndef SIMDUTF_PORTABILITY_H
51 #define SIMDUTF_PORTABILITY_H
52 
53 #include <cstddef>
54 #include <cstdint>
55 #include <cstdlib>
56 #include <cfloat>
57 #include <cassert>
58 #ifndef _WIN32
59 // strcasecmp, strncasecmp
60 #include <strings.h>
61 #endif
62 
63 /**
64  * We want to check that it is actually a little endian system at
65  * compile-time.
66  */
67 
// Compile-time byte-order detection. After this section,
// SIMDUTF_IS_BIG_ENDIAN is defined exactly once: nonzero on big-endian
// targets, 0 otherwise.
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
// The compiler tells us directly.
#define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#elif defined(_WIN32)
// Windows targets are treated as little endian.
#define SIMDUTF_IS_BIG_ENDIAN 0
#else
// Pull in the platform's endianness header, if there is one.
#if defined(__APPLE__) || defined(__FreeBSD__)
#include <machine/endian.h>
#elif defined(sun) || defined(__sun)
#include <sys/byteorder.h>
#else

#ifdef __has_include
#if __has_include(<endian.h>)
#include <endian.h>
#endif //__has_include(<endian.h>)
#endif //__has_include

#endif // defined(__APPLE__) || defined(__FreeBSD__)

// A single #if/#elif/#else chain guarantees one definition. The previous
// code used the ill-formed directive `#ifndef !defined(...)` and then
// defined the macro a second time unconditionally.
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
// No byte-order information available: assume little endian.
#define SIMDUTF_IS_BIG_ENDIAN 0
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define SIMDUTF_IS_BIG_ENDIAN 0
#else
#define SIMDUTF_IS_BIG_ENDIAN 1
#endif

#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
99 
100 
101 /**
102  * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined.
103  */
104 
/**
 * Any compiler that defines _MSC_VER counts as "Visual Studio"
 * (SIMDUTF_VISUAL_STUDIO). Within that family we distinguish carefully
 * between clang under Visual Studio (SIMDUTF_CLANG_VISUAL_STUDIO) and
 * regular Visual Studio (SIMDUTF_REGULAR_VISUAL_STUDIO, a best guess).
 *
 * Under clang for Windows, we enable:
 *  * target pragmas so that part and only part of the
 *     code gets compiled for advanced instructions.
 */
#if defined(_MSC_VER) && defined(__clang__)
// clang under visual studio
#define SIMDUTF_VISUAL_STUDIO 1
#define SIMDUTF_CLANG_VISUAL_STUDIO 1
#elif defined(_MSC_VER)
// just regular visual studio (best guess)
#define SIMDUTF_VISUAL_STUDIO 1
#define SIMDUTF_REGULAR_VISUAL_STUDIO 1
#endif // _MSC_VER

#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO)
// Alternative operator tokens (and, or, not, ...), see
// https://en.wikipedia.org/wiki/C_alternative_tokens
// This header should have no effect, except maybe
// under Visual Studio.
#include <iso646.h>
#endif
132 
// Identify the target architecture. Exactly one branch of the chain below
// is taken; targets that are not recognised as 64-bit fall into the final
// branch and are flagged with SIMDUTF_IS_32BITS.
#if defined(_M_AMD64) || defined(__x86_64__)
#define SIMDUTF_IS_X86_64 1
#elif defined(_M_ARM64) || defined(__aarch64__)
#define SIMDUTF_IS_ARM64 1
#elif defined(_M_PPC64) || defined(__PPC64__)
// 64-bit POWER. The simdutf library does not yet support SIMD acceleration
// under POWER processors, so SIMDUTF_IS_PPC64 is deliberately left undefined.
// Please see https://github.com/lemire/simdutf/issues/51
//#define SIMDUTF_IS_PPC64 1
#elif defined(__s390__)
// s390 IBM system. Big endian.
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
// RISC-V 64-bit
#else
// The simdutf library is designed for 64-bit processors and it seems that
// you are not compiling for a known 64-bit platform. Please use a 64-bit
// target such as x64 or 64-bit ARM for best performance.
#define SIMDUTF_IS_32BITS 1

// We do not support 32-bit platforms, but it can be
// handy to identify them.
#if defined(__i386__) || defined(_M_IX86)
#define SIMDUTF_IS_X86_32BITS 1
#elif defined(_M_ARM) || defined(__arm__)
#define SIMDUTF_IS_ARM_32BITS 1
#elif defined(_M_PPC) || defined(__PPC__)
#define SIMDUTF_IS_PPC_32BITS 1
#endif

#endif // architecture detection

#if defined(SIMDUTF_IS_32BITS)
#if !defined(SIMDUTF_NO_PORTABILITY_WARNING)
// Placeholder: in the future, we may want to warn users of 32-bit systems
// that simdutf does not support accelerated kernels for such systems.
#endif // SIMDUTF_NO_PORTABILITY_WARNING
#endif // SIMDUTF_IS_32BITS
170 
171 // this is almost standard?
// Two-level stringification: SIMDUTF_STRINGIFY(x) macro-expands x first and
// then turns the result into a string literal, whereas the _IMPLEMENTATION_
// helper stringifies its argument exactly as written.
#define SIMDUTF_STRINGIFY_IMPLEMENTATION_(token) #token
#define SIMDUTF_STRINGIFY(token) SIMDUTF_STRINGIFY_IMPLEMENTATION_(token)
174 
175 // Our fast kernels require 64-bit systems.
176 //
177 // On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
178 // Furthermore, the number of SIMD registers is reduced.
179 //
180 // On 32-bit ARM, we would have smaller registers.
181 //
182 // The simdutf users should still have the fallback kernel. It is
183 // slower, but it should run everywhere.
184 
185 //
186 // Enable valid runtime implementations, and select SIMDUTF_BUILTIN_IMPLEMENTATION
187 //
188 
189 // We are going to use runtime dispatch.
// SIMDUTF_TARGET_REGION(T) / SIMDUTF_UNTARGET_REGION bracket code that must be
// compiled for the instruction-set target T. They are only active on x86-64
// with clang or GCC; everywhere else they expand to nothing.
#if defined(SIMDUTF_IS_X86_64) && defined(__clang__)
// clang does not have GCC push pop
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be
// *outside* of a namespace.
#define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop")
#define SIMDUTF_TARGET_REGION(T)                                                       \
  _Pragma(SIMDUTF_STRINGIFY(                                                           \
      clang attribute push(__attribute__((target(T))), apply_to = function)))
#elif defined(SIMDUTF_IS_X86_64) && defined(__GNUC__)
// GCC is easier
#define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options")
#define SIMDUTF_TARGET_REGION(T)                                                       \
  _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T)))
#endif // x86-64 with clang, then gcc

// Default target region macros don't do anything.
#if !defined(SIMDUTF_TARGET_REGION)
#define SIMDUTF_TARGET_REGION(T)
#define SIMDUTF_UNTARGET_REGION
#endif
214 
215 // Is threading enabled?
// Threading is considered enabled when the toolchain defines _REENTRANT or
// _MT (or when the user already defined SIMDUTF_THREADS_ENABLED).
#if (defined(_REENTRANT) || defined(_MT)) && !defined(SIMDUTF_THREADS_ENABLED)
#define SIMDUTF_THREADS_ENABLED
#endif

// workaround for large stack sizes under -O0.
// https://github.com/simdutf/simdutf/issues/691
// Apple systems have small stack sizes in secondary threads, and a lack of
// compiler optimization may generate high stack usage. Threads are therefore
// disabled on Apple in debug mode, which we detect by the fact that the
// __OPTIMIZE__ macro is not defined.
#if defined(__APPLE__) && !defined(__OPTIMIZE__)
#undef SIMDUTF_THREADS_ENABLED
#endif
234 
// Case-insensitive comparison aliases. None of these functions work with
// multibyte strings (e.g. UTF-8), so they are only useful for ASCII in our
// context.
// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
#if !defined(SIMDUTF_VISUAL_STUDIO)
#define simdutf_strncasecmp strncasecmp
#define simdutf_strcasecmp strcasecmp
#else
// This is one case where we do not distinguish between regular visual studio
// and clang under visual studio: clang under Windows has _stricmp (like
// visual studio) but not strcasecmp (as clang normally has).
#define simdutf_strncasecmp _strnicmp
#define simdutf_strcasecmp _stricmp
#endif
248 
// SIMDUTF_UNREACHABLE() marks a point that control flow never reaches, and
// SIMDUTF_ASSUME(COND) states that COND always holds.
//  - With NDEBUG defined they are optimizer hints (__assume under Visual
//    Studio, __builtin_unreachable otherwise).
//  - Without NDEBUG they are checked with assert().
//
// Every variant expands to a single expression/statement WITHOUT a trailing
// semicolon, so the caller writes `SIMDUTF_UNREACHABLE();` on every compiler
// and the macros are safe inside an unbraced if/else. Previously only the
// Visual Studio variant behaved that way; the others embedded their own ';'.
#ifdef NDEBUG

#ifdef SIMDUTF_VISUAL_STUDIO
#define SIMDUTF_UNREACHABLE() __assume(0)
#define SIMDUTF_ASSUME(COND) __assume(COND)
#else
#define SIMDUTF_UNREACHABLE() __builtin_unreachable()
#define SIMDUTF_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0)
#endif

#else // NDEBUG

#define SIMDUTF_UNREACHABLE() assert(0)
#define SIMDUTF_ASSUME(COND) assert(COND)

#endif
265 
266 
// Set when compiling with genuine GCC (not clang, which also defines
// __GNUC__) at major version 11 or later.
#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 11)
#define SIMDUTF_GCC11ORMORE 1
#endif // GCC 11 or more
272 
273 
274 #endif // SIMDUTF_PORTABILITY_H
275 /* end file include/simdutf/portability.h */
276 /* begin file include/simdutf/avx512.h */
277 #ifndef SIMDUTF_AVX512_H_
278 #define SIMDUTF_AVX512_H_
279 
/*
    It is possible to override the AVX-512 settings with cmake
    DCMAKE_CXX_FLAGS.

    All preprocessor symbols have the form `SIMDUTF_HAS_AVX512{feature}`,
    where the feature is the code name of an extension. A symbol that is
    already defined is left alone; otherwise it is set to 1 when the compiler
    reports the matching `__AVX512{feature}__` macro with the value 1.

    Please see the listing below to find which are supported.
*/

#if !defined(SIMDUTF_HAS_AVX512F) && defined(__AVX512F__) && (__AVX512F__ == 1)
#  define SIMDUTF_HAS_AVX512F 1
#endif

#if !defined(SIMDUTF_HAS_AVX512DQ) && defined(__AVX512DQ__) && (__AVX512DQ__ == 1)
#  define SIMDUTF_HAS_AVX512DQ 1
#endif

#if !defined(SIMDUTF_HAS_AVX512IFMA) && defined(__AVX512IFMA__) && (__AVX512IFMA__ == 1)
#  define SIMDUTF_HAS_AVX512IFMA 1
#endif

#if !defined(SIMDUTF_HAS_AVX512CD) && defined(__AVX512CD__) && (__AVX512CD__ == 1)
#  define SIMDUTF_HAS_AVX512CD 1
#endif

#if !defined(SIMDUTF_HAS_AVX512BW) && defined(__AVX512BW__) && (__AVX512BW__ == 1)
#  define SIMDUTF_HAS_AVX512BW 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VL) && defined(__AVX512VL__) && (__AVX512VL__ == 1)
#  define SIMDUTF_HAS_AVX512VL 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VBMI) && defined(__AVX512VBMI__) && (__AVX512VBMI__ == 1)
#  define SIMDUTF_HAS_AVX512VBMI 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VBMI2) && defined(__AVX512VBMI2__) && (__AVX512VBMI2__ == 1)
#  define SIMDUTF_HAS_AVX512VBMI2 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VNNI) && defined(__AVX512VNNI__) && (__AVX512VNNI__ == 1)
#  define SIMDUTF_HAS_AVX512VNNI 1
#endif

#if !defined(SIMDUTF_HAS_AVX512BITALG) && defined(__AVX512BITALG__) && (__AVX512BITALG__ == 1)
#  define SIMDUTF_HAS_AVX512BITALG 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VPOPCNTDQ) && defined(__AVX512VPOPCNTDQ__) && (__AVX512VPOPCNTDQ__ == 1)
#  define SIMDUTF_HAS_AVX512VPOPCNTDQ 1
#endif
354 
355 #endif // SIMDUTF_AVX512_H_
356 /* end file include/simdutf/avx512.h */
357 
358 
// Helpers that delimit a named region for LLVM-MCA analysis.
//  - SIMDUTF_BEGIN_DEBUG_BLOCK(name) / SIMDUTF_END_DEBUG_BLOCK(name) emit the
//    "# LLVM-MCA-BEGIN name" / "# LLVM-MCA-END name" assembly markers.
//  - SIMDUTF_DEBUG_BLOCK(name, block) runs `block` between the two markers.
// On compilers that do not define __GNUC__ the markers expand to nothing and
// SIMDUTF_DEBUG_BLOCK simply runs `block`, so the wrapped code behaves the
// same everywhere.
#if defined(__GNUC__)
  // Marks a block with a name so that MCA analysis can see it.
  #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
  #define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
  // Uses the SIMDUTF_-prefixed helpers: the unprefixed names are not defined.
  #define SIMDUTF_DEBUG_BLOCK(name, block) SIMDUTF_BEGIN_DEBUG_BLOCK(name); block; SIMDUTF_END_DEBUG_BLOCK(name);
#else
  #define SIMDUTF_BEGIN_DEBUG_BLOCK(name)
  #define SIMDUTF_END_DEBUG_BLOCK(name)
  // Still execute the wrapped code; only the markers are dropped.
  #define SIMDUTF_DEBUG_BLOCK(name, block) block;
#endif
369 
// Alignment helpers; the masking only works when `align` is a power of two.
// SIMDUTF_ROUNDDOWN_N clears the low bits of `value`, SIMDUTF_ROUNDUP_N does
// the same after adding (align - 1).
#define SIMDUTF_ROUNDDOWN_N(value, align) ((value) & ~((align)-1))
#define SIMDUTF_ROUNDUP_N(value, align) (((value) + ((align)-1)) & ~((align)-1))

// True when the address held by `pointer` is a multiple of `align`.
#define SIMDUTF_ISALIGNED_N(pointer, align) (((uintptr_t)(pointer) & ((align)-1)) == 0)
375 
// Compiler-specific spellings for inlining control, attributes, branch hints
// and warning management. One set is used for GCC-compatible compilers
// (including clang under Visual Studio), the other for regular Visual Studio.
#if !defined(SIMDUTF_REGULAR_VISUAL_STUDIO)

  // Inlining control.
  #define simdutf_never_inline inline __attribute__((noinline))
  #define simdutf_really_inline inline __attribute__((always_inline))

  // Attributes.
  #define simdutf_warn_unused __attribute__((warn_unused_result))
  #define simdutf_unused __attribute__((unused))

  // Branch-prediction hints; both may be overridden by the user.
  #if !defined(simdutf_likely)
  #define simdutf_likely(x) __builtin_expect(!!(x), 1)
  #endif
  #if !defined(simdutf_unlikely)
  #define simdutf_unlikely(x) __builtin_expect(!!(x), 0)
  #endif

  // Building blocks: turn a token sequence into a pragma, and silence one
  // named GCC-style warning.
  #define SIMDUTF_PRAGMA(P) _Pragma(#P)
  #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING)

  #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
  #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")

  // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary
  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS SIMDUTF_PUSH_DISABLE_WARNINGS \
    SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wall) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable)

  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow)
  #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations)

  // Only clang under Visual Studio has an extra warning to silence.
  #if !defined(SIMDUTF_CLANG_VISUAL_STUDIO)
  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
  #else
  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include)
  #endif

#else // SIMDUTF_REGULAR_VISUAL_STUDIO

  // Inlining control.
  #define simdutf_never_inline __declspec(noinline)
  #define simdutf_really_inline __forceinline

  // These attributes expand to nothing under regular Visual Studio.
  #define simdutf_warn_unused
  #define simdutf_unused

  // The branch hints are plain pass-throughs here; both may be overridden.
  #if !defined(simdutf_likely)
  #define simdutf_likely(x) x
  #endif
  #if !defined(simdutf_unlikely)
  #define simdutf_unlikely(x) x
  #endif

  #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
  #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
  #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop ))

  // Get rid of Intellisense-only warnings (Code Analysis)
  // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
  #ifdef __has_include
  #if __has_include(<CppCoreCheck\Warnings.h>)
  #include <CppCoreCheck\Warnings.h>
  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
  #endif
  #endif

  // Fallback when the Code Analysis header is not available.
  #if !defined(SIMDUTF_DISABLE_UNDESIRED_WARNINGS)
  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
  #endif

  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING
  #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996)

#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
454 
// Symbol visibility for DLL builds. A user-supplied SIMDUTF_DLLIMPORTEXPORT
// wins. Otherwise the macro is empty everywhere except under Visual Studio,
// where it selects dllimport when SIMDUTF_USING_LIBRARY is nonzero and
// dllexport when it is not.
#if !defined(SIMDUTF_DLLIMPORTEXPORT)
    #if !defined(SIMDUTF_VISUAL_STUDIO)
      #define SIMDUTF_DLLIMPORTEXPORT
    /**
     * It does not matter here whether you are using
     * the regular visual studio or clang under visual
     * studio.
     */
    #elif SIMDUTF_USING_LIBRARY
      #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
    #else
      #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
    #endif
#endif
471 
/// Evaluates EXPR once; if the result converts to true (i.e. it is an
/// error), returns it from the enclosing function.
#define SIMDUTF_TRY(EXPR)                                                              \
  {                                                                                    \
    auto _err = (EXPR);                                                                \
    if (_err) {                                                                        \
      return _err;                                                                     \
    }                                                                                  \
  }
474 
475 
476 #endif // SIMDUTF_COMMON_DEFS_H
477 /* end file include/simdutf/common_defs.h */
478 /* begin file include/simdutf/encoding_types.h */
479 #include <string>
480 
namespace simdutf {

/**
 * The encodings known to the library. Each encoding occupies its own bit, so
 * that several of them can be combined with a bitwise OR; `unspecified` is
 * the empty value.
 */
enum encoding_type {
        unspecified = 0,

        UTF8 = 1 << 0,      // BOM 0xef 0xbb 0xbf
        UTF16_LE = 1 << 1,  // BOM 0xff 0xfe
        UTF16_BE = 1 << 2,  // BOM 0xfe 0xff
        UTF32_LE = 1 << 3,  // BOM 0xff 0xfe 0x00 0x00
        UTF32_BE = 1 << 4,  // BOM 0x00 0x00 0xfe 0xff
        Latin1 = 1 << 5
};

/** Byte order of multi-byte code units. */
enum endianness {
        LITTLE = 0,
        BIG = 1
};

/** Declared here, defined elsewhere. */
bool match_system(endianness e);

/** Declared here, defined elsewhere. */
std::string to_string(encoding_type bom);

// Note that BOM for UTF8 is discouraged.
namespace BOM {

/**
 * Checks for a BOM. If there is none, returns unspecified.
 * @param byte          the string to process
 * @param length        the length of the string in code units
 * @return the corresponding encoding
 */
encoding_type check_bom(const uint8_t* byte, size_t length);
encoding_type check_bom(const char* byte, size_t length);

/**
 * Returns the size, in bytes, of the BOM for a given encoding type.
 * Note that UTF8 BOM are discouraged.
 * @param bom         the encoding type
 * @return the size in bytes of the corresponding BOM
 */
size_t bom_byte_size(encoding_type bom);

} // BOM namespace
} // simdutf namespace
525 /* end file include/simdutf/encoding_types.h */
526 /* begin file include/simdutf/error.h */
527 #ifndef SIMDUTF_ERROR_H
528 #define SIMDUTF_ERROR_H
529 namespace simdutf {
530 
531 enum error_code {
532   SUCCESS = 0,
533   HEADER_BITS,  // Any byte must have fewer than 5 header bits.
534   TOO_SHORT,    // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length
535                 // This is also the error when the input is truncated.
536   TOO_LONG,     // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
537   OVERLONG,     // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
538                 // and U+FFFF for four-byte characters.
539   TOO_LARGE,    // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1
540   SURROGATE,    // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
541                 // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
542                 // there must be no surrogate at all (Latin1)
543   OTHER         // Not related to validation/transcoding.
544 };
545 
546 struct result {
547   error_code error;
548   size_t count;     // In case of error, indicates the position of the error. In case of success, indicates the number of code units validated/written.
549 
550   simdutf_really_inline result();
551 
552   simdutf_really_inline result(error_code, size_t);
553 };
554 
555 }
556 #endif
557 /* end file include/simdutf/error.h */
558 
559 SIMDUTF_PUSH_DISABLE_WARNINGS
560 SIMDUTF_DISABLE_UNDESIRED_WARNINGS
561 
562 // Public API
563 /* begin file include/simdutf/simdutf_version.h */
564 // /include/simdutf/simdutf_version.h automatically generated by release.py,
565 // do not change by hand
566 #ifndef SIMDUTF_SIMDUTF_VERSION_H
567 #define SIMDUTF_SIMDUTF_VERSION_H
568 
569 /** The version of simdutf being used (major.minor.revision) */
570 #define SIMDUTF_VERSION "4.0.8"
571 
572 namespace simdutf {
573 enum {
574   /**
575    * The major version (MAJOR.minor.revision) of simdutf being used.
576    */
577   SIMDUTF_VERSION_MAJOR = 4,
578   /**
579    * The minor version (major.MINOR.revision) of simdutf being used.
580    */
581   SIMDUTF_VERSION_MINOR = 0,
582   /**
583    * The revision (major.minor.REVISION) of simdutf being used.
584    */
585   SIMDUTF_VERSION_REVISION = 8
586 };
587 } // namespace simdutf
588 
589 #endif // SIMDUTF_SIMDUTF_VERSION_H
590 /* end file include/simdutf/simdutf_version.h */
591 /* begin file include/simdutf/implementation.h */
592 #ifndef SIMDUTF_IMPLEMENTATION_H
593 #define SIMDUTF_IMPLEMENTATION_H
594 #include <string>
595 #if !defined(SIMDUTF_NO_THREADS)
596 #include <atomic>
597 #endif
598 #include <vector>
599 #include <tuple>
600 /* begin file include/simdutf/internal/isadetection.h */
601 /* From
602 https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
603 Highly modified.
604 
605 Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
606 Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
607 Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
608 Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
609 Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
610 Copyright (c) 2011-2013 NYU                      (Clement Farabet)
611 Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
612 Iain Melvin, Jason Weston) Copyright (c) 2006      Idiap Research Institute
613 (Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
614 Samy Bengio, Johnny Mariethoz)
615 
616 All rights reserved.
617 
618 Redistribution and use in source and binary forms, with or without
619 modification, are permitted provided that the following conditions are met:
620 
621 1. Redistributions of source code must retain the above copyright
622    notice, this list of conditions and the following disclaimer.
623 
624 2. Redistributions in binary form must reproduce the above copyright
625    notice, this list of conditions and the following disclaimer in the
626    documentation and/or other materials provided with the distribution.
627 
628 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
629 America and IDIAP Research Institute nor the names of its contributors may be
630    used to endorse or promote products derived from this software without
631    specific prior written permission.
632 
633 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
634 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
635 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
636 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
637 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
638 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
639 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
640 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
641 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
642 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
643 POSSIBILITY OF SUCH DAMAGE.
644 */
645 
646 #ifndef SIMDutf_INTERNAL_ISADETECTION_H
647 #define SIMDutf_INTERNAL_ISADETECTION_H
648 
649 #include <cstdint>
650 #include <cstdlib>
651 #if defined(_MSC_VER)
652 #include <intrin.h>
653 #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
654 #include <cpuid.h>
655 #endif
656 
657 namespace simdutf {
658 namespace internal {
659 
/**
 * Bit flags describing CPU instruction-set extensions. The detection code
 * ORs these together into a uint32_t, so every flag other than DEFAULT must
 * occupy its own bit.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  // Was 0x2000, which is the AVX512CD bit: the two features could not be
  // told apart in a detected mask. 0x20000 is the next unused bit.
  AVX512VPOPCNTDQ = 0x20000
};
680 
681 #if defined(__PPC64__)
682 
detect_supported_architectures()683 static inline uint32_t detect_supported_architectures() {
684   return instruction_set::ALTIVEC;
685 }
686 
687 #elif defined(__aarch64__) || defined(_M_ARM64)
688 
detect_supported_architectures()689 static inline uint32_t detect_supported_architectures() {
690   return instruction_set::NEON;
691 }
692 
693 #elif defined(__x86_64__) || defined(_M_AMD64) // x64
694 
695 
namespace {
namespace cpuid_bit {
    // Bit masks for the registers returned by the CPUID instruction and for
    // XCR0. Can be found on Intel ISA Reference for CPUID.

    // EAX = 0x01
    constexpr uint32_t pclmulqdq = UINT32_C(1) << 1;  ///< @private bit  1 of ECX for EAX=0x1
    constexpr uint32_t sse42 = UINT32_C(1) << 20;     ///< @private bit 20 of ECX for EAX=0x1
    constexpr uint32_t osxsave = UINT32_C(3) << 26;   ///< @private bits 26+27 of ECX for EAX=0x1

    // EAX = 0x7 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
    // See: "Table 3-8. Information Returned by CPUID Instruction"
    namespace ebx {
      constexpr uint32_t bmi1 = UINT32_C(1) << 3;
      constexpr uint32_t avx2 = UINT32_C(1) << 5;
      constexpr uint32_t bmi2 = UINT32_C(1) << 8;
      constexpr uint32_t avx512f = UINT32_C(1) << 16;
      constexpr uint32_t avx512dq = UINT32_C(1) << 17;
      constexpr uint32_t avx512ifma = UINT32_C(1) << 21;
      constexpr uint32_t avx512cd = UINT32_C(1) << 28;
      constexpr uint32_t avx512bw = UINT32_C(1) << 30;
      constexpr uint32_t avx512vl = UINT32_C(1) << 31;
    }

    namespace ecx {
      constexpr uint32_t avx512vbmi = UINT32_C(1) << 1;
      constexpr uint32_t avx512vbmi2 = UINT32_C(1) << 6;
      constexpr uint32_t avx512vnni = UINT32_C(1) << 11;
      constexpr uint32_t avx512bitalg = UINT32_C(1) << 12;
      constexpr uint32_t avx512vpopcnt = UINT32_C(1) << 14;
    }

    namespace edx {
      constexpr uint32_t avx512vp2intersect = UINT32_C(1) << 8;
    }

    // Bits of the XCR0 register, as read with xgetbv.
    namespace xcr0_bit {
      constexpr uint64_t avx256_saved = UINT64_C(1) << 2; ///< @private bit 2 = AVX
      constexpr uint64_t avx512_saved = UINT64_C(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
    }
  }
}
735 
736 
737 
/**
 * Executes the CPUID instruction.
 *
 * On entry *eax holds the leaf and *ecx the sub-leaf to query; on return the
 * four pointed-to values hold the EAX, EBX, ECX and EDX results.
 *
 * @param eax in: leaf number, out: EAX result
 * @param ebx out: EBX result
 * @param ecx in: sub-leaf number, out: ECX result
 * @param edx out: EDX result
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
#if defined(_MSC_VER)
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  // __get_cpuid() takes no sub-leaf argument and leaves its outputs untouched
  // when the leaf is unsupported, so callers querying leaf 0x7 would read an
  // unspecified sub-leaf or stale values from an earlier query. Pass the
  // sub-leaf explicitly and report all zeros for an unsupported leaf.
  const uint32_t leaf = *eax;
  const uint32_t subleaf = *ecx;
  uint32_t a = 0, b = 0, c = 0, d = 0;
  const uint32_t max_leaf = __get_cpuid_max(leaf & 0x80000000, nullptr);
  if (max_leaf != 0 && max_leaf >= leaf) {
    __cpuid_count(leaf, subleaf, a, b, c, d);
  }
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#else
  // EAX and ECX are both inputs and outputs of the instruction.
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}
759 
/**
 * Reads extended control register 0 (XCR0) with the XGETBV instruction.
 *
 * @return the 64-bit register value (EDX holds the upper half, EAX the lower)
 */
static inline uint64_t xgetbv() {
#if !defined(_MSC_VER)
  uint32_t low_half, high_half;
  // ECX = 0 selects XCR0.
  asm volatile("xgetbv\n\t" : "=a" (low_half), "=d" (high_half) : "c" (0));
  return (static_cast<uint64_t>(high_half) << 32) | low_half;
#else
  return _xgetbv(0);
#endif
}
769 
detect_supported_architectures()770 static inline uint32_t detect_supported_architectures() {
771   uint32_t eax;
772   uint32_t ebx = 0;
773   uint32_t ecx = 0;
774   uint32_t edx = 0;
775   uint32_t host_isa = 0x0;
776 
777   // EBX for EAX=0x1
778   eax = 0x1;
779   cpuid(&eax, &ebx, &ecx, &edx);
780 
781   if (ecx & cpuid_bit::sse42) {
782     host_isa |= instruction_set::SSE42;
783   }
784 
785   if (ecx & cpuid_bit::pclmulqdq) {
786     host_isa |= instruction_set::PCLMULQDQ;
787   }
788 
789   if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
790     return host_isa;
791   }
792 
793   // xgetbv for checking if the OS saves registers
794   uint64_t xcr0 = xgetbv();
795 
796   if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
797     return host_isa;
798   }
799   // ECX for EAX=0x7
800   eax = 0x7;
801   ecx = 0x0; // Sub-leaf = 0
802   cpuid(&eax, &ebx, &ecx, &edx);
803   if (ebx & cpuid_bit::ebx::avx2) {
804     host_isa |= instruction_set::AVX2;
805   }
806   if (ebx & cpuid_bit::ebx::bmi1) {
807     host_isa |= instruction_set::BMI1;
808   }
809   if (ebx & cpuid_bit::ebx::bmi2) {
810     host_isa |= instruction_set::BMI2;
811   }
812   if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) {
813     return host_isa;
814   }
815   if (ebx & cpuid_bit::ebx::avx512f) {
816     host_isa |= instruction_set::AVX512F;
817   }
818   if (ebx & cpuid_bit::ebx::avx512bw) {
819     host_isa |= instruction_set::AVX512BW;
820   }
821   if (ebx & cpuid_bit::ebx::avx512cd) {
822     host_isa |= instruction_set::AVX512CD;
823   }
824   if (ebx & cpuid_bit::ebx::avx512dq) {
825     host_isa |= instruction_set::AVX512DQ;
826   }
827   if (ebx & cpuid_bit::ebx::avx512vl) {
828     host_isa |= instruction_set::AVX512VL;
829   }
830   if (ecx & cpuid_bit::ecx::avx512vbmi2) {
831     host_isa |= instruction_set::AVX512VBMI2;
832   }
833   if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
834     host_isa |= instruction_set::AVX512VPOPCNTDQ;
835   }
836   return host_isa;
837 }
838 #else // fallback
839 
840 // includes 32-bit ARM.
detect_supported_architectures()841 static inline uint32_t detect_supported_architectures() {
842   return instruction_set::DEFAULT;
843 }
844 
845 
846 #endif // end SIMD extension detection code
847 
848 } // namespace internal
849 } // namespace simdutf
850 
851 #endif // SIMDutf_INTERNAL_ISADETECTION_H
852 /* end file include/simdutf/internal/isadetection.h */
853 
854 
855 namespace simdutf {
856 
857 /**
858  * Autodetect the encoding of the input, a single encoding is recommended.
859  * E.g., the function might return simdutf::encoding_type::UTF8,
860  * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or
861  * simdutf::encoding_type::UTF32_LE.
862  *
863  * @param input the string to analyze.
864  * @param length the length of the string in bytes.
865  * @return the detected encoding type
866  */
867 simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * input, size_t length) noexcept;
868 simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t * input, size_t length) noexcept {
869   return autodetect_encoding(reinterpret_cast<const char *>(input), length);
870 }
871 
872 /**
873  * Autodetect the possible encodings of the input in one pass.
874  * E.g., if the input might be UTF-16LE or UTF-8, this function returns
875  * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE).
876  *
877  * Overriden by each implementation.
878  *
879  * @param input the string to analyze.
880  * @param length the length of the string in bytes.
881  * @return the detected encoding type
882  */
883 simdutf_warn_unused int detect_encodings(const char * input, size_t length) noexcept;
884 simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * input, size_t length) noexcept {
885   return detect_encodings(reinterpret_cast<const char *>(input), length);
886 }
887 
888 /**
889  * Validate the UTF-8 string. This function may be best when you expect
890  * the input to be almost always valid. Otherwise, consider using
891  * validate_utf8_with_errors, which also reports the position of the error.
892  *
893  * Overridden by each implementation.
894  *
895  * @param buf the UTF-8 string to validate.
896  * @param len the length of the string in bytes.
897  * @return true if and only if the string is valid UTF-8.
898  */
899 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
900 
901 /**
902  * Validate the UTF-8 string and stop on error.
903  *
904  * Overridden by each implementation.
905  *
906  * @param buf the UTF-8 string to validate.
907  * @param len the length of the string in bytes.
908  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
909  */
910 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept;
911 
912 /**
913  * Validate the ASCII string. Consider validate_ascii_with_errors when the position of the error is needed.
914  *
915  * Overridden by each implementation.
916  *
917  * @param buf the ASCII string to validate.
918  * @param len the length of the string in bytes.
919  * @return true if and only if the string is valid ASCII.
920  */
921 simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
922 
923 /**
924  * Validate the ASCII string and stop on error. It might be faster than
925  * validate_ascii when an error is expected to occur early.
926  *
927  * Overridden by each implementation.
928  *
929  * @param buf the ASCII string to validate.
930  * @param len the length of the string in bytes.
931  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
932  */
933 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept;
934 
935 /**
936  * Using native endianness, validate the UTF-16 string.
937  * This function may be best when you expect the input to be almost always valid.
938  * Otherwise, consider using validate_utf16_with_errors.
939  *
940  * Overridden by each implementation.
941  *
942  * This function is not BOM-aware.
943  *
944  * @param buf the UTF-16 string to validate.
945  * @param len the length of the string in number of 2-byte code units (char16_t).
946  * @return true if and only if the string is valid UTF-16.
947  */
948 simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept;
949 
950 /**
951  * Validate the UTF-16LE string. This function may be best when you expect
952  * the input to be almost always valid. Otherwise, consider using
953  * validate_utf16le_with_errors, which also reports the position of the error.
954  *
955  * Overridden by each implementation.
956  *
957  * This function is not BOM-aware.
958  *
959  * @param buf the UTF-16LE string to validate.
960  * @param len the length of the string in number of 2-byte code units (char16_t).
961  * @return true if and only if the string is valid UTF-16LE.
962  */
963 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept;
964 
965 /**
966  * Validate the UTF-16BE string. This function may be best when you expect
967  * the input to be almost always valid. Otherwise, consider using
968  * validate_utf16be_with_errors, which also reports the position of the error.
969  *
970  * Overridden by each implementation.
971  *
972  * This function is not BOM-aware.
973  *
974  * @param buf the UTF-16BE string to validate.
975  * @param len the length of the string in number of 2-byte code units (char16_t).
976  * @return true if and only if the string is valid UTF-16BE.
977  */
978 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept;
979 
980 /**
981  * Using native endianness, validate the UTF-16 string and stop on error.
982  * It might be faster than validate_utf16 when an error is expected to occur early.
983  *
984  * Overridden by each implementation.
985  *
986  * This function is not BOM-aware.
987  *
988  * @param buf the UTF-16 string to validate.
989  * @param len the length of the string in number of 2-byte code units (char16_t).
990  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
991  */
992 simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept;
993 
994 /**
995  * Validate the UTF-16LE string and stop on error. It might be faster than
996  * validate_utf16le when an error is expected to occur early.
997  *
998  * Overridden by each implementation.
999  *
1000  * This function is not BOM-aware.
1001  *
1002  * @param buf the UTF-16LE string to validate.
1003  * @param len the length of the string in number of 2-byte code units (char16_t).
1004  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1005  */
1006 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept;
1007 
1008 /**
1009  * Validate the UTF-16BE string and stop on error. It might be faster than
1010  * validate_utf16be when an error is expected to occur early.
1011  *
1012  * Overridden by each implementation.
1013  *
1014  * This function is not BOM-aware.
1015  *
1016  * @param buf the UTF-16BE string to validate.
1017  * @param len the length of the string in number of 2-byte code units (char16_t).
1018  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1019  */
1020 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept;
1021 
1022 /**
1023  * Validate the UTF-32 string. This function may be best when you expect
1024  * the input to be almost always valid. Otherwise, consider using
1025  * validate_utf32_with_errors, which also reports the position of the error.
1026  *
1027  * Overridden by each implementation.
1028  *
1029  * This function is not BOM-aware.
1030  *
1031  * @param buf the UTF-32 string to validate.
1032  * @param len the length of the string in number of 4-byte code units (char32_t).
1033  * @return true if and only if the string is valid UTF-32.
1034  */
1035 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept;
1036 
1037 /**
1038  * Validate the UTF-32 string and stop on error. It might be faster than
1039  * validate_utf32 when an error is expected to occur early.
1040  *
1041  * Overridden by each implementation.
1042  *
1043  * This function is not BOM-aware.
1044  *
1045  * @param buf the UTF-32 string to validate.
1046  * @param len the length of the string in number of 4-byte code units (char32_t).
1047  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1048  */
1049 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
1050 
1051   /**
1052    * Convert Latin1 string into UTF8 string.
1053    *
1054    * This function is suitable to work with inputs from untrusted sources.
1055    *
1056    * @param input         the Latin1 string to convert
1057    * @param length        the length of the string in bytes
1058    * @param utf8_output   the pointer to buffer that can hold conversion result
1059    * @return the number of written char; 0 if conversion is not possible
1060    */
1061   simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept;
1062 
1063 
1064     /**
1065    * Convert Latin1 string into UTF-16LE string.
1066    *
1067    * This function is suitable to work with inputs from untrusted sources.
1068    *
1069    * @param input         the Latin1 string to convert
1070    * @param length        the length of the string in bytes
1071    * @param utf16_output  the pointer to buffer that can hold conversion result
1072    * @return the number of written char16_t; 0 if conversion is not possible
1073    */
1074   simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
1075 
1076   /**
1077    * Convert Latin1 string into UTF-16BE string.
1078    *
1079    * This function is suitable to work with inputs from untrusted sources.
1080    *
1081    * @param input         the Latin1 string to convert
1082    * @param length        the length of the string in bytes
1083    * @param utf16_output  the pointer to buffer that can hold conversion result
1084    * @return the number of written char16_t; 0 if conversion is not possible
1085    */
1086   simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
1087 
1088   /**
1089    * Convert a Latin1 string into a UTF-32 string.
1090    *
1091    * This function is suitable to work with inputs from untrusted sources.
1092    *
1093    * @param input         the Latin1 string to convert
1094    * @param length        the length of the string in bytes
1095    * @param utf32_buffer  the pointer to a buffer that can hold the conversion result
1096    * @return the number of written char32_t; 0 if conversion is not possible
1097    */
1098   simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
1099 
1100  /**
1101    * Convert a possibly broken UTF-8 string into a Latin1 string.
1102    *
1103    * During the conversion also validation of the input string is done.
1104    * This function is suitable to work with inputs from untrusted sources.
1105    *
1106    * @param input         the UTF-8 string to convert
1107    * @param length        the length of the string in bytes
1108    * @param latin1_output  the pointer to a buffer that can hold the conversion result
1109    * @return the number of written char; 0 if the input was not a valid UTF-8 string
1110    */
1111   simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
1112 
1113 /**
1114  * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 string.
1115  *
1116  * During the conversion also validation of the input string is done.
1117  * This function is suitable to work with inputs from untrusted sources.
1118  *
1119  * @param input         the UTF-8 string to convert
1120  * @param length        the length of the string in bytes
1121  * @param utf16_output  the pointer to buffer that can hold conversion result
1122  * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1123  */
1124 simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
1125 
1126 
1127 /**
1128  * Using native endianness, convert a Latin1 string into a UTF-16 string.
1129  *
1130  * @param input         the Latin1 string to convert
1131  * @param length        the length of the string in bytes
1132  * @param utf16_output  the pointer to buffer that can hold conversion result
1133  * @return the number of written char16_t.
1134  */
1135 simdutf_warn_unused size_t convert_latin1_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
1136 
1137 /**
1138  * Convert possibly broken UTF-8 string into UTF-16LE string.
1139  *
1140  * During the conversion also validation of the input string is done.
1141  * This function is suitable to work with inputs from untrusted sources.
1142  *
1143  * @param input         the UTF-8 string to convert
1144  * @param length        the length of the string in bytes
1145  * @param utf16_output  the pointer to buffer that can hold conversion result
1146  * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1147  */
1148 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
1149 
1150 /**
1151  * Convert possibly broken UTF-8 string into UTF-16BE string.
1152  *
1153  * During the conversion also validation of the input string is done.
1154  * This function is suitable to work with inputs from untrusted sources.
1155  *
1156  * @param input         the UTF-8 string to convert
1157  * @param length        the length of the string in bytes
1158  * @param utf16_output  the pointer to buffer that can hold conversion result
1159  * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1160  */
1161 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
1162 
1163 
1164   /**
1165    * Convert possibly broken UTF-8 string into Latin1 string with errors.
1166    *
1167    * During the conversion also validation of the input string is done.
1168    * This function is suitable to work with inputs from untrusted sources.
1169    *
1170    * @param input         the UTF-8 string to convert
1171    * @param length        the length of the string in bytes
1172    * @param latin1_output  the pointer to buffer that can hold conversion result
1173    * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1174    */
1175   simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept;
1176 
1177 /**
1178  * Using native endianness, convert possibly broken UTF-8 string into UTF-16
1179  * string and stop on error.
1180  *
1181  * During the conversion also validation of the input string is done.
1182  * This function is suitable to work with inputs from untrusted sources.
1183  *
1184  * @param input         the UTF-8 string to convert
1185  * @param length        the length of the string in bytes
1186  * @param utf16_output  the pointer to buffer that can hold conversion result
1187  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1188  */
1189 simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1190 
1191 /**
1192  * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
1193  *
1194  * During the conversion also validation of the input string is done.
1195  * This function is suitable to work with inputs from untrusted sources.
1196  *
1197  * @param input         the UTF-8 string to convert
1198  * @param length        the length of the string in bytes
1199  * @param utf16_output  the pointer to buffer that can hold conversion result
1200  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1201  */
1202 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1203 
1204 /**
1205  * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
1206  *
1207  * During the conversion also validation of the input string is done.
1208  * This function is suitable to work with inputs from untrusted sources.
1209  *
1210  * @param input         the UTF-8 string to convert
1211  * @param length        the length of the string in bytes
1212  * @param utf16_output  the pointer to buffer that can hold conversion result
1213  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1214  */
1215 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1216 
1217 /**
1218  * Convert possibly broken UTF-8 string into UTF-32 string.
1219  *
1220  * During the conversion also validation of the input string is done.
1221  * This function is suitable to work with inputs from untrusted sources.
1222  *
1223  * @param input         the UTF-8 string to convert
1224  * @param length        the length of the string in bytes
1225  * @param utf32_output  the pointer to buffer that can hold conversion result
1226  * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
1227  */
1228 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept;
1229 
1230 /**
1231  * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
1232  *
1233  * During the conversion also validation of the input string is done.
1234  * This function is suitable to work with inputs from untrusted sources.
1235  *
1236  * @param input         the UTF-8 string to convert
1237  * @param length        the length of the string in bytes
1238  * @param utf32_output  the pointer to buffer that can hold conversion result
1239  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1240  */
1241 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
1242 
1243     /**
1244    * Convert valid UTF-8 string into Latin1 string.
1245    *
1246    * This function assumes that the input string is valid UTF-8.
1247    *
1248    * This function is not BOM-aware.
1249    *
1250    * @param input         the UTF-8 string to convert
1251    * @param length        the length of the string in bytes
1252    * @param latin1_output  the pointer to buffer that can hold conversion result
1253    * @return the number of written char; 0 if the input was not valid UTF-8 string (NOTE(review): this function assumes valid input, so the 0-on-invalid case may not be reliable -- confirm)
1254    */
1255   simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
1256 
1257 
1258 /**
1259  * Using native endianness, convert a valid UTF-8 string into a UTF-16 string.
1260  *
1261  * This function assumes that the input string is valid UTF-8.
1262  *
1263  * @param input         the UTF-8 string to convert
1264  * @param length        the length of the string in bytes
1265  * @param utf16_buffer  the pointer to a buffer that can hold the conversion result
1266  * @return the number of written char16_t
1267  */
1268 simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1269 
1270 /**
1271  * Convert a valid UTF-8 string into a UTF-16LE string.
1272  *
1273  * This function assumes that the input string is valid UTF-8.
1274  *
1275  * @param input         the UTF-8 string to convert
1276  * @param length        the length of the string in bytes
1277  * @param utf16_buffer  the pointer to a buffer that can hold the conversion result
1278  * @return the number of written char16_t
1279  */
1280 simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1281 
1282 /**
1283  * Convert a valid UTF-8 string into a UTF-16BE string.
1284  *
1285  * This function assumes that the input string is valid UTF-8.
1286  *
1287  * @param input         the UTF-8 string to convert
1288  * @param length        the length of the string in bytes
1289  * @param utf16_buffer  the pointer to a buffer that can hold the conversion result
1290  * @return the number of written char16_t
1291  */
1292 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1293 
1294 /**
1295  * Convert a valid UTF-8 string into a UTF-32 string.
1296  *
1297  * This function assumes that the input string is valid UTF-8.
1298  *
1299  * @param input         the UTF-8 string to convert
1300  * @param length        the length of the string in bytes
1301  * @param utf32_buffer  the pointer to a buffer that can hold the conversion result
1302  * @return the number of written char32_t
1303  */
1304 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
1305 
1306 
1307 /**
1308  * Return the number of bytes that this Latin1 string would require in UTF-8 format.
1309  *
1310  * @param input         the Latin1 string to convert
1311  * @param length        the length of the string in bytes
1312  * @return the number of bytes required to encode the Latin1 string as UTF-8
1313  */
1314 simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept;
1315 
1316 /**
1317  * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
1318  *
1319  * This function does not validate the input.
1320  *
1321  * This function is not BOM-aware.
1322  *
1323  * @param input         the UTF-8 string to convert
1324  * @param length        the length of the string in bytes
1325  * @return the number of bytes required to encode the UTF-8 string as Latin1
1326  */
1327 simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept;
1328 
1329 /**
1330  * Compute the number of 2-byte code units (char16_t) that this UTF-8 string would require in UTF-16LE format.
1331  *
1332  * This function does not validate the input.
1333  *
1334  * This function is not BOM-aware.
1335  *
1336  * @param input         the UTF-8 string to process
1337  * @param length        the length of the string in bytes
1338  * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
1339  */
1340 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept;
1341 
1342 /**
1343  * Compute the number of 4-byte code units (char32_t) that this UTF-8 string would require in UTF-32 format.
1344  *
1345  * This function is equivalent to count_utf8.
1346  *
1347  * This function does not validate the input.
1348  *
1349  * This function is not BOM-aware.
1350  *
1351  * @param input         the UTF-8 string to process
1352  * @param length        the length of the string in bytes
1353  * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
1354  */
1355 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept;
1356 
1357 /**
1358  * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string.
1359  *
1360  * During the conversion also validation of the input string is done.
1361  * This function is suitable to work with inputs from untrusted sources.
1362  *
1363  * This function is not BOM-aware.
1364  *
1365  * @param input         the UTF-16 string to convert
1366  * @param length        the length of the string in 2-byte code units (char16_t)
1367  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1368  * @return number of written code units; 0 if input is not a valid UTF-16 string
1369  */
1370 simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1371 
1372 
1373 
1374 /**
1375  * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
1376  *
1377  * During the conversion also validation of the input string is done.
1378  * This function is suitable to work with inputs from untrusted sources.
1379  *
1380  * This function is not BOM-aware.
1381  *
1382  * @param input         the UTF-16 string to convert
1383  * @param length        the length of the string in 2-byte code units (char16_t)
1384  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1385  * @return number of written code units; 0 if input is not a valid UTF-16 string
1386  */
1387 simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1388 
1389 /**
1390  * Convert a possibly broken UTF-16LE string into a Latin1 string.
1391  *
1392  * During the conversion also validation of the input string is done.
1393  * This function is suitable to work with inputs from untrusted sources.
1394  *
1395  * This function is not BOM-aware.
1396  *
1397  * @param input         the UTF-16LE string to convert
1398  * @param length        the length of the string in 2-byte code units (char16_t)
1399  * @param latin1_buffer   the pointer to a buffer that can hold the conversion result
1400  * @return number of written code units; 0 if input is not a valid UTF-16LE string
1401  */
1402 simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1403 
1404 /**
1405  * Convert possibly broken UTF-16BE string into Latin1 string.
1406  *
1407  * During the conversion also validation of the input string is done.
1408  * This function is suitable to work with inputs from untrusted sources.
1409  *
1410  * This function is not BOM-aware.
1411  *
1412  * @param input         the UTF-16BE string to convert
1413  * @param length        the length of the string in 2-byte code units (char16_t)
1414  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1415  * @return number of written code units; presumably 0 if input is not a valid UTF-16BE string, mirroring convert_utf16le_to_latin1 (TODO confirm)
1416  */
1417 simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1418 
1419 
1420 /**
1421  * Convert a possibly broken UTF-16LE string into a UTF-8 string.
1422  *
1423  * During the conversion also validation of the input string is done.
1424  * This function is suitable to work with inputs from untrusted sources.
1425  *
1426  * This function is not BOM-aware.
1427  *
1428  * @param input         the UTF-16LE string to convert
1429  * @param length        the length of the string in 2-byte code units (char16_t)
1430  * @param utf8_buffer   the pointer to a buffer that can hold the conversion result
1431  * @return number of written code units; 0 if input is not a valid UTF-16LE string
1432  */
1433 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1434 
1435 /**
1436  * Convert possibly broken UTF-16BE string into UTF-8 string.
1437  *
1438  * During the conversion also validation of the input string is done.
1439  * This function is suitable to work with inputs from untrusted sources.
1440  *
1441  * This function is not BOM-aware.
1442  *
1443  * @param input         the UTF-16BE string to convert
1444  * @param length        the length of the string in 2-byte code units (char16_t)
1445  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1446  * @return number of written code units; 0 if input is not a valid UTF-16BE string
1447  */
1448 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1449 
1450 /**
1451  * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
1452  *
1453  * During the conversion also validation of the input string is done.
1454  * This function is suitable to work with inputs from untrusted sources.
1455  * This function is not BOM-aware.
1456  *
1457  * @param input         the UTF-16 string to convert
1458  * @param length        the length of the string in 2-byte code units (char16_t)
1459  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1460  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1461  */
1462 simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1463 
1464 /**
1465  * Convert possibly broken UTF-16LE string into Latin1 string.
1466  *
1467  * During the conversion also validation of the input string is done.
1468  * This function is suitable to work with inputs from untrusted sources.
1469  * This function is not BOM-aware.
1470  *
1471  * @param input         the UTF-16LE string to convert
1472  * @param length        the length of the string in 2-byte code units (char16_t)
1473  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1474  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1475  */
1476 simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1477 
1478 /**
1479  * Convert possibly broken UTF-16BE string into Latin1 string.
1480  *
1481  * During the conversion also validation of the input string is done.
1482  * This function is suitable to work with inputs from untrusted sources.
1483  * This function is not BOM-aware.
1484  *
1485  * @param input         the UTF-16BE string to convert
1486  * @param length        the length of the string in 2-byte code units (char16_t)
1487  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1488  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1489  */
1490 simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1491 
1492 
1493 /**
1494  * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string and stop on error.
1495  *
1496  * During the conversion also validation of the input string is done.
1497  * This function is suitable to work with inputs from untrusted sources.
1498  *
1499  * This function is not BOM-aware.
1500  *
1501  * @param input         the UTF-16 string to convert
1502  * @param length        the length of the string in 2-byte code units (char16_t)
1503  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1504  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1505  */
1506 simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1507 
1508 /**
1509  * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
1510  *
1511  * During the conversion also validation of the input string is done.
1512  * This function is suitable to work with inputs from untrusted sources.
1513  *
1514  * This function is not BOM-aware.
1515  *
1516  * @param input         the UTF-16LE string to convert
1517  * @param length        the length of the string in 2-byte code units (char16_t)
1518  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1519  * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1520  */
1521 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1522 
1523 /**
1524  * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
1525  *
1526  * During the conversion also validation of the input string is done.
1527  * This function is suitable to work with inputs from untrusted sources.
1528  *
1529  * This function is not BOM-aware.
1530  *
1531  * @param input         the UTF-16BE string to convert
1532  * @param length        the length of the string in 2-byte code units (char16_t)
1533  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1534  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1535  */
1536 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1537 
1538 /**
1539  * Using native endianness, convert valid UTF-16 string into UTF-8 string.
1540  *
1541  * This function assumes that the input string is valid UTF-16LE.
1542  *
1543  * This function is not BOM-aware.
1544  *
1545  * @param input         the UTF-16 string to convert
1546  * @param length        the length of the string in 2-byte code units (char16_t)
1547  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1548  * @return number of written code units; 0 if conversion is not possible
1549  */
1550 simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1551 
1552 
1553 /**
1554  * Using native endianness, convert UTF-16 string into Latin1 string.
1555  *
1556  * This function assumes that the input string is valid UTF-8.
1557  *
1558  * This function is not BOM-aware.
1559  *
1560  * @param input         the UTF-16 string to convert
1561  * @param length        the length of the string in 2-byte code units (char16_t)
1562  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1563  * @return number of written code units; 0 if conversion is not possible
1564  */
1565 simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1566 
1567 /**
1568  * Convert valid UTF-16LE string into Latin1 string.
1569  *
1570  * This function assumes that the input string is valid UTF-16LE.
1571  *
1572  * This function is not BOM-aware.
1573  *
1574  * @param input         the UTF-16LE string to convert
1575  * @param length        the length of the string in 2-byte code units (char16_t)
1576  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1577  * @return number of written code units; 0 if conversion is not possible
1578  */
1579 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1580 
1581 /**
1582  * Convert valid UTF-16BE string into Latin1 string.
1583  *
1584  * This function assumes that the input string is valid UTF-16BE.
1585  *
1586  * This function is not BOM-aware.
1587  *
1588  * @param input         the UTF-16BE string to convert
1589  * @param length        the length of the string in 2-byte code units (char16_t)
1590  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1591  * @return number of written code units; 0 if conversion is not possible
1592  */
1593 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1594 
1595 
1596 /**
1597  * Convert valid UTF-16LE string into UTF-8 string.
1598  *
1599  * This function assumes that the input string is valid UTF-16LE.
1600  *
1601  * This function is not BOM-aware.
1602  *
1603  * @param input         the UTF-16LE string to convert
1604  * @param length        the length of the string in 2-byte code units (char16_t)
1605  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1606  * @return number of written code units; 0 if conversion is not possible
1607  */
1608 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1609 
1610 /**
1611  * Convert valid UTF-16BE string into UTF-8 string.
1612  *
1613  * This function assumes that the input string is valid UTF-16BE.
1614  *
1615  * This function is not BOM-aware.
1616  *
1617  * @param input         the UTF-16BE string to convert
1618  * @param length        the length of the string in 2-byte code units (char16_t)
1619  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1620  * @return number of written code units; 0 if conversion is not possible
1621  */
1622 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1623 
1624 /**
1625  * Using native endianness, convert possibly broken UTF-16 string into UTF-32 string.
1626  *
1627  * During the conversion also validation of the input string is done.
1628  * This function is suitable to work with inputs from untrusted sources.
1629  *
1630  * This function is not BOM-aware.
1631  *
1632  * @param input         the UTF-16 string to convert
1633  * @param length        the length of the string in 2-byte code units (char16_t)
1634  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1635  * @return number of written code units; 0 if input is not a valid UTF-16LE string
1636  */
1637 simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1638 
1639 /**
1640  * Convert possibly broken UTF-16LE string into UTF-32 string.
1641  *
1642  * During the conversion also validation of the input string is done.
1643  * This function is suitable to work with inputs from untrusted sources.
1644  *
1645  * This function is not BOM-aware.
1646  *
1647  * @param input         the UTF-16LE string to convert
1648  * @param length        the length of the string in 2-byte code units (char16_t)
1649  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1650  * @return number of written code units; 0 if input is not a valid UTF-16LE string
1651  */
1652 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1653 
1654 /**
1655  * Convert possibly broken UTF-16BE string into UTF-32 string.
1656  *
1657  * During the conversion also validation of the input string is done.
1658  * This function is suitable to work with inputs from untrusted sources.
1659  *
1660  * This function is not BOM-aware.
1661  *
1662  * @param input         the UTF-16BE string to convert
1663  * @param length        the length of the string in 2-byte code units (char16_t)
1664  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1665  * @return number of written code units; 0 if input is not a valid UTF-16LE string
1666  */
1667 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1668 
1669 /**
1670  * Using native endianness, convert possibly broken UTF-16 string into
1671  * UTF-32 string and stop on error.
1672  *
1673  * During the conversion also validation of the input string is done.
1674  * This function is suitable to work with inputs from untrusted sources.
1675  *
1676  * This function is not BOM-aware.
1677  *
1678  * @param input         the UTF-16 string to convert
1679  * @param length        the length of the string in 2-byte code units (char16_t)
1680  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1681  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1682  */
1683 simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1684 
1685 /**
1686  * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
1687  *
1688  * During the conversion also validation of the input string is done.
1689  * This function is suitable to work with inputs from untrusted sources.
1690  *
1691  * This function is not BOM-aware.
1692  *
1693  * @param input         the UTF-16LE string to convert
1694  * @param length        the length of the string in 2-byte code units (char16_t)
1695  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1696  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1697  */
1698 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1699 
1700 /**
1701  * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
1702  *
1703  * During the conversion also validation of the input string is done.
1704  * This function is suitable to work with inputs from untrusted sources.
1705  *
1706  * This function is not BOM-aware.
1707  *
1708  * @param input         the UTF-16BE string to convert
1709  * @param length        the length of the string in 2-byte code units (char16_t)
1710  * @param utf32_buffer   the pointer to buffer that can hold conversion result
1711  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1712  */
1713 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1714 
1715 /**
1716  * Using native endianness, convert valid UTF-16 string into UTF-32 string.
1717  *
1718  * This function assumes that the input string is valid UTF-16 (native endianness).
1719  *
1720  * This function is not BOM-aware.
1721  *
1722  * @param input         the UTF-16 string to convert
1723  * @param length        the length of the string in 2-byte code units (char16_t)
1724  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1725  * @return number of written code units; 0 if conversion is not possible
1726  */
1727 simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1728 
1729 /**
1730  * Convert valid UTF-16LE string into UTF-32 string.
1731  *
1732  * This function assumes that the input string is valid UTF-16LE.
1733  *
1734  * This function is not BOM-aware.
1735  *
1736  * @param input         the UTF-16LE string to convert
1737  * @param length        the length of the string in 2-byte code units (char16_t)
1738  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1739  * @return number of written code units; 0 if conversion is not possible
1740  */
1741 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1742 
1743 /**
1744  * Convert valid UTF-16BE string into UTF-32 string.
1745  *
1746  * This function assumes that the input string is valid UTF-16LE.
1747  *
1748  * This function is not BOM-aware.
1749  *
1750  * @param input         the UTF-16BE string to convert
1751  * @param length        the length of the string in 2-byte code units (char16_t)
1752  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1753  * @return number of written code units; 0 if conversion is not possible
1754  */
1755 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1756 
1757 
1758 /*
1759  * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
1760  *
1761  * This function does not validate the input.
1762  *
1763  * This function is not BOM-aware.
1764  *
1765  * @param length        the length of the string in 2-byte code units (char16_t)
1766  * @return the number of bytes required to encode the UTF-16LE string as Latin1
1767  */
1768 simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
1769 
1770 
1771 /**
1772  * Using native endianness; Compute the number of bytes that this UTF-16
1773  * string would require in UTF-8 format.
1774  *
1775  * This function does not validate the input.
1776  *
1777  * @param input         the UTF-16 string to convert
1778  * @param length        the length of the string in 2-byte code units (char16_t)
1779  * @return the number of bytes required to encode the UTF-16LE string as UTF-8
1780  */
1781 simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept;
1782 
1783 /**
1784  * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
1785  *
1786  * This function does not validate the input.
1787  *
1788  * @param input         the UTF-16LE string to convert
1789  * @param length        the length of the string in 2-byte code units (char16_t)
1790  * @return the number of bytes required to encode the UTF-16LE string as UTF-8
1791  */
1792 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept;
1793 
1794 /**
1795  * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
1796  *
1797  * This function does not validate the input.
1798  *
1799  * @param input         the UTF-16BE string to convert
1800  * @param length        the length of the string in 2-byte code units (char16_t)
1801  * @return the number of bytes required to encode the UTF-16BE string as UTF-8
1802  */
1803 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept;
1804 
1805 /**
1806  * Convert possibly broken UTF-32 string into UTF-8 string.
1807  *
1808  * During the conversion also validation of the input string is done.
1809  * This function is suitable to work with inputs from untrusted sources.
1810  *
1811  * This function is not BOM-aware.
1812  *
1813  * @param input         the UTF-32 string to convert
1814  * @param length        the length of the string in 4-byte code units (char32_t)
1815  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1816  * @return number of written code units; 0 if input is not a valid UTF-32 string
1817  */
1818 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1819 
1820 /**
1821  * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
1822  *
1823  * During the conversion also validation of the input string is done.
1824  * This function is suitable to work with inputs from untrusted sources.
1825  *
1826  * This function is not BOM-aware.
1827  *
1828  * @param input         the UTF-32 string to convert
1829  * @param length        the length of the string in 4-byte code units (char32_t)
1830  * @param utf8_buffer   the pointer to buffer that can hold conversion result
1831  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1832  */
1833 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1834 
1835 /**
1836  * Convert valid UTF-32 string into UTF-8 string.
1837  *
1838  * This function assumes that the input string is valid UTF-32.
1839  *
1840  * This function is not BOM-aware.
1841  *
1842  * @param input         the UTF-32 string to convert
1843  * @param length        the length of the string in 4-byte code units (char32_t)
1844  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1845  * @return number of written code units; 0 if conversion is not possible
1846  */
1847 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1848 
1849 /**
1850  * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 string.
1851  *
1852  * During the conversion also validation of the input string is done.
1853  * This function is suitable to work with inputs from untrusted sources.
1854  *
1855  * This function is not BOM-aware.
1856  *
1857  * @param input         the UTF-32 string to convert
1858  * @param length        the length of the string in 4-byte code units (char32_t)
1859  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1860  * @return number of written code units; 0 if input is not a valid UTF-32 string
1861  */
1862 simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1863 
1864 /**
1865  * Convert possibly broken UTF-32 string into UTF-16LE string.
1866  *
1867  * During the conversion also validation of the input string is done.
1868  * This function is suitable to work with inputs from untrusted sources.
1869  *
1870  * This function is not BOM-aware.
1871  *
1872  * @param input         the UTF-32 string to convert
1873  * @param length        the length of the string in 4-byte code units (char32_t)
1874  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1875  * @return number of written code units; 0 if input is not a valid UTF-32 string
1876  */
1877 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1878 
1879 /**
1880  * Convert possibly broken UTF-32 string into Latin1 string.
1881  *
1882  * During the conversion also validation of the input string is done.
1883  * This function is suitable to work with inputs from untrusted sources.
1884  *
1885  * This function is not BOM-aware.
1886  *
1887  * @param input         the UTF-32 string to convert
1888  * @param length        the length of the string in 4-byte code units (char32_t)
1889  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1890  * @return number of written code units; 0 if input is not a valid UTF-32 string
1891  */
1892 simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1893 
1894 
1895 /**
1896  * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
1897  *
1898  * During the conversion also validation of the input string is done.
1899  * This function is suitable to work with inputs from untrusted sources.
1900  *
1901  * This function is not BOM-aware.
1902  *
1903  * @param input         the UTF-32 string to convert
1904  * @param length        the length of the string in 4-byte code units (char32_t)
1905  * @param latin1_buffer   the pointer to buffer that can hold conversion result
1906  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1907  */
1908 simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1909 
1910 /**
1911  * Convert valid UTF-32 string into Latin1 string.
1912  *
1913  * This function assumes that the input string is valid UTF-32.
1914  *
1915  * This function is not BOM-aware.
1916  *
1917  * @param input         the UTF-32 string to convert
1918  * @param length        the length of the string in 4-byte code units (char32_t)
1919  * @param latin1_buffer   the pointer to buffer that can hold the conversion result
1920  * @return number of written code units; 0 if conversion is not possible
1921  */
1922 simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1923 
1924 /**
1925  * Convert possibly broken UTF-32 string into UTF-16BE string.
1926  *
1927  * During the conversion also validation of the input string is done.
1928  * This function is suitable to work with inputs from untrusted sources.
1929  *
1930  * This function is not BOM-aware.
1931  *
1932  * @param input         the UTF-32 string to convert
1933  * @param length        the length of the string in 4-byte code units (char32_t)
1934  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1935  * @return number of written code units; 0 if input is not a valid UTF-32 string
1936  */
1937 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1938 
1939 /**
1940  * Using native endianness, convert possibly broken UTF-32 string into UTF-16
1941  * string and stop on error.
1942  *
1943  * During the conversion also validation of the input string is done.
1944  * This function is suitable to work with inputs from untrusted sources.
1945  *
1946  * This function is not BOM-aware.
1947  *
1948  * @param input         the UTF-32 string to convert
1949  * @param length        the length of the string in 4-byte code units (char32_t)
1950  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1951  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1952  */
1953 simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1954 
1955 /**
1956  * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
1957  *
1958  * During the conversion also validation of the input string is done.
1959  * This function is suitable to work with inputs from untrusted sources.
1960  *
1961  * This function is not BOM-aware.
1962  *
1963  * @param input         the UTF-32 string to convert
1964  * @param length        the length of the string in 4-byte code units (char32_t)
1965  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1966  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1967  */
1968 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1969 
1970 /**
1971  * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
1972  *
1973  * During the conversion also validation of the input string is done.
1974  * This function is suitable to work with inputs from untrusted sources.
1975  *
1976  * This function is not BOM-aware.
1977  *
1978  * @param input         the UTF-32 string to convert
1979  * @param length        the length of the string in 4-byte code units (char32_t)
1980  * @param utf16_buffer   the pointer to buffer that can hold conversion result
1981  * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1982  */
1983 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1984 
1985 /**
1986  * Using native endianness, convert valid UTF-32 string into a UTF-16 string.
1987  *
1988  * This function assumes that the input string is valid UTF-32.
1989  *
1990  * This function is not BOM-aware.
1991  *
1992  * @param input         the UTF-32 string to convert
1993  * @param length        the length of the string in 4-byte code units (char32_t)
1994  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
1995  * @return number of written code units; 0 if conversion is not possible
1996  */
1997 simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1998 
1999 /**
2000  * Convert valid UTF-32 string into UTF-16LE string.
2001  *
2002  * This function assumes that the input string is valid UTF-32.
2003  *
2004  * This function is not BOM-aware.
2005  *
2006  * @param input         the UTF-32 string to convert
2007  * @param length        the length of the string in 4-byte code units (char32_t)
2008  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
2009  * @return number of written code units; 0 if conversion is not possible
2010  */
2011 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
2012 
2013 /**
2014  * Convert valid UTF-32 string into UTF-16BE string.
2015  *
2016  * This function assumes that the input string is valid UTF-32.
2017  *
2018  * This function is not BOM-aware.
2019  *
2020  * @param input         the UTF-32 string to convert
2021  * @param length        the length of the string in 4-byte code units (char32_t)
2022  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
2023  * @return number of written code units; 0 if conversion is not possible
2024  */
2025 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
2026 
2027 /**
2028  * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
2029  * from UTF-16BE to UTF-16LE.
2030  *
2031  * This function does not validate the input.
2032  *
2033  * This function is not BOM-aware.
2034  *
2035  * @param input         the UTF-16 string to process
2036  * @param length        the length of the string in 2-byte code units (char16_t)
2037  * @param output        the pointer to buffer that can hold the conversion result
2038  */
2039 void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept;
2040 
2041 /**
2042  * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
2043  *
2044  * This function does not validate the input.
2045  *
2046  * @param input         the UTF-32 string to convert
2047  * @param length        the length of the string in 4-byte code units (char32_t)
2048  * @return the number of bytes required to encode the UTF-32 string as UTF-8
2049  */
2050 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept;
2051 
2052 /**
2053  * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
2054  *
2055  * This function does not validate the input.
2056  *
2057  * @param input         the UTF-32 string to convert
2058  * @param length        the length of the string in 4-byte code units (char32_t)
2059  * @return the number of bytes required to encode the UTF-32 string as UTF-16
2060  */
2061 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept;
2062 
2063 /**
2064  * Using native endianness; Compute the number of bytes that this UTF-16
2065  * string would require in UTF-32 format.
2066  *
2067  * This function is equivalent to count_utf16.
2068  *
2069  * This function does not validate the input.
2070  *
2071  * This function is not BOM-aware.
2072  *
2073  * @param input         the UTF-16 string to convert
2074  * @param length        the length of the string in 2-byte code units (char16_t)
2075  * @return the number of bytes required to encode the UTF-16LE string as UTF-32
2076  */
2077 simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept;
2078 
2079 /**
2080  * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
2081  *
2082  * This function is equivalent to count_utf16le.
2083  *
2084  * This function does not validate the input.
2085  *
2086  * This function is not BOM-aware.
2087  *
2088  * @param input         the UTF-16LE string to convert
2089  * @param length        the length of the string in 2-byte code units (char16_t)
2090  * @return the number of bytes required to encode the UTF-16LE string as UTF-32
2091  */
2092 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept;
2093 
2094 /**
2095  * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
2096  *
2097  * This function is equivalent to count_utf16be.
2098  *
2099  * This function does not validate the input.
2100  *
2101  * This function is not BOM-aware.
2102  *
2103  * @param input         the UTF-16BE string to convert
2104  * @param length        the length of the string in 2-byte code units (char16_t)
2105  * @return the number of bytes required to encode the UTF-16BE string as UTF-32
2106  */
2107 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept;
2108 
2109 /**
2110  * Count the number of code points (characters) in the string assuming that
2111  * it is valid.
2112  *
2113  * This function assumes that the input string is valid UTF-16 (native endianness).
2114  *
2115  * This function is not BOM-aware.
2116  *
2117  * @param input         the UTF-16 string to process
2118  * @param length        the length of the string in 2-byte code units (char16_t)
2119  * @return number of code points
2120  */
2121 simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept;
2122 
2123 /**
2124  * Count the number of code points (characters) in the string assuming that
2125  * it is valid.
2126  *
2127  * This function assumes that the input string is valid UTF-16LE.
2128  *
2129  * This function is not BOM-aware.
2130  *
2131  * @param input         the UTF-16LE string to process
2132  * @param length        the length of the string in 2-byte code units (char16_t)
2133  * @return number of code points
2134  */
2135 simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept;
2136 
2137 /**
2138  * Count the number of code points (characters) in the string assuming that
2139  * it is valid.
2140  *
2141  * This function assumes that the input string is valid UTF-16BE.
2142  *
2143  * This function is not BOM-aware.
2144  *
2145  * @param input         the UTF-16BE string to process
2146  * @param length        the length of the string in 2-byte code units (char16_t)
2147  * @return number of code points
2148  */
2149 simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept;
2150 
2151 /**
2152  * Count the number of code points (characters) in the string assuming that
2153  * it is valid.
2154  *
2155  * This function assumes that the input string is valid UTF-8.
2156  *
2157  * @param input         the UTF-8 string to process
2158  * @param length        the length of the string in bytes
2159  * @return number of code points
2160  */
2161 simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept;
2162 
2163 /**
2164  * Given a valid UTF-8 string having a possibly truncated last character,
2165  * this function checks the end of string. If the last character is truncated (or partial),
2166  * then it returns a shorter length (shorter by 1 to 3 bytes) so that the short UTF-8
2167  * strings only contain complete characters. If there is no truncated character,
2168  * the original length is returned.
2169  *
2170  * This function assumes that the input string is valid UTF-8, but possibly truncated.
2171  *
2172  * @param input         the UTF-8 string to process
2173  * @param length        the length of the string in bytes
2174  * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes
2175  */
2176 simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
2177 
2178 /**
2179  * Given a valid UTF-16BE string having a possibly truncated last character,
2180  * this function checks the end of string. If the last character is truncated (or partial),
2181  * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16BE
2182  * strings only contain complete characters. If there is no truncated character,
2183  * the original length is returned.
2184  *
2185  * This function assumes that the input string is valid UTF-16BE, but possibly truncated.
2186  *
2187  * @param input         the UTF-16BE string to process
2188  * @param length        the length of the string in 2-byte code units (char16_t)
2189  * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2190  */
2191 simdutf_warn_unused size_t trim_partial_utf16be(const char16_t* input, size_t length);
2192 
2193 /**
2194  * Given a valid UTF-16LE string having a possibly truncated last character,
2195  * this function checks the end of string. If the last character is truncated (or partial),
2196  * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16LE
2197  * strings only contain complete characters. If there is no truncated character,
2198  * the original length is returned.
2199  *
2200  * This function assumes that the input string is valid UTF-16LE, but possibly truncated.
2201  *
2202  * @param input         the UTF-16LE string to process
2203  * @param length        the length of the string in 2-byte code units (char16_t)
2204  * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2205  */
2206 simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t length);
2207 
2208 
2209 /**
2210  * Given a valid UTF-16 string having a possibly truncated last character,
2211  * this function checks the end of string. If the last character is truncated (or partial),
2212  * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16
2213  * strings only contain complete characters. If there is no truncated character,
2214  * the original length is returned.
2215  *
2216  * This function assumes that the input string is valid UTF-16, but possibly truncated.
2217  * We use the native endianness.
2218  *
2219  * @param input         the UTF-16 string to process
2220  * @param length        the length of the string in 2-byte code units (char16_t)
2221  * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2222  */
2223 simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
2224 
2225 /**
2226  * An implementation of simdutf for a particular CPU architecture.
2227  *
2228  * Also used to maintain the currently active implementation. The active implementation is
2229  * automatically initialized on first use to the most advanced implementation supported by the host.
2230  */
2231 class implementation {
2232 public:
2233 
2234   /**
2235    * The name of this implementation.
2236    *
2237    *     const implementation *impl = simdutf::active_implementation;
2238    *     cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
2239    *
2240    * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
2241    */
name() const2242   virtual const std::string &name() const { return _name; }
2243 
2244   /**
2245    * The description of this implementation.
2246    *
2247    *     const implementation *impl = simdutf::active_implementation;
2248    *     cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
2249    *
2250    * @return the description of the implementation (the stored `_description` string)
2251    */
description() const2252   virtual const std::string &description() const { return _description; }
2253 
2254   /**
2255    * The instruction sets this implementation is compiled against
2256    * and the current CPU match. This function may poll the current CPU/system
2257    * and should therefore not be called too often if performance is a concern.
2258    *
2259    *
2260    * @return true if the implementation can be safely used on the current system (determined at runtime)
2261    */
2262   bool supported_by_runtime_system() const;
2263 
2264   /**
2265    * This function will try to detect the encoding
2266    * @param input the string to identify
2267    * @param length the length of the string in bytes.
2268    * @return the encoding type detected
2269    */
2270   virtual encoding_type autodetect_encoding(const char * input, size_t length) const noexcept;
2271 
2272   /**
2273    * This function will try to detect the possible encodings in one pass
2274    * @param input the string to identify
2275    * @param length the length of the string in bytes.
2276    * @return the encoding type detected
2277    */
2278   virtual int detect_encodings(const char * input, size_t length) const noexcept = 0;
2279 
2280   /**
2281    * @private For internal implementation use
2282    *
2283    * The instruction sets this implementation is compiled against.
2284    *
2285    * @return a mask of all required `internal::instruction_set::` values
2286    */
required_instruction_sets() const2287   virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
2288 
2289 
2290   /**
2291    * Validate the UTF-8 string.
2292    *
2293    * Overridden by each implementation.
2294    *
2295    * @param buf the UTF-8 string to validate.
2296    * @param len the length of the string in bytes.
2297    * @return true if and only if the string is valid UTF-8.
2298    */
2299   simdutf_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
2300 
2301   /**
2302    * Validate the UTF-8 string and stop on errors.
2303    *
2304    * Overridden by each implementation.
2305    *
2306    * @param buf the UTF-8 string to validate.
2307    * @param len the length of the string in bytes.
2308    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2309    */
2310   simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
2311 
2312   /**
2313    * Validate the ASCII string.
2314    *
2315    * Overridden by each implementation.
2316    *
2317    * @param buf the ASCII string to validate.
2318    * @param len the length of the string in bytes.
2319    * @return true if and only if the string is valid ASCII.
2320    */
2321   simdutf_warn_unused virtual bool validate_ascii(const char *buf, size_t len) const noexcept = 0;
2322 
2323   /**
2324    * Validate the ASCII string and stop on error.
2325    *
2326    * Overridden by each implementation.
2327    *
2328    * @param buf the ASCII string to validate.
2329    * @param len the length of the string in bytes.
2330    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2331    */
2332   simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
2333 
2334   /**
2335    * Validate the UTF-16LE string. This function may be best when you expect
2336    * the input to be almost always valid. Otherwise, consider using
2337    * validate_utf16le_with_errors.
2338    *
2339    * Overridden by each implementation.
2340    *
2341    * This function is not BOM-aware.
2342    *
2343    * @param buf the UTF-16LE string to validate.
2344    * @param len the length of the string in number of 2-byte code units (char16_t).
2345    * @return true if and only if the string is valid UTF-16LE.
2346    */
2347   simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
2348 
2349   /**
2350    * Validate the UTF-16BE string. This function may be best when you expect
2351    * the input to be almost always valid. Otherwise, consider using
2352    * validate_utf16be_with_errors.
2353    *
2354    * Overridden by each implementation.
2355    *
2356    * This function is not BOM-aware.
2357    *
2358    * @param buf the UTF-16BE string to validate.
2359    * @param len the length of the string in number of 2-byte code units (char16_t).
2360    * @return true if and only if the string is valid UTF-16BE.
2361    */
2362   simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
2363 
2364   /**
2365    * Validate the UTF-16LE string and stop on error.  It might be faster than
2366    * validate_utf16le when an error is expected to occur early.
2367    *
2368    * Overridden by each implementation.
2369    *
2370    * This function is not BOM-aware.
2371    *
2372    * @param buf the UTF-16LE string to validate.
2373    * @param len the length of the string in number of 2-byte code units (char16_t).
2374    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2375    */
2376   simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
2377 
2378   /**
2379    * Validate the UTF-16BE string and stop on error. It might be faster than
2380    * validate_utf16be when an error is expected to occur early.
2381    *
2382    * Overridden by each implementation.
2383    *
2384    * This function is not BOM-aware.
2385    *
2386    * @param buf the UTF-16BE string to validate.
2387    * @param len the length of the string in number of 2-byte code units (char16_t).
2388    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2389    */
2390   simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
2391 
2392   /**
2393    * Validate the UTF-32 string.
2394    *
2395    * Overridden by each implementation.
2396    *
2397    * This function is not BOM-aware.
2398    *
2399    * @param buf the UTF-32 string to validate.
2400    * @param len the length of the string in number of 4-byte code units (char32_t).
2401    * @return true if and only if the string is valid UTF-32.
2402    */
2403   simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
2404 
2405   /**
2406    * Validate the UTF-32 string and stop on error.
2407    *
2408    * Overridden by each implementation.
2409    *
2410    * This function is not BOM-aware.
2411    *
2412    * @param buf the UTF-32 string to validate.
2413    * @param len the length of the string in number of 4-byte code units (char32_t).
2414    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2415    */
2416   simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0;
2417 
2418   /**
2419    * Convert Latin1 string into UTF8 string.
2420    *
2421    * This function is suitable to work with inputs from untrusted sources.
2422    *
2423    * @param input         the Latin1 string to convert
2424    * @param length        the length of the string in bytes
2425    * @param utf8_output    the pointer to buffer that can hold conversion result
2426    * @return the number of written char; 0 if conversion is not possible
2427    */
2428   simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0;
2429 
2430 
2431   /**
2432    * Convert Latin1 string into UTF-16LE string.
2433    *
2434    * This function is suitable to work with inputs from untrusted sources.
2435    *
2436    * @param input         the Latin1 string to convert
2437    * @param length        the length of the string in bytes
2438    * @param utf16_output  the pointer to buffer that can hold conversion result
2439    * @return the number of written char16_t; 0 if conversion is not possible
2440    */
2441   simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2442 
2443   /**
2444    * Convert Latin1 string into UTF-16BE string.
2445    *
2446    * This function is suitable to work with inputs from untrusted sources.
2447    *
2448    * @param input         the Latin1 string to convert
2449    * @param length        the length of the string in bytes
2450    * @param utf16_output  the pointer to buffer that can hold conversion result
2451    * @return the number of written char16_t; 0 if conversion is not possible
2452    */
2453   simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2454 
2455   /**
2456    * Convert Latin1 string into UTF-32 string.
2457    *
2458    * This function is suitable to work with inputs from untrusted sources.
2459    *
2460    * @param input         the Latin1 string to convert
2461    * @param length        the length of the string in bytes
2462    * @param utf32_buffer  the pointer to buffer that can hold conversion result
2463    * @return the number of written char32_t; 0 if conversion is not possible
2464    */
2465   simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2466 
2467  /**
2468    * Convert possibly broken UTF-8 string into latin1 string.
2469    *
2470    * During the conversion also validation of the input string is done.
2471    * This function is suitable to work with inputs from untrusted sources.
2472    *
2473    * @param input         the UTF-8 string to convert
2474    * @param length        the length of the string in bytes
2475    * @param latin1_output  the pointer to buffer that can hold conversion result
2476    * @return the number of written char; 0 if the input was not valid UTF-8 string
2477    */
2478   simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2479 
2480   /**
2481    * Convert possibly broken UTF-8 string into latin1 string with errors
2482    *
2483    * During the conversion also validation of the input string is done.
2484    * This function is suitable to work with inputs from untrusted sources.
2485    *
2486    * @param input         the UTF-8 string to convert
2487    * @param length        the length of the string in bytes
2488    * @param latin1_output  the pointer to buffer that can hold conversion result
2489    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2490    */
2491   simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2492 
2493     /**
2494    * Convert valid UTF-8 string into latin1 string.
2495    *
2496    * This function assumes that the input string is valid UTF-8.
2497    *
2498    * This function is not BOM-aware.
2499    *
2500    * @param input         the UTF-8 string to convert
2501    * @param length        the length of the string in bytes
2502    * @param latin1_output  the pointer to buffer that can hold conversion result
2503    * @return the number of written char; 0 if the input was not valid UTF-8 string
2504    */
2505   simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2506 
2507 
2508   /**
2509    * Convert possibly broken UTF-8 string into UTF-16LE string.
2510    *
2511    * During the conversion also validation of the input string is done.
2512    * This function is suitable to work with inputs from untrusted sources.
2513    *
2514    * @param input         the UTF-8 string to convert
2515    * @param length        the length of the string in bytes
2516    * @param utf16_output  the pointer to buffer that can hold conversion result
2517    * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
2518    */
2519   simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2520 
2521   /**
2522    * Convert possibly broken UTF-8 string into UTF-16BE string.
2523    *
2524    * During the conversion also validation of the input string is done.
2525    * This function is suitable to work with inputs from untrusted sources.
2526    *
2527    * @param input         the UTF-8 string to convert
2528    * @param length        the length of the string in bytes
2529    * @param utf16_output  the pointer to buffer that can hold conversion result
2530    * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
2531    */
2532   simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2533 
2534   /**
2535    * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
2536    *
2537    * During the conversion also validation of the input string is done.
2538    * This function is suitable to work with inputs from untrusted sources.
2539    *
2540    * @param input         the UTF-8 string to convert
2541    * @param length        the length of the string in bytes
2542    * @param utf16_output  the pointer to buffer that can hold conversion result
2543    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2544    */
2545   simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2546 
2547   /**
2548    * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
2549    *
2550    * During the conversion also validation of the input string is done.
2551    * This function is suitable to work with inputs from untrusted sources.
2552    *
2553    * @param input         the UTF-8 string to convert
2554    * @param length        the length of the string in bytes
2555    * @param utf16_output  the pointer to buffer that can hold conversion result
2556    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
2557    */
2558   simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2559 
2560   /**
2561    * Convert possibly broken UTF-8 string into UTF-32 string.
2562    *
2563    * During the conversion also validation of the input string is done.
2564    * This function is suitable to work with inputs from untrusted sources.
2565    *
2566    * @param input         the UTF-8 string to convert
2567    * @param length        the length of the string in bytes
2568    * @param utf32_output  the pointer to buffer that can hold conversion result
2569    * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
2570    */
2571   simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
2572 
2573   /**
2574    * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
2575    *
2576    * During the conversion also validation of the input string is done.
2577    * This function is suitable to work with inputs from untrusted sources.
2578    *
2579    * @param input         the UTF-8 string to convert
2580    * @param length        the length of the string in bytes
2581    * @param utf32_output  the pointer to buffer that can hold conversion result
2582    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
2583    */
2584   simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
2585 
2586   /**
2587    * Convert valid UTF-8 string into UTF-16LE string.
2588    *
2589    * This function assumes that the input string is valid UTF-8.
2590    *
2591    * @param input         the UTF-8 string to convert
2592    * @param length        the length of the string in bytes
2593    * @param utf16_buffer  the pointer to buffer that can hold conversion result
2594    * @return the number of written char16_t
2595    */
2596   simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
2597 
2598 /**
2599    * Convert valid UTF-8 string into UTF-16BE string.
2600    *
2601    * This function assumes that the input string is valid UTF-8.
2602    *
2603    * @param input         the UTF-8 string to convert
2604    * @param length        the length of the string in bytes
2605    * @param utf16_buffer  the pointer to buffer that can hold conversion result
2606    * @return the number of written char16_t
2607    */
2608   simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
2609 
2610   /**
2611    * Convert valid UTF-8 string into UTF-32 string.
2612    *
2613    * This function assumes that the input string is valid UTF-8.
2614    *
2615    * @param input         the UTF-8 string to convert
2616    * @param length        the length of the string in bytes
2617    * @param utf32_buffer  the pointer to buffer that can hold conversion result
2618    * @return the number of written char32_t
2619    */
2620   simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2621 
2622   /**
2623    * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
2624    *
2625    * This function does not validate the input.
2626    *
2627    * @param input         the UTF-8 string to process
2628    * @param length        the length of the string in bytes
2629    * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
2630    */
2631   simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0;
2632 
2633    /**
2634    * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
2635    *
2636    * This function is equivalent to count_utf8.
2637    *
2638    * This function does not validate the input.
2639    *
2640    * @param input         the UTF-8 string to process
2641    * @param length        the length of the string in bytes
2642    * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
2643    */
2644   simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
2645 
2646   /**
2647    * Convert possibly broken UTF-16LE string into Latin1 string.
2648    *
2649    * During the conversion also validation of the input string is done.
2650    * This function is suitable to work with inputs from untrusted sources.
2651    *
2652    * This function is not BOM-aware.
2653    *
2654    * @param input         the UTF-16LE string to convert
2655    * @param length        the length of the string in 2-byte code units (char16_t)
2656    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2657    * @return number of written code units; 0 if input is not a valid UTF-16LE string
2658    */
2659   simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2660 
2661   /**
2662    * Convert possibly broken UTF-16BE string into Latin1 string.
2663    *
2664    * During the conversion also validation of the input string is done.
2665    * This function is suitable to work with inputs from untrusted sources.
2666    *
2667    * This function is not BOM-aware.
2668    *
2669    * @param input         the UTF-16BE string to convert
2670    * @param length        the length of the string in 2-byte code units (char16_t)
2671    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2672    * @return number of written code units; 0 if input is not a valid UTF-16BE string
2673    */
2674   simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2675 
2676   /**
2677    * Convert possibly broken UTF-16LE string into Latin1 string.
2678    *
2679    * During the conversion also validation of the input string is done.
2680    * This function is suitable to work with inputs from untrusted sources.
2681    * This function is not BOM-aware.
2682    *
2683    * @param input         the UTF-16LE string to convert
2684    * @param length        the length of the string in 2-byte code units (char16_t)
2685    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2686    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
2687    */
2688   simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2689 
2690   /**
2691    * Convert possibly broken UTF-16BE string into Latin1 string.
2692    *
2693    * During the conversion also validation of the input string is done.
2694    * This function is suitable to work with inputs from untrusted sources.
2695    * This function is not BOM-aware.
2696    *
2697    * @param input         the UTF-16BE string to convert
2698    * @param length        the length of the string in 2-byte code units (char16_t)
2699    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2700    * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
2701    */
2702   simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2703 
2704   /**
2705    * Convert valid UTF-16LE string into Latin1 string.
2706    *
2707    * This function assumes that the input string is valid UTF-16LE.
2708    *
2709    * This function is not BOM-aware.
2710    *
2711    * @param input         the UTF-16LE string to convert
2712    * @param length        the length of the string in 2-byte code units (char16_t)
2713    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2714    * @return number of written code units; 0 if conversion is not possible
2715    */
2716   simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2717 
2718   /**
2719    * Convert valid UTF-16BE string into Latin1 string.
2720    *
2721    * This function assumes that the input string is valid UTF-16BE.
2722    *
2723    * This function is not BOM-aware.
2724    *
2725    * @param input         the UTF-16BE string to convert
2726    * @param length        the length of the string in 2-byte code units (char16_t)
2727    * @param latin1_buffer   the pointer to buffer that can hold conversion result
2728    * @return number of written code units; 0 if conversion is not possible
2729    */
2730   simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2731 
2732   /**
2733    * Convert possibly broken UTF-16LE string into UTF-8 string.
2734    *
2735    * During the conversion also validation of the input string is done.
2736    * This function is suitable to work with inputs from untrusted sources.
2737    *
2738    * This function is not BOM-aware.
2739    *
2740    * @param input         the UTF-16LE string to convert
2741    * @param length        the length of the string in 2-byte code units (char16_t)
2742    * @param utf8_buffer   the pointer to buffer that can hold conversion result
2743    * @return number of written code units; 0 if input is not a valid UTF-16LE string
2744    */
2745   simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2746 
2747   /**
2748    * Convert possibly broken UTF-16BE string into UTF-8 string.
2749    *
2750    * During the conversion also validation of the input string is done.
2751    * This function is suitable to work with inputs from untrusted sources.
2752    *
2753    * This function is not BOM-aware.
2754    *
2755    * @param input         the UTF-16BE string to convert
2756    * @param length        the length of the string in 2-byte code units (char16_t)
2757    * @param utf8_buffer   the pointer to buffer that can hold conversion result
2758    * @return number of written code units; 0 if input is not a valid UTF-16BE string
2759    */
2760   simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2761 
  /**
   * Convert a possibly broken UTF-16LE string into a UTF-8 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
   */
  simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2776 
  /**
   * Convert a possibly broken UTF-16BE string into a UTF-8 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
   */
  simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2791 
  /**
   * Convert a valid UTF-16LE string into a UTF-8 string.
   *
   * This function assumes that the input string is valid UTF-16LE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2805 
  /**
   * Convert a valid UTF-16BE string into a UTF-8 string.
   *
   * This function assumes that the input string is valid UTF-16BE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2819 
  /**
   * Convert a possibly broken UTF-16LE string into a UTF-32 string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-16LE string
   */
  simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2834 
  /**
   * Convert a possibly broken UTF-16BE string into a UTF-32 string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-16BE string
   */
  simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2849 
  /**
   * Convert a possibly broken UTF-16LE string into a UTF-32 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char32_t written if successful.
   */
  simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2864 
  /**
   * Convert a possibly broken UTF-16BE string into a UTF-32 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char32_t written if successful.
   */
  simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2879 
  /**
   * Convert a valid UTF-16LE string into a UTF-32 string.
   *
   * This function assumes that the input string is valid UTF-16LE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2893 
  /**
   * Convert a valid UTF-16BE string into a UTF-32 string.
   *
   * This function assumes that the input string is valid UTF-16BE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param utf32_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2907 
  /**
   * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to measure
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of bytes required to encode the UTF-16LE string as UTF-8
   */
  simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
2920 
  /**
   * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to measure
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of bytes required to encode the UTF-16BE string as UTF-8
   */
  simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
2933 
  /**
   * Convert a possibly broken UTF-32 string into a Latin1 string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param latin1_buffer pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-32 string
   */
  simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2949 
  /**
   * Convert a possibly broken UTF-32 string into a Latin1 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param latin1_buffer pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
   */
  simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2965 
  /**
   * Convert a valid UTF-32 string into a Latin1 string.
   *
   * This function assumes that the input string is valid UTF-32.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param latin1_buffer pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2979 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-8 string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-32 string
   */
  simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2994 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-8 string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
3009 
  /**
   * Convert a valid UTF-32 string into a UTF-8 string.
   *
   * This function assumes that the input string is valid UTF-32.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf8_buffer   pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
3023 
3024 
    /**
   * Return the length that a Latin1 string of the given length would require in UTF-16 format.
   *
   * Only the length is passed in; no string content is examined.
   *
   * @param length        the length of the Latin1 string in bytes
   * @return the length of the equivalent UTF-16 string
   *         (NOTE(review): expected to be in 2-byte code units, as for utf16_length_from_utf32 -- confirm against the implementations)
   */
    simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0;
3034 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-16LE string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-32 string
   */
  simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3049 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-16BE string.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if input is not a valid UTF-32 string
   */
  simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3064 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-16LE string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char16_t written if successful.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3079 
  /**
   * Convert a possibly broken UTF-32 string into a UTF-16BE string and stop on error.
   *
   * The input is validated during the conversion, so this function is
   * suitable for inputs from untrusted sources.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char16_t written if successful.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3094 
  /**
   * Convert a valid UTF-32 string into a UTF-16LE string.
   *
   * This function assumes that the input string is valid UTF-32.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3108 
  /**
   * Convert a valid UTF-32 string into a UTF-16BE string.
   *
   * This function assumes that the input string is valid UTF-32.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @param utf16_buffer  pointer to a buffer that can hold the conversion result
   * @return number of written code units; 0 if conversion is not possible
   */
  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3122 
  /**
   * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE,
   * or from UTF-16BE to UTF-16LE.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16 string to process
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @param output        pointer to a buffer that can hold the conversion result
   */
  virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
3136 
 /**
   * Return the number of bytes that this Latin1 string would require in UTF-8 format.
   *
   * @param input         the Latin1 string to measure
   * @param length        the length of the string in bytes
   * @return the number of bytes required to encode the Latin1 string as UTF-8
   */
    simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0;
3145 
  /**
   * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
   *
   * This function does not validate the input.
   *
   * @param input         the UTF-32 string to measure
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of bytes required to encode the UTF-32 string as UTF-8
   */
  simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
3156 
  /**
   * Compute the number of bytes that a UTF-32 string of the given length would require in Latin1 format.
   *
   * This function does not validate the input: only the length is passed in,
   * no string content is examined.
   *
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of bytes required to encode the UTF-32 string as Latin1
   */
  simdutf_warn_unused virtual size_t latin1_length_from_utf32(size_t length) const noexcept = 0;
3166 
  /**
   * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
   *
   * This function does not validate the input.
   *
   * @param input         the UTF-8 string to measure
   * @param length        the length of the string in bytes
   * @return the number of bytes required to encode the UTF-8 string as Latin1
   */
  simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0;
3177 
  /**
   * Compute the number of bytes that a UTF-16LE/BE string of the given length would require in Latin1 format.
   *
   * This function does not validate the input: only the length is passed in,
   * no string content is examined.
   *
   * This function is not BOM-aware.
   *
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of bytes required to encode the UTF-16 string as Latin1
   */
  simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
3190 
  /**
   * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
   *
   * This function does not validate the input.
   *
   * @param input         the UTF-32 string to measure
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of two-byte code units required to encode the UTF-32 string as UTF-16
   */
  simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
3201 
3202 
    /**
   * Return the length that a Latin1 string of the given length would require in UTF-32 format.
   *
   * Only the length is passed in; no string content is examined.
   *
   * @param length        the length of the Latin1 string in bytes
   * @return the length of the equivalent UTF-32 string
   *         (NOTE(review): expected to be in 4-byte code units (char32_t) -- confirm against the implementations)
   */
    simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0;
3213 
  /**
   * Compute the number of 4-byte code units (char32_t) that this UTF-16LE string would require in UTF-32 format.
   *
   * This function is equivalent to count_utf16le: the result is a count of
   * code points, not a size in bytes.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to measure
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of 4-byte code units required to encode the UTF-16LE string as UTF-32
   */
  simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
3228 
  /**
   * Compute the number of 4-byte code units (char32_t) that this UTF-16BE string would require in UTF-32 format.
   *
   * This function is equivalent to count_utf16be: the result is a count of
   * code points, not a size in bytes.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to measure
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of 4-byte code units required to encode the UTF-16BE string as UTF-32
   */
  simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
3243 
  /**
   * Count the number of code points (characters) in the string, assuming that
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-16LE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to process
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of code points
   */
  simdutf_warn_unused virtual size_t count_utf16le(const char16_t * input, size_t length) const noexcept = 0;
3257 
  /**
   * Count the number of code points (characters) in the string, assuming that
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-16BE.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to process
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of code points
   */
  simdutf_warn_unused virtual size_t count_utf16be(const char16_t * input, size_t length) const noexcept = 0;
3271 
3272 
  /**
   * Count the number of code points (characters) in the string, assuming that
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-8.
   *
   * @param input         the UTF-8 string to process
   * @param length        the length of the string in bytes
   * @return the number of code points
   */
  simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0;
3284 
3285 
3286 
3287 protected:
3288   /** @private Construct an implementation with the given name and description. For subclasses. */
implementation( std::string name, std::string description, uint32_t required_instruction_sets )3289   simdutf_really_inline implementation(
3290     std::string name,
3291     std::string description,
3292     uint32_t required_instruction_sets
3293   ) :
3294     _name(name),
3295     _description(description),
3296     _required_instruction_sets(required_instruction_sets)
3297   {
3298   }
  /** Defaulted virtual destructor. Declared under `protected:`, so it is not accessible to code outside the class hierarchy. */
  virtual ~implementation()=default;
3300 
3301 private:
  /**
   * The name of this implementation.
   * Const: initialized by the constructor and never modified afterwards.
   */
  const std::string _name;

  /**
   * The description of this implementation.
   * Const: initialized by the constructor and never modified afterwards.
   */
  const std::string _description;

  /**
   * Instruction sets required for this implementation.
   * Const: initialized by the constructor and never modified afterwards.
   */
  const uint32_t _required_instruction_sets;
3316 };
3317 
3318 /** @private */
3319 namespace internal {
3320 
3321 /**
3322  * The list of available implementations compiled into simdutf.
3323  */
3324 class available_implementation_list {
3325 public:
3326   /** Get the list of available implementations compiled into simdutf */
available_implementation_list()3327   simdutf_really_inline available_implementation_list() {}
3328   /** Number of implementations */
3329   size_t size() const noexcept;
3330   /** STL const begin() iterator */
3331   const implementation * const *begin() const noexcept;
3332   /** STL const end() iterator */
3333   const implementation * const *end() const noexcept;
3334 
3335   /**
3336    * Get the implementation with the given name.
3337    *
3338    * Case sensitive.
3339    *
3340    *     const implementation *impl = simdutf::available_implementations["westmere"];
3341    *     if (!impl) { exit(1); }
3342    *     if (!imp->supported_by_runtime_system()) { exit(1); }
3343    *     simdutf::active_implementation = impl;
3344    *
3345    * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
3346    * @return the implementation, or nullptr if the parse failed.
3347    */
3348   const implementation * operator[](const std::string &name) const noexcept {
3349     for (const implementation * impl : *this) {
3350       if (impl->name() == name) { return impl; }
3351     }
3352     return nullptr;
3353   }
3354 
3355   /**
3356    * Detect the most advanced implementation supported by the current host.
3357    *
3358    * This is used to initialize the implementation on startup.
3359    *
3360    *     const implementation *impl = simdutf::available_implementation::detect_best_supported();
3361    *     simdutf::active_implementation = impl;
3362    *
3363    * @return the most advanced supported implementation for the current host, or an
3364    *         implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
3365    *         implementation. Will never return nullptr.
3366    */
3367   const implementation *detect_best_supported() const noexcept;
3368 };
3369 
/**
 * Small pointer holder used for the active implementation.
 *
 * When SIMDUTF_NO_THREADS is defined the pointer is stored as a plain `T*`;
 * otherwise it is stored in a `std::atomic<T*>` and read through `load()`.
 * Either way the wrapper converts implicitly to `T*` / `const T*`, can be
 * dereferenced with `*` and `->`, and can be re-pointed by assigning a `T*`.
 */
template<typename T>
class atomic_ptr {
public:
  /** Wrap the given raw pointer. */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access through a const wrapper.
  operator const T*() const { return current(); }
  const T& operator*() const { return *current(); }
  const T* operator->() const { return current(); }

  // Access through a non-const wrapper.
  operator T*() { return current(); }
  T& operator*() { return *current(); }
  T* operator->() { return current(); }

  /** Replace the stored pointer and return this wrapper. */
  atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }

private:
#if defined(SIMDUTF_NO_THREADS)
  /** Current value of the stored pointer. */
  T* current() const { return ptr; }
  T* ptr;
#else
  /** Current value of the stored pointer, obtained with an atomic load. */
  T* current() const { return ptr.load(); }
  std::atomic<T*> ptr;
#endif
};
3404 
3405 class detect_best_supported_implementation_on_first_use;
3406 
3407 } // namespace internal
3408 
/**
 * The list of available implementations compiled into simdutf.
 *
 * @return a const reference to the list; it can be iterated with begin()/end()
 *         or searched by implementation name with operator[].
 */
extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
3413 
/**
  * The active implementation.
  *
  * Automatically initialized on first use to the most advanced implementation supported by this hardware.
  *
  * @return a reference to the pointer holder; assigning a `const implementation *`
  *         to it replaces the stored pointer.
  */
extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
3420 
3421 
3422 } // namespace simdutf
3423 
3424 #endif // SIMDUTF_IMPLEMENTATION_H
3425 /* end file include/simdutf/implementation.h */
3426 
3427 
3428 // Implementation-internal files (must be included before the implementations themselves, to keep
3429 // amalgamation working--otherwise, the first time a file is included, it might be put inside the
3430 // #ifdef SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't
3431 // compile unless that implementation is turned on).
3432 
3433 
3434 SIMDUTF_POP_DISABLE_WARNINGS
3435 
3436 #endif // SIMDUTF_H
3437 /* end file include/simdutf.h */
3438