xref: /third_party/node/deps/simdutf/simdutf.h (revision 1cb0ef41)
1/* auto-generated on 2023-12-01 13:59:01 -0500. Do not edit! */
2/* begin file include/simdutf.h */
3#ifndef SIMDUTF_H
4#define SIMDUTF_H
5#include <cstring>
6
7/* begin file include/simdutf/compiler_check.h */
8#ifndef SIMDUTF_COMPILER_CHECK_H
9#define SIMDUTF_COMPILER_CHECK_H
10
11#ifndef __cplusplus
12#error simdutf requires a C++ compiler
13#endif
14
15#ifndef SIMDUTF_CPLUSPLUS
16#if defined(_MSVC_LANG) && !defined(__clang__)
17#define SIMDUTF_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
18#else
19#define SIMDUTF_CPLUSPLUS __cplusplus
20#endif
21#endif
22
23// C++ 17
24#if !defined(SIMDUTF_CPLUSPLUS17) && (SIMDUTF_CPLUSPLUS >= 201703L)
25#define SIMDUTF_CPLUSPLUS17 1
26#endif
27
28// C++ 14
29#if !defined(SIMDUTF_CPLUSPLUS14) && (SIMDUTF_CPLUSPLUS >= 201402L)
30#define SIMDUTF_CPLUSPLUS14 1
31#endif
32
33// C++ 11
34#if !defined(SIMDUTF_CPLUSPLUS11) && (SIMDUTF_CPLUSPLUS >= 201103L)
35#define SIMDUTF_CPLUSPLUS11 1
36#endif
37
38#ifndef SIMDUTF_CPLUSPLUS11
39#error simdutf requires a compiler compliant with the C++11 standard
40#endif
41
42#endif // SIMDUTF_COMPILER_CHECK_H
43/* end file include/simdutf/compiler_check.h */
44/* begin file include/simdutf/common_defs.h */
45#ifndef SIMDUTF_COMMON_DEFS_H
46#define SIMDUTF_COMMON_DEFS_H
47
48#include <cassert>
49/* begin file include/simdutf/portability.h */
50#ifndef SIMDUTF_PORTABILITY_H
51#define SIMDUTF_PORTABILITY_H
52
53#include <cstddef>
54#include <cstdint>
55#include <cstdlib>
56#include <cfloat>
57#include <cassert>
58#ifndef _WIN32
59// strcasecmp, strncasecmp
60#include <strings.h>
61#endif
62
63/**
64 * We want to check that it is actually a little endian system at
65 * compile-time.
66 */
67
68#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
69#define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
70#elif defined(_WIN32)
71#define SIMDUTF_IS_BIG_ENDIAN 0
72#else
73#if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
74#include <machine/endian.h>
75#elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
76#include <sys/byteorder.h>
77#else  // defined(__APPLE__) || defined(__FreeBSD__)
78
79#ifdef __has_include
80#if __has_include(<endian.h>)
81#include <endian.h>
82#endif //__has_include(<endian.h>)
83#endif //__has_include
84
85#endif // defined(__APPLE__) || defined(__FreeBSD__)
86
87
// Fallback used when the compiler did not predefine both __BYTE_ORDER__ and
// __ORDER_BIG_ENDIAN__. If the byte-order macros are still unavailable after
// the system header included above, assume little endian; otherwise compare
// them. (`#ifndef` accepts a single identifier, so the former
// `#ifndef !defined(...)` was ill-formed; this must be an `#if` expression.)
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
#define SIMDUTF_IS_BIG_ENDIAN 0
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define SIMDUTF_IS_BIG_ENDIAN 0
#else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define SIMDUTF_IS_BIG_ENDIAN 1
#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
97
98#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
99
100
101/**
102 * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined.
103 */
104
105#ifdef _MSC_VER
106#define SIMDUTF_VISUAL_STUDIO 1
107/**
108 * We want to differentiate carefully between
109 * clang under visual studio and regular visual
110 * studio.
111 *
112 * Under clang for Windows, we enable:
113 *  * target pragmas so that part and only part of the
114 *     code gets compiled for advanced instructions.
115 *
116 */
117#ifdef __clang__
118// clang under visual studio
119#define SIMDUTF_CLANG_VISUAL_STUDIO 1
120#else
121// just regular visual studio (best guess)
122#define SIMDUTF_REGULAR_VISUAL_STUDIO 1
123#endif // __clang__
124#endif // _MSC_VER
125
126#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
127// https://en.wikipedia.org/wiki/C_alternative_tokens
128// This header should have no effect, except maybe
129// under Visual Studio.
130#include <iso646.h>
131#endif
132
133#if defined(__x86_64__) || defined(_M_AMD64)
134#define SIMDUTF_IS_X86_64 1
135#elif defined(__aarch64__) || defined(_M_ARM64)
136#define SIMDUTF_IS_ARM64 1
137#elif defined(__PPC64__) || defined(_M_PPC64)
138//#define SIMDUTF_IS_PPC64 1
// The simdutf library does not yet support SIMD acceleration under
140// POWER processors. Please see https://github.com/lemire/simdutf/issues/51
141#elif defined(__s390__)
142// s390 IBM system. Big endian.
143#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
144// RISC-V 64-bit
145#else
146// The simdutf library is designed
147// for 64-bit processors and it seems that you are not
148// compiling for a known 64-bit platform. Please
149// use a 64-bit target such as x64 or 64-bit ARM for best performance.
150#define SIMDUTF_IS_32BITS 1
151
152// We do not support 32-bit platforms, but it can be
153// handy to identify them.
154#if defined(_M_IX86) || defined(__i386__)
155#define SIMDUTF_IS_X86_32BITS 1
156#elif defined(__arm__) || defined(_M_ARM)
157#define SIMDUTF_IS_ARM_32BITS 1
158#elif defined(__PPC__) || defined(_M_PPC)
159#define SIMDUTF_IS_PPC_32BITS 1
160#endif
161
162#endif // defined(__x86_64__) || defined(_M_AMD64)
163
164#ifdef SIMDUTF_IS_32BITS
165#ifndef SIMDUTF_NO_PORTABILITY_WARNING
166// In the future, we may want to warn users of 32-bit systems that
167// the simdutf does not support accelerated kernels for such systems.
168#endif // SIMDUTF_NO_PORTABILITY_WARNING
169#endif // SIMDUTF_IS_32BITS
170
171// this is almost standard?
172#define SIMDUTF_STRINGIFY_IMPLEMENTATION_(a) #a
173#define SIMDUTF_STRINGIFY(a) SIMDUTF_STRINGIFY_IMPLEMENTATION_(a)
174
175// Our fast kernels require 64-bit systems.
176//
177// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
178// Furthermore, the number of SIMD registers is reduced.
179//
180// On 32-bit ARM, we would have smaller registers.
181//
182// The simdutf users should still have the fallback kernel. It is
183// slower, but it should run everywhere.
184
185//
186// Enable valid runtime implementations, and select SIMDUTF_BUILTIN_IMPLEMENTATION
187//
188
189// We are going to use runtime dispatch.
190#ifdef SIMDUTF_IS_X86_64
191#ifdef __clang__
192// clang does not have GCC push pop
193// warning: clang attribute push can't be used within a namespace in clang up
194// til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be *outside* of a
195// namespace.
196#define SIMDUTF_TARGET_REGION(T)                                                       \
197  _Pragma(SIMDUTF_STRINGIFY(                                                           \
198      clang attribute push(__attribute__((target(T))), apply_to = function)))
199#define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop")
200#elif defined(__GNUC__)
201// GCC is easier
202#define SIMDUTF_TARGET_REGION(T)                                                       \
203  _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T)))
204#define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options")
205#endif // clang then gcc
206
207#endif // x86
208
209// Default target region macros don't do anything.
210#ifndef SIMDUTF_TARGET_REGION
211#define SIMDUTF_TARGET_REGION(T)
212#define SIMDUTF_UNTARGET_REGION
213#endif
214
215// Is threading enabled?
216#if defined(_REENTRANT) || defined(_MT)
217#ifndef SIMDUTF_THREADS_ENABLED
218#define SIMDUTF_THREADS_ENABLED
219#endif
220#endif
221
222// workaround for large stack sizes under -O0.
223// https://github.com/simdutf/simdutf/issues/691
224#ifdef __APPLE__
225#ifndef __OPTIMIZE__
226// Apple systems have small stack sizes in secondary threads.
227// Lack of compiler optimization may generate high stack usage.
228// Users may want to disable threads for safety, but only when
229// in debug mode which we detect by the fact that the __OPTIMIZE__
230// macro is not defined.
231#undef SIMDUTF_THREADS_ENABLED
232#endif
233#endif
234
235#ifdef SIMDUTF_VISUAL_STUDIO
236// This is one case where we do not distinguish between
237// regular visual studio and clang under visual studio.
238// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has)
239#define simdutf_strcasecmp _stricmp
240#define simdutf_strncasecmp _strnicmp
241#else
242// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8).
243// So they are only useful for ASCII in our context.
244// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
245#define simdutf_strcasecmp strcasecmp
246#define simdutf_strncasecmp strncasecmp
247#endif
248
249#ifdef NDEBUG
250
251#ifdef SIMDUTF_VISUAL_STUDIO
252#define SIMDUTF_UNREACHABLE() __assume(0)
253#define SIMDUTF_ASSUME(COND) __assume(COND)
254#else
255#define SIMDUTF_UNREACHABLE() __builtin_unreachable();
256#define SIMDUTF_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0)
257#endif
258
259#else // NDEBUG
260
261#define SIMDUTF_UNREACHABLE() assert(0);
262#define SIMDUTF_ASSUME(COND) assert(COND)
263
264#endif
265
266
267#if defined(__GNUC__) && !defined(__clang__)
268#if __GNUC__ >= 11
269#define SIMDUTF_GCC11ORMORE 1
270#endif //  __GNUC__ >= 11
271#endif // defined(__GNUC__) && !defined(__clang__)
272
273
274#endif // SIMDUTF_PORTABILITY_H
275/* end file include/simdutf/portability.h */
276/* begin file include/simdutf/avx512.h */
277#ifndef SIMDUTF_AVX512_H_
278#define SIMDUTF_AVX512_H_
279
280/*
281    It's possible to override AVX512 settings with cmake DCMAKE_CXX_FLAGS.
282
    All preprocessor directives have the form `SIMDUTF_HAS_AVX512{feature}`,
284    where a feature is a code name for extensions.
285
286    Please see the listing below to find which are supported.
287*/
288
289#ifndef SIMDUTF_HAS_AVX512F
290# if defined(__AVX512F__) && __AVX512F__ == 1
291#   define SIMDUTF_HAS_AVX512F 1
292# endif
293#endif
294
295#ifndef SIMDUTF_HAS_AVX512DQ
296# if defined(__AVX512DQ__) && __AVX512DQ__ == 1
297#   define SIMDUTF_HAS_AVX512DQ 1
298# endif
299#endif
300
301#ifndef SIMDUTF_HAS_AVX512IFMA
302# if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1
303#   define SIMDUTF_HAS_AVX512IFMA 1
304# endif
305#endif
306
307#ifndef SIMDUTF_HAS_AVX512CD
308# if defined(__AVX512CD__) && __AVX512CD__ == 1
309#   define SIMDUTF_HAS_AVX512CD 1
310# endif
311#endif
312
313#ifndef SIMDUTF_HAS_AVX512BW
314# if defined(__AVX512BW__) && __AVX512BW__ == 1
315#   define SIMDUTF_HAS_AVX512BW 1
316# endif
317#endif
318
319#ifndef SIMDUTF_HAS_AVX512VL
320# if defined(__AVX512VL__) && __AVX512VL__ == 1
321#   define SIMDUTF_HAS_AVX512VL 1
322# endif
323#endif
324
325#ifndef SIMDUTF_HAS_AVX512VBMI
326# if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1
327#   define SIMDUTF_HAS_AVX512VBMI 1
328# endif
329#endif
330
331#ifndef SIMDUTF_HAS_AVX512VBMI2
332# if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1
333#   define SIMDUTF_HAS_AVX512VBMI2 1
334# endif
335#endif
336
337#ifndef SIMDUTF_HAS_AVX512VNNI
338# if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1
339#   define SIMDUTF_HAS_AVX512VNNI 1
340# endif
341#endif
342
343#ifndef SIMDUTF_HAS_AVX512BITALG
344# if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1
345#   define SIMDUTF_HAS_AVX512BITALG 1
346# endif
347#endif
348
349#ifndef SIMDUTF_HAS_AVX512VPOPCNTDQ
350# if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1
351#   define SIMDUTF_HAS_AVX512VPOPCNTDQ 1
352# endif
353#endif
354
355#endif // SIMDUTF_AVX512_H_
356/* end file include/simdutf/avx512.h */
357
358
#if defined(__GNUC__)
  // Marks a block with a name so that MCA analysis can see it.
  // Each marker emits an assembler comment line and already ends with ';'.
  #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
  #define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
  // Runs `block` between the begin/end markers for `name`. The markers must be
  // spelled with the SIMDUTF_ prefix: the unprefixed names are not defined.
  #define SIMDUTF_DEBUG_BLOCK(name, block) SIMDUTF_BEGIN_DEBUG_BLOCK(name); block; SIMDUTF_END_DEBUG_BLOCK(name);
#else
  // No markers are emitted for other compilers.
  #define SIMDUTF_BEGIN_DEBUG_BLOCK(name)
  #define SIMDUTF_END_DEBUG_BLOCK(name)
  // Still execute `block` so that behavior does not depend on the compiler.
  #define SIMDUTF_DEBUG_BLOCK(name, block) block;
#endif
369
370// Align to N-byte boundary
371#define SIMDUTF_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
372#define SIMDUTF_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
373
374#define SIMDUTF_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
375
376#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO)
377
378  #define simdutf_really_inline __forceinline
379  #define simdutf_never_inline __declspec(noinline)
380
381  #define simdutf_unused
382  #define simdutf_warn_unused
383
384  #ifndef simdutf_likely
385  #define simdutf_likely(x) x
386  #endif
387  #ifndef simdutf_unlikely
388  #define simdutf_unlikely(x) x
389  #endif
390
391  #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
392  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
393  #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
394  // Get rid of Intellisense-only warnings (Code Analysis)
395  // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
396  #ifdef __has_include
397  #if __has_include(<CppCoreCheck\Warnings.h>)
398  #include <CppCoreCheck\Warnings.h>
399  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
400  #endif
401  #endif
402
403  #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS
404  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
405  #endif
406
407  #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996)
408  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING
409  #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop ))
410
411#else // SIMDUTF_REGULAR_VISUAL_STUDIO
412
413  #define simdutf_really_inline inline __attribute__((always_inline))
414  #define simdutf_never_inline inline __attribute__((noinline))
415
416  #define simdutf_unused __attribute__((unused))
417  #define simdutf_warn_unused __attribute__((warn_unused_result))
418
419  #ifndef simdutf_likely
420  #define simdutf_likely(x) __builtin_expect(!!(x), 1)
421  #endif
422  #ifndef simdutf_unlikely
423  #define simdutf_unlikely(x) __builtin_expect(!!(x), 0)
424  #endif
425
426  #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
427  // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary
428  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS SIMDUTF_PUSH_DISABLE_WARNINGS \
429    SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \
430    SIMDUTF_DISABLE_GCC_WARNING(-Wall) \
431    SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \
432    SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \
433    SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \
434    SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
435    SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
436    SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \
437    SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \
438    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \
439    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable)
440  #define SIMDUTF_PRAGMA(P) _Pragma(#P)
441  #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING)
442  #if defined(SIMDUTF_CLANG_VISUAL_STUDIO)
443  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include)
444  #else
445  #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
446  #endif
447  #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
448  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow)
449  #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
450
451
452
453#endif // MSC_VER
454
455#ifndef SIMDUTF_DLLIMPORTEXPORT
456    #if defined(SIMDUTF_VISUAL_STUDIO)
457      /**
458       * It does not matter here whether you are using
459       * the regular visual studio or clang under visual
460       * studio.
461       */
462      #if SIMDUTF_USING_LIBRARY
463      #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
464      #else
465      #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
466      #endif
467    #else
468      #define SIMDUTF_DLLIMPORTEXPORT
469    #endif
470#endif
471
472/// If EXPR is an error, returns it.
473#define SIMDUTF_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
474
475
476#endif // SIMDUTF_COMMON_DEFS_H
477/* end file include/simdutf/common_defs.h */
478/* begin file include/simdutf/encoding_types.h */
479#include <string>
480
481namespace simdutf {
482
/**
 * Supported encodings. Every known encoding occupies its own bit so that
 * several of them can be combined in one integer; `unspecified` is the empty
 * value.
 */
enum encoding_type {
  unspecified = 0,

  UTF8 = 1 << 0,      // BOM 0xef 0xbb 0xbf
  UTF16_LE = 1 << 1,  // BOM 0xff 0xfe
  UTF16_BE = 1 << 2,  // BOM 0xfe 0xff
  UTF32_LE = 1 << 3,  // BOM 0xff 0xfe 0x00 0x00
  UTF32_BE = 1 << 4,  // BOM 0x00 0x00 0xfe 0xff
  Latin1 = 1 << 5
};
493
/** Byte order selector: LITTLE is 0 and BIG is 1. */
enum endianness { LITTLE, BIG };
498
499bool match_system(endianness e);
500
501std::string to_string(encoding_type bom);
502
503// Note that BOM for UTF8 is discouraged.
504namespace BOM {
505
506/**
 * Checks for a BOM. If none is found, returns unspecified.
 * @param byte          the string to process
509 * @param length        the length of the string in code units
510 * @return the corresponding encoding
511 */
512
513encoding_type check_bom(const uint8_t* byte, size_t length);
514encoding_type check_bom(const char* byte, size_t length);
515/**
516 * Returns the size, in bytes, of the BOM for a given encoding type.
517 * Note that UTF8 BOM are discouraged.
518 * @param bom         the encoding type
519 * @return the size in bytes of the corresponding BOM
520 */
521size_t bom_byte_size(encoding_type bom);
522
523} // BOM namespace
524} // simdutf namespace
525/* end file include/simdutf/encoding_types.h */
526/* begin file include/simdutf/error.h */
527#ifndef SIMDUTF_ERROR_H
528#define SIMDUTF_ERROR_H
529namespace simdutf {
530
/** Outcome of a validation or transcoding call. */
enum error_code {
  /// No error.
  SUCCESS = 0,
  /// Any byte must have fewer than 5 header bits.
  HEADER_BITS = 1,
  /// The leading byte must be followed by N-1 continuation bytes, where N is
  /// the UTF-8 character length. This is also the error when the input is
  /// truncated.
  TOO_SHORT = 2,
  /// We either have too many consecutive continuation bytes or the string
  /// starts with a continuation byte.
  TOO_LONG = 3,
  /// The decoded character must be above U+7F for two-byte characters, U+7FF
  /// for three-byte characters, and U+FFFF for four-byte characters.
  OVERLONG = 4,
  /// The decoded character must be less than or equal to U+10FFFF, less than
  /// or equal to U+7F for ASCII, or less than or equal to U+FF for Latin1.
  TOO_LARGE = 5,
  /// The decoded character must not be in U+D800...DFFF (UTF-8 or UTF-32), OR
  /// a high surrogate must be followed by a low surrogate and a low surrogate
  /// must be preceded by a high surrogate (UTF-16), OR there must be no
  /// surrogate at all (Latin1).
  SURROGATE = 6,
  /// Not related to validation/transcoding.
  OTHER = 7
};
545
/**
 * Pairs an error code with a position/count, as returned by the
 * `*_with_errors` functions declared in this header.
 */
struct result {
  // SUCCESS, or the kind of failure that was detected.
  error_code error;
  // In case of error, indicates the position of the error. In case of
  // success, indicates the number of code units validated/written.
  size_t count;

  // Default constructor; defined elsewhere, so the initial field values are
  // not visible from this declaration.
  simdutf_really_inline result();

  // Two-argument constructor; presumably stores the given error code and
  // count — the definition is not visible here, confirm.
  simdutf_really_inline result(error_code, size_t);
};
554
555}
556#endif
557/* end file include/simdutf/error.h */
558
559SIMDUTF_PUSH_DISABLE_WARNINGS
560SIMDUTF_DISABLE_UNDESIRED_WARNINGS
561
562// Public API
563/* begin file include/simdutf/simdutf_version.h */
564// /include/simdutf/simdutf_version.h automatically generated by release.py,
565// do not change by hand
566#ifndef SIMDUTF_SIMDUTF_VERSION_H
567#define SIMDUTF_SIMDUTF_VERSION_H
568
569/** The version of simdutf being used (major.minor.revision) */
570#define SIMDUTF_VERSION "4.0.8"
571
572namespace simdutf {
573enum {
574  /**
575   * The major version (MAJOR.minor.revision) of simdutf being used.
576   */
577  SIMDUTF_VERSION_MAJOR = 4,
578  /**
579   * The minor version (major.MINOR.revision) of simdutf being used.
580   */
581  SIMDUTF_VERSION_MINOR = 0,
582  /**
583   * The revision (major.minor.REVISION) of simdutf being used.
584   */
585  SIMDUTF_VERSION_REVISION = 8
586};
587} // namespace simdutf
588
589#endif // SIMDUTF_SIMDUTF_VERSION_H
590/* end file include/simdutf/simdutf_version.h */
591/* begin file include/simdutf/implementation.h */
592#ifndef SIMDUTF_IMPLEMENTATION_H
593#define SIMDUTF_IMPLEMENTATION_H
594#include <string>
595#if !defined(SIMDUTF_NO_THREADS)
596#include <atomic>
597#endif
598#include <vector>
599#include <tuple>
600/* begin file include/simdutf/internal/isadetection.h */
601/* From
602https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
603Highly modified.
604
605Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
606Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
607Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
608Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
609Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
610Copyright (c) 2011-2013 NYU                      (Clement Farabet)
611Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
612Iain Melvin, Jason Weston) Copyright (c) 2006      Idiap Research Institute
613(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
614Samy Bengio, Johnny Mariethoz)
615
616All rights reserved.
617
618Redistribution and use in source and binary forms, with or without
619modification, are permitted provided that the following conditions are met:
620
6211. Redistributions of source code must retain the above copyright
622   notice, this list of conditions and the following disclaimer.
623
6242. Redistributions in binary form must reproduce the above copyright
625   notice, this list of conditions and the following disclaimer in the
626   documentation and/or other materials provided with the distribution.
627
6283. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
629America and IDIAP Research Institute nor the names of its contributors may be
630   used to endorse or promote products derived from this software without
631   specific prior written permission.
632
633THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
634AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
635IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
636ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
637LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
638CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
639SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
640INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
641CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
642ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
643POSSIBILITY OF SUCH DAMAGE.
644*/
645
646#ifndef SIMDutf_INTERNAL_ISADETECTION_H
647#define SIMDutf_INTERNAL_ISADETECTION_H
648
649#include <cstdint>
650#include <cstdlib>
651#if defined(_MSC_VER)
652#include <intrin.h>
653#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
654#include <cpuid.h>
655#endif
656
657namespace simdutf {
658namespace internal {
659
/**
 * Bit flags describing CPU features; values are OR-ed together into the
 * uint32_t returned by detect_supported_architectures(). Every flag must
 * therefore own a distinct bit.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  // Was 0x2000, which aliased AVX512CD: detecting either feature reported
  // both. Use the next free bit instead.
  AVX512VPOPCNTDQ = 0x20000
};
680
681#if defined(__PPC64__)
682
683static inline uint32_t detect_supported_architectures() {
684  return instruction_set::ALTIVEC;
685}
686
687#elif defined(__aarch64__) || defined(_M_ARM64)
688
689static inline uint32_t detect_supported_architectures() {
690  return instruction_set::NEON;
691}
692
693#elif defined(__x86_64__) || defined(_M_AMD64) // x64
694
695
namespace {
// Bit masks for the CPUID outputs and the XCR0 register that are tested by
// detect_supported_architectures().
namespace cpuid_bit {
    // Can be found on Intel ISA Reference for CPUID

    // Leaf EAX = 0x01: feature bits reported in ECX.
    constexpr uint32_t pclmulqdq = uint32_t{1} << 1; ///< @private bit  1 of ECX for EAX=0x1
    constexpr uint32_t sse42 = uint32_t{1} << 20;    ///< @private bit 20 of ECX for EAX=0x1
    constexpr uint32_t osxsave =
        (uint32_t{1} << 26) | (uint32_t{1} << 27);   ///< @private bits 26+27 of ECX for EAX=0x1

    // Leaf EAX = 0x07 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
    // See: "Table 3-8. Information Returned by CPUID Instruction"

    // Bits reported in EBX.
    namespace ebx {
      constexpr uint32_t bmi1 = uint32_t{1} << 3;
      constexpr uint32_t avx2 = uint32_t{1} << 5;
      constexpr uint32_t bmi2 = uint32_t{1} << 8;
      constexpr uint32_t avx512f = uint32_t{1} << 16;
      constexpr uint32_t avx512dq = uint32_t{1} << 17;
      constexpr uint32_t avx512ifma = uint32_t{1} << 21;
      constexpr uint32_t avx512cd = uint32_t{1} << 28;
      constexpr uint32_t avx512bw = uint32_t{1} << 30;
      constexpr uint32_t avx512vl = uint32_t{1} << 31;
    }

    // Bits reported in ECX.
    namespace ecx {
      constexpr uint32_t avx512vbmi = uint32_t{1} << 1;
      constexpr uint32_t avx512vbmi2 = uint32_t{1} << 6;
      constexpr uint32_t avx512vnni = uint32_t{1} << 11;
      constexpr uint32_t avx512bitalg = uint32_t{1} << 12;
      constexpr uint32_t avx512vpopcnt = uint32_t{1} << 14;
    }

    // Bits reported in EDX.
    namespace edx {
      constexpr uint32_t avx512vp2intersect = uint32_t{1} << 8;
    }

    // Bits of XCR0 (as returned by xgetbv) telling which register state is saved.
    namespace xcr0_bit {
      constexpr uint64_t avx256_saved = uint64_t{1} << 2; ///< @private bit 2 = AVX
      constexpr uint64_t avx512_saved = uint64_t{7} << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
    }
  }
}
735
736
737
/**
 * Executes the CPUID instruction.
 *
 * On entry *eax holds the leaf and *ecx the sub-leaf; on return the four
 * pointees hold the EAX/EBX/ECX/EDX values reported by the instruction.
 * Three implementations are selected at compile time: the MSVC intrinsic,
 * the helper from <cpuid.h> (only when both HAVE_GCC_GET_CPUID and
 * USE_GCC_GET_CPUID are defined), or inline assembly.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
#if defined(_MSC_VER)
  // __cpuidex takes (leaf, sub-leaf) and fills cpu_info with EAX..EDX in order.
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  // The leaf is copied first because eax is also passed as an output.
  // NOTE(review): __get_cpuid has no sub-leaf parameter, so *ecx does not
  // appear to be passed as an input on this path — confirm whether
  // __get_cpuid_count should be used for the leaf 0x7 query. The return value
  // is ignored, so the outputs keep their previous contents if the call
  // reports failure.
  uint32_t level = *eax;
  __get_cpuid(level, eax, ebx, ecx, edx);
#else
  // "+a"/"+c": EAX and ECX are both inputs (leaf, sub-leaf) and outputs;
  // "=b"/"=d": EBX and EDX are outputs only.
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}
759
/**
 * Reads extended control register 0 (XCR0) with the XGETBV instruction and
 * returns its 64-bit value.
 *
 * detect_supported_architectures() only calls this after the osxsave bits
 * reported by CPUID leaf 0x1 have been checked.
 */
static inline uint64_t xgetbv() {
 #if defined(_MSC_VER)
   return _xgetbv(0);
 #else
   // ECX = 0 selects XCR0; the result is returned split across EDX:EAX.
   uint32_t xcr0_lo, xcr0_hi;
   asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
   // Recombine the high (EDX) and low (EAX) halves.
   return xcr0_lo | ((uint64_t)xcr0_hi << 32);
 #endif
 }
769
770static inline uint32_t detect_supported_architectures() {
771  uint32_t eax;
772  uint32_t ebx = 0;
773  uint32_t ecx = 0;
774  uint32_t edx = 0;
775  uint32_t host_isa = 0x0;
776
777  // EBX for EAX=0x1
778  eax = 0x1;
779  cpuid(&eax, &ebx, &ecx, &edx);
780
781  if (ecx & cpuid_bit::sse42) {
782    host_isa |= instruction_set::SSE42;
783  }
784
785  if (ecx & cpuid_bit::pclmulqdq) {
786    host_isa |= instruction_set::PCLMULQDQ;
787  }
788
789  if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
790    return host_isa;
791  }
792
793  // xgetbv for checking if the OS saves registers
794  uint64_t xcr0 = xgetbv();
795
796  if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
797    return host_isa;
798  }
799  // ECX for EAX=0x7
800  eax = 0x7;
801  ecx = 0x0; // Sub-leaf = 0
802  cpuid(&eax, &ebx, &ecx, &edx);
803  if (ebx & cpuid_bit::ebx::avx2) {
804    host_isa |= instruction_set::AVX2;
805  }
806  if (ebx & cpuid_bit::ebx::bmi1) {
807    host_isa |= instruction_set::BMI1;
808  }
809  if (ebx & cpuid_bit::ebx::bmi2) {
810    host_isa |= instruction_set::BMI2;
811  }
812  if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) {
813    return host_isa;
814  }
815  if (ebx & cpuid_bit::ebx::avx512f) {
816    host_isa |= instruction_set::AVX512F;
817  }
818  if (ebx & cpuid_bit::ebx::avx512bw) {
819    host_isa |= instruction_set::AVX512BW;
820  }
821  if (ebx & cpuid_bit::ebx::avx512cd) {
822    host_isa |= instruction_set::AVX512CD;
823  }
824  if (ebx & cpuid_bit::ebx::avx512dq) {
825    host_isa |= instruction_set::AVX512DQ;
826  }
827  if (ebx & cpuid_bit::ebx::avx512vl) {
828    host_isa |= instruction_set::AVX512VL;
829  }
830  if (ecx & cpuid_bit::ecx::avx512vbmi2) {
831    host_isa |= instruction_set::AVX512VBMI2;
832  }
833  if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
834    host_isa |= instruction_set::AVX512VPOPCNTDQ;
835  }
836  return host_isa;
837}
838#else // fallback
839
840// includes 32-bit ARM.
841static inline uint32_t detect_supported_architectures() {
842  return instruction_set::DEFAULT;
843}
844
845
846#endif // end SIMD extension detection code
847
848} // namespace internal
849} // namespace simdutf
850
851#endif // SIMDutf_INTERNAL_ISADETECTION_H
852/* end file include/simdutf/internal/isadetection.h */
853
854
855namespace simdutf {
856
857/**
858 * Autodetect the encoding of the input, a single encoding is recommended.
859 * E.g., the function might return simdutf::encoding_type::UTF8,
860 * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or
861 * simdutf::encoding_type::UTF32_LE.
862 *
863 * @param input the string to analyze.
864 * @param length the length of the string in bytes.
865 * @return the detected encoding type
866 */
867simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * input, size_t length) noexcept;
868simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t * input, size_t length) noexcept {
869  return autodetect_encoding(reinterpret_cast<const char *>(input), length);
870}
871
872/**
873 * Autodetect the possible encodings of the input in one pass.
874 * E.g., if the input might be UTF-16LE or UTF-8, this function returns
875 * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE).
876 *
 * Overridden by each implementation.
878 *
879 * @param input the string to analyze.
880 * @param length the length of the string in bytes.
 * @return a bitmask of the possible encodings (simdutf::encoding_type values combined with bitwise OR)
882 */
883simdutf_warn_unused int detect_encodings(const char * input, size_t length) noexcept;
884simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * input, size_t length) noexcept {
885  return detect_encodings(reinterpret_cast<const char *>(input), length);
886}
887
888/**
889 * Validate the UTF-8 string. This function may be best when you expect
890 * the input to be almost always valid. Otherwise, consider using
891 * validate_utf8_with_errors.
892 *
893 * Overridden by each implementation.
894 *
895 * @param buf the UTF-8 string to validate.
896 * @param len the length of the string in bytes.
897 * @return true if and only if the string is valid UTF-8.
898 */
899simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
900
901/**
902 * Validate the UTF-8 string and stop on error.
903 *
904 * Overridden by each implementation.
905 *
906 * @param buf the UTF-8 string to validate.
907 * @param len the length of the string in bytes.
 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
909 */
910simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept;
911
912/**
913 * Validate the ASCII string.
914 *
915 * Overridden by each implementation.
916 *
917 * @param buf the ASCII string to validate.
918 * @param len the length of the string in bytes.
919 * @return true if and only if the string is valid ASCII.
920 */
921simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
922
923/**
924 * Validate the ASCII string and stop on error. It might be faster than
925 * validate_ascii when an error is expected to occur early.
926 *
927 * Overridden by each implementation.
928 *
929 * @param buf the ASCII string to validate.
930 * @param len the length of the string in bytes.
931 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
932 */
933simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept;
934
935/**
936 * Using native endianness; Validate the UTF-16 string.
937 * This function may be best when you expect the input to be almost always valid.
938 * Otherwise, consider using validate_utf16_with_errors.
939 *
940 * Overridden by each implementation.
941 *
942 * This function is not BOM-aware.
943 *
944 * @param buf the UTF-16 string to validate.
945 * @param len the length of the string in number of 2-byte code units (char16_t).
946 * @return true if and only if the string is valid UTF-16.
947 */
948simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept;
949
950/**
951 * Validate the UTF-16LE string. This function may be best when you expect
952 * the input to be almost always valid. Otherwise, consider using
953 * validate_utf16le_with_errors.
954 *
955 * Overridden by each implementation.
956 *
957 * This function is not BOM-aware.
958 *
959 * @param buf the UTF-16LE string to validate.
960 * @param len the length of the string in number of 2-byte code units (char16_t).
961 * @return true if and only if the string is valid UTF-16LE.
962 */
963simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept;
964
965/**
966 * Validate the UTF-16BE string. This function may be best when you expect
967 * the input to be almost always valid. Otherwise, consider using
968 * validate_utf16be_with_errors.
969 *
970 * Overridden by each implementation.
971 *
972 * This function is not BOM-aware.
973 *
974 * @param buf the UTF-16BE string to validate.
975 * @param len the length of the string in number of 2-byte code units (char16_t).
976 * @return true if and only if the string is valid UTF-16BE.
977 */
978simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept;
979
980/**
981 * Using native endianness; Validate the UTF-16 string and stop on error.
982 * It might be faster than validate_utf16 when an error is expected to occur early.
983 *
984 * Overridden by each implementation.
985 *
986 * This function is not BOM-aware.
987 *
988 * @param buf the UTF-16 string to validate.
989 * @param len the length of the string in number of 2-byte code units (char16_t).
990 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
991 */
992simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept;
993
994/**
995 * Validate the UTF-16LE string and stop on error. It might be faster than
996 * validate_utf16le when an error is expected to occur early.
997 *
998 * Overridden by each implementation.
999 *
1000 * This function is not BOM-aware.
1001 *
1002 * @param buf the UTF-16LE string to validate.
1003 * @param len the length of the string in number of 2-byte code units (char16_t).
1004 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1005 */
1006simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept;
1007
1008/**
1009 * Validate the UTF-16BE string and stop on error. It might be faster than
1010 * validate_utf16be when an error is expected to occur early.
1011 *
1012 * Overridden by each implementation.
1013 *
1014 * This function is not BOM-aware.
1015 *
1016 * @param buf the UTF-16BE string to validate.
1017 * @param len the length of the string in number of 2-byte code units (char16_t).
1018 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1019 */
1020simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept;
1021
1022/**
1023 * Validate the UTF-32 string. This function may be best when you expect
1024 * the input to be almost always valid. Otherwise, consider using
1025 * validate_utf32_with_errors.
1026 *
1027 * Overridden by each implementation.
1028 *
1029 * This function is not BOM-aware.
1030 *
1031 * @param buf the UTF-32 string to validate.
1032 * @param len the length of the string in number of 4-byte code units (char32_t).
1033 * @return true if and only if the string is valid UTF-32.
1034 */
1035simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept;
1036
1037/**
1038 * Validate the UTF-32 string and stop on error. It might be faster than
1039 * validate_utf32 when an error is expected to occur early.
1040 *
1041 * Overridden by each implementation.
1042 *
1043 * This function is not BOM-aware.
1044 *
1045 * @param buf the UTF-32 string to validate.
1046 * @param len the length of the string in number of 4-byte code units (char32_t).
1047 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
1048 */
1049simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
1050
1051  /**
1052   * Convert Latin1 string into UTF8 string.
1053   *
1054   * This function is suitable to work with inputs from untrusted sources.
1055   *
1056   * @param input         the Latin1 string to convert
1057   * @param length        the length of the string in bytes
1058   * @param utf8_output   the pointer to buffer that can hold conversion result
1059   * @return the number of written char; 0 if conversion is not possible
1060   */
1061  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept;
1062
1063
1064    /**
1065   * Convert Latin1 string into UTF-16LE string.
1066   *
1067   * This function is suitable to work with inputs from untrusted sources.
1068   *
1069   * @param input         the Latin1 string to convert
1070   * @param length        the length of the string in bytes
1071   * @param utf16_output  the pointer to buffer that can hold conversion result
1072   * @return the number of written char16_t; 0 if conversion is not possible
1073   */
1074  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
1075
1076  /**
1077   * Convert Latin1 string into UTF-16BE string.
1078   *
1079   * This function is suitable to work with inputs from untrusted sources.
1080   *
1081   * @param input         the Latin1 string to convert
1082   * @param length        the length of the string in bytes
1083   * @param utf16_output  the pointer to buffer that can hold conversion result
1084   * @return the number of written char16_t; 0 if conversion is not possible
1085   */
1086  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
1087
1088  /**
1089   * Convert Latin1 string into UTF-32 string.
1090   *
1091   * This function is suitable to work with inputs from untrusted sources.
1092   *
1093   * @param input         the Latin1 string to convert
1094   * @param length        the length of the string in bytes
1095   * @param utf32_buffer  the pointer to buffer that can hold conversion result
1096   * @return the number of written char32_t; 0 if conversion is not possible
1097   */
1098  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
1099
1100 /**
1101   * Convert possibly broken UTF-8 string into latin1 string.
1102   *
1103   * During the conversion also validation of the input string is done.
1104   * This function is suitable to work with inputs from untrusted sources.
1105   *
1106   * @param input         the UTF-8 string to convert
1107   * @param length        the length of the string in bytes
1108   * @param latin1_output  the pointer to buffer that can hold conversion result
1109   * @return the number of written char; 0 if the input was not valid UTF-8 string
1110   */
1111  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
1112
1113/**
1114 * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 string.
1115 *
1116 * During the conversion also validation of the input string is done.
1117 * This function is suitable to work with inputs from untrusted sources.
1118 *
1119 * @param input         the UTF-8 string to convert
1120 * @param length        the length of the string in bytes
1121 * @param utf16_output  the pointer to buffer that can hold conversion result
1122 * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1123 */
1124simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
1125
1126
1127/**
1128 * Using native endianness, convert a Latin1 string into a UTF-16 string.
1129 *
1130 * @param input         the Latin1 string to convert
1131 * @param length        the length of the string in bytes
1132 * @param utf16_output  the pointer to buffer that can hold conversion result
1133 * @return the number of written char16_t.
1134 */
1135simdutf_warn_unused size_t convert_latin1_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
1136
1137/**
1138 * Convert possibly broken UTF-8 string into UTF-16LE string.
1139 *
1140 * During the conversion also validation of the input string is done.
1141 * This function is suitable to work with inputs from untrusted sources.
1142 *
1143 * @param input         the UTF-8 string to convert
1144 * @param length        the length of the string in bytes
1145 * @param utf16_output  the pointer to buffer that can hold conversion result
1146 * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1147 */
1148simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
1149
1150/**
1151 * Convert possibly broken UTF-8 string into UTF-16BE string.
1152 *
1153 * During the conversion also validation of the input string is done.
1154 * This function is suitable to work with inputs from untrusted sources.
1155 *
1156 * @param input         the UTF-8 string to convert
1157 * @param length        the length of the string in bytes
1158 * @param utf16_output  the pointer to buffer that can hold conversion result
1159 * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
1160 */
1161simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
1162
1163
1164  /**
1165   * Convert possibly broken UTF-8 string into latin1 string with errors.
1166   *
1167   * During the conversion also validation of the input string is done.
1168   * This function is suitable to work with inputs from untrusted sources.
1169   *
1170   * @param input         the UTF-8 string to convert
1171   * @param length        the length of the string in bytes
1172   * @param latin1_output  the pointer to buffer that can hold conversion result
1173   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1174   */
1175  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept;
1176
1177/**
1178 * Using native endianness, convert possibly broken UTF-8 string into UTF-16
1179 * string and stop on error.
1180 *
1181 * During the conversion also validation of the input string is done.
1182 * This function is suitable to work with inputs from untrusted sources.
1183 *
1184 * @param input         the UTF-8 string to convert
1185 * @param length        the length of the string in bytes
1186 * @param utf16_output  the pointer to buffer that can hold conversion result
1187 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1188 */
1189simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1190
1191/**
1192 * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
1193 *
1194 * During the conversion also validation of the input string is done.
1195 * This function is suitable to work with inputs from untrusted sources.
1196 *
1197 * @param input         the UTF-8 string to convert
1198 * @param length        the length of the string in bytes
1199 * @param utf16_output  the pointer to buffer that can hold conversion result
1200 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1201 */
1202simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1203
1204/**
1205 * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
1206 *
1207 * During the conversion also validation of the input string is done.
1208 * This function is suitable to work with inputs from untrusted sources.
1209 *
1210 * @param input         the UTF-8 string to convert
1211 * @param length        the length of the string in bytes
1212 * @param utf16_output  the pointer to buffer that can hold conversion result
1213 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1214 */
1215simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
1216
1217/**
1218 * Convert possibly broken UTF-8 string into UTF-32 string.
1219 *
1220 * During the conversion also validation of the input string is done.
1221 * This function is suitable to work with inputs from untrusted sources.
1222 *
1223 * @param input         the UTF-8 string to convert
1224 * @param length        the length of the string in bytes
1225 * @param utf32_output  the pointer to buffer that can hold conversion result
1226 * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
1227 */
1228simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept;
1229
1230/**
1231 * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
1232 *
1233 * During the conversion also validation of the input string is done.
1234 * This function is suitable to work with inputs from untrusted sources.
1235 *
1236 * @param input         the UTF-8 string to convert
1237 * @param length        the length of the string in bytes
1238 * @param utf32_output  the pointer to buffer that can hold conversion result
1239 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1240 */
1241simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
1242
1243    /**
1244   * Convert valid UTF-8 string into latin1 string.
1245   *
1246   * This function assumes that the input string is valid UTF-8.
1247   *
1248   * This function is not BOM-aware.
1249   *
1250   * @param input         the UTF-8 string to convert
1251   * @param length        the length of the string in bytes
1252   * @param latin1_output  the pointer to buffer that can hold conversion result
1253   * @return the number of written char; 0 if the input was not valid UTF-8 string
1254   */
1255  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
1256
1257
1258/**
1259 * Using native endianness, convert valid UTF-8 string into a UTF-16 string.
1260 *
1261 * This function assumes that the input string is valid UTF-8.
1262 *
1263 * @param input         the UTF-8 string to convert
1264 * @param length        the length of the string in bytes
1265 * @param utf16_buffer  the pointer to buffer that can hold conversion result
1266 * @return the number of written char16_t
1267 */
1268simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1269
1270/**
1271 * Convert valid UTF-8 string into UTF-16LE string.
1272 *
1273 * This function assumes that the input string is valid UTF-8.
1274 *
1275 * @param input         the UTF-8 string to convert
1276 * @param length        the length of the string in bytes
1277 * @param utf16_buffer  the pointer to buffer that can hold conversion result
1278 * @return the number of written char16_t
1279 */
1280simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1281
1282/**
1283 * Convert valid UTF-8 string into UTF-16BE string.
1284 *
1285 * This function assumes that the input string is valid UTF-8.
1286 *
1287 * @param input         the UTF-8 string to convert
1288 * @param length        the length of the string in bytes
1289 * @param utf16_buffer  the pointer to buffer that can hold conversion result
1290 * @return the number of written char16_t
1291 */
1292simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
1293
1294/**
1295 * Convert valid UTF-8 string into UTF-32 string.
1296 *
1297 * This function assumes that the input string is valid UTF-8.
1298 *
1299 * @param input         the UTF-8 string to convert
1300 * @param length        the length of the string in bytes
1301 * @param utf32_buffer  the pointer to buffer that can hold conversion result
1302 * @return the number of written char32_t
1303 */
1304simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
1305
1306
1307/**
1308 * Return the number of bytes that this Latin1 string would require in UTF-8 format.
1309 *
1310 * @param input         the Latin1 string to convert
1311 * @param length        the length of the string in bytes
1312 * @return the number of bytes required to encode the Latin1 string as UTF-8
1313 */
1314simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept;
1315
1316/**
1317 * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
1318 *
1319 * This function does not validate the input.
1320 *
1321 * This function is not BOM-aware.
1322 *
1323 * @param input         the UTF-8 string to convert
1324 * @param length        the length of the string in bytes
1325 * @return the number of bytes required to encode the UTF-8 string as Latin1
1326 */
1327simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept;
1328
1329/**
1330 * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
1331 *
1332 * This function does not validate the input.
1333 *
1334 * This function is not BOM-aware.
1335 *
1336 * @param input         the UTF-8 string to process
1337 * @param length        the length of the string in bytes
1338 * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
1339 */
1340simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept;
1341
1342/**
1343 * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
1344 *
1345 * This function is equivalent to count_utf8
1346 *
1347 * This function does not validate the input.
1348 *
1349 * This function is not BOM-aware.
1350 *
1351 * @param input         the UTF-8 string to process
1352 * @param length        the length of the string in bytes
1353 * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
1354 */
1355simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept;
1356
1357/**
1358 * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string.
1359 *
1360 * During the conversion also validation of the input string is done.
1361 * This function is suitable to work with inputs from untrusted sources.
1362 *
1363 * This function is not BOM-aware.
1364 *
1365 * @param input         the UTF-16 string to convert
1366 * @param length        the length of the string in 2-byte code units (char16_t)
1367 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1368 * @return number of written code units; 0 if input is not a valid UTF-16 string
1369 */
1370simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1371
1372
1373
1374/**
1375 * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
1376 *
1377 * During the conversion also validation of the input string is done.
1378 * This function is suitable to work with inputs from untrusted sources.
1379 *
1380 * This function is not BOM-aware.
1381 *
1382 * @param input         the UTF-16 string to convert
1383 * @param length        the length of the string in 2-byte code units (char16_t)
1384 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1385 * @return number of written code units; 0 if input is not a valid UTF-16 string
1386 */
1387simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1388
1389/**
1390 * Convert possibly broken UTF-16LE string into Latin1 string.
1391 *
1392 * During the conversion also validation of the input string is done.
1393 * This function is suitable to work with inputs from untrusted sources.
1394 *
1395 * This function is not BOM-aware.
1396 *
1397 * @param input         the UTF-16LE string to convert
1398 * @param length        the length of the string in 2-byte code units (char16_t)
1399 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1400 * @return number of written code units; 0 if input is not a valid UTF-16LE string
1401 */
1402simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1403
1404/**
1405 * Convert possibly broken UTF-16BE string into Latin1 string.
1406 *
1407 * During the conversion also validation of the input string is done.
1408 * This function is suitable to work with inputs from untrusted sources.
1409 *
1410 * This function is not BOM-aware.
1411 *
1412 * @param input         the UTF-16BE string to convert
1413 * @param length        the length of the string in 2-byte code units (char16_t)
1414 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1415 * @return number of written code units; 0 if input is not a valid UTF-16BE string
1416 */
1417simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1418
1419
1420/**
1421 * Convert possibly broken UTF-16LE string into UTF-8 string.
1422 *
1423 * During the conversion also validation of the input string is done.
1424 * This function is suitable to work with inputs from untrusted sources.
1425 *
1426 * This function is not BOM-aware.
1427 *
1428 * @param input         the UTF-16LE string to convert
1429 * @param length        the length of the string in 2-byte code units (char16_t)
1430 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1431 * @return number of written code units; 0 if input is not a valid UTF-16LE string
1432 */
1433simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1434
1435/**
1436 * Convert possibly broken UTF-16BE string into UTF-8 string.
1437 *
1438 * During the conversion also validation of the input string is done.
1439 * This function is suitable to work with inputs from untrusted sources.
1440 *
1441 * This function is not BOM-aware.
1442 *
1443 * @param input         the UTF-16BE string to convert
1444 * @param length        the length of the string in 2-byte code units (char16_t)
1445 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1446 * @return number of written code units; 0 if input is not a valid UTF-16BE string
1447 */
1448simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1449
1450/**
1451 * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
1452 *
1453 * During the conversion also validation of the input string is done.
1454 * This function is suitable to work with inputs from untrusted sources.
1455 * This function is not BOM-aware.
1456 *
1457 * @param input         the UTF-16 string to convert
1458 * @param length        the length of the string in 2-byte code units (char16_t)
1459 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1460 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1461 */
1462simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1463
1464/**
1465 * Convert possibly broken UTF-16LE string into Latin1 string.
1466 *
1467 * During the conversion also validation of the input string is done.
1468 * This function is suitable to work with inputs from untrusted sources.
1469 * This function is not BOM-aware.
1470 *
1471 * @param input         the UTF-16LE string to convert
1472 * @param length        the length of the string in 2-byte code units (char16_t)
1473 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1474 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1475 */
1476simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1477
1478/**
1479 * Convert possibly broken UTF-16BE string into Latin1 string.
1480 *
1481 * During the conversion also validation of the input string is done.
1482 * This function is suitable to work with inputs from untrusted sources.
1483 * This function is not BOM-aware.
1484 *
1485 * @param input         the UTF-16BE string to convert
1486 * @param length        the length of the string in 2-byte code units (char16_t)
1487 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1488 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1489 */
1490simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1491
1492
1493/**
1494 * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string and stop on error.
1495 *
1496 * During the conversion also validation of the input string is done.
1497 * This function is suitable to work with inputs from untrusted sources.
1498 *
1499 * This function is not BOM-aware.
1500 *
1501 * @param input         the UTF-16 string to convert
1502 * @param length        the length of the string in 2-byte code units (char16_t)
1503 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1504 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1505 */
1506simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1507
1508/**
1509 * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
1510 *
1511 * During the conversion also validation of the input string is done.
1512 * This function is suitable to work with inputs from untrusted sources.
1513 *
1514 * This function is not BOM-aware.
1515 *
1516 * @param input         the UTF-16LE string to convert
1517 * @param length        the length of the string in 2-byte code units (char16_t)
1518 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1519 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1520 */
1521simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1522
1523/**
1524 * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
1525 *
1526 * During the conversion also validation of the input string is done.
1527 * This function is suitable to work with inputs from untrusted sources.
1528 *
1529 * This function is not BOM-aware.
1530 *
1531 * @param input         the UTF-16BE string to convert
1532 * @param length        the length of the string in 2-byte code units (char16_t)
1533 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1534 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1535 */
1536simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1537
1538/**
1539 * Using native endianness, convert valid UTF-16 string into UTF-8 string.
1540 *
1541 * This function assumes that the input string is valid UTF-16.
1542 *
1543 * This function is not BOM-aware.
1544 *
1545 * @param input         the UTF-16 string to convert
1546 * @param length        the length of the string in 2-byte code units (char16_t)
1547 * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1548 * @return number of written code units; 0 if conversion is not possible
1549 */
1550simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1551
1552
1553/**
1554 * Using native endianness, convert valid UTF-16 string into Latin1 string.
1555 *
1556 * This function assumes that the input string is valid UTF-16 (native endianness).
1557 *
1558 * This function is not BOM-aware.
1559 *
1560 * @param input         the UTF-16 string to convert
1561 * @param length        the length of the string in 2-byte code units (char16_t)
1562 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1563 * @return number of written code units; 0 if conversion is not possible
1564 */
1565simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1566
1567/**
1568 * Convert valid UTF-16LE string into Latin1 string.
1569 *
1570 * This function assumes that the input string is valid UTF-16LE.
1571 *
1572 * This function is not BOM-aware.
1573 *
1574 * @param input         the UTF-16LE string to convert
1575 * @param length        the length of the string in 2-byte code units (char16_t)
1576 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1577 * @return number of written code units; 0 if conversion is not possible
1578 */
1579simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1580
1581/**
1582 * Convert valid UTF-16BE string into Latin1 string.
1583 *
1584 * This function assumes that the input string is valid UTF-16BE.
1585 *
1586 * This function is not BOM-aware.
1587 *
1588 * @param input         the UTF-16BE string to convert
1589 * @param length        the length of the string in 2-byte code units (char16_t)
1590 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1591 * @return number of written code units; 0 if conversion is not possible
1592 */
1593simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
1594
1595
1596/**
1597 * Convert valid UTF-16LE string into UTF-8 string.
1598 *
1599 * This function assumes that the input string is valid UTF-16LE.
1600 *
1601 * This function is not BOM-aware.
1602 *
1603 * @param input         the UTF-16LE string to convert
1604 * @param length        the length of the string in 2-byte code units (char16_t)
1605 * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1606 * @return number of written code units; 0 if conversion is not possible
1607 */
1608simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1609
1610/**
1611 * Convert valid UTF-16BE string into UTF-8 string.
1612 *
1613 * This function assumes that the input string is valid UTF-16BE.
1614 *
1615 * This function is not BOM-aware.
1616 *
1617 * @param input         the UTF-16BE string to convert
1618 * @param length        the length of the string in 2-byte code units (char16_t)
1619 * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1620 * @return number of written code units; 0 if conversion is not possible
1621 */
1622simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
1623
1624/**
1625 * Using native endianness, convert possibly broken UTF-16 string into UTF-32 string.
1626 *
1627 * During the conversion also validation of the input string is done.
1628 * This function is suitable to work with inputs from untrusted sources.
1629 *
1630 * This function is not BOM-aware.
1631 *
1632 * @param input         the UTF-16 string to convert
1633 * @param length        the length of the string in 2-byte code units (char16_t)
1634 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1635 * @return number of written code units; 0 if input is not a valid UTF-16 string
1636 */
1637simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1638
1639/**
1640 * Convert possibly broken UTF-16LE string into UTF-32 string.
1641 *
1642 * During the conversion also validation of the input string is done.
1643 * This function is suitable to work with inputs from untrusted sources.
1644 *
1645 * This function is not BOM-aware.
1646 *
1647 * @param input         the UTF-16LE string to convert
1648 * @param length        the length of the string in 2-byte code units (char16_t)
1649 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1650 * @return number of written code units; 0 if input is not a valid UTF-16LE string
1651 */
1652simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1653
1654/**
1655 * Convert possibly broken UTF-16BE string into UTF-32 string.
1656 *
1657 * During the conversion also validation of the input string is done.
1658 * This function is suitable to work with inputs from untrusted sources.
1659 *
1660 * This function is not BOM-aware.
1661 *
1662 * @param input         the UTF-16BE string to convert
1663 * @param length        the length of the string in 2-byte code units (char16_t)
1664 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1665 * @return number of written code units; 0 if input is not a valid UTF-16BE string
1666 */
1667simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1668
1669/**
1670 * Using native endianness, convert possibly broken UTF-16 string into
1671 * UTF-32 string and stop on error.
1672 *
1673 * During the conversion also validation of the input string is done.
1674 * This function is suitable to work with inputs from untrusted sources.
1675 *
1676 * This function is not BOM-aware.
1677 *
1678 * @param input         the UTF-16 string to convert
1679 * @param length        the length of the string in 2-byte code units (char16_t)
1680 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1681 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1682 */
1683simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1684
1685/**
1686 * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
1687 *
1688 * During the conversion also validation of the input string is done.
1689 * This function is suitable to work with inputs from untrusted sources.
1690 *
1691 * This function is not BOM-aware.
1692 *
1693 * @param input         the UTF-16LE string to convert
1694 * @param length        the length of the string in 2-byte code units (char16_t)
1695 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1696 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1697 */
1698simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1699
1700/**
1701 * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
1702 *
1703 * During the conversion also validation of the input string is done.
1704 * This function is suitable to work with inputs from untrusted sources.
1705 *
1706 * This function is not BOM-aware.
1707 *
1708 * @param input         the UTF-16BE string to convert
1709 * @param length        the length of the string in 2-byte code units (char16_t)
1710 * @param utf32_buffer   the pointer to buffer that can hold conversion result
1711 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
1712 */
1713simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1714
1715/**
1716 * Using native endianness, convert valid UTF-16 string into UTF-32 string.
1717 *
1718 * This function assumes that the input string is valid UTF-16 (native endianness).
1719 *
1720 * This function is not BOM-aware.
1721 *
1722 * @param input         the UTF-16 string to convert
1723 * @param length        the length of the string in 2-byte code units (char16_t)
1724 * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1725 * @return number of written code units; 0 if conversion is not possible
1726 */
1727simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1728
1729/**
1730 * Convert valid UTF-16LE string into UTF-32 string.
1731 *
1732 * This function assumes that the input string is valid UTF-16LE.
1733 *
1734 * This function is not BOM-aware.
1735 *
1736 * @param input         the UTF-16LE string to convert
1737 * @param length        the length of the string in 2-byte code units (char16_t)
1738 * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1739 * @return number of written code units; 0 if conversion is not possible
1740 */
1741simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1742
1743/**
1744 * Convert valid UTF-16BE string into UTF-32 string.
1745 *
1746 * This function assumes that the input string is valid UTF-16BE.
1747 *
1748 * This function is not BOM-aware.
1749 *
1750 * @param input         the UTF-16BE string to convert
1751 * @param length        the length of the string in 2-byte code units (char16_t)
1752 * @param utf32_buffer   the pointer to buffer that can hold the conversion result
1753 * @return number of written code units; 0 if conversion is not possible
1754 */
1755simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
1756
1757
1758/**
1759 * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
1760 *
1761 * This function does not validate the input.
1762 *
1763 * This function is not BOM-aware.
1764 *
1765 * @param length        the length of the string in 2-byte code units (char16_t)
1766 * @return the number of bytes required to encode the UTF-16LE/BE string as Latin1
1767 */
1768simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
1769
1770
1771/**
1772 * Using native endianness; Compute the number of bytes that this UTF-16
1773 * string would require in UTF-8 format.
1774 *
1775 * This function does not validate the input.
1776 *
1777 * @param input         the UTF-16 string to convert
1778 * @param length        the length of the string in 2-byte code units (char16_t)
1779 * @return the number of bytes required to encode the UTF-16 string as UTF-8
1780 */
1781simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept;
1782
1783/**
1784 * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
1785 *
1786 * This function does not validate the input.
1787 *
1788 * @param input         the UTF-16LE string to convert
1789 * @param length        the length of the string in 2-byte code units (char16_t)
1790 * @return the number of bytes required to encode the UTF-16LE string as UTF-8
1791 */
1792simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept;
1793
1794/**
1795 * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
1796 *
1797 * This function does not validate the input.
1798 *
1799 * @param input         the UTF-16BE string to convert
1800 * @param length        the length of the string in 2-byte code units (char16_t)
1801 * @return the number of bytes required to encode the UTF-16BE string as UTF-8
1802 */
1803simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept;
1804
1805/**
1806 * Convert possibly broken UTF-32 string into UTF-8 string.
1807 *
1808 * During the conversion also validation of the input string is done.
1809 * This function is suitable to work with inputs from untrusted sources.
1810 *
1811 * This function is not BOM-aware.
1812 *
1813 * @param input         the UTF-32 string to convert
1814 * @param length        the length of the string in 4-byte code units (char32_t)
1815 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1816 * @return number of written code units; 0 if input is not a valid UTF-32 string
1817 */
1818simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1819
1820/**
1821 * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
1822 *
1823 * During the conversion also validation of the input string is done.
1824 * This function is suitable to work with inputs from untrusted sources.
1825 *
1826 * This function is not BOM-aware.
1827 *
1828 * @param input         the UTF-32 string to convert
1829 * @param length        the length of the string in 4-byte code units (char32_t)
1830 * @param utf8_buffer   the pointer to buffer that can hold conversion result
1831 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1832 */
1833simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1834
1835/**
1836 * Convert valid UTF-32 string into UTF-8 string.
1837 *
1838 * This function assumes that the input string is valid UTF-32.
1839 *
1840 * This function is not BOM-aware.
1841 *
1842 * @param input         the UTF-32 string to convert
1843 * @param length        the length of the string in 4-byte code units (char32_t)
1844 * @param utf8_buffer   the pointer to buffer that can hold the conversion result
1845 * @return number of written code units; 0 if conversion is not possible
1846 */
1847simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
1848
1849/**
1850 * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 string.
1851 *
1852 * During the conversion also validation of the input string is done.
1853 * This function is suitable to work with inputs from untrusted sources.
1854 *
1855 * This function is not BOM-aware.
1856 *
1857 * @param input         the UTF-32 string to convert
1858 * @param length        the length of the string in 4-byte code units (char32_t)
1859 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1860 * @return number of written code units; 0 if input is not a valid UTF-32 string
1861 */
1862simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1863
1864/**
1865 * Convert possibly broken UTF-32 string into UTF-16LE string.
1866 *
1867 * During the conversion also validation of the input string is done.
1868 * This function is suitable to work with inputs from untrusted sources.
1869 *
1870 * This function is not BOM-aware.
1871 *
1872 * @param input         the UTF-32 string to convert
1873 * @param length        the length of the string in 4-byte code units (char32_t)
1874 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1875 * @return number of written code units; 0 if input is not a valid UTF-32 string
1876 */
1877simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1878
1879/**
1880 * Convert possibly broken UTF-32 string into Latin1 string.
1881 *
1882 * During the conversion also validation of the input string is done.
1883 * This function is suitable to work with inputs from untrusted sources.
1884 *
1885 * This function is not BOM-aware.
1886 *
1887 * @param input         the UTF-32 string to convert
1888 * @param length        the length of the string in 4-byte code units (char32_t)
1889 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1890 * @return number of written code units; 0 if input is not a valid UTF-32 string
1891 */
1892simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1893
1894
1895/**
1896 * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
1897 *
1898 * During the conversion also validation of the input string is done.
1899 * This function is suitable to work with inputs from untrusted sources.
1900 *
1901 * This function is not BOM-aware.
1902 *
1903 * @param input         the UTF-32 string to convert
1904 * @param length        the length of the string in 4-byte code units (char32_t)
1905 * @param latin1_buffer   the pointer to buffer that can hold conversion result
1906 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
1907 */
1908simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1909
1910/**
1911 * Convert valid UTF-32 string into Latin1 string.
1912 *
1913 * This function assumes that the input string is valid UTF-32.
1914 *
1915 * This function is not BOM-aware.
1916 *
1917 * @param input         the UTF-32 string to convert
1918 * @param length        the length of the string in 4-byte code units (char32_t)
1919 * @param latin1_buffer   the pointer to buffer that can hold the conversion result
1920 * @return number of written code units; 0 if conversion is not possible
1921 */
1922simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
1923
1924/**
1925 * Convert possibly broken UTF-32 string into UTF-16BE string.
1926 *
1927 * During the conversion also validation of the input string is done.
1928 * This function is suitable to work with inputs from untrusted sources.
1929 *
1930 * This function is not BOM-aware.
1931 *
1932 * @param input         the UTF-32 string to convert
1933 * @param length        the length of the string in 4-byte code units (char32_t)
1934 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1935 * @return number of written code units; 0 if input is not a valid UTF-32 string
1936 */
1937simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1938
1939/**
1940 * Using native endianness, convert possibly broken UTF-32 string into UTF-16
1941 * string and stop on error.
1942 *
1943 * During the conversion also validation of the input string is done.
1944 * This function is suitable to work with inputs from untrusted sources.
1945 *
1946 * This function is not BOM-aware.
1947 *
1948 * @param input         the UTF-32 string to convert
1949 * @param length        the length of the string in 4-byte code units (char32_t)
1950 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1951 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1952 */
1953simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1954
1955/**
1956 * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
1957 *
1958 * During the conversion also validation of the input string is done.
1959 * This function is suitable to work with inputs from untrusted sources.
1960 *
1961 * This function is not BOM-aware.
1962 *
1963 * @param input         the UTF-32 string to convert
1964 * @param length        the length of the string in 4-byte code units (char32_t)
1965 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1966 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1967 */
1968simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1969
1970/**
1971 * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
1972 *
1973 * During the conversion also validation of the input string is done.
1974 * This function is suitable to work with inputs from untrusted sources.
1975 *
1976 * This function is not BOM-aware.
1977 *
1978 * @param input         the UTF-32 string to convert
1979 * @param length        the length of the string in 4-byte code units (char32_t)
1980 * @param utf16_buffer   the pointer to buffer that can hold conversion result
1981 * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
1982 */
1983simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1984
1985/**
1986 * Using native endianness, convert valid UTF-32 string into a UTF-16 string.
1987 *
1988 * This function assumes that the input string is valid UTF-32.
1989 *
1990 * This function is not BOM-aware.
1991 *
1992 * @param input         the UTF-32 string to convert
1993 * @param length        the length of the string in 4-byte code units (char32_t)
1994 * @param utf16_buffer   the pointer to buffer that can hold the conversion result
1995 * @return number of written code units; 0 if conversion is not possible
1996 */
1997simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
1998
1999/**
2000 * Convert valid UTF-32 string into UTF-16LE string.
2001 *
2002 * This function assumes that the input string is valid UTF-32.
2003 *
2004 * This function is not BOM-aware.
2005 *
2006 * @param input         the UTF-32 string to convert
2007 * @param length        the length of the string in 4-byte code units (char32_t)
2008 * @param utf16_buffer   the pointer to buffer that can hold the conversion result
2009 * @return number of written code units; 0 if conversion is not possible
2010 */
2011simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
2012
2013/**
2014 * Convert valid UTF-32 string into UTF-16BE string.
2015 *
2016 * This function assumes that the input string is valid UTF-32.
2017 *
2018 * This function is not BOM-aware.
2019 *
2020 * @param input         the UTF-32 string to convert
2021 * @param length        the length of the string in 4-byte code units (char32_t)
2022 * @param utf16_buffer   the pointer to buffer that can hold the conversion result
2023 * @return number of written code units; 0 if conversion is not possible
2024 */
2025simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
2026
2027/**
2028 * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
2029 * from UTF-16BE to UTF-16LE.
2030 *
2031 * This function does not validate the input.
2032 *
2033 * This function is not BOM-aware.
2034 *
2035 * @param input         the UTF-16 string to process
2036 * @param length        the length of the string in 2-byte code units (char16_t)
2037 * @param output        the pointer to buffer that can hold the conversion result
2038 */
2039void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept;
2040
2041/**
2042 * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
2043 *
2044 * This function does not validate the input.
2045 *
2046 * @param input         the UTF-32 string to convert
2047 * @param length        the length of the string in 4-byte code units (char32_t)
2048 * @return the number of bytes required to encode the UTF-32 string as UTF-8
2049 */
2050simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept;
2051
2052/**
2053 * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
2054 *
2055 * This function does not validate the input.
2056 *
2057 * @param input         the UTF-32 string to convert
2058 * @param length        the length of the string in 4-byte code units (char32_t)
2059 * @return the number of 2-byte code units (char16_t) required to encode the UTF-32 string as UTF-16
2060 */
2061simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept;
2062
2063/**
2064 * Using native endianness; Compute the number of 4-byte code units (char32_t)
2065 * that this UTF-16 string would require in UTF-32 format.
2066 *
2067 * This function is equivalent to count_utf16.
2068 *
2069 * This function does not validate the input.
2070 *
2071 * This function is not BOM-aware.
2072 *
2073 * @param input         the UTF-16 string to convert
2074 * @param length        the length of the string in 2-byte code units (char16_t)
2075 * @return the number of 4-byte code units (char32_t) required to encode the UTF-16 string as UTF-32
2076 */
2077simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept;
2078
2079/**
2080 * Compute the number of 4-byte code units (char32_t) that this UTF-16LE string would require in UTF-32 format.
2081 *
2082 * This function is equivalent to count_utf16le.
2083 *
2084 * This function does not validate the input.
2085 *
2086 * This function is not BOM-aware.
2087 *
2088 * @param input         the UTF-16LE string to convert
2089 * @param length        the length of the string in 2-byte code units (char16_t)
2090 * @return the number of 4-byte code units (char32_t) required to encode the UTF-16LE string as UTF-32
2091 */
2092simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept;
2093
2094/**
2095 * Compute the number of 4-byte code units (char32_t) that this UTF-16BE string would require in UTF-32 format.
2096 *
2097 * This function is equivalent to count_utf16be.
2098 *
2099 * This function does not validate the input.
2100 *
2101 * This function is not BOM-aware.
2102 *
2103 * @param input         the UTF-16BE string to convert
2104 * @param length        the length of the string in 2-byte code units (char16_t)
2105 * @return the number of 4-byte code units (char32_t) required to encode the UTF-16BE string as UTF-32
2106 */
2107simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept;
2108
2109/**
2110 * Count the number of code points (characters) in the string assuming that
2111 * it is valid.
2112 *
2113 * This function assumes that the input string is valid UTF-16 (native endianness).
2114 *
2115 * This function is not BOM-aware.
2116 *
2117 * @param input         the UTF-16 string to process
2118 * @param length        the length of the string in 2-byte code units (char16_t)
2119 * @return number of code points
2120 */
2121simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept;
2122
2123/**
2124 * Count the number of code points (characters) in the string assuming that
2125 * it is valid.
2126 *
2127 * This function assumes that the input string is valid UTF-16LE.
2128 *
2129 * This function is not BOM-aware.
2130 *
2131 * @param input         the UTF-16LE string to process
2132 * @param length        the length of the string in 2-byte code units (char16_t)
2133 * @return number of code points
2134 */
2135simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept;
2136
2137/**
2138 * Count the number of code points (characters) in the string assuming that
2139 * it is valid.
2140 *
2141 * This function assumes that the input string is valid UTF-16BE.
2142 *
2143 * This function is not BOM-aware.
2144 *
2145 * @param input         the UTF-16BE string to process
2146 * @param length        the length of the string in 2-byte code units (char16_t)
2147 * @return number of code points
2148 */
2149simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept;
2150
2151/**
2152 * Count the number of code points (characters) in the string assuming that
2153 * it is valid.
2154 *
2155 * This function assumes that the input string is valid UTF-8.
2156 *
2157 * @param input         the UTF-8 string to process
2158 * @param length        the length of the string in bytes
2159 * @return number of code points
2160 */
2161simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept;
2162
2163/**
2164 * Given a valid UTF-8 string having a possibly truncated last character,
2165 * this function checks the end of string. If the last character is truncated (or partial),
2166 * then it returns a shorter length (shorter by 1 to 3 bytes) so that the short UTF-8
2167 * strings only contain complete characters. If there is no truncated character,
2168 * the original length is returned.
2169 *
2170 * This function assumes that the input string is valid UTF-8, but possibly truncated.
2171 *
2172 * @param input         the UTF-8 string to process
2173 * @param length        the length of the string in bytes
2174 * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes
2175 */
2176simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
2177
2178/**
2179 * Given a valid UTF-16BE string having a possibly truncated last character,
2180 * this function checks the end of string. If the last character is truncated (or partial),
2181 * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16BE
2182 * strings only contain complete characters. If there is no truncated character,
2183 * the original length is returned.
2184 *
2185 * This function assumes that the input string is valid UTF-16BE, but possibly truncated.
2186 *
2187 * @param input         the UTF-16BE string to process
2188 * @param length        the length of the string in 2-byte code units (char16_t)
2189 * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2190 */
2191simdutf_warn_unused size_t trim_partial_utf16be(const char16_t* input, size_t length);
2192
2193/**
2194 * Given a valid UTF-16LE string having a possibly truncated last character,
2195 * this function checks the end of string. If the last character is truncated (or partial),
2196 * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16LE
2197 * strings only contain complete characters. If there is no truncated character,
2198 * the original length is returned.
2199 *
2200 * This function assumes that the input string is valid UTF-16LE, but possibly truncated.
2201 *
2202 * @param input         the UTF-16LE string to process
2203 * @param length        the length of the string in 2-byte code units (char16_t)
2204 * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2205 */
2206simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t length);
2207
2208
2209/**
2210 * Given a valid UTF-16 string having a possibly truncated last character,
2211 * this function checks the end of string. If the last character is truncated (or partial),
2212 * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16
2213 * strings only contain complete characters. If there is no truncated character,
2214 * the original length is returned.
2215 *
2216 * This function assumes that the input string is valid UTF-16, but possibly truncated.
2217 * We use the native endianness.
2218 *
2219 * @param input         the UTF-16 string to process
2220 * @param length        the length of the string in 2-byte code units (char16_t)
2221 * @return the length of the string in 2-byte code units, possibly shorter by 1 unit
2222 */
2223simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
2224
2225/**
2226 * An implementation of simdutf for a particular CPU architecture.
2227 *
2228 * Also used to maintain the currently active implementation. The active implementation is
2229 * automatically initialized on first use to the most advanced implementation supported by the host.
2230 */
2231class implementation {
2232public:
2233
2234  /**
2235   * The name of this implementation. By default this is the stored `_name` member.
2236   *
2237   *     const implementation *impl = simdutf::active_implementation;
2238   *     cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
2239   *
2240   * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
2241   */
2242  virtual const std::string &name() const { return _name; }
2243
2244  /**
2245   * The description of this implementation. By default this is the stored `_description` member.
2246   *
2247   *     const implementation *impl = simdutf::active_implementation;
2248   *     cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
2249   *
2250   * @return the description of the implementation (not its name; see name() for that)
2251   */
2252  virtual const std::string &description() const { return _description; }
2253
2254  /**
2255   * Checks whether the instruction sets this implementation is compiled against
2256   * match the current CPU. This function may poll the current CPU/system
2257   * and should therefore not be called too often if performance is a concern.
2258   *
2259   * Unlike most members of this class, this function is not virtual.
2260   * @return true if the implementation can be safely used on the current system (determined at runtime)
2261   */
2262  bool supported_by_runtime_system() const;
2263
2264  /**
2265   * This function will try to detect the encoding of the input (virtual, but not pure virtual).
2266   * @param input the string to identify
2267   * @param length the length of the string in bytes.
2268   * @return the encoding type detected
2269   */
2270  virtual encoding_type autodetect_encoding(const char * input, size_t length) const noexcept;
2271
2272  /**
2273   * This function will try to detect the possible encodings in one pass. Overridden by each implementation.
2274   * @param input the string to identify
2275   * @param length the length of the string in bytes.
2276   * @return the detected encodings as an int (presumably a combination of encoding_type values -- TODO confirm)
2277   */
2278  virtual int detect_encodings(const char * input, size_t length) const noexcept = 0;
2279
2280  /**
2281   * @private For internal implementation use
2282   *
2283   * The instruction sets this implementation is compiled against.
2284   *
2285   * @return a mask of all required `internal::instruction_set::` values (by default, the stored `_required_instruction_sets` member)
2286   */
2287  virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
2288
2289
2290  /**
2291   * Validate the UTF-8 string.
2292   *
2293   * Pure virtual: overridden by each implementation.
2294   *
2295   * @param buf the UTF-8 string to validate.
2296   * @param len the length of the string in bytes.
2297   * @return true if and only if the string is valid UTF-8.
2298   */
2299  simdutf_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
2300
2301  /**
2302   * Validate the UTF-8 string and stop on errors.
2303   *
2304   * Pure virtual: overridden by each implementation.
2305   *
2306   * @param buf the UTF-8 string to validate.
2307   * @param len the length of the string in bytes.
2308   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of code units validated if successful.
2309   */
2310  simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
2311
2312  /**
2313   * Validate the ASCII string.
2314   *
2315   * Pure virtual: overridden by each implementation.
2316   *
2317   * @param buf the ASCII string to validate.
2318   * @param len the length of the string in bytes.
2319   * @return true if and only if the string is valid ASCII.
2320   */
2321  simdutf_warn_unused virtual bool validate_ascii(const char *buf, size_t len) const noexcept = 0;
2322
2323  /**
2324   * Validate the ASCII string and stop on error.
2325   *
2326   * Pure virtual: overridden by each implementation.
2327   *
2328   * @param buf the ASCII string to validate.
2329   * @param len the length of the string in bytes.
2330   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of code units validated if successful.
2331   */
2332  simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
2333
2334  /**
2335   * Validate the UTF-16LE string. This function may be best when you expect
2336   * the input to be almost always valid. Otherwise, consider using
2337   * validate_utf16le_with_errors.
2338   *
2339   * Pure virtual: overridden by each implementation.
2340   *
2341   * This function is not BOM-aware.
2342   *
2343   * @param buf the UTF-16LE string to validate.
2344   * @param len the length of the string in number of 2-byte code units (char16_t).
2345   * @return true if and only if the string is valid UTF-16LE.
2346   */
2347  simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
2348
2349  /**
2350   * Validate the UTF-16BE string. This function may be best when you expect
2351   * the input to be almost always valid. Otherwise, consider using
2352   * validate_utf16be_with_errors.
2353   *
2354   * Pure virtual: overridden by each implementation.
2355   *
2356   * This function is not BOM-aware.
2357   *
2358   * @param buf the UTF-16BE string to validate.
2359   * @param len the length of the string in number of 2-byte code units (char16_t).
2360   * @return true if and only if the string is valid UTF-16BE.
2361   */
2362  simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
2363
2364  /**
2365   * Validate the UTF-16LE string and stop on error. It might be faster than
2366   * validate_utf16le when an error is expected to occur early.
2367   *
2368   * Pure virtual: overridden by each implementation.
2369   *
2370   * This function is not BOM-aware.
2371   *
2372   * @param buf the UTF-16LE string to validate.
2373   * @param len the length of the string in number of 2-byte code units (char16_t).
2374   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of code units validated if successful.
2375   */
2376  simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
2377
2378  /**
2379   * Validate the UTF-16BE string and stop on error. It might be faster than
2380   * validate_utf16be when an error is expected to occur early.
2381   *
2382   * Pure virtual: overridden by each implementation.
2383   *
2384   * This function is not BOM-aware.
2385   *
2386   * @param buf the UTF-16BE string to validate.
2387   * @param len the length of the string in number of 2-byte code units (char16_t).
2388   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of code units validated if successful.
2389   */
2390  simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
2391
2392  /**
2393   * Validate the UTF-32 string.
2394   *
2395   * Pure virtual: overridden by each implementation.
2396   *
2397   * This function is not BOM-aware.
2398   *
2399   * @param buf the UTF-32 string to validate.
2400   * @param len the length of the string in number of 4-byte code units (char32_t).
2401   * @return true if and only if the string is valid UTF-32.
2402   */
2403  simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
2404
2405  /**
2406   * Validate the UTF-32 string and stop on error.
2407   *
2408   * Pure virtual: overridden by each implementation.
2409   *
2410   * This function is not BOM-aware.
2411   *
2412   * @param buf the UTF-32 string to validate.
2413   * @param len the length of the string in number of 4-byte code units (char32_t).
2414   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of code units validated if successful.
2415   */
2416  simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0;
2417
2418  /**
2419   * Convert Latin1 string into UTF-8 string. Overridden by each implementation.
2420   *
2421   * This function is suitable to work with inputs from untrusted sources.
2422   *
2423   * @param input         the Latin1 string to convert
2424   * @param length        the length of the string in bytes
2425   * @param utf8_output   the pointer to buffer that can hold conversion result
2426   * @return the number of written char; 0 if conversion is not possible
2427   */
2428  simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0;
2429
2430
2431  /**
2432   * Convert Latin1 string into UTF-16LE string. Overridden by each implementation.
2433   *
2434   * This function is suitable to work with inputs from untrusted sources.
2435   *
2436   * @param input         the Latin1 string to convert
2437   * @param length        the length of the string in bytes
2438   * @param utf16_output  the pointer to buffer that can hold conversion result
2439   * @return the number of written char16_t; 0 if conversion is not possible
2440   */
2441  simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2442
2443  /**
2444   * Convert Latin1 string into UTF-16BE string. Overridden by each implementation.
2445   *
2446   * This function is suitable to work with inputs from untrusted sources.
2447   *
2448   * @param input         the Latin1 string to convert
2449   * @param length        the length of the string in bytes
2450   * @param utf16_output  the pointer to buffer that can hold conversion result
2451   * @return the number of written char16_t; 0 if conversion is not possible
2452   */
2453  simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2454
2455  /**
2456   * Convert Latin1 string into UTF-32 string. Overridden by each implementation.
2457   *
2458   * This function is suitable to work with inputs from untrusted sources.
2459   *
2460   * @param input         the Latin1 string to convert
2461   * @param length        the length of the string in bytes
2462   * @param utf32_buffer  the pointer to buffer that can hold conversion result
2463   * @return the number of written char32_t; 0 if conversion is not possible
2464   */
2465  simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2466
2467  /**
2468   * Convert possibly broken UTF-8 string into Latin1 string. Overridden by each implementation.
2469   *
2470   * During the conversion also validation of the input string is done.
2471   * This function is suitable to work with inputs from untrusted sources.
2472   *
2473   * @param input         the UTF-8 string to convert
2474   * @param length        the length of the string in bytes
2475   * @param latin1_output the pointer to buffer that can hold conversion result
2476   * @return the number of written char; 0 if the input was not valid UTF-8 string
2477   */
2478  simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2479
2480  /**
2481   * Convert possibly broken UTF-8 string into Latin1 string and stop on error.
2482   *
2483   * During the conversion also validation of the input string is done.
2484   * This function is suitable to work with inputs from untrusted sources.
2485   *
2486   * @param input         the UTF-8 string to convert
2487   * @param length        the length of the string in bytes
2488   * @param latin1_output the pointer to buffer that can hold conversion result
2489   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
2490   */
2491  simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2492
2493  /**
2494   * Convert valid UTF-8 string into Latin1 string. Overridden by each implementation.
2495   *
2496   * This function assumes that the input string is valid UTF-8.
2497   *
2498   * This function is not BOM-aware.
2499   *
2500   * @param input         the UTF-8 string to convert
2501   * @param length        the length of the string in bytes
2502   * @param latin1_output the pointer to buffer that can hold conversion result
2503   * @return the number of written char; 0 if the input was not valid UTF-8 string
2504   */
2505  simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
2506
2507
2508  /**
2509   * Convert possibly broken UTF-8 string into UTF-16LE string. Overridden by each implementation.
2510   *
2511   * During the conversion also validation of the input string is done.
2512   * This function is suitable to work with inputs from untrusted sources.
2513   *
2514   * @param input         the UTF-8 string to convert
2515   * @param length        the length of the string in bytes
2516   * @param utf16_output  the pointer to buffer that can hold conversion result
2517   * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
2518   */
2519  simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2520
2521  /**
2522   * Convert possibly broken UTF-8 string into UTF-16BE string. Overridden by each implementation.
2523   *
2524   * During the conversion also validation of the input string is done.
2525   * This function is suitable to work with inputs from untrusted sources.
2526   *
2527   * @param input         the UTF-8 string to convert
2528   * @param length        the length of the string in bytes
2529   * @param utf16_output  the pointer to buffer that can hold conversion result
2530   * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
2531   */
2532  simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2533
2534  /**
2535   * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
2536   *
2537   * During the conversion also validation of the input string is done.
2538   * This function is suitable to work with inputs from untrusted sources.
2539   *
2540   * @param input         the UTF-8 string to convert
2541   * @param length        the length of the string in bytes
2542   * @param utf16_output  the pointer to buffer that can hold conversion result
2543   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char16_t written if successful.
2544   */
2545  simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2546
2547  /**
2548   * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
2549   *
2550   * During the conversion also validation of the input string is done.
2551   * This function is suitable to work with inputs from untrusted sources.
2552   *
2553   * @param input         the UTF-8 string to convert
2554   * @param length        the length of the string in bytes
2555   * @param utf16_output  the pointer to buffer that can hold conversion result
2556   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char16_t written if successful.
2557   */
2558  simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
2559
2560  /**
2561   * Convert possibly broken UTF-8 string into UTF-32 string. Overridden by each implementation.
2562   *
2563   * During the conversion also validation of the input string is done.
2564   * This function is suitable to work with inputs from untrusted sources.
2565   *
2566   * @param input         the UTF-8 string to convert
2567   * @param length        the length of the string in bytes
2568   * @param utf32_output  the pointer to buffer that can hold conversion result
2569   * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
2570   */
2571  simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
2572
2573  /**
2574   * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
2575   *
2576   * During the conversion also validation of the input string is done.
2577   * This function is suitable to work with inputs from untrusted sources.
2578   *
2579   * @param input         the UTF-8 string to convert
2580   * @param length        the length of the string in bytes
2581   * @param utf32_output  the pointer to buffer that can hold conversion result
2582   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char32_t written if successful.
2583   */
2584  simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
2585
2586  /**
2587   * Convert valid UTF-8 string into UTF-16LE string. Overridden by each implementation.
2588   *
2589   * This function assumes that the input string is valid UTF-8.
2590   *
2591   * @param input         the UTF-8 string to convert
2592   * @param length        the length of the string in bytes
2593   * @param utf16_buffer  the pointer to buffer that can hold conversion result
2594   * @return the number of written char16_t
2595   */
2596  simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
2597
2598  /**
2599   * Convert valid UTF-8 string into UTF-16BE string. Overridden by each implementation.
2600   *
2601   * This function assumes that the input string is valid UTF-8.
2602   *
2603   * @param input         the UTF-8 string to convert
2604   * @param length        the length of the string in bytes
2605   * @param utf16_buffer  the pointer to buffer that can hold conversion result
2606   * @return the number of written char16_t
2607   */
2608  simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
2609
2610  /**
2611   * Convert valid UTF-8 string into UTF-32 string. Overridden by each implementation.
2612   *
2613   * This function assumes that the input string is valid UTF-8.
2614   *
2615   * @param input         the UTF-8 string to convert
2616   * @param length        the length of the string in bytes
2617   * @param utf32_buffer  the pointer to buffer that can hold conversion result
2618   * @return the number of written char32_t
2619   */
2620  simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2621
2622  /**
2623   * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
2624   *
2625   * This function does not validate the input. Overridden by each implementation.
2626   *
2627   * @param input         the UTF-8 string to process
2628   * @param length        the length of the string in bytes
2629   * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
2630   */
2631  simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0;
2632
2633  /**
2634   * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
2635   *
2636   * This function is equivalent to count_utf8.
2637   *
2638   * This function does not validate the input. Overridden by each implementation.
2639   *
2640   * @param input         the UTF-8 string to process
2641   * @param length        the length of the string in bytes
2642   * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
2643   */
2644  simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
2645
2646  /**
2647   * Convert possibly broken UTF-16LE string into Latin1 string. Overridden by each implementation.
2648   *
2649   * During the conversion also validation of the input string is done.
2650   * This function is suitable to work with inputs from untrusted sources.
2651   *
2652   * This function is not BOM-aware.
2653   *
2654   * @param input         the UTF-16LE string to convert
2655   * @param length        the length of the string in 2-byte code units (char16_t)
2656   * @param latin1_buffer the pointer to buffer that can hold conversion result
2657   * @return number of written code units; 0 if input is not a valid UTF-16LE string
2658   */
2659  simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2660
2661  /**
2662   * Convert possibly broken UTF-16BE string into Latin1 string. Overridden by each implementation.
2663   *
2664   * During the conversion also validation of the input string is done.
2665   * This function is suitable to work with inputs from untrusted sources.
2666   *
2667   * This function is not BOM-aware.
2668   *
2669   * @param input         the UTF-16BE string to convert
2670   * @param length        the length of the string in 2-byte code units (char16_t)
2671   * @param latin1_buffer the pointer to buffer that can hold conversion result
2672   * @return number of written code units; 0 if input is not a valid UTF-16BE string
2673   */
2674  simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2675
2676  /**
2677   * Convert possibly broken UTF-16LE string into Latin1 string and stop on error.
2678   *
2679   * During the conversion also validation of the input string is done.
2680   * This function is suitable to work with inputs from untrusted sources.
2681   * This function is not BOM-aware.
2682   *
2683   * @param input         the UTF-16LE string to convert
2684   * @param length        the length of the string in 2-byte code units (char16_t)
2685   * @param latin1_buffer the pointer to buffer that can hold conversion result
2686   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
2687   */
2688  simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2689
2690  /**
2691   * Convert possibly broken UTF-16BE string into Latin1 string and stop on error.
2692   *
2693   * During the conversion also validation of the input string is done.
2694   * This function is suitable to work with inputs from untrusted sources.
2695   * This function is not BOM-aware.
2696   *
2697   * @param input         the UTF-16BE string to convert
2698   * @param length        the length of the string in 2-byte code units (char16_t)
2699   * @param latin1_buffer the pointer to buffer that can hold conversion result
2700   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
2701   */
2702  simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2703
2704  /**
2705   * Convert valid UTF-16LE string into Latin1 string. Overridden by each implementation.
2706   *
2707   * This function assumes that the input string is valid UTF-16LE.
2708   *
2709   * This function is not BOM-aware.
2710   *
2711   * @param input         the UTF-16LE string to convert
2712   * @param length        the length of the string in 2-byte code units (char16_t)
2713   * @param latin1_buffer the pointer to buffer that can hold conversion result
2714   * @return number of written code units; 0 if conversion is not possible
2715   */
2716  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2717
2718  /**
2719   * Convert valid UTF-16BE string into Latin1 string. Overridden by each implementation.
2720   *
2721   * This function assumes that the input string is valid UTF-16BE.
2722   *
2723   * This function is not BOM-aware.
2724   *
2725   * @param input         the UTF-16BE string to convert
2726   * @param length        the length of the string in 2-byte code units (char16_t)
2727   * @param latin1_buffer the pointer to buffer that can hold conversion result
2728   * @return number of written code units; 0 if conversion is not possible
2729   */
2730  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2731
2732  /**
2733   * Convert possibly broken UTF-16LE string into UTF-8 string. Overridden by each implementation.
2734   *
2735   * During the conversion also validation of the input string is done.
2736   * This function is suitable to work with inputs from untrusted sources.
2737   *
2738   * This function is not BOM-aware.
2739   *
2740   * @param input         the UTF-16LE string to convert
2741   * @param length        the length of the string in 2-byte code units (char16_t)
2742   * @param utf8_buffer   the pointer to buffer that can hold conversion result
2743   * @return number of written code units (char); 0 if input is not a valid UTF-16LE string
2744   */
2745  simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2746
2747  /**
2748   * Convert possibly broken UTF-16BE string into UTF-8 string. Overridden by each implementation.
2749   *
2750   * During the conversion also validation of the input string is done.
2751   * This function is suitable to work with inputs from untrusted sources.
2752   *
2753   * This function is not BOM-aware.
2754   *
2755   * @param input         the UTF-16BE string to convert
2756   * @param length        the length of the string in 2-byte code units (char16_t)
2757   * @param utf8_buffer   the pointer to buffer that can hold conversion result
2758   * @return number of written code units (char); 0 if input is not a valid UTF-16BE string
2759   */
2760  simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2761
2762  /**
2763   * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
2764   *
2765   * During the conversion also validation of the input string is done.
2766   * This function is suitable to work with inputs from untrusted sources.
2767   *
2768   * This function is not BOM-aware.
2769   *
2770   * @param input         the UTF-16LE string to convert
2771   * @param length        the length of the string in 2-byte code units (char16_t)
2772   * @param utf8_buffer   the pointer to buffer that can hold conversion result
2773   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
2774   */
2775  simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2776
2777  /**
2778   * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
2779   *
2780   * During the conversion also validation of the input string is done.
2781   * This function is suitable to work with inputs from untrusted sources.
2782   *
2783   * This function is not BOM-aware.
2784   *
2785   * @param input         the UTF-16BE string to convert
2786   * @param length        the length of the string in 2-byte code units (char16_t)
2787   * @param utf8_buffer   the pointer to buffer that can hold conversion result
2788   * @return a result struct (of type simdutf::result, with the two fields error and count) holding an error code and either the position of the error (in the input, in code units) if any, or the number of char written if successful.
2789   */
2790  simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2791
2792  /**
2793   * Convert valid UTF-16LE string into UTF-8 string. Overridden by each implementation.
2794   *
2795   * This function assumes that the input string is valid UTF-16LE.
2796   *
2797   * This function is not BOM-aware.
2798   *
2799   * @param input         the UTF-16LE string to convert
2800   * @param length        the length of the string in 2-byte code units (char16_t)
2801   * @param utf8_buffer   the pointer to buffer that can hold the conversion result
2802   * @return number of written code units (char); 0 if conversion is not possible
2803   */
2804  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2805
2806  /**
2807   * Convert valid UTF-16BE string into UTF-8 string. Overridden by each implementation.
2808   *
2809   * This function assumes that the input string is valid UTF-16BE.
2810   *
2811   * This function is not BOM-aware.
2812   *
2813   * @param input         the UTF-16BE string to convert
2814   * @param length        the length of the string in 2-byte code units (char16_t)
2815   * @param utf8_buffer   the pointer to buffer that can hold the conversion result
2816   * @return number of written code units (char); 0 if conversion is not possible
2817   */
2818  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2819
2820  /**
2821   * Convert possibly broken UTF-16LE string into UTF-32 string. Overridden by each implementation.
2822   *
2823   * During the conversion also validation of the input string is done.
2824   * This function is suitable to work with inputs from untrusted sources.
2825   *
2826   * This function is not BOM-aware.
2827   *
2828   * @param input         the UTF-16LE string to convert
2829   * @param length        the length of the string in 2-byte code units (char16_t)
2830   * @param utf32_buffer  the pointer to buffer that can hold conversion result
2831   * @return number of written code units (char32_t); 0 if input is not a valid UTF-16LE string
2832   */
2833  simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2834
2835  /**
2836   * Convert possibly broken UTF-16BE string into UTF-32 string.
2837   *
2838   * During the conversion also validation of the input string is done.
2839   * This function is suitable to work with inputs from untrusted sources.
2840   *
2841   * This function is not BOM-aware.
2842   *
2843   * @param input         the UTF-16BE string to convert
2844   * @param length        the length of the string in 2-byte code units (char16_t)
2845   * @param utf32_buffer   the pointer to buffer that can hold conversion result
2846   * @return number of written code units; 0 if input is not a valid UTF-16BE string
2847   */
2848  simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2849
2850  /**
2851   * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
2852   *
2853   * During the conversion also validation of the input string is done.
2854   * This function is suitable to work with inputs from untrusted sources.
2855   *
2856   * This function is not BOM-aware.
2857   *
2858   * @param input         the UTF-16LE string to convert
2859   * @param length        the length of the string in 2-byte code units (char16_t)
2860   * @param utf32_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
2862   */
2863  simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2864
2865  /**
2866   * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
2867   *
2868   * During the conversion also validation of the input string is done.
2869   * This function is suitable to work with inputs from untrusted sources.
2870   *
2871   * This function is not BOM-aware.
2872   *
2873   * @param input         the UTF-16BE string to convert
2874   * @param length        the length of the string in 2-byte code units (char16_t)
2875   * @param utf32_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
2877   */
2878  simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2879
2880  /**
2881   * Convert valid UTF-16LE string into UTF-32 string.
2882   *
2883   * This function assumes that the input string is valid UTF-16LE.
2884   *
2885   * This function is not BOM-aware.
2886   *
2887   * @param input         the UTF-16LE string to convert
2888   * @param length        the length of the string in 2-byte code units (char16_t)
2889   * @param utf32_buffer   the pointer to buffer that can hold the conversion result
2890   * @return number of written code units; 0 if conversion is not possible
2891   */
2892  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2893
2894  /**
   * Convert valid UTF-16BE string into UTF-32 string.
2896   *
2897   * This function assumes that the input string is valid UTF-16BE.
2898   *
2899   * This function is not BOM-aware.
2900   *
2901   * @param input         the UTF-16BE string to convert
2902   * @param length        the length of the string in 2-byte code units (char16_t)
2903   * @param utf32_buffer   the pointer to buffer that can hold the conversion result
2904   * @return number of written code units; 0 if conversion is not possible
2905   */
2906  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
2907
2908  /**
2909   * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
2910   *
2911   * This function does not validate the input.
2912   *
2913   * This function is not BOM-aware.
2914   *
2915   * @param input         the UTF-16LE string to convert
2916   * @param length        the length of the string in 2-byte code units (char16_t)
2917   * @return the number of bytes required to encode the UTF-16LE string as UTF-8
2918   */
2919  simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
2920
2921  /**
2922   * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
2923   *
2924   * This function does not validate the input.
2925   *
2926   * This function is not BOM-aware.
2927   *
2928   * @param input         the UTF-16BE string to convert
2929   * @param length        the length of the string in 2-byte code units (char16_t)
2930   * @return the number of bytes required to encode the UTF-16BE string as UTF-8
2931   */
2932  simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
2933
2934  /**
2935   * Convert possibly broken UTF-32 string into Latin1 string.
2936   *
2937   * During the conversion also validation of the input string is done.
2938   * This function is suitable to work with inputs from untrusted sources.
2939   *
2940   * This function is not BOM-aware.
2941   *
2942   * @param input         the UTF-32 string to convert
2943   * @param length        the length of the string in 4-byte code units (char32_t)
2944   * @param latin1_buffer   the pointer to buffer that can hold conversion result
2945   * @return number of written code units; 0 if input is not a valid UTF-32 string
2946   */
2947
2948  simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2949
2950  /**
2951   * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
2952   *
2953   * During the conversion also validation of the input string is done.
2954   * This function is suitable to work with inputs from untrusted sources.
2955   *
2956   * This function is not BOM-aware.
2957   *
2958   * @param input         the UTF-32 string to convert
2959   * @param length        the length of the string in 4-byte code units (char32_t)
2960   * @param latin1_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
2962   */
2963
2964  simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2965
2966  /**
2967   * Convert valid UTF-32 string into Latin1 string.
2968   *
2969   * This function assumes that the input string is valid UTF-32.
2970   *
2971   * This function is not BOM-aware.
2972   *
2973   * @param input         the UTF-32 string to convert
2974   * @param length        the length of the string in 4-byte code units (char32_t)
2975   * @param latin1_buffer   the pointer to buffer that can hold the conversion result
2976   * @return number of written code units; 0 if conversion is not possible
2977   */
2978  simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
2979
2980  /**
2981   * Convert possibly broken UTF-32 string into UTF-8 string.
2982   *
2983   * During the conversion also validation of the input string is done.
2984   * This function is suitable to work with inputs from untrusted sources.
2985   *
2986   * This function is not BOM-aware.
2987   *
2988   * @param input         the UTF-32 string to convert
2989   * @param length        the length of the string in 4-byte code units (char32_t)
2990   * @param utf8_buffer   the pointer to buffer that can hold conversion result
2991   * @return number of written code units; 0 if input is not a valid UTF-32 string
2992   */
2993  simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
2994
2995  /**
2996   * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
2997   *
2998   * During the conversion also validation of the input string is done.
2999   * This function is suitable to work with inputs from untrusted sources.
3000   *
3001   * This function is not BOM-aware.
3002   *
3003   * @param input         the UTF-32 string to convert
3004   * @param length        the length of the string in 4-byte code units (char32_t)
3005   * @param utf8_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
3007   */
3008  simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
3009
3010  /**
3011   * Convert valid UTF-32 string into UTF-8 string.
3012   *
3013   * This function assumes that the input string is valid UTF-32.
3014   *
3015   * This function is not BOM-aware.
3016   *
3017   * @param input         the UTF-32 string to convert
3018   * @param length        the length of the string in 4-byte code units (char32_t)
3019   * @param utf8_buffer   the pointer to buffer that can hold the conversion result
3020   * @return number of written code units; 0 if conversion is not possible
3021   */
3022  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
3023
3024
  /**
   * Return the number of 2-byte code units (char16_t) that this Latin1 string would require in UTF-16 format.
   *
   * @param length        the length of the Latin1 string in bytes
   * @return the number of 2-byte code units required to encode the Latin1 string as UTF-16
   */
3033    simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0;
3034
3035  /**
3036   * Convert possibly broken UTF-32 string into UTF-16LE string.
3037   *
3038   * During the conversion also validation of the input string is done.
3039   * This function is suitable to work with inputs from untrusted sources.
3040   *
3041   * This function is not BOM-aware.
3042   *
3043   * @param input         the UTF-32 string to convert
3044   * @param length        the length of the string in 4-byte code units (char32_t)
3045   * @param utf16_buffer   the pointer to buffer that can hold conversion result
3046   * @return number of written code units; 0 if input is not a valid UTF-32 string
3047   */
3048  simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3049
3050  /**
3051   * Convert possibly broken UTF-32 string into UTF-16BE string.
3052   *
3053   * During the conversion also validation of the input string is done.
3054   * This function is suitable to work with inputs from untrusted sources.
3055   *
3056   * This function is not BOM-aware.
3057   *
3058   * @param input         the UTF-32 string to convert
3059   * @param length        the length of the string in 4-byte code units (char32_t)
3060   * @param utf16_buffer   the pointer to buffer that can hold conversion result
3061   * @return number of written code units; 0 if input is not a valid UTF-32 string
3062   */
3063  simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3064
3065  /**
3066   * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
3067   *
3068   * During the conversion also validation of the input string is done.
3069   * This function is suitable to work with inputs from untrusted sources.
3070   *
3071   * This function is not BOM-aware.
3072   *
3073   * @param input         the UTF-32 string to convert
3074   * @param length        the length of the string in 4-byte code units (char32_t)
3075   * @param utf16_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
3077   */
3078  simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3079
3080  /**
3081   * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
3082   *
3083   * During the conversion also validation of the input string is done.
3084   * This function is suitable to work with inputs from untrusted sources.
3085   *
3086   * This function is not BOM-aware.
3087   *
3088   * @param input         the UTF-32 string to convert
3089   * @param length        the length of the string in 4-byte code units (char32_t)
3090   * @param utf16_buffer   the pointer to buffer that can hold conversion result
   * @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
3092   */
3093  simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3094
3095  /**
3096   * Convert valid UTF-32 string into UTF-16LE string.
3097   *
3098   * This function assumes that the input string is valid UTF-32.
3099   *
3100   * This function is not BOM-aware.
3101   *
3102   * @param input         the UTF-32 string to convert
3103   * @param length        the length of the string in 4-byte code units (char32_t)
3104   * @param utf16_buffer   the pointer to buffer that can hold the conversion result
3105   * @return number of written code units; 0 if conversion is not possible
3106   */
3107  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3108
3109  /**
3110   * Convert valid UTF-32 string into UTF-16BE string.
3111   *
3112   * This function assumes that the input string is valid UTF-32.
3113   *
3114   * This function is not BOM-aware.
3115   *
3116   * @param input         the UTF-32 string to convert
3117   * @param length        the length of the string in 4-byte code units (char32_t)
3118   * @param utf16_buffer   the pointer to buffer that can hold the conversion result
3119   * @return number of written code units; 0 if conversion is not possible
3120   */
3121  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
3122
3123  /**
3124   * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
3125   * from UTF-16BE to UTF-16LE.
3126   *
3127   * This function does not validate the input.
3128   *
3129   * This function is not BOM-aware.
3130   *
3131   * @param input         the UTF-16 string to process
3132   * @param length        the length of the string in 2-byte code units (char16_t)
3133   * @param output        the pointer to buffer that can hold the conversion result
3134   */
3135  virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
3136
  /**
   * Return the number of bytes that this Latin1 string would require in UTF-8 format.
   *
   * @param input         the Latin1 string to convert
   * @param length        the length of the string in bytes
   * @return the number of bytes required to encode the Latin1 string as UTF-8
   */
3144    simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0;
3145
3146  /**
3147   * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
3148   *
3149   * This function does not validate the input.
3150   *
3151   * @param input         the UTF-32 string to convert
3152   * @param length        the length of the string in 4-byte code units (char32_t)
3153   * @return the number of bytes required to encode the UTF-32 string as UTF-8
3154   */
3155  simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
3156
3157  /**
3158   * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
3159   *
3160   * This function does not validate the input.
3161   *
3162   * @param length        the length of the string in 4-byte code units (char32_t)
3163   * @return the number of bytes required to encode the UTF-32 string as Latin1
3164   */
3165  simdutf_warn_unused virtual size_t latin1_length_from_utf32(size_t length) const noexcept = 0;
3166
3167  /**
3168   * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
3169   *
3170   * This function does not validate the input.
3171   *
3172   * @param input         the UTF-8 string to convert
3173   * @param length        the length of the string in byte
3174   * @return the number of bytes required to encode the UTF-8 string as Latin1
3175   */
3176  simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0;
3177
  /**
   * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of bytes required to encode the UTF-16LE/BE string as Latin1
   */
3189  simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
3190
3191  /**
3192   * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
3193   *
3194   * This function does not validate the input.
3195   *
3196   * @param input         the UTF-32 string to convert
3197   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of two-byte code units required to encode the UTF-32 string as UTF-16
3199   */
3200  simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
3201
3202
  /**
   * Return the number of 4-byte code units (char32_t) that this Latin1 string would require in UTF-32 format.
   *
   * This function does not validate the input.
   *
   * @param length        the length of the Latin1 string in bytes
   * @return the number of 4-byte code units required to encode the Latin1 string as UTF-32
   */
3212    simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0;
3213
  /**
   * Compute the number of 4-byte code units (char32_t) that this UTF-16LE string would require in UTF-32 format.
   *
   * This function is equivalent to count_utf16le.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16LE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of 4-byte code units required to encode the UTF-16LE string as UTF-32
   */
3227  simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
3228
  /**
   * Compute the number of 4-byte code units (char32_t) that this UTF-16BE string would require in UTF-32 format.
   *
   * This function is equivalent to count_utf16be.
   *
   * This function does not validate the input.
   *
   * This function is not BOM-aware.
   *
   * @param input         the UTF-16BE string to convert
   * @param length        the length of the string in 2-byte code units (char16_t)
   * @return the number of 4-byte code units required to encode the UTF-16BE string as UTF-32
   */
3242  simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
3243
3244  /**
3245   * Count the number of code points (characters) in the string assuming that
3246   * it is valid.
3247   *
3248   * This function assumes that the input string is valid UTF-16LE.
3249   *
3250   * This function is not BOM-aware.
3251   *
3252   * @param input         the UTF-16LE string to process
3253   * @param length        the length of the string in 2-byte code units (char16_t)
3254   * @return number of code points
3255   */
3256  simdutf_warn_unused virtual size_t count_utf16le(const char16_t * input, size_t length) const noexcept = 0;
3257
3258  /**
3259   * Count the number of code points (characters) in the string assuming that
3260   * it is valid.
3261   *
3262   * This function assumes that the input string is valid UTF-16BE.
3263   *
3264   * This function is not BOM-aware.
3265   *
3266   * @param input         the UTF-16BE string to process
3267   * @param length        the length of the string in 2-byte code units (char16_t)
3268   * @return number of code points
3269   */
3270  simdutf_warn_unused virtual size_t count_utf16be(const char16_t * input, size_t length) const noexcept = 0;
3271
3272
3273  /**
3274   * Count the number of code points (characters) in the string assuming that
3275   * it is valid.
3276   *
3277   * This function assumes that the input string is valid UTF-8.
3278   *
3279   * @param input         the UTF-8 string to process
3280   * @param length        the length of the string in bytes
3281   * @return number of code points
3282   */
3283  simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0;
3284
3285
3286
3287protected:
3288  /** @private Construct an implementation with the given name and description. For subclasses. */
3289  simdutf_really_inline implementation(
3290    std::string name,
3291    std::string description,
3292    uint32_t required_instruction_sets
3293  ) :
3294    _name(name),
3295    _description(description),
3296    _required_instruction_sets(required_instruction_sets)
3297  {
3298  }
3299  virtual ~implementation()=default;
3300
3301private:
3302  /**
3303   * The name of this implementation.
3304   */
3305  const std::string _name;
3306
3307  /**
3308   * The description of this implementation.
3309   */
3310  const std::string _description;
3311
3312  /**
3313   * Instruction sets required for this implementation.
3314   */
3315  const uint32_t _required_instruction_sets;
3316};
3317
3318/** @private */
3319namespace internal {
3320
3321/**
3322 * The list of available implementations compiled into simdutf.
3323 */
3324class available_implementation_list {
3325public:
3326  /** Get the list of available implementations compiled into simdutf */
3327  simdutf_really_inline available_implementation_list() {}
3328  /** Number of implementations */
3329  size_t size() const noexcept;
3330  /** STL const begin() iterator */
3331  const implementation * const *begin() const noexcept;
3332  /** STL const end() iterator */
3333  const implementation * const *end() const noexcept;
3334
3335  /**
3336   * Get the implementation with the given name.
3337   *
3338   * Case sensitive.
3339   *
3340   *     const implementation *impl = simdutf::available_implementations["westmere"];
3341   *     if (!impl) { exit(1); }
3342   *     if (!imp->supported_by_runtime_system()) { exit(1); }
3343   *     simdutf::active_implementation = impl;
3344   *
3345   * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
3346   * @return the implementation, or nullptr if the parse failed.
3347   */
3348  const implementation * operator[](const std::string &name) const noexcept {
3349    for (const implementation * impl : *this) {
3350      if (impl->name() == name) { return impl; }
3351    }
3352    return nullptr;
3353  }
3354
3355  /**
3356   * Detect the most advanced implementation supported by the current host.
3357   *
3358   * This is used to initialize the implementation on startup.
3359   *
3360   *     const implementation *impl = simdutf::available_implementation::detect_best_supported();
3361   *     simdutf::active_implementation = impl;
3362   *
3363   * @return the most advanced supported implementation for the current host, or an
3364   *         implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
3365   *         implementation. Will never return nullptr.
3366   */
3367  const implementation *detect_best_supported() const noexcept;
3368};
3369
/**
 * Small pointer holder with pointer-like access.
 *
 * The pointer is stored in a std::atomic<T*> unless SIMDUTF_NO_THREADS is
 * defined, in which case a plain T* is stored instead. Every accessor goes
 * through the private get() helper, so the two configurations differ only in
 * how the pointer is stored and read.
 */
template<typename T>
class atomic_ptr {
public:
  /** Wrap the given pointer (implicit conversion from T* is allowed). */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access.
  operator const T*() const { return get(); }
  const T& operator*() const { return *get(); }
  const T* operator->() const { return get(); }

  // Mutable access.
  operator T*() { return get(); }
  T& operator*() { return *get(); }
  T* operator->() { return get(); }

  /** Replace the stored pointer and return *this. */
  atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }

private:
#if defined(SIMDUTF_NO_THREADS)
  /** Current pointer value (plain read). */
  T* get() const { return ptr; }
  T* ptr;
#else
  /** Current pointer value, read with std::atomic::load(). */
  T* get() const { return ptr.load(); }
  std::atomic<T*> ptr;
#endif
};
3404
3405class detect_best_supported_implementation_on_first_use;
3406
3407} // namespace internal
3408
3409/**
3410 * The list of available implementations compiled into simdutf.
3411 */
3412extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
3413
3414/**
3415  * The active implementation.
3416  *
3417  * Automatically initialized on first use to the most advanced implementation supported by this hardware.
3418  */
3419extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
3420
3421
3422} // namespace simdutf
3423
3424#endif // SIMDUTF_IMPLEMENTATION_H
3425/* end file include/simdutf/implementation.h */
3426
3427
3428// Implementation-internal files (must be included before the implementations themselves, to keep
3429// amalgamation working--otherwise, the first time a file is included, it might be put inside the
3430// #ifdef SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't
3431// compile unless that implementation is turned on).
3432
3433
3434SIMDUTF_POP_DISABLE_WARNINGS
3435
3436#endif // SIMDUTF_H
3437/* end file include/simdutf.h */
3438