17db96d56Sopenharmony_ci#ifndef Py_CPYTHON_UNICODEOBJECT_H
27db96d56Sopenharmony_ci#  error "this header file must not be included directly"
37db96d56Sopenharmony_ci#endif
47db96d56Sopenharmony_ci
/* Py_UNICODE used to be Python's native Unicode storage type: one code unit
   representing a single Unicode element of the Unicode type.  Since PEP 393
   it is deprecated and is nothing more than an alias for wchar_t.
   PY_UNICODE_TYPE names that underlying type. */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef PY_UNICODE_TYPE Py_UNICODE;
117db96d56Sopenharmony_ci
127db96d56Sopenharmony_ci/* --- Internal Unicode Operations ---------------------------------------- */
137db96d56Sopenharmony_ci
/* USE_UNICODE_WCHAR_CACHE defaults to 1; a definition supplied before this
   point (e.g. by the build) is left untouched. */
#if !defined(USE_UNICODE_WCHAR_CACHE)
#  define USE_UNICODE_WCHAR_CACHE 1
#endif /* !defined(USE_UNICODE_WCHAR_CACHE) */
177db96d56Sopenharmony_ci
/* Whitespace test with an inlined fast path.

   Splitting on whitespace is an important use case and, in most situations,
   the whitespace involved is plain ASCII.  Code points below 128 are
   therefore answered from the _Py_ascii_whitespace look-up table (see
   below); everything else is delegated to _PyUnicode_IsWhitespace().

   This is a macro: `ch` appears several times in the expansion, so the
   argument should be free of side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) >= 128U ? _PyUnicode_IsWhitespace(ch) \
                           : _Py_ascii_whitespace[(ch)])
267db96d56Sopenharmony_ci
/* Character classification and conversion macros.  Except for
   Py_UNICODE_ISALNUM, each one forwards its argument unchanged to the
   _PyUnicode_* function named in its expansion. */

/* Classification by case and line-break property. */
#define Py_UNICODE_ISLOWER(ch)     _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch)     _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch)     _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversion. */
#define Py_UNICODE_TOLOWER(ch)     _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch)     _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch)     _PyUnicode_ToTitlecase(ch)

/* Numeric classification and printability. */
#define Py_UNICODE_ISDECIMAL(ch)   _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch)     _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch)   _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric conversion. */
#define Py_UNICODE_TODECIMAL(ch)   _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch)     _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch)   _PyUnicode_ToNumeric(ch)

/* Alphabetic classification. */
#define Py_UNICODE_ISALPHA(ch)     _PyUnicode_IsAlpha(ch)

/* True when any of the alpha / decimal / digit / numeric checks is true.
   The checks short-circuit in the order written, and `ch` is expanded into
   each of them, so the argument should be free of side effects. */
#define Py_UNICODE_ISALNUM(ch)      \
    (Py_UNICODE_ISALPHA(ch)      || \
     Py_UNICODE_ISDECIMAL(ch)    || \
     Py_UNICODE_ISDIGIT(ch)      || \
     Py_UNICODE_ISNUMERIC(ch))
527db96d56Sopenharmony_ci
/* Surrogate helpers.  These are macros and `ch` may appear more than once in
   an expansion, so arguments should be free of side effects. */

/* Range tests: any surrogate (D800-DFFF), high half (D800-DBFF),
   low half (DC00-DFFF). */
#define Py_UNICODE_IS_SURROGATE(ch)      ((ch) >= 0xD800 && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch)  ((ch) >= 0xDC00 && (ch) <= 0xDFFF)

/* Combine a high and a low surrogate into one Py_UCS4 value: the low 10 bits
   of `high` become bits 10-19, the low 10 bits of `low` become bits 0-9, and
   0x10000 is added to the result. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)          \
    (0x10000 + ((((Py_UCS4)(high) & 0x03FF) << 10) |   \
                 ((Py_UCS4)(low) & 0x03FF)))

/* High surrogate for `ch`: D800, minus the 0x10000 offset shifted down by
   10 bits, plus ch shifted down by 10 bits. */
#define Py_UNICODE_HIGH_SURROGATE(ch) (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))

/* Low surrogate for `ch`: DC00 plus the bottom 10 bits of ch. */
#define Py_UNICODE_LOW_SURROGATE(ch) (((ch) & 0x3FF) + 0xDC00)
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci/* --- Unicode Type ------------------------------------------------------- */
677db96d56Sopenharmony_ci
687db96d56Sopenharmony_ci/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
697db96d56Sopenharmony_ci   structure. state.ascii and state.compact are set, and the data
707db96d56Sopenharmony_ci   immediately follow the structure. utf8_length and wstr_length can be found
717db96d56Sopenharmony_ci   in the length field; the utf8 pointer is equal to the data pointer. */
727db96d56Sopenharmony_citypedef struct {
737db96d56Sopenharmony_ci    /* There are 4 forms of Unicode strings:
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_ci       - compact ascii:
767db96d56Sopenharmony_ci
777db96d56Sopenharmony_ci         * structure = PyASCIIObject
787db96d56Sopenharmony_ci         * test: PyUnicode_IS_COMPACT_ASCII(op)
797db96d56Sopenharmony_ci         * kind = PyUnicode_1BYTE_KIND
807db96d56Sopenharmony_ci         * compact = 1
817db96d56Sopenharmony_ci         * ascii = 1
827db96d56Sopenharmony_ci         * ready = 1
837db96d56Sopenharmony_ci         * (length is the length of the utf8 and wstr strings)
847db96d56Sopenharmony_ci         * (data starts just after the structure)
857db96d56Sopenharmony_ci         * (since ASCII is decoded from UTF-8, the utf8 string are the data)
867db96d56Sopenharmony_ci
877db96d56Sopenharmony_ci       - compact:
887db96d56Sopenharmony_ci
897db96d56Sopenharmony_ci         * structure = PyCompactUnicodeObject
907db96d56Sopenharmony_ci         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
917db96d56Sopenharmony_ci         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
927db96d56Sopenharmony_ci           PyUnicode_4BYTE_KIND
937db96d56Sopenharmony_ci         * compact = 1
947db96d56Sopenharmony_ci         * ready = 1
957db96d56Sopenharmony_ci         * ascii = 0
967db96d56Sopenharmony_ci         * utf8 is not shared with data
977db96d56Sopenharmony_ci         * utf8_length = 0 if utf8 is NULL
987db96d56Sopenharmony_ci         * wstr is shared with data and wstr_length=length
997db96d56Sopenharmony_ci           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
1007db96d56Sopenharmony_ci           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
1017db96d56Sopenharmony_ci         * wstr_length = 0 if wstr is NULL
1027db96d56Sopenharmony_ci         * (data starts just after the structure)
1037db96d56Sopenharmony_ci
1047db96d56Sopenharmony_ci       - legacy string, not ready:
1057db96d56Sopenharmony_ci
1067db96d56Sopenharmony_ci         * structure = PyUnicodeObject
1077db96d56Sopenharmony_ci         * test: kind == PyUnicode_WCHAR_KIND
1087db96d56Sopenharmony_ci         * length = 0 (use wstr_length)
1097db96d56Sopenharmony_ci         * hash = -1
1107db96d56Sopenharmony_ci         * kind = PyUnicode_WCHAR_KIND
1117db96d56Sopenharmony_ci         * compact = 0
1127db96d56Sopenharmony_ci         * ascii = 0
1137db96d56Sopenharmony_ci         * ready = 0
1147db96d56Sopenharmony_ci         * interned = SSTATE_NOT_INTERNED
1157db96d56Sopenharmony_ci         * wstr is not NULL
1167db96d56Sopenharmony_ci         * data.any is NULL
1177db96d56Sopenharmony_ci         * utf8 is NULL
1187db96d56Sopenharmony_ci         * utf8_length = 0
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci       - legacy string, ready:
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci         * structure = PyUnicodeObject structure
1237db96d56Sopenharmony_ci         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
1247db96d56Sopenharmony_ci         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
1257db96d56Sopenharmony_ci           PyUnicode_4BYTE_KIND
1267db96d56Sopenharmony_ci         * compact = 0
1277db96d56Sopenharmony_ci         * ready = 1
1287db96d56Sopenharmony_ci         * data.any is not NULL
1297db96d56Sopenharmony_ci         * utf8 is shared and utf8_length = length with data.any if ascii = 1
1307db96d56Sopenharmony_ci         * utf8_length = 0 if utf8 is NULL
1317db96d56Sopenharmony_ci         * wstr is shared with data.any and wstr_length = length
1327db96d56Sopenharmony_ci           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
1337db96d56Sopenharmony_ci           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
1347db96d56Sopenharmony_ci         * wstr_length = 0 if wstr is NULL
1357db96d56Sopenharmony_ci
1367db96d56Sopenharmony_ci       Compact strings use only one memory block (structure + characters),
1377db96d56Sopenharmony_ci       whereas legacy strings use one block for the structure and one block
1387db96d56Sopenharmony_ci       for characters.
1397db96d56Sopenharmony_ci
1407db96d56Sopenharmony_ci       Legacy strings are created by PyUnicode_FromUnicode() and
1417db96d56Sopenharmony_ci       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
1427db96d56Sopenharmony_ci       when PyUnicode_READY() is called.
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci       See also _PyUnicode_CheckConsistency().
1457db96d56Sopenharmony_ci    */
1467db96d56Sopenharmony_ci    PyObject_HEAD
1477db96d56Sopenharmony_ci    Py_ssize_t length;          /* Number of code points in the string */
1487db96d56Sopenharmony_ci    Py_hash_t hash;             /* Hash value; -1 if not set */
1497db96d56Sopenharmony_ci    struct {
1507db96d56Sopenharmony_ci        /*
1517db96d56Sopenharmony_ci           SSTATE_NOT_INTERNED (0)
1527db96d56Sopenharmony_ci           SSTATE_INTERNED_MORTAL (1)
1537db96d56Sopenharmony_ci           SSTATE_INTERNED_IMMORTAL (2)
1547db96d56Sopenharmony_ci
1557db96d56Sopenharmony_ci           If interned != SSTATE_NOT_INTERNED, the two references from the
1567db96d56Sopenharmony_ci           dictionary to this object are *not* counted in ob_refcnt.
1577db96d56Sopenharmony_ci         */
1587db96d56Sopenharmony_ci        unsigned int interned:2;
1597db96d56Sopenharmony_ci        /* Character size:
1607db96d56Sopenharmony_ci
1617db96d56Sopenharmony_ci           - PyUnicode_WCHAR_KIND (0):
1627db96d56Sopenharmony_ci
1637db96d56Sopenharmony_ci             * character type = wchar_t (16 or 32 bits, depending on the
1647db96d56Sopenharmony_ci               platform)
1657db96d56Sopenharmony_ci
1667db96d56Sopenharmony_ci           - PyUnicode_1BYTE_KIND (1):
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci             * character type = Py_UCS1 (8 bits, unsigned)
1697db96d56Sopenharmony_ci             * all characters are in the range U+0000-U+00FF (latin1)
1707db96d56Sopenharmony_ci             * if ascii is set, all characters are in the range U+0000-U+007F
1717db96d56Sopenharmony_ci               (ASCII), otherwise at least one character is in the range
1727db96d56Sopenharmony_ci               U+0080-U+00FF
1737db96d56Sopenharmony_ci
1747db96d56Sopenharmony_ci           - PyUnicode_2BYTE_KIND (2):
1757db96d56Sopenharmony_ci
1767db96d56Sopenharmony_ci             * character type = Py_UCS2 (16 bits, unsigned)
1777db96d56Sopenharmony_ci             * all characters are in the range U+0000-U+FFFF (BMP)
1787db96d56Sopenharmony_ci             * at least one character is in the range U+0100-U+FFFF
1797db96d56Sopenharmony_ci
1807db96d56Sopenharmony_ci           - PyUnicode_4BYTE_KIND (4):
1817db96d56Sopenharmony_ci
1827db96d56Sopenharmony_ci             * character type = Py_UCS4 (32 bits, unsigned)
1837db96d56Sopenharmony_ci             * all characters are in the range U+0000-U+10FFFF
1847db96d56Sopenharmony_ci             * at least one character is in the range U+10000-U+10FFFF
1857db96d56Sopenharmony_ci         */
1867db96d56Sopenharmony_ci        unsigned int kind:3;
1877db96d56Sopenharmony_ci        /* Compact is with respect to the allocation scheme. Compact unicode
1887db96d56Sopenharmony_ci           objects only require one memory block while non-compact objects use
1897db96d56Sopenharmony_ci           one block for the PyUnicodeObject struct and another for its data
1907db96d56Sopenharmony_ci           buffer. */
1917db96d56Sopenharmony_ci        unsigned int compact:1;
1927db96d56Sopenharmony_ci        /* The string only contains characters in the range U+0000-U+007F (ASCII)
1937db96d56Sopenharmony_ci           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
1947db96d56Sopenharmony_ci           set, use the PyASCIIObject structure. */
1957db96d56Sopenharmony_ci        unsigned int ascii:1;
1967db96d56Sopenharmony_ci        /* The ready flag indicates whether the object layout is initialized
1977db96d56Sopenharmony_ci           completely. This means that this is either a compact object, or
1987db96d56Sopenharmony_ci           the data pointer is filled out. The bit is redundant, and helps
1997db96d56Sopenharmony_ci           to minimize the test in PyUnicode_IS_READY(). */
2007db96d56Sopenharmony_ci        unsigned int ready:1;
2017db96d56Sopenharmony_ci        /* Padding to ensure that PyUnicode_DATA() is always aligned to
2027db96d56Sopenharmony_ci           4 bytes (see issue #19537 on m68k). */
2037db96d56Sopenharmony_ci        unsigned int :24;
2047db96d56Sopenharmony_ci    } state;
2057db96d56Sopenharmony_ci    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
2067db96d56Sopenharmony_ci} PyASCIIObject;
2077db96d56Sopenharmony_ci
2087db96d56Sopenharmony_ci/* Non-ASCII strings allocated through PyUnicode_New use the
2097db96d56Sopenharmony_ci   PyCompactUnicodeObject structure. state.compact is set, and the data
2107db96d56Sopenharmony_ci   immediately follow the structure. */
2117db96d56Sopenharmony_citypedef struct {
2127db96d56Sopenharmony_ci    PyASCIIObject _base;
2137db96d56Sopenharmony_ci    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
2147db96d56Sopenharmony_ci                                 * terminating \0. */
2157db96d56Sopenharmony_ci    char *utf8;                 /* UTF-8 representation (null-terminated) */
2167db96d56Sopenharmony_ci    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
2177db96d56Sopenharmony_ci                                 * surrogates count as two code points. */
2187db96d56Sopenharmony_ci} PyCompactUnicodeObject;
2197db96d56Sopenharmony_ci
2207db96d56Sopenharmony_ci/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
2217db96d56Sopenharmony_ci   PyUnicodeObject structure. The actual string data is initially in the wstr
2227db96d56Sopenharmony_ci   block, and copied into the data block using _PyUnicode_Ready. */
2237db96d56Sopenharmony_citypedef struct {
2247db96d56Sopenharmony_ci    PyCompactUnicodeObject _base;
2257db96d56Sopenharmony_ci    union {
2267db96d56Sopenharmony_ci        void *any;
2277db96d56Sopenharmony_ci        Py_UCS1 *latin1;
2287db96d56Sopenharmony_ci        Py_UCS2 *ucs2;
2297db96d56Sopenharmony_ci        Py_UCS4 *ucs4;
2307db96d56Sopenharmony_ci    } data;                     /* Canonical, smallest-form Unicode buffer */
2317db96d56Sopenharmony_ci} PyUnicodeObject;
2327db96d56Sopenharmony_ci
2337db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_CheckConsistency(
2347db96d56Sopenharmony_ci    PyObject *op,
2357db96d56Sopenharmony_ci    int check_content);
2367db96d56Sopenharmony_ci
2377db96d56Sopenharmony_ci
/* Checked down-casts from an object pointer to the three Unicode layouts.
   Each expansion first asserts PyUnicode_Check(op) and then converts with
   _Py_CAST.  `op` appears twice in the expansion (once inside assert()). */
#define _PyASCIIObject_CAST(op)                          \
    (assert(PyUnicode_Check(op)),                        \
     _Py_CAST(PyASCIIObject*, (op)))

#define _PyCompactUnicodeObject_CAST(op)                 \
    (assert(PyUnicode_Check(op)),                        \
     _Py_CAST(PyCompactUnicodeObject*, (op)))

#define _PyUnicodeObject_CAST(op)                        \
    (assert(PyUnicode_Check(op)),                        \
     _Py_CAST(PyUnicodeObject*, (op)))
2477db96d56Sopenharmony_ci
2487db96d56Sopenharmony_ci
2497db96d56Sopenharmony_ci/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
2507db96d56Sopenharmony_ci
/* Values stored in PyASCIIObject.state.interned (the interning state). */
#define SSTATE_NOT_INTERNED      0
#define SSTATE_INTERNED_MORTAL   1
#define SSTATE_INTERNED_IMMORTAL 2
2577db96d56Sopenharmony_ci
2587db96d56Sopenharmony_ci/* Use only if you know it's a string */
2597db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
2607db96d56Sopenharmony_ci    return _PyASCIIObject_CAST(op)->state.interned;
2617db96d56Sopenharmony_ci}
2627db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
2637db96d56Sopenharmony_ci#  define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
2647db96d56Sopenharmony_ci#endif
2657db96d56Sopenharmony_ci
2667db96d56Sopenharmony_ci/* Fast check to determine whether an object is ready. Equivalent to:
2677db96d56Sopenharmony_ci   PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */
2687db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_READY(PyObject *op) {
2697db96d56Sopenharmony_ci    return _PyASCIIObject_CAST(op)->state.ready;
2707db96d56Sopenharmony_ci}
2717db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
2727db96d56Sopenharmony_ci#  define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op))
2737db96d56Sopenharmony_ci#endif
2747db96d56Sopenharmony_ci
2757db96d56Sopenharmony_ci/* Return true if the string contains only ASCII characters, or 0 if not. The
2767db96d56Sopenharmony_ci   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
2777db96d56Sopenharmony_ci   ready. */
2787db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_ASCII(PyObject *op) {
2797db96d56Sopenharmony_ci    assert(PyUnicode_IS_READY(op));
2807db96d56Sopenharmony_ci    return _PyASCIIObject_CAST(op)->state.ascii;
2817db96d56Sopenharmony_ci}
2827db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
2837db96d56Sopenharmony_ci#  define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op))
2847db96d56Sopenharmony_ci#endif
2857db96d56Sopenharmony_ci
2867db96d56Sopenharmony_ci/* Return true if the string is compact or 0 if not.
2877db96d56Sopenharmony_ci   No type checks or Ready calls are performed. */
2887db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) {
2897db96d56Sopenharmony_ci    return _PyASCIIObject_CAST(op)->state.compact;
2907db96d56Sopenharmony_ci}
2917db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
2927db96d56Sopenharmony_ci#  define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op))
2937db96d56Sopenharmony_ci#endif
2947db96d56Sopenharmony_ci
2957db96d56Sopenharmony_ci/* Return true if the string is a compact ASCII string (use PyASCIIObject
2967db96d56Sopenharmony_ci   structure), or 0 if not.  No type checks or Ready calls are performed. */
2977db96d56Sopenharmony_cistatic inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
2987db96d56Sopenharmony_ci    return (_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op));
2997db96d56Sopenharmony_ci}
3007db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
3017db96d56Sopenharmony_ci#  define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op))
3027db96d56Sopenharmony_ci#endif
3037db96d56Sopenharmony_ci
/* Storage kinds of a Unicode string (the value of state.kind). */
enum PyUnicode_Kind {
    /* The string only has wstr (wchar_t) data.  This can only happen for a
       string created with a legacy API on which _PyUnicode_Ready() has not
       been called yet. */
    PyUnicode_WCHAR_KIND = 0,

    /* The values PyUnicode_KIND() can return. */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
3147db96d56Sopenharmony_ci
/* Evaluate to the state.kind field of op, i.e. one of the PyUnicode_*_KIND
   values defined above.  Readiness of op is asserted first.  This is a
   macro: `op` appears twice in the expansion (once inside assert()). */
#define PyUnicode_KIND(op)                  \
    (assert(PyUnicode_IS_READY(op)),        \
     _PyASCIIObject_CAST(op)->state.kind)
3197db96d56Sopenharmony_ci
3207db96d56Sopenharmony_ci/* Return a void pointer to the raw unicode buffer. */
3217db96d56Sopenharmony_cistatic inline void* _PyUnicode_COMPACT_DATA(PyObject *op) {
3227db96d56Sopenharmony_ci    if (PyUnicode_IS_ASCII(op)) {
3237db96d56Sopenharmony_ci        return _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1));
3247db96d56Sopenharmony_ci    }
3257db96d56Sopenharmony_ci    return _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1));
3267db96d56Sopenharmony_ci}
3277db96d56Sopenharmony_ci
3287db96d56Sopenharmony_cistatic inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) {
3297db96d56Sopenharmony_ci    void *data;
3307db96d56Sopenharmony_ci    assert(!PyUnicode_IS_COMPACT(op));
3317db96d56Sopenharmony_ci    data = _PyUnicodeObject_CAST(op)->data.any;
3327db96d56Sopenharmony_ci    assert(data != NULL);
3337db96d56Sopenharmony_ci    return data;
3347db96d56Sopenharmony_ci}
3357db96d56Sopenharmony_ci
3367db96d56Sopenharmony_cistatic inline void* PyUnicode_DATA(PyObject *op) {
3377db96d56Sopenharmony_ci    if (PyUnicode_IS_COMPACT(op)) {
3387db96d56Sopenharmony_ci        return _PyUnicode_COMPACT_DATA(op);
3397db96d56Sopenharmony_ci    }
3407db96d56Sopenharmony_ci    return _PyUnicode_NONCOMPACT_DATA(op);
3417db96d56Sopenharmony_ci}
3427db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
3437db96d56Sopenharmony_ci#  define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
3447db96d56Sopenharmony_ci#endif
3457db96d56Sopenharmony_ci
/* Typed views of the canonical buffer for direct character access: the
   result of PyUnicode_DATA(op) cast to Py_UCS1*, Py_UCS2* or Py_UCS4*.
   Nothing is checked here; consult PyUnicode_KIND() first so that the view
   matches the actual storage. */

#define PyUnicode_1BYTE_DATA(op) \
    _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) \
    _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) \
    _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op))
3547db96d56Sopenharmony_ci
3557db96d56Sopenharmony_ci/* Returns the length of the unicode string. The caller has to make sure that
3567db96d56Sopenharmony_ci   the string has it's canonical representation set before calling
3577db96d56Sopenharmony_ci   this function.  Call PyUnicode_(FAST_)Ready to ensure that. */
3587db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
3597db96d56Sopenharmony_ci    assert(PyUnicode_IS_READY(op));
3607db96d56Sopenharmony_ci    return _PyASCIIObject_CAST(op)->length;
3617db96d56Sopenharmony_ci}
3627db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
3637db96d56Sopenharmony_ci#  define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
3647db96d56Sopenharmony_ci#endif
3657db96d56Sopenharmony_ci
3667db96d56Sopenharmony_ci/* Write into the canonical representation, this function does not do any sanity
3677db96d56Sopenharmony_ci   checks and is intended for usage in loops.  The caller should cache the
3687db96d56Sopenharmony_ci   kind and data pointers obtained from other function calls.
3697db96d56Sopenharmony_ci   index is the index in the string (starts at 0) and value is the new
3707db96d56Sopenharmony_ci   code point value which should be written to that location. */
3717db96d56Sopenharmony_cistatic inline void PyUnicode_WRITE(int kind, void *data,
3727db96d56Sopenharmony_ci                                   Py_ssize_t index, Py_UCS4 value)
3737db96d56Sopenharmony_ci{
3747db96d56Sopenharmony_ci    if (kind == PyUnicode_1BYTE_KIND) {
3757db96d56Sopenharmony_ci        assert(value <= 0xffU);
3767db96d56Sopenharmony_ci        _Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value);
3777db96d56Sopenharmony_ci    }
3787db96d56Sopenharmony_ci    else if (kind == PyUnicode_2BYTE_KIND) {
3797db96d56Sopenharmony_ci        assert(value <= 0xffffU);
3807db96d56Sopenharmony_ci        _Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value);
3817db96d56Sopenharmony_ci    }
3827db96d56Sopenharmony_ci    else {
3837db96d56Sopenharmony_ci        assert(kind == PyUnicode_4BYTE_KIND);
3847db96d56Sopenharmony_ci        assert(value <= 0x10ffffU);
3857db96d56Sopenharmony_ci        _Py_STATIC_CAST(Py_UCS4*, data)[index] = value;
3867db96d56Sopenharmony_ci    }
3877db96d56Sopenharmony_ci}
3887db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
3897db96d56Sopenharmony_ci#define PyUnicode_WRITE(kind, data, index, value) \
3907db96d56Sopenharmony_ci    PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \
3917db96d56Sopenharmony_ci                    (index), _Py_STATIC_CAST(Py_UCS4, value))
3927db96d56Sopenharmony_ci#endif
3937db96d56Sopenharmony_ci
3947db96d56Sopenharmony_ci/* Read a code point from the string's canonical representation.  No checks
3957db96d56Sopenharmony_ci   or ready calls are performed. */
3967db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_READ(int kind,
3977db96d56Sopenharmony_ci                                     const void *data, Py_ssize_t index)
3987db96d56Sopenharmony_ci{
3997db96d56Sopenharmony_ci    if (kind == PyUnicode_1BYTE_KIND) {
4007db96d56Sopenharmony_ci        return _Py_STATIC_CAST(const Py_UCS1*, data)[index];
4017db96d56Sopenharmony_ci    }
4027db96d56Sopenharmony_ci    if (kind == PyUnicode_2BYTE_KIND) {
4037db96d56Sopenharmony_ci        return _Py_STATIC_CAST(const Py_UCS2*, data)[index];
4047db96d56Sopenharmony_ci    }
4057db96d56Sopenharmony_ci    assert(kind == PyUnicode_4BYTE_KIND);
4067db96d56Sopenharmony_ci    return _Py_STATIC_CAST(const Py_UCS4*, data)[index];
4077db96d56Sopenharmony_ci}
4087db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
4097db96d56Sopenharmony_ci#define PyUnicode_READ(kind, data, index) \
4107db96d56Sopenharmony_ci    PyUnicode_READ(_Py_STATIC_CAST(int, kind), \
4117db96d56Sopenharmony_ci                   _Py_STATIC_CAST(const void*, data), \
4127db96d56Sopenharmony_ci                   (index))
4137db96d56Sopenharmony_ci#endif
4147db96d56Sopenharmony_ci
4157db96d56Sopenharmony_ci/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
4167db96d56Sopenharmony_ci   calls PyUnicode_KIND() and might call it twice.  For single reads, use
4177db96d56Sopenharmony_ci   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
4187db96d56Sopenharmony_ci   cache kind and use PyUnicode_READ instead. */
4197db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index)
4207db96d56Sopenharmony_ci{
4217db96d56Sopenharmony_ci    int kind;
4227db96d56Sopenharmony_ci    assert(PyUnicode_IS_READY(unicode));
4237db96d56Sopenharmony_ci    kind = PyUnicode_KIND(unicode);
4247db96d56Sopenharmony_ci    if (kind == PyUnicode_1BYTE_KIND) {
4257db96d56Sopenharmony_ci        return PyUnicode_1BYTE_DATA(unicode)[index];
4267db96d56Sopenharmony_ci    }
4277db96d56Sopenharmony_ci    if (kind == PyUnicode_2BYTE_KIND) {
4287db96d56Sopenharmony_ci        return PyUnicode_2BYTE_DATA(unicode)[index];
4297db96d56Sopenharmony_ci    }
4307db96d56Sopenharmony_ci    assert(kind == PyUnicode_4BYTE_KIND);
4317db96d56Sopenharmony_ci    return PyUnicode_4BYTE_DATA(unicode)[index];
4327db96d56Sopenharmony_ci}
4337db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
4347db96d56Sopenharmony_ci#  define PyUnicode_READ_CHAR(unicode, index) \
4357db96d56Sopenharmony_ci       PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index))
4367db96d56Sopenharmony_ci#endif
4377db96d56Sopenharmony_ci
4387db96d56Sopenharmony_ci/* Return a maximum character value which is suitable for creating another
4397db96d56Sopenharmony_ci   string based on op.  This is always an approximation but more efficient
4407db96d56Sopenharmony_ci   than iterating over the string. */
4417db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
4427db96d56Sopenharmony_ci{
4437db96d56Sopenharmony_ci    int kind;
4447db96d56Sopenharmony_ci
4457db96d56Sopenharmony_ci    assert(PyUnicode_IS_READY(op));
4467db96d56Sopenharmony_ci    if (PyUnicode_IS_ASCII(op)) {
4477db96d56Sopenharmony_ci        return 0x7fU;
4487db96d56Sopenharmony_ci    }
4497db96d56Sopenharmony_ci
4507db96d56Sopenharmony_ci    kind = PyUnicode_KIND(op);
4517db96d56Sopenharmony_ci    if (kind == PyUnicode_1BYTE_KIND) {
4527db96d56Sopenharmony_ci       return 0xffU;
4537db96d56Sopenharmony_ci    }
4547db96d56Sopenharmony_ci    if (kind == PyUnicode_2BYTE_KIND) {
4557db96d56Sopenharmony_ci        return 0xffffU;
4567db96d56Sopenharmony_ci    }
4577db96d56Sopenharmony_ci    assert(kind == PyUnicode_4BYTE_KIND);
4587db96d56Sopenharmony_ci    return 0x10ffffU;
4597db96d56Sopenharmony_ci}
4607db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
4617db96d56Sopenharmony_ci#  define PyUnicode_MAX_CHAR_VALUE(op) \
4627db96d56Sopenharmony_ci       PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
4637db96d56Sopenharmony_ci#endif
4647db96d56Sopenharmony_ci
4657db96d56Sopenharmony_ci/* === Public API ========================================================= */
4667db96d56Sopenharmony_ci
4677db96d56Sopenharmony_ci/* --- Plain Py_UNICODE --------------------------------------------------- */
4687db96d56Sopenharmony_ci
4697db96d56Sopenharmony_ci/* With PEP 393, this is the recommended way to allocate a new unicode object.
4707db96d56Sopenharmony_ci   This function will allocate the object and its buffer in a single memory
4717db96d56Sopenharmony_ci   block.  Objects created using this function are not resizable. */
4727db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) PyUnicode_New(
4737db96d56Sopenharmony_ci    Py_ssize_t size,            /* Number of code points in the new string */
4747db96d56Sopenharmony_ci    Py_UCS4 maxchar             /* maximum code point value in the string */
4757db96d56Sopenharmony_ci    );
4767db96d56Sopenharmony_ci
4777db96d56Sopenharmony_ci/* Initializes the canonical string representation from the deprecated
4787db96d56Sopenharmony_ci   wstr/Py_UNICODE representation. This function is used to convert Unicode
4797db96d56Sopenharmony_ci   objects which were created using the old API to the new flexible format
4807db96d56Sopenharmony_ci   introduced with PEP 393.
4817db96d56Sopenharmony_ci
4827db96d56Sopenharmony_ci   Don't call this function directly, use the public PyUnicode_READY() function
4837db96d56Sopenharmony_ci   instead. */
4847db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_Ready(
4857db96d56Sopenharmony_ci    PyObject *unicode           /* Unicode object */
4867db96d56Sopenharmony_ci    );
4877db96d56Sopenharmony_ci
4887db96d56Sopenharmony_ci/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
4897db96d56Sopenharmony_ci   case.  If the canonical representation is not yet set, it will still call
4907db96d56Sopenharmony_ci   _PyUnicode_Ready().
4917db96d56Sopenharmony_ci   Returns 0 on success and -1 on errors. */
4927db96d56Sopenharmony_cistatic inline int PyUnicode_READY(PyObject *op)
4937db96d56Sopenharmony_ci{
4947db96d56Sopenharmony_ci    if (PyUnicode_IS_READY(op)) {
4957db96d56Sopenharmony_ci        return 0;
4967db96d56Sopenharmony_ci    }
4977db96d56Sopenharmony_ci    return _PyUnicode_Ready(op);
4987db96d56Sopenharmony_ci}
4997db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
5007db96d56Sopenharmony_ci#  define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))
5017db96d56Sopenharmony_ci#endif
5027db96d56Sopenharmony_ci
5037db96d56Sopenharmony_ci/* Get a copy of a Unicode string. */
5047db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_Copy(
5057db96d56Sopenharmony_ci    PyObject *unicode
5067db96d56Sopenharmony_ci    );
5077db96d56Sopenharmony_ci
5087db96d56Sopenharmony_ci/* Copy character from one unicode object into another, this function performs
5097db96d56Sopenharmony_ci   character conversion when necessary and falls back to memcpy() if possible.
5107db96d56Sopenharmony_ci
5117db96d56Sopenharmony_ci   Fail if to is too small (smaller than *how_many* or smaller than
5127db96d56Sopenharmony_ci   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
5137db96d56Sopenharmony_ci   kind(to), or if *to* has more than 1 reference.
5147db96d56Sopenharmony_ci
5157db96d56Sopenharmony_ci   Return the number of written character, or return -1 and raise an exception
5167db96d56Sopenharmony_ci   on error.
5177db96d56Sopenharmony_ci
5187db96d56Sopenharmony_ci   Pseudo-code:
5197db96d56Sopenharmony_ci
5207db96d56Sopenharmony_ci       how_many = min(how_many, len(from) - from_start)
5217db96d56Sopenharmony_ci       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
5227db96d56Sopenharmony_ci       return how_many
5237db96d56Sopenharmony_ci
5247db96d56Sopenharmony_ci   Note: The function doesn't write a terminating null character.
5257db96d56Sopenharmony_ci   */
5267db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
5277db96d56Sopenharmony_ci    PyObject *to,
5287db96d56Sopenharmony_ci    Py_ssize_t to_start,
5297db96d56Sopenharmony_ci    PyObject *from,
5307db96d56Sopenharmony_ci    Py_ssize_t from_start,
5317db96d56Sopenharmony_ci    Py_ssize_t how_many
5327db96d56Sopenharmony_ci    );
5337db96d56Sopenharmony_ci
5347db96d56Sopenharmony_ci/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
5357db96d56Sopenharmony_ci   may crash if parameters are invalid (e.g. if the output string
5367db96d56Sopenharmony_ci   is too short). */
5377db96d56Sopenharmony_ciPyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
5387db96d56Sopenharmony_ci    PyObject *to,
5397db96d56Sopenharmony_ci    Py_ssize_t to_start,
5407db96d56Sopenharmony_ci    PyObject *from,
5417db96d56Sopenharmony_ci    Py_ssize_t from_start,
5427db96d56Sopenharmony_ci    Py_ssize_t how_many
5437db96d56Sopenharmony_ci    );
5447db96d56Sopenharmony_ci
5457db96d56Sopenharmony_ci/* Fill a string with a character: write fill_char into
5467db96d56Sopenharmony_ci   unicode[start:start+length].
5477db96d56Sopenharmony_ci
5487db96d56Sopenharmony_ci   Fail if fill_char is bigger than the string maximum character, or if the
5497db96d56Sopenharmony_ci   string has more than 1 reference.
5507db96d56Sopenharmony_ci
5517db96d56Sopenharmony_ci   Return the number of written character, or return -1 and raise an exception
5527db96d56Sopenharmony_ci   on error. */
5537db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
5547db96d56Sopenharmony_ci    PyObject *unicode,
5557db96d56Sopenharmony_ci    Py_ssize_t start,
5567db96d56Sopenharmony_ci    Py_ssize_t length,
5577db96d56Sopenharmony_ci    Py_UCS4 fill_char
5587db96d56Sopenharmony_ci    );
5597db96d56Sopenharmony_ci
5607db96d56Sopenharmony_ci/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
5617db96d56Sopenharmony_ci   if parameters are invalid (e.g. if length is longer than the string). */
5627db96d56Sopenharmony_ciPyAPI_FUNC(void) _PyUnicode_FastFill(
5637db96d56Sopenharmony_ci    PyObject *unicode,
5647db96d56Sopenharmony_ci    Py_ssize_t start,
5657db96d56Sopenharmony_ci    Py_ssize_t length,
5667db96d56Sopenharmony_ci    Py_UCS4 fill_char
5677db96d56Sopenharmony_ci    );
5687db96d56Sopenharmony_ci
5697db96d56Sopenharmony_ci/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
5707db96d56Sopenharmony_ci   Scan the string to find the maximum character. */
5717db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
5727db96d56Sopenharmony_ci    int kind,
5737db96d56Sopenharmony_ci    const void *buffer,
5747db96d56Sopenharmony_ci    Py_ssize_t size);
5757db96d56Sopenharmony_ci
5767db96d56Sopenharmony_ci/* Create a new string from a buffer of ASCII characters.
5777db96d56Sopenharmony_ci   WARNING: Don't check if the string contains any non-ASCII character. */
5787db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
5797db96d56Sopenharmony_ci    const char *buffer,
5807db96d56Sopenharmony_ci    Py_ssize_t size);
5817db96d56Sopenharmony_ci
5827db96d56Sopenharmony_ci/* Compute the maximum character of the substring unicode[start:end].
5837db96d56Sopenharmony_ci   Return 127 for an empty string. */
5847db96d56Sopenharmony_ciPyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
5857db96d56Sopenharmony_ci    PyObject *unicode,
5867db96d56Sopenharmony_ci    Py_ssize_t start,
5877db96d56Sopenharmony_ci    Py_ssize_t end);
5887db96d56Sopenharmony_ci
5897db96d56Sopenharmony_ci/* --- Legacy deprecated API ---------------------------------------------- */
5907db96d56Sopenharmony_ci
5917db96d56Sopenharmony_ci/* Create a Unicode Object from the Py_UNICODE buffer u of the given
5927db96d56Sopenharmony_ci   size.
5937db96d56Sopenharmony_ci
5947db96d56Sopenharmony_ci   u may be NULL which causes the contents to be undefined. It is the
5957db96d56Sopenharmony_ci   user's responsibility to fill in the needed data afterwards. Note
5967db96d56Sopenharmony_ci   that modifying the Unicode object contents after construction is
5977db96d56Sopenharmony_ci   only allowed if u was set to NULL.
5987db96d56Sopenharmony_ci
5997db96d56Sopenharmony_ci   The buffer is copied into the new object. */
6007db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
6017db96d56Sopenharmony_ci    const Py_UNICODE *u,        /* Unicode buffer */
6027db96d56Sopenharmony_ci    Py_ssize_t size             /* size of buffer */
6037db96d56Sopenharmony_ci    );
6047db96d56Sopenharmony_ci
6057db96d56Sopenharmony_ci/* Return a read-only pointer to the Unicode object's internal
6067db96d56Sopenharmony_ci   Py_UNICODE buffer.
6077db96d56Sopenharmony_ci   If the wchar_t/Py_UNICODE representation is not yet available, this
6087db96d56Sopenharmony_ci   function will calculate it. */
6097db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
6107db96d56Sopenharmony_ci    PyObject *unicode           /* Unicode object */
6117db96d56Sopenharmony_ci    );
6127db96d56Sopenharmony_ci
6137db96d56Sopenharmony_ci/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
6147db96d56Sopenharmony_ci   contains null characters. */
6157db96d56Sopenharmony_ciPyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
6167db96d56Sopenharmony_ci    PyObject *unicode           /* Unicode object */
6177db96d56Sopenharmony_ci    );
6187db96d56Sopenharmony_ci
6197db96d56Sopenharmony_ci/* Return a read-only pointer to the Unicode object's internal
6207db96d56Sopenharmony_ci   Py_UNICODE buffer and save the length at size.
6217db96d56Sopenharmony_ci   If the wchar_t/Py_UNICODE representation is not yet available, this
6227db96d56Sopenharmony_ci   function will calculate it. */
6237db96d56Sopenharmony_ci
6247db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
6257db96d56Sopenharmony_ci    PyObject *unicode,          /* Unicode object */
6267db96d56Sopenharmony_ci    Py_ssize_t *size            /* location where to save the length */
6277db96d56Sopenharmony_ci    );
6287db96d56Sopenharmony_ci
6297db96d56Sopenharmony_ci
/* Fast access macros (implemented as static inline functions) */
6317db96d56Sopenharmony_ci
6327db96d56Sopenharmony_ciPy_DEPRECATED(3.3)
6337db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op)
6347db96d56Sopenharmony_ci{
6357db96d56Sopenharmony_ci    if (PyUnicode_IS_COMPACT_ASCII(op)) {
6367db96d56Sopenharmony_ci        return _PyASCIIObject_CAST(op)->length;
6377db96d56Sopenharmony_ci    }
6387db96d56Sopenharmony_ci    else {
6397db96d56Sopenharmony_ci        return _PyCompactUnicodeObject_CAST(op)->wstr_length;
6407db96d56Sopenharmony_ci    }
6417db96d56Sopenharmony_ci}
6427db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
6437db96d56Sopenharmony_ci#  define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))
6447db96d56Sopenharmony_ci#endif
6457db96d56Sopenharmony_ci
6467db96d56Sopenharmony_ci/* Returns the deprecated Py_UNICODE representation's size in code units
6477db96d56Sopenharmony_ci   (this includes surrogate pairs as 2 units).
6487db96d56Sopenharmony_ci   If the Py_UNICODE representation is not available, it will be computed
6497db96d56Sopenharmony_ci   on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
6507db96d56Sopenharmony_ci
6517db96d56Sopenharmony_ciPy_DEPRECATED(3.3)
6527db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op)
6537db96d56Sopenharmony_ci{
6547db96d56Sopenharmony_ci    _Py_COMP_DIAG_PUSH
6557db96d56Sopenharmony_ci    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
6567db96d56Sopenharmony_ci    if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) {
6577db96d56Sopenharmony_ci        (void)PyUnicode_AsUnicode(op);
6587db96d56Sopenharmony_ci        assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL);
6597db96d56Sopenharmony_ci    }
6607db96d56Sopenharmony_ci    return PyUnicode_WSTR_LENGTH(op);
6617db96d56Sopenharmony_ci    _Py_COMP_DIAG_POP
6627db96d56Sopenharmony_ci}
6637db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
6647db96d56Sopenharmony_ci#  define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op))
6657db96d56Sopenharmony_ci#endif
6667db96d56Sopenharmony_ci
6677db96d56Sopenharmony_ciPy_DEPRECATED(3.3)
6687db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op)
6697db96d56Sopenharmony_ci{
6707db96d56Sopenharmony_ci    _Py_COMP_DIAG_PUSH
6717db96d56Sopenharmony_ci    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
6727db96d56Sopenharmony_ci    return PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE;
6737db96d56Sopenharmony_ci    _Py_COMP_DIAG_POP
6747db96d56Sopenharmony_ci}
6757db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
6767db96d56Sopenharmony_ci#  define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op))
6777db96d56Sopenharmony_ci#endif
6787db96d56Sopenharmony_ci
6797db96d56Sopenharmony_ci/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
6807db96d56Sopenharmony_ci   representation on demand.  Using this macro is very inefficient now,
6817db96d56Sopenharmony_ci   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
6827db96d56Sopenharmony_ci   use PyUnicode_WRITE() and PyUnicode_READ(). */
6837db96d56Sopenharmony_ci
6847db96d56Sopenharmony_ciPy_DEPRECATED(3.3)
6857db96d56Sopenharmony_cistatic inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op)
6867db96d56Sopenharmony_ci{
6877db96d56Sopenharmony_ci    wchar_t *wstr = _PyASCIIObject_CAST(op)->wstr;
6887db96d56Sopenharmony_ci    if (wstr != _Py_NULL) {
6897db96d56Sopenharmony_ci        return wstr;
6907db96d56Sopenharmony_ci    }
6917db96d56Sopenharmony_ci
6927db96d56Sopenharmony_ci    _Py_COMP_DIAG_PUSH
6937db96d56Sopenharmony_ci    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
6947db96d56Sopenharmony_ci    return PyUnicode_AsUnicode(op);
6957db96d56Sopenharmony_ci    _Py_COMP_DIAG_POP
6967db96d56Sopenharmony_ci}
6977db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
6987db96d56Sopenharmony_ci#  define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op))
6997db96d56Sopenharmony_ci#endif
7007db96d56Sopenharmony_ci
7017db96d56Sopenharmony_ciPy_DEPRECATED(3.3)
7027db96d56Sopenharmony_cistatic inline const char* PyUnicode_AS_DATA(PyObject *op)
7037db96d56Sopenharmony_ci{
7047db96d56Sopenharmony_ci    _Py_COMP_DIAG_PUSH
7057db96d56Sopenharmony_ci    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
7067db96d56Sopenharmony_ci    Py_UNICODE *data = PyUnicode_AS_UNICODE(op);
7077db96d56Sopenharmony_ci    // In C++, casting directly PyUnicode* to const char* is not valid
7087db96d56Sopenharmony_ci    return _Py_STATIC_CAST(const char*, _Py_STATIC_CAST(const void*, data));
7097db96d56Sopenharmony_ci    _Py_COMP_DIAG_POP
7107db96d56Sopenharmony_ci}
7117db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
7127db96d56Sopenharmony_ci#  define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op))
7137db96d56Sopenharmony_ci#endif
7147db96d56Sopenharmony_ci
7157db96d56Sopenharmony_ci
7167db96d56Sopenharmony_ci/* --- _PyUnicodeWriter API ----------------------------------------------- */
7177db96d56Sopenharmony_ci
7187db96d56Sopenharmony_citypedef struct {
7197db96d56Sopenharmony_ci    PyObject *buffer;
7207db96d56Sopenharmony_ci    void *data;
7217db96d56Sopenharmony_ci    enum PyUnicode_Kind kind;
7227db96d56Sopenharmony_ci    Py_UCS4 maxchar;
7237db96d56Sopenharmony_ci    Py_ssize_t size;
7247db96d56Sopenharmony_ci    Py_ssize_t pos;
7257db96d56Sopenharmony_ci
7267db96d56Sopenharmony_ci    /* minimum number of allocated characters (default: 0) */
7277db96d56Sopenharmony_ci    Py_ssize_t min_length;
7287db96d56Sopenharmony_ci
7297db96d56Sopenharmony_ci    /* minimum character (default: 127, ASCII) */
7307db96d56Sopenharmony_ci    Py_UCS4 min_char;
7317db96d56Sopenharmony_ci
7327db96d56Sopenharmony_ci    /* If non-zero, overallocate the buffer (default: 0). */
7337db96d56Sopenharmony_ci    unsigned char overallocate;
7347db96d56Sopenharmony_ci
7357db96d56Sopenharmony_ci    /* If readonly is 1, buffer is a shared string (cannot be modified)
7367db96d56Sopenharmony_ci       and size is set to 0. */
7377db96d56Sopenharmony_ci    unsigned char readonly;
7387db96d56Sopenharmony_ci} _PyUnicodeWriter ;
7397db96d56Sopenharmony_ci
7407db96d56Sopenharmony_ci/* Initialize a Unicode writer.
7417db96d56Sopenharmony_ci *
7427db96d56Sopenharmony_ci * By default, the minimum buffer size is 0 character and overallocation is
7437db96d56Sopenharmony_ci * disabled. Set min_length, min_char and overallocate attributes to control
7447db96d56Sopenharmony_ci * the allocation of the buffer. */
7457db96d56Sopenharmony_ciPyAPI_FUNC(void)
7467db96d56Sopenharmony_ci_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
7477db96d56Sopenharmony_ci
7487db96d56Sopenharmony_ci/* Prepare the buffer to write 'length' characters
7497db96d56Sopenharmony_ci   with the specified maximum character.
7507db96d56Sopenharmony_ci
7517db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
7527db96d56Sopenharmony_ci#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
7537db96d56Sopenharmony_ci    (((MAXCHAR) <= (WRITER)->maxchar                                  \
7547db96d56Sopenharmony_ci      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
7557db96d56Sopenharmony_ci     ? 0                                                              \
7567db96d56Sopenharmony_ci     : (((LENGTH) == 0)                                               \
7577db96d56Sopenharmony_ci        ? 0                                                           \
7587db96d56Sopenharmony_ci        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
7597db96d56Sopenharmony_ci
7607db96d56Sopenharmony_ci/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
7617db96d56Sopenharmony_ci   instead. */
7627db96d56Sopenharmony_ciPyAPI_FUNC(int)
7637db96d56Sopenharmony_ci_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
7647db96d56Sopenharmony_ci                                 Py_ssize_t length, Py_UCS4 maxchar);
7657db96d56Sopenharmony_ci
7667db96d56Sopenharmony_ci/* Prepare the buffer to have at least the kind KIND.
7677db96d56Sopenharmony_ci   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
7687db96d56Sopenharmony_ci   support characters in range U+000-U+FFFF.
7697db96d56Sopenharmony_ci
7707db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
7717db96d56Sopenharmony_ci#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
7727db96d56Sopenharmony_ci    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
7737db96d56Sopenharmony_ci     (KIND) <= (WRITER)->kind                                         \
7747db96d56Sopenharmony_ci     ? 0                                                              \
7757db96d56Sopenharmony_ci     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
7767db96d56Sopenharmony_ci
7777db96d56Sopenharmony_ci/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
7787db96d56Sopenharmony_ci   macro instead. */
7797db96d56Sopenharmony_ciPyAPI_FUNC(int)
7807db96d56Sopenharmony_ci_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
7817db96d56Sopenharmony_ci                                     enum PyUnicode_Kind kind);
7827db96d56Sopenharmony_ci
7837db96d56Sopenharmony_ci/* Append a Unicode character.
7847db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
7857db96d56Sopenharmony_ciPyAPI_FUNC(int)
7867db96d56Sopenharmony_ci_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
7877db96d56Sopenharmony_ci    Py_UCS4 ch
7887db96d56Sopenharmony_ci    );
7897db96d56Sopenharmony_ci
7907db96d56Sopenharmony_ci/* Append a Unicode string.
7917db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
7927db96d56Sopenharmony_ciPyAPI_FUNC(int)
7937db96d56Sopenharmony_ci_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
7947db96d56Sopenharmony_ci    PyObject *str               /* Unicode string */
7957db96d56Sopenharmony_ci    );
7967db96d56Sopenharmony_ci
7977db96d56Sopenharmony_ci/* Append a substring of a Unicode string.
7987db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
7997db96d56Sopenharmony_ciPyAPI_FUNC(int)
8007db96d56Sopenharmony_ci_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
8017db96d56Sopenharmony_ci    PyObject *str,              /* Unicode string */
8027db96d56Sopenharmony_ci    Py_ssize_t start,
8037db96d56Sopenharmony_ci    Py_ssize_t end
8047db96d56Sopenharmony_ci    );
8057db96d56Sopenharmony_ci
8067db96d56Sopenharmony_ci/* Append an ASCII-encoded byte string.
8077db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
8087db96d56Sopenharmony_ciPyAPI_FUNC(int)
8097db96d56Sopenharmony_ci_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
8107db96d56Sopenharmony_ci    const char *str,           /* ASCII-encoded byte string */
8117db96d56Sopenharmony_ci    Py_ssize_t len             /* number of bytes, or -1 if unknown */
8127db96d56Sopenharmony_ci    );
8137db96d56Sopenharmony_ci
8147db96d56Sopenharmony_ci/* Append a latin1-encoded byte string.
8157db96d56Sopenharmony_ci   Return 0 on success, raise an exception and return -1 on error. */
8167db96d56Sopenharmony_ciPyAPI_FUNC(int)
8177db96d56Sopenharmony_ci_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
8187db96d56Sopenharmony_ci    const char *str,           /* latin1-encoded byte string */
8197db96d56Sopenharmony_ci    Py_ssize_t len             /* length in bytes */
8207db96d56Sopenharmony_ci    );
8217db96d56Sopenharmony_ci
8227db96d56Sopenharmony_ci/* Get the value of the writer as a Unicode string. Clear the
8237db96d56Sopenharmony_ci   buffer of the writer. Raise an exception and return NULL
8247db96d56Sopenharmony_ci   on error. */
8257db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *)
8267db96d56Sopenharmony_ci_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
8277db96d56Sopenharmony_ci
8287db96d56Sopenharmony_ci/* Deallocate memory of a writer (clear its internal buffer). */
8297db96d56Sopenharmony_ciPyAPI_FUNC(void)
8307db96d56Sopenharmony_ci_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
8317db96d56Sopenharmony_ci
8327db96d56Sopenharmony_ci
8337db96d56Sopenharmony_ci/* Format the object based on the format_spec, as defined in PEP 3101
8347db96d56Sopenharmony_ci   (Advanced String Formatting). */
8357db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
8367db96d56Sopenharmony_ci    _PyUnicodeWriter *writer,
8377db96d56Sopenharmony_ci    PyObject *obj,
8387db96d56Sopenharmony_ci    PyObject *format_spec,
8397db96d56Sopenharmony_ci    Py_ssize_t start,
8407db96d56Sopenharmony_ci    Py_ssize_t end);
8417db96d56Sopenharmony_ci
8427db96d56Sopenharmony_ci/* --- Manage the default encoding ---------------------------------------- */
8437db96d56Sopenharmony_ci
8447db96d56Sopenharmony_ci/* Returns a pointer to the default encoding (UTF-8) of the
8457db96d56Sopenharmony_ci   Unicode object unicode.
8467db96d56Sopenharmony_ci
8477db96d56Sopenharmony_ci   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
8487db96d56Sopenharmony_ci   in the unicodeobject.
8497db96d56Sopenharmony_ci
8507db96d56Sopenharmony_ci   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
8517db96d56Sopenharmony_ci   support the previous internal function with the same behaviour.
8527db96d56Sopenharmony_ci
8537db96d56Sopenharmony_ci   Use of this API is DEPRECATED since no size information can be
8547db96d56Sopenharmony_ci   extracted from the returned data.
8557db96d56Sopenharmony_ci*/
8567db96d56Sopenharmony_ci
8577db96d56Sopenharmony_ciPyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
8587db96d56Sopenharmony_ci
8597db96d56Sopenharmony_ci#define _PyUnicode_AsString PyUnicode_AsUTF8
8607db96d56Sopenharmony_ci
8617db96d56Sopenharmony_ci/* --- UTF-7 Codecs ------------------------------------------------------- */
8627db96d56Sopenharmony_ci
8637db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
8647db96d56Sopenharmony_ci    PyObject *unicode,          /* Unicode object */
8657db96d56Sopenharmony_ci    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
8667db96d56Sopenharmony_ci    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
8677db96d56Sopenharmony_ci    const char *errors          /* error handling */
8687db96d56Sopenharmony_ci    );
8697db96d56Sopenharmony_ci
8707db96d56Sopenharmony_ci/* --- UTF-8 Codecs ------------------------------------------------------- */
8717db96d56Sopenharmony_ci
8727db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
8737db96d56Sopenharmony_ci    PyObject *unicode,
8747db96d56Sopenharmony_ci    const char *errors);
8757db96d56Sopenharmony_ci
8767db96d56Sopenharmony_ci/* --- UTF-32 Codecs ------------------------------------------------------ */
8777db96d56Sopenharmony_ci
8787db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
8797db96d56Sopenharmony_ci    PyObject *object,           /* Unicode object */
8807db96d56Sopenharmony_ci    const char *errors,         /* error handling */
8817db96d56Sopenharmony_ci    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
8827db96d56Sopenharmony_ci    );
8837db96d56Sopenharmony_ci
8847db96d56Sopenharmony_ci/* --- UTF-16 Codecs ------------------------------------------------------ */
8857db96d56Sopenharmony_ci
8867db96d56Sopenharmony_ci/* Returns a Python string object holding the UTF-16 encoded value of
8877db96d56Sopenharmony_ci   the Unicode data.
8887db96d56Sopenharmony_ci
8897db96d56Sopenharmony_ci   If byteorder is not 0, output is written according to the following
8907db96d56Sopenharmony_ci   byte order:
8917db96d56Sopenharmony_ci
8927db96d56Sopenharmony_ci   byteorder == -1: little endian
8937db96d56Sopenharmony_ci   byteorder == 0:  native byte order (writes a BOM mark)
8947db96d56Sopenharmony_ci   byteorder == 1:  big endian
8957db96d56Sopenharmony_ci
8967db96d56Sopenharmony_ci   If byteorder is 0, the output string will always start with the
8977db96d56Sopenharmony_ci   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
8987db96d56Sopenharmony_ci   prepended.
8997db96d56Sopenharmony_ci*/
9007db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
9017db96d56Sopenharmony_ci    PyObject* unicode,          /* Unicode object */
9027db96d56Sopenharmony_ci    const char *errors,         /* error handling */
9037db96d56Sopenharmony_ci    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
9047db96d56Sopenharmony_ci    );
9057db96d56Sopenharmony_ci
9067db96d56Sopenharmony_ci/* --- Unicode-Escape Codecs ---------------------------------------------- */
9077db96d56Sopenharmony_ci
9087db96d56Sopenharmony_ci/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
9097db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
9107db96d56Sopenharmony_ci        const char *string,     /* Unicode-Escape encoded string */
9117db96d56Sopenharmony_ci        Py_ssize_t length,      /* size of string */
9127db96d56Sopenharmony_ci        const char *errors,     /* error handling */
9137db96d56Sopenharmony_ci        Py_ssize_t *consumed    /* bytes consumed */
9147db96d56Sopenharmony_ci);
9157db96d56Sopenharmony_ci/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
9167db96d56Sopenharmony_ci   chars. */
9177db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
9187db96d56Sopenharmony_ci        const char *string,     /* Unicode-Escape encoded string */
9197db96d56Sopenharmony_ci        Py_ssize_t length,      /* size of string */
9207db96d56Sopenharmony_ci        const char *errors,     /* error handling */
9217db96d56Sopenharmony_ci        Py_ssize_t *consumed,   /* bytes consumed */
9227db96d56Sopenharmony_ci        const char **first_invalid_escape  /* on return, points to first
9237db96d56Sopenharmony_ci                                              invalid escaped char in
9247db96d56Sopenharmony_ci                                              string. */
9257db96d56Sopenharmony_ci);
9267db96d56Sopenharmony_ci
9277db96d56Sopenharmony_ci/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
9287db96d56Sopenharmony_ci
9297db96d56Sopenharmony_ci/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
9307db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
9317db96d56Sopenharmony_ci        const char *string,     /* Unicode-Escape encoded string */
9327db96d56Sopenharmony_ci        Py_ssize_t length,      /* size of string */
9337db96d56Sopenharmony_ci        const char *errors,     /* error handling */
9347db96d56Sopenharmony_ci        Py_ssize_t *consumed    /* bytes consumed */
9357db96d56Sopenharmony_ci);
9367db96d56Sopenharmony_ci
9377db96d56Sopenharmony_ci/* --- Latin-1 Codecs ----------------------------------------------------- */
9387db96d56Sopenharmony_ci
9397db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
9407db96d56Sopenharmony_ci    PyObject* unicode,
9417db96d56Sopenharmony_ci    const char* errors);
9427db96d56Sopenharmony_ci
9437db96d56Sopenharmony_ci/* --- ASCII Codecs ------------------------------------------------------- */
9447db96d56Sopenharmony_ci
9457db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
9467db96d56Sopenharmony_ci    PyObject* unicode,
9477db96d56Sopenharmony_ci    const char* errors);
9487db96d56Sopenharmony_ci
9497db96d56Sopenharmony_ci/* --- Character Map Codecs ----------------------------------------------- */
9507db96d56Sopenharmony_ci
9517db96d56Sopenharmony_ci/* Translate an Unicode object by applying a character mapping table to
9527db96d56Sopenharmony_ci   it and return the resulting Unicode object.
9537db96d56Sopenharmony_ci
9547db96d56Sopenharmony_ci   The mapping table must map Unicode ordinal integers to Unicode strings,
9557db96d56Sopenharmony_ci   Unicode ordinal integers or None (causing deletion of the character).
9567db96d56Sopenharmony_ci
9577db96d56Sopenharmony_ci   Mapping tables may be dictionaries or sequences. Unmapped character
9587db96d56Sopenharmony_ci   ordinals (ones which cause a LookupError) are left untouched and
9597db96d56Sopenharmony_ci   are copied as-is.
9607db96d56Sopenharmony_ci*/
9617db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
9627db96d56Sopenharmony_ci    PyObject *unicode,          /* Unicode object */
9637db96d56Sopenharmony_ci    PyObject *mapping,          /* encoding mapping */
9647db96d56Sopenharmony_ci    const char *errors          /* error handling */
9657db96d56Sopenharmony_ci    );
9667db96d56Sopenharmony_ci
9677db96d56Sopenharmony_ci/* --- Decimal Encoder ---------------------------------------------------- */
9687db96d56Sopenharmony_ci
9697db96d56Sopenharmony_ci/* Coverts a Unicode object holding a decimal value to an ASCII string
9707db96d56Sopenharmony_ci   for using in int, float and complex parsers.
9717db96d56Sopenharmony_ci   Transforms code points that have decimal digit property to the
9727db96d56Sopenharmony_ci   corresponding ASCII digit code points.  Transforms spaces to ASCII.
9737db96d56Sopenharmony_ci   Transforms code points starting from the first non-ASCII code point that
9747db96d56Sopenharmony_ci   is neither a decimal digit nor a space to the end into '?'. */
9757db96d56Sopenharmony_ci
9767db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
9777db96d56Sopenharmony_ci    PyObject *unicode           /* Unicode object */
9787db96d56Sopenharmony_ci    );
9797db96d56Sopenharmony_ci
9807db96d56Sopenharmony_ci/* --- Methods & Slots ---------------------------------------------------- */
9817db96d56Sopenharmony_ci
9827db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
9837db96d56Sopenharmony_ci    PyObject *separator,
9847db96d56Sopenharmony_ci    PyObject *const *items,
9857db96d56Sopenharmony_ci    Py_ssize_t seqlen
9867db96d56Sopenharmony_ci    );
9877db96d56Sopenharmony_ci
9887db96d56Sopenharmony_ci/* Test whether a unicode is equal to ASCII identifier.  Return 1 if true,
9897db96d56Sopenharmony_ci   0 otherwise.  The right argument must be ASCII identifier.
9907db96d56Sopenharmony_ci   Any error occurs inside will be cleared before return. */
9917db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
9927db96d56Sopenharmony_ci    PyObject *left,             /* Left string */
9937db96d56Sopenharmony_ci    _Py_Identifier *right       /* Right identifier */
9947db96d56Sopenharmony_ci    );
9957db96d56Sopenharmony_ci
9967db96d56Sopenharmony_ci/* Test whether a unicode is equal to ASCII string.  Return 1 if true,
9977db96d56Sopenharmony_ci   0 otherwise.  The right argument must be ASCII-encoded string.
9987db96d56Sopenharmony_ci   Any error occurs inside will be cleared before return. */
9997db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
10007db96d56Sopenharmony_ci    PyObject *left,
10017db96d56Sopenharmony_ci    const char *right           /* ASCII-encoded string */
10027db96d56Sopenharmony_ci    );
10037db96d56Sopenharmony_ci
10047db96d56Sopenharmony_ci/* Externally visible for str.strip(unicode).
   NOTE(review): the parameter meanings are not documented here; presumably
   `striptype` selects which end(s) of `self` to strip and `sepobj` holds
   the characters to remove -- confirm against the implementation. */
10057db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
10067db96d56Sopenharmony_ci    PyObject *self,
10077db96d56Sopenharmony_ci    int striptype,
10087db96d56Sopenharmony_ci    PyObject *sepobj
10097db96d56Sopenharmony_ci    );
10107db96d56Sopenharmony_ci
10117db96d56Sopenharmony_ci/* Using explicitly passed-in values, insert the thousands grouping
10127db96d56Sopenharmony_ci   into the string pointed to by buffer.  For the argument descriptions,
10137db96d56Sopenharmony_ci   see Objects/stringlib/localeutil.h.
   NOTE(review): this prototype has no parameter named `buffer`; it takes a
   _PyUnicodeWriter *writer and a length n_buffer instead, so "buffer" above
   presumably refers to the writer's output -- confirm and reword. */
10147db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
10157db96d56Sopenharmony_ci    _PyUnicodeWriter *writer,
10167db96d56Sopenharmony_ci    Py_ssize_t n_buffer,
10177db96d56Sopenharmony_ci    PyObject *digits,
10187db96d56Sopenharmony_ci    Py_ssize_t d_pos,
10197db96d56Sopenharmony_ci    Py_ssize_t n_digits,
10207db96d56Sopenharmony_ci    Py_ssize_t min_width,
10217db96d56Sopenharmony_ci    const char *grouping,
10227db96d56Sopenharmony_ci    PyObject *thousands_sep,
10237db96d56Sopenharmony_ci    Py_UCS4 *maxchar);
10247db96d56Sopenharmony_ci
10257db96d56Sopenharmony_ci/* === Characters Type APIs =============================================== */
10267db96d56Sopenharmony_ci
10277db96d56Sopenharmony_ci/* Helper array used by Py_UNICODE_ISSPACE(): the macro indexes it with the
   code point when that code point is below 128 and uses the entry as a
   truth value; larger code points go to _PyUnicode_IsWhitespace() instead.
   The array bound is not given in this declaration. */
10287db96d56Sopenharmony_ci
10297db96d56Sopenharmony_ciPyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
10307db96d56Sopenharmony_ci
10317db96d56Sopenharmony_ci/* These should not be used directly. Use the Py_UNICODE_IS* and
10327db96d56Sopenharmony_ci   Py_UNICODE_TO* macros instead.
10337db96d56Sopenharmony_ci
10347db96d56Sopenharmony_ci   These APIs are implemented in Objects/unicodectype.c.
10357db96d56Sopenharmony_ci
10367db96d56Sopenharmony_ci*/
10377db96d56Sopenharmony_ci
/* Backs the Py_UNICODE_ISLOWER(ch) macro, which expands to a direct call.
   Takes one code point and returns an int (presumably nonzero when the
   character is lowercase, 0 otherwise -- confirm). */
10387db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsLowercase(
10397db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10407db96d56Sopenharmony_ci    );
10417db96d56Sopenharmony_ci
/* Backs the Py_UNICODE_ISUPPER(ch) macro, which expands to a direct call.
   Takes one code point and returns an int (presumably nonzero when the
   character is uppercase, 0 otherwise -- confirm). */
10427db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsUppercase(
10437db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10447db96d56Sopenharmony_ci    );
10457db96d56Sopenharmony_ci
/* Backs the Py_UNICODE_ISTITLE(ch) macro, which expands to a direct call.
   Takes one code point and returns an int (presumably nonzero when the
   character is titlecase, 0 otherwise -- confirm). */
10467db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsTitlecase(
10477db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10487db96d56Sopenharmony_ci    );
10497db96d56Sopenharmony_ci
/* NOTE(review): undocumented here.  Presumably tests the Unicode XID_Start
   property (characters allowed to begin an identifier) and returns nonzero
   on a match -- confirm against Objects/unicodectype.c. */
10507db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsXidStart(
10517db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10527db96d56Sopenharmony_ci    );
10537db96d56Sopenharmony_ci
/* NOTE(review): undocumented here.  Presumably tests the Unicode
   XID_Continue property (characters allowed after the first one in an
   identifier) and returns nonzero on a match -- confirm against
   Objects/unicodectype.c. */
10547db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsXidContinue(
10557db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10567db96d56Sopenharmony_ci    );
10577db96d56Sopenharmony_ci
/* Slow path of Py_UNICODE_ISSPACE(ch): the macro calls this only for code
   points of 128 and above; lower ones are answered from the
   _Py_ascii_whitespace table.  The int result is used as a truth value.
   NOTE(review): the top-level `const` on this by-value parameter has no
   effect on callers; most sibling prototypes omit it. */
10587db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsWhitespace(
10597db96d56Sopenharmony_ci    const Py_UCS4 ch         /* Unicode character */
10607db96d56Sopenharmony_ci    );
10617db96d56Sopenharmony_ci
/* Backs the Py_UNICODE_ISLINEBREAK(ch) macro, which expands to a direct
   call.  Returns an int (presumably nonzero when the character is a line
   break, 0 otherwise -- confirm).
   NOTE(review): the top-level `const` on this by-value parameter has no
   effect on callers; most sibling prototypes omit it. */
10627db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsLinebreak(
10637db96d56Sopenharmony_ci    const Py_UCS4 ch         /* Unicode character */
10647db96d56Sopenharmony_ci    );
10657db96d56Sopenharmony_ci
/* Backs the Py_UNICODE_TOLOWER(ch) macro.  The Py_DEPRECATED(3.3) marker
   below is commented out, so this declaration carries no deprecation
   attribute.  Takes one code point and returns a single Py_UCS4
   (presumably the lowercase mapping, or ch itself if there is none --
   confirm). */
10667db96d56Sopenharmony_ci/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
10677db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10687db96d56Sopenharmony_ci    );
10697db96d56Sopenharmony_ci
/* Single-code-point uppercase conversion.  The Py_DEPRECATED(3.3) marker
   below is commented out, so this declaration carries no deprecation
   attribute.  Returns a single Py_UCS4 (presumably the uppercase mapping,
   or ch itself if there is none -- confirm). */
10707db96d56Sopenharmony_ci/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
10717db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10727db96d56Sopenharmony_ci    );
10737db96d56Sopenharmony_ci
/* Single-code-point titlecase conversion.  Unlike _PyUnicode_ToLowercase
   and _PyUnicode_ToUppercase, whose markers are commented out, this
   declaration is actively marked Py_DEPRECATED(3.3).  Returns a single
   Py_UCS4 (presumably the titlecase mapping, or ch itself if there is
   none -- confirm). */
10747db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
10757db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
10767db96d56Sopenharmony_ci    );
10777db96d56Sopenharmony_ci
/* Lowercase conversion that writes its result through `res` instead of
   returning a single code point.
   NOTE(review): contract inferred from the signature -- `res` is presumably
   an output buffer receiving the mapped code points and the int result
   their count; the required buffer size is not stated here.  Confirm
   against Objects/unicodectype.c. */
10787db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToLowerFull(
10797db96d56Sopenharmony_ci    Py_UCS4 ch,       /* Unicode character */
10807db96d56Sopenharmony_ci    Py_UCS4 *res
10817db96d56Sopenharmony_ci    );
10827db96d56Sopenharmony_ci
/* Titlecase conversion that writes its result through `res` instead of
   returning a single code point.
   NOTE(review): contract inferred from the signature -- `res` is presumably
   an output buffer receiving the mapped code points and the int result
   their count; the required buffer size is not stated here.  Confirm
   against Objects/unicodectype.c. */
10837db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToTitleFull(
10847db96d56Sopenharmony_ci    Py_UCS4 ch,       /* Unicode character */
10857db96d56Sopenharmony_ci    Py_UCS4 *res
10867db96d56Sopenharmony_ci    );
10877db96d56Sopenharmony_ci
/* Uppercase conversion that writes its result through `res` instead of
   returning a single code point.
   NOTE(review): contract inferred from the signature -- `res` is presumably
   an output buffer receiving the mapped code points and the int result
   their count; the required buffer size is not stated here.  Confirm
   against Objects/unicodectype.c. */
10887db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToUpperFull(
10897db96d56Sopenharmony_ci    Py_UCS4 ch,       /* Unicode character */
10907db96d56Sopenharmony_ci    Py_UCS4 *res
10917db96d56Sopenharmony_ci    );
10927db96d56Sopenharmony_ci
/* Case-folding conversion that writes its result through `res` instead of
   returning a single code point.
   NOTE(review): contract inferred from the signature -- `res` is presumably
   an output buffer receiving the folded code points and the int result
   their count; the required buffer size is not stated here.  Confirm
   against Objects/unicodectype.c. */
10937db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
10947db96d56Sopenharmony_ci    Py_UCS4 ch,       /* Unicode character */
10957db96d56Sopenharmony_ci    Py_UCS4 *res
10967db96d56Sopenharmony_ci    );
10977db96d56Sopenharmony_ci
/* NOTE(review): undocumented here.  Presumably tests the Unicode
   Case_Ignorable property and returns nonzero on a match -- confirm
   against Objects/unicodectype.c. */
10987db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
10997db96d56Sopenharmony_ci    Py_UCS4 ch         /* Unicode character */
11007db96d56Sopenharmony_ci    );
11017db96d56Sopenharmony_ci
/* NOTE(review): undocumented here.  Presumably tests the Unicode Cased
   property and returns nonzero on a match -- confirm against
   Objects/unicodectype.c. */
11027db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsCased(
11037db96d56Sopenharmony_ci    Py_UCS4 ch         /* Unicode character */
11047db96d56Sopenharmony_ci    );
11057db96d56Sopenharmony_ci
/* Maps one code point to an int.
   NOTE(review): presumably the character's decimal digit value, with a
   negative result when it has none -- confirm against
   Objects/unicodectype.c. */
11067db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
11077db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11087db96d56Sopenharmony_ci    );
11097db96d56Sopenharmony_ci
/* Maps one code point to an int.
   NOTE(review): presumably the character's digit value, with a negative
   result when it has none -- confirm against Objects/unicodectype.c. */
11107db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToDigit(
11117db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11127db96d56Sopenharmony_ci    );
11137db96d56Sopenharmony_ci
/* Maps one code point to a double.
   NOTE(review): presumably the character's numeric value, with a negative
   result when it has none -- confirm against Objects/unicodectype.c. */
11147db96d56Sopenharmony_ciPyAPI_FUNC(double) _PyUnicode_ToNumeric(
11157db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11167db96d56Sopenharmony_ci    );
11177db96d56Sopenharmony_ci
/* Character-type predicate on one code point; returns an int.
   NOTE(review): presumably nonzero when the character is a decimal digit
   -- confirm against Objects/unicodectype.c. */
11187db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
11197db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11207db96d56Sopenharmony_ci    );
11217db96d56Sopenharmony_ci
/* Character-type predicate on one code point; returns an int.
   NOTE(review): presumably nonzero when the character is a digit --
   confirm against Objects/unicodectype.c. */
11227db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsDigit(
11237db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11247db96d56Sopenharmony_ci    );
11257db96d56Sopenharmony_ci
/* Character-type predicate on one code point; returns an int.
   NOTE(review): presumably nonzero when the character has a numeric value
   -- confirm against Objects/unicodectype.c. */
11267db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsNumeric(
11277db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11287db96d56Sopenharmony_ci    );
11297db96d56Sopenharmony_ci
/* Character-type predicate on one code point; returns an int.
   NOTE(review): presumably nonzero when the character is considered
   printable -- confirm the exact definition against
   Objects/unicodectype.c. */
11307db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsPrintable(
11317db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11327db96d56Sopenharmony_ci    );
11337db96d56Sopenharmony_ci
/* Character-type predicate on one code point; returns an int.
   NOTE(review): presumably nonzero when the character is alphabetic --
   confirm against Objects/unicodectype.c. */
11347db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsAlpha(
11357db96d56Sopenharmony_ci    Py_UCS4 ch       /* Unicode character */
11367db96d56Sopenharmony_ci    );
11377db96d56Sopenharmony_ci
/* NOTE(review): the parameters are unnamed and undocumented here: one
   object followed by three ints, returning a PyObject*.  Look up their
   meaning in the implementation before calling. */
11387db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
11397db96d56Sopenharmony_ci
11407db96d56Sopenharmony_ci/* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
11417db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
11427db96d56Sopenharmony_ci
11437db96d56Sopenharmony_ci/* Fast equality check when the inputs are known to be exact unicode types
11447db96d56Sopenharmony_ci   and where the hash values are equal (i.e. a very probable match).
   Both conditions are stated as preconditions, so the caller is expected
   to have established them.
   NOTE(review): the result is presumably nonzero when the strings are
   equal and 0 otherwise -- confirm. */
11457db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
11467db96d56Sopenharmony_ci
11477db96d56Sopenharmony_ci/* Equality check. Returns -1 on failure.
   NOTE(review): the non-failure results are presumably 1 for equal and 0
   for not equal -- confirm. */
11487db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);
11497db96d56Sopenharmony_ci
/* Converter-style callbacks: each takes an object and an untyped output
   pointer and returns an int.
   NOTE(review): presumably "O&"-style converters for argument parsing that
   produce a wide-character string, with the _Opt_ variant presumably also
   accepting None -- confirm against the implementation. */
11507db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
11517db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);
11527db96d56Sopenharmony_ci
/* NOTE(review): undocumented here.  Takes one object and returns a
   Py_ssize_t -- presumably the length of the leading part of the string
   that forms a valid identifier; confirm, including how errors are
   reported. */
11537db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1154