17db96d56Sopenharmony_ci#ifndef Py_CPYTHON_UNICODEOBJECT_H 27db96d56Sopenharmony_ci# error "this header file must not be included directly" 37db96d56Sopenharmony_ci#endif 47db96d56Sopenharmony_ci 57db96d56Sopenharmony_ci/* Py_UNICODE was the native Unicode storage format (code unit) used by 67db96d56Sopenharmony_ci Python and represents a single Unicode element in the Unicode type. 77db96d56Sopenharmony_ci With PEP 393, Py_UNICODE is deprecated and replaced with a 87db96d56Sopenharmony_ci typedef to wchar_t. */ 97db96d56Sopenharmony_ci#define PY_UNICODE_TYPE wchar_t 107db96d56Sopenharmony_ci/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE; 117db96d56Sopenharmony_ci 127db96d56Sopenharmony_ci/* --- Internal Unicode Operations ---------------------------------------- */ 137db96d56Sopenharmony_ci 147db96d56Sopenharmony_ci#ifndef USE_UNICODE_WCHAR_CACHE 157db96d56Sopenharmony_ci# define USE_UNICODE_WCHAR_CACHE 1 167db96d56Sopenharmony_ci#endif /* USE_UNICODE_WCHAR_CACHE */ 177db96d56Sopenharmony_ci 187db96d56Sopenharmony_ci/* Since splitting on whitespace is an important use case, and 197db96d56Sopenharmony_ci whitespace in most situations is solely ASCII whitespace, we 207db96d56Sopenharmony_ci optimize for the common case by using a quick look-up table 217db96d56Sopenharmony_ci _Py_ascii_whitespace (see below) with an inlined check. 227db96d56Sopenharmony_ci 237db96d56Sopenharmony_ci */ 247db96d56Sopenharmony_ci#define Py_UNICODE_ISSPACE(ch) \ 257db96d56Sopenharmony_ci ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) 267db96d56Sopenharmony_ci 277db96d56Sopenharmony_ci#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) 287db96d56Sopenharmony_ci#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) 297db96d56Sopenharmony_ci#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) 307db96d56Sopenharmony_ci#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) 317db96d56Sopenharmony_ci 327db96d56Sopenharmony_ci#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) 337db96d56Sopenharmony_ci#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) 347db96d56Sopenharmony_ci#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) 357db96d56Sopenharmony_ci 367db96d56Sopenharmony_ci#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) 377db96d56Sopenharmony_ci#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) 387db96d56Sopenharmony_ci#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) 397db96d56Sopenharmony_ci#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) 427db96d56Sopenharmony_ci#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) 437db96d56Sopenharmony_ci#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) 447db96d56Sopenharmony_ci 457db96d56Sopenharmony_ci#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) 467db96d56Sopenharmony_ci 477db96d56Sopenharmony_ci#define Py_UNICODE_ISALNUM(ch) \ 487db96d56Sopenharmony_ci (Py_UNICODE_ISALPHA(ch) || \ 497db96d56Sopenharmony_ci Py_UNICODE_ISDECIMAL(ch) || \ 507db96d56Sopenharmony_ci Py_UNICODE_ISDIGIT(ch) || \ 517db96d56Sopenharmony_ci Py_UNICODE_ISNUMERIC(ch)) 527db96d56Sopenharmony_ci 537db96d56Sopenharmony_ci/* macros to work with surrogates */ 547db96d56Sopenharmony_ci#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) 557db96d56Sopenharmony_ci#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) 567db96d56Sopenharmony_ci#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) 577db96d56Sopenharmony_ci/* Join two surrogate characters and return a single Py_UCS4 value. */ 587db96d56Sopenharmony_ci#define Py_UNICODE_JOIN_SURROGATES(high, low) \ 597db96d56Sopenharmony_ci (((((Py_UCS4)(high) & 0x03FF) << 10) | \ 607db96d56Sopenharmony_ci ((Py_UCS4)(low) & 0x03FF)) + 0x10000) 617db96d56Sopenharmony_ci/* high surrogate = top 10 bits added to D800 */ 627db96d56Sopenharmony_ci#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) 637db96d56Sopenharmony_ci/* low surrogate = bottom 10 bits added to DC00 */ 647db96d56Sopenharmony_ci#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_ci/* --- Unicode Type ------------------------------------------------------- */ 677db96d56Sopenharmony_ci 687db96d56Sopenharmony_ci/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject 697db96d56Sopenharmony_ci structure. state.ascii and state.compact are set, and the data 707db96d56Sopenharmony_ci immediately follow the structure. utf8_length and wstr_length can be found 717db96d56Sopenharmony_ci in the length field; the utf8 pointer is equal to the data pointer. */ 727db96d56Sopenharmony_citypedef struct { 737db96d56Sopenharmony_ci /* There are 4 forms of Unicode strings: 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_ci - compact ascii: 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci * structure = PyASCIIObject 787db96d56Sopenharmony_ci * test: PyUnicode_IS_COMPACT_ASCII(op) 797db96d56Sopenharmony_ci * kind = PyUnicode_1BYTE_KIND 807db96d56Sopenharmony_ci * compact = 1 817db96d56Sopenharmony_ci * ascii = 1 827db96d56Sopenharmony_ci * ready = 1 837db96d56Sopenharmony_ci * (length is the length of the utf8 and wstr strings) 847db96d56Sopenharmony_ci * (data starts just after the structure) 857db96d56Sopenharmony_ci * (since ASCII is decoded from UTF-8, the utf8 string are the data) 867db96d56Sopenharmony_ci 877db96d56Sopenharmony_ci - compact: 887db96d56Sopenharmony_ci 897db96d56Sopenharmony_ci * structure = PyCompactUnicodeObject 907db96d56Sopenharmony_ci * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) 917db96d56Sopenharmony_ci * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or 927db96d56Sopenharmony_ci PyUnicode_4BYTE_KIND 937db96d56Sopenharmony_ci * compact = 1 947db96d56Sopenharmony_ci * ready = 1 957db96d56Sopenharmony_ci * ascii = 0 967db96d56Sopenharmony_ci * utf8 is not shared with data 977db96d56Sopenharmony_ci * utf8_length = 0 if utf8 is NULL 987db96d56Sopenharmony_ci * wstr is shared with data and wstr_length=length 997db96d56Sopenharmony_ci if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 1007db96d56Sopenharmony_ci or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 1017db96d56Sopenharmony_ci * wstr_length = 0 if wstr is NULL 1027db96d56Sopenharmony_ci * (data starts just after the structure) 1037db96d56Sopenharmony_ci 1047db96d56Sopenharmony_ci - legacy string, not ready: 1057db96d56Sopenharmony_ci 1067db96d56Sopenharmony_ci * structure = PyUnicodeObject 1077db96d56Sopenharmony_ci * test: kind == PyUnicode_WCHAR_KIND 1087db96d56Sopenharmony_ci * length = 0 (use wstr_length) 1097db96d56Sopenharmony_ci * hash = -1 1107db96d56Sopenharmony_ci * kind = PyUnicode_WCHAR_KIND 1117db96d56Sopenharmony_ci * compact = 0 1127db96d56Sopenharmony_ci * ascii = 0 1137db96d56Sopenharmony_ci * ready = 0 1147db96d56Sopenharmony_ci * interned = SSTATE_NOT_INTERNED 1157db96d56Sopenharmony_ci * wstr is not NULL 1167db96d56Sopenharmony_ci * data.any is NULL 1177db96d56Sopenharmony_ci * utf8 is NULL 1187db96d56Sopenharmony_ci * utf8_length = 0 1197db96d56Sopenharmony_ci 1207db96d56Sopenharmony_ci - legacy string, ready: 1217db96d56Sopenharmony_ci 1227db96d56Sopenharmony_ci * structure = PyUnicodeObject structure 1237db96d56Sopenharmony_ci * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND 1247db96d56Sopenharmony_ci * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or 1257db96d56Sopenharmony_ci PyUnicode_4BYTE_KIND 1267db96d56Sopenharmony_ci * compact = 0 1277db96d56Sopenharmony_ci * ready = 1 1287db96d56Sopenharmony_ci * data.any is not NULL 1297db96d56Sopenharmony_ci * utf8 is shared and utf8_length = length with data.any if ascii = 1 1307db96d56Sopenharmony_ci * utf8_length = 0 if utf8 is NULL 1317db96d56Sopenharmony_ci * wstr is shared with data.any and wstr_length = length 1327db96d56Sopenharmony_ci if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 1337db96d56Sopenharmony_ci or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 1347db96d56Sopenharmony_ci * wstr_length = 0 if wstr is NULL 1357db96d56Sopenharmony_ci 1367db96d56Sopenharmony_ci Compact strings use only one memory block (structure + characters), 1377db96d56Sopenharmony_ci whereas legacy strings use one block for the structure and one block 1387db96d56Sopenharmony_ci for characters. 1397db96d56Sopenharmony_ci 1407db96d56Sopenharmony_ci Legacy strings are created by PyUnicode_FromUnicode() and 1417db96d56Sopenharmony_ci PyUnicode_FromStringAndSize(NULL, size) functions. They become ready 1427db96d56Sopenharmony_ci when PyUnicode_READY() is called. 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci See also _PyUnicode_CheckConsistency(). 1457db96d56Sopenharmony_ci */ 1467db96d56Sopenharmony_ci PyObject_HEAD 1477db96d56Sopenharmony_ci Py_ssize_t length; /* Number of code points in the string */ 1487db96d56Sopenharmony_ci Py_hash_t hash; /* Hash value; -1 if not set */ 1497db96d56Sopenharmony_ci struct { 1507db96d56Sopenharmony_ci /* 1517db96d56Sopenharmony_ci SSTATE_NOT_INTERNED (0) 1527db96d56Sopenharmony_ci SSTATE_INTERNED_MORTAL (1) 1537db96d56Sopenharmony_ci SSTATE_INTERNED_IMMORTAL (2) 1547db96d56Sopenharmony_ci 1557db96d56Sopenharmony_ci If interned != SSTATE_NOT_INTERNED, the two references from the 1567db96d56Sopenharmony_ci dictionary to this object are *not* counted in ob_refcnt. 1577db96d56Sopenharmony_ci */ 1587db96d56Sopenharmony_ci unsigned int interned:2; 1597db96d56Sopenharmony_ci /* Character size: 1607db96d56Sopenharmony_ci 1617db96d56Sopenharmony_ci - PyUnicode_WCHAR_KIND (0): 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci * character type = wchar_t (16 or 32 bits, depending on the 1647db96d56Sopenharmony_ci platform) 1657db96d56Sopenharmony_ci 1667db96d56Sopenharmony_ci - PyUnicode_1BYTE_KIND (1): 1677db96d56Sopenharmony_ci 1687db96d56Sopenharmony_ci * character type = Py_UCS1 (8 bits, unsigned) 1697db96d56Sopenharmony_ci * all characters are in the range U+0000-U+00FF (latin1) 1707db96d56Sopenharmony_ci * if ascii is set, all characters are in the range U+0000-U+007F 1717db96d56Sopenharmony_ci (ASCII), otherwise at least one character is in the range 1727db96d56Sopenharmony_ci U+0080-U+00FF 1737db96d56Sopenharmony_ci 1747db96d56Sopenharmony_ci - PyUnicode_2BYTE_KIND (2): 1757db96d56Sopenharmony_ci 1767db96d56Sopenharmony_ci * character type = Py_UCS2 (16 bits, unsigned) 1777db96d56Sopenharmony_ci * all characters are in the range U+0000-U+FFFF (BMP) 1787db96d56Sopenharmony_ci * at least one character is in the range U+0100-U+FFFF 1797db96d56Sopenharmony_ci 1807db96d56Sopenharmony_ci - PyUnicode_4BYTE_KIND (4): 1817db96d56Sopenharmony_ci 1827db96d56Sopenharmony_ci * character type = Py_UCS4 (32 bits, unsigned) 1837db96d56Sopenharmony_ci * all characters are in the range U+0000-U+10FFFF 1847db96d56Sopenharmony_ci * at least one character is in the range U+10000-U+10FFFF 1857db96d56Sopenharmony_ci */ 1867db96d56Sopenharmony_ci unsigned int kind:3; 1877db96d56Sopenharmony_ci /* Compact is with respect to the allocation scheme. Compact unicode 1887db96d56Sopenharmony_ci objects only require one memory block while non-compact objects use 1897db96d56Sopenharmony_ci one block for the PyUnicodeObject struct and another for its data 1907db96d56Sopenharmony_ci buffer. */ 1917db96d56Sopenharmony_ci unsigned int compact:1; 1927db96d56Sopenharmony_ci /* The string only contains characters in the range U+0000-U+007F (ASCII) 1937db96d56Sopenharmony_ci and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is 1947db96d56Sopenharmony_ci set, use the PyASCIIObject structure. */ 1957db96d56Sopenharmony_ci unsigned int ascii:1; 1967db96d56Sopenharmony_ci /* The ready flag indicates whether the object layout is initialized 1977db96d56Sopenharmony_ci completely. This means that this is either a compact object, or 1987db96d56Sopenharmony_ci the data pointer is filled out. The bit is redundant, and helps 1997db96d56Sopenharmony_ci to minimize the test in PyUnicode_IS_READY(). */ 2007db96d56Sopenharmony_ci unsigned int ready:1; 2017db96d56Sopenharmony_ci /* Padding to ensure that PyUnicode_DATA() is always aligned to 2027db96d56Sopenharmony_ci 4 bytes (see issue #19537 on m68k). */ 2037db96d56Sopenharmony_ci unsigned int :24; 2047db96d56Sopenharmony_ci } state; 2057db96d56Sopenharmony_ci wchar_t *wstr; /* wchar_t representation (null-terminated) */ 2067db96d56Sopenharmony_ci} PyASCIIObject; 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_ci/* Non-ASCII strings allocated through PyUnicode_New use the 2097db96d56Sopenharmony_ci PyCompactUnicodeObject structure. state.compact is set, and the data 2107db96d56Sopenharmony_ci immediately follow the structure. */ 2117db96d56Sopenharmony_citypedef struct { 2127db96d56Sopenharmony_ci PyASCIIObject _base; 2137db96d56Sopenharmony_ci Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the 2147db96d56Sopenharmony_ci * terminating \0. */ 2157db96d56Sopenharmony_ci char *utf8; /* UTF-8 representation (null-terminated) */ 2167db96d56Sopenharmony_ci Py_ssize_t wstr_length; /* Number of code points in wstr, possible 2177db96d56Sopenharmony_ci * surrogates count as two code points. */ 2187db96d56Sopenharmony_ci} PyCompactUnicodeObject; 2197db96d56Sopenharmony_ci 2207db96d56Sopenharmony_ci/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the 2217db96d56Sopenharmony_ci PyUnicodeObject structure. The actual string data is initially in the wstr 2227db96d56Sopenharmony_ci block, and copied into the data block using _PyUnicode_Ready. */ 2237db96d56Sopenharmony_citypedef struct { 2247db96d56Sopenharmony_ci PyCompactUnicodeObject _base; 2257db96d56Sopenharmony_ci union { 2267db96d56Sopenharmony_ci void *any; 2277db96d56Sopenharmony_ci Py_UCS1 *latin1; 2287db96d56Sopenharmony_ci Py_UCS2 *ucs2; 2297db96d56Sopenharmony_ci Py_UCS4 *ucs4; 2307db96d56Sopenharmony_ci } data; /* Canonical, smallest-form Unicode buffer */ 2317db96d56Sopenharmony_ci} PyUnicodeObject; 2327db96d56Sopenharmony_ci 2337db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_CheckConsistency( 2347db96d56Sopenharmony_ci PyObject *op, 2357db96d56Sopenharmony_ci int check_content); 2367db96d56Sopenharmony_ci 2377db96d56Sopenharmony_ci 2387db96d56Sopenharmony_ci#define _PyASCIIObject_CAST(op) \ 2397db96d56Sopenharmony_ci (assert(PyUnicode_Check(op)), \ 2407db96d56Sopenharmony_ci _Py_CAST(PyASCIIObject*, (op))) 2417db96d56Sopenharmony_ci#define _PyCompactUnicodeObject_CAST(op) \ 2427db96d56Sopenharmony_ci (assert(PyUnicode_Check(op)), \ 2437db96d56Sopenharmony_ci _Py_CAST(PyCompactUnicodeObject*, (op))) 2447db96d56Sopenharmony_ci#define _PyUnicodeObject_CAST(op) \ 2457db96d56Sopenharmony_ci (assert(PyUnicode_Check(op)), \ 2467db96d56Sopenharmony_ci _Py_CAST(PyUnicodeObject*, (op))) 2477db96d56Sopenharmony_ci 2487db96d56Sopenharmony_ci 2497db96d56Sopenharmony_ci/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ 2507db96d56Sopenharmony_ci 2517db96d56Sopenharmony_ci/* Values for PyASCIIObject.state: */ 2527db96d56Sopenharmony_ci 2537db96d56Sopenharmony_ci/* Interning state. */ 2547db96d56Sopenharmony_ci#define SSTATE_NOT_INTERNED 0 2557db96d56Sopenharmony_ci#define SSTATE_INTERNED_MORTAL 1 2567db96d56Sopenharmony_ci#define SSTATE_INTERNED_IMMORTAL 2 2577db96d56Sopenharmony_ci 2587db96d56Sopenharmony_ci/* Use only if you know it's a string */ 2597db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { 2607db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->state.interned; 2617db96d56Sopenharmony_ci} 2627db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 2637db96d56Sopenharmony_ci# define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op)) 2647db96d56Sopenharmony_ci#endif 2657db96d56Sopenharmony_ci 2667db96d56Sopenharmony_ci/* Fast check to determine whether an object is ready. Equivalent to: 2677db96d56Sopenharmony_ci PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */ 2687db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_READY(PyObject *op) { 2697db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->state.ready; 2707db96d56Sopenharmony_ci} 2717db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 2727db96d56Sopenharmony_ci# define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op)) 2737db96d56Sopenharmony_ci#endif 2747db96d56Sopenharmony_ci 2757db96d56Sopenharmony_ci/* Return true if the string contains only ASCII characters, or 0 if not. The 2767db96d56Sopenharmony_ci string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be 2777db96d56Sopenharmony_ci ready. */ 2787db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_ASCII(PyObject *op) { 2797db96d56Sopenharmony_ci assert(PyUnicode_IS_READY(op)); 2807db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->state.ascii; 2817db96d56Sopenharmony_ci} 2827db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 2837db96d56Sopenharmony_ci# define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op)) 2847db96d56Sopenharmony_ci#endif 2857db96d56Sopenharmony_ci 2867db96d56Sopenharmony_ci/* Return true if the string is compact or 0 if not. 2877db96d56Sopenharmony_ci No type checks or Ready calls are performed. */ 2887db96d56Sopenharmony_cistatic inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) { 2897db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->state.compact; 2907db96d56Sopenharmony_ci} 2917db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 2927db96d56Sopenharmony_ci# define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op)) 2937db96d56Sopenharmony_ci#endif 2947db96d56Sopenharmony_ci 2957db96d56Sopenharmony_ci/* Return true if the string is a compact ASCII string (use PyASCIIObject 2967db96d56Sopenharmony_ci structure), or 0 if not. No type checks or Ready calls are performed. */ 2977db96d56Sopenharmony_cistatic inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) { 2987db96d56Sopenharmony_ci return (_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op)); 2997db96d56Sopenharmony_ci} 3007db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 3017db96d56Sopenharmony_ci# define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op)) 3027db96d56Sopenharmony_ci#endif 3037db96d56Sopenharmony_ci 3047db96d56Sopenharmony_cienum PyUnicode_Kind { 3057db96d56Sopenharmony_ci/* String contains only wstr byte characters. This is only possible 3067db96d56Sopenharmony_ci when the string was created with a legacy API and _PyUnicode_Ready() 3077db96d56Sopenharmony_ci has not been called yet. */ 3087db96d56Sopenharmony_ci PyUnicode_WCHAR_KIND = 0, 3097db96d56Sopenharmony_ci/* Return values of the PyUnicode_KIND() function: */ 3107db96d56Sopenharmony_ci PyUnicode_1BYTE_KIND = 1, 3117db96d56Sopenharmony_ci PyUnicode_2BYTE_KIND = 2, 3127db96d56Sopenharmony_ci PyUnicode_4BYTE_KIND = 4 3137db96d56Sopenharmony_ci}; 3147db96d56Sopenharmony_ci 3157db96d56Sopenharmony_ci/* Return one of the PyUnicode_*_KIND values defined above. */ 3167db96d56Sopenharmony_ci#define PyUnicode_KIND(op) \ 3177db96d56Sopenharmony_ci (assert(PyUnicode_IS_READY(op)), \ 3187db96d56Sopenharmony_ci _PyASCIIObject_CAST(op)->state.kind) 3197db96d56Sopenharmony_ci 3207db96d56Sopenharmony_ci/* Return a void pointer to the raw unicode buffer. */ 3217db96d56Sopenharmony_cistatic inline void* _PyUnicode_COMPACT_DATA(PyObject *op) { 3227db96d56Sopenharmony_ci if (PyUnicode_IS_ASCII(op)) { 3237db96d56Sopenharmony_ci return _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1)); 3247db96d56Sopenharmony_ci } 3257db96d56Sopenharmony_ci return _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1)); 3267db96d56Sopenharmony_ci} 3277db96d56Sopenharmony_ci 3287db96d56Sopenharmony_cistatic inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) { 3297db96d56Sopenharmony_ci void *data; 3307db96d56Sopenharmony_ci assert(!PyUnicode_IS_COMPACT(op)); 3317db96d56Sopenharmony_ci data = _PyUnicodeObject_CAST(op)->data.any; 3327db96d56Sopenharmony_ci assert(data != NULL); 3337db96d56Sopenharmony_ci return data; 3347db96d56Sopenharmony_ci} 3357db96d56Sopenharmony_ci 3367db96d56Sopenharmony_cistatic inline void* PyUnicode_DATA(PyObject *op) { 3377db96d56Sopenharmony_ci if (PyUnicode_IS_COMPACT(op)) { 3387db96d56Sopenharmony_ci return _PyUnicode_COMPACT_DATA(op); 3397db96d56Sopenharmony_ci } 3407db96d56Sopenharmony_ci return _PyUnicode_NONCOMPACT_DATA(op); 3417db96d56Sopenharmony_ci} 3427db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 3437db96d56Sopenharmony_ci# define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op)) 3447db96d56Sopenharmony_ci#endif 3457db96d56Sopenharmony_ci 3467db96d56Sopenharmony_ci/* Return pointers to the canonical representation cast to unsigned char, 3477db96d56Sopenharmony_ci Py_UCS2, or Py_UCS4 for direct character access. 3487db96d56Sopenharmony_ci No checks are performed, use PyUnicode_KIND() before to ensure 3497db96d56Sopenharmony_ci these will work correctly. */ 3507db96d56Sopenharmony_ci 3517db96d56Sopenharmony_ci#define PyUnicode_1BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(op)) 3527db96d56Sopenharmony_ci#define PyUnicode_2BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op)) 3537db96d56Sopenharmony_ci#define PyUnicode_4BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op)) 3547db96d56Sopenharmony_ci 3557db96d56Sopenharmony_ci/* Returns the length of the unicode string. The caller has to make sure that 3567db96d56Sopenharmony_ci the string has it's canonical representation set before calling 3577db96d56Sopenharmony_ci this function. Call PyUnicode_(FAST_)Ready to ensure that. */ 3587db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) { 3597db96d56Sopenharmony_ci assert(PyUnicode_IS_READY(op)); 3607db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->length; 3617db96d56Sopenharmony_ci} 3627db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 3637db96d56Sopenharmony_ci# define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op)) 3647db96d56Sopenharmony_ci#endif 3657db96d56Sopenharmony_ci 3667db96d56Sopenharmony_ci/* Write into the canonical representation, this function does not do any sanity 3677db96d56Sopenharmony_ci checks and is intended for usage in loops. The caller should cache the 3687db96d56Sopenharmony_ci kind and data pointers obtained from other function calls. 3697db96d56Sopenharmony_ci index is the index in the string (starts at 0) and value is the new 3707db96d56Sopenharmony_ci code point value which should be written to that location. */ 3717db96d56Sopenharmony_cistatic inline void PyUnicode_WRITE(int kind, void *data, 3727db96d56Sopenharmony_ci Py_ssize_t index, Py_UCS4 value) 3737db96d56Sopenharmony_ci{ 3747db96d56Sopenharmony_ci if (kind == PyUnicode_1BYTE_KIND) { 3757db96d56Sopenharmony_ci assert(value <= 0xffU); 3767db96d56Sopenharmony_ci _Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value); 3777db96d56Sopenharmony_ci } 3787db96d56Sopenharmony_ci else if (kind == PyUnicode_2BYTE_KIND) { 3797db96d56Sopenharmony_ci assert(value <= 0xffffU); 3807db96d56Sopenharmony_ci _Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value); 3817db96d56Sopenharmony_ci } 3827db96d56Sopenharmony_ci else { 3837db96d56Sopenharmony_ci assert(kind == PyUnicode_4BYTE_KIND); 3847db96d56Sopenharmony_ci assert(value <= 0x10ffffU); 3857db96d56Sopenharmony_ci _Py_STATIC_CAST(Py_UCS4*, data)[index] = value; 3867db96d56Sopenharmony_ci } 3877db96d56Sopenharmony_ci} 3887db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 3897db96d56Sopenharmony_ci#define PyUnicode_WRITE(kind, data, index, value) \ 3907db96d56Sopenharmony_ci PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \ 3917db96d56Sopenharmony_ci (index), _Py_STATIC_CAST(Py_UCS4, value)) 3927db96d56Sopenharmony_ci#endif 3937db96d56Sopenharmony_ci 3947db96d56Sopenharmony_ci/* Read a code point from the string's canonical representation. No checks 3957db96d56Sopenharmony_ci or ready calls are performed. */ 3967db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_READ(int kind, 3977db96d56Sopenharmony_ci const void *data, Py_ssize_t index) 3987db96d56Sopenharmony_ci{ 3997db96d56Sopenharmony_ci if (kind == PyUnicode_1BYTE_KIND) { 4007db96d56Sopenharmony_ci return _Py_STATIC_CAST(const Py_UCS1*, data)[index]; 4017db96d56Sopenharmony_ci } 4027db96d56Sopenharmony_ci if (kind == PyUnicode_2BYTE_KIND) { 4037db96d56Sopenharmony_ci return _Py_STATIC_CAST(const Py_UCS2*, data)[index]; 4047db96d56Sopenharmony_ci } 4057db96d56Sopenharmony_ci assert(kind == PyUnicode_4BYTE_KIND); 4067db96d56Sopenharmony_ci return _Py_STATIC_CAST(const Py_UCS4*, data)[index]; 4077db96d56Sopenharmony_ci} 4087db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 4097db96d56Sopenharmony_ci#define PyUnicode_READ(kind, data, index) \ 4107db96d56Sopenharmony_ci PyUnicode_READ(_Py_STATIC_CAST(int, kind), \ 4117db96d56Sopenharmony_ci _Py_STATIC_CAST(const void*, data), \ 4127db96d56Sopenharmony_ci (index)) 4137db96d56Sopenharmony_ci#endif 4147db96d56Sopenharmony_ci 4157db96d56Sopenharmony_ci/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it 4167db96d56Sopenharmony_ci calls PyUnicode_KIND() and might call it twice. For single reads, use 4177db96d56Sopenharmony_ci PyUnicode_READ_CHAR, for multiple consecutive reads callers should 4187db96d56Sopenharmony_ci cache kind and use PyUnicode_READ instead. */ 4197db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index) 4207db96d56Sopenharmony_ci{ 4217db96d56Sopenharmony_ci int kind; 4227db96d56Sopenharmony_ci assert(PyUnicode_IS_READY(unicode)); 4237db96d56Sopenharmony_ci kind = PyUnicode_KIND(unicode); 4247db96d56Sopenharmony_ci if (kind == PyUnicode_1BYTE_KIND) { 4257db96d56Sopenharmony_ci return PyUnicode_1BYTE_DATA(unicode)[index]; 4267db96d56Sopenharmony_ci } 4277db96d56Sopenharmony_ci if (kind == PyUnicode_2BYTE_KIND) { 4287db96d56Sopenharmony_ci return PyUnicode_2BYTE_DATA(unicode)[index]; 4297db96d56Sopenharmony_ci } 4307db96d56Sopenharmony_ci assert(kind == PyUnicode_4BYTE_KIND); 4317db96d56Sopenharmony_ci return PyUnicode_4BYTE_DATA(unicode)[index]; 4327db96d56Sopenharmony_ci} 4337db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 4347db96d56Sopenharmony_ci# define PyUnicode_READ_CHAR(unicode, index) \ 4357db96d56Sopenharmony_ci PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index)) 4367db96d56Sopenharmony_ci#endif 4377db96d56Sopenharmony_ci 4387db96d56Sopenharmony_ci/* Return a maximum character value which is suitable for creating another 4397db96d56Sopenharmony_ci string based on op. This is always an approximation but more efficient 4407db96d56Sopenharmony_ci than iterating over the string. */ 4417db96d56Sopenharmony_cistatic inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op) 4427db96d56Sopenharmony_ci{ 4437db96d56Sopenharmony_ci int kind; 4447db96d56Sopenharmony_ci 4457db96d56Sopenharmony_ci assert(PyUnicode_IS_READY(op)); 4467db96d56Sopenharmony_ci if (PyUnicode_IS_ASCII(op)) { 4477db96d56Sopenharmony_ci return 0x7fU; 4487db96d56Sopenharmony_ci } 4497db96d56Sopenharmony_ci 4507db96d56Sopenharmony_ci kind = PyUnicode_KIND(op); 4517db96d56Sopenharmony_ci if (kind == PyUnicode_1BYTE_KIND) { 4527db96d56Sopenharmony_ci return 0xffU; 4537db96d56Sopenharmony_ci } 4547db96d56Sopenharmony_ci if (kind == PyUnicode_2BYTE_KIND) { 4557db96d56Sopenharmony_ci return 0xffffU; 4567db96d56Sopenharmony_ci } 4577db96d56Sopenharmony_ci assert(kind == PyUnicode_4BYTE_KIND); 4587db96d56Sopenharmony_ci return 0x10ffffU; 4597db96d56Sopenharmony_ci} 4607db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 4617db96d56Sopenharmony_ci# define PyUnicode_MAX_CHAR_VALUE(op) \ 4627db96d56Sopenharmony_ci PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op)) 4637db96d56Sopenharmony_ci#endif 4647db96d56Sopenharmony_ci 4657db96d56Sopenharmony_ci/* === Public API ========================================================= */ 4667db96d56Sopenharmony_ci 4677db96d56Sopenharmony_ci/* --- Plain Py_UNICODE --------------------------------------------------- */ 4687db96d56Sopenharmony_ci 4697db96d56Sopenharmony_ci/* With PEP 393, this is the recommended way to allocate a new unicode object. 4707db96d56Sopenharmony_ci This function will allocate the object and its buffer in a single memory 4717db96d56Sopenharmony_ci block. Objects created using this function are not resizable. */ 4727db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) PyUnicode_New( 4737db96d56Sopenharmony_ci Py_ssize_t size, /* Number of code points in the new string */ 4747db96d56Sopenharmony_ci Py_UCS4 maxchar /* maximum code point value in the string */ 4757db96d56Sopenharmony_ci ); 4767db96d56Sopenharmony_ci 4777db96d56Sopenharmony_ci/* Initializes the canonical string representation from the deprecated 4787db96d56Sopenharmony_ci wstr/Py_UNICODE representation. This function is used to convert Unicode 4797db96d56Sopenharmony_ci objects which were created using the old API to the new flexible format 4807db96d56Sopenharmony_ci introduced with PEP 393. 4817db96d56Sopenharmony_ci 4827db96d56Sopenharmony_ci Don't call this function directly, use the public PyUnicode_READY() function 4837db96d56Sopenharmony_ci instead. */ 4847db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_Ready( 4857db96d56Sopenharmony_ci PyObject *unicode /* Unicode object */ 4867db96d56Sopenharmony_ci ); 4877db96d56Sopenharmony_ci 4887db96d56Sopenharmony_ci/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best 4897db96d56Sopenharmony_ci case. If the canonical representation is not yet set, it will still call 4907db96d56Sopenharmony_ci _PyUnicode_Ready(). 4917db96d56Sopenharmony_ci Returns 0 on success and -1 on errors. */ 4927db96d56Sopenharmony_cistatic inline int PyUnicode_READY(PyObject *op) 4937db96d56Sopenharmony_ci{ 4947db96d56Sopenharmony_ci if (PyUnicode_IS_READY(op)) { 4957db96d56Sopenharmony_ci return 0; 4967db96d56Sopenharmony_ci } 4977db96d56Sopenharmony_ci return _PyUnicode_Ready(op); 4987db96d56Sopenharmony_ci} 4997db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 5007db96d56Sopenharmony_ci# define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op)) 5017db96d56Sopenharmony_ci#endif 5027db96d56Sopenharmony_ci 5037db96d56Sopenharmony_ci/* Get a copy of a Unicode string. */ 5047db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_Copy( 5057db96d56Sopenharmony_ci PyObject *unicode 5067db96d56Sopenharmony_ci ); 5077db96d56Sopenharmony_ci 5087db96d56Sopenharmony_ci/* Copy character from one unicode object into another, this function performs 5097db96d56Sopenharmony_ci character conversion when necessary and falls back to memcpy() if possible. 5107db96d56Sopenharmony_ci 5117db96d56Sopenharmony_ci Fail if to is too small (smaller than *how_many* or smaller than 5127db96d56Sopenharmony_ci len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > 5137db96d56Sopenharmony_ci kind(to), or if *to* has more than 1 reference. 5147db96d56Sopenharmony_ci 5157db96d56Sopenharmony_ci Return the number of written character, or return -1 and raise an exception 5167db96d56Sopenharmony_ci on error. 5177db96d56Sopenharmony_ci 5187db96d56Sopenharmony_ci Pseudo-code: 5197db96d56Sopenharmony_ci 5207db96d56Sopenharmony_ci how_many = min(how_many, len(from) - from_start) 5217db96d56Sopenharmony_ci to[to_start:to_start+how_many] = from[from_start:from_start+how_many] 5227db96d56Sopenharmony_ci return how_many 5237db96d56Sopenharmony_ci 5247db96d56Sopenharmony_ci Note: The function doesn't write a terminating null character. 5257db96d56Sopenharmony_ci */ 5267db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( 5277db96d56Sopenharmony_ci PyObject *to, 5287db96d56Sopenharmony_ci Py_ssize_t to_start, 5297db96d56Sopenharmony_ci PyObject *from, 5307db96d56Sopenharmony_ci Py_ssize_t from_start, 5317db96d56Sopenharmony_ci Py_ssize_t how_many 5327db96d56Sopenharmony_ci ); 5337db96d56Sopenharmony_ci 5347db96d56Sopenharmony_ci/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so 5357db96d56Sopenharmony_ci may crash if parameters are invalid (e.g. if the output string 5367db96d56Sopenharmony_ci is too short). */ 5377db96d56Sopenharmony_ciPyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( 5387db96d56Sopenharmony_ci PyObject *to, 5397db96d56Sopenharmony_ci Py_ssize_t to_start, 5407db96d56Sopenharmony_ci PyObject *from, 5417db96d56Sopenharmony_ci Py_ssize_t from_start, 5427db96d56Sopenharmony_ci Py_ssize_t how_many 5437db96d56Sopenharmony_ci ); 5447db96d56Sopenharmony_ci 5457db96d56Sopenharmony_ci/* Fill a string with a character: write fill_char into 5467db96d56Sopenharmony_ci unicode[start:start+length]. 5477db96d56Sopenharmony_ci 5487db96d56Sopenharmony_ci Fail if fill_char is bigger than the string maximum character, or if the 5497db96d56Sopenharmony_ci string has more than 1 reference. 5507db96d56Sopenharmony_ci 5517db96d56Sopenharmony_ci Return the number of written character, or return -1 and raise an exception 5527db96d56Sopenharmony_ci on error. */ 5537db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( 5547db96d56Sopenharmony_ci PyObject *unicode, 5557db96d56Sopenharmony_ci Py_ssize_t start, 5567db96d56Sopenharmony_ci Py_ssize_t length, 5577db96d56Sopenharmony_ci Py_UCS4 fill_char 5587db96d56Sopenharmony_ci ); 5597db96d56Sopenharmony_ci 5607db96d56Sopenharmony_ci/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash 5617db96d56Sopenharmony_ci if parameters are invalid (e.g. if length is longer than the string). */ 5627db96d56Sopenharmony_ciPyAPI_FUNC(void) _PyUnicode_FastFill( 5637db96d56Sopenharmony_ci PyObject *unicode, 5647db96d56Sopenharmony_ci Py_ssize_t start, 5657db96d56Sopenharmony_ci Py_ssize_t length, 5667db96d56Sopenharmony_ci Py_UCS4 fill_char 5677db96d56Sopenharmony_ci ); 5687db96d56Sopenharmony_ci 5697db96d56Sopenharmony_ci/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. 5707db96d56Sopenharmony_ci Scan the string to find the maximum character. */ 5717db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( 5727db96d56Sopenharmony_ci int kind, 5737db96d56Sopenharmony_ci const void *buffer, 5747db96d56Sopenharmony_ci Py_ssize_t size); 5757db96d56Sopenharmony_ci 5767db96d56Sopenharmony_ci/* Create a new string from a buffer of ASCII characters. 5777db96d56Sopenharmony_ci WARNING: Don't check if the string contains any non-ASCII character. */ 5787db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( 5797db96d56Sopenharmony_ci const char *buffer, 5807db96d56Sopenharmony_ci Py_ssize_t size); 5817db96d56Sopenharmony_ci 5827db96d56Sopenharmony_ci/* Compute the maximum character of the substring unicode[start:end]. 5837db96d56Sopenharmony_ci Return 127 for an empty string. */ 5847db96d56Sopenharmony_ciPyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( 5857db96d56Sopenharmony_ci PyObject *unicode, 5867db96d56Sopenharmony_ci Py_ssize_t start, 5877db96d56Sopenharmony_ci Py_ssize_t end); 5887db96d56Sopenharmony_ci 5897db96d56Sopenharmony_ci/* --- Legacy deprecated API ---------------------------------------------- */ 5907db96d56Sopenharmony_ci 5917db96d56Sopenharmony_ci/* Create a Unicode Object from the Py_UNICODE buffer u of the given 5927db96d56Sopenharmony_ci size. 5937db96d56Sopenharmony_ci 5947db96d56Sopenharmony_ci u may be NULL which causes the contents to be undefined. It is the 5957db96d56Sopenharmony_ci user's responsibility to fill in the needed data afterwards. Note 5967db96d56Sopenharmony_ci that modifying the Unicode object contents after construction is 5977db96d56Sopenharmony_ci only allowed if u was set to NULL. 5987db96d56Sopenharmony_ci 5997db96d56Sopenharmony_ci The buffer is copied into the new object. */ 6007db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( 6017db96d56Sopenharmony_ci const Py_UNICODE *u, /* Unicode buffer */ 6027db96d56Sopenharmony_ci Py_ssize_t size /* size of buffer */ 6037db96d56Sopenharmony_ci ); 6047db96d56Sopenharmony_ci 6057db96d56Sopenharmony_ci/* Return a read-only pointer to the Unicode object's internal 6067db96d56Sopenharmony_ci Py_UNICODE buffer. 6077db96d56Sopenharmony_ci If the wchar_t/Py_UNICODE representation is not yet available, this 6087db96d56Sopenharmony_ci function will calculate it. */ 6097db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( 6107db96d56Sopenharmony_ci PyObject *unicode /* Unicode object */ 6117db96d56Sopenharmony_ci ); 6127db96d56Sopenharmony_ci 6137db96d56Sopenharmony_ci/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string 6147db96d56Sopenharmony_ci contains null characters. */ 6157db96d56Sopenharmony_ciPyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( 6167db96d56Sopenharmony_ci PyObject *unicode /* Unicode object */ 6177db96d56Sopenharmony_ci ); 6187db96d56Sopenharmony_ci 6197db96d56Sopenharmony_ci/* Return a read-only pointer to the Unicode object's internal 6207db96d56Sopenharmony_ci Py_UNICODE buffer and save the length at size. 6217db96d56Sopenharmony_ci If the wchar_t/Py_UNICODE representation is not yet available, this 6227db96d56Sopenharmony_ci function will calculate it. */ 6237db96d56Sopenharmony_ci 6247db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( 6257db96d56Sopenharmony_ci PyObject *unicode, /* Unicode object */ 6267db96d56Sopenharmony_ci Py_ssize_t *size /* location where to save the length */ 6277db96d56Sopenharmony_ci ); 6287db96d56Sopenharmony_ci 6297db96d56Sopenharmony_ci 6307db96d56Sopenharmony_ci/* Fast access macros */ 6317db96d56Sopenharmony_ci 6327db96d56Sopenharmony_ciPy_DEPRECATED(3.3) 6337db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op) 6347db96d56Sopenharmony_ci{ 6357db96d56Sopenharmony_ci if (PyUnicode_IS_COMPACT_ASCII(op)) { 6367db96d56Sopenharmony_ci return _PyASCIIObject_CAST(op)->length; 6377db96d56Sopenharmony_ci } 6387db96d56Sopenharmony_ci else { 6397db96d56Sopenharmony_ci return _PyCompactUnicodeObject_CAST(op)->wstr_length; 6407db96d56Sopenharmony_ci } 6417db96d56Sopenharmony_ci} 6427db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 6437db96d56Sopenharmony_ci# define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op)) 6447db96d56Sopenharmony_ci#endif 6457db96d56Sopenharmony_ci 6467db96d56Sopenharmony_ci/* Returns the deprecated Py_UNICODE representation's size in code units 6477db96d56Sopenharmony_ci (this includes surrogate pairs as 2 units). 6487db96d56Sopenharmony_ci If the Py_UNICODE representation is not available, it will be computed 6497db96d56Sopenharmony_ci on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ 6507db96d56Sopenharmony_ci 6517db96d56Sopenharmony_ciPy_DEPRECATED(3.3) 6527db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op) 6537db96d56Sopenharmony_ci{ 6547db96d56Sopenharmony_ci _Py_COMP_DIAG_PUSH 6557db96d56Sopenharmony_ci _Py_COMP_DIAG_IGNORE_DEPR_DECLS 6567db96d56Sopenharmony_ci if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) { 6577db96d56Sopenharmony_ci (void)PyUnicode_AsUnicode(op); 6587db96d56Sopenharmony_ci assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL); 6597db96d56Sopenharmony_ci } 6607db96d56Sopenharmony_ci return PyUnicode_WSTR_LENGTH(op); 6617db96d56Sopenharmony_ci _Py_COMP_DIAG_POP 6627db96d56Sopenharmony_ci} 6637db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 6647db96d56Sopenharmony_ci# define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op)) 6657db96d56Sopenharmony_ci#endif 6667db96d56Sopenharmony_ci 6677db96d56Sopenharmony_ciPy_DEPRECATED(3.3) 6687db96d56Sopenharmony_cistatic inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op) 6697db96d56Sopenharmony_ci{ 6707db96d56Sopenharmony_ci _Py_COMP_DIAG_PUSH 6717db96d56Sopenharmony_ci _Py_COMP_DIAG_IGNORE_DEPR_DECLS 6727db96d56Sopenharmony_ci return PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE; 6737db96d56Sopenharmony_ci _Py_COMP_DIAG_POP 6747db96d56Sopenharmony_ci} 6757db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 6767db96d56Sopenharmony_ci# define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op)) 6777db96d56Sopenharmony_ci#endif 6787db96d56Sopenharmony_ci 6797db96d56Sopenharmony_ci/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE 6807db96d56Sopenharmony_ci representation on demand. Using this macro is very inefficient now, 6817db96d56Sopenharmony_ci try to port your code to use the new PyUnicode_*BYTE_DATA() macros or 6827db96d56Sopenharmony_ci use PyUnicode_WRITE() and PyUnicode_READ(). */ 6837db96d56Sopenharmony_ci 6847db96d56Sopenharmony_ciPy_DEPRECATED(3.3) 6857db96d56Sopenharmony_cistatic inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op) 6867db96d56Sopenharmony_ci{ 6877db96d56Sopenharmony_ci wchar_t *wstr = _PyASCIIObject_CAST(op)->wstr; 6887db96d56Sopenharmony_ci if (wstr != _Py_NULL) { 6897db96d56Sopenharmony_ci return wstr; 6907db96d56Sopenharmony_ci } 6917db96d56Sopenharmony_ci 6927db96d56Sopenharmony_ci _Py_COMP_DIAG_PUSH 6937db96d56Sopenharmony_ci _Py_COMP_DIAG_IGNORE_DEPR_DECLS 6947db96d56Sopenharmony_ci return PyUnicode_AsUnicode(op); 6957db96d56Sopenharmony_ci _Py_COMP_DIAG_POP 6967db96d56Sopenharmony_ci} 6977db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 6987db96d56Sopenharmony_ci# define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op)) 6997db96d56Sopenharmony_ci#endif 7007db96d56Sopenharmony_ci 7017db96d56Sopenharmony_ciPy_DEPRECATED(3.3) 7027db96d56Sopenharmony_cistatic inline const char* PyUnicode_AS_DATA(PyObject *op) 7037db96d56Sopenharmony_ci{ 7047db96d56Sopenharmony_ci _Py_COMP_DIAG_PUSH 7057db96d56Sopenharmony_ci _Py_COMP_DIAG_IGNORE_DEPR_DECLS 7067db96d56Sopenharmony_ci Py_UNICODE *data = PyUnicode_AS_UNICODE(op); 7077db96d56Sopenharmony_ci // In C++, casting directly PyUnicode* to const char* is not valid 7087db96d56Sopenharmony_ci return _Py_STATIC_CAST(const char*, _Py_STATIC_CAST(const void*, data)); 7097db96d56Sopenharmony_ci _Py_COMP_DIAG_POP 7107db96d56Sopenharmony_ci} 7117db96d56Sopenharmony_ci#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 7127db96d56Sopenharmony_ci# define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op)) 7137db96d56Sopenharmony_ci#endif 7147db96d56Sopenharmony_ci 7157db96d56Sopenharmony_ci 7167db96d56Sopenharmony_ci/* --- _PyUnicodeWriter API ----------------------------------------------- */ 7177db96d56Sopenharmony_ci 7187db96d56Sopenharmony_citypedef struct { 7197db96d56Sopenharmony_ci PyObject *buffer; 7207db96d56Sopenharmony_ci void *data; 7217db96d56Sopenharmony_ci enum PyUnicode_Kind kind; 7227db96d56Sopenharmony_ci Py_UCS4 maxchar; 7237db96d56Sopenharmony_ci Py_ssize_t size; 7247db96d56Sopenharmony_ci Py_ssize_t pos; 7257db96d56Sopenharmony_ci 7267db96d56Sopenharmony_ci /* minimum number of allocated characters (default: 0) */ 7277db96d56Sopenharmony_ci Py_ssize_t min_length; 7287db96d56Sopenharmony_ci 7297db96d56Sopenharmony_ci /* minimum character (default: 127, ASCII) */ 7307db96d56Sopenharmony_ci Py_UCS4 min_char; 7317db96d56Sopenharmony_ci 7327db96d56Sopenharmony_ci /* If non-zero, overallocate the buffer (default: 0). */ 7337db96d56Sopenharmony_ci unsigned char overallocate; 7347db96d56Sopenharmony_ci 7357db96d56Sopenharmony_ci /* If readonly is 1, buffer is a shared string (cannot be modified) 7367db96d56Sopenharmony_ci and size is set to 0. */ 7377db96d56Sopenharmony_ci unsigned char readonly; 7387db96d56Sopenharmony_ci} _PyUnicodeWriter ; 7397db96d56Sopenharmony_ci 7407db96d56Sopenharmony_ci/* Initialize a Unicode writer. 7417db96d56Sopenharmony_ci * 7427db96d56Sopenharmony_ci * By default, the minimum buffer size is 0 character and overallocation is 7437db96d56Sopenharmony_ci * disabled. Set min_length, min_char and overallocate attributes to control 7447db96d56Sopenharmony_ci * the allocation of the buffer. */ 7457db96d56Sopenharmony_ciPyAPI_FUNC(void) 7467db96d56Sopenharmony_ci_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); 7477db96d56Sopenharmony_ci 7487db96d56Sopenharmony_ci/* Prepare the buffer to write 'length' characters 7497db96d56Sopenharmony_ci with the specified maximum character. 7507db96d56Sopenharmony_ci 7517db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 7527db96d56Sopenharmony_ci#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ 7537db96d56Sopenharmony_ci (((MAXCHAR) <= (WRITER)->maxchar \ 7547db96d56Sopenharmony_ci && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ 7557db96d56Sopenharmony_ci ? 0 \ 7567db96d56Sopenharmony_ci : (((LENGTH) == 0) \ 7577db96d56Sopenharmony_ci ? 0 \ 7587db96d56Sopenharmony_ci : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) 7597db96d56Sopenharmony_ci 7607db96d56Sopenharmony_ci/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro 7617db96d56Sopenharmony_ci instead. */ 7627db96d56Sopenharmony_ciPyAPI_FUNC(int) 7637db96d56Sopenharmony_ci_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, 7647db96d56Sopenharmony_ci Py_ssize_t length, Py_UCS4 maxchar); 7657db96d56Sopenharmony_ci 7667db96d56Sopenharmony_ci/* Prepare the buffer to have at least the kind KIND. 7677db96d56Sopenharmony_ci For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will 7687db96d56Sopenharmony_ci support characters in range U+000-U+FFFF. 7697db96d56Sopenharmony_ci 7707db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 7717db96d56Sopenharmony_ci#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ 7727db96d56Sopenharmony_ci (assert((KIND) != PyUnicode_WCHAR_KIND), \ 7737db96d56Sopenharmony_ci (KIND) <= (WRITER)->kind \ 7747db96d56Sopenharmony_ci ? 0 \ 7757db96d56Sopenharmony_ci : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) 7767db96d56Sopenharmony_ci 7777db96d56Sopenharmony_ci/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() 7787db96d56Sopenharmony_ci macro instead. */ 7797db96d56Sopenharmony_ciPyAPI_FUNC(int) 7807db96d56Sopenharmony_ci_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, 7817db96d56Sopenharmony_ci enum PyUnicode_Kind kind); 7827db96d56Sopenharmony_ci 7837db96d56Sopenharmony_ci/* Append a Unicode character. 7847db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 7857db96d56Sopenharmony_ciPyAPI_FUNC(int) 7867db96d56Sopenharmony_ci_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, 7877db96d56Sopenharmony_ci Py_UCS4 ch 7887db96d56Sopenharmony_ci ); 7897db96d56Sopenharmony_ci 7907db96d56Sopenharmony_ci/* Append a Unicode string. 7917db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 7927db96d56Sopenharmony_ciPyAPI_FUNC(int) 7937db96d56Sopenharmony_ci_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, 7947db96d56Sopenharmony_ci PyObject *str /* Unicode string */ 7957db96d56Sopenharmony_ci ); 7967db96d56Sopenharmony_ci 7977db96d56Sopenharmony_ci/* Append a substring of a Unicode string. 7987db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 7997db96d56Sopenharmony_ciPyAPI_FUNC(int) 8007db96d56Sopenharmony_ci_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, 8017db96d56Sopenharmony_ci PyObject *str, /* Unicode string */ 8027db96d56Sopenharmony_ci Py_ssize_t start, 8037db96d56Sopenharmony_ci Py_ssize_t end 8047db96d56Sopenharmony_ci ); 8057db96d56Sopenharmony_ci 8067db96d56Sopenharmony_ci/* Append an ASCII-encoded byte string. 8077db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 8087db96d56Sopenharmony_ciPyAPI_FUNC(int) 8097db96d56Sopenharmony_ci_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, 8107db96d56Sopenharmony_ci const char *str, /* ASCII-encoded byte string */ 8117db96d56Sopenharmony_ci Py_ssize_t len /* number of bytes, or -1 if unknown */ 8127db96d56Sopenharmony_ci ); 8137db96d56Sopenharmony_ci 8147db96d56Sopenharmony_ci/* Append a latin1-encoded byte string. 8157db96d56Sopenharmony_ci Return 0 on success, raise an exception and return -1 on error. */ 8167db96d56Sopenharmony_ciPyAPI_FUNC(int) 8177db96d56Sopenharmony_ci_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, 8187db96d56Sopenharmony_ci const char *str, /* latin1-encoded byte string */ 8197db96d56Sopenharmony_ci Py_ssize_t len /* length in bytes */ 8207db96d56Sopenharmony_ci ); 8217db96d56Sopenharmony_ci 8227db96d56Sopenharmony_ci/* Get the value of the writer as a Unicode string. Clear the 8237db96d56Sopenharmony_ci buffer of the writer. Raise an exception and return NULL 8247db96d56Sopenharmony_ci on error. */ 8257db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *) 8267db96d56Sopenharmony_ci_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); 8277db96d56Sopenharmony_ci 8287db96d56Sopenharmony_ci/* Deallocate memory of a writer (clear its internal buffer). */ 8297db96d56Sopenharmony_ciPyAPI_FUNC(void) 8307db96d56Sopenharmony_ci_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); 8317db96d56Sopenharmony_ci 8327db96d56Sopenharmony_ci 8337db96d56Sopenharmony_ci/* Format the object based on the format_spec, as defined in PEP 3101 8347db96d56Sopenharmony_ci (Advanced String Formatting). */ 8357db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( 8367db96d56Sopenharmony_ci _PyUnicodeWriter *writer, 8377db96d56Sopenharmony_ci PyObject *obj, 8387db96d56Sopenharmony_ci PyObject *format_spec, 8397db96d56Sopenharmony_ci Py_ssize_t start, 8407db96d56Sopenharmony_ci Py_ssize_t end); 8417db96d56Sopenharmony_ci 8427db96d56Sopenharmony_ci/* --- Manage the default encoding ---------------------------------------- */ 8437db96d56Sopenharmony_ci 8447db96d56Sopenharmony_ci/* Returns a pointer to the default encoding (UTF-8) of the 8457db96d56Sopenharmony_ci Unicode object unicode. 8467db96d56Sopenharmony_ci 8477db96d56Sopenharmony_ci Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation 8487db96d56Sopenharmony_ci in the unicodeobject. 8497db96d56Sopenharmony_ci 8507db96d56Sopenharmony_ci _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to 8517db96d56Sopenharmony_ci support the previous internal function with the same behaviour. 8527db96d56Sopenharmony_ci 8537db96d56Sopenharmony_ci Use of this API is DEPRECATED since no size information can be 8547db96d56Sopenharmony_ci extracted from the returned data. 8557db96d56Sopenharmony_ci*/ 8567db96d56Sopenharmony_ci 8577db96d56Sopenharmony_ciPyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); 8587db96d56Sopenharmony_ci 8597db96d56Sopenharmony_ci#define _PyUnicode_AsString PyUnicode_AsUTF8 8607db96d56Sopenharmony_ci 8617db96d56Sopenharmony_ci/* --- UTF-7 Codecs ------------------------------------------------------- */ 8627db96d56Sopenharmony_ci 8637db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( 8647db96d56Sopenharmony_ci PyObject *unicode, /* Unicode object */ 8657db96d56Sopenharmony_ci int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 8667db96d56Sopenharmony_ci int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 8677db96d56Sopenharmony_ci const char *errors /* error handling */ 8687db96d56Sopenharmony_ci ); 8697db96d56Sopenharmony_ci 8707db96d56Sopenharmony_ci/* --- UTF-8 Codecs ------------------------------------------------------- */ 8717db96d56Sopenharmony_ci 8727db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( 8737db96d56Sopenharmony_ci PyObject *unicode, 8747db96d56Sopenharmony_ci const char *errors); 8757db96d56Sopenharmony_ci 8767db96d56Sopenharmony_ci/* --- UTF-32 Codecs ------------------------------------------------------ */ 8777db96d56Sopenharmony_ci 8787db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( 8797db96d56Sopenharmony_ci PyObject *object, /* Unicode object */ 8807db96d56Sopenharmony_ci const char *errors, /* error handling */ 8817db96d56Sopenharmony_ci int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 8827db96d56Sopenharmony_ci ); 8837db96d56Sopenharmony_ci 8847db96d56Sopenharmony_ci/* --- UTF-16 Codecs ------------------------------------------------------ */ 8857db96d56Sopenharmony_ci 8867db96d56Sopenharmony_ci/* Returns a Python string object holding the UTF-16 encoded value of 8877db96d56Sopenharmony_ci the Unicode data. 8887db96d56Sopenharmony_ci 8897db96d56Sopenharmony_ci If byteorder is not 0, output is written according to the following 8907db96d56Sopenharmony_ci byte order: 8917db96d56Sopenharmony_ci 8927db96d56Sopenharmony_ci byteorder == -1: little endian 8937db96d56Sopenharmony_ci byteorder == 0: native byte order (writes a BOM mark) 8947db96d56Sopenharmony_ci byteorder == 1: big endian 8957db96d56Sopenharmony_ci 8967db96d56Sopenharmony_ci If byteorder is 0, the output string will always start with the 8977db96d56Sopenharmony_ci Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 8987db96d56Sopenharmony_ci prepended. 8997db96d56Sopenharmony_ci*/ 9007db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( 9017db96d56Sopenharmony_ci PyObject* unicode, /* Unicode object */ 9027db96d56Sopenharmony_ci const char *errors, /* error handling */ 9037db96d56Sopenharmony_ci int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 9047db96d56Sopenharmony_ci ); 9057db96d56Sopenharmony_ci 9067db96d56Sopenharmony_ci/* --- Unicode-Escape Codecs ---------------------------------------------- */ 9077db96d56Sopenharmony_ci 9087db96d56Sopenharmony_ci/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */ 9097db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful( 9107db96d56Sopenharmony_ci const char *string, /* Unicode-Escape encoded string */ 9117db96d56Sopenharmony_ci Py_ssize_t length, /* size of string */ 9127db96d56Sopenharmony_ci const char *errors, /* error handling */ 9137db96d56Sopenharmony_ci Py_ssize_t *consumed /* bytes consumed */ 9147db96d56Sopenharmony_ci); 9157db96d56Sopenharmony_ci/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape 9167db96d56Sopenharmony_ci chars. */ 9177db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( 9187db96d56Sopenharmony_ci const char *string, /* Unicode-Escape encoded string */ 9197db96d56Sopenharmony_ci Py_ssize_t length, /* size of string */ 9207db96d56Sopenharmony_ci const char *errors, /* error handling */ 9217db96d56Sopenharmony_ci Py_ssize_t *consumed, /* bytes consumed */ 9227db96d56Sopenharmony_ci const char **first_invalid_escape /* on return, points to first 9237db96d56Sopenharmony_ci invalid escaped char in 9247db96d56Sopenharmony_ci string. */ 9257db96d56Sopenharmony_ci); 9267db96d56Sopenharmony_ci 9277db96d56Sopenharmony_ci/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ 9287db96d56Sopenharmony_ci 9297db96d56Sopenharmony_ci/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */ 9307db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful( 9317db96d56Sopenharmony_ci const char *string, /* Unicode-Escape encoded string */ 9327db96d56Sopenharmony_ci Py_ssize_t length, /* size of string */ 9337db96d56Sopenharmony_ci const char *errors, /* error handling */ 9347db96d56Sopenharmony_ci Py_ssize_t *consumed /* bytes consumed */ 9357db96d56Sopenharmony_ci); 9367db96d56Sopenharmony_ci 9377db96d56Sopenharmony_ci/* --- Latin-1 Codecs ----------------------------------------------------- */ 9387db96d56Sopenharmony_ci 9397db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( 9407db96d56Sopenharmony_ci PyObject* unicode, 9417db96d56Sopenharmony_ci const char* errors); 9427db96d56Sopenharmony_ci 9437db96d56Sopenharmony_ci/* --- ASCII Codecs ------------------------------------------------------- */ 9447db96d56Sopenharmony_ci 9457db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( 9467db96d56Sopenharmony_ci PyObject* unicode, 9477db96d56Sopenharmony_ci const char* errors); 9487db96d56Sopenharmony_ci 9497db96d56Sopenharmony_ci/* --- Character Map Codecs ----------------------------------------------- */ 9507db96d56Sopenharmony_ci 9517db96d56Sopenharmony_ci/* Translate an Unicode object by applying a character mapping table to 9527db96d56Sopenharmony_ci it and return the resulting Unicode object. 9537db96d56Sopenharmony_ci 9547db96d56Sopenharmony_ci The mapping table must map Unicode ordinal integers to Unicode strings, 9557db96d56Sopenharmony_ci Unicode ordinal integers or None (causing deletion of the character). 9567db96d56Sopenharmony_ci 9577db96d56Sopenharmony_ci Mapping tables may be dictionaries or sequences. Unmapped character 9587db96d56Sopenharmony_ci ordinals (ones which cause a LookupError) are left untouched and 9597db96d56Sopenharmony_ci are copied as-is. 9607db96d56Sopenharmony_ci*/ 9617db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( 9627db96d56Sopenharmony_ci PyObject *unicode, /* Unicode object */ 9637db96d56Sopenharmony_ci PyObject *mapping, /* encoding mapping */ 9647db96d56Sopenharmony_ci const char *errors /* error handling */ 9657db96d56Sopenharmony_ci ); 9667db96d56Sopenharmony_ci 9677db96d56Sopenharmony_ci/* --- Decimal Encoder ---------------------------------------------------- */ 9687db96d56Sopenharmony_ci 9697db96d56Sopenharmony_ci/* Coverts a Unicode object holding a decimal value to an ASCII string 9707db96d56Sopenharmony_ci for using in int, float and complex parsers. 9717db96d56Sopenharmony_ci Transforms code points that have decimal digit property to the 9727db96d56Sopenharmony_ci corresponding ASCII digit code points. Transforms spaces to ASCII. 9737db96d56Sopenharmony_ci Transforms code points starting from the first non-ASCII code point that 9747db96d56Sopenharmony_ci is neither a decimal digit nor a space to the end into '?'. */ 9757db96d56Sopenharmony_ci 9767db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( 9777db96d56Sopenharmony_ci PyObject *unicode /* Unicode object */ 9787db96d56Sopenharmony_ci ); 9797db96d56Sopenharmony_ci 9807db96d56Sopenharmony_ci/* --- Methods & Slots ---------------------------------------------------- */ 9817db96d56Sopenharmony_ci 9827db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( 9837db96d56Sopenharmony_ci PyObject *separator, 9847db96d56Sopenharmony_ci PyObject *const *items, 9857db96d56Sopenharmony_ci Py_ssize_t seqlen 9867db96d56Sopenharmony_ci ); 9877db96d56Sopenharmony_ci 9887db96d56Sopenharmony_ci/* Test whether a unicode is equal to ASCII identifier. Return 1 if true, 9897db96d56Sopenharmony_ci 0 otherwise. The right argument must be ASCII identifier. 9907db96d56Sopenharmony_ci Any error occurs inside will be cleared before return. */ 9917db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( 9927db96d56Sopenharmony_ci PyObject *left, /* Left string */ 9937db96d56Sopenharmony_ci _Py_Identifier *right /* Right identifier */ 9947db96d56Sopenharmony_ci ); 9957db96d56Sopenharmony_ci 9967db96d56Sopenharmony_ci/* Test whether a unicode is equal to ASCII string. Return 1 if true, 9977db96d56Sopenharmony_ci 0 otherwise. The right argument must be ASCII-encoded string. 9987db96d56Sopenharmony_ci Any error occurs inside will be cleared before return. */ 9997db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( 10007db96d56Sopenharmony_ci PyObject *left, 10017db96d56Sopenharmony_ci const char *right /* ASCII-encoded string */ 10027db96d56Sopenharmony_ci ); 10037db96d56Sopenharmony_ci 10047db96d56Sopenharmony_ci/* Externally visible for str.strip(unicode) */ 10057db96d56Sopenharmony_ciPyAPI_FUNC(PyObject *) _PyUnicode_XStrip( 10067db96d56Sopenharmony_ci PyObject *self, 10077db96d56Sopenharmony_ci int striptype, 10087db96d56Sopenharmony_ci PyObject *sepobj 10097db96d56Sopenharmony_ci ); 10107db96d56Sopenharmony_ci 10117db96d56Sopenharmony_ci/* Using explicit passed-in values, insert the thousands grouping 10127db96d56Sopenharmony_ci into the string pointed to by buffer. For the argument descriptions, 10137db96d56Sopenharmony_ci see Objects/stringlib/localeutil.h */ 10147db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( 10157db96d56Sopenharmony_ci _PyUnicodeWriter *writer, 10167db96d56Sopenharmony_ci Py_ssize_t n_buffer, 10177db96d56Sopenharmony_ci PyObject *digits, 10187db96d56Sopenharmony_ci Py_ssize_t d_pos, 10197db96d56Sopenharmony_ci Py_ssize_t n_digits, 10207db96d56Sopenharmony_ci Py_ssize_t min_width, 10217db96d56Sopenharmony_ci const char *grouping, 10227db96d56Sopenharmony_ci PyObject *thousands_sep, 10237db96d56Sopenharmony_ci Py_UCS4 *maxchar); 10247db96d56Sopenharmony_ci 10257db96d56Sopenharmony_ci/* === Characters Type APIs =============================================== */ 10267db96d56Sopenharmony_ci 10277db96d56Sopenharmony_ci/* Helper array used by Py_UNICODE_ISSPACE(). */ 10287db96d56Sopenharmony_ci 10297db96d56Sopenharmony_ciPyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; 10307db96d56Sopenharmony_ci 10317db96d56Sopenharmony_ci/* These should not be used directly. Use the Py_UNICODE_IS* and 10327db96d56Sopenharmony_ci Py_UNICODE_TO* macros instead. 10337db96d56Sopenharmony_ci 10347db96d56Sopenharmony_ci These APIs are implemented in Objects/unicodectype.c. 10357db96d56Sopenharmony_ci 10367db96d56Sopenharmony_ci*/ 10377db96d56Sopenharmony_ci 10387db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsLowercase( 10397db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10407db96d56Sopenharmony_ci ); 10417db96d56Sopenharmony_ci 10427db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsUppercase( 10437db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10447db96d56Sopenharmony_ci ); 10457db96d56Sopenharmony_ci 10467db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsTitlecase( 10477db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10487db96d56Sopenharmony_ci ); 10497db96d56Sopenharmony_ci 10507db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsXidStart( 10517db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10527db96d56Sopenharmony_ci ); 10537db96d56Sopenharmony_ci 10547db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsXidContinue( 10557db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10567db96d56Sopenharmony_ci ); 10577db96d56Sopenharmony_ci 10587db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsWhitespace( 10597db96d56Sopenharmony_ci const Py_UCS4 ch /* Unicode character */ 10607db96d56Sopenharmony_ci ); 10617db96d56Sopenharmony_ci 10627db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsLinebreak( 10637db96d56Sopenharmony_ci const Py_UCS4 ch /* Unicode character */ 10647db96d56Sopenharmony_ci ); 10657db96d56Sopenharmony_ci 10667db96d56Sopenharmony_ci/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( 10677db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10687db96d56Sopenharmony_ci ); 10697db96d56Sopenharmony_ci 10707db96d56Sopenharmony_ci/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( 10717db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10727db96d56Sopenharmony_ci ); 10737db96d56Sopenharmony_ci 10747db96d56Sopenharmony_ciPy_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( 10757db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 10767db96d56Sopenharmony_ci ); 10777db96d56Sopenharmony_ci 10787db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToLowerFull( 10797db96d56Sopenharmony_ci Py_UCS4 ch, /* Unicode character */ 10807db96d56Sopenharmony_ci Py_UCS4 *res 10817db96d56Sopenharmony_ci ); 10827db96d56Sopenharmony_ci 10837db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToTitleFull( 10847db96d56Sopenharmony_ci Py_UCS4 ch, /* Unicode character */ 10857db96d56Sopenharmony_ci Py_UCS4 *res 10867db96d56Sopenharmony_ci ); 10877db96d56Sopenharmony_ci 10887db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToUpperFull( 10897db96d56Sopenharmony_ci Py_UCS4 ch, /* Unicode character */ 10907db96d56Sopenharmony_ci Py_UCS4 *res 10917db96d56Sopenharmony_ci ); 10927db96d56Sopenharmony_ci 10937db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToFoldedFull( 10947db96d56Sopenharmony_ci Py_UCS4 ch, /* Unicode character */ 10957db96d56Sopenharmony_ci Py_UCS4 *res 10967db96d56Sopenharmony_ci ); 10977db96d56Sopenharmony_ci 10987db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( 10997db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11007db96d56Sopenharmony_ci ); 11017db96d56Sopenharmony_ci 11027db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsCased( 11037db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11047db96d56Sopenharmony_ci ); 11057db96d56Sopenharmony_ci 11067db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( 11077db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11087db96d56Sopenharmony_ci ); 11097db96d56Sopenharmony_ci 11107db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_ToDigit( 11117db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11127db96d56Sopenharmony_ci ); 11137db96d56Sopenharmony_ci 11147db96d56Sopenharmony_ciPyAPI_FUNC(double) _PyUnicode_ToNumeric( 11157db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11167db96d56Sopenharmony_ci ); 11177db96d56Sopenharmony_ci 11187db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( 11197db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11207db96d56Sopenharmony_ci ); 11217db96d56Sopenharmony_ci 11227db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsDigit( 11237db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11247db96d56Sopenharmony_ci ); 11257db96d56Sopenharmony_ci 11267db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsNumeric( 11277db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11287db96d56Sopenharmony_ci ); 11297db96d56Sopenharmony_ci 11307db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsPrintable( 11317db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11327db96d56Sopenharmony_ci ); 11337db96d56Sopenharmony_ci 11347db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_IsAlpha( 11357db96d56Sopenharmony_ci Py_UCS4 ch /* Unicode character */ 11367db96d56Sopenharmony_ci ); 11377db96d56Sopenharmony_ci 11387db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); 11397db96d56Sopenharmony_ci 11407db96d56Sopenharmony_ci/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ 11417db96d56Sopenharmony_ciPyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); 11427db96d56Sopenharmony_ci 11437db96d56Sopenharmony_ci/* Fast equality check when the inputs are known to be exact unicode types 11447db96d56Sopenharmony_ci and where the hash values are equal (i.e. a very probable match) */ 11457db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); 11467db96d56Sopenharmony_ci 11477db96d56Sopenharmony_ci/* Equality check. Returns -1 on failure. */ 11487db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); 11497db96d56Sopenharmony_ci 11507db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); 11517db96d56Sopenharmony_ciPyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); 11527db96d56Sopenharmony_ci 11537db96d56Sopenharmony_ciPyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); 1154