17db96d56Sopenharmony_ci/* 27db96d56Sopenharmony_ci * Secret Labs' Regular Expression Engine 37db96d56Sopenharmony_ci * 47db96d56Sopenharmony_ci * regular expression matching engine 57db96d56Sopenharmony_ci * 67db96d56Sopenharmony_ci * partial history: 77db96d56Sopenharmony_ci * 1999-10-24 fl created (based on existing template matcher code) 87db96d56Sopenharmony_ci * 2000-03-06 fl first alpha, sort of 97db96d56Sopenharmony_ci * 2000-08-01 fl fixes for 1.6b1 107db96d56Sopenharmony_ci * 2000-08-07 fl use PyOS_CheckStack() if available 117db96d56Sopenharmony_ci * 2000-09-20 fl added expand method 127db96d56Sopenharmony_ci * 2001-03-20 fl lots of fixes for 2.1b2 137db96d56Sopenharmony_ci * 2001-04-15 fl export copyright as Python attribute, not global 147db96d56Sopenharmony_ci * 2001-04-28 fl added __copy__ methods (work in progress) 157db96d56Sopenharmony_ci * 2001-05-14 fl fixes for 1.5.2 compatibility 167db96d56Sopenharmony_ci * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) 177db96d56Sopenharmony_ci * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) 187db96d56Sopenharmony_ci * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1 197db96d56Sopenharmony_ci * 2001-10-21 fl added sub/subn primitive 207db96d56Sopenharmony_ci * 2001-10-24 fl added finditer primitive (for 2.2 only) 217db96d56Sopenharmony_ci * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) 227db96d56Sopenharmony_ci * 2002-11-09 fl fixed empty sub/subn return type 237db96d56Sopenharmony_ci * 2003-04-18 mvl fully support 4-byte codes 247db96d56Sopenharmony_ci * 2003-10-17 gn implemented non recursive scheme 257db96d56Sopenharmony_ci * 2013-02-04 mrab added fullmatch primitive 267db96d56Sopenharmony_ci * 277db96d56Sopenharmony_ci * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 287db96d56Sopenharmony_ci * 297db96d56Sopenharmony_ci * This version of the SRE library can be redistributed under CNRI's 307db96d56Sopenharmony_ci * Python 1.6 license. For any other use, please contact Secret Labs 317db96d56Sopenharmony_ci * AB (info@pythonware.com). 327db96d56Sopenharmony_ci * 337db96d56Sopenharmony_ci * Portions of this engine have been developed in cooperation with 347db96d56Sopenharmony_ci * CNRI. Hewlett-Packard provided funding for 1.6 integration and 357db96d56Sopenharmony_ci * other compatibility work. 367db96d56Sopenharmony_ci */ 377db96d56Sopenharmony_ci 387db96d56Sopenharmony_cistatic const char copyright[] = 397db96d56Sopenharmony_ci " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci#define PY_SSIZE_T_CLEAN 427db96d56Sopenharmony_ci 437db96d56Sopenharmony_ci#include "Python.h" 447db96d56Sopenharmony_ci#include "pycore_long.h" // _PyLong_GetZero() 457db96d56Sopenharmony_ci#include "pycore_moduleobject.h" // _PyModule_GetState() 467db96d56Sopenharmony_ci#include "structmember.h" // PyMemberDef 477db96d56Sopenharmony_ci 487db96d56Sopenharmony_ci#include "sre.h" 497db96d56Sopenharmony_ci 507db96d56Sopenharmony_ci#define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) 517db96d56Sopenharmony_ci 527db96d56Sopenharmony_ci#include <ctype.h> 537db96d56Sopenharmony_ci 547db96d56Sopenharmony_ci/* name of this module, minus the leading underscore */ 557db96d56Sopenharmony_ci#if !defined(SRE_MODULE) 567db96d56Sopenharmony_ci#define SRE_MODULE "sre" 577db96d56Sopenharmony_ci#endif 587db96d56Sopenharmony_ci 597db96d56Sopenharmony_ci#define SRE_PY_MODULE "re" 607db96d56Sopenharmony_ci 617db96d56Sopenharmony_ci/* defining this one enables tracing */ 627db96d56Sopenharmony_ci#undef VERBOSE 637db96d56Sopenharmony_ci 647db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_ci#if defined(_MSC_VER) 677db96d56Sopenharmony_ci#pragma optimize("agtw", on) /* doesn't seem to make much difference... */ 687db96d56Sopenharmony_ci#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ 697db96d56Sopenharmony_ci/* fastest possible local call under MSVC */ 707db96d56Sopenharmony_ci#define LOCAL(type) static __inline type __fastcall 717db96d56Sopenharmony_ci#else 727db96d56Sopenharmony_ci#define LOCAL(type) static inline type 737db96d56Sopenharmony_ci#endif 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_ci/* error codes */ 767db96d56Sopenharmony_ci#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ 777db96d56Sopenharmony_ci#define SRE_ERROR_STATE -2 /* illegal state */ 787db96d56Sopenharmony_ci#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ 797db96d56Sopenharmony_ci#define SRE_ERROR_MEMORY -9 /* out of memory */ 807db96d56Sopenharmony_ci#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ 817db96d56Sopenharmony_ci 827db96d56Sopenharmony_ci#if defined(VERBOSE) 837db96d56Sopenharmony_ci#define TRACE(v) printf v 847db96d56Sopenharmony_ci#else 857db96d56Sopenharmony_ci#define TRACE(v) 867db96d56Sopenharmony_ci#endif 877db96d56Sopenharmony_ci 887db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 897db96d56Sopenharmony_ci/* search engine state */ 907db96d56Sopenharmony_ci 917db96d56Sopenharmony_ci#define SRE_IS_DIGIT(ch)\ 927db96d56Sopenharmony_ci ((ch) <= '9' && Py_ISDIGIT(ch)) 937db96d56Sopenharmony_ci#define SRE_IS_SPACE(ch)\ 947db96d56Sopenharmony_ci ((ch) <= ' ' && Py_ISSPACE(ch)) 957db96d56Sopenharmony_ci#define SRE_IS_LINEBREAK(ch)\ 967db96d56Sopenharmony_ci ((ch) == '\n') 977db96d56Sopenharmony_ci#define SRE_IS_WORD(ch)\ 987db96d56Sopenharmony_ci ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) 997db96d56Sopenharmony_ci 1007db96d56Sopenharmony_cistatic unsigned int sre_lower_ascii(unsigned int ch) 1017db96d56Sopenharmony_ci{ 1027db96d56Sopenharmony_ci return ((ch) < 128 ? Py_TOLOWER(ch) : ch); 1037db96d56Sopenharmony_ci} 1047db96d56Sopenharmony_ci 1057db96d56Sopenharmony_ci/* locale-specific character predicates */ 1067db96d56Sopenharmony_ci/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids 1077db96d56Sopenharmony_ci * warnings when c's type supports only numbers < N+1 */ 1087db96d56Sopenharmony_ci#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) 1097db96d56Sopenharmony_ci#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_cistatic unsigned int sre_lower_locale(unsigned int ch) 1127db96d56Sopenharmony_ci{ 1137db96d56Sopenharmony_ci return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); 1147db96d56Sopenharmony_ci} 1157db96d56Sopenharmony_ci 1167db96d56Sopenharmony_cistatic unsigned int sre_upper_locale(unsigned int ch) 1177db96d56Sopenharmony_ci{ 1187db96d56Sopenharmony_ci return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); 1197db96d56Sopenharmony_ci} 1207db96d56Sopenharmony_ci 1217db96d56Sopenharmony_ci/* unicode-specific character predicates */ 1227db96d56Sopenharmony_ci 1237db96d56Sopenharmony_ci#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) 1247db96d56Sopenharmony_ci#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) 1257db96d56Sopenharmony_ci#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) 1267db96d56Sopenharmony_ci#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) 1277db96d56Sopenharmony_ci#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') 1287db96d56Sopenharmony_ci 1297db96d56Sopenharmony_cistatic unsigned int sre_lower_unicode(unsigned int ch) 1307db96d56Sopenharmony_ci{ 1317db96d56Sopenharmony_ci return (unsigned int) Py_UNICODE_TOLOWER(ch); 1327db96d56Sopenharmony_ci} 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_cistatic unsigned int sre_upper_unicode(unsigned int ch) 1357db96d56Sopenharmony_ci{ 1367db96d56Sopenharmony_ci return (unsigned int) Py_UNICODE_TOUPPER(ch); 1377db96d56Sopenharmony_ci} 1387db96d56Sopenharmony_ci 1397db96d56Sopenharmony_ciLOCAL(int) 1407db96d56Sopenharmony_cisre_category(SRE_CODE category, unsigned int ch) 1417db96d56Sopenharmony_ci{ 1427db96d56Sopenharmony_ci switch (category) { 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci case SRE_CATEGORY_DIGIT: 1457db96d56Sopenharmony_ci return SRE_IS_DIGIT(ch); 1467db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_DIGIT: 1477db96d56Sopenharmony_ci return !SRE_IS_DIGIT(ch); 1487db96d56Sopenharmony_ci case SRE_CATEGORY_SPACE: 1497db96d56Sopenharmony_ci return SRE_IS_SPACE(ch); 1507db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_SPACE: 1517db96d56Sopenharmony_ci return !SRE_IS_SPACE(ch); 1527db96d56Sopenharmony_ci case SRE_CATEGORY_WORD: 1537db96d56Sopenharmony_ci return SRE_IS_WORD(ch); 1547db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_WORD: 1557db96d56Sopenharmony_ci return !SRE_IS_WORD(ch); 1567db96d56Sopenharmony_ci case SRE_CATEGORY_LINEBREAK: 1577db96d56Sopenharmony_ci return SRE_IS_LINEBREAK(ch); 1587db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_LINEBREAK: 1597db96d56Sopenharmony_ci return !SRE_IS_LINEBREAK(ch); 1607db96d56Sopenharmony_ci 1617db96d56Sopenharmony_ci case SRE_CATEGORY_LOC_WORD: 1627db96d56Sopenharmony_ci return SRE_LOC_IS_WORD(ch); 1637db96d56Sopenharmony_ci case SRE_CATEGORY_LOC_NOT_WORD: 1647db96d56Sopenharmony_ci return !SRE_LOC_IS_WORD(ch); 1657db96d56Sopenharmony_ci 1667db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_DIGIT: 1677db96d56Sopenharmony_ci return SRE_UNI_IS_DIGIT(ch); 1687db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_DIGIT: 1697db96d56Sopenharmony_ci return !SRE_UNI_IS_DIGIT(ch); 1707db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_SPACE: 1717db96d56Sopenharmony_ci return SRE_UNI_IS_SPACE(ch); 1727db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_SPACE: 1737db96d56Sopenharmony_ci return !SRE_UNI_IS_SPACE(ch); 1747db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_WORD: 1757db96d56Sopenharmony_ci return SRE_UNI_IS_WORD(ch); 1767db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_WORD: 1777db96d56Sopenharmony_ci return !SRE_UNI_IS_WORD(ch); 1787db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_LINEBREAK: 1797db96d56Sopenharmony_ci return SRE_UNI_IS_LINEBREAK(ch); 1807db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_LINEBREAK: 1817db96d56Sopenharmony_ci return !SRE_UNI_IS_LINEBREAK(ch); 1827db96d56Sopenharmony_ci } 1837db96d56Sopenharmony_ci return 0; 1847db96d56Sopenharmony_ci} 1857db96d56Sopenharmony_ci 1867db96d56Sopenharmony_ciLOCAL(int) 1877db96d56Sopenharmony_cichar_loc_ignore(SRE_CODE pattern, SRE_CODE ch) 1887db96d56Sopenharmony_ci{ 1897db96d56Sopenharmony_ci return ch == pattern 1907db96d56Sopenharmony_ci || (SRE_CODE) sre_lower_locale(ch) == pattern 1917db96d56Sopenharmony_ci || (SRE_CODE) sre_upper_locale(ch) == pattern; 1927db96d56Sopenharmony_ci} 1937db96d56Sopenharmony_ci 1947db96d56Sopenharmony_ci 1957db96d56Sopenharmony_ci/* helpers */ 1967db96d56Sopenharmony_ci 1977db96d56Sopenharmony_cistatic void 1987db96d56Sopenharmony_cidata_stack_dealloc(SRE_STATE* state) 1997db96d56Sopenharmony_ci{ 2007db96d56Sopenharmony_ci if (state->data_stack) { 2017db96d56Sopenharmony_ci PyMem_Free(state->data_stack); 2027db96d56Sopenharmony_ci state->data_stack = NULL; 2037db96d56Sopenharmony_ci } 2047db96d56Sopenharmony_ci state->data_stack_size = state->data_stack_base = 0; 2057db96d56Sopenharmony_ci} 2067db96d56Sopenharmony_ci 2077db96d56Sopenharmony_cistatic int 2087db96d56Sopenharmony_cidata_stack_grow(SRE_STATE* state, Py_ssize_t size) 2097db96d56Sopenharmony_ci{ 2107db96d56Sopenharmony_ci Py_ssize_t minsize, cursize; 2117db96d56Sopenharmony_ci minsize = state->data_stack_base+size; 2127db96d56Sopenharmony_ci cursize = state->data_stack_size; 2137db96d56Sopenharmony_ci if (cursize < minsize) { 2147db96d56Sopenharmony_ci void* stack; 2157db96d56Sopenharmony_ci cursize = minsize+minsize/4+1024; 2167db96d56Sopenharmony_ci TRACE(("allocate/grow stack %zd\n", cursize)); 2177db96d56Sopenharmony_ci stack = PyMem_Realloc(state->data_stack, cursize); 2187db96d56Sopenharmony_ci if (!stack) { 2197db96d56Sopenharmony_ci data_stack_dealloc(state); 2207db96d56Sopenharmony_ci return SRE_ERROR_MEMORY; 2217db96d56Sopenharmony_ci } 2227db96d56Sopenharmony_ci state->data_stack = (char *)stack; 2237db96d56Sopenharmony_ci state->data_stack_size = cursize; 2247db96d56Sopenharmony_ci } 2257db96d56Sopenharmony_ci return 0; 2267db96d56Sopenharmony_ci} 2277db96d56Sopenharmony_ci 2287db96d56Sopenharmony_ci/* generate 8-bit version */ 2297db96d56Sopenharmony_ci 2307db96d56Sopenharmony_ci#define SRE_CHAR Py_UCS1 2317db96d56Sopenharmony_ci#define SIZEOF_SRE_CHAR 1 2327db96d56Sopenharmony_ci#define SRE(F) sre_ucs1_##F 2337db96d56Sopenharmony_ci#include "sre_lib.h" 2347db96d56Sopenharmony_ci 2357db96d56Sopenharmony_ci/* generate 16-bit unicode version */ 2367db96d56Sopenharmony_ci 2377db96d56Sopenharmony_ci#define SRE_CHAR Py_UCS2 2387db96d56Sopenharmony_ci#define SIZEOF_SRE_CHAR 2 2397db96d56Sopenharmony_ci#define SRE(F) sre_ucs2_##F 2407db96d56Sopenharmony_ci#include "sre_lib.h" 2417db96d56Sopenharmony_ci 2427db96d56Sopenharmony_ci/* generate 32-bit unicode version */ 2437db96d56Sopenharmony_ci 2447db96d56Sopenharmony_ci#define SRE_CHAR Py_UCS4 2457db96d56Sopenharmony_ci#define SIZEOF_SRE_CHAR 4 2467db96d56Sopenharmony_ci#define SRE(F) sre_ucs4_##F 2477db96d56Sopenharmony_ci#include "sre_lib.h" 2487db96d56Sopenharmony_ci 2497db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 2507db96d56Sopenharmony_ci/* factories and destructors */ 2517db96d56Sopenharmony_ci 2527db96d56Sopenharmony_ci/* module state */ 2537db96d56Sopenharmony_citypedef struct { 2547db96d56Sopenharmony_ci PyTypeObject *Pattern_Type; 2557db96d56Sopenharmony_ci PyTypeObject *Match_Type; 2567db96d56Sopenharmony_ci PyTypeObject *Scanner_Type; 2577db96d56Sopenharmony_ci} _sremodulestate; 2587db96d56Sopenharmony_ci 2597db96d56Sopenharmony_cistatic _sremodulestate * 2607db96d56Sopenharmony_ciget_sre_module_state(PyObject *m) 2617db96d56Sopenharmony_ci{ 2627db96d56Sopenharmony_ci _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m); 2637db96d56Sopenharmony_ci assert(state); 2647db96d56Sopenharmony_ci return state; 2657db96d56Sopenharmony_ci} 2667db96d56Sopenharmony_ci 2677db96d56Sopenharmony_cistatic struct PyModuleDef sremodule; 2687db96d56Sopenharmony_ci#define get_sre_module_state_by_class(cls) \ 2697db96d56Sopenharmony_ci (get_sre_module_state(PyType_GetModule(cls))) 2707db96d56Sopenharmony_ci 2717db96d56Sopenharmony_ci/* see sre.h for object declarations */ 2727db96d56Sopenharmony_cistatic PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); 2737db96d56Sopenharmony_cistatic PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); 2747db96d56Sopenharmony_ci 2757db96d56Sopenharmony_ci/*[clinic input] 2767db96d56Sopenharmony_cimodule _sre 2777db96d56Sopenharmony_ciclass _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" 2787db96d56Sopenharmony_ciclass _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type" 2797db96d56Sopenharmony_ciclass _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type" 2807db96d56Sopenharmony_ci[clinic start generated code]*/ 2817db96d56Sopenharmony_ci/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/ 2827db96d56Sopenharmony_ci 2837db96d56Sopenharmony_ci/*[clinic input] 2847db96d56Sopenharmony_ci_sre.getcodesize -> int 2857db96d56Sopenharmony_ci[clinic start generated code]*/ 2867db96d56Sopenharmony_ci 2877db96d56Sopenharmony_cistatic int 2887db96d56Sopenharmony_ci_sre_getcodesize_impl(PyObject *module) 2897db96d56Sopenharmony_ci/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ 2907db96d56Sopenharmony_ci{ 2917db96d56Sopenharmony_ci return sizeof(SRE_CODE); 2927db96d56Sopenharmony_ci} 2937db96d56Sopenharmony_ci 2947db96d56Sopenharmony_ci/*[clinic input] 2957db96d56Sopenharmony_ci_sre.ascii_iscased -> bool 2967db96d56Sopenharmony_ci 2977db96d56Sopenharmony_ci character: int 2987db96d56Sopenharmony_ci / 2997db96d56Sopenharmony_ci 3007db96d56Sopenharmony_ci[clinic start generated code]*/ 3017db96d56Sopenharmony_ci 3027db96d56Sopenharmony_cistatic int 3037db96d56Sopenharmony_ci_sre_ascii_iscased_impl(PyObject *module, int character) 3047db96d56Sopenharmony_ci/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/ 3057db96d56Sopenharmony_ci{ 3067db96d56Sopenharmony_ci unsigned int ch = (unsigned int)character; 3077db96d56Sopenharmony_ci return ch < 128 && Py_ISALPHA(ch); 3087db96d56Sopenharmony_ci} 3097db96d56Sopenharmony_ci 3107db96d56Sopenharmony_ci/*[clinic input] 3117db96d56Sopenharmony_ci_sre.unicode_iscased -> bool 3127db96d56Sopenharmony_ci 3137db96d56Sopenharmony_ci character: int 3147db96d56Sopenharmony_ci / 3157db96d56Sopenharmony_ci 3167db96d56Sopenharmony_ci[clinic start generated code]*/ 3177db96d56Sopenharmony_ci 3187db96d56Sopenharmony_cistatic int 3197db96d56Sopenharmony_ci_sre_unicode_iscased_impl(PyObject *module, int character) 3207db96d56Sopenharmony_ci/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/ 3217db96d56Sopenharmony_ci{ 3227db96d56Sopenharmony_ci unsigned int ch = (unsigned int)character; 3237db96d56Sopenharmony_ci return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch); 3247db96d56Sopenharmony_ci} 3257db96d56Sopenharmony_ci 3267db96d56Sopenharmony_ci/*[clinic input] 3277db96d56Sopenharmony_ci_sre.ascii_tolower -> int 3287db96d56Sopenharmony_ci 3297db96d56Sopenharmony_ci character: int 3307db96d56Sopenharmony_ci / 3317db96d56Sopenharmony_ci 3327db96d56Sopenharmony_ci[clinic start generated code]*/ 3337db96d56Sopenharmony_ci 3347db96d56Sopenharmony_cistatic int 3357db96d56Sopenharmony_ci_sre_ascii_tolower_impl(PyObject *module, int character) 3367db96d56Sopenharmony_ci/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ 3377db96d56Sopenharmony_ci{ 3387db96d56Sopenharmony_ci return sre_lower_ascii(character); 3397db96d56Sopenharmony_ci} 3407db96d56Sopenharmony_ci 3417db96d56Sopenharmony_ci/*[clinic input] 3427db96d56Sopenharmony_ci_sre.unicode_tolower -> int 3437db96d56Sopenharmony_ci 3447db96d56Sopenharmony_ci character: int 3457db96d56Sopenharmony_ci / 3467db96d56Sopenharmony_ci 3477db96d56Sopenharmony_ci[clinic start generated code]*/ 3487db96d56Sopenharmony_ci 3497db96d56Sopenharmony_cistatic int 3507db96d56Sopenharmony_ci_sre_unicode_tolower_impl(PyObject *module, int character) 3517db96d56Sopenharmony_ci/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ 3527db96d56Sopenharmony_ci{ 3537db96d56Sopenharmony_ci return sre_lower_unicode(character); 3547db96d56Sopenharmony_ci} 3557db96d56Sopenharmony_ci 3567db96d56Sopenharmony_ciLOCAL(void) 3577db96d56Sopenharmony_cistate_reset(SRE_STATE* state) 3587db96d56Sopenharmony_ci{ 3597db96d56Sopenharmony_ci /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ 3607db96d56Sopenharmony_ci /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ 3617db96d56Sopenharmony_ci 3627db96d56Sopenharmony_ci state->lastmark = -1; 3637db96d56Sopenharmony_ci state->lastindex = -1; 3647db96d56Sopenharmony_ci 3657db96d56Sopenharmony_ci state->repeat = NULL; 3667db96d56Sopenharmony_ci 3677db96d56Sopenharmony_ci data_stack_dealloc(state); 3687db96d56Sopenharmony_ci} 3697db96d56Sopenharmony_ci 3707db96d56Sopenharmony_cistatic const void* 3717db96d56Sopenharmony_cigetstring(PyObject* string, Py_ssize_t* p_length, 3727db96d56Sopenharmony_ci int* p_isbytes, int* p_charsize, 3737db96d56Sopenharmony_ci Py_buffer *view) 3747db96d56Sopenharmony_ci{ 3757db96d56Sopenharmony_ci /* given a python object, return a data pointer, a length (in 3767db96d56Sopenharmony_ci characters), and a character size. return NULL if the object 3777db96d56Sopenharmony_ci is not a string (or not compatible) */ 3787db96d56Sopenharmony_ci 3797db96d56Sopenharmony_ci /* Unicode objects do not support the buffer API. So, get the data 3807db96d56Sopenharmony_ci directly instead. */ 3817db96d56Sopenharmony_ci if (PyUnicode_Check(string)) { 3827db96d56Sopenharmony_ci if (PyUnicode_READY(string) == -1) 3837db96d56Sopenharmony_ci return NULL; 3847db96d56Sopenharmony_ci *p_length = PyUnicode_GET_LENGTH(string); 3857db96d56Sopenharmony_ci *p_charsize = PyUnicode_KIND(string); 3867db96d56Sopenharmony_ci *p_isbytes = 0; 3877db96d56Sopenharmony_ci return PyUnicode_DATA(string); 3887db96d56Sopenharmony_ci } 3897db96d56Sopenharmony_ci 3907db96d56Sopenharmony_ci /* get pointer to byte string buffer */ 3917db96d56Sopenharmony_ci if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { 3927db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, "expected string or bytes-like " 3937db96d56Sopenharmony_ci "object, got '%.200s'", Py_TYPE(string)->tp_name); 3947db96d56Sopenharmony_ci return NULL; 3957db96d56Sopenharmony_ci } 3967db96d56Sopenharmony_ci 3977db96d56Sopenharmony_ci *p_length = view->len; 3987db96d56Sopenharmony_ci *p_charsize = 1; 3997db96d56Sopenharmony_ci *p_isbytes = 1; 4007db96d56Sopenharmony_ci 4017db96d56Sopenharmony_ci if (view->buf == NULL) { 4027db96d56Sopenharmony_ci PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); 4037db96d56Sopenharmony_ci PyBuffer_Release(view); 4047db96d56Sopenharmony_ci view->buf = NULL; 4057db96d56Sopenharmony_ci return NULL; 4067db96d56Sopenharmony_ci } 4077db96d56Sopenharmony_ci return view->buf; 4087db96d56Sopenharmony_ci} 4097db96d56Sopenharmony_ci 4107db96d56Sopenharmony_ciLOCAL(PyObject*) 4117db96d56Sopenharmony_cistate_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, 4127db96d56Sopenharmony_ci Py_ssize_t start, Py_ssize_t end) 4137db96d56Sopenharmony_ci{ 4147db96d56Sopenharmony_ci /* prepare state object */ 4157db96d56Sopenharmony_ci 4167db96d56Sopenharmony_ci Py_ssize_t length; 4177db96d56Sopenharmony_ci int isbytes, charsize; 4187db96d56Sopenharmony_ci const void* ptr; 4197db96d56Sopenharmony_ci 4207db96d56Sopenharmony_ci memset(state, 0, sizeof(SRE_STATE)); 4217db96d56Sopenharmony_ci 4227db96d56Sopenharmony_ci state->mark = PyMem_New(const void *, pattern->groups * 2); 4237db96d56Sopenharmony_ci if (!state->mark) { 4247db96d56Sopenharmony_ci PyErr_NoMemory(); 4257db96d56Sopenharmony_ci goto err; 4267db96d56Sopenharmony_ci } 4277db96d56Sopenharmony_ci state->lastmark = -1; 4287db96d56Sopenharmony_ci state->lastindex = -1; 4297db96d56Sopenharmony_ci 4307db96d56Sopenharmony_ci state->buffer.buf = NULL; 4317db96d56Sopenharmony_ci ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); 4327db96d56Sopenharmony_ci if (!ptr) 4337db96d56Sopenharmony_ci goto err; 4347db96d56Sopenharmony_ci 4357db96d56Sopenharmony_ci if (isbytes && pattern->isbytes == 0) { 4367db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 4377db96d56Sopenharmony_ci "cannot use a string pattern on a bytes-like object"); 4387db96d56Sopenharmony_ci goto err; 4397db96d56Sopenharmony_ci } 4407db96d56Sopenharmony_ci if (!isbytes && pattern->isbytes > 0) { 4417db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 4427db96d56Sopenharmony_ci "cannot use a bytes pattern on a string-like object"); 4437db96d56Sopenharmony_ci goto err; 4447db96d56Sopenharmony_ci } 4457db96d56Sopenharmony_ci 4467db96d56Sopenharmony_ci /* adjust boundaries */ 4477db96d56Sopenharmony_ci if (start < 0) 4487db96d56Sopenharmony_ci start = 0; 4497db96d56Sopenharmony_ci else if (start > length) 4507db96d56Sopenharmony_ci start = length; 4517db96d56Sopenharmony_ci 4527db96d56Sopenharmony_ci if (end < 0) 4537db96d56Sopenharmony_ci end = 0; 4547db96d56Sopenharmony_ci else if (end > length) 4557db96d56Sopenharmony_ci end = length; 4567db96d56Sopenharmony_ci 4577db96d56Sopenharmony_ci state->isbytes = isbytes; 4587db96d56Sopenharmony_ci state->charsize = charsize; 4597db96d56Sopenharmony_ci state->match_all = 0; 4607db96d56Sopenharmony_ci state->must_advance = 0; 4617db96d56Sopenharmony_ci 4627db96d56Sopenharmony_ci state->beginning = ptr; 4637db96d56Sopenharmony_ci 4647db96d56Sopenharmony_ci state->start = (void*) ((char*) ptr + start * state->charsize); 4657db96d56Sopenharmony_ci state->end = (void*) ((char*) ptr + end * state->charsize); 4667db96d56Sopenharmony_ci 4677db96d56Sopenharmony_ci Py_INCREF(string); 4687db96d56Sopenharmony_ci state->string = string; 4697db96d56Sopenharmony_ci state->pos = start; 4707db96d56Sopenharmony_ci state->endpos = end; 4717db96d56Sopenharmony_ci 4727db96d56Sopenharmony_ci return string; 4737db96d56Sopenharmony_ci err: 4747db96d56Sopenharmony_ci /* We add an explicit cast here because MSVC has a bug when 4757db96d56Sopenharmony_ci compiling C code where it believes that `const void**` cannot be 4767db96d56Sopenharmony_ci safely casted to `void*`, see bpo-39943 for details. */ 4777db96d56Sopenharmony_ci PyMem_Free((void*) state->mark); 4787db96d56Sopenharmony_ci state->mark = NULL; 4797db96d56Sopenharmony_ci if (state->buffer.buf) 4807db96d56Sopenharmony_ci PyBuffer_Release(&state->buffer); 4817db96d56Sopenharmony_ci return NULL; 4827db96d56Sopenharmony_ci} 4837db96d56Sopenharmony_ci 4847db96d56Sopenharmony_ciLOCAL(void) 4857db96d56Sopenharmony_cistate_fini(SRE_STATE* state) 4867db96d56Sopenharmony_ci{ 4877db96d56Sopenharmony_ci if (state->buffer.buf) 4887db96d56Sopenharmony_ci PyBuffer_Release(&state->buffer); 4897db96d56Sopenharmony_ci Py_XDECREF(state->string); 4907db96d56Sopenharmony_ci data_stack_dealloc(state); 4917db96d56Sopenharmony_ci /* See above PyMem_Del for why we explicitly cast here. */ 4927db96d56Sopenharmony_ci PyMem_Free((void*) state->mark); 4937db96d56Sopenharmony_ci state->mark = NULL; 4947db96d56Sopenharmony_ci} 4957db96d56Sopenharmony_ci 4967db96d56Sopenharmony_ci/* calculate offset from start of string */ 4977db96d56Sopenharmony_ci#define STATE_OFFSET(state, member)\ 4987db96d56Sopenharmony_ci (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) 4997db96d56Sopenharmony_ci 5007db96d56Sopenharmony_ciLOCAL(PyObject*) 5017db96d56Sopenharmony_cigetslice(int isbytes, const void *ptr, 5027db96d56Sopenharmony_ci PyObject* string, Py_ssize_t start, Py_ssize_t end) 5037db96d56Sopenharmony_ci{ 5047db96d56Sopenharmony_ci if (isbytes) { 5057db96d56Sopenharmony_ci if (PyBytes_CheckExact(string) && 5067db96d56Sopenharmony_ci start == 0 && end == PyBytes_GET_SIZE(string)) { 5077db96d56Sopenharmony_ci Py_INCREF(string); 5087db96d56Sopenharmony_ci return string; 5097db96d56Sopenharmony_ci } 5107db96d56Sopenharmony_ci return PyBytes_FromStringAndSize( 5117db96d56Sopenharmony_ci (const char *)ptr + start, end - start); 5127db96d56Sopenharmony_ci } 5137db96d56Sopenharmony_ci else { 5147db96d56Sopenharmony_ci return PyUnicode_Substring(string, start, end); 5157db96d56Sopenharmony_ci } 5167db96d56Sopenharmony_ci} 5177db96d56Sopenharmony_ci 5187db96d56Sopenharmony_ciLOCAL(PyObject*) 5197db96d56Sopenharmony_cistate_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) 5207db96d56Sopenharmony_ci{ 5217db96d56Sopenharmony_ci Py_ssize_t i, j; 5227db96d56Sopenharmony_ci 5237db96d56Sopenharmony_ci index = (index - 1) * 2; 5247db96d56Sopenharmony_ci 5257db96d56Sopenharmony_ci if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { 5267db96d56Sopenharmony_ci if (empty) 5277db96d56Sopenharmony_ci /* want empty string */ 5287db96d56Sopenharmony_ci i = j = 0; 5297db96d56Sopenharmony_ci else { 5307db96d56Sopenharmony_ci Py_RETURN_NONE; 5317db96d56Sopenharmony_ci } 5327db96d56Sopenharmony_ci } else { 5337db96d56Sopenharmony_ci i = STATE_OFFSET(state, state->mark[index]); 5347db96d56Sopenharmony_ci j = STATE_OFFSET(state, state->mark[index+1]); 5357db96d56Sopenharmony_ci 5367db96d56Sopenharmony_ci /* check wrong span */ 5377db96d56Sopenharmony_ci if (i > j) { 5387db96d56Sopenharmony_ci PyErr_SetString(PyExc_SystemError, 5397db96d56Sopenharmony_ci "The span of capturing group is wrong," 5407db96d56Sopenharmony_ci " please report a bug for the re module."); 5417db96d56Sopenharmony_ci return NULL; 5427db96d56Sopenharmony_ci } 5437db96d56Sopenharmony_ci } 5447db96d56Sopenharmony_ci 5457db96d56Sopenharmony_ci return getslice(state->isbytes, state->beginning, string, i, j); 5467db96d56Sopenharmony_ci} 5477db96d56Sopenharmony_ci 5487db96d56Sopenharmony_cistatic void 5497db96d56Sopenharmony_cipattern_error(Py_ssize_t status) 5507db96d56Sopenharmony_ci{ 5517db96d56Sopenharmony_ci switch (status) { 5527db96d56Sopenharmony_ci case SRE_ERROR_RECURSION_LIMIT: 5537db96d56Sopenharmony_ci /* This error code seems to be unused. */ 5547db96d56Sopenharmony_ci PyErr_SetString( 5557db96d56Sopenharmony_ci PyExc_RecursionError, 5567db96d56Sopenharmony_ci "maximum recursion limit exceeded" 5577db96d56Sopenharmony_ci ); 5587db96d56Sopenharmony_ci break; 5597db96d56Sopenharmony_ci case SRE_ERROR_MEMORY: 5607db96d56Sopenharmony_ci PyErr_NoMemory(); 5617db96d56Sopenharmony_ci break; 5627db96d56Sopenharmony_ci case SRE_ERROR_INTERRUPTED: 5637db96d56Sopenharmony_ci /* An exception has already been raised, so let it fly */ 5647db96d56Sopenharmony_ci break; 5657db96d56Sopenharmony_ci default: 5667db96d56Sopenharmony_ci /* other error codes indicate compiler/engine bugs */ 5677db96d56Sopenharmony_ci PyErr_SetString( 5687db96d56Sopenharmony_ci PyExc_RuntimeError, 5697db96d56Sopenharmony_ci "internal error in regular expression engine" 5707db96d56Sopenharmony_ci ); 5717db96d56Sopenharmony_ci } 5727db96d56Sopenharmony_ci} 5737db96d56Sopenharmony_ci 5747db96d56Sopenharmony_cistatic int 5757db96d56Sopenharmony_cipattern_traverse(PatternObject *self, visitproc visit, void *arg) 5767db96d56Sopenharmony_ci{ 5777db96d56Sopenharmony_ci Py_VISIT(Py_TYPE(self)); 5787db96d56Sopenharmony_ci Py_VISIT(self->groupindex); 5797db96d56Sopenharmony_ci Py_VISIT(self->indexgroup); 5807db96d56Sopenharmony_ci Py_VISIT(self->pattern); 5817db96d56Sopenharmony_ci return 0; 5827db96d56Sopenharmony_ci} 5837db96d56Sopenharmony_ci 5847db96d56Sopenharmony_cistatic int 5857db96d56Sopenharmony_cipattern_clear(PatternObject *self) 5867db96d56Sopenharmony_ci{ 5877db96d56Sopenharmony_ci Py_CLEAR(self->groupindex); 5887db96d56Sopenharmony_ci Py_CLEAR(self->indexgroup); 5897db96d56Sopenharmony_ci Py_CLEAR(self->pattern); 5907db96d56Sopenharmony_ci return 0; 5917db96d56Sopenharmony_ci} 5927db96d56Sopenharmony_ci 5937db96d56Sopenharmony_cistatic void 5947db96d56Sopenharmony_cipattern_dealloc(PatternObject* self) 5957db96d56Sopenharmony_ci{ 5967db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 5977db96d56Sopenharmony_ci 5987db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 5997db96d56Sopenharmony_ci if (self->weakreflist != NULL) { 6007db96d56Sopenharmony_ci PyObject_ClearWeakRefs((PyObject *) self); 6017db96d56Sopenharmony_ci } 6027db96d56Sopenharmony_ci (void)pattern_clear(self); 6037db96d56Sopenharmony_ci tp->tp_free(self); 6047db96d56Sopenharmony_ci Py_DECREF(tp); 6057db96d56Sopenharmony_ci} 6067db96d56Sopenharmony_ci 6077db96d56Sopenharmony_ciLOCAL(Py_ssize_t) 6087db96d56Sopenharmony_cisre_match(SRE_STATE* state, SRE_CODE* pattern) 6097db96d56Sopenharmony_ci{ 6107db96d56Sopenharmony_ci if (state->charsize == 1) 6117db96d56Sopenharmony_ci return sre_ucs1_match(state, pattern, 1); 6127db96d56Sopenharmony_ci if (state->charsize == 2) 6137db96d56Sopenharmony_ci return sre_ucs2_match(state, pattern, 1); 6147db96d56Sopenharmony_ci assert(state->charsize == 4); 6157db96d56Sopenharmony_ci return sre_ucs4_match(state, pattern, 1); 6167db96d56Sopenharmony_ci} 6177db96d56Sopenharmony_ci 6187db96d56Sopenharmony_ciLOCAL(Py_ssize_t) 6197db96d56Sopenharmony_cisre_search(SRE_STATE* state, SRE_CODE* pattern) 6207db96d56Sopenharmony_ci{ 6217db96d56Sopenharmony_ci if (state->charsize == 1) 6227db96d56Sopenharmony_ci return sre_ucs1_search(state, pattern); 6237db96d56Sopenharmony_ci if (state->charsize == 2) 6247db96d56Sopenharmony_ci return sre_ucs2_search(state, pattern); 6257db96d56Sopenharmony_ci assert(state->charsize == 4); 6267db96d56Sopenharmony_ci return sre_ucs4_search(state, pattern); 6277db96d56Sopenharmony_ci} 6287db96d56Sopenharmony_ci 6297db96d56Sopenharmony_ci/*[clinic input] 6307db96d56Sopenharmony_ci_sre.SRE_Pattern.match 6317db96d56Sopenharmony_ci 6327db96d56Sopenharmony_ci cls: defining_class 6337db96d56Sopenharmony_ci / 6347db96d56Sopenharmony_ci string: object 6357db96d56Sopenharmony_ci pos: Py_ssize_t = 0 6367db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 6377db96d56Sopenharmony_ci 6387db96d56Sopenharmony_ciMatches zero or more characters at the beginning of the string. 6397db96d56Sopenharmony_ci[clinic start generated code]*/ 6407db96d56Sopenharmony_ci 6417db96d56Sopenharmony_cistatic PyObject * 6427db96d56Sopenharmony_ci_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, 6437db96d56Sopenharmony_ci PyObject *string, Py_ssize_t pos, 6447db96d56Sopenharmony_ci Py_ssize_t endpos) 6457db96d56Sopenharmony_ci/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ 6467db96d56Sopenharmony_ci{ 6477db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 6487db96d56Sopenharmony_ci SRE_STATE state; 6497db96d56Sopenharmony_ci Py_ssize_t status; 6507db96d56Sopenharmony_ci PyObject *match; 6517db96d56Sopenharmony_ci 6527db96d56Sopenharmony_ci if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) 6537db96d56Sopenharmony_ci return NULL; 6547db96d56Sopenharmony_ci 6557db96d56Sopenharmony_ci state.ptr = state.start; 6567db96d56Sopenharmony_ci 6577db96d56Sopenharmony_ci TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); 6587db96d56Sopenharmony_ci 6597db96d56Sopenharmony_ci status = sre_match(&state, PatternObject_GetCode(self)); 6607db96d56Sopenharmony_ci 6617db96d56Sopenharmony_ci TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 6627db96d56Sopenharmony_ci if (PyErr_Occurred()) { 6637db96d56Sopenharmony_ci state_fini(&state); 6647db96d56Sopenharmony_ci return NULL; 6657db96d56Sopenharmony_ci } 6667db96d56Sopenharmony_ci 6677db96d56Sopenharmony_ci match = pattern_new_match(module_state, self, &state, status); 6687db96d56Sopenharmony_ci state_fini(&state); 6697db96d56Sopenharmony_ci return match; 6707db96d56Sopenharmony_ci} 6717db96d56Sopenharmony_ci 6727db96d56Sopenharmony_ci/*[clinic input] 6737db96d56Sopenharmony_ci_sre.SRE_Pattern.fullmatch 6747db96d56Sopenharmony_ci 6757db96d56Sopenharmony_ci cls: defining_class 6767db96d56Sopenharmony_ci / 6777db96d56Sopenharmony_ci string: object 6787db96d56Sopenharmony_ci pos: Py_ssize_t = 0 6797db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 6807db96d56Sopenharmony_ci 6817db96d56Sopenharmony_ciMatches against all of the string. 6827db96d56Sopenharmony_ci[clinic start generated code]*/ 6837db96d56Sopenharmony_ci 6847db96d56Sopenharmony_cistatic PyObject * 6857db96d56Sopenharmony_ci_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, 6867db96d56Sopenharmony_ci PyObject *string, Py_ssize_t pos, 6877db96d56Sopenharmony_ci Py_ssize_t endpos) 6887db96d56Sopenharmony_ci/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/ 6897db96d56Sopenharmony_ci{ 6907db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 6917db96d56Sopenharmony_ci SRE_STATE state; 6927db96d56Sopenharmony_ci Py_ssize_t status; 6937db96d56Sopenharmony_ci PyObject *match; 6947db96d56Sopenharmony_ci 6957db96d56Sopenharmony_ci if (!state_init(&state, self, string, pos, endpos)) 6967db96d56Sopenharmony_ci return NULL; 6977db96d56Sopenharmony_ci 6987db96d56Sopenharmony_ci state.ptr = state.start; 6997db96d56Sopenharmony_ci 7007db96d56Sopenharmony_ci TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); 7017db96d56Sopenharmony_ci 7027db96d56Sopenharmony_ci state.match_all = 1; 7037db96d56Sopenharmony_ci status = sre_match(&state, PatternObject_GetCode(self)); 7047db96d56Sopenharmony_ci 7057db96d56Sopenharmony_ci TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 7067db96d56Sopenharmony_ci if (PyErr_Occurred()) { 7077db96d56Sopenharmony_ci state_fini(&state); 7087db96d56Sopenharmony_ci return NULL; 7097db96d56Sopenharmony_ci } 7107db96d56Sopenharmony_ci 7117db96d56Sopenharmony_ci match = pattern_new_match(module_state, self, &state, status); 7127db96d56Sopenharmony_ci state_fini(&state); 7137db96d56Sopenharmony_ci return match; 7147db96d56Sopenharmony_ci} 7157db96d56Sopenharmony_ci 7167db96d56Sopenharmony_ci/*[clinic input] 7177db96d56Sopenharmony_ci_sre.SRE_Pattern.search 7187db96d56Sopenharmony_ci 7197db96d56Sopenharmony_ci cls: defining_class 7207db96d56Sopenharmony_ci / 7217db96d56Sopenharmony_ci string: object 7227db96d56Sopenharmony_ci pos: Py_ssize_t = 0 7237db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 7247db96d56Sopenharmony_ci 7257db96d56Sopenharmony_ciScan through string looking for a match, and return a corresponding match object instance. 7267db96d56Sopenharmony_ci 7277db96d56Sopenharmony_ciReturn None if no position in the string matches. 7287db96d56Sopenharmony_ci[clinic start generated code]*/ 7297db96d56Sopenharmony_ci 7307db96d56Sopenharmony_cistatic PyObject * 7317db96d56Sopenharmony_ci_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, 7327db96d56Sopenharmony_ci PyObject *string, Py_ssize_t pos, 7337db96d56Sopenharmony_ci Py_ssize_t endpos) 7347db96d56Sopenharmony_ci/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/ 7357db96d56Sopenharmony_ci{ 7367db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 7377db96d56Sopenharmony_ci SRE_STATE state; 7387db96d56Sopenharmony_ci Py_ssize_t status; 7397db96d56Sopenharmony_ci PyObject *match; 7407db96d56Sopenharmony_ci 7417db96d56Sopenharmony_ci if (!state_init(&state, self, string, pos, endpos)) 7427db96d56Sopenharmony_ci return NULL; 7437db96d56Sopenharmony_ci 7447db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); 7457db96d56Sopenharmony_ci 7467db96d56Sopenharmony_ci status = sre_search(&state, PatternObject_GetCode(self)); 7477db96d56Sopenharmony_ci 7487db96d56Sopenharmony_ci TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 7497db96d56Sopenharmony_ci 7507db96d56Sopenharmony_ci if (PyErr_Occurred()) { 7517db96d56Sopenharmony_ci state_fini(&state); 7527db96d56Sopenharmony_ci return NULL; 7537db96d56Sopenharmony_ci } 7547db96d56Sopenharmony_ci 7557db96d56Sopenharmony_ci match = pattern_new_match(module_state, self, &state, status); 7567db96d56Sopenharmony_ci state_fini(&state); 7577db96d56Sopenharmony_ci return match; 7587db96d56Sopenharmony_ci} 7597db96d56Sopenharmony_ci 7607db96d56Sopenharmony_cistatic PyObject* 7617db96d56Sopenharmony_cicall(const char* module, const char* function, PyObject* args) 7627db96d56Sopenharmony_ci{ 7637db96d56Sopenharmony_ci PyObject* name; 7647db96d56Sopenharmony_ci PyObject* mod; 7657db96d56Sopenharmony_ci PyObject* func; 7667db96d56Sopenharmony_ci PyObject* result; 7677db96d56Sopenharmony_ci 7687db96d56Sopenharmony_ci if (!args) 7697db96d56Sopenharmony_ci return NULL; 7707db96d56Sopenharmony_ci name = PyUnicode_FromString(module); 7717db96d56Sopenharmony_ci if (!name) 7727db96d56Sopenharmony_ci return NULL; 7737db96d56Sopenharmony_ci mod = PyImport_Import(name); 7747db96d56Sopenharmony_ci Py_DECREF(name); 7757db96d56Sopenharmony_ci if (!mod) 7767db96d56Sopenharmony_ci return NULL; 7777db96d56Sopenharmony_ci func = PyObject_GetAttrString(mod, function); 7787db96d56Sopenharmony_ci Py_DECREF(mod); 7797db96d56Sopenharmony_ci if (!func) 7807db96d56Sopenharmony_ci return NULL; 7817db96d56Sopenharmony_ci result = PyObject_CallObject(func, args); 7827db96d56Sopenharmony_ci Py_DECREF(func); 7837db96d56Sopenharmony_ci Py_DECREF(args); 7847db96d56Sopenharmony_ci return result; 7857db96d56Sopenharmony_ci} 7867db96d56Sopenharmony_ci 7877db96d56Sopenharmony_ci/*[clinic input] 7887db96d56Sopenharmony_ci_sre.SRE_Pattern.findall 7897db96d56Sopenharmony_ci 7907db96d56Sopenharmony_ci string: object 7917db96d56Sopenharmony_ci pos: Py_ssize_t = 0 7927db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 7937db96d56Sopenharmony_ci 7947db96d56Sopenharmony_ciReturn a list of all non-overlapping matches of pattern in string. 7957db96d56Sopenharmony_ci[clinic start generated code]*/ 7967db96d56Sopenharmony_ci 7977db96d56Sopenharmony_cistatic PyObject * 7987db96d56Sopenharmony_ci_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, 7997db96d56Sopenharmony_ci Py_ssize_t pos, Py_ssize_t endpos) 8007db96d56Sopenharmony_ci/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/ 8017db96d56Sopenharmony_ci{ 8027db96d56Sopenharmony_ci SRE_STATE state; 8037db96d56Sopenharmony_ci PyObject* list; 8047db96d56Sopenharmony_ci Py_ssize_t status; 8057db96d56Sopenharmony_ci Py_ssize_t i, b, e; 8067db96d56Sopenharmony_ci 8077db96d56Sopenharmony_ci if (!state_init(&state, self, string, pos, endpos)) 8087db96d56Sopenharmony_ci return NULL; 8097db96d56Sopenharmony_ci 8107db96d56Sopenharmony_ci list = PyList_New(0); 8117db96d56Sopenharmony_ci if (!list) { 8127db96d56Sopenharmony_ci state_fini(&state); 8137db96d56Sopenharmony_ci return NULL; 8147db96d56Sopenharmony_ci } 8157db96d56Sopenharmony_ci 8167db96d56Sopenharmony_ci while (state.start <= state.end) { 8177db96d56Sopenharmony_ci 8187db96d56Sopenharmony_ci PyObject* item; 8197db96d56Sopenharmony_ci 8207db96d56Sopenharmony_ci state_reset(&state); 8217db96d56Sopenharmony_ci 8227db96d56Sopenharmony_ci state.ptr = state.start; 8237db96d56Sopenharmony_ci 8247db96d56Sopenharmony_ci status = sre_search(&state, PatternObject_GetCode(self)); 8257db96d56Sopenharmony_ci if (PyErr_Occurred()) 8267db96d56Sopenharmony_ci goto error; 8277db96d56Sopenharmony_ci 8287db96d56Sopenharmony_ci if (status <= 0) { 8297db96d56Sopenharmony_ci if (status == 0) 8307db96d56Sopenharmony_ci break; 8317db96d56Sopenharmony_ci pattern_error(status); 8327db96d56Sopenharmony_ci goto error; 8337db96d56Sopenharmony_ci } 8347db96d56Sopenharmony_ci 8357db96d56Sopenharmony_ci /* don't bother to build a match object */ 8367db96d56Sopenharmony_ci switch (self->groups) { 8377db96d56Sopenharmony_ci case 0: 8387db96d56Sopenharmony_ci b = STATE_OFFSET(&state, state.start); 8397db96d56Sopenharmony_ci e = STATE_OFFSET(&state, state.ptr); 8407db96d56Sopenharmony_ci item = getslice(state.isbytes, state.beginning, 8417db96d56Sopenharmony_ci string, b, e); 8427db96d56Sopenharmony_ci if (!item) 8437db96d56Sopenharmony_ci goto error; 8447db96d56Sopenharmony_ci break; 8457db96d56Sopenharmony_ci case 1: 8467db96d56Sopenharmony_ci item = state_getslice(&state, 1, string, 1); 8477db96d56Sopenharmony_ci if (!item) 8487db96d56Sopenharmony_ci goto error; 8497db96d56Sopenharmony_ci break; 8507db96d56Sopenharmony_ci default: 8517db96d56Sopenharmony_ci item = PyTuple_New(self->groups); 8527db96d56Sopenharmony_ci if (!item) 8537db96d56Sopenharmony_ci goto error; 8547db96d56Sopenharmony_ci for (i = 0; i < self->groups; i++) { 8557db96d56Sopenharmony_ci PyObject* o = state_getslice(&state, i+1, string, 1); 8567db96d56Sopenharmony_ci if (!o) { 8577db96d56Sopenharmony_ci Py_DECREF(item); 8587db96d56Sopenharmony_ci goto error; 8597db96d56Sopenharmony_ci } 8607db96d56Sopenharmony_ci PyTuple_SET_ITEM(item, i, o); 8617db96d56Sopenharmony_ci } 8627db96d56Sopenharmony_ci break; 8637db96d56Sopenharmony_ci } 8647db96d56Sopenharmony_ci 8657db96d56Sopenharmony_ci status = PyList_Append(list, item); 8667db96d56Sopenharmony_ci Py_DECREF(item); 8677db96d56Sopenharmony_ci if (status < 0) 8687db96d56Sopenharmony_ci goto error; 8697db96d56Sopenharmony_ci 8707db96d56Sopenharmony_ci state.must_advance = (state.ptr == state.start); 8717db96d56Sopenharmony_ci state.start = state.ptr; 8727db96d56Sopenharmony_ci } 8737db96d56Sopenharmony_ci 8747db96d56Sopenharmony_ci state_fini(&state); 8757db96d56Sopenharmony_ci return list; 8767db96d56Sopenharmony_ci 8777db96d56Sopenharmony_cierror: 8787db96d56Sopenharmony_ci Py_DECREF(list); 8797db96d56Sopenharmony_ci state_fini(&state); 8807db96d56Sopenharmony_ci return NULL; 8817db96d56Sopenharmony_ci 8827db96d56Sopenharmony_ci} 8837db96d56Sopenharmony_ci 8847db96d56Sopenharmony_ci/*[clinic input] 8857db96d56Sopenharmony_ci_sre.SRE_Pattern.finditer 8867db96d56Sopenharmony_ci 8877db96d56Sopenharmony_ci cls: defining_class 8887db96d56Sopenharmony_ci / 8897db96d56Sopenharmony_ci string: object 8907db96d56Sopenharmony_ci pos: Py_ssize_t = 0 8917db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 8927db96d56Sopenharmony_ci 8937db96d56Sopenharmony_ciReturn an iterator over all non-overlapping matches for the RE pattern in string. 8947db96d56Sopenharmony_ci 8957db96d56Sopenharmony_ciFor each match, the iterator returns a match object. 8967db96d56Sopenharmony_ci[clinic start generated code]*/ 8977db96d56Sopenharmony_ci 8987db96d56Sopenharmony_cistatic PyObject * 8997db96d56Sopenharmony_ci_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, 9007db96d56Sopenharmony_ci PyObject *string, Py_ssize_t pos, 9017db96d56Sopenharmony_ci Py_ssize_t endpos) 9027db96d56Sopenharmony_ci/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/ 9037db96d56Sopenharmony_ci{ 9047db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 9057db96d56Sopenharmony_ci PyObject* scanner; 9067db96d56Sopenharmony_ci PyObject* search; 9077db96d56Sopenharmony_ci PyObject* iterator; 9087db96d56Sopenharmony_ci 9097db96d56Sopenharmony_ci scanner = pattern_scanner(module_state, self, string, pos, endpos); 9107db96d56Sopenharmony_ci if (!scanner) 9117db96d56Sopenharmony_ci return NULL; 9127db96d56Sopenharmony_ci 9137db96d56Sopenharmony_ci search = PyObject_GetAttrString(scanner, "search"); 9147db96d56Sopenharmony_ci Py_DECREF(scanner); 9157db96d56Sopenharmony_ci if (!search) 9167db96d56Sopenharmony_ci return NULL; 9177db96d56Sopenharmony_ci 9187db96d56Sopenharmony_ci iterator = PyCallIter_New(search, Py_None); 9197db96d56Sopenharmony_ci Py_DECREF(search); 9207db96d56Sopenharmony_ci 9217db96d56Sopenharmony_ci return iterator; 9227db96d56Sopenharmony_ci} 9237db96d56Sopenharmony_ci 9247db96d56Sopenharmony_ci/*[clinic input] 9257db96d56Sopenharmony_ci_sre.SRE_Pattern.scanner 9267db96d56Sopenharmony_ci 9277db96d56Sopenharmony_ci cls: defining_class 9287db96d56Sopenharmony_ci / 9297db96d56Sopenharmony_ci string: object 9307db96d56Sopenharmony_ci pos: Py_ssize_t = 0 9317db96d56Sopenharmony_ci endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 9327db96d56Sopenharmony_ci 9337db96d56Sopenharmony_ci[clinic start generated code]*/ 9347db96d56Sopenharmony_ci 9357db96d56Sopenharmony_cistatic PyObject * 9367db96d56Sopenharmony_ci_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, 9377db96d56Sopenharmony_ci PyObject *string, Py_ssize_t pos, 9387db96d56Sopenharmony_ci Py_ssize_t endpos) 9397db96d56Sopenharmony_ci/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/ 9407db96d56Sopenharmony_ci{ 9417db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 9427db96d56Sopenharmony_ci 9437db96d56Sopenharmony_ci return pattern_scanner(module_state, self, string, pos, endpos); 9447db96d56Sopenharmony_ci} 9457db96d56Sopenharmony_ci 9467db96d56Sopenharmony_ci/*[clinic input] 9477db96d56Sopenharmony_ci_sre.SRE_Pattern.split 9487db96d56Sopenharmony_ci 9497db96d56Sopenharmony_ci string: object 9507db96d56Sopenharmony_ci maxsplit: Py_ssize_t = 0 9517db96d56Sopenharmony_ci 9527db96d56Sopenharmony_ciSplit string by the occurrences of pattern. 9537db96d56Sopenharmony_ci[clinic start generated code]*/ 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_cistatic PyObject * 9567db96d56Sopenharmony_ci_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, 9577db96d56Sopenharmony_ci Py_ssize_t maxsplit) 9587db96d56Sopenharmony_ci/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/ 9597db96d56Sopenharmony_ci{ 9607db96d56Sopenharmony_ci SRE_STATE state; 9617db96d56Sopenharmony_ci PyObject* list; 9627db96d56Sopenharmony_ci PyObject* item; 9637db96d56Sopenharmony_ci Py_ssize_t status; 9647db96d56Sopenharmony_ci Py_ssize_t n; 9657db96d56Sopenharmony_ci Py_ssize_t i; 9667db96d56Sopenharmony_ci const void* last; 9677db96d56Sopenharmony_ci 9687db96d56Sopenharmony_ci assert(self->codesize != 0); 9697db96d56Sopenharmony_ci 9707db96d56Sopenharmony_ci if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) 9717db96d56Sopenharmony_ci return NULL; 9727db96d56Sopenharmony_ci 9737db96d56Sopenharmony_ci list = PyList_New(0); 9747db96d56Sopenharmony_ci if (!list) { 9757db96d56Sopenharmony_ci state_fini(&state); 9767db96d56Sopenharmony_ci return NULL; 9777db96d56Sopenharmony_ci } 9787db96d56Sopenharmony_ci 9797db96d56Sopenharmony_ci n = 0; 9807db96d56Sopenharmony_ci last = state.start; 9817db96d56Sopenharmony_ci 9827db96d56Sopenharmony_ci while (!maxsplit || n < maxsplit) { 9837db96d56Sopenharmony_ci 9847db96d56Sopenharmony_ci state_reset(&state); 9857db96d56Sopenharmony_ci 9867db96d56Sopenharmony_ci state.ptr = state.start; 9877db96d56Sopenharmony_ci 9887db96d56Sopenharmony_ci status = sre_search(&state, PatternObject_GetCode(self)); 9897db96d56Sopenharmony_ci if (PyErr_Occurred()) 9907db96d56Sopenharmony_ci goto error; 9917db96d56Sopenharmony_ci 9927db96d56Sopenharmony_ci if (status <= 0) { 9937db96d56Sopenharmony_ci if (status == 0) 9947db96d56Sopenharmony_ci break; 9957db96d56Sopenharmony_ci pattern_error(status); 9967db96d56Sopenharmony_ci goto error; 9977db96d56Sopenharmony_ci } 9987db96d56Sopenharmony_ci 9997db96d56Sopenharmony_ci /* get segment before this match */ 10007db96d56Sopenharmony_ci item = getslice(state.isbytes, state.beginning, 10017db96d56Sopenharmony_ci string, STATE_OFFSET(&state, last), 10027db96d56Sopenharmony_ci STATE_OFFSET(&state, state.start) 10037db96d56Sopenharmony_ci ); 10047db96d56Sopenharmony_ci if (!item) 10057db96d56Sopenharmony_ci goto error; 10067db96d56Sopenharmony_ci status = PyList_Append(list, item); 10077db96d56Sopenharmony_ci Py_DECREF(item); 10087db96d56Sopenharmony_ci if (status < 0) 10097db96d56Sopenharmony_ci goto error; 10107db96d56Sopenharmony_ci 10117db96d56Sopenharmony_ci /* add groups (if any) */ 10127db96d56Sopenharmony_ci for (i = 0; i < self->groups; i++) { 10137db96d56Sopenharmony_ci item = state_getslice(&state, i+1, string, 0); 10147db96d56Sopenharmony_ci if (!item) 10157db96d56Sopenharmony_ci goto error; 10167db96d56Sopenharmony_ci status = PyList_Append(list, item); 10177db96d56Sopenharmony_ci Py_DECREF(item); 10187db96d56Sopenharmony_ci if (status < 0) 10197db96d56Sopenharmony_ci goto error; 10207db96d56Sopenharmony_ci } 10217db96d56Sopenharmony_ci 10227db96d56Sopenharmony_ci n = n + 1; 10237db96d56Sopenharmony_ci state.must_advance = (state.ptr == state.start); 10247db96d56Sopenharmony_ci last = state.start = state.ptr; 10257db96d56Sopenharmony_ci 10267db96d56Sopenharmony_ci } 10277db96d56Sopenharmony_ci 10287db96d56Sopenharmony_ci /* get segment following last match (even if empty) */ 10297db96d56Sopenharmony_ci item = getslice(state.isbytes, state.beginning, 10307db96d56Sopenharmony_ci string, STATE_OFFSET(&state, last), state.endpos 10317db96d56Sopenharmony_ci ); 10327db96d56Sopenharmony_ci if (!item) 10337db96d56Sopenharmony_ci goto error; 10347db96d56Sopenharmony_ci status = PyList_Append(list, item); 10357db96d56Sopenharmony_ci Py_DECREF(item); 10367db96d56Sopenharmony_ci if (status < 0) 10377db96d56Sopenharmony_ci goto error; 10387db96d56Sopenharmony_ci 10397db96d56Sopenharmony_ci state_fini(&state); 10407db96d56Sopenharmony_ci return list; 10417db96d56Sopenharmony_ci 10427db96d56Sopenharmony_cierror: 10437db96d56Sopenharmony_ci Py_DECREF(list); 10447db96d56Sopenharmony_ci state_fini(&state); 10457db96d56Sopenharmony_ci return NULL; 10467db96d56Sopenharmony_ci 10477db96d56Sopenharmony_ci} 10487db96d56Sopenharmony_ci 10497db96d56Sopenharmony_cistatic PyObject* 10507db96d56Sopenharmony_cipattern_subx(_sremodulestate* module_state, 10517db96d56Sopenharmony_ci PatternObject* self, 10527db96d56Sopenharmony_ci PyObject* ptemplate, 10537db96d56Sopenharmony_ci PyObject* string, 10547db96d56Sopenharmony_ci Py_ssize_t count, 10557db96d56Sopenharmony_ci Py_ssize_t subn) 10567db96d56Sopenharmony_ci{ 10577db96d56Sopenharmony_ci SRE_STATE state; 10587db96d56Sopenharmony_ci PyObject* list; 10597db96d56Sopenharmony_ci PyObject* joiner; 10607db96d56Sopenharmony_ci PyObject* item; 10617db96d56Sopenharmony_ci PyObject* filter; 10627db96d56Sopenharmony_ci PyObject* match; 10637db96d56Sopenharmony_ci const void* ptr; 10647db96d56Sopenharmony_ci Py_ssize_t status; 10657db96d56Sopenharmony_ci Py_ssize_t n; 10667db96d56Sopenharmony_ci Py_ssize_t i, b, e; 10677db96d56Sopenharmony_ci int isbytes, charsize; 10687db96d56Sopenharmony_ci int filter_is_callable; 10697db96d56Sopenharmony_ci Py_buffer view; 10707db96d56Sopenharmony_ci 10717db96d56Sopenharmony_ci if (PyCallable_Check(ptemplate)) { 10727db96d56Sopenharmony_ci /* sub/subn takes either a function or a template */ 10737db96d56Sopenharmony_ci filter = ptemplate; 10747db96d56Sopenharmony_ci Py_INCREF(filter); 10757db96d56Sopenharmony_ci filter_is_callable = 1; 10767db96d56Sopenharmony_ci } else { 10777db96d56Sopenharmony_ci /* if not callable, check if it's a literal string */ 10787db96d56Sopenharmony_ci int literal; 10797db96d56Sopenharmony_ci view.buf = NULL; 10807db96d56Sopenharmony_ci ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view); 10817db96d56Sopenharmony_ci if (ptr) { 10827db96d56Sopenharmony_ci if (charsize == 1) 10837db96d56Sopenharmony_ci literal = memchr(ptr, '\\', n) == NULL; 10847db96d56Sopenharmony_ci else 10857db96d56Sopenharmony_ci literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; 10867db96d56Sopenharmony_ci } else { 10877db96d56Sopenharmony_ci PyErr_Clear(); 10887db96d56Sopenharmony_ci literal = 0; 10897db96d56Sopenharmony_ci } 10907db96d56Sopenharmony_ci if (view.buf) 10917db96d56Sopenharmony_ci PyBuffer_Release(&view); 10927db96d56Sopenharmony_ci if (literal) { 10937db96d56Sopenharmony_ci filter = ptemplate; 10947db96d56Sopenharmony_ci Py_INCREF(filter); 10957db96d56Sopenharmony_ci filter_is_callable = 0; 10967db96d56Sopenharmony_ci } else { 10977db96d56Sopenharmony_ci /* not a literal; hand it over to the template compiler */ 10987db96d56Sopenharmony_ci filter = call( 10997db96d56Sopenharmony_ci SRE_PY_MODULE, "_subx", 11007db96d56Sopenharmony_ci PyTuple_Pack(2, self, ptemplate) 11017db96d56Sopenharmony_ci ); 11027db96d56Sopenharmony_ci if (!filter) 11037db96d56Sopenharmony_ci return NULL; 11047db96d56Sopenharmony_ci filter_is_callable = PyCallable_Check(filter); 11057db96d56Sopenharmony_ci } 11067db96d56Sopenharmony_ci } 11077db96d56Sopenharmony_ci 11087db96d56Sopenharmony_ci if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { 11097db96d56Sopenharmony_ci Py_DECREF(filter); 11107db96d56Sopenharmony_ci return NULL; 11117db96d56Sopenharmony_ci } 11127db96d56Sopenharmony_ci 11137db96d56Sopenharmony_ci list = PyList_New(0); 11147db96d56Sopenharmony_ci if (!list) { 11157db96d56Sopenharmony_ci Py_DECREF(filter); 11167db96d56Sopenharmony_ci state_fini(&state); 11177db96d56Sopenharmony_ci return NULL; 11187db96d56Sopenharmony_ci } 11197db96d56Sopenharmony_ci 11207db96d56Sopenharmony_ci n = i = 0; 11217db96d56Sopenharmony_ci 11227db96d56Sopenharmony_ci while (!count || n < count) { 11237db96d56Sopenharmony_ci 11247db96d56Sopenharmony_ci state_reset(&state); 11257db96d56Sopenharmony_ci 11267db96d56Sopenharmony_ci state.ptr = state.start; 11277db96d56Sopenharmony_ci 11287db96d56Sopenharmony_ci status = sre_search(&state, PatternObject_GetCode(self)); 11297db96d56Sopenharmony_ci if (PyErr_Occurred()) 11307db96d56Sopenharmony_ci goto error; 11317db96d56Sopenharmony_ci 11327db96d56Sopenharmony_ci if (status <= 0) { 11337db96d56Sopenharmony_ci if (status == 0) 11347db96d56Sopenharmony_ci break; 11357db96d56Sopenharmony_ci pattern_error(status); 11367db96d56Sopenharmony_ci goto error; 11377db96d56Sopenharmony_ci } 11387db96d56Sopenharmony_ci 11397db96d56Sopenharmony_ci b = STATE_OFFSET(&state, state.start); 11407db96d56Sopenharmony_ci e = STATE_OFFSET(&state, state.ptr); 11417db96d56Sopenharmony_ci 11427db96d56Sopenharmony_ci if (i < b) { 11437db96d56Sopenharmony_ci /* get segment before this match */ 11447db96d56Sopenharmony_ci item = getslice(state.isbytes, state.beginning, 11457db96d56Sopenharmony_ci string, i, b); 11467db96d56Sopenharmony_ci if (!item) 11477db96d56Sopenharmony_ci goto error; 11487db96d56Sopenharmony_ci status = PyList_Append(list, item); 11497db96d56Sopenharmony_ci Py_DECREF(item); 11507db96d56Sopenharmony_ci if (status < 0) 11517db96d56Sopenharmony_ci goto error; 11527db96d56Sopenharmony_ci 11537db96d56Sopenharmony_ci } 11547db96d56Sopenharmony_ci 11557db96d56Sopenharmony_ci if (filter_is_callable) { 11567db96d56Sopenharmony_ci /* pass match object through filter */ 11577db96d56Sopenharmony_ci match = pattern_new_match(module_state, self, &state, 1); 11587db96d56Sopenharmony_ci if (!match) 11597db96d56Sopenharmony_ci goto error; 11607db96d56Sopenharmony_ci item = PyObject_CallOneArg(filter, match); 11617db96d56Sopenharmony_ci Py_DECREF(match); 11627db96d56Sopenharmony_ci if (!item) 11637db96d56Sopenharmony_ci goto error; 11647db96d56Sopenharmony_ci } else { 11657db96d56Sopenharmony_ci /* filter is literal string */ 11667db96d56Sopenharmony_ci item = filter; 11677db96d56Sopenharmony_ci Py_INCREF(item); 11687db96d56Sopenharmony_ci } 11697db96d56Sopenharmony_ci 11707db96d56Sopenharmony_ci /* add to list */ 11717db96d56Sopenharmony_ci if (item != Py_None) { 11727db96d56Sopenharmony_ci status = PyList_Append(list, item); 11737db96d56Sopenharmony_ci Py_DECREF(item); 11747db96d56Sopenharmony_ci if (status < 0) 11757db96d56Sopenharmony_ci goto error; 11767db96d56Sopenharmony_ci } 11777db96d56Sopenharmony_ci 11787db96d56Sopenharmony_ci i = e; 11797db96d56Sopenharmony_ci n = n + 1; 11807db96d56Sopenharmony_ci state.must_advance = (state.ptr == state.start); 11817db96d56Sopenharmony_ci state.start = state.ptr; 11827db96d56Sopenharmony_ci } 11837db96d56Sopenharmony_ci 11847db96d56Sopenharmony_ci /* get segment following last match */ 11857db96d56Sopenharmony_ci if (i < state.endpos) { 11867db96d56Sopenharmony_ci item = getslice(state.isbytes, state.beginning, 11877db96d56Sopenharmony_ci string, i, state.endpos); 11887db96d56Sopenharmony_ci if (!item) 11897db96d56Sopenharmony_ci goto error; 11907db96d56Sopenharmony_ci status = PyList_Append(list, item); 11917db96d56Sopenharmony_ci Py_DECREF(item); 11927db96d56Sopenharmony_ci if (status < 0) 11937db96d56Sopenharmony_ci goto error; 11947db96d56Sopenharmony_ci } 11957db96d56Sopenharmony_ci 11967db96d56Sopenharmony_ci state_fini(&state); 11977db96d56Sopenharmony_ci 11987db96d56Sopenharmony_ci Py_DECREF(filter); 11997db96d56Sopenharmony_ci 12007db96d56Sopenharmony_ci /* convert list to single string (also removes list) */ 12017db96d56Sopenharmony_ci joiner = getslice(state.isbytes, state.beginning, string, 0, 0); 12027db96d56Sopenharmony_ci if (!joiner) { 12037db96d56Sopenharmony_ci Py_DECREF(list); 12047db96d56Sopenharmony_ci return NULL; 12057db96d56Sopenharmony_ci } 12067db96d56Sopenharmony_ci if (PyList_GET_SIZE(list) == 0) { 12077db96d56Sopenharmony_ci Py_DECREF(list); 12087db96d56Sopenharmony_ci item = joiner; 12097db96d56Sopenharmony_ci } 12107db96d56Sopenharmony_ci else { 12117db96d56Sopenharmony_ci if (state.isbytes) 12127db96d56Sopenharmony_ci item = _PyBytes_Join(joiner, list); 12137db96d56Sopenharmony_ci else 12147db96d56Sopenharmony_ci item = PyUnicode_Join(joiner, list); 12157db96d56Sopenharmony_ci Py_DECREF(joiner); 12167db96d56Sopenharmony_ci Py_DECREF(list); 12177db96d56Sopenharmony_ci if (!item) 12187db96d56Sopenharmony_ci return NULL; 12197db96d56Sopenharmony_ci } 12207db96d56Sopenharmony_ci 12217db96d56Sopenharmony_ci if (subn) 12227db96d56Sopenharmony_ci return Py_BuildValue("Nn", item, n); 12237db96d56Sopenharmony_ci 12247db96d56Sopenharmony_ci return item; 12257db96d56Sopenharmony_ci 12267db96d56Sopenharmony_cierror: 12277db96d56Sopenharmony_ci Py_DECREF(list); 12287db96d56Sopenharmony_ci state_fini(&state); 12297db96d56Sopenharmony_ci Py_DECREF(filter); 12307db96d56Sopenharmony_ci return NULL; 12317db96d56Sopenharmony_ci 12327db96d56Sopenharmony_ci} 12337db96d56Sopenharmony_ci 12347db96d56Sopenharmony_ci/*[clinic input] 12357db96d56Sopenharmony_ci_sre.SRE_Pattern.sub 12367db96d56Sopenharmony_ci 12377db96d56Sopenharmony_ci cls: defining_class 12387db96d56Sopenharmony_ci / 12397db96d56Sopenharmony_ci repl: object 12407db96d56Sopenharmony_ci string: object 12417db96d56Sopenharmony_ci count: Py_ssize_t = 0 12427db96d56Sopenharmony_ci 12437db96d56Sopenharmony_ciReturn the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl. 12447db96d56Sopenharmony_ci[clinic start generated code]*/ 12457db96d56Sopenharmony_ci 12467db96d56Sopenharmony_cistatic PyObject * 12477db96d56Sopenharmony_ci_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, 12487db96d56Sopenharmony_ci PyObject *repl, PyObject *string, Py_ssize_t count) 12497db96d56Sopenharmony_ci/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/ 12507db96d56Sopenharmony_ci{ 12517db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 12527db96d56Sopenharmony_ci 12537db96d56Sopenharmony_ci return pattern_subx(module_state, self, repl, string, count, 0); 12547db96d56Sopenharmony_ci} 12557db96d56Sopenharmony_ci 12567db96d56Sopenharmony_ci/*[clinic input] 12577db96d56Sopenharmony_ci_sre.SRE_Pattern.subn 12587db96d56Sopenharmony_ci 12597db96d56Sopenharmony_ci cls: defining_class 12607db96d56Sopenharmony_ci / 12617db96d56Sopenharmony_ci repl: object 12627db96d56Sopenharmony_ci string: object 12637db96d56Sopenharmony_ci count: Py_ssize_t = 0 12647db96d56Sopenharmony_ci 12657db96d56Sopenharmony_ciReturn the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl. 12667db96d56Sopenharmony_ci[clinic start generated code]*/ 12677db96d56Sopenharmony_ci 12687db96d56Sopenharmony_cistatic PyObject * 12697db96d56Sopenharmony_ci_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, 12707db96d56Sopenharmony_ci PyObject *repl, PyObject *string, 12717db96d56Sopenharmony_ci Py_ssize_t count) 12727db96d56Sopenharmony_ci/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/ 12737db96d56Sopenharmony_ci{ 12747db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 12757db96d56Sopenharmony_ci 12767db96d56Sopenharmony_ci return pattern_subx(module_state, self, repl, string, count, 1); 12777db96d56Sopenharmony_ci} 12787db96d56Sopenharmony_ci 12797db96d56Sopenharmony_ci/*[clinic input] 12807db96d56Sopenharmony_ci_sre.SRE_Pattern.__copy__ 12817db96d56Sopenharmony_ci 12827db96d56Sopenharmony_ci[clinic start generated code]*/ 12837db96d56Sopenharmony_ci 12847db96d56Sopenharmony_cistatic PyObject * 12857db96d56Sopenharmony_ci_sre_SRE_Pattern___copy___impl(PatternObject *self) 12867db96d56Sopenharmony_ci/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ 12877db96d56Sopenharmony_ci{ 12887db96d56Sopenharmony_ci Py_INCREF(self); 12897db96d56Sopenharmony_ci return (PyObject *)self; 12907db96d56Sopenharmony_ci} 12917db96d56Sopenharmony_ci 12927db96d56Sopenharmony_ci/*[clinic input] 12937db96d56Sopenharmony_ci_sre.SRE_Pattern.__deepcopy__ 12947db96d56Sopenharmony_ci 12957db96d56Sopenharmony_ci memo: object 12967db96d56Sopenharmony_ci / 12977db96d56Sopenharmony_ci 12987db96d56Sopenharmony_ci[clinic start generated code]*/ 12997db96d56Sopenharmony_ci 13007db96d56Sopenharmony_cistatic PyObject * 13017db96d56Sopenharmony_ci_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) 13027db96d56Sopenharmony_ci/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/ 13037db96d56Sopenharmony_ci{ 13047db96d56Sopenharmony_ci Py_INCREF(self); 13057db96d56Sopenharmony_ci return (PyObject *)self; 13067db96d56Sopenharmony_ci} 13077db96d56Sopenharmony_ci 13087db96d56Sopenharmony_cistatic PyObject * 13097db96d56Sopenharmony_cipattern_repr(PatternObject *obj) 13107db96d56Sopenharmony_ci{ 13117db96d56Sopenharmony_ci static const struct { 13127db96d56Sopenharmony_ci const char *name; 13137db96d56Sopenharmony_ci int value; 13147db96d56Sopenharmony_ci } flag_names[] = { 13157db96d56Sopenharmony_ci {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, 13167db96d56Sopenharmony_ci {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, 13177db96d56Sopenharmony_ci {"re.LOCALE", SRE_FLAG_LOCALE}, 13187db96d56Sopenharmony_ci {"re.MULTILINE", SRE_FLAG_MULTILINE}, 13197db96d56Sopenharmony_ci {"re.DOTALL", SRE_FLAG_DOTALL}, 13207db96d56Sopenharmony_ci {"re.UNICODE", SRE_FLAG_UNICODE}, 13217db96d56Sopenharmony_ci {"re.VERBOSE", SRE_FLAG_VERBOSE}, 13227db96d56Sopenharmony_ci {"re.DEBUG", SRE_FLAG_DEBUG}, 13237db96d56Sopenharmony_ci {"re.ASCII", SRE_FLAG_ASCII}, 13247db96d56Sopenharmony_ci }; 13257db96d56Sopenharmony_ci PyObject *result = NULL; 13267db96d56Sopenharmony_ci PyObject *flag_items; 13277db96d56Sopenharmony_ci size_t i; 13287db96d56Sopenharmony_ci int flags = obj->flags; 13297db96d56Sopenharmony_ci 13307db96d56Sopenharmony_ci /* Omit re.UNICODE for valid string patterns. */ 13317db96d56Sopenharmony_ci if (obj->isbytes == 0 && 13327db96d56Sopenharmony_ci (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == 13337db96d56Sopenharmony_ci SRE_FLAG_UNICODE) 13347db96d56Sopenharmony_ci flags &= ~SRE_FLAG_UNICODE; 13357db96d56Sopenharmony_ci 13367db96d56Sopenharmony_ci flag_items = PyList_New(0); 13377db96d56Sopenharmony_ci if (!flag_items) 13387db96d56Sopenharmony_ci return NULL; 13397db96d56Sopenharmony_ci 13407db96d56Sopenharmony_ci for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { 13417db96d56Sopenharmony_ci if (flags & flag_names[i].value) { 13427db96d56Sopenharmony_ci PyObject *item = PyUnicode_FromString(flag_names[i].name); 13437db96d56Sopenharmony_ci if (!item) 13447db96d56Sopenharmony_ci goto done; 13457db96d56Sopenharmony_ci 13467db96d56Sopenharmony_ci if (PyList_Append(flag_items, item) < 0) { 13477db96d56Sopenharmony_ci Py_DECREF(item); 13487db96d56Sopenharmony_ci goto done; 13497db96d56Sopenharmony_ci } 13507db96d56Sopenharmony_ci Py_DECREF(item); 13517db96d56Sopenharmony_ci flags &= ~flag_names[i].value; 13527db96d56Sopenharmony_ci } 13537db96d56Sopenharmony_ci } 13547db96d56Sopenharmony_ci if (flags) { 13557db96d56Sopenharmony_ci PyObject *item = PyUnicode_FromFormat("0x%x", flags); 13567db96d56Sopenharmony_ci if (!item) 13577db96d56Sopenharmony_ci goto done; 13587db96d56Sopenharmony_ci 13597db96d56Sopenharmony_ci if (PyList_Append(flag_items, item) < 0) { 13607db96d56Sopenharmony_ci Py_DECREF(item); 13617db96d56Sopenharmony_ci goto done; 13627db96d56Sopenharmony_ci } 13637db96d56Sopenharmony_ci Py_DECREF(item); 13647db96d56Sopenharmony_ci } 13657db96d56Sopenharmony_ci 13667db96d56Sopenharmony_ci if (PyList_Size(flag_items) > 0) { 13677db96d56Sopenharmony_ci PyObject *flags_result; 13687db96d56Sopenharmony_ci PyObject *sep = PyUnicode_FromString("|"); 13697db96d56Sopenharmony_ci if (!sep) 13707db96d56Sopenharmony_ci goto done; 13717db96d56Sopenharmony_ci flags_result = PyUnicode_Join(sep, flag_items); 13727db96d56Sopenharmony_ci Py_DECREF(sep); 13737db96d56Sopenharmony_ci if (!flags_result) 13747db96d56Sopenharmony_ci goto done; 13757db96d56Sopenharmony_ci result = PyUnicode_FromFormat("re.compile(%.200R, %S)", 13767db96d56Sopenharmony_ci obj->pattern, flags_result); 13777db96d56Sopenharmony_ci Py_DECREF(flags_result); 13787db96d56Sopenharmony_ci } 13797db96d56Sopenharmony_ci else { 13807db96d56Sopenharmony_ci result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); 13817db96d56Sopenharmony_ci } 13827db96d56Sopenharmony_ci 13837db96d56Sopenharmony_cidone: 13847db96d56Sopenharmony_ci Py_DECREF(flag_items); 13857db96d56Sopenharmony_ci return result; 13867db96d56Sopenharmony_ci} 13877db96d56Sopenharmony_ci 13887db96d56Sopenharmony_ciPyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); 13897db96d56Sopenharmony_ci 13907db96d56Sopenharmony_ci/* PatternObject's 'groupindex' method. */ 13917db96d56Sopenharmony_cistatic PyObject * 13927db96d56Sopenharmony_cipattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) 13937db96d56Sopenharmony_ci{ 13947db96d56Sopenharmony_ci if (self->groupindex == NULL) 13957db96d56Sopenharmony_ci return PyDict_New(); 13967db96d56Sopenharmony_ci return PyDictProxy_New(self->groupindex); 13977db96d56Sopenharmony_ci} 13987db96d56Sopenharmony_ci 13997db96d56Sopenharmony_cistatic int _validate(PatternObject *self); /* Forward */ 14007db96d56Sopenharmony_ci 14017db96d56Sopenharmony_ci/*[clinic input] 14027db96d56Sopenharmony_ci_sre.compile 14037db96d56Sopenharmony_ci 14047db96d56Sopenharmony_ci pattern: object 14057db96d56Sopenharmony_ci flags: int 14067db96d56Sopenharmony_ci code: object(subclass_of='&PyList_Type') 14077db96d56Sopenharmony_ci groups: Py_ssize_t 14087db96d56Sopenharmony_ci groupindex: object(subclass_of='&PyDict_Type') 14097db96d56Sopenharmony_ci indexgroup: object(subclass_of='&PyTuple_Type') 14107db96d56Sopenharmony_ci 14117db96d56Sopenharmony_ci[clinic start generated code]*/ 14127db96d56Sopenharmony_ci 14137db96d56Sopenharmony_cistatic PyObject * 14147db96d56Sopenharmony_ci_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, 14157db96d56Sopenharmony_ci PyObject *code, Py_ssize_t groups, PyObject *groupindex, 14167db96d56Sopenharmony_ci PyObject *indexgroup) 14177db96d56Sopenharmony_ci/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/ 14187db96d56Sopenharmony_ci{ 14197db96d56Sopenharmony_ci /* "compile" pattern descriptor to pattern object */ 14207db96d56Sopenharmony_ci 14217db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state(module); 14227db96d56Sopenharmony_ci PatternObject* self; 14237db96d56Sopenharmony_ci Py_ssize_t i, n; 14247db96d56Sopenharmony_ci 14257db96d56Sopenharmony_ci n = PyList_GET_SIZE(code); 14267db96d56Sopenharmony_ci /* coverity[ampersand_in_size] */ 14277db96d56Sopenharmony_ci self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); 14287db96d56Sopenharmony_ci if (!self) 14297db96d56Sopenharmony_ci return NULL; 14307db96d56Sopenharmony_ci self->weakreflist = NULL; 14317db96d56Sopenharmony_ci self->pattern = NULL; 14327db96d56Sopenharmony_ci self->groupindex = NULL; 14337db96d56Sopenharmony_ci self->indexgroup = NULL; 14347db96d56Sopenharmony_ci 14357db96d56Sopenharmony_ci self->codesize = n; 14367db96d56Sopenharmony_ci 14377db96d56Sopenharmony_ci for (i = 0; i < n; i++) { 14387db96d56Sopenharmony_ci PyObject *o = PyList_GET_ITEM(code, i); 14397db96d56Sopenharmony_ci unsigned long value = PyLong_AsUnsignedLong(o); 14407db96d56Sopenharmony_ci self->code[i] = (SRE_CODE) value; 14417db96d56Sopenharmony_ci if ((unsigned long) self->code[i] != value) { 14427db96d56Sopenharmony_ci PyErr_SetString(PyExc_OverflowError, 14437db96d56Sopenharmony_ci "regular expression code size limit exceeded"); 14447db96d56Sopenharmony_ci break; 14457db96d56Sopenharmony_ci } 14467db96d56Sopenharmony_ci } 14477db96d56Sopenharmony_ci PyObject_GC_Track(self); 14487db96d56Sopenharmony_ci 14497db96d56Sopenharmony_ci if (PyErr_Occurred()) { 14507db96d56Sopenharmony_ci Py_DECREF(self); 14517db96d56Sopenharmony_ci return NULL; 14527db96d56Sopenharmony_ci } 14537db96d56Sopenharmony_ci 14547db96d56Sopenharmony_ci if (pattern == Py_None) { 14557db96d56Sopenharmony_ci self->isbytes = -1; 14567db96d56Sopenharmony_ci } 14577db96d56Sopenharmony_ci else { 14587db96d56Sopenharmony_ci Py_ssize_t p_length; 14597db96d56Sopenharmony_ci int charsize; 14607db96d56Sopenharmony_ci Py_buffer view; 14617db96d56Sopenharmony_ci view.buf = NULL; 14627db96d56Sopenharmony_ci if (!getstring(pattern, &p_length, &self->isbytes, 14637db96d56Sopenharmony_ci &charsize, &view)) { 14647db96d56Sopenharmony_ci Py_DECREF(self); 14657db96d56Sopenharmony_ci return NULL; 14667db96d56Sopenharmony_ci } 14677db96d56Sopenharmony_ci if (view.buf) 14687db96d56Sopenharmony_ci PyBuffer_Release(&view); 14697db96d56Sopenharmony_ci } 14707db96d56Sopenharmony_ci 14717db96d56Sopenharmony_ci Py_INCREF(pattern); 14727db96d56Sopenharmony_ci self->pattern = pattern; 14737db96d56Sopenharmony_ci 14747db96d56Sopenharmony_ci self->flags = flags; 14757db96d56Sopenharmony_ci 14767db96d56Sopenharmony_ci self->groups = groups; 14777db96d56Sopenharmony_ci 14787db96d56Sopenharmony_ci if (PyDict_GET_SIZE(groupindex) > 0) { 14797db96d56Sopenharmony_ci Py_INCREF(groupindex); 14807db96d56Sopenharmony_ci self->groupindex = groupindex; 14817db96d56Sopenharmony_ci if (PyTuple_GET_SIZE(indexgroup) > 0) { 14827db96d56Sopenharmony_ci Py_INCREF(indexgroup); 14837db96d56Sopenharmony_ci self->indexgroup = indexgroup; 14847db96d56Sopenharmony_ci } 14857db96d56Sopenharmony_ci } 14867db96d56Sopenharmony_ci 14877db96d56Sopenharmony_ci if (!_validate(self)) { 14887db96d56Sopenharmony_ci Py_DECREF(self); 14897db96d56Sopenharmony_ci return NULL; 14907db96d56Sopenharmony_ci } 14917db96d56Sopenharmony_ci 14927db96d56Sopenharmony_ci return (PyObject*) self; 14937db96d56Sopenharmony_ci} 14947db96d56Sopenharmony_ci 14957db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 14967db96d56Sopenharmony_ci/* Code validation */ 14977db96d56Sopenharmony_ci 14987db96d56Sopenharmony_ci/* To learn more about this code, have a look at the _compile() function in 14997db96d56Sopenharmony_ci Lib/sre_compile.py. The validation functions below checks the code array 15007db96d56Sopenharmony_ci for conformance with the code patterns generated there. 15017db96d56Sopenharmony_ci 15027db96d56Sopenharmony_ci The nice thing about the generated code is that it is position-independent: 15037db96d56Sopenharmony_ci all jumps are relative jumps forward. Also, jumps don't cross each other: 15047db96d56Sopenharmony_ci the target of a later jump is always earlier than the target of an earlier 15057db96d56Sopenharmony_ci jump. IOW, this is okay: 15067db96d56Sopenharmony_ci 15077db96d56Sopenharmony_ci J---------J-------T--------T 15087db96d56Sopenharmony_ci \ \_____/ / 15097db96d56Sopenharmony_ci \______________________/ 15107db96d56Sopenharmony_ci 15117db96d56Sopenharmony_ci but this is not: 15127db96d56Sopenharmony_ci 15137db96d56Sopenharmony_ci J---------J-------T--------T 15147db96d56Sopenharmony_ci \_________\_____/ / 15157db96d56Sopenharmony_ci \____________/ 15167db96d56Sopenharmony_ci 15177db96d56Sopenharmony_ci It also helps that SRE_CODE is always an unsigned type. 15187db96d56Sopenharmony_ci*/ 15197db96d56Sopenharmony_ci 15207db96d56Sopenharmony_ci/* Defining this one enables tracing of the validator */ 15217db96d56Sopenharmony_ci#undef VVERBOSE 15227db96d56Sopenharmony_ci 15237db96d56Sopenharmony_ci/* Trace macro for the validator */ 15247db96d56Sopenharmony_ci#if defined(VVERBOSE) 15257db96d56Sopenharmony_ci#define VTRACE(v) printf v 15267db96d56Sopenharmony_ci#else 15277db96d56Sopenharmony_ci#define VTRACE(v) do {} while(0) /* do nothing */ 15287db96d56Sopenharmony_ci#endif 15297db96d56Sopenharmony_ci 15307db96d56Sopenharmony_ci/* Report failure */ 15317db96d56Sopenharmony_ci#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0) 15327db96d56Sopenharmony_ci 15337db96d56Sopenharmony_ci/* Extract opcode, argument, or skip count from code array */ 15347db96d56Sopenharmony_ci#define GET_OP \ 15357db96d56Sopenharmony_ci do { \ 15367db96d56Sopenharmony_ci VTRACE(("%p: ", code)); \ 15377db96d56Sopenharmony_ci if (code >= end) FAIL; \ 15387db96d56Sopenharmony_ci op = *code++; \ 15397db96d56Sopenharmony_ci VTRACE(("%lu (op)\n", (unsigned long)op)); \ 15407db96d56Sopenharmony_ci } while (0) 15417db96d56Sopenharmony_ci#define GET_ARG \ 15427db96d56Sopenharmony_ci do { \ 15437db96d56Sopenharmony_ci VTRACE(("%p= ", code)); \ 15447db96d56Sopenharmony_ci if (code >= end) FAIL; \ 15457db96d56Sopenharmony_ci arg = *code++; \ 15467db96d56Sopenharmony_ci VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ 15477db96d56Sopenharmony_ci } while (0) 15487db96d56Sopenharmony_ci#define GET_SKIP_ADJ(adj) \ 15497db96d56Sopenharmony_ci do { \ 15507db96d56Sopenharmony_ci VTRACE(("%p= ", code)); \ 15517db96d56Sopenharmony_ci if (code >= end) FAIL; \ 15527db96d56Sopenharmony_ci skip = *code; \ 15537db96d56Sopenharmony_ci VTRACE(("%lu (skip to %p)\n", \ 15547db96d56Sopenharmony_ci (unsigned long)skip, code+skip)); \ 15557db96d56Sopenharmony_ci if (skip-adj > (uintptr_t)(end - code)) \ 15567db96d56Sopenharmony_ci FAIL; \ 15577db96d56Sopenharmony_ci code++; \ 15587db96d56Sopenharmony_ci } while (0) 15597db96d56Sopenharmony_ci#define GET_SKIP GET_SKIP_ADJ(0) 15607db96d56Sopenharmony_ci 15617db96d56Sopenharmony_cistatic int 15627db96d56Sopenharmony_ci_validate_charset(SRE_CODE *code, SRE_CODE *end) 15637db96d56Sopenharmony_ci{ 15647db96d56Sopenharmony_ci /* Some variables are manipulated by the macros above */ 15657db96d56Sopenharmony_ci SRE_CODE op; 15667db96d56Sopenharmony_ci SRE_CODE arg; 15677db96d56Sopenharmony_ci SRE_CODE offset; 15687db96d56Sopenharmony_ci int i; 15697db96d56Sopenharmony_ci 15707db96d56Sopenharmony_ci while (code < end) { 15717db96d56Sopenharmony_ci GET_OP; 15727db96d56Sopenharmony_ci switch (op) { 15737db96d56Sopenharmony_ci 15747db96d56Sopenharmony_ci case SRE_OP_NEGATE: 15757db96d56Sopenharmony_ci break; 15767db96d56Sopenharmony_ci 15777db96d56Sopenharmony_ci case SRE_OP_LITERAL: 15787db96d56Sopenharmony_ci GET_ARG; 15797db96d56Sopenharmony_ci break; 15807db96d56Sopenharmony_ci 15817db96d56Sopenharmony_ci case SRE_OP_RANGE: 15827db96d56Sopenharmony_ci case SRE_OP_RANGE_UNI_IGNORE: 15837db96d56Sopenharmony_ci GET_ARG; 15847db96d56Sopenharmony_ci GET_ARG; 15857db96d56Sopenharmony_ci break; 15867db96d56Sopenharmony_ci 15877db96d56Sopenharmony_ci case SRE_OP_CHARSET: 15887db96d56Sopenharmony_ci offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ 15897db96d56Sopenharmony_ci if (offset > (uintptr_t)(end - code)) 15907db96d56Sopenharmony_ci FAIL; 15917db96d56Sopenharmony_ci code += offset; 15927db96d56Sopenharmony_ci break; 15937db96d56Sopenharmony_ci 15947db96d56Sopenharmony_ci case SRE_OP_BIGCHARSET: 15957db96d56Sopenharmony_ci GET_ARG; /* Number of blocks */ 15967db96d56Sopenharmony_ci offset = 256/sizeof(SRE_CODE); /* 256-byte table */ 15977db96d56Sopenharmony_ci if (offset > (uintptr_t)(end - code)) 15987db96d56Sopenharmony_ci FAIL; 15997db96d56Sopenharmony_ci /* Make sure that each byte points to a valid block */ 16007db96d56Sopenharmony_ci for (i = 0; i < 256; i++) { 16017db96d56Sopenharmony_ci if (((unsigned char *)code)[i] >= arg) 16027db96d56Sopenharmony_ci FAIL; 16037db96d56Sopenharmony_ci } 16047db96d56Sopenharmony_ci code += offset; 16057db96d56Sopenharmony_ci offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ 16067db96d56Sopenharmony_ci if (offset > (uintptr_t)(end - code)) 16077db96d56Sopenharmony_ci FAIL; 16087db96d56Sopenharmony_ci code += offset; 16097db96d56Sopenharmony_ci break; 16107db96d56Sopenharmony_ci 16117db96d56Sopenharmony_ci case SRE_OP_CATEGORY: 16127db96d56Sopenharmony_ci GET_ARG; 16137db96d56Sopenharmony_ci switch (arg) { 16147db96d56Sopenharmony_ci case SRE_CATEGORY_DIGIT: 16157db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_DIGIT: 16167db96d56Sopenharmony_ci case SRE_CATEGORY_SPACE: 16177db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_SPACE: 16187db96d56Sopenharmony_ci case SRE_CATEGORY_WORD: 16197db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_WORD: 16207db96d56Sopenharmony_ci case SRE_CATEGORY_LINEBREAK: 16217db96d56Sopenharmony_ci case SRE_CATEGORY_NOT_LINEBREAK: 16227db96d56Sopenharmony_ci case SRE_CATEGORY_LOC_WORD: 16237db96d56Sopenharmony_ci case SRE_CATEGORY_LOC_NOT_WORD: 16247db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_DIGIT: 16257db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_DIGIT: 16267db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_SPACE: 16277db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_SPACE: 16287db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_WORD: 16297db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_WORD: 16307db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_LINEBREAK: 16317db96d56Sopenharmony_ci case SRE_CATEGORY_UNI_NOT_LINEBREAK: 16327db96d56Sopenharmony_ci break; 16337db96d56Sopenharmony_ci default: 16347db96d56Sopenharmony_ci FAIL; 16357db96d56Sopenharmony_ci } 16367db96d56Sopenharmony_ci break; 16377db96d56Sopenharmony_ci 16387db96d56Sopenharmony_ci default: 16397db96d56Sopenharmony_ci FAIL; 16407db96d56Sopenharmony_ci 16417db96d56Sopenharmony_ci } 16427db96d56Sopenharmony_ci } 16437db96d56Sopenharmony_ci 16447db96d56Sopenharmony_ci return 0; 16457db96d56Sopenharmony_ci} 16467db96d56Sopenharmony_ci 16477db96d56Sopenharmony_ci/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */ 16487db96d56Sopenharmony_cistatic int 16497db96d56Sopenharmony_ci_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 16507db96d56Sopenharmony_ci{ 16517db96d56Sopenharmony_ci /* Some variables are manipulated by the macros above */ 16527db96d56Sopenharmony_ci SRE_CODE op; 16537db96d56Sopenharmony_ci SRE_CODE arg; 16547db96d56Sopenharmony_ci SRE_CODE skip; 16557db96d56Sopenharmony_ci 16567db96d56Sopenharmony_ci VTRACE(("code=%p, end=%p\n", code, end)); 16577db96d56Sopenharmony_ci 16587db96d56Sopenharmony_ci if (code > end) 16597db96d56Sopenharmony_ci FAIL; 16607db96d56Sopenharmony_ci 16617db96d56Sopenharmony_ci while (code < end) { 16627db96d56Sopenharmony_ci GET_OP; 16637db96d56Sopenharmony_ci switch (op) { 16647db96d56Sopenharmony_ci 16657db96d56Sopenharmony_ci case SRE_OP_MARK: 16667db96d56Sopenharmony_ci /* We don't check whether marks are properly nested; the 16677db96d56Sopenharmony_ci sre_match() code is robust even if they don't, and the worst 16687db96d56Sopenharmony_ci you can get is nonsensical match results. */ 16697db96d56Sopenharmony_ci GET_ARG; 16707db96d56Sopenharmony_ci if (arg > 2 * (size_t)groups + 1) { 16717db96d56Sopenharmony_ci VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); 16727db96d56Sopenharmony_ci FAIL; 16737db96d56Sopenharmony_ci } 16747db96d56Sopenharmony_ci break; 16757db96d56Sopenharmony_ci 16767db96d56Sopenharmony_ci case SRE_OP_LITERAL: 16777db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL: 16787db96d56Sopenharmony_ci case SRE_OP_LITERAL_IGNORE: 16797db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_IGNORE: 16807db96d56Sopenharmony_ci case SRE_OP_LITERAL_UNI_IGNORE: 16817db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_UNI_IGNORE: 16827db96d56Sopenharmony_ci case SRE_OP_LITERAL_LOC_IGNORE: 16837db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_LOC_IGNORE: 16847db96d56Sopenharmony_ci GET_ARG; 16857db96d56Sopenharmony_ci /* The arg is just a character, nothing to check */ 16867db96d56Sopenharmony_ci break; 16877db96d56Sopenharmony_ci 16887db96d56Sopenharmony_ci case SRE_OP_SUCCESS: 16897db96d56Sopenharmony_ci case SRE_OP_FAILURE: 16907db96d56Sopenharmony_ci /* Nothing to check; these normally end the matching process */ 16917db96d56Sopenharmony_ci break; 16927db96d56Sopenharmony_ci 16937db96d56Sopenharmony_ci case SRE_OP_AT: 16947db96d56Sopenharmony_ci GET_ARG; 16957db96d56Sopenharmony_ci switch (arg) { 16967db96d56Sopenharmony_ci case SRE_AT_BEGINNING: 16977db96d56Sopenharmony_ci case SRE_AT_BEGINNING_STRING: 16987db96d56Sopenharmony_ci case SRE_AT_BEGINNING_LINE: 16997db96d56Sopenharmony_ci case SRE_AT_END: 17007db96d56Sopenharmony_ci case SRE_AT_END_LINE: 17017db96d56Sopenharmony_ci case SRE_AT_END_STRING: 17027db96d56Sopenharmony_ci case SRE_AT_BOUNDARY: 17037db96d56Sopenharmony_ci case SRE_AT_NON_BOUNDARY: 17047db96d56Sopenharmony_ci case SRE_AT_LOC_BOUNDARY: 17057db96d56Sopenharmony_ci case SRE_AT_LOC_NON_BOUNDARY: 17067db96d56Sopenharmony_ci case SRE_AT_UNI_BOUNDARY: 17077db96d56Sopenharmony_ci case SRE_AT_UNI_NON_BOUNDARY: 17087db96d56Sopenharmony_ci break; 17097db96d56Sopenharmony_ci default: 17107db96d56Sopenharmony_ci FAIL; 17117db96d56Sopenharmony_ci } 17127db96d56Sopenharmony_ci break; 17137db96d56Sopenharmony_ci 17147db96d56Sopenharmony_ci case SRE_OP_ANY: 17157db96d56Sopenharmony_ci case SRE_OP_ANY_ALL: 17167db96d56Sopenharmony_ci /* These have no operands */ 17177db96d56Sopenharmony_ci break; 17187db96d56Sopenharmony_ci 17197db96d56Sopenharmony_ci case SRE_OP_IN: 17207db96d56Sopenharmony_ci case SRE_OP_IN_IGNORE: 17217db96d56Sopenharmony_ci case SRE_OP_IN_UNI_IGNORE: 17227db96d56Sopenharmony_ci case SRE_OP_IN_LOC_IGNORE: 17237db96d56Sopenharmony_ci GET_SKIP; 17247db96d56Sopenharmony_ci /* Stop 1 before the end; we check the FAILURE below */ 17257db96d56Sopenharmony_ci if (_validate_charset(code, code+skip-2)) 17267db96d56Sopenharmony_ci FAIL; 17277db96d56Sopenharmony_ci if (code[skip-2] != SRE_OP_FAILURE) 17287db96d56Sopenharmony_ci FAIL; 17297db96d56Sopenharmony_ci code += skip-1; 17307db96d56Sopenharmony_ci break; 17317db96d56Sopenharmony_ci 17327db96d56Sopenharmony_ci case SRE_OP_INFO: 17337db96d56Sopenharmony_ci { 17347db96d56Sopenharmony_ci /* A minimal info field is 17357db96d56Sopenharmony_ci <INFO> <1=skip> <2=flags> <3=min> <4=max>; 17367db96d56Sopenharmony_ci If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, 17377db96d56Sopenharmony_ci more follows. */ 17387db96d56Sopenharmony_ci SRE_CODE flags, i; 17397db96d56Sopenharmony_ci SRE_CODE *newcode; 17407db96d56Sopenharmony_ci GET_SKIP; 17417db96d56Sopenharmony_ci newcode = code+skip-1; 17427db96d56Sopenharmony_ci GET_ARG; flags = arg; 17437db96d56Sopenharmony_ci GET_ARG; 17447db96d56Sopenharmony_ci GET_ARG; 17457db96d56Sopenharmony_ci /* Check that only valid flags are present */ 17467db96d56Sopenharmony_ci if ((flags & ~(SRE_INFO_PREFIX | 17477db96d56Sopenharmony_ci SRE_INFO_LITERAL | 17487db96d56Sopenharmony_ci SRE_INFO_CHARSET)) != 0) 17497db96d56Sopenharmony_ci FAIL; 17507db96d56Sopenharmony_ci /* PREFIX and CHARSET are mutually exclusive */ 17517db96d56Sopenharmony_ci if ((flags & SRE_INFO_PREFIX) && 17527db96d56Sopenharmony_ci (flags & SRE_INFO_CHARSET)) 17537db96d56Sopenharmony_ci FAIL; 17547db96d56Sopenharmony_ci /* LITERAL implies PREFIX */ 17557db96d56Sopenharmony_ci if ((flags & SRE_INFO_LITERAL) && 17567db96d56Sopenharmony_ci !(flags & SRE_INFO_PREFIX)) 17577db96d56Sopenharmony_ci FAIL; 17587db96d56Sopenharmony_ci /* Validate the prefix */ 17597db96d56Sopenharmony_ci if (flags & SRE_INFO_PREFIX) { 17607db96d56Sopenharmony_ci SRE_CODE prefix_len; 17617db96d56Sopenharmony_ci GET_ARG; prefix_len = arg; 17627db96d56Sopenharmony_ci GET_ARG; 17637db96d56Sopenharmony_ci /* Here comes the prefix string */ 17647db96d56Sopenharmony_ci if (prefix_len > (uintptr_t)(newcode - code)) 17657db96d56Sopenharmony_ci FAIL; 17667db96d56Sopenharmony_ci code += prefix_len; 17677db96d56Sopenharmony_ci /* And here comes the overlap table */ 17687db96d56Sopenharmony_ci if (prefix_len > (uintptr_t)(newcode - code)) 17697db96d56Sopenharmony_ci FAIL; 17707db96d56Sopenharmony_ci /* Each overlap value should be < prefix_len */ 17717db96d56Sopenharmony_ci for (i = 0; i < prefix_len; i++) { 17727db96d56Sopenharmony_ci if (code[i] >= prefix_len) 17737db96d56Sopenharmony_ci FAIL; 17747db96d56Sopenharmony_ci } 17757db96d56Sopenharmony_ci code += prefix_len; 17767db96d56Sopenharmony_ci } 17777db96d56Sopenharmony_ci /* Validate the charset */ 17787db96d56Sopenharmony_ci if (flags & SRE_INFO_CHARSET) { 17797db96d56Sopenharmony_ci if (_validate_charset(code, newcode-1)) 17807db96d56Sopenharmony_ci FAIL; 17817db96d56Sopenharmony_ci if (newcode[-1] != SRE_OP_FAILURE) 17827db96d56Sopenharmony_ci FAIL; 17837db96d56Sopenharmony_ci code = newcode; 17847db96d56Sopenharmony_ci } 17857db96d56Sopenharmony_ci else if (code != newcode) { 17867db96d56Sopenharmony_ci VTRACE(("code=%p, newcode=%p\n", code, newcode)); 17877db96d56Sopenharmony_ci FAIL; 17887db96d56Sopenharmony_ci } 17897db96d56Sopenharmony_ci } 17907db96d56Sopenharmony_ci break; 17917db96d56Sopenharmony_ci 17927db96d56Sopenharmony_ci case SRE_OP_BRANCH: 17937db96d56Sopenharmony_ci { 17947db96d56Sopenharmony_ci SRE_CODE *target = NULL; 17957db96d56Sopenharmony_ci for (;;) { 17967db96d56Sopenharmony_ci GET_SKIP; 17977db96d56Sopenharmony_ci if (skip == 0) 17987db96d56Sopenharmony_ci break; 17997db96d56Sopenharmony_ci /* Stop 2 before the end; we check the JUMP below */ 18007db96d56Sopenharmony_ci if (_validate_inner(code, code+skip-3, groups)) 18017db96d56Sopenharmony_ci FAIL; 18027db96d56Sopenharmony_ci code += skip-3; 18037db96d56Sopenharmony_ci /* Check that it ends with a JUMP, and that each JUMP 18047db96d56Sopenharmony_ci has the same target */ 18057db96d56Sopenharmony_ci GET_OP; 18067db96d56Sopenharmony_ci if (op != SRE_OP_JUMP) 18077db96d56Sopenharmony_ci FAIL; 18087db96d56Sopenharmony_ci GET_SKIP; 18097db96d56Sopenharmony_ci if (target == NULL) 18107db96d56Sopenharmony_ci target = code+skip-1; 18117db96d56Sopenharmony_ci else if (code+skip-1 != target) 18127db96d56Sopenharmony_ci FAIL; 18137db96d56Sopenharmony_ci } 18147db96d56Sopenharmony_ci if (code != target) 18157db96d56Sopenharmony_ci FAIL; 18167db96d56Sopenharmony_ci } 18177db96d56Sopenharmony_ci break; 18187db96d56Sopenharmony_ci 18197db96d56Sopenharmony_ci case SRE_OP_REPEAT_ONE: 18207db96d56Sopenharmony_ci case SRE_OP_MIN_REPEAT_ONE: 18217db96d56Sopenharmony_ci case SRE_OP_POSSESSIVE_REPEAT_ONE: 18227db96d56Sopenharmony_ci { 18237db96d56Sopenharmony_ci SRE_CODE min, max; 18247db96d56Sopenharmony_ci GET_SKIP; 18257db96d56Sopenharmony_ci GET_ARG; min = arg; 18267db96d56Sopenharmony_ci GET_ARG; max = arg; 18277db96d56Sopenharmony_ci if (min > max) 18287db96d56Sopenharmony_ci FAIL; 18297db96d56Sopenharmony_ci if (max > SRE_MAXREPEAT) 18307db96d56Sopenharmony_ci FAIL; 18317db96d56Sopenharmony_ci if (_validate_inner(code, code+skip-4, groups)) 18327db96d56Sopenharmony_ci FAIL; 18337db96d56Sopenharmony_ci code += skip-4; 18347db96d56Sopenharmony_ci GET_OP; 18357db96d56Sopenharmony_ci if (op != SRE_OP_SUCCESS) 18367db96d56Sopenharmony_ci FAIL; 18377db96d56Sopenharmony_ci } 18387db96d56Sopenharmony_ci break; 18397db96d56Sopenharmony_ci 18407db96d56Sopenharmony_ci case SRE_OP_REPEAT: 18417db96d56Sopenharmony_ci case SRE_OP_POSSESSIVE_REPEAT: 18427db96d56Sopenharmony_ci { 18437db96d56Sopenharmony_ci SRE_CODE op1 = op, min, max; 18447db96d56Sopenharmony_ci GET_SKIP; 18457db96d56Sopenharmony_ci GET_ARG; min = arg; 18467db96d56Sopenharmony_ci GET_ARG; max = arg; 18477db96d56Sopenharmony_ci if (min > max) 18487db96d56Sopenharmony_ci FAIL; 18497db96d56Sopenharmony_ci if (max > SRE_MAXREPEAT) 18507db96d56Sopenharmony_ci FAIL; 18517db96d56Sopenharmony_ci if (_validate_inner(code, code+skip-3, groups)) 18527db96d56Sopenharmony_ci FAIL; 18537db96d56Sopenharmony_ci code += skip-3; 18547db96d56Sopenharmony_ci GET_OP; 18557db96d56Sopenharmony_ci if (op1 == SRE_OP_POSSESSIVE_REPEAT) { 18567db96d56Sopenharmony_ci if (op != SRE_OP_SUCCESS) 18577db96d56Sopenharmony_ci FAIL; 18587db96d56Sopenharmony_ci } 18597db96d56Sopenharmony_ci else { 18607db96d56Sopenharmony_ci if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) 18617db96d56Sopenharmony_ci FAIL; 18627db96d56Sopenharmony_ci } 18637db96d56Sopenharmony_ci } 18647db96d56Sopenharmony_ci break; 18657db96d56Sopenharmony_ci 18667db96d56Sopenharmony_ci case SRE_OP_ATOMIC_GROUP: 18677db96d56Sopenharmony_ci { 18687db96d56Sopenharmony_ci GET_SKIP; 18697db96d56Sopenharmony_ci if (_validate_inner(code, code+skip-2, groups)) 18707db96d56Sopenharmony_ci FAIL; 18717db96d56Sopenharmony_ci code += skip-2; 18727db96d56Sopenharmony_ci GET_OP; 18737db96d56Sopenharmony_ci if (op != SRE_OP_SUCCESS) 18747db96d56Sopenharmony_ci FAIL; 18757db96d56Sopenharmony_ci } 18767db96d56Sopenharmony_ci break; 18777db96d56Sopenharmony_ci 18787db96d56Sopenharmony_ci case SRE_OP_GROUPREF: 18797db96d56Sopenharmony_ci case SRE_OP_GROUPREF_IGNORE: 18807db96d56Sopenharmony_ci case SRE_OP_GROUPREF_UNI_IGNORE: 18817db96d56Sopenharmony_ci case SRE_OP_GROUPREF_LOC_IGNORE: 18827db96d56Sopenharmony_ci GET_ARG; 18837db96d56Sopenharmony_ci if (arg >= (size_t)groups) 18847db96d56Sopenharmony_ci FAIL; 18857db96d56Sopenharmony_ci break; 18867db96d56Sopenharmony_ci 18877db96d56Sopenharmony_ci case SRE_OP_GROUPREF_EXISTS: 18887db96d56Sopenharmony_ci /* The regex syntax for this is: '(?(group)then|else)', where 18897db96d56Sopenharmony_ci 'group' is either an integer group number or a group name, 18907db96d56Sopenharmony_ci 'then' and 'else' are sub-regexes, and 'else' is optional. */ 18917db96d56Sopenharmony_ci GET_ARG; 18927db96d56Sopenharmony_ci if (arg >= (size_t)groups) 18937db96d56Sopenharmony_ci FAIL; 18947db96d56Sopenharmony_ci GET_SKIP_ADJ(1); 18957db96d56Sopenharmony_ci code--; /* The skip is relative to the first arg! */ 18967db96d56Sopenharmony_ci /* There are two possibilities here: if there is both a 'then' 18977db96d56Sopenharmony_ci part and an 'else' part, the generated code looks like: 18987db96d56Sopenharmony_ci 18997db96d56Sopenharmony_ci GROUPREF_EXISTS 19007db96d56Sopenharmony_ci <group> 19017db96d56Sopenharmony_ci <skipyes> 19027db96d56Sopenharmony_ci ...then part... 19037db96d56Sopenharmony_ci JUMP 19047db96d56Sopenharmony_ci <skipno> 19057db96d56Sopenharmony_ci (<skipyes> jumps here) 19067db96d56Sopenharmony_ci ...else part... 19077db96d56Sopenharmony_ci (<skipno> jumps here) 19087db96d56Sopenharmony_ci 19097db96d56Sopenharmony_ci If there is only a 'then' part, it looks like: 19107db96d56Sopenharmony_ci 19117db96d56Sopenharmony_ci GROUPREF_EXISTS 19127db96d56Sopenharmony_ci <group> 19137db96d56Sopenharmony_ci <skip> 19147db96d56Sopenharmony_ci ...then part... 19157db96d56Sopenharmony_ci (<skip> jumps here) 19167db96d56Sopenharmony_ci 19177db96d56Sopenharmony_ci There is no direct way to decide which it is, and we don't want 19187db96d56Sopenharmony_ci to allow arbitrary jumps anywhere in the code; so we just look 19197db96d56Sopenharmony_ci for a JUMP opcode preceding our skip target. 19207db96d56Sopenharmony_ci */ 19217db96d56Sopenharmony_ci VTRACE(("then part:\n")); 19227db96d56Sopenharmony_ci int rc = _validate_inner(code+1, code+skip-1, groups); 19237db96d56Sopenharmony_ci if (rc == 1) { 19247db96d56Sopenharmony_ci VTRACE(("else part:\n")); 19257db96d56Sopenharmony_ci code += skip-2; /* Position after JUMP, at <skipno> */ 19267db96d56Sopenharmony_ci GET_SKIP; 19277db96d56Sopenharmony_ci rc = _validate_inner(code, code+skip-1, groups); 19287db96d56Sopenharmony_ci } 19297db96d56Sopenharmony_ci if (rc) 19307db96d56Sopenharmony_ci FAIL; 19317db96d56Sopenharmony_ci code += skip-1; 19327db96d56Sopenharmony_ci break; 19337db96d56Sopenharmony_ci 19347db96d56Sopenharmony_ci case SRE_OP_ASSERT: 19357db96d56Sopenharmony_ci case SRE_OP_ASSERT_NOT: 19367db96d56Sopenharmony_ci GET_SKIP; 19377db96d56Sopenharmony_ci GET_ARG; /* 0 for lookahead, width for lookbehind */ 19387db96d56Sopenharmony_ci code--; /* Back up over arg to simplify math below */ 19397db96d56Sopenharmony_ci if (arg & 0x80000000) 19407db96d56Sopenharmony_ci FAIL; /* Width too large */ 19417db96d56Sopenharmony_ci /* Stop 1 before the end; we check the SUCCESS below */ 19427db96d56Sopenharmony_ci if (_validate_inner(code+1, code+skip-2, groups)) 19437db96d56Sopenharmony_ci FAIL; 19447db96d56Sopenharmony_ci code += skip-2; 19457db96d56Sopenharmony_ci GET_OP; 19467db96d56Sopenharmony_ci if (op != SRE_OP_SUCCESS) 19477db96d56Sopenharmony_ci FAIL; 19487db96d56Sopenharmony_ci break; 19497db96d56Sopenharmony_ci 19507db96d56Sopenharmony_ci case SRE_OP_JUMP: 19517db96d56Sopenharmony_ci if (code + 1 != end) 19527db96d56Sopenharmony_ci FAIL; 19537db96d56Sopenharmony_ci VTRACE(("JUMP: %d\n", __LINE__)); 19547db96d56Sopenharmony_ci return 1; 19557db96d56Sopenharmony_ci 19567db96d56Sopenharmony_ci default: 19577db96d56Sopenharmony_ci FAIL; 19587db96d56Sopenharmony_ci 19597db96d56Sopenharmony_ci } 19607db96d56Sopenharmony_ci } 19617db96d56Sopenharmony_ci 19627db96d56Sopenharmony_ci VTRACE(("okay\n")); 19637db96d56Sopenharmony_ci return 0; 19647db96d56Sopenharmony_ci} 19657db96d56Sopenharmony_ci 19667db96d56Sopenharmony_cistatic int 19677db96d56Sopenharmony_ci_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 19687db96d56Sopenharmony_ci{ 19697db96d56Sopenharmony_ci if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || 19707db96d56Sopenharmony_ci code >= end || end[-1] != SRE_OP_SUCCESS) 19717db96d56Sopenharmony_ci FAIL; 19727db96d56Sopenharmony_ci return _validate_inner(code, end-1, groups); 19737db96d56Sopenharmony_ci} 19747db96d56Sopenharmony_ci 19757db96d56Sopenharmony_cistatic int 19767db96d56Sopenharmony_ci_validate(PatternObject *self) 19777db96d56Sopenharmony_ci{ 19787db96d56Sopenharmony_ci if (_validate_outer(self->code, self->code+self->codesize, self->groups)) 19797db96d56Sopenharmony_ci { 19807db96d56Sopenharmony_ci PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); 19817db96d56Sopenharmony_ci return 0; 19827db96d56Sopenharmony_ci } 19837db96d56Sopenharmony_ci else 19847db96d56Sopenharmony_ci VTRACE(("Success!\n")); 19857db96d56Sopenharmony_ci return 1; 19867db96d56Sopenharmony_ci} 19877db96d56Sopenharmony_ci 19887db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 19897db96d56Sopenharmony_ci/* match methods */ 19907db96d56Sopenharmony_ci 19917db96d56Sopenharmony_cistatic int 19927db96d56Sopenharmony_cimatch_traverse(MatchObject *self, visitproc visit, void *arg) 19937db96d56Sopenharmony_ci{ 19947db96d56Sopenharmony_ci Py_VISIT(Py_TYPE(self)); 19957db96d56Sopenharmony_ci Py_VISIT(self->string); 19967db96d56Sopenharmony_ci Py_VISIT(self->regs); 19977db96d56Sopenharmony_ci Py_VISIT(self->pattern); 19987db96d56Sopenharmony_ci return 0; 19997db96d56Sopenharmony_ci} 20007db96d56Sopenharmony_ci 20017db96d56Sopenharmony_cistatic int 20027db96d56Sopenharmony_cimatch_clear(MatchObject *self) 20037db96d56Sopenharmony_ci{ 20047db96d56Sopenharmony_ci Py_CLEAR(self->string); 20057db96d56Sopenharmony_ci Py_CLEAR(self->regs); 20067db96d56Sopenharmony_ci Py_CLEAR(self->pattern); 20077db96d56Sopenharmony_ci return 0; 20087db96d56Sopenharmony_ci} 20097db96d56Sopenharmony_ci 20107db96d56Sopenharmony_cistatic void 20117db96d56Sopenharmony_cimatch_dealloc(MatchObject* self) 20127db96d56Sopenharmony_ci{ 20137db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 20147db96d56Sopenharmony_ci 20157db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 20167db96d56Sopenharmony_ci (void)match_clear(self); 20177db96d56Sopenharmony_ci tp->tp_free(self); 20187db96d56Sopenharmony_ci Py_DECREF(tp); 20197db96d56Sopenharmony_ci} 20207db96d56Sopenharmony_ci 20217db96d56Sopenharmony_cistatic PyObject* 20227db96d56Sopenharmony_cimatch_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) 20237db96d56Sopenharmony_ci{ 20247db96d56Sopenharmony_ci Py_ssize_t length; 20257db96d56Sopenharmony_ci int isbytes, charsize; 20267db96d56Sopenharmony_ci Py_buffer view; 20277db96d56Sopenharmony_ci PyObject *result; 20287db96d56Sopenharmony_ci const void* ptr; 20297db96d56Sopenharmony_ci Py_ssize_t i, j; 20307db96d56Sopenharmony_ci 20317db96d56Sopenharmony_ci assert(0 <= index && index < self->groups); 20327db96d56Sopenharmony_ci index *= 2; 20337db96d56Sopenharmony_ci 20347db96d56Sopenharmony_ci if (self->string == Py_None || self->mark[index] < 0) { 20357db96d56Sopenharmony_ci /* return default value if the string or group is undefined */ 20367db96d56Sopenharmony_ci Py_INCREF(def); 20377db96d56Sopenharmony_ci return def; 20387db96d56Sopenharmony_ci } 20397db96d56Sopenharmony_ci 20407db96d56Sopenharmony_ci ptr = getstring(self->string, &length, &isbytes, &charsize, &view); 20417db96d56Sopenharmony_ci if (ptr == NULL) 20427db96d56Sopenharmony_ci return NULL; 20437db96d56Sopenharmony_ci 20447db96d56Sopenharmony_ci i = self->mark[index]; 20457db96d56Sopenharmony_ci j = self->mark[index+1]; 20467db96d56Sopenharmony_ci i = Py_MIN(i, length); 20477db96d56Sopenharmony_ci j = Py_MIN(j, length); 20487db96d56Sopenharmony_ci result = getslice(isbytes, ptr, self->string, i, j); 20497db96d56Sopenharmony_ci if (isbytes && view.buf != NULL) 20507db96d56Sopenharmony_ci PyBuffer_Release(&view); 20517db96d56Sopenharmony_ci return result; 20527db96d56Sopenharmony_ci} 20537db96d56Sopenharmony_ci 20547db96d56Sopenharmony_cistatic Py_ssize_t 20557db96d56Sopenharmony_cimatch_getindex(MatchObject* self, PyObject* index) 20567db96d56Sopenharmony_ci{ 20577db96d56Sopenharmony_ci Py_ssize_t i; 20587db96d56Sopenharmony_ci 20597db96d56Sopenharmony_ci if (index == NULL) 20607db96d56Sopenharmony_ci /* Default value */ 20617db96d56Sopenharmony_ci return 0; 20627db96d56Sopenharmony_ci 20637db96d56Sopenharmony_ci if (PyIndex_Check(index)) { 20647db96d56Sopenharmony_ci i = PyNumber_AsSsize_t(index, NULL); 20657db96d56Sopenharmony_ci } 20667db96d56Sopenharmony_ci else { 20677db96d56Sopenharmony_ci i = -1; 20687db96d56Sopenharmony_ci 20697db96d56Sopenharmony_ci if (self->pattern->groupindex) { 20707db96d56Sopenharmony_ci index = PyDict_GetItemWithError(self->pattern->groupindex, index); 20717db96d56Sopenharmony_ci if (index && PyLong_Check(index)) { 20727db96d56Sopenharmony_ci i = PyLong_AsSsize_t(index); 20737db96d56Sopenharmony_ci } 20747db96d56Sopenharmony_ci } 20757db96d56Sopenharmony_ci } 20767db96d56Sopenharmony_ci if (i < 0 || i >= self->groups) { 20777db96d56Sopenharmony_ci /* raise IndexError if we were given a bad group number */ 20787db96d56Sopenharmony_ci if (!PyErr_Occurred()) { 20797db96d56Sopenharmony_ci PyErr_SetString(PyExc_IndexError, "no such group"); 20807db96d56Sopenharmony_ci } 20817db96d56Sopenharmony_ci return -1; 20827db96d56Sopenharmony_ci } 20837db96d56Sopenharmony_ci 20847db96d56Sopenharmony_ci return i; 20857db96d56Sopenharmony_ci} 20867db96d56Sopenharmony_ci 20877db96d56Sopenharmony_cistatic PyObject* 20887db96d56Sopenharmony_cimatch_getslice(MatchObject* self, PyObject* index, PyObject* def) 20897db96d56Sopenharmony_ci{ 20907db96d56Sopenharmony_ci Py_ssize_t i = match_getindex(self, index); 20917db96d56Sopenharmony_ci 20927db96d56Sopenharmony_ci if (i < 0) { 20937db96d56Sopenharmony_ci return NULL; 20947db96d56Sopenharmony_ci } 20957db96d56Sopenharmony_ci 20967db96d56Sopenharmony_ci return match_getslice_by_index(self, i, def); 20977db96d56Sopenharmony_ci} 20987db96d56Sopenharmony_ci 20997db96d56Sopenharmony_ci/*[clinic input] 21007db96d56Sopenharmony_ci_sre.SRE_Match.expand 21017db96d56Sopenharmony_ci 21027db96d56Sopenharmony_ci template: object 21037db96d56Sopenharmony_ci 21047db96d56Sopenharmony_ciReturn the string obtained by doing backslash substitution on the string template, as done by the sub() method. 21057db96d56Sopenharmony_ci[clinic start generated code]*/ 21067db96d56Sopenharmony_ci 21077db96d56Sopenharmony_cistatic PyObject * 21087db96d56Sopenharmony_ci_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template) 21097db96d56Sopenharmony_ci/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/ 21107db96d56Sopenharmony_ci{ 21117db96d56Sopenharmony_ci /* delegate to Python code */ 21127db96d56Sopenharmony_ci return call( 21137db96d56Sopenharmony_ci SRE_PY_MODULE, "_expand", 21147db96d56Sopenharmony_ci PyTuple_Pack(3, self->pattern, self, template) 21157db96d56Sopenharmony_ci ); 21167db96d56Sopenharmony_ci} 21177db96d56Sopenharmony_ci 21187db96d56Sopenharmony_cistatic PyObject* 21197db96d56Sopenharmony_cimatch_group(MatchObject* self, PyObject* args) 21207db96d56Sopenharmony_ci{ 21217db96d56Sopenharmony_ci PyObject* result; 21227db96d56Sopenharmony_ci Py_ssize_t i, size; 21237db96d56Sopenharmony_ci 21247db96d56Sopenharmony_ci size = PyTuple_GET_SIZE(args); 21257db96d56Sopenharmony_ci 21267db96d56Sopenharmony_ci switch (size) { 21277db96d56Sopenharmony_ci case 0: 21287db96d56Sopenharmony_ci result = match_getslice(self, _PyLong_GetZero(), Py_None); 21297db96d56Sopenharmony_ci break; 21307db96d56Sopenharmony_ci case 1: 21317db96d56Sopenharmony_ci result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); 21327db96d56Sopenharmony_ci break; 21337db96d56Sopenharmony_ci default: 21347db96d56Sopenharmony_ci /* fetch multiple items */ 21357db96d56Sopenharmony_ci result = PyTuple_New(size); 21367db96d56Sopenharmony_ci if (!result) 21377db96d56Sopenharmony_ci return NULL; 21387db96d56Sopenharmony_ci for (i = 0; i < size; i++) { 21397db96d56Sopenharmony_ci PyObject* item = match_getslice( 21407db96d56Sopenharmony_ci self, PyTuple_GET_ITEM(args, i), Py_None 21417db96d56Sopenharmony_ci ); 21427db96d56Sopenharmony_ci if (!item) { 21437db96d56Sopenharmony_ci Py_DECREF(result); 21447db96d56Sopenharmony_ci return NULL; 21457db96d56Sopenharmony_ci } 21467db96d56Sopenharmony_ci PyTuple_SET_ITEM(result, i, item); 21477db96d56Sopenharmony_ci } 21487db96d56Sopenharmony_ci break; 21497db96d56Sopenharmony_ci } 21507db96d56Sopenharmony_ci return result; 21517db96d56Sopenharmony_ci} 21527db96d56Sopenharmony_ci 21537db96d56Sopenharmony_cistatic PyObject* 21547db96d56Sopenharmony_cimatch_getitem(MatchObject* self, PyObject* name) 21557db96d56Sopenharmony_ci{ 21567db96d56Sopenharmony_ci return match_getslice(self, name, Py_None); 21577db96d56Sopenharmony_ci} 21587db96d56Sopenharmony_ci 21597db96d56Sopenharmony_ci/*[clinic input] 21607db96d56Sopenharmony_ci_sre.SRE_Match.groups 21617db96d56Sopenharmony_ci 21627db96d56Sopenharmony_ci default: object = None 21637db96d56Sopenharmony_ci Is used for groups that did not participate in the match. 21647db96d56Sopenharmony_ci 21657db96d56Sopenharmony_ciReturn a tuple containing all the subgroups of the match, from 1. 21667db96d56Sopenharmony_ci[clinic start generated code]*/ 21677db96d56Sopenharmony_ci 21687db96d56Sopenharmony_cistatic PyObject * 21697db96d56Sopenharmony_ci_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value) 21707db96d56Sopenharmony_ci/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/ 21717db96d56Sopenharmony_ci{ 21727db96d56Sopenharmony_ci PyObject* result; 21737db96d56Sopenharmony_ci Py_ssize_t index; 21747db96d56Sopenharmony_ci 21757db96d56Sopenharmony_ci result = PyTuple_New(self->groups-1); 21767db96d56Sopenharmony_ci if (!result) 21777db96d56Sopenharmony_ci return NULL; 21787db96d56Sopenharmony_ci 21797db96d56Sopenharmony_ci for (index = 1; index < self->groups; index++) { 21807db96d56Sopenharmony_ci PyObject* item; 21817db96d56Sopenharmony_ci item = match_getslice_by_index(self, index, default_value); 21827db96d56Sopenharmony_ci if (!item) { 21837db96d56Sopenharmony_ci Py_DECREF(result); 21847db96d56Sopenharmony_ci return NULL; 21857db96d56Sopenharmony_ci } 21867db96d56Sopenharmony_ci PyTuple_SET_ITEM(result, index-1, item); 21877db96d56Sopenharmony_ci } 21887db96d56Sopenharmony_ci 21897db96d56Sopenharmony_ci return result; 21907db96d56Sopenharmony_ci} 21917db96d56Sopenharmony_ci 21927db96d56Sopenharmony_ci/*[clinic input] 21937db96d56Sopenharmony_ci_sre.SRE_Match.groupdict 21947db96d56Sopenharmony_ci 21957db96d56Sopenharmony_ci default: object = None 21967db96d56Sopenharmony_ci Is used for groups that did not participate in the match. 21977db96d56Sopenharmony_ci 21987db96d56Sopenharmony_ciReturn a dictionary containing all the named subgroups of the match, keyed by the subgroup name. 21997db96d56Sopenharmony_ci[clinic start generated code]*/ 22007db96d56Sopenharmony_ci 22017db96d56Sopenharmony_cistatic PyObject * 22027db96d56Sopenharmony_ci_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) 22037db96d56Sopenharmony_ci/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/ 22047db96d56Sopenharmony_ci{ 22057db96d56Sopenharmony_ci PyObject *result; 22067db96d56Sopenharmony_ci PyObject *key; 22077db96d56Sopenharmony_ci PyObject *value; 22087db96d56Sopenharmony_ci Py_ssize_t pos = 0; 22097db96d56Sopenharmony_ci Py_hash_t hash; 22107db96d56Sopenharmony_ci 22117db96d56Sopenharmony_ci result = PyDict_New(); 22127db96d56Sopenharmony_ci if (!result || !self->pattern->groupindex) 22137db96d56Sopenharmony_ci return result; 22147db96d56Sopenharmony_ci 22157db96d56Sopenharmony_ci while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { 22167db96d56Sopenharmony_ci int status; 22177db96d56Sopenharmony_ci Py_INCREF(key); 22187db96d56Sopenharmony_ci value = match_getslice(self, key, default_value); 22197db96d56Sopenharmony_ci if (!value) { 22207db96d56Sopenharmony_ci Py_DECREF(key); 22217db96d56Sopenharmony_ci goto failed; 22227db96d56Sopenharmony_ci } 22237db96d56Sopenharmony_ci status = _PyDict_SetItem_KnownHash(result, key, value, hash); 22247db96d56Sopenharmony_ci Py_DECREF(value); 22257db96d56Sopenharmony_ci Py_DECREF(key); 22267db96d56Sopenharmony_ci if (status < 0) 22277db96d56Sopenharmony_ci goto failed; 22287db96d56Sopenharmony_ci } 22297db96d56Sopenharmony_ci 22307db96d56Sopenharmony_ci return result; 22317db96d56Sopenharmony_ci 22327db96d56Sopenharmony_cifailed: 22337db96d56Sopenharmony_ci Py_DECREF(result); 22347db96d56Sopenharmony_ci return NULL; 22357db96d56Sopenharmony_ci} 22367db96d56Sopenharmony_ci 22377db96d56Sopenharmony_ci/*[clinic input] 22387db96d56Sopenharmony_ci_sre.SRE_Match.start -> Py_ssize_t 22397db96d56Sopenharmony_ci 22407db96d56Sopenharmony_ci group: object(c_default="NULL") = 0 22417db96d56Sopenharmony_ci / 22427db96d56Sopenharmony_ci 22437db96d56Sopenharmony_ciReturn index of the start of the substring matched by group. 22447db96d56Sopenharmony_ci[clinic start generated code]*/ 22457db96d56Sopenharmony_ci 22467db96d56Sopenharmony_cistatic Py_ssize_t 22477db96d56Sopenharmony_ci_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group) 22487db96d56Sopenharmony_ci/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/ 22497db96d56Sopenharmony_ci{ 22507db96d56Sopenharmony_ci Py_ssize_t index = match_getindex(self, group); 22517db96d56Sopenharmony_ci 22527db96d56Sopenharmony_ci if (index < 0) { 22537db96d56Sopenharmony_ci return -1; 22547db96d56Sopenharmony_ci } 22557db96d56Sopenharmony_ci 22567db96d56Sopenharmony_ci /* mark is -1 if group is undefined */ 22577db96d56Sopenharmony_ci return self->mark[index*2]; 22587db96d56Sopenharmony_ci} 22597db96d56Sopenharmony_ci 22607db96d56Sopenharmony_ci/*[clinic input] 22617db96d56Sopenharmony_ci_sre.SRE_Match.end -> Py_ssize_t 22627db96d56Sopenharmony_ci 22637db96d56Sopenharmony_ci group: object(c_default="NULL") = 0 22647db96d56Sopenharmony_ci / 22657db96d56Sopenharmony_ci 22667db96d56Sopenharmony_ciReturn index of the end of the substring matched by group. 22677db96d56Sopenharmony_ci[clinic start generated code]*/ 22687db96d56Sopenharmony_ci 22697db96d56Sopenharmony_cistatic Py_ssize_t 22707db96d56Sopenharmony_ci_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group) 22717db96d56Sopenharmony_ci/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/ 22727db96d56Sopenharmony_ci{ 22737db96d56Sopenharmony_ci Py_ssize_t index = match_getindex(self, group); 22747db96d56Sopenharmony_ci 22757db96d56Sopenharmony_ci if (index < 0) { 22767db96d56Sopenharmony_ci return -1; 22777db96d56Sopenharmony_ci } 22787db96d56Sopenharmony_ci 22797db96d56Sopenharmony_ci /* mark is -1 if group is undefined */ 22807db96d56Sopenharmony_ci return self->mark[index*2+1]; 22817db96d56Sopenharmony_ci} 22827db96d56Sopenharmony_ci 22837db96d56Sopenharmony_ciLOCAL(PyObject*) 22847db96d56Sopenharmony_ci_pair(Py_ssize_t i1, Py_ssize_t i2) 22857db96d56Sopenharmony_ci{ 22867db96d56Sopenharmony_ci PyObject* pair; 22877db96d56Sopenharmony_ci PyObject* item; 22887db96d56Sopenharmony_ci 22897db96d56Sopenharmony_ci pair = PyTuple_New(2); 22907db96d56Sopenharmony_ci if (!pair) 22917db96d56Sopenharmony_ci return NULL; 22927db96d56Sopenharmony_ci 22937db96d56Sopenharmony_ci item = PyLong_FromSsize_t(i1); 22947db96d56Sopenharmony_ci if (!item) 22957db96d56Sopenharmony_ci goto error; 22967db96d56Sopenharmony_ci PyTuple_SET_ITEM(pair, 0, item); 22977db96d56Sopenharmony_ci 22987db96d56Sopenharmony_ci item = PyLong_FromSsize_t(i2); 22997db96d56Sopenharmony_ci if (!item) 23007db96d56Sopenharmony_ci goto error; 23017db96d56Sopenharmony_ci PyTuple_SET_ITEM(pair, 1, item); 23027db96d56Sopenharmony_ci 23037db96d56Sopenharmony_ci return pair; 23047db96d56Sopenharmony_ci 23057db96d56Sopenharmony_ci error: 23067db96d56Sopenharmony_ci Py_DECREF(pair); 23077db96d56Sopenharmony_ci return NULL; 23087db96d56Sopenharmony_ci} 23097db96d56Sopenharmony_ci 23107db96d56Sopenharmony_ci/*[clinic input] 23117db96d56Sopenharmony_ci_sre.SRE_Match.span 23127db96d56Sopenharmony_ci 23137db96d56Sopenharmony_ci group: object(c_default="NULL") = 0 23147db96d56Sopenharmony_ci / 23157db96d56Sopenharmony_ci 23167db96d56Sopenharmony_ciFor match object m, return the 2-tuple (m.start(group), m.end(group)). 23177db96d56Sopenharmony_ci[clinic start generated code]*/ 23187db96d56Sopenharmony_ci 23197db96d56Sopenharmony_cistatic PyObject * 23207db96d56Sopenharmony_ci_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) 23217db96d56Sopenharmony_ci/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ 23227db96d56Sopenharmony_ci{ 23237db96d56Sopenharmony_ci Py_ssize_t index = match_getindex(self, group); 23247db96d56Sopenharmony_ci 23257db96d56Sopenharmony_ci if (index < 0) { 23267db96d56Sopenharmony_ci return NULL; 23277db96d56Sopenharmony_ci } 23287db96d56Sopenharmony_ci 23297db96d56Sopenharmony_ci /* marks are -1 if group is undefined */ 23307db96d56Sopenharmony_ci return _pair(self->mark[index*2], self->mark[index*2+1]); 23317db96d56Sopenharmony_ci} 23327db96d56Sopenharmony_ci 23337db96d56Sopenharmony_cistatic PyObject* 23347db96d56Sopenharmony_cimatch_regs(MatchObject* self) 23357db96d56Sopenharmony_ci{ 23367db96d56Sopenharmony_ci PyObject* regs; 23377db96d56Sopenharmony_ci PyObject* item; 23387db96d56Sopenharmony_ci Py_ssize_t index; 23397db96d56Sopenharmony_ci 23407db96d56Sopenharmony_ci regs = PyTuple_New(self->groups); 23417db96d56Sopenharmony_ci if (!regs) 23427db96d56Sopenharmony_ci return NULL; 23437db96d56Sopenharmony_ci 23447db96d56Sopenharmony_ci for (index = 0; index < self->groups; index++) { 23457db96d56Sopenharmony_ci item = _pair(self->mark[index*2], self->mark[index*2+1]); 23467db96d56Sopenharmony_ci if (!item) { 23477db96d56Sopenharmony_ci Py_DECREF(regs); 23487db96d56Sopenharmony_ci return NULL; 23497db96d56Sopenharmony_ci } 23507db96d56Sopenharmony_ci PyTuple_SET_ITEM(regs, index, item); 23517db96d56Sopenharmony_ci } 23527db96d56Sopenharmony_ci 23537db96d56Sopenharmony_ci Py_INCREF(regs); 23547db96d56Sopenharmony_ci self->regs = regs; 23557db96d56Sopenharmony_ci 23567db96d56Sopenharmony_ci return regs; 23577db96d56Sopenharmony_ci} 23587db96d56Sopenharmony_ci 23597db96d56Sopenharmony_ci/*[clinic input] 23607db96d56Sopenharmony_ci_sre.SRE_Match.__copy__ 23617db96d56Sopenharmony_ci 23627db96d56Sopenharmony_ci[clinic start generated code]*/ 23637db96d56Sopenharmony_ci 23647db96d56Sopenharmony_cistatic PyObject * 23657db96d56Sopenharmony_ci_sre_SRE_Match___copy___impl(MatchObject *self) 23667db96d56Sopenharmony_ci/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ 23677db96d56Sopenharmony_ci{ 23687db96d56Sopenharmony_ci Py_INCREF(self); 23697db96d56Sopenharmony_ci return (PyObject *)self; 23707db96d56Sopenharmony_ci} 23717db96d56Sopenharmony_ci 23727db96d56Sopenharmony_ci/*[clinic input] 23737db96d56Sopenharmony_ci_sre.SRE_Match.__deepcopy__ 23747db96d56Sopenharmony_ci 23757db96d56Sopenharmony_ci memo: object 23767db96d56Sopenharmony_ci / 23777db96d56Sopenharmony_ci 23787db96d56Sopenharmony_ci[clinic start generated code]*/ 23797db96d56Sopenharmony_ci 23807db96d56Sopenharmony_cistatic PyObject * 23817db96d56Sopenharmony_ci_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) 23827db96d56Sopenharmony_ci/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/ 23837db96d56Sopenharmony_ci{ 23847db96d56Sopenharmony_ci Py_INCREF(self); 23857db96d56Sopenharmony_ci return (PyObject *)self; 23867db96d56Sopenharmony_ci} 23877db96d56Sopenharmony_ci 23887db96d56Sopenharmony_ciPyDoc_STRVAR(match_doc, 23897db96d56Sopenharmony_ci"The result of re.match() and re.search().\n\ 23907db96d56Sopenharmony_ciMatch objects always have a boolean value of True."); 23917db96d56Sopenharmony_ci 23927db96d56Sopenharmony_ciPyDoc_STRVAR(match_group_doc, 23937db96d56Sopenharmony_ci"group([group1, ...]) -> str or tuple.\n\ 23947db96d56Sopenharmony_ci Return subgroup(s) of the match by indices or names.\n\ 23957db96d56Sopenharmony_ci For 0 returns the entire match."); 23967db96d56Sopenharmony_ci 23977db96d56Sopenharmony_cistatic PyObject * 23987db96d56Sopenharmony_cimatch_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) 23997db96d56Sopenharmony_ci{ 24007db96d56Sopenharmony_ci if (self->lastindex >= 0) 24017db96d56Sopenharmony_ci return PyLong_FromSsize_t(self->lastindex); 24027db96d56Sopenharmony_ci Py_RETURN_NONE; 24037db96d56Sopenharmony_ci} 24047db96d56Sopenharmony_ci 24057db96d56Sopenharmony_cistatic PyObject * 24067db96d56Sopenharmony_cimatch_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored)) 24077db96d56Sopenharmony_ci{ 24087db96d56Sopenharmony_ci if (self->pattern->indexgroup && 24097db96d56Sopenharmony_ci self->lastindex >= 0 && 24107db96d56Sopenharmony_ci self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) 24117db96d56Sopenharmony_ci { 24127db96d56Sopenharmony_ci PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, 24137db96d56Sopenharmony_ci self->lastindex); 24147db96d56Sopenharmony_ci Py_INCREF(result); 24157db96d56Sopenharmony_ci return result; 24167db96d56Sopenharmony_ci } 24177db96d56Sopenharmony_ci Py_RETURN_NONE; 24187db96d56Sopenharmony_ci} 24197db96d56Sopenharmony_ci 24207db96d56Sopenharmony_cistatic PyObject * 24217db96d56Sopenharmony_cimatch_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) 24227db96d56Sopenharmony_ci{ 24237db96d56Sopenharmony_ci if (self->regs) { 24247db96d56Sopenharmony_ci Py_INCREF(self->regs); 24257db96d56Sopenharmony_ci return self->regs; 24267db96d56Sopenharmony_ci } else 24277db96d56Sopenharmony_ci return match_regs(self); 24287db96d56Sopenharmony_ci} 24297db96d56Sopenharmony_ci 24307db96d56Sopenharmony_cistatic PyObject * 24317db96d56Sopenharmony_cimatch_repr(MatchObject *self) 24327db96d56Sopenharmony_ci{ 24337db96d56Sopenharmony_ci PyObject *result; 24347db96d56Sopenharmony_ci PyObject *group0 = match_getslice_by_index(self, 0, Py_None); 24357db96d56Sopenharmony_ci if (group0 == NULL) 24367db96d56Sopenharmony_ci return NULL; 24377db96d56Sopenharmony_ci result = PyUnicode_FromFormat( 24387db96d56Sopenharmony_ci "<%s object; span=(%zd, %zd), match=%.50R>", 24397db96d56Sopenharmony_ci Py_TYPE(self)->tp_name, 24407db96d56Sopenharmony_ci self->mark[0], self->mark[1], group0); 24417db96d56Sopenharmony_ci Py_DECREF(group0); 24427db96d56Sopenharmony_ci return result; 24437db96d56Sopenharmony_ci} 24447db96d56Sopenharmony_ci 24457db96d56Sopenharmony_ci 24467db96d56Sopenharmony_cistatic PyObject* 24477db96d56Sopenharmony_cipattern_new_match(_sremodulestate* module_state, 24487db96d56Sopenharmony_ci PatternObject* pattern, 24497db96d56Sopenharmony_ci SRE_STATE* state, 24507db96d56Sopenharmony_ci Py_ssize_t status) 24517db96d56Sopenharmony_ci{ 24527db96d56Sopenharmony_ci /* create match object (from state object) */ 24537db96d56Sopenharmony_ci 24547db96d56Sopenharmony_ci MatchObject* match; 24557db96d56Sopenharmony_ci Py_ssize_t i, j; 24567db96d56Sopenharmony_ci char* base; 24577db96d56Sopenharmony_ci int n; 24587db96d56Sopenharmony_ci 24597db96d56Sopenharmony_ci if (status > 0) { 24607db96d56Sopenharmony_ci 24617db96d56Sopenharmony_ci /* create match object (with room for extra group marks) */ 24627db96d56Sopenharmony_ci /* coverity[ampersand_in_size] */ 24637db96d56Sopenharmony_ci match = PyObject_GC_NewVar(MatchObject, 24647db96d56Sopenharmony_ci module_state->Match_Type, 24657db96d56Sopenharmony_ci 2*(pattern->groups+1)); 24667db96d56Sopenharmony_ci if (!match) 24677db96d56Sopenharmony_ci return NULL; 24687db96d56Sopenharmony_ci 24697db96d56Sopenharmony_ci Py_INCREF(pattern); 24707db96d56Sopenharmony_ci match->pattern = pattern; 24717db96d56Sopenharmony_ci 24727db96d56Sopenharmony_ci Py_INCREF(state->string); 24737db96d56Sopenharmony_ci match->string = state->string; 24747db96d56Sopenharmony_ci 24757db96d56Sopenharmony_ci match->regs = NULL; 24767db96d56Sopenharmony_ci match->groups = pattern->groups+1; 24777db96d56Sopenharmony_ci 24787db96d56Sopenharmony_ci /* fill in group slices */ 24797db96d56Sopenharmony_ci 24807db96d56Sopenharmony_ci base = (char*) state->beginning; 24817db96d56Sopenharmony_ci n = state->charsize; 24827db96d56Sopenharmony_ci 24837db96d56Sopenharmony_ci match->mark[0] = ((char*) state->start - base) / n; 24847db96d56Sopenharmony_ci match->mark[1] = ((char*) state->ptr - base) / n; 24857db96d56Sopenharmony_ci 24867db96d56Sopenharmony_ci for (i = j = 0; i < pattern->groups; i++, j+=2) 24877db96d56Sopenharmony_ci if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { 24887db96d56Sopenharmony_ci match->mark[j+2] = ((char*) state->mark[j] - base) / n; 24897db96d56Sopenharmony_ci match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; 24907db96d56Sopenharmony_ci 24917db96d56Sopenharmony_ci /* check wrong span */ 24927db96d56Sopenharmony_ci if (match->mark[j+2] > match->mark[j+3]) { 24937db96d56Sopenharmony_ci PyErr_SetString(PyExc_SystemError, 24947db96d56Sopenharmony_ci "The span of capturing group is wrong," 24957db96d56Sopenharmony_ci " please report a bug for the re module."); 24967db96d56Sopenharmony_ci Py_DECREF(match); 24977db96d56Sopenharmony_ci return NULL; 24987db96d56Sopenharmony_ci } 24997db96d56Sopenharmony_ci } else 25007db96d56Sopenharmony_ci match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ 25017db96d56Sopenharmony_ci 25027db96d56Sopenharmony_ci match->pos = state->pos; 25037db96d56Sopenharmony_ci match->endpos = state->endpos; 25047db96d56Sopenharmony_ci 25057db96d56Sopenharmony_ci match->lastindex = state->lastindex; 25067db96d56Sopenharmony_ci 25077db96d56Sopenharmony_ci PyObject_GC_Track(match); 25087db96d56Sopenharmony_ci return (PyObject*) match; 25097db96d56Sopenharmony_ci 25107db96d56Sopenharmony_ci } else if (status == 0) { 25117db96d56Sopenharmony_ci 25127db96d56Sopenharmony_ci /* no match */ 25137db96d56Sopenharmony_ci Py_RETURN_NONE; 25147db96d56Sopenharmony_ci 25157db96d56Sopenharmony_ci } 25167db96d56Sopenharmony_ci 25177db96d56Sopenharmony_ci /* internal error */ 25187db96d56Sopenharmony_ci pattern_error(status); 25197db96d56Sopenharmony_ci return NULL; 25207db96d56Sopenharmony_ci} 25217db96d56Sopenharmony_ci 25227db96d56Sopenharmony_ci 25237db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */ 25247db96d56Sopenharmony_ci/* scanner methods (experimental) */ 25257db96d56Sopenharmony_ci 25267db96d56Sopenharmony_cistatic int 25277db96d56Sopenharmony_ciscanner_traverse(ScannerObject *self, visitproc visit, void *arg) 25287db96d56Sopenharmony_ci{ 25297db96d56Sopenharmony_ci Py_VISIT(Py_TYPE(self)); 25307db96d56Sopenharmony_ci Py_VISIT(self->pattern); 25317db96d56Sopenharmony_ci return 0; 25327db96d56Sopenharmony_ci} 25337db96d56Sopenharmony_ci 25347db96d56Sopenharmony_cistatic int 25357db96d56Sopenharmony_ciscanner_clear(ScannerObject *self) 25367db96d56Sopenharmony_ci{ 25377db96d56Sopenharmony_ci Py_CLEAR(self->pattern); 25387db96d56Sopenharmony_ci return 0; 25397db96d56Sopenharmony_ci} 25407db96d56Sopenharmony_ci 25417db96d56Sopenharmony_cistatic void 25427db96d56Sopenharmony_ciscanner_dealloc(ScannerObject* self) 25437db96d56Sopenharmony_ci{ 25447db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 25457db96d56Sopenharmony_ci 25467db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 25477db96d56Sopenharmony_ci state_fini(&self->state); 25487db96d56Sopenharmony_ci (void)scanner_clear(self); 25497db96d56Sopenharmony_ci tp->tp_free(self); 25507db96d56Sopenharmony_ci Py_DECREF(tp); 25517db96d56Sopenharmony_ci} 25527db96d56Sopenharmony_ci 25537db96d56Sopenharmony_cistatic int 25547db96d56Sopenharmony_ciscanner_begin(ScannerObject* self) 25557db96d56Sopenharmony_ci{ 25567db96d56Sopenharmony_ci if (self->executing) { 25577db96d56Sopenharmony_ci PyErr_SetString(PyExc_ValueError, 25587db96d56Sopenharmony_ci "regular expression scanner already executing"); 25597db96d56Sopenharmony_ci return 0; 25607db96d56Sopenharmony_ci } 25617db96d56Sopenharmony_ci self->executing = 1; 25627db96d56Sopenharmony_ci return 1; 25637db96d56Sopenharmony_ci} 25647db96d56Sopenharmony_ci 25657db96d56Sopenharmony_cistatic void 25667db96d56Sopenharmony_ciscanner_end(ScannerObject* self) 25677db96d56Sopenharmony_ci{ 25687db96d56Sopenharmony_ci assert(self->executing); 25697db96d56Sopenharmony_ci self->executing = 0; 25707db96d56Sopenharmony_ci} 25717db96d56Sopenharmony_ci 25727db96d56Sopenharmony_ci/*[clinic input] 25737db96d56Sopenharmony_ci_sre.SRE_Scanner.match 25747db96d56Sopenharmony_ci 25757db96d56Sopenharmony_ci cls: defining_class 25767db96d56Sopenharmony_ci / 25777db96d56Sopenharmony_ci 25787db96d56Sopenharmony_ci[clinic start generated code]*/ 25797db96d56Sopenharmony_ci 25807db96d56Sopenharmony_cistatic PyObject * 25817db96d56Sopenharmony_ci_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) 25827db96d56Sopenharmony_ci/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ 25837db96d56Sopenharmony_ci{ 25847db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 25857db96d56Sopenharmony_ci SRE_STATE* state = &self->state; 25867db96d56Sopenharmony_ci PyObject* match; 25877db96d56Sopenharmony_ci Py_ssize_t status; 25887db96d56Sopenharmony_ci 25897db96d56Sopenharmony_ci if (!scanner_begin(self)) { 25907db96d56Sopenharmony_ci return NULL; 25917db96d56Sopenharmony_ci } 25927db96d56Sopenharmony_ci if (state->start == NULL) { 25937db96d56Sopenharmony_ci scanner_end(self); 25947db96d56Sopenharmony_ci Py_RETURN_NONE; 25957db96d56Sopenharmony_ci } 25967db96d56Sopenharmony_ci 25977db96d56Sopenharmony_ci state_reset(state); 25987db96d56Sopenharmony_ci 25997db96d56Sopenharmony_ci state->ptr = state->start; 26007db96d56Sopenharmony_ci 26017db96d56Sopenharmony_ci status = sre_match(state, PatternObject_GetCode(self->pattern)); 26027db96d56Sopenharmony_ci if (PyErr_Occurred()) { 26037db96d56Sopenharmony_ci scanner_end(self); 26047db96d56Sopenharmony_ci return NULL; 26057db96d56Sopenharmony_ci } 26067db96d56Sopenharmony_ci 26077db96d56Sopenharmony_ci match = pattern_new_match(module_state, (PatternObject*) self->pattern, 26087db96d56Sopenharmony_ci state, status); 26097db96d56Sopenharmony_ci 26107db96d56Sopenharmony_ci if (status == 0) 26117db96d56Sopenharmony_ci state->start = NULL; 26127db96d56Sopenharmony_ci else { 26137db96d56Sopenharmony_ci state->must_advance = (state->ptr == state->start); 26147db96d56Sopenharmony_ci state->start = state->ptr; 26157db96d56Sopenharmony_ci } 26167db96d56Sopenharmony_ci 26177db96d56Sopenharmony_ci scanner_end(self); 26187db96d56Sopenharmony_ci return match; 26197db96d56Sopenharmony_ci} 26207db96d56Sopenharmony_ci 26217db96d56Sopenharmony_ci 26227db96d56Sopenharmony_ci/*[clinic input] 26237db96d56Sopenharmony_ci_sre.SRE_Scanner.search 26247db96d56Sopenharmony_ci 26257db96d56Sopenharmony_ci cls: defining_class 26267db96d56Sopenharmony_ci / 26277db96d56Sopenharmony_ci 26287db96d56Sopenharmony_ci[clinic start generated code]*/ 26297db96d56Sopenharmony_ci 26307db96d56Sopenharmony_cistatic PyObject * 26317db96d56Sopenharmony_ci_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) 26327db96d56Sopenharmony_ci/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/ 26337db96d56Sopenharmony_ci{ 26347db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(cls); 26357db96d56Sopenharmony_ci SRE_STATE* state = &self->state; 26367db96d56Sopenharmony_ci PyObject* match; 26377db96d56Sopenharmony_ci Py_ssize_t status; 26387db96d56Sopenharmony_ci 26397db96d56Sopenharmony_ci if (!scanner_begin(self)) { 26407db96d56Sopenharmony_ci return NULL; 26417db96d56Sopenharmony_ci } 26427db96d56Sopenharmony_ci if (state->start == NULL) { 26437db96d56Sopenharmony_ci scanner_end(self); 26447db96d56Sopenharmony_ci Py_RETURN_NONE; 26457db96d56Sopenharmony_ci } 26467db96d56Sopenharmony_ci 26477db96d56Sopenharmony_ci state_reset(state); 26487db96d56Sopenharmony_ci 26497db96d56Sopenharmony_ci state->ptr = state->start; 26507db96d56Sopenharmony_ci 26517db96d56Sopenharmony_ci status = sre_search(state, PatternObject_GetCode(self->pattern)); 26527db96d56Sopenharmony_ci if (PyErr_Occurred()) { 26537db96d56Sopenharmony_ci scanner_end(self); 26547db96d56Sopenharmony_ci return NULL; 26557db96d56Sopenharmony_ci } 26567db96d56Sopenharmony_ci 26577db96d56Sopenharmony_ci match = pattern_new_match(module_state, (PatternObject*) self->pattern, 26587db96d56Sopenharmony_ci state, status); 26597db96d56Sopenharmony_ci 26607db96d56Sopenharmony_ci if (status == 0) 26617db96d56Sopenharmony_ci state->start = NULL; 26627db96d56Sopenharmony_ci else { 26637db96d56Sopenharmony_ci state->must_advance = (state->ptr == state->start); 26647db96d56Sopenharmony_ci state->start = state->ptr; 26657db96d56Sopenharmony_ci } 26667db96d56Sopenharmony_ci 26677db96d56Sopenharmony_ci scanner_end(self); 26687db96d56Sopenharmony_ci return match; 26697db96d56Sopenharmony_ci} 26707db96d56Sopenharmony_ci 26717db96d56Sopenharmony_cistatic PyObject * 26727db96d56Sopenharmony_cipattern_scanner(_sremodulestate *module_state, 26737db96d56Sopenharmony_ci PatternObject *self, 26747db96d56Sopenharmony_ci PyObject *string, 26757db96d56Sopenharmony_ci Py_ssize_t pos, 26767db96d56Sopenharmony_ci Py_ssize_t endpos) 26777db96d56Sopenharmony_ci{ 26787db96d56Sopenharmony_ci ScannerObject* scanner; 26797db96d56Sopenharmony_ci 26807db96d56Sopenharmony_ci /* create scanner object */ 26817db96d56Sopenharmony_ci scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); 26827db96d56Sopenharmony_ci if (!scanner) 26837db96d56Sopenharmony_ci return NULL; 26847db96d56Sopenharmony_ci scanner->pattern = NULL; 26857db96d56Sopenharmony_ci scanner->executing = 0; 26867db96d56Sopenharmony_ci 26877db96d56Sopenharmony_ci /* create search state object */ 26887db96d56Sopenharmony_ci if (!state_init(&scanner->state, self, string, pos, endpos)) { 26897db96d56Sopenharmony_ci Py_DECREF(scanner); 26907db96d56Sopenharmony_ci return NULL; 26917db96d56Sopenharmony_ci } 26927db96d56Sopenharmony_ci 26937db96d56Sopenharmony_ci Py_INCREF(self); 26947db96d56Sopenharmony_ci scanner->pattern = (PyObject*) self; 26957db96d56Sopenharmony_ci 26967db96d56Sopenharmony_ci PyObject_GC_Track(scanner); 26977db96d56Sopenharmony_ci return (PyObject*) scanner; 26987db96d56Sopenharmony_ci} 26997db96d56Sopenharmony_ci 27007db96d56Sopenharmony_cistatic Py_hash_t 27017db96d56Sopenharmony_cipattern_hash(PatternObject *self) 27027db96d56Sopenharmony_ci{ 27037db96d56Sopenharmony_ci Py_hash_t hash, hash2; 27047db96d56Sopenharmony_ci 27057db96d56Sopenharmony_ci hash = PyObject_Hash(self->pattern); 27067db96d56Sopenharmony_ci if (hash == -1) { 27077db96d56Sopenharmony_ci return -1; 27087db96d56Sopenharmony_ci } 27097db96d56Sopenharmony_ci 27107db96d56Sopenharmony_ci hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); 27117db96d56Sopenharmony_ci hash ^= hash2; 27127db96d56Sopenharmony_ci 27137db96d56Sopenharmony_ci hash ^= self->flags; 27147db96d56Sopenharmony_ci hash ^= self->isbytes; 27157db96d56Sopenharmony_ci hash ^= self->codesize; 27167db96d56Sopenharmony_ci 27177db96d56Sopenharmony_ci if (hash == -1) { 27187db96d56Sopenharmony_ci hash = -2; 27197db96d56Sopenharmony_ci } 27207db96d56Sopenharmony_ci return hash; 27217db96d56Sopenharmony_ci} 27227db96d56Sopenharmony_ci 27237db96d56Sopenharmony_cistatic PyObject* 27247db96d56Sopenharmony_cipattern_richcompare(PyObject *lefto, PyObject *righto, int op) 27257db96d56Sopenharmony_ci{ 27267db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(lefto); 27277db96d56Sopenharmony_ci _sremodulestate *module_state = get_sre_module_state_by_class(tp); 27287db96d56Sopenharmony_ci PatternObject *left, *right; 27297db96d56Sopenharmony_ci int cmp; 27307db96d56Sopenharmony_ci 27317db96d56Sopenharmony_ci if (op != Py_EQ && op != Py_NE) { 27327db96d56Sopenharmony_ci Py_RETURN_NOTIMPLEMENTED; 27337db96d56Sopenharmony_ci } 27347db96d56Sopenharmony_ci 27357db96d56Sopenharmony_ci if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) 27367db96d56Sopenharmony_ci { 27377db96d56Sopenharmony_ci Py_RETURN_NOTIMPLEMENTED; 27387db96d56Sopenharmony_ci } 27397db96d56Sopenharmony_ci 27407db96d56Sopenharmony_ci if (lefto == righto) { 27417db96d56Sopenharmony_ci /* a pattern is equal to itself */ 27427db96d56Sopenharmony_ci return PyBool_FromLong(op == Py_EQ); 27437db96d56Sopenharmony_ci } 27447db96d56Sopenharmony_ci 27457db96d56Sopenharmony_ci left = (PatternObject *)lefto; 27467db96d56Sopenharmony_ci right = (PatternObject *)righto; 27477db96d56Sopenharmony_ci 27487db96d56Sopenharmony_ci cmp = (left->flags == right->flags 27497db96d56Sopenharmony_ci && left->isbytes == right->isbytes 27507db96d56Sopenharmony_ci && left->codesize == right->codesize); 27517db96d56Sopenharmony_ci if (cmp) { 27527db96d56Sopenharmony_ci /* Compare the code and the pattern because the same pattern can 27537db96d56Sopenharmony_ci produce different codes depending on the locale used to compile the 27547db96d56Sopenharmony_ci pattern when the re.LOCALE flag is used. Don't compare groups, 27557db96d56Sopenharmony_ci indexgroup nor groupindex: they are derivated from the pattern. */ 27567db96d56Sopenharmony_ci cmp = (memcmp(left->code, right->code, 27577db96d56Sopenharmony_ci sizeof(left->code[0]) * left->codesize) == 0); 27587db96d56Sopenharmony_ci } 27597db96d56Sopenharmony_ci if (cmp) { 27607db96d56Sopenharmony_ci cmp = PyObject_RichCompareBool(left->pattern, right->pattern, 27617db96d56Sopenharmony_ci Py_EQ); 27627db96d56Sopenharmony_ci if (cmp < 0) { 27637db96d56Sopenharmony_ci return NULL; 27647db96d56Sopenharmony_ci } 27657db96d56Sopenharmony_ci } 27667db96d56Sopenharmony_ci if (op == Py_NE) { 27677db96d56Sopenharmony_ci cmp = !cmp; 27687db96d56Sopenharmony_ci } 27697db96d56Sopenharmony_ci return PyBool_FromLong(cmp); 27707db96d56Sopenharmony_ci} 27717db96d56Sopenharmony_ci 27727db96d56Sopenharmony_ci#include "clinic/sre.c.h" 27737db96d56Sopenharmony_ci 27747db96d56Sopenharmony_cistatic PyMethodDef pattern_methods[] = { 27757db96d56Sopenharmony_ci _SRE_SRE_PATTERN_MATCH_METHODDEF 27767db96d56Sopenharmony_ci _SRE_SRE_PATTERN_FULLMATCH_METHODDEF 27777db96d56Sopenharmony_ci _SRE_SRE_PATTERN_SEARCH_METHODDEF 27787db96d56Sopenharmony_ci _SRE_SRE_PATTERN_SUB_METHODDEF 27797db96d56Sopenharmony_ci _SRE_SRE_PATTERN_SUBN_METHODDEF 27807db96d56Sopenharmony_ci _SRE_SRE_PATTERN_FINDALL_METHODDEF 27817db96d56Sopenharmony_ci _SRE_SRE_PATTERN_SPLIT_METHODDEF 27827db96d56Sopenharmony_ci _SRE_SRE_PATTERN_FINDITER_METHODDEF 27837db96d56Sopenharmony_ci _SRE_SRE_PATTERN_SCANNER_METHODDEF 27847db96d56Sopenharmony_ci _SRE_SRE_PATTERN___COPY___METHODDEF 27857db96d56Sopenharmony_ci _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF 27867db96d56Sopenharmony_ci {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, 27877db96d56Sopenharmony_ci PyDoc_STR("See PEP 585")}, 27887db96d56Sopenharmony_ci {NULL, NULL} 27897db96d56Sopenharmony_ci}; 27907db96d56Sopenharmony_ci 27917db96d56Sopenharmony_cistatic PyGetSetDef pattern_getset[] = { 27927db96d56Sopenharmony_ci {"groupindex", (getter)pattern_groupindex, (setter)NULL, 27937db96d56Sopenharmony_ci "A dictionary mapping group names to group numbers."}, 27947db96d56Sopenharmony_ci {NULL} /* Sentinel */ 27957db96d56Sopenharmony_ci}; 27967db96d56Sopenharmony_ci 27977db96d56Sopenharmony_ci#define PAT_OFF(x) offsetof(PatternObject, x) 27987db96d56Sopenharmony_cistatic PyMemberDef pattern_members[] = { 27997db96d56Sopenharmony_ci {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY, 28007db96d56Sopenharmony_ci "The pattern string from which the RE object was compiled."}, 28017db96d56Sopenharmony_ci {"flags", T_INT, PAT_OFF(flags), READONLY, 28027db96d56Sopenharmony_ci "The regex matching flags."}, 28037db96d56Sopenharmony_ci {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY, 28047db96d56Sopenharmony_ci "The number of capturing groups in the pattern."}, 28057db96d56Sopenharmony_ci {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY}, 28067db96d56Sopenharmony_ci {NULL} /* Sentinel */ 28077db96d56Sopenharmony_ci}; 28087db96d56Sopenharmony_ci 28097db96d56Sopenharmony_cistatic PyType_Slot pattern_slots[] = { 28107db96d56Sopenharmony_ci {Py_tp_dealloc, (destructor)pattern_dealloc}, 28117db96d56Sopenharmony_ci {Py_tp_repr, (reprfunc)pattern_repr}, 28127db96d56Sopenharmony_ci {Py_tp_hash, (hashfunc)pattern_hash}, 28137db96d56Sopenharmony_ci {Py_tp_doc, (void *)pattern_doc}, 28147db96d56Sopenharmony_ci {Py_tp_richcompare, pattern_richcompare}, 28157db96d56Sopenharmony_ci {Py_tp_methods, pattern_methods}, 28167db96d56Sopenharmony_ci {Py_tp_members, pattern_members}, 28177db96d56Sopenharmony_ci {Py_tp_getset, pattern_getset}, 28187db96d56Sopenharmony_ci {Py_tp_traverse, pattern_traverse}, 28197db96d56Sopenharmony_ci {Py_tp_clear, pattern_clear}, 28207db96d56Sopenharmony_ci {0, NULL}, 28217db96d56Sopenharmony_ci}; 28227db96d56Sopenharmony_ci 28237db96d56Sopenharmony_cistatic PyType_Spec pattern_spec = { 28247db96d56Sopenharmony_ci .name = "re.Pattern", 28257db96d56Sopenharmony_ci .basicsize = sizeof(PatternObject), 28267db96d56Sopenharmony_ci .itemsize = sizeof(SRE_CODE), 28277db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 28287db96d56Sopenharmony_ci Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 28297db96d56Sopenharmony_ci .slots = pattern_slots, 28307db96d56Sopenharmony_ci}; 28317db96d56Sopenharmony_ci 28327db96d56Sopenharmony_cistatic PyMethodDef match_methods[] = { 28337db96d56Sopenharmony_ci {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, 28347db96d56Sopenharmony_ci _SRE_SRE_MATCH_START_METHODDEF 28357db96d56Sopenharmony_ci _SRE_SRE_MATCH_END_METHODDEF 28367db96d56Sopenharmony_ci _SRE_SRE_MATCH_SPAN_METHODDEF 28377db96d56Sopenharmony_ci _SRE_SRE_MATCH_GROUPS_METHODDEF 28387db96d56Sopenharmony_ci _SRE_SRE_MATCH_GROUPDICT_METHODDEF 28397db96d56Sopenharmony_ci _SRE_SRE_MATCH_EXPAND_METHODDEF 28407db96d56Sopenharmony_ci _SRE_SRE_MATCH___COPY___METHODDEF 28417db96d56Sopenharmony_ci _SRE_SRE_MATCH___DEEPCOPY___METHODDEF 28427db96d56Sopenharmony_ci {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, 28437db96d56Sopenharmony_ci PyDoc_STR("See PEP 585")}, 28447db96d56Sopenharmony_ci {NULL, NULL} 28457db96d56Sopenharmony_ci}; 28467db96d56Sopenharmony_ci 28477db96d56Sopenharmony_cistatic PyGetSetDef match_getset[] = { 28487db96d56Sopenharmony_ci {"lastindex", (getter)match_lastindex_get, (setter)NULL, 28497db96d56Sopenharmony_ci "The integer index of the last matched capturing group."}, 28507db96d56Sopenharmony_ci {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, 28517db96d56Sopenharmony_ci "The name of the last matched capturing group."}, 28527db96d56Sopenharmony_ci {"regs", (getter)match_regs_get, (setter)NULL}, 28537db96d56Sopenharmony_ci {NULL} 28547db96d56Sopenharmony_ci}; 28557db96d56Sopenharmony_ci 28567db96d56Sopenharmony_ci#define MATCH_OFF(x) offsetof(MatchObject, x) 28577db96d56Sopenharmony_cistatic PyMemberDef match_members[] = { 28587db96d56Sopenharmony_ci {"string", T_OBJECT, MATCH_OFF(string), READONLY, 28597db96d56Sopenharmony_ci "The string passed to match() or search()."}, 28607db96d56Sopenharmony_ci {"re", T_OBJECT, MATCH_OFF(pattern), READONLY, 28617db96d56Sopenharmony_ci "The regular expression object."}, 28627db96d56Sopenharmony_ci {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY, 28637db96d56Sopenharmony_ci "The index into the string at which the RE engine started looking for a match."}, 28647db96d56Sopenharmony_ci {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY, 28657db96d56Sopenharmony_ci "The index into the string beyond which the RE engine will not go."}, 28667db96d56Sopenharmony_ci {NULL} 28677db96d56Sopenharmony_ci}; 28687db96d56Sopenharmony_ci 28697db96d56Sopenharmony_ci/* FIXME: implement setattr("string", None) as a special case (to 28707db96d56Sopenharmony_ci detach the associated string, if any */ 28717db96d56Sopenharmony_cistatic PyType_Slot match_slots[] = { 28727db96d56Sopenharmony_ci {Py_tp_dealloc, match_dealloc}, 28737db96d56Sopenharmony_ci {Py_tp_repr, match_repr}, 28747db96d56Sopenharmony_ci {Py_tp_doc, (void *)match_doc}, 28757db96d56Sopenharmony_ci {Py_tp_methods, match_methods}, 28767db96d56Sopenharmony_ci {Py_tp_members, match_members}, 28777db96d56Sopenharmony_ci {Py_tp_getset, match_getset}, 28787db96d56Sopenharmony_ci {Py_tp_traverse, match_traverse}, 28797db96d56Sopenharmony_ci {Py_tp_clear, match_clear}, 28807db96d56Sopenharmony_ci 28817db96d56Sopenharmony_ci /* As mapping. 28827db96d56Sopenharmony_ci * 28837db96d56Sopenharmony_ci * Match objects do not support length or assignment, but do support 28847db96d56Sopenharmony_ci * __getitem__. 28857db96d56Sopenharmony_ci */ 28867db96d56Sopenharmony_ci {Py_mp_subscript, match_getitem}, 28877db96d56Sopenharmony_ci 28887db96d56Sopenharmony_ci {0, NULL}, 28897db96d56Sopenharmony_ci}; 28907db96d56Sopenharmony_ci 28917db96d56Sopenharmony_cistatic PyType_Spec match_spec = { 28927db96d56Sopenharmony_ci .name = "re.Match", 28937db96d56Sopenharmony_ci .basicsize = sizeof(MatchObject), 28947db96d56Sopenharmony_ci .itemsize = sizeof(Py_ssize_t), 28957db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 28967db96d56Sopenharmony_ci Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 28977db96d56Sopenharmony_ci .slots = match_slots, 28987db96d56Sopenharmony_ci}; 28997db96d56Sopenharmony_ci 29007db96d56Sopenharmony_cistatic PyMethodDef scanner_methods[] = { 29017db96d56Sopenharmony_ci _SRE_SRE_SCANNER_MATCH_METHODDEF 29027db96d56Sopenharmony_ci _SRE_SRE_SCANNER_SEARCH_METHODDEF 29037db96d56Sopenharmony_ci {NULL, NULL} 29047db96d56Sopenharmony_ci}; 29057db96d56Sopenharmony_ci 29067db96d56Sopenharmony_ci#define SCAN_OFF(x) offsetof(ScannerObject, x) 29077db96d56Sopenharmony_cistatic PyMemberDef scanner_members[] = { 29087db96d56Sopenharmony_ci {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, 29097db96d56Sopenharmony_ci {NULL} /* Sentinel */ 29107db96d56Sopenharmony_ci}; 29117db96d56Sopenharmony_ci 29127db96d56Sopenharmony_cistatic PyType_Slot scanner_slots[] = { 29137db96d56Sopenharmony_ci {Py_tp_dealloc, scanner_dealloc}, 29147db96d56Sopenharmony_ci {Py_tp_methods, scanner_methods}, 29157db96d56Sopenharmony_ci {Py_tp_members, scanner_members}, 29167db96d56Sopenharmony_ci {Py_tp_traverse, scanner_traverse}, 29177db96d56Sopenharmony_ci {Py_tp_clear, scanner_clear}, 29187db96d56Sopenharmony_ci {0, NULL}, 29197db96d56Sopenharmony_ci}; 29207db96d56Sopenharmony_ci 29217db96d56Sopenharmony_cistatic PyType_Spec scanner_spec = { 29227db96d56Sopenharmony_ci .name = "_" SRE_MODULE ".SRE_Scanner", 29237db96d56Sopenharmony_ci .basicsize = sizeof(ScannerObject), 29247db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 29257db96d56Sopenharmony_ci Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 29267db96d56Sopenharmony_ci .slots = scanner_slots, 29277db96d56Sopenharmony_ci}; 29287db96d56Sopenharmony_ci 29297db96d56Sopenharmony_cistatic PyMethodDef _functions[] = { 29307db96d56Sopenharmony_ci _SRE_COMPILE_METHODDEF 29317db96d56Sopenharmony_ci _SRE_GETCODESIZE_METHODDEF 29327db96d56Sopenharmony_ci _SRE_ASCII_ISCASED_METHODDEF 29337db96d56Sopenharmony_ci _SRE_UNICODE_ISCASED_METHODDEF 29347db96d56Sopenharmony_ci _SRE_ASCII_TOLOWER_METHODDEF 29357db96d56Sopenharmony_ci _SRE_UNICODE_TOLOWER_METHODDEF 29367db96d56Sopenharmony_ci {NULL, NULL} 29377db96d56Sopenharmony_ci}; 29387db96d56Sopenharmony_ci 29397db96d56Sopenharmony_cistatic int 29407db96d56Sopenharmony_cisre_traverse(PyObject *module, visitproc visit, void *arg) 29417db96d56Sopenharmony_ci{ 29427db96d56Sopenharmony_ci _sremodulestate *state = get_sre_module_state(module); 29437db96d56Sopenharmony_ci 29447db96d56Sopenharmony_ci Py_VISIT(state->Pattern_Type); 29457db96d56Sopenharmony_ci Py_VISIT(state->Match_Type); 29467db96d56Sopenharmony_ci Py_VISIT(state->Scanner_Type); 29477db96d56Sopenharmony_ci 29487db96d56Sopenharmony_ci return 0; 29497db96d56Sopenharmony_ci} 29507db96d56Sopenharmony_ci 29517db96d56Sopenharmony_cistatic int 29527db96d56Sopenharmony_cisre_clear(PyObject *module) 29537db96d56Sopenharmony_ci{ 29547db96d56Sopenharmony_ci _sremodulestate *state = get_sre_module_state(module); 29557db96d56Sopenharmony_ci 29567db96d56Sopenharmony_ci Py_CLEAR(state->Pattern_Type); 29577db96d56Sopenharmony_ci Py_CLEAR(state->Match_Type); 29587db96d56Sopenharmony_ci Py_CLEAR(state->Scanner_Type); 29597db96d56Sopenharmony_ci 29607db96d56Sopenharmony_ci return 0; 29617db96d56Sopenharmony_ci} 29627db96d56Sopenharmony_ci 29637db96d56Sopenharmony_cistatic void 29647db96d56Sopenharmony_cisre_free(void *module) 29657db96d56Sopenharmony_ci{ 29667db96d56Sopenharmony_ci sre_clear((PyObject *)module); 29677db96d56Sopenharmony_ci} 29687db96d56Sopenharmony_ci 29697db96d56Sopenharmony_ci#define CREATE_TYPE(m, type, spec) \ 29707db96d56Sopenharmony_cido { \ 29717db96d56Sopenharmony_ci type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \ 29727db96d56Sopenharmony_ci if (type == NULL) { \ 29737db96d56Sopenharmony_ci goto error; \ 29747db96d56Sopenharmony_ci } \ 29757db96d56Sopenharmony_ci} while (0) 29767db96d56Sopenharmony_ci 29777db96d56Sopenharmony_ci#define ADD_ULONG_CONSTANT(module, name, value) \ 29787db96d56Sopenharmony_ci do { \ 29797db96d56Sopenharmony_ci PyObject *o = PyLong_FromUnsignedLong(value); \ 29807db96d56Sopenharmony_ci if (!o) \ 29817db96d56Sopenharmony_ci goto error; \ 29827db96d56Sopenharmony_ci int res = PyModule_AddObjectRef(module, name, o); \ 29837db96d56Sopenharmony_ci Py_DECREF(o); \ 29847db96d56Sopenharmony_ci if (res < 0) { \ 29857db96d56Sopenharmony_ci goto error; \ 29867db96d56Sopenharmony_ci } \ 29877db96d56Sopenharmony_ci} while (0) 29887db96d56Sopenharmony_ci 29897db96d56Sopenharmony_cistatic int 29907db96d56Sopenharmony_cisre_exec(PyObject *m) 29917db96d56Sopenharmony_ci{ 29927db96d56Sopenharmony_ci _sremodulestate *state; 29937db96d56Sopenharmony_ci 29947db96d56Sopenharmony_ci /* Create heap types */ 29957db96d56Sopenharmony_ci state = get_sre_module_state(m); 29967db96d56Sopenharmony_ci CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); 29977db96d56Sopenharmony_ci CREATE_TYPE(m, state->Match_Type, &match_spec); 29987db96d56Sopenharmony_ci CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); 29997db96d56Sopenharmony_ci 30007db96d56Sopenharmony_ci if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) { 30017db96d56Sopenharmony_ci goto error; 30027db96d56Sopenharmony_ci } 30037db96d56Sopenharmony_ci 30047db96d56Sopenharmony_ci if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) { 30057db96d56Sopenharmony_ci goto error; 30067db96d56Sopenharmony_ci } 30077db96d56Sopenharmony_ci 30087db96d56Sopenharmony_ci ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT); 30097db96d56Sopenharmony_ci ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS); 30107db96d56Sopenharmony_ci 30117db96d56Sopenharmony_ci if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) { 30127db96d56Sopenharmony_ci goto error; 30137db96d56Sopenharmony_ci } 30147db96d56Sopenharmony_ci 30157db96d56Sopenharmony_ci return 0; 30167db96d56Sopenharmony_ci 30177db96d56Sopenharmony_cierror: 30187db96d56Sopenharmony_ci return -1; 30197db96d56Sopenharmony_ci} 30207db96d56Sopenharmony_ci 30217db96d56Sopenharmony_cistatic PyModuleDef_Slot sre_slots[] = { 30227db96d56Sopenharmony_ci {Py_mod_exec, sre_exec}, 30237db96d56Sopenharmony_ci {0, NULL}, 30247db96d56Sopenharmony_ci}; 30257db96d56Sopenharmony_ci 30267db96d56Sopenharmony_cistatic struct PyModuleDef sremodule = { 30277db96d56Sopenharmony_ci .m_base = PyModuleDef_HEAD_INIT, 30287db96d56Sopenharmony_ci .m_name = "_" SRE_MODULE, 30297db96d56Sopenharmony_ci .m_size = sizeof(_sremodulestate), 30307db96d56Sopenharmony_ci .m_methods = _functions, 30317db96d56Sopenharmony_ci .m_slots = sre_slots, 30327db96d56Sopenharmony_ci .m_traverse = sre_traverse, 30337db96d56Sopenharmony_ci .m_free = sre_free, 30347db96d56Sopenharmony_ci .m_clear = sre_clear, 30357db96d56Sopenharmony_ci}; 30367db96d56Sopenharmony_ci 30377db96d56Sopenharmony_ciPyMODINIT_FUNC 30387db96d56Sopenharmony_ciPyInit__sre(void) 30397db96d56Sopenharmony_ci{ 30407db96d56Sopenharmony_ci return PyModuleDef_Init(&sremodule); 30417db96d56Sopenharmony_ci} 30427db96d56Sopenharmony_ci 30437db96d56Sopenharmony_ci/* vim:ts=4:sw=4:et 30447db96d56Sopenharmony_ci*/ 3045