/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
377db96d56Sopenharmony_ci
/* Version/copyright banner of the SRE engine. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
407db96d56Sopenharmony_ci
417db96d56Sopenharmony_ci#define PY_SSIZE_T_CLEAN
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci#include "Python.h"
447db96d56Sopenharmony_ci#include "pycore_long.h"          // _PyLong_GetZero()
457db96d56Sopenharmony_ci#include "pycore_moduleobject.h"  // _PyModule_GetState()
467db96d56Sopenharmony_ci#include "structmember.h"         // PyMemberDef
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_ci#include "sre.h"
497db96d56Sopenharmony_ci
/* Number of bits in one SRE_CODE unit. */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))

#include <ctype.h>

/* Name of this module, minus the leading underscore.  Only defined here
   when it has not already been defined. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level module built on
   top of this one -- confirm at the use sites. */
#define SRE_PY_MODULE "re"

/* Tracing is enabled by defining VERBOSE; it is forced off here. */
#undef VERBOSE
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
657db96d56Sopenharmony_ci
/* LOCAL(type) spells the declaration specifiers for a file-local inline
   helper returning `type`. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
747db96d56Sopenharmony_ci
/* Negative status codes reported by the engine (see pattern_error()). */
#define SRE_ERROR_ILLEGAL -1          /* illegal opcode */
#define SRE_ERROR_STATE -2            /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3  /* runaway recursion */
#define SRE_ERROR_MEMORY -9           /* out of memory */
#define SRE_ERROR_INTERRUPTED -10     /* signal handler raised exception */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise; note the doubled parentheses at call sites. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
897db96d56Sopenharmony_ci/* search engine state */
907db96d56Sopenharmony_ci
/* ASCII character predicates.  Each one first bounds `ch` by the largest
   character that can possibly match, and only then consults the Py_IS*
   classification macro. */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
997db96d56Sopenharmony_ci
/* Lowercase `ch` with Py_TOLOWER when it is below 128; any other value is
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
1047db96d56Sopenharmony_ci
/* locale-specific character predicates */
/* The range test is written as ((c & ~N) == 0), which equals (c < N+1) for
 * any unsigned c; this form avoids warnings when c's type supports only
 * numbers < N+1.  Values outside 0..255 are never alphanumeric. */
#define SRE_LOC_IS_ALNUM(ch) \
    ((((ch) & ~255) == 0) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) \
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
1107db96d56Sopenharmony_ci
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower((int)ch);
}
1157db96d56Sopenharmony_ci
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper((int)ch);
}
1207db96d56Sopenharmony_ci
/* unicode-specific character predicates: thin aliases for the
   Py_UNICODE_IS* macros, plus a "word" test that also accepts '_' */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
1287db96d56Sopenharmony_ci
/* Lowercase `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
1337db96d56Sopenharmony_ci
/* Uppercase `ch` using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_ciLOCAL(int)
1407db96d56Sopenharmony_cisre_category(SRE_CODE category, unsigned int ch)
1417db96d56Sopenharmony_ci{
1427db96d56Sopenharmony_ci    switch (category) {
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci    case SRE_CATEGORY_DIGIT:
1457db96d56Sopenharmony_ci        return SRE_IS_DIGIT(ch);
1467db96d56Sopenharmony_ci    case SRE_CATEGORY_NOT_DIGIT:
1477db96d56Sopenharmony_ci        return !SRE_IS_DIGIT(ch);
1487db96d56Sopenharmony_ci    case SRE_CATEGORY_SPACE:
1497db96d56Sopenharmony_ci        return SRE_IS_SPACE(ch);
1507db96d56Sopenharmony_ci    case SRE_CATEGORY_NOT_SPACE:
1517db96d56Sopenharmony_ci        return !SRE_IS_SPACE(ch);
1527db96d56Sopenharmony_ci    case SRE_CATEGORY_WORD:
1537db96d56Sopenharmony_ci        return SRE_IS_WORD(ch);
1547db96d56Sopenharmony_ci    case SRE_CATEGORY_NOT_WORD:
1557db96d56Sopenharmony_ci        return !SRE_IS_WORD(ch);
1567db96d56Sopenharmony_ci    case SRE_CATEGORY_LINEBREAK:
1577db96d56Sopenharmony_ci        return SRE_IS_LINEBREAK(ch);
1587db96d56Sopenharmony_ci    case SRE_CATEGORY_NOT_LINEBREAK:
1597db96d56Sopenharmony_ci        return !SRE_IS_LINEBREAK(ch);
1607db96d56Sopenharmony_ci
1617db96d56Sopenharmony_ci    case SRE_CATEGORY_LOC_WORD:
1627db96d56Sopenharmony_ci        return SRE_LOC_IS_WORD(ch);
1637db96d56Sopenharmony_ci    case SRE_CATEGORY_LOC_NOT_WORD:
1647db96d56Sopenharmony_ci        return !SRE_LOC_IS_WORD(ch);
1657db96d56Sopenharmony_ci
1667db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_DIGIT:
1677db96d56Sopenharmony_ci        return SRE_UNI_IS_DIGIT(ch);
1687db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_NOT_DIGIT:
1697db96d56Sopenharmony_ci        return !SRE_UNI_IS_DIGIT(ch);
1707db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_SPACE:
1717db96d56Sopenharmony_ci        return SRE_UNI_IS_SPACE(ch);
1727db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_NOT_SPACE:
1737db96d56Sopenharmony_ci        return !SRE_UNI_IS_SPACE(ch);
1747db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_WORD:
1757db96d56Sopenharmony_ci        return SRE_UNI_IS_WORD(ch);
1767db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_NOT_WORD:
1777db96d56Sopenharmony_ci        return !SRE_UNI_IS_WORD(ch);
1787db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_LINEBREAK:
1797db96d56Sopenharmony_ci        return SRE_UNI_IS_LINEBREAK(ch);
1807db96d56Sopenharmony_ci    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1817db96d56Sopenharmony_ci        return !SRE_UNI_IS_LINEBREAK(ch);
1827db96d56Sopenharmony_ci    }
1837db96d56Sopenharmony_ci    return 0;
1847db96d56Sopenharmony_ci}
1857db96d56Sopenharmony_ci
1867db96d56Sopenharmony_ciLOCAL(int)
1877db96d56Sopenharmony_cichar_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
1887db96d56Sopenharmony_ci{
1897db96d56Sopenharmony_ci    return ch == pattern
1907db96d56Sopenharmony_ci        || (SRE_CODE) sre_lower_locale(ch) == pattern
1917db96d56Sopenharmony_ci        || (SRE_CODE) sre_upper_locale(ch) == pattern;
1927db96d56Sopenharmony_ci}
1937db96d56Sopenharmony_ci
1947db96d56Sopenharmony_ci
1957db96d56Sopenharmony_ci/* helpers */
1967db96d56Sopenharmony_ci
1977db96d56Sopenharmony_cistatic void
1987db96d56Sopenharmony_cidata_stack_dealloc(SRE_STATE* state)
1997db96d56Sopenharmony_ci{
2007db96d56Sopenharmony_ci    if (state->data_stack) {
2017db96d56Sopenharmony_ci        PyMem_Free(state->data_stack);
2027db96d56Sopenharmony_ci        state->data_stack = NULL;
2037db96d56Sopenharmony_ci    }
2047db96d56Sopenharmony_ci    state->data_stack_size = state->data_stack_base = 0;
2057db96d56Sopenharmony_ci}
2067db96d56Sopenharmony_ci
2077db96d56Sopenharmony_cistatic int
2087db96d56Sopenharmony_cidata_stack_grow(SRE_STATE* state, Py_ssize_t size)
2097db96d56Sopenharmony_ci{
2107db96d56Sopenharmony_ci    Py_ssize_t minsize, cursize;
2117db96d56Sopenharmony_ci    minsize = state->data_stack_base+size;
2127db96d56Sopenharmony_ci    cursize = state->data_stack_size;
2137db96d56Sopenharmony_ci    if (cursize < minsize) {
2147db96d56Sopenharmony_ci        void* stack;
2157db96d56Sopenharmony_ci        cursize = minsize+minsize/4+1024;
2167db96d56Sopenharmony_ci        TRACE(("allocate/grow stack %zd\n", cursize));
2177db96d56Sopenharmony_ci        stack = PyMem_Realloc(state->data_stack, cursize);
2187db96d56Sopenharmony_ci        if (!stack) {
2197db96d56Sopenharmony_ci            data_stack_dealloc(state);
2207db96d56Sopenharmony_ci            return SRE_ERROR_MEMORY;
2217db96d56Sopenharmony_ci        }
2227db96d56Sopenharmony_ci        state->data_stack = (char *)stack;
2237db96d56Sopenharmony_ci        state->data_stack_size = cursize;
2247db96d56Sopenharmony_ci    }
2257db96d56Sopenharmony_ci    return 0;
2267db96d56Sopenharmony_ci}
2277db96d56Sopenharmony_ci
/* The engine template in sre_lib.h is included three times, once per
 * character width.  Before each inclusion three macros are set:
 *   SRE_CHAR         the character type for that width
 *   SIZEOF_SRE_CHAR  its size in bytes
 *   SRE(F)           pastes a width-specific prefix onto the name F, so the
 *                    three expansions yield distinct functions (e.g.
 *                    sre_ucs1_match, sre_ucs2_match, sre_ucs4_match).
 * NOTE(review): the macros are redefined below without an intervening
 * #undef, so sre_lib.h presumably #undefs them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
2487db96d56Sopenharmony_ci
2497db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
2507db96d56Sopenharmony_ci/* factories and destructors */
2517db96d56Sopenharmony_ci
/* module state: the type objects that the Argument Clinic class
   declarations in this file look up through
   get_sre_module_state_by_class() */
typedef struct {
    PyTypeObject *Pattern_Type;   /* type of _sre.SRE_Pattern objects */
    PyTypeObject *Match_Type;     /* type of _sre.SRE_Match objects */
    PyTypeObject *Scanner_Type;   /* type of _sre.SRE_Scanner objects */
} _sremodulestate;
2587db96d56Sopenharmony_ci
2597db96d56Sopenharmony_cistatic _sremodulestate *
2607db96d56Sopenharmony_ciget_sre_module_state(PyObject *m)
2617db96d56Sopenharmony_ci{
2627db96d56Sopenharmony_ci    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
2637db96d56Sopenharmony_ci    assert(state);
2647db96d56Sopenharmony_ci    return state;
2657db96d56Sopenharmony_ci}
2667db96d56Sopenharmony_ci
2677db96d56Sopenharmony_cistatic struct PyModuleDef sremodule;
2687db96d56Sopenharmony_ci#define get_sre_module_state_by_class(cls) \
2697db96d56Sopenharmony_ci    (get_sre_module_state(PyType_GetModule(cls)))
2707db96d56Sopenharmony_ci
2717db96d56Sopenharmony_ci/* see sre.h for object declarations */
2727db96d56Sopenharmony_cistatic PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
2737db96d56Sopenharmony_cistatic PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
2747db96d56Sopenharmony_ci
2757db96d56Sopenharmony_ci/*[clinic input]
2767db96d56Sopenharmony_cimodule _sre
2777db96d56Sopenharmony_ciclass _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
2787db96d56Sopenharmony_ciclass _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
2797db96d56Sopenharmony_ciclass _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
2807db96d56Sopenharmony_ci[clinic start generated code]*/
2817db96d56Sopenharmony_ci/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
2827db96d56Sopenharmony_ci
2837db96d56Sopenharmony_ci/*[clinic input]
2847db96d56Sopenharmony_ci_sre.getcodesize -> int
2857db96d56Sopenharmony_ci[clinic start generated code]*/
2867db96d56Sopenharmony_ci
2877db96d56Sopenharmony_cistatic int
2887db96d56Sopenharmony_ci_sre_getcodesize_impl(PyObject *module)
2897db96d56Sopenharmony_ci/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
2907db96d56Sopenharmony_ci{
2917db96d56Sopenharmony_ci    return sizeof(SRE_CODE);
2927db96d56Sopenharmony_ci}
2937db96d56Sopenharmony_ci
2947db96d56Sopenharmony_ci/*[clinic input]
2957db96d56Sopenharmony_ci_sre.ascii_iscased -> bool
2967db96d56Sopenharmony_ci
2977db96d56Sopenharmony_ci    character: int
2987db96d56Sopenharmony_ci    /
2997db96d56Sopenharmony_ci
3007db96d56Sopenharmony_ci[clinic start generated code]*/
3017db96d56Sopenharmony_ci
3027db96d56Sopenharmony_cistatic int
3037db96d56Sopenharmony_ci_sre_ascii_iscased_impl(PyObject *module, int character)
3047db96d56Sopenharmony_ci/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
3057db96d56Sopenharmony_ci{
3067db96d56Sopenharmony_ci    unsigned int ch = (unsigned int)character;
3077db96d56Sopenharmony_ci    return ch < 128 && Py_ISALPHA(ch);
3087db96d56Sopenharmony_ci}
3097db96d56Sopenharmony_ci
3107db96d56Sopenharmony_ci/*[clinic input]
3117db96d56Sopenharmony_ci_sre.unicode_iscased -> bool
3127db96d56Sopenharmony_ci
3137db96d56Sopenharmony_ci    character: int
3147db96d56Sopenharmony_ci    /
3157db96d56Sopenharmony_ci
3167db96d56Sopenharmony_ci[clinic start generated code]*/
3177db96d56Sopenharmony_ci
3187db96d56Sopenharmony_cistatic int
3197db96d56Sopenharmony_ci_sre_unicode_iscased_impl(PyObject *module, int character)
3207db96d56Sopenharmony_ci/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
3217db96d56Sopenharmony_ci{
3227db96d56Sopenharmony_ci    unsigned int ch = (unsigned int)character;
3237db96d56Sopenharmony_ci    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
3247db96d56Sopenharmony_ci}
3257db96d56Sopenharmony_ci
3267db96d56Sopenharmony_ci/*[clinic input]
3277db96d56Sopenharmony_ci_sre.ascii_tolower -> int
3287db96d56Sopenharmony_ci
3297db96d56Sopenharmony_ci    character: int
3307db96d56Sopenharmony_ci    /
3317db96d56Sopenharmony_ci
3327db96d56Sopenharmony_ci[clinic start generated code]*/
3337db96d56Sopenharmony_ci
3347db96d56Sopenharmony_cistatic int
3357db96d56Sopenharmony_ci_sre_ascii_tolower_impl(PyObject *module, int character)
3367db96d56Sopenharmony_ci/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
3377db96d56Sopenharmony_ci{
3387db96d56Sopenharmony_ci    return sre_lower_ascii(character);
3397db96d56Sopenharmony_ci}
3407db96d56Sopenharmony_ci
3417db96d56Sopenharmony_ci/*[clinic input]
3427db96d56Sopenharmony_ci_sre.unicode_tolower -> int
3437db96d56Sopenharmony_ci
3447db96d56Sopenharmony_ci    character: int
3457db96d56Sopenharmony_ci    /
3467db96d56Sopenharmony_ci
3477db96d56Sopenharmony_ci[clinic start generated code]*/
3487db96d56Sopenharmony_ci
3497db96d56Sopenharmony_cistatic int
3507db96d56Sopenharmony_ci_sre_unicode_tolower_impl(PyObject *module, int character)
3517db96d56Sopenharmony_ci/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
3527db96d56Sopenharmony_ci{
3537db96d56Sopenharmony_ci    return sre_lower_unicode(character);
3547db96d56Sopenharmony_ci}
3557db96d56Sopenharmony_ci
3567db96d56Sopenharmony_ciLOCAL(void)
3577db96d56Sopenharmony_cistate_reset(SRE_STATE* state)
3587db96d56Sopenharmony_ci{
3597db96d56Sopenharmony_ci    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
3607db96d56Sopenharmony_ci    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
3617db96d56Sopenharmony_ci
3627db96d56Sopenharmony_ci    state->lastmark = -1;
3637db96d56Sopenharmony_ci    state->lastindex = -1;
3647db96d56Sopenharmony_ci
3657db96d56Sopenharmony_ci    state->repeat = NULL;
3667db96d56Sopenharmony_ci
3677db96d56Sopenharmony_ci    data_stack_dealloc(state);
3687db96d56Sopenharmony_ci}
3697db96d56Sopenharmony_ci
3707db96d56Sopenharmony_cistatic const void*
3717db96d56Sopenharmony_cigetstring(PyObject* string, Py_ssize_t* p_length,
3727db96d56Sopenharmony_ci          int* p_isbytes, int* p_charsize,
3737db96d56Sopenharmony_ci          Py_buffer *view)
3747db96d56Sopenharmony_ci{
3757db96d56Sopenharmony_ci    /* given a python object, return a data pointer, a length (in
3767db96d56Sopenharmony_ci       characters), and a character size.  return NULL if the object
3777db96d56Sopenharmony_ci       is not a string (or not compatible) */
3787db96d56Sopenharmony_ci
3797db96d56Sopenharmony_ci    /* Unicode objects do not support the buffer API. So, get the data
3807db96d56Sopenharmony_ci       directly instead. */
3817db96d56Sopenharmony_ci    if (PyUnicode_Check(string)) {
3827db96d56Sopenharmony_ci        if (PyUnicode_READY(string) == -1)
3837db96d56Sopenharmony_ci            return NULL;
3847db96d56Sopenharmony_ci        *p_length = PyUnicode_GET_LENGTH(string);
3857db96d56Sopenharmony_ci        *p_charsize = PyUnicode_KIND(string);
3867db96d56Sopenharmony_ci        *p_isbytes = 0;
3877db96d56Sopenharmony_ci        return PyUnicode_DATA(string);
3887db96d56Sopenharmony_ci    }
3897db96d56Sopenharmony_ci
3907db96d56Sopenharmony_ci    /* get pointer to byte string buffer */
3917db96d56Sopenharmony_ci    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
3927db96d56Sopenharmony_ci        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
3937db96d56Sopenharmony_ci                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
3947db96d56Sopenharmony_ci        return NULL;
3957db96d56Sopenharmony_ci    }
3967db96d56Sopenharmony_ci
3977db96d56Sopenharmony_ci    *p_length = view->len;
3987db96d56Sopenharmony_ci    *p_charsize = 1;
3997db96d56Sopenharmony_ci    *p_isbytes = 1;
4007db96d56Sopenharmony_ci
4017db96d56Sopenharmony_ci    if (view->buf == NULL) {
4027db96d56Sopenharmony_ci        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
4037db96d56Sopenharmony_ci        PyBuffer_Release(view);
4047db96d56Sopenharmony_ci        view->buf = NULL;
4057db96d56Sopenharmony_ci        return NULL;
4067db96d56Sopenharmony_ci    }
4077db96d56Sopenharmony_ci    return view->buf;
4087db96d56Sopenharmony_ci}
4097db96d56Sopenharmony_ci
4107db96d56Sopenharmony_ciLOCAL(PyObject*)
4117db96d56Sopenharmony_cistate_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
4127db96d56Sopenharmony_ci           Py_ssize_t start, Py_ssize_t end)
4137db96d56Sopenharmony_ci{
4147db96d56Sopenharmony_ci    /* prepare state object */
4157db96d56Sopenharmony_ci
4167db96d56Sopenharmony_ci    Py_ssize_t length;
4177db96d56Sopenharmony_ci    int isbytes, charsize;
4187db96d56Sopenharmony_ci    const void* ptr;
4197db96d56Sopenharmony_ci
4207db96d56Sopenharmony_ci    memset(state, 0, sizeof(SRE_STATE));
4217db96d56Sopenharmony_ci
4227db96d56Sopenharmony_ci    state->mark = PyMem_New(const void *, pattern->groups * 2);
4237db96d56Sopenharmony_ci    if (!state->mark) {
4247db96d56Sopenharmony_ci        PyErr_NoMemory();
4257db96d56Sopenharmony_ci        goto err;
4267db96d56Sopenharmony_ci    }
4277db96d56Sopenharmony_ci    state->lastmark = -1;
4287db96d56Sopenharmony_ci    state->lastindex = -1;
4297db96d56Sopenharmony_ci
4307db96d56Sopenharmony_ci    state->buffer.buf = NULL;
4317db96d56Sopenharmony_ci    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
4327db96d56Sopenharmony_ci    if (!ptr)
4337db96d56Sopenharmony_ci        goto err;
4347db96d56Sopenharmony_ci
4357db96d56Sopenharmony_ci    if (isbytes && pattern->isbytes == 0) {
4367db96d56Sopenharmony_ci        PyErr_SetString(PyExc_TypeError,
4377db96d56Sopenharmony_ci                        "cannot use a string pattern on a bytes-like object");
4387db96d56Sopenharmony_ci        goto err;
4397db96d56Sopenharmony_ci    }
4407db96d56Sopenharmony_ci    if (!isbytes && pattern->isbytes > 0) {
4417db96d56Sopenharmony_ci        PyErr_SetString(PyExc_TypeError,
4427db96d56Sopenharmony_ci                        "cannot use a bytes pattern on a string-like object");
4437db96d56Sopenharmony_ci        goto err;
4447db96d56Sopenharmony_ci    }
4457db96d56Sopenharmony_ci
4467db96d56Sopenharmony_ci    /* adjust boundaries */
4477db96d56Sopenharmony_ci    if (start < 0)
4487db96d56Sopenharmony_ci        start = 0;
4497db96d56Sopenharmony_ci    else if (start > length)
4507db96d56Sopenharmony_ci        start = length;
4517db96d56Sopenharmony_ci
4527db96d56Sopenharmony_ci    if (end < 0)
4537db96d56Sopenharmony_ci        end = 0;
4547db96d56Sopenharmony_ci    else if (end > length)
4557db96d56Sopenharmony_ci        end = length;
4567db96d56Sopenharmony_ci
4577db96d56Sopenharmony_ci    state->isbytes = isbytes;
4587db96d56Sopenharmony_ci    state->charsize = charsize;
4597db96d56Sopenharmony_ci    state->match_all = 0;
4607db96d56Sopenharmony_ci    state->must_advance = 0;
4617db96d56Sopenharmony_ci
4627db96d56Sopenharmony_ci    state->beginning = ptr;
4637db96d56Sopenharmony_ci
4647db96d56Sopenharmony_ci    state->start = (void*) ((char*) ptr + start * state->charsize);
4657db96d56Sopenharmony_ci    state->end = (void*) ((char*) ptr + end * state->charsize);
4667db96d56Sopenharmony_ci
4677db96d56Sopenharmony_ci    Py_INCREF(string);
4687db96d56Sopenharmony_ci    state->string = string;
4697db96d56Sopenharmony_ci    state->pos = start;
4707db96d56Sopenharmony_ci    state->endpos = end;
4717db96d56Sopenharmony_ci
4727db96d56Sopenharmony_ci    return string;
4737db96d56Sopenharmony_ci  err:
4747db96d56Sopenharmony_ci    /* We add an explicit cast here because MSVC has a bug when
4757db96d56Sopenharmony_ci       compiling C code where it believes that `const void**` cannot be
4767db96d56Sopenharmony_ci       safely casted to `void*`, see bpo-39943 for details. */
4777db96d56Sopenharmony_ci    PyMem_Free((void*) state->mark);
4787db96d56Sopenharmony_ci    state->mark = NULL;
4797db96d56Sopenharmony_ci    if (state->buffer.buf)
4807db96d56Sopenharmony_ci        PyBuffer_Release(&state->buffer);
4817db96d56Sopenharmony_ci    return NULL;
4827db96d56Sopenharmony_ci}
4837db96d56Sopenharmony_ci
4847db96d56Sopenharmony_ciLOCAL(void)
4857db96d56Sopenharmony_cistate_fini(SRE_STATE* state)
4867db96d56Sopenharmony_ci{
4877db96d56Sopenharmony_ci    if (state->buffer.buf)
4887db96d56Sopenharmony_ci        PyBuffer_Release(&state->buffer);
4897db96d56Sopenharmony_ci    Py_XDECREF(state->string);
4907db96d56Sopenharmony_ci    data_stack_dealloc(state);
4917db96d56Sopenharmony_ci    /* See above PyMem_Del for why we explicitly cast here. */
4927db96d56Sopenharmony_ci    PyMem_Free((void*) state->mark);
4937db96d56Sopenharmony_ci    state->mark = NULL;
4947db96d56Sopenharmony_ci}
4957db96d56Sopenharmony_ci
/* Convert pointer `member` into a character offset from the start of the
   string: the byte distance from state->beginning divided by
   state->charsize. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
4997db96d56Sopenharmony_ci
5007db96d56Sopenharmony_ciLOCAL(PyObject*)
5017db96d56Sopenharmony_cigetslice(int isbytes, const void *ptr,
5027db96d56Sopenharmony_ci         PyObject* string, Py_ssize_t start, Py_ssize_t end)
5037db96d56Sopenharmony_ci{
5047db96d56Sopenharmony_ci    if (isbytes) {
5057db96d56Sopenharmony_ci        if (PyBytes_CheckExact(string) &&
5067db96d56Sopenharmony_ci            start == 0 && end == PyBytes_GET_SIZE(string)) {
5077db96d56Sopenharmony_ci            Py_INCREF(string);
5087db96d56Sopenharmony_ci            return string;
5097db96d56Sopenharmony_ci        }
5107db96d56Sopenharmony_ci        return PyBytes_FromStringAndSize(
5117db96d56Sopenharmony_ci                (const char *)ptr + start, end - start);
5127db96d56Sopenharmony_ci    }
5137db96d56Sopenharmony_ci    else {
5147db96d56Sopenharmony_ci        return PyUnicode_Substring(string, start, end);
5157db96d56Sopenharmony_ci    }
5167db96d56Sopenharmony_ci}
5177db96d56Sopenharmony_ci
5187db96d56Sopenharmony_ciLOCAL(PyObject*)
5197db96d56Sopenharmony_cistate_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
5207db96d56Sopenharmony_ci{
5217db96d56Sopenharmony_ci    Py_ssize_t i, j;
5227db96d56Sopenharmony_ci
5237db96d56Sopenharmony_ci    index = (index - 1) * 2;
5247db96d56Sopenharmony_ci
5257db96d56Sopenharmony_ci    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
5267db96d56Sopenharmony_ci        if (empty)
5277db96d56Sopenharmony_ci            /* want empty string */
5287db96d56Sopenharmony_ci            i = j = 0;
5297db96d56Sopenharmony_ci        else {
5307db96d56Sopenharmony_ci            Py_RETURN_NONE;
5317db96d56Sopenharmony_ci        }
5327db96d56Sopenharmony_ci    } else {
5337db96d56Sopenharmony_ci        i = STATE_OFFSET(state, state->mark[index]);
5347db96d56Sopenharmony_ci        j = STATE_OFFSET(state, state->mark[index+1]);
5357db96d56Sopenharmony_ci
5367db96d56Sopenharmony_ci        /* check wrong span */
5377db96d56Sopenharmony_ci        if (i > j) {
5387db96d56Sopenharmony_ci            PyErr_SetString(PyExc_SystemError,
5397db96d56Sopenharmony_ci                            "The span of capturing group is wrong,"
5407db96d56Sopenharmony_ci                            " please report a bug for the re module.");
5417db96d56Sopenharmony_ci            return NULL;
5427db96d56Sopenharmony_ci        }
5437db96d56Sopenharmony_ci    }
5447db96d56Sopenharmony_ci
5457db96d56Sopenharmony_ci    return getslice(state->isbytes, state->beginning, string, i, j);
5467db96d56Sopenharmony_ci}
5477db96d56Sopenharmony_ci
5487db96d56Sopenharmony_cistatic void
5497db96d56Sopenharmony_cipattern_error(Py_ssize_t status)
5507db96d56Sopenharmony_ci{
5517db96d56Sopenharmony_ci    switch (status) {
5527db96d56Sopenharmony_ci    case SRE_ERROR_RECURSION_LIMIT:
5537db96d56Sopenharmony_ci        /* This error code seems to be unused. */
5547db96d56Sopenharmony_ci        PyErr_SetString(
5557db96d56Sopenharmony_ci            PyExc_RecursionError,
5567db96d56Sopenharmony_ci            "maximum recursion limit exceeded"
5577db96d56Sopenharmony_ci            );
5587db96d56Sopenharmony_ci        break;
5597db96d56Sopenharmony_ci    case SRE_ERROR_MEMORY:
5607db96d56Sopenharmony_ci        PyErr_NoMemory();
5617db96d56Sopenharmony_ci        break;
5627db96d56Sopenharmony_ci    case SRE_ERROR_INTERRUPTED:
5637db96d56Sopenharmony_ci    /* An exception has already been raised, so let it fly */
5647db96d56Sopenharmony_ci        break;
5657db96d56Sopenharmony_ci    default:
5667db96d56Sopenharmony_ci        /* other error codes indicate compiler/engine bugs */
5677db96d56Sopenharmony_ci        PyErr_SetString(
5687db96d56Sopenharmony_ci            PyExc_RuntimeError,
5697db96d56Sopenharmony_ci            "internal error in regular expression engine"
5707db96d56Sopenharmony_ci            );
5717db96d56Sopenharmony_ci    }
5727db96d56Sopenharmony_ci}
5737db96d56Sopenharmony_ci
/* GC traversal for pattern objects: reports the object's type and its
   groupindex, indexgroup and pattern fields to `visit`.  Py_VISIT returns
   early from this function if a visit call yields non-zero; otherwise 0 is
   returned. */
static int
pattern_traverse(PatternObject *self, visitproc visit, void *arg)
{
    /* NOTE(review): visiting the type suggests it is a heap type -- confirm */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->groupindex);
    Py_VISIT(self->indexgroup);
    Py_VISIT(self->pattern);
    return 0;
}
5837db96d56Sopenharmony_ci
/* Drop the references held in groupindex, indexgroup and pattern, leaving
   each field NULL.  Always returns 0.  Also called from
   pattern_dealloc(). */
static int
pattern_clear(PatternObject *self)
{
    Py_CLEAR(self->groupindex);
    Py_CLEAR(self->indexgroup);
    Py_CLEAR(self->pattern);
    return 0;
}
5927db96d56Sopenharmony_ci
/* Destructor for pattern objects. */
static void
pattern_dealloc(PatternObject* self)
{
    /* remember the type now: it is still needed after self has been freed */
    PyTypeObject *tp = Py_TYPE(self);

    /* stop GC tracking before the object is torn down */
    PyObject_GC_UnTrack(self);
    if (self->weakreflist != NULL) {
        PyObject_ClearWeakRefs((PyObject *) self);
    }
    /* release the owned object references */
    (void)pattern_clear(self);
    tp->tp_free(self);
    /* NOTE(review): presumably releases the reference an instance holds on
       its heap type -- confirm */
    Py_DECREF(tp);
}
6067db96d56Sopenharmony_ci
6077db96d56Sopenharmony_ciLOCAL(Py_ssize_t)
6087db96d56Sopenharmony_cisre_match(SRE_STATE* state, SRE_CODE* pattern)
6097db96d56Sopenharmony_ci{
6107db96d56Sopenharmony_ci    if (state->charsize == 1)
6117db96d56Sopenharmony_ci        return sre_ucs1_match(state, pattern, 1);
6127db96d56Sopenharmony_ci    if (state->charsize == 2)
6137db96d56Sopenharmony_ci        return sre_ucs2_match(state, pattern, 1);
6147db96d56Sopenharmony_ci    assert(state->charsize == 4);
6157db96d56Sopenharmony_ci    return sre_ucs4_match(state, pattern, 1);
6167db96d56Sopenharmony_ci}
6177db96d56Sopenharmony_ci
6187db96d56Sopenharmony_ciLOCAL(Py_ssize_t)
6197db96d56Sopenharmony_cisre_search(SRE_STATE* state, SRE_CODE* pattern)
6207db96d56Sopenharmony_ci{
6217db96d56Sopenharmony_ci    if (state->charsize == 1)
6227db96d56Sopenharmony_ci        return sre_ucs1_search(state, pattern);
6237db96d56Sopenharmony_ci    if (state->charsize == 2)
6247db96d56Sopenharmony_ci        return sre_ucs2_search(state, pattern);
6257db96d56Sopenharmony_ci    assert(state->charsize == 4);
6267db96d56Sopenharmony_ci    return sre_ucs4_search(state, pattern);
6277db96d56Sopenharmony_ci}
6287db96d56Sopenharmony_ci
6297db96d56Sopenharmony_ci/*[clinic input]
6307db96d56Sopenharmony_ci_sre.SRE_Pattern.match
6317db96d56Sopenharmony_ci
6327db96d56Sopenharmony_ci    cls: defining_class
6337db96d56Sopenharmony_ci    /
6347db96d56Sopenharmony_ci    string: object
6357db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
6367db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
6377db96d56Sopenharmony_ci
6387db96d56Sopenharmony_ciMatches zero or more characters at the beginning of the string.
6397db96d56Sopenharmony_ci[clinic start generated code]*/
6407db96d56Sopenharmony_ci
6417db96d56Sopenharmony_cistatic PyObject *
6427db96d56Sopenharmony_ci_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
6437db96d56Sopenharmony_ci                            PyObject *string, Py_ssize_t pos,
6447db96d56Sopenharmony_ci                            Py_ssize_t endpos)
6457db96d56Sopenharmony_ci/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
6467db96d56Sopenharmony_ci{
6477db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
6487db96d56Sopenharmony_ci    SRE_STATE state;
6497db96d56Sopenharmony_ci    Py_ssize_t status;
6507db96d56Sopenharmony_ci    PyObject *match;
6517db96d56Sopenharmony_ci
6527db96d56Sopenharmony_ci    if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
6537db96d56Sopenharmony_ci        return NULL;
6547db96d56Sopenharmony_ci
6557db96d56Sopenharmony_ci    state.ptr = state.start;
6567db96d56Sopenharmony_ci
6577db96d56Sopenharmony_ci    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
6587db96d56Sopenharmony_ci
6597db96d56Sopenharmony_ci    status = sre_match(&state, PatternObject_GetCode(self));
6607db96d56Sopenharmony_ci
6617db96d56Sopenharmony_ci    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
6627db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
6637db96d56Sopenharmony_ci        state_fini(&state);
6647db96d56Sopenharmony_ci        return NULL;
6657db96d56Sopenharmony_ci    }
6667db96d56Sopenharmony_ci
6677db96d56Sopenharmony_ci    match = pattern_new_match(module_state, self, &state, status);
6687db96d56Sopenharmony_ci    state_fini(&state);
6697db96d56Sopenharmony_ci    return match;
6707db96d56Sopenharmony_ci}
6717db96d56Sopenharmony_ci
6727db96d56Sopenharmony_ci/*[clinic input]
6737db96d56Sopenharmony_ci_sre.SRE_Pattern.fullmatch
6747db96d56Sopenharmony_ci
6757db96d56Sopenharmony_ci    cls: defining_class
6767db96d56Sopenharmony_ci    /
6777db96d56Sopenharmony_ci    string: object
6787db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
6797db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
6807db96d56Sopenharmony_ci
6817db96d56Sopenharmony_ciMatches against all of the string.
6827db96d56Sopenharmony_ci[clinic start generated code]*/
6837db96d56Sopenharmony_ci
6847db96d56Sopenharmony_cistatic PyObject *
6857db96d56Sopenharmony_ci_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
6867db96d56Sopenharmony_ci                                PyObject *string, Py_ssize_t pos,
6877db96d56Sopenharmony_ci                                Py_ssize_t endpos)
6887db96d56Sopenharmony_ci/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
6897db96d56Sopenharmony_ci{
6907db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
6917db96d56Sopenharmony_ci    SRE_STATE state;
6927db96d56Sopenharmony_ci    Py_ssize_t status;
6937db96d56Sopenharmony_ci    PyObject *match;
6947db96d56Sopenharmony_ci
6957db96d56Sopenharmony_ci    if (!state_init(&state, self, string, pos, endpos))
6967db96d56Sopenharmony_ci        return NULL;
6977db96d56Sopenharmony_ci
6987db96d56Sopenharmony_ci    state.ptr = state.start;
6997db96d56Sopenharmony_ci
7007db96d56Sopenharmony_ci    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
7017db96d56Sopenharmony_ci
7027db96d56Sopenharmony_ci    state.match_all = 1;
7037db96d56Sopenharmony_ci    status = sre_match(&state, PatternObject_GetCode(self));
7047db96d56Sopenharmony_ci
7057db96d56Sopenharmony_ci    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
7067db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
7077db96d56Sopenharmony_ci        state_fini(&state);
7087db96d56Sopenharmony_ci        return NULL;
7097db96d56Sopenharmony_ci    }
7107db96d56Sopenharmony_ci
7117db96d56Sopenharmony_ci    match = pattern_new_match(module_state, self, &state, status);
7127db96d56Sopenharmony_ci    state_fini(&state);
7137db96d56Sopenharmony_ci    return match;
7147db96d56Sopenharmony_ci}
7157db96d56Sopenharmony_ci
7167db96d56Sopenharmony_ci/*[clinic input]
7177db96d56Sopenharmony_ci_sre.SRE_Pattern.search
7187db96d56Sopenharmony_ci
7197db96d56Sopenharmony_ci    cls: defining_class
7207db96d56Sopenharmony_ci    /
7217db96d56Sopenharmony_ci    string: object
7227db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
7237db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
7247db96d56Sopenharmony_ci
7257db96d56Sopenharmony_ciScan through string looking for a match, and return a corresponding match object instance.
7267db96d56Sopenharmony_ci
7277db96d56Sopenharmony_ciReturn None if no position in the string matches.
7287db96d56Sopenharmony_ci[clinic start generated code]*/
7297db96d56Sopenharmony_ci
7307db96d56Sopenharmony_cistatic PyObject *
7317db96d56Sopenharmony_ci_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
7327db96d56Sopenharmony_ci                             PyObject *string, Py_ssize_t pos,
7337db96d56Sopenharmony_ci                             Py_ssize_t endpos)
7347db96d56Sopenharmony_ci/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
7357db96d56Sopenharmony_ci{
7367db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
7377db96d56Sopenharmony_ci    SRE_STATE state;
7387db96d56Sopenharmony_ci    Py_ssize_t status;
7397db96d56Sopenharmony_ci    PyObject *match;
7407db96d56Sopenharmony_ci
7417db96d56Sopenharmony_ci    if (!state_init(&state, self, string, pos, endpos))
7427db96d56Sopenharmony_ci        return NULL;
7437db96d56Sopenharmony_ci
7447db96d56Sopenharmony_ci    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
7457db96d56Sopenharmony_ci
7467db96d56Sopenharmony_ci    status = sre_search(&state, PatternObject_GetCode(self));
7477db96d56Sopenharmony_ci
7487db96d56Sopenharmony_ci    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
7497db96d56Sopenharmony_ci
7507db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
7517db96d56Sopenharmony_ci        state_fini(&state);
7527db96d56Sopenharmony_ci        return NULL;
7537db96d56Sopenharmony_ci    }
7547db96d56Sopenharmony_ci
7557db96d56Sopenharmony_ci    match = pattern_new_match(module_state, self, &state, status);
7567db96d56Sopenharmony_ci    state_fini(&state);
7577db96d56Sopenharmony_ci    return match;
7587db96d56Sopenharmony_ci}
7597db96d56Sopenharmony_ci
7607db96d56Sopenharmony_cistatic PyObject*
7617db96d56Sopenharmony_cicall(const char* module, const char* function, PyObject* args)
7627db96d56Sopenharmony_ci{
7637db96d56Sopenharmony_ci    PyObject* name;
7647db96d56Sopenharmony_ci    PyObject* mod;
7657db96d56Sopenharmony_ci    PyObject* func;
7667db96d56Sopenharmony_ci    PyObject* result;
7677db96d56Sopenharmony_ci
7687db96d56Sopenharmony_ci    if (!args)
7697db96d56Sopenharmony_ci        return NULL;
7707db96d56Sopenharmony_ci    name = PyUnicode_FromString(module);
7717db96d56Sopenharmony_ci    if (!name)
7727db96d56Sopenharmony_ci        return NULL;
7737db96d56Sopenharmony_ci    mod = PyImport_Import(name);
7747db96d56Sopenharmony_ci    Py_DECREF(name);
7757db96d56Sopenharmony_ci    if (!mod)
7767db96d56Sopenharmony_ci        return NULL;
7777db96d56Sopenharmony_ci    func = PyObject_GetAttrString(mod, function);
7787db96d56Sopenharmony_ci    Py_DECREF(mod);
7797db96d56Sopenharmony_ci    if (!func)
7807db96d56Sopenharmony_ci        return NULL;
7817db96d56Sopenharmony_ci    result = PyObject_CallObject(func, args);
7827db96d56Sopenharmony_ci    Py_DECREF(func);
7837db96d56Sopenharmony_ci    Py_DECREF(args);
7847db96d56Sopenharmony_ci    return result;
7857db96d56Sopenharmony_ci}
7867db96d56Sopenharmony_ci
7877db96d56Sopenharmony_ci/*[clinic input]
7887db96d56Sopenharmony_ci_sre.SRE_Pattern.findall
7897db96d56Sopenharmony_ci
7907db96d56Sopenharmony_ci    string: object
7917db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
7927db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
7937db96d56Sopenharmony_ci
7947db96d56Sopenharmony_ciReturn a list of all non-overlapping matches of pattern in string.
7957db96d56Sopenharmony_ci[clinic start generated code]*/
7967db96d56Sopenharmony_ci
7977db96d56Sopenharmony_cistatic PyObject *
7987db96d56Sopenharmony_ci_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
7997db96d56Sopenharmony_ci                              Py_ssize_t pos, Py_ssize_t endpos)
8007db96d56Sopenharmony_ci/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
8017db96d56Sopenharmony_ci{
8027db96d56Sopenharmony_ci    SRE_STATE state;
8037db96d56Sopenharmony_ci    PyObject* list;
8047db96d56Sopenharmony_ci    Py_ssize_t status;
8057db96d56Sopenharmony_ci    Py_ssize_t i, b, e;
8067db96d56Sopenharmony_ci
8077db96d56Sopenharmony_ci    if (!state_init(&state, self, string, pos, endpos))
8087db96d56Sopenharmony_ci        return NULL;
8097db96d56Sopenharmony_ci
8107db96d56Sopenharmony_ci    list = PyList_New(0);
8117db96d56Sopenharmony_ci    if (!list) {
8127db96d56Sopenharmony_ci        state_fini(&state);
8137db96d56Sopenharmony_ci        return NULL;
8147db96d56Sopenharmony_ci    }
8157db96d56Sopenharmony_ci
8167db96d56Sopenharmony_ci    while (state.start <= state.end) {
8177db96d56Sopenharmony_ci
8187db96d56Sopenharmony_ci        PyObject* item;
8197db96d56Sopenharmony_ci
8207db96d56Sopenharmony_ci        state_reset(&state);
8217db96d56Sopenharmony_ci
8227db96d56Sopenharmony_ci        state.ptr = state.start;
8237db96d56Sopenharmony_ci
8247db96d56Sopenharmony_ci        status = sre_search(&state, PatternObject_GetCode(self));
8257db96d56Sopenharmony_ci        if (PyErr_Occurred())
8267db96d56Sopenharmony_ci            goto error;
8277db96d56Sopenharmony_ci
8287db96d56Sopenharmony_ci        if (status <= 0) {
8297db96d56Sopenharmony_ci            if (status == 0)
8307db96d56Sopenharmony_ci                break;
8317db96d56Sopenharmony_ci            pattern_error(status);
8327db96d56Sopenharmony_ci            goto error;
8337db96d56Sopenharmony_ci        }
8347db96d56Sopenharmony_ci
8357db96d56Sopenharmony_ci        /* don't bother to build a match object */
8367db96d56Sopenharmony_ci        switch (self->groups) {
8377db96d56Sopenharmony_ci        case 0:
8387db96d56Sopenharmony_ci            b = STATE_OFFSET(&state, state.start);
8397db96d56Sopenharmony_ci            e = STATE_OFFSET(&state, state.ptr);
8407db96d56Sopenharmony_ci            item = getslice(state.isbytes, state.beginning,
8417db96d56Sopenharmony_ci                            string, b, e);
8427db96d56Sopenharmony_ci            if (!item)
8437db96d56Sopenharmony_ci                goto error;
8447db96d56Sopenharmony_ci            break;
8457db96d56Sopenharmony_ci        case 1:
8467db96d56Sopenharmony_ci            item = state_getslice(&state, 1, string, 1);
8477db96d56Sopenharmony_ci            if (!item)
8487db96d56Sopenharmony_ci                goto error;
8497db96d56Sopenharmony_ci            break;
8507db96d56Sopenharmony_ci        default:
8517db96d56Sopenharmony_ci            item = PyTuple_New(self->groups);
8527db96d56Sopenharmony_ci            if (!item)
8537db96d56Sopenharmony_ci                goto error;
8547db96d56Sopenharmony_ci            for (i = 0; i < self->groups; i++) {
8557db96d56Sopenharmony_ci                PyObject* o = state_getslice(&state, i+1, string, 1);
8567db96d56Sopenharmony_ci                if (!o) {
8577db96d56Sopenharmony_ci                    Py_DECREF(item);
8587db96d56Sopenharmony_ci                    goto error;
8597db96d56Sopenharmony_ci                }
8607db96d56Sopenharmony_ci                PyTuple_SET_ITEM(item, i, o);
8617db96d56Sopenharmony_ci            }
8627db96d56Sopenharmony_ci            break;
8637db96d56Sopenharmony_ci        }
8647db96d56Sopenharmony_ci
8657db96d56Sopenharmony_ci        status = PyList_Append(list, item);
8667db96d56Sopenharmony_ci        Py_DECREF(item);
8677db96d56Sopenharmony_ci        if (status < 0)
8687db96d56Sopenharmony_ci            goto error;
8697db96d56Sopenharmony_ci
8707db96d56Sopenharmony_ci        state.must_advance = (state.ptr == state.start);
8717db96d56Sopenharmony_ci        state.start = state.ptr;
8727db96d56Sopenharmony_ci    }
8737db96d56Sopenharmony_ci
8747db96d56Sopenharmony_ci    state_fini(&state);
8757db96d56Sopenharmony_ci    return list;
8767db96d56Sopenharmony_ci
8777db96d56Sopenharmony_cierror:
8787db96d56Sopenharmony_ci    Py_DECREF(list);
8797db96d56Sopenharmony_ci    state_fini(&state);
8807db96d56Sopenharmony_ci    return NULL;
8817db96d56Sopenharmony_ci
8827db96d56Sopenharmony_ci}
8837db96d56Sopenharmony_ci
8847db96d56Sopenharmony_ci/*[clinic input]
8857db96d56Sopenharmony_ci_sre.SRE_Pattern.finditer
8867db96d56Sopenharmony_ci
8877db96d56Sopenharmony_ci    cls: defining_class
8887db96d56Sopenharmony_ci    /
8897db96d56Sopenharmony_ci    string: object
8907db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
8917db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
8927db96d56Sopenharmony_ci
8937db96d56Sopenharmony_ciReturn an iterator over all non-overlapping matches for the RE pattern in string.
8947db96d56Sopenharmony_ci
8957db96d56Sopenharmony_ciFor each match, the iterator returns a match object.
8967db96d56Sopenharmony_ci[clinic start generated code]*/
8977db96d56Sopenharmony_ci
8987db96d56Sopenharmony_cistatic PyObject *
8997db96d56Sopenharmony_ci_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
9007db96d56Sopenharmony_ci                               PyObject *string, Py_ssize_t pos,
9017db96d56Sopenharmony_ci                               Py_ssize_t endpos)
9027db96d56Sopenharmony_ci/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
9037db96d56Sopenharmony_ci{
9047db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
9057db96d56Sopenharmony_ci    PyObject* scanner;
9067db96d56Sopenharmony_ci    PyObject* search;
9077db96d56Sopenharmony_ci    PyObject* iterator;
9087db96d56Sopenharmony_ci
9097db96d56Sopenharmony_ci    scanner = pattern_scanner(module_state, self, string, pos, endpos);
9107db96d56Sopenharmony_ci    if (!scanner)
9117db96d56Sopenharmony_ci        return NULL;
9127db96d56Sopenharmony_ci
9137db96d56Sopenharmony_ci    search = PyObject_GetAttrString(scanner, "search");
9147db96d56Sopenharmony_ci    Py_DECREF(scanner);
9157db96d56Sopenharmony_ci    if (!search)
9167db96d56Sopenharmony_ci        return NULL;
9177db96d56Sopenharmony_ci
9187db96d56Sopenharmony_ci    iterator = PyCallIter_New(search, Py_None);
9197db96d56Sopenharmony_ci    Py_DECREF(search);
9207db96d56Sopenharmony_ci
9217db96d56Sopenharmony_ci    return iterator;
9227db96d56Sopenharmony_ci}
9237db96d56Sopenharmony_ci
9247db96d56Sopenharmony_ci/*[clinic input]
9257db96d56Sopenharmony_ci_sre.SRE_Pattern.scanner
9267db96d56Sopenharmony_ci
9277db96d56Sopenharmony_ci    cls: defining_class
9287db96d56Sopenharmony_ci    /
9297db96d56Sopenharmony_ci    string: object
9307db96d56Sopenharmony_ci    pos: Py_ssize_t = 0
9317db96d56Sopenharmony_ci    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
9327db96d56Sopenharmony_ci
9337db96d56Sopenharmony_ci[clinic start generated code]*/
9347db96d56Sopenharmony_ci
9357db96d56Sopenharmony_cistatic PyObject *
9367db96d56Sopenharmony_ci_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
9377db96d56Sopenharmony_ci                              PyObject *string, Py_ssize_t pos,
9387db96d56Sopenharmony_ci                              Py_ssize_t endpos)
9397db96d56Sopenharmony_ci/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
9407db96d56Sopenharmony_ci{
9417db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
9427db96d56Sopenharmony_ci
9437db96d56Sopenharmony_ci    return pattern_scanner(module_state, self, string, pos, endpos);
9447db96d56Sopenharmony_ci}
9457db96d56Sopenharmony_ci
9467db96d56Sopenharmony_ci/*[clinic input]
9477db96d56Sopenharmony_ci_sre.SRE_Pattern.split
9487db96d56Sopenharmony_ci
9497db96d56Sopenharmony_ci    string: object
9507db96d56Sopenharmony_ci    maxsplit: Py_ssize_t = 0
9517db96d56Sopenharmony_ci
9527db96d56Sopenharmony_ciSplit string by the occurrences of pattern.
9537db96d56Sopenharmony_ci[clinic start generated code]*/
9547db96d56Sopenharmony_ci
9557db96d56Sopenharmony_cistatic PyObject *
9567db96d56Sopenharmony_ci_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
9577db96d56Sopenharmony_ci                            Py_ssize_t maxsplit)
9587db96d56Sopenharmony_ci/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
9597db96d56Sopenharmony_ci{
9607db96d56Sopenharmony_ci    SRE_STATE state;
9617db96d56Sopenharmony_ci    PyObject* list;
9627db96d56Sopenharmony_ci    PyObject* item;
9637db96d56Sopenharmony_ci    Py_ssize_t status;
9647db96d56Sopenharmony_ci    Py_ssize_t n;
9657db96d56Sopenharmony_ci    Py_ssize_t i;
9667db96d56Sopenharmony_ci    const void* last;
9677db96d56Sopenharmony_ci
9687db96d56Sopenharmony_ci    assert(self->codesize != 0);
9697db96d56Sopenharmony_ci
9707db96d56Sopenharmony_ci    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
9717db96d56Sopenharmony_ci        return NULL;
9727db96d56Sopenharmony_ci
9737db96d56Sopenharmony_ci    list = PyList_New(0);
9747db96d56Sopenharmony_ci    if (!list) {
9757db96d56Sopenharmony_ci        state_fini(&state);
9767db96d56Sopenharmony_ci        return NULL;
9777db96d56Sopenharmony_ci    }
9787db96d56Sopenharmony_ci
9797db96d56Sopenharmony_ci    n = 0;
9807db96d56Sopenharmony_ci    last = state.start;
9817db96d56Sopenharmony_ci
9827db96d56Sopenharmony_ci    while (!maxsplit || n < maxsplit) {
9837db96d56Sopenharmony_ci
9847db96d56Sopenharmony_ci        state_reset(&state);
9857db96d56Sopenharmony_ci
9867db96d56Sopenharmony_ci        state.ptr = state.start;
9877db96d56Sopenharmony_ci
9887db96d56Sopenharmony_ci        status = sre_search(&state, PatternObject_GetCode(self));
9897db96d56Sopenharmony_ci        if (PyErr_Occurred())
9907db96d56Sopenharmony_ci            goto error;
9917db96d56Sopenharmony_ci
9927db96d56Sopenharmony_ci        if (status <= 0) {
9937db96d56Sopenharmony_ci            if (status == 0)
9947db96d56Sopenharmony_ci                break;
9957db96d56Sopenharmony_ci            pattern_error(status);
9967db96d56Sopenharmony_ci            goto error;
9977db96d56Sopenharmony_ci        }
9987db96d56Sopenharmony_ci
9997db96d56Sopenharmony_ci        /* get segment before this match */
10007db96d56Sopenharmony_ci        item = getslice(state.isbytes, state.beginning,
10017db96d56Sopenharmony_ci            string, STATE_OFFSET(&state, last),
10027db96d56Sopenharmony_ci            STATE_OFFSET(&state, state.start)
10037db96d56Sopenharmony_ci            );
10047db96d56Sopenharmony_ci        if (!item)
10057db96d56Sopenharmony_ci            goto error;
10067db96d56Sopenharmony_ci        status = PyList_Append(list, item);
10077db96d56Sopenharmony_ci        Py_DECREF(item);
10087db96d56Sopenharmony_ci        if (status < 0)
10097db96d56Sopenharmony_ci            goto error;
10107db96d56Sopenharmony_ci
10117db96d56Sopenharmony_ci        /* add groups (if any) */
10127db96d56Sopenharmony_ci        for (i = 0; i < self->groups; i++) {
10137db96d56Sopenharmony_ci            item = state_getslice(&state, i+1, string, 0);
10147db96d56Sopenharmony_ci            if (!item)
10157db96d56Sopenharmony_ci                goto error;
10167db96d56Sopenharmony_ci            status = PyList_Append(list, item);
10177db96d56Sopenharmony_ci            Py_DECREF(item);
10187db96d56Sopenharmony_ci            if (status < 0)
10197db96d56Sopenharmony_ci                goto error;
10207db96d56Sopenharmony_ci        }
10217db96d56Sopenharmony_ci
10227db96d56Sopenharmony_ci        n = n + 1;
10237db96d56Sopenharmony_ci        state.must_advance = (state.ptr == state.start);
10247db96d56Sopenharmony_ci        last = state.start = state.ptr;
10257db96d56Sopenharmony_ci
10267db96d56Sopenharmony_ci    }
10277db96d56Sopenharmony_ci
10287db96d56Sopenharmony_ci    /* get segment following last match (even if empty) */
10297db96d56Sopenharmony_ci    item = getslice(state.isbytes, state.beginning,
10307db96d56Sopenharmony_ci        string, STATE_OFFSET(&state, last), state.endpos
10317db96d56Sopenharmony_ci        );
10327db96d56Sopenharmony_ci    if (!item)
10337db96d56Sopenharmony_ci        goto error;
10347db96d56Sopenharmony_ci    status = PyList_Append(list, item);
10357db96d56Sopenharmony_ci    Py_DECREF(item);
10367db96d56Sopenharmony_ci    if (status < 0)
10377db96d56Sopenharmony_ci        goto error;
10387db96d56Sopenharmony_ci
10397db96d56Sopenharmony_ci    state_fini(&state);
10407db96d56Sopenharmony_ci    return list;
10417db96d56Sopenharmony_ci
10427db96d56Sopenharmony_cierror:
10437db96d56Sopenharmony_ci    Py_DECREF(list);
10447db96d56Sopenharmony_ci    state_fini(&state);
10457db96d56Sopenharmony_ci    return NULL;
10467db96d56Sopenharmony_ci
10477db96d56Sopenharmony_ci}
10487db96d56Sopenharmony_ci
10497db96d56Sopenharmony_cistatic PyObject*
10507db96d56Sopenharmony_cipattern_subx(_sremodulestate* module_state,
10517db96d56Sopenharmony_ci             PatternObject* self,
10527db96d56Sopenharmony_ci             PyObject* ptemplate,
10537db96d56Sopenharmony_ci             PyObject* string,
10547db96d56Sopenharmony_ci             Py_ssize_t count,
10557db96d56Sopenharmony_ci             Py_ssize_t subn)
10567db96d56Sopenharmony_ci{
10577db96d56Sopenharmony_ci    SRE_STATE state;
10587db96d56Sopenharmony_ci    PyObject* list;
10597db96d56Sopenharmony_ci    PyObject* joiner;
10607db96d56Sopenharmony_ci    PyObject* item;
10617db96d56Sopenharmony_ci    PyObject* filter;
10627db96d56Sopenharmony_ci    PyObject* match;
10637db96d56Sopenharmony_ci    const void* ptr;
10647db96d56Sopenharmony_ci    Py_ssize_t status;
10657db96d56Sopenharmony_ci    Py_ssize_t n;
10667db96d56Sopenharmony_ci    Py_ssize_t i, b, e;
10677db96d56Sopenharmony_ci    int isbytes, charsize;
10687db96d56Sopenharmony_ci    int filter_is_callable;
10697db96d56Sopenharmony_ci    Py_buffer view;
10707db96d56Sopenharmony_ci
10717db96d56Sopenharmony_ci    if (PyCallable_Check(ptemplate)) {
10727db96d56Sopenharmony_ci        /* sub/subn takes either a function or a template */
10737db96d56Sopenharmony_ci        filter = ptemplate;
10747db96d56Sopenharmony_ci        Py_INCREF(filter);
10757db96d56Sopenharmony_ci        filter_is_callable = 1;
10767db96d56Sopenharmony_ci    } else {
10777db96d56Sopenharmony_ci        /* if not callable, check if it's a literal string */
10787db96d56Sopenharmony_ci        int literal;
10797db96d56Sopenharmony_ci        view.buf = NULL;
10807db96d56Sopenharmony_ci        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
10817db96d56Sopenharmony_ci        if (ptr) {
10827db96d56Sopenharmony_ci            if (charsize == 1)
10837db96d56Sopenharmony_ci                literal = memchr(ptr, '\\', n) == NULL;
10847db96d56Sopenharmony_ci            else
10857db96d56Sopenharmony_ci                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
10867db96d56Sopenharmony_ci        } else {
10877db96d56Sopenharmony_ci            PyErr_Clear();
10887db96d56Sopenharmony_ci            literal = 0;
10897db96d56Sopenharmony_ci        }
10907db96d56Sopenharmony_ci        if (view.buf)
10917db96d56Sopenharmony_ci            PyBuffer_Release(&view);
10927db96d56Sopenharmony_ci        if (literal) {
10937db96d56Sopenharmony_ci            filter = ptemplate;
10947db96d56Sopenharmony_ci            Py_INCREF(filter);
10957db96d56Sopenharmony_ci            filter_is_callable = 0;
10967db96d56Sopenharmony_ci        } else {
10977db96d56Sopenharmony_ci            /* not a literal; hand it over to the template compiler */
10987db96d56Sopenharmony_ci            filter = call(
10997db96d56Sopenharmony_ci                SRE_PY_MODULE, "_subx",
11007db96d56Sopenharmony_ci                PyTuple_Pack(2, self, ptemplate)
11017db96d56Sopenharmony_ci                );
11027db96d56Sopenharmony_ci            if (!filter)
11037db96d56Sopenharmony_ci                return NULL;
11047db96d56Sopenharmony_ci            filter_is_callable = PyCallable_Check(filter);
11057db96d56Sopenharmony_ci        }
11067db96d56Sopenharmony_ci    }
11077db96d56Sopenharmony_ci
11087db96d56Sopenharmony_ci    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
11097db96d56Sopenharmony_ci        Py_DECREF(filter);
11107db96d56Sopenharmony_ci        return NULL;
11117db96d56Sopenharmony_ci    }
11127db96d56Sopenharmony_ci
11137db96d56Sopenharmony_ci    list = PyList_New(0);
11147db96d56Sopenharmony_ci    if (!list) {
11157db96d56Sopenharmony_ci        Py_DECREF(filter);
11167db96d56Sopenharmony_ci        state_fini(&state);
11177db96d56Sopenharmony_ci        return NULL;
11187db96d56Sopenharmony_ci    }
11197db96d56Sopenharmony_ci
11207db96d56Sopenharmony_ci    n = i = 0;
11217db96d56Sopenharmony_ci
11227db96d56Sopenharmony_ci    while (!count || n < count) {
11237db96d56Sopenharmony_ci
11247db96d56Sopenharmony_ci        state_reset(&state);
11257db96d56Sopenharmony_ci
11267db96d56Sopenharmony_ci        state.ptr = state.start;
11277db96d56Sopenharmony_ci
11287db96d56Sopenharmony_ci        status = sre_search(&state, PatternObject_GetCode(self));
11297db96d56Sopenharmony_ci        if (PyErr_Occurred())
11307db96d56Sopenharmony_ci            goto error;
11317db96d56Sopenharmony_ci
11327db96d56Sopenharmony_ci        if (status <= 0) {
11337db96d56Sopenharmony_ci            if (status == 0)
11347db96d56Sopenharmony_ci                break;
11357db96d56Sopenharmony_ci            pattern_error(status);
11367db96d56Sopenharmony_ci            goto error;
11377db96d56Sopenharmony_ci        }
11387db96d56Sopenharmony_ci
11397db96d56Sopenharmony_ci        b = STATE_OFFSET(&state, state.start);
11407db96d56Sopenharmony_ci        e = STATE_OFFSET(&state, state.ptr);
11417db96d56Sopenharmony_ci
11427db96d56Sopenharmony_ci        if (i < b) {
11437db96d56Sopenharmony_ci            /* get segment before this match */
11447db96d56Sopenharmony_ci            item = getslice(state.isbytes, state.beginning,
11457db96d56Sopenharmony_ci                string, i, b);
11467db96d56Sopenharmony_ci            if (!item)
11477db96d56Sopenharmony_ci                goto error;
11487db96d56Sopenharmony_ci            status = PyList_Append(list, item);
11497db96d56Sopenharmony_ci            Py_DECREF(item);
11507db96d56Sopenharmony_ci            if (status < 0)
11517db96d56Sopenharmony_ci                goto error;
11527db96d56Sopenharmony_ci
11537db96d56Sopenharmony_ci        }
11547db96d56Sopenharmony_ci
11557db96d56Sopenharmony_ci        if (filter_is_callable) {
11567db96d56Sopenharmony_ci            /* pass match object through filter */
11577db96d56Sopenharmony_ci            match = pattern_new_match(module_state, self, &state, 1);
11587db96d56Sopenharmony_ci            if (!match)
11597db96d56Sopenharmony_ci                goto error;
11607db96d56Sopenharmony_ci            item = PyObject_CallOneArg(filter, match);
11617db96d56Sopenharmony_ci            Py_DECREF(match);
11627db96d56Sopenharmony_ci            if (!item)
11637db96d56Sopenharmony_ci                goto error;
11647db96d56Sopenharmony_ci        } else {
11657db96d56Sopenharmony_ci            /* filter is literal string */
11667db96d56Sopenharmony_ci            item = filter;
11677db96d56Sopenharmony_ci            Py_INCREF(item);
11687db96d56Sopenharmony_ci        }
11697db96d56Sopenharmony_ci
11707db96d56Sopenharmony_ci        /* add to list */
11717db96d56Sopenharmony_ci        if (item != Py_None) {
11727db96d56Sopenharmony_ci            status = PyList_Append(list, item);
11737db96d56Sopenharmony_ci            Py_DECREF(item);
11747db96d56Sopenharmony_ci            if (status < 0)
11757db96d56Sopenharmony_ci                goto error;
11767db96d56Sopenharmony_ci        }
11777db96d56Sopenharmony_ci
11787db96d56Sopenharmony_ci        i = e;
11797db96d56Sopenharmony_ci        n = n + 1;
11807db96d56Sopenharmony_ci        state.must_advance = (state.ptr == state.start);
11817db96d56Sopenharmony_ci        state.start = state.ptr;
11827db96d56Sopenharmony_ci    }
11837db96d56Sopenharmony_ci
11847db96d56Sopenharmony_ci    /* get segment following last match */
11857db96d56Sopenharmony_ci    if (i < state.endpos) {
11867db96d56Sopenharmony_ci        item = getslice(state.isbytes, state.beginning,
11877db96d56Sopenharmony_ci                        string, i, state.endpos);
11887db96d56Sopenharmony_ci        if (!item)
11897db96d56Sopenharmony_ci            goto error;
11907db96d56Sopenharmony_ci        status = PyList_Append(list, item);
11917db96d56Sopenharmony_ci        Py_DECREF(item);
11927db96d56Sopenharmony_ci        if (status < 0)
11937db96d56Sopenharmony_ci            goto error;
11947db96d56Sopenharmony_ci    }
11957db96d56Sopenharmony_ci
11967db96d56Sopenharmony_ci    state_fini(&state);
11977db96d56Sopenharmony_ci
11987db96d56Sopenharmony_ci    Py_DECREF(filter);
11997db96d56Sopenharmony_ci
12007db96d56Sopenharmony_ci    /* convert list to single string (also removes list) */
12017db96d56Sopenharmony_ci    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
12027db96d56Sopenharmony_ci    if (!joiner) {
12037db96d56Sopenharmony_ci        Py_DECREF(list);
12047db96d56Sopenharmony_ci        return NULL;
12057db96d56Sopenharmony_ci    }
12067db96d56Sopenharmony_ci    if (PyList_GET_SIZE(list) == 0) {
12077db96d56Sopenharmony_ci        Py_DECREF(list);
12087db96d56Sopenharmony_ci        item = joiner;
12097db96d56Sopenharmony_ci    }
12107db96d56Sopenharmony_ci    else {
12117db96d56Sopenharmony_ci        if (state.isbytes)
12127db96d56Sopenharmony_ci            item = _PyBytes_Join(joiner, list);
12137db96d56Sopenharmony_ci        else
12147db96d56Sopenharmony_ci            item = PyUnicode_Join(joiner, list);
12157db96d56Sopenharmony_ci        Py_DECREF(joiner);
12167db96d56Sopenharmony_ci        Py_DECREF(list);
12177db96d56Sopenharmony_ci        if (!item)
12187db96d56Sopenharmony_ci            return NULL;
12197db96d56Sopenharmony_ci    }
12207db96d56Sopenharmony_ci
12217db96d56Sopenharmony_ci    if (subn)
12227db96d56Sopenharmony_ci        return Py_BuildValue("Nn", item, n);
12237db96d56Sopenharmony_ci
12247db96d56Sopenharmony_ci    return item;
12257db96d56Sopenharmony_ci
12267db96d56Sopenharmony_cierror:
12277db96d56Sopenharmony_ci    Py_DECREF(list);
12287db96d56Sopenharmony_ci    state_fini(&state);
12297db96d56Sopenharmony_ci    Py_DECREF(filter);
12307db96d56Sopenharmony_ci    return NULL;
12317db96d56Sopenharmony_ci
12327db96d56Sopenharmony_ci}
12337db96d56Sopenharmony_ci
12347db96d56Sopenharmony_ci/*[clinic input]
12357db96d56Sopenharmony_ci_sre.SRE_Pattern.sub
12367db96d56Sopenharmony_ci
12377db96d56Sopenharmony_ci    cls: defining_class
12387db96d56Sopenharmony_ci    /
12397db96d56Sopenharmony_ci    repl: object
12407db96d56Sopenharmony_ci    string: object
12417db96d56Sopenharmony_ci    count: Py_ssize_t = 0
12427db96d56Sopenharmony_ci
12437db96d56Sopenharmony_ciReturn the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
12447db96d56Sopenharmony_ci[clinic start generated code]*/
12457db96d56Sopenharmony_ci
12467db96d56Sopenharmony_cistatic PyObject *
12477db96d56Sopenharmony_ci_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
12487db96d56Sopenharmony_ci                          PyObject *repl, PyObject *string, Py_ssize_t count)
12497db96d56Sopenharmony_ci/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
12507db96d56Sopenharmony_ci{
12517db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
12527db96d56Sopenharmony_ci
12537db96d56Sopenharmony_ci    return pattern_subx(module_state, self, repl, string, count, 0);
12547db96d56Sopenharmony_ci}
12557db96d56Sopenharmony_ci
12567db96d56Sopenharmony_ci/*[clinic input]
12577db96d56Sopenharmony_ci_sre.SRE_Pattern.subn
12587db96d56Sopenharmony_ci
12597db96d56Sopenharmony_ci    cls: defining_class
12607db96d56Sopenharmony_ci    /
12617db96d56Sopenharmony_ci    repl: object
12627db96d56Sopenharmony_ci    string: object
12637db96d56Sopenharmony_ci    count: Py_ssize_t = 0
12647db96d56Sopenharmony_ci
12657db96d56Sopenharmony_ciReturn the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
12667db96d56Sopenharmony_ci[clinic start generated code]*/
12677db96d56Sopenharmony_ci
12687db96d56Sopenharmony_cistatic PyObject *
12697db96d56Sopenharmony_ci_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
12707db96d56Sopenharmony_ci                           PyObject *repl, PyObject *string,
12717db96d56Sopenharmony_ci                           Py_ssize_t count)
12727db96d56Sopenharmony_ci/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
12737db96d56Sopenharmony_ci{
12747db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
12757db96d56Sopenharmony_ci
12767db96d56Sopenharmony_ci    return pattern_subx(module_state, self, repl, string, count, 1);
12777db96d56Sopenharmony_ci}
12787db96d56Sopenharmony_ci
12797db96d56Sopenharmony_ci/*[clinic input]
12807db96d56Sopenharmony_ci_sre.SRE_Pattern.__copy__
12817db96d56Sopenharmony_ci
12827db96d56Sopenharmony_ci[clinic start generated code]*/
12837db96d56Sopenharmony_ci
12847db96d56Sopenharmony_cistatic PyObject *
12857db96d56Sopenharmony_ci_sre_SRE_Pattern___copy___impl(PatternObject *self)
12867db96d56Sopenharmony_ci/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
12877db96d56Sopenharmony_ci{
12887db96d56Sopenharmony_ci    Py_INCREF(self);
12897db96d56Sopenharmony_ci    return (PyObject *)self;
12907db96d56Sopenharmony_ci}
12917db96d56Sopenharmony_ci
12927db96d56Sopenharmony_ci/*[clinic input]
12937db96d56Sopenharmony_ci_sre.SRE_Pattern.__deepcopy__
12947db96d56Sopenharmony_ci
12957db96d56Sopenharmony_ci    memo: object
12967db96d56Sopenharmony_ci    /
12977db96d56Sopenharmony_ci
12987db96d56Sopenharmony_ci[clinic start generated code]*/
12997db96d56Sopenharmony_ci
13007db96d56Sopenharmony_cistatic PyObject *
13017db96d56Sopenharmony_ci_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
13027db96d56Sopenharmony_ci/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
13037db96d56Sopenharmony_ci{
13047db96d56Sopenharmony_ci    Py_INCREF(self);
13057db96d56Sopenharmony_ci    return (PyObject *)self;
13067db96d56Sopenharmony_ci}
13077db96d56Sopenharmony_ci
13087db96d56Sopenharmony_cistatic PyObject *
13097db96d56Sopenharmony_cipattern_repr(PatternObject *obj)
13107db96d56Sopenharmony_ci{
13117db96d56Sopenharmony_ci    static const struct {
13127db96d56Sopenharmony_ci        const char *name;
13137db96d56Sopenharmony_ci        int value;
13147db96d56Sopenharmony_ci    } flag_names[] = {
13157db96d56Sopenharmony_ci        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
13167db96d56Sopenharmony_ci        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
13177db96d56Sopenharmony_ci        {"re.LOCALE", SRE_FLAG_LOCALE},
13187db96d56Sopenharmony_ci        {"re.MULTILINE", SRE_FLAG_MULTILINE},
13197db96d56Sopenharmony_ci        {"re.DOTALL", SRE_FLAG_DOTALL},
13207db96d56Sopenharmony_ci        {"re.UNICODE", SRE_FLAG_UNICODE},
13217db96d56Sopenharmony_ci        {"re.VERBOSE", SRE_FLAG_VERBOSE},
13227db96d56Sopenharmony_ci        {"re.DEBUG", SRE_FLAG_DEBUG},
13237db96d56Sopenharmony_ci        {"re.ASCII", SRE_FLAG_ASCII},
13247db96d56Sopenharmony_ci    };
13257db96d56Sopenharmony_ci    PyObject *result = NULL;
13267db96d56Sopenharmony_ci    PyObject *flag_items;
13277db96d56Sopenharmony_ci    size_t i;
13287db96d56Sopenharmony_ci    int flags = obj->flags;
13297db96d56Sopenharmony_ci
13307db96d56Sopenharmony_ci    /* Omit re.UNICODE for valid string patterns. */
13317db96d56Sopenharmony_ci    if (obj->isbytes == 0 &&
13327db96d56Sopenharmony_ci        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
13337db96d56Sopenharmony_ci         SRE_FLAG_UNICODE)
13347db96d56Sopenharmony_ci        flags &= ~SRE_FLAG_UNICODE;
13357db96d56Sopenharmony_ci
13367db96d56Sopenharmony_ci    flag_items = PyList_New(0);
13377db96d56Sopenharmony_ci    if (!flag_items)
13387db96d56Sopenharmony_ci        return NULL;
13397db96d56Sopenharmony_ci
13407db96d56Sopenharmony_ci    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
13417db96d56Sopenharmony_ci        if (flags & flag_names[i].value) {
13427db96d56Sopenharmony_ci            PyObject *item = PyUnicode_FromString(flag_names[i].name);
13437db96d56Sopenharmony_ci            if (!item)
13447db96d56Sopenharmony_ci                goto done;
13457db96d56Sopenharmony_ci
13467db96d56Sopenharmony_ci            if (PyList_Append(flag_items, item) < 0) {
13477db96d56Sopenharmony_ci                Py_DECREF(item);
13487db96d56Sopenharmony_ci                goto done;
13497db96d56Sopenharmony_ci            }
13507db96d56Sopenharmony_ci            Py_DECREF(item);
13517db96d56Sopenharmony_ci            flags &= ~flag_names[i].value;
13527db96d56Sopenharmony_ci        }
13537db96d56Sopenharmony_ci    }
13547db96d56Sopenharmony_ci    if (flags) {
13557db96d56Sopenharmony_ci        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
13567db96d56Sopenharmony_ci        if (!item)
13577db96d56Sopenharmony_ci            goto done;
13587db96d56Sopenharmony_ci
13597db96d56Sopenharmony_ci        if (PyList_Append(flag_items, item) < 0) {
13607db96d56Sopenharmony_ci            Py_DECREF(item);
13617db96d56Sopenharmony_ci            goto done;
13627db96d56Sopenharmony_ci        }
13637db96d56Sopenharmony_ci        Py_DECREF(item);
13647db96d56Sopenharmony_ci    }
13657db96d56Sopenharmony_ci
13667db96d56Sopenharmony_ci    if (PyList_Size(flag_items) > 0) {
13677db96d56Sopenharmony_ci        PyObject *flags_result;
13687db96d56Sopenharmony_ci        PyObject *sep = PyUnicode_FromString("|");
13697db96d56Sopenharmony_ci        if (!sep)
13707db96d56Sopenharmony_ci            goto done;
13717db96d56Sopenharmony_ci        flags_result = PyUnicode_Join(sep, flag_items);
13727db96d56Sopenharmony_ci        Py_DECREF(sep);
13737db96d56Sopenharmony_ci        if (!flags_result)
13747db96d56Sopenharmony_ci            goto done;
13757db96d56Sopenharmony_ci        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
13767db96d56Sopenharmony_ci                                      obj->pattern, flags_result);
13777db96d56Sopenharmony_ci        Py_DECREF(flags_result);
13787db96d56Sopenharmony_ci    }
13797db96d56Sopenharmony_ci    else {
13807db96d56Sopenharmony_ci        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
13817db96d56Sopenharmony_ci    }
13827db96d56Sopenharmony_ci
13837db96d56Sopenharmony_cidone:
13847db96d56Sopenharmony_ci    Py_DECREF(flag_items);
13857db96d56Sopenharmony_ci    return result;
13867db96d56Sopenharmony_ci}
13877db96d56Sopenharmony_ci
13887db96d56Sopenharmony_ciPyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
13897db96d56Sopenharmony_ci
13907db96d56Sopenharmony_ci/* PatternObject's 'groupindex' method. */
13917db96d56Sopenharmony_cistatic PyObject *
13927db96d56Sopenharmony_cipattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
13937db96d56Sopenharmony_ci{
13947db96d56Sopenharmony_ci    if (self->groupindex == NULL)
13957db96d56Sopenharmony_ci        return PyDict_New();
13967db96d56Sopenharmony_ci    return PyDictProxy_New(self->groupindex);
13977db96d56Sopenharmony_ci}
13987db96d56Sopenharmony_ci
13997db96d56Sopenharmony_cistatic int _validate(PatternObject *self); /* Forward */
14007db96d56Sopenharmony_ci
14017db96d56Sopenharmony_ci/*[clinic input]
14027db96d56Sopenharmony_ci_sre.compile
14037db96d56Sopenharmony_ci
14047db96d56Sopenharmony_ci    pattern: object
14057db96d56Sopenharmony_ci    flags: int
14067db96d56Sopenharmony_ci    code: object(subclass_of='&PyList_Type')
14077db96d56Sopenharmony_ci    groups: Py_ssize_t
14087db96d56Sopenharmony_ci    groupindex: object(subclass_of='&PyDict_Type')
14097db96d56Sopenharmony_ci    indexgroup: object(subclass_of='&PyTuple_Type')
14107db96d56Sopenharmony_ci
14117db96d56Sopenharmony_ci[clinic start generated code]*/
14127db96d56Sopenharmony_ci
14137db96d56Sopenharmony_cistatic PyObject *
14147db96d56Sopenharmony_ci_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
14157db96d56Sopenharmony_ci                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
14167db96d56Sopenharmony_ci                  PyObject *indexgroup)
14177db96d56Sopenharmony_ci/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
14187db96d56Sopenharmony_ci{
14197db96d56Sopenharmony_ci    /* "compile" pattern descriptor to pattern object */
14207db96d56Sopenharmony_ci
14217db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state(module);
14227db96d56Sopenharmony_ci    PatternObject* self;
14237db96d56Sopenharmony_ci    Py_ssize_t i, n;
14247db96d56Sopenharmony_ci
14257db96d56Sopenharmony_ci    n = PyList_GET_SIZE(code);
14267db96d56Sopenharmony_ci    /* coverity[ampersand_in_size] */
14277db96d56Sopenharmony_ci    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
14287db96d56Sopenharmony_ci    if (!self)
14297db96d56Sopenharmony_ci        return NULL;
14307db96d56Sopenharmony_ci    self->weakreflist = NULL;
14317db96d56Sopenharmony_ci    self->pattern = NULL;
14327db96d56Sopenharmony_ci    self->groupindex = NULL;
14337db96d56Sopenharmony_ci    self->indexgroup = NULL;
14347db96d56Sopenharmony_ci
14357db96d56Sopenharmony_ci    self->codesize = n;
14367db96d56Sopenharmony_ci
14377db96d56Sopenharmony_ci    for (i = 0; i < n; i++) {
14387db96d56Sopenharmony_ci        PyObject *o = PyList_GET_ITEM(code, i);
14397db96d56Sopenharmony_ci        unsigned long value = PyLong_AsUnsignedLong(o);
14407db96d56Sopenharmony_ci        self->code[i] = (SRE_CODE) value;
14417db96d56Sopenharmony_ci        if ((unsigned long) self->code[i] != value) {
14427db96d56Sopenharmony_ci            PyErr_SetString(PyExc_OverflowError,
14437db96d56Sopenharmony_ci                            "regular expression code size limit exceeded");
14447db96d56Sopenharmony_ci            break;
14457db96d56Sopenharmony_ci        }
14467db96d56Sopenharmony_ci    }
14477db96d56Sopenharmony_ci    PyObject_GC_Track(self);
14487db96d56Sopenharmony_ci
14497db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
14507db96d56Sopenharmony_ci        Py_DECREF(self);
14517db96d56Sopenharmony_ci        return NULL;
14527db96d56Sopenharmony_ci    }
14537db96d56Sopenharmony_ci
14547db96d56Sopenharmony_ci    if (pattern == Py_None) {
14557db96d56Sopenharmony_ci        self->isbytes = -1;
14567db96d56Sopenharmony_ci    }
14577db96d56Sopenharmony_ci    else {
14587db96d56Sopenharmony_ci        Py_ssize_t p_length;
14597db96d56Sopenharmony_ci        int charsize;
14607db96d56Sopenharmony_ci        Py_buffer view;
14617db96d56Sopenharmony_ci        view.buf = NULL;
14627db96d56Sopenharmony_ci        if (!getstring(pattern, &p_length, &self->isbytes,
14637db96d56Sopenharmony_ci                       &charsize, &view)) {
14647db96d56Sopenharmony_ci            Py_DECREF(self);
14657db96d56Sopenharmony_ci            return NULL;
14667db96d56Sopenharmony_ci        }
14677db96d56Sopenharmony_ci        if (view.buf)
14687db96d56Sopenharmony_ci            PyBuffer_Release(&view);
14697db96d56Sopenharmony_ci    }
14707db96d56Sopenharmony_ci
14717db96d56Sopenharmony_ci    Py_INCREF(pattern);
14727db96d56Sopenharmony_ci    self->pattern = pattern;
14737db96d56Sopenharmony_ci
14747db96d56Sopenharmony_ci    self->flags = flags;
14757db96d56Sopenharmony_ci
14767db96d56Sopenharmony_ci    self->groups = groups;
14777db96d56Sopenharmony_ci
14787db96d56Sopenharmony_ci    if (PyDict_GET_SIZE(groupindex) > 0) {
14797db96d56Sopenharmony_ci        Py_INCREF(groupindex);
14807db96d56Sopenharmony_ci        self->groupindex = groupindex;
14817db96d56Sopenharmony_ci        if (PyTuple_GET_SIZE(indexgroup) > 0) {
14827db96d56Sopenharmony_ci            Py_INCREF(indexgroup);
14837db96d56Sopenharmony_ci            self->indexgroup = indexgroup;
14847db96d56Sopenharmony_ci        }
14857db96d56Sopenharmony_ci    }
14867db96d56Sopenharmony_ci
14877db96d56Sopenharmony_ci    if (!_validate(self)) {
14887db96d56Sopenharmony_ci        Py_DECREF(self);
14897db96d56Sopenharmony_ci        return NULL;
14907db96d56Sopenharmony_ci    }
14917db96d56Sopenharmony_ci
14927db96d56Sopenharmony_ci    return (PyObject*) self;
14937db96d56Sopenharmony_ci}
14947db96d56Sopenharmony_ci
14957db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
14967db96d56Sopenharmony_ci/* Code validation */
14977db96d56Sopenharmony_ci
14987db96d56Sopenharmony_ci/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
15007db96d56Sopenharmony_ci   for conformance with the code patterns generated there.
15017db96d56Sopenharmony_ci
15027db96d56Sopenharmony_ci   The nice thing about the generated code is that it is position-independent:
15037db96d56Sopenharmony_ci   all jumps are relative jumps forward.  Also, jumps don't cross each other:
15047db96d56Sopenharmony_ci   the target of a later jump is always earlier than the target of an earlier
15057db96d56Sopenharmony_ci   jump.  IOW, this is okay:
15067db96d56Sopenharmony_ci
15077db96d56Sopenharmony_ci   J---------J-------T--------T
15087db96d56Sopenharmony_ci    \         \_____/        /
15097db96d56Sopenharmony_ci     \______________________/
15107db96d56Sopenharmony_ci
15117db96d56Sopenharmony_ci   but this is not:
15127db96d56Sopenharmony_ci
15137db96d56Sopenharmony_ci   J---------J-------T--------T
15147db96d56Sopenharmony_ci    \_________\_____/        /
15157db96d56Sopenharmony_ci               \____________/
15167db96d56Sopenharmony_ci
15177db96d56Sopenharmony_ci   It also helps that SRE_CODE is always an unsigned type.
15187db96d56Sopenharmony_ci*/
15197db96d56Sopenharmony_ci
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)); it expands to nothing
   useful unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line and return -1 from the enclosing
   function. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: `code`
   (current position, advanced by one word), `end` (one past the last
   valid word) and the destination `op`, `arg` or `skip`.  Each one FAILs
   when `code` is already at or past `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless skip-(adj), measured from the skip
   word itself, stays within [code, end].  `adj` is parenthesized in the
   expansion so that an expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
15607db96d56Sopenharmony_ci
15617db96d56Sopenharmony_cistatic int
15627db96d56Sopenharmony_ci_validate_charset(SRE_CODE *code, SRE_CODE *end)
15637db96d56Sopenharmony_ci{
15647db96d56Sopenharmony_ci    /* Some variables are manipulated by the macros above */
15657db96d56Sopenharmony_ci    SRE_CODE op;
15667db96d56Sopenharmony_ci    SRE_CODE arg;
15677db96d56Sopenharmony_ci    SRE_CODE offset;
15687db96d56Sopenharmony_ci    int i;
15697db96d56Sopenharmony_ci
15707db96d56Sopenharmony_ci    while (code < end) {
15717db96d56Sopenharmony_ci        GET_OP;
15727db96d56Sopenharmony_ci        switch (op) {
15737db96d56Sopenharmony_ci
15747db96d56Sopenharmony_ci        case SRE_OP_NEGATE:
15757db96d56Sopenharmony_ci            break;
15767db96d56Sopenharmony_ci
15777db96d56Sopenharmony_ci        case SRE_OP_LITERAL:
15787db96d56Sopenharmony_ci            GET_ARG;
15797db96d56Sopenharmony_ci            break;
15807db96d56Sopenharmony_ci
15817db96d56Sopenharmony_ci        case SRE_OP_RANGE:
15827db96d56Sopenharmony_ci        case SRE_OP_RANGE_UNI_IGNORE:
15837db96d56Sopenharmony_ci            GET_ARG;
15847db96d56Sopenharmony_ci            GET_ARG;
15857db96d56Sopenharmony_ci            break;
15867db96d56Sopenharmony_ci
15877db96d56Sopenharmony_ci        case SRE_OP_CHARSET:
15887db96d56Sopenharmony_ci            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
15897db96d56Sopenharmony_ci            if (offset > (uintptr_t)(end - code))
15907db96d56Sopenharmony_ci                FAIL;
15917db96d56Sopenharmony_ci            code += offset;
15927db96d56Sopenharmony_ci            break;
15937db96d56Sopenharmony_ci
15947db96d56Sopenharmony_ci        case SRE_OP_BIGCHARSET:
15957db96d56Sopenharmony_ci            GET_ARG; /* Number of blocks */
15967db96d56Sopenharmony_ci            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
15977db96d56Sopenharmony_ci            if (offset > (uintptr_t)(end - code))
15987db96d56Sopenharmony_ci                FAIL;
15997db96d56Sopenharmony_ci            /* Make sure that each byte points to a valid block */
16007db96d56Sopenharmony_ci            for (i = 0; i < 256; i++) {
16017db96d56Sopenharmony_ci                if (((unsigned char *)code)[i] >= arg)
16027db96d56Sopenharmony_ci                    FAIL;
16037db96d56Sopenharmony_ci            }
16047db96d56Sopenharmony_ci            code += offset;
16057db96d56Sopenharmony_ci            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
16067db96d56Sopenharmony_ci            if (offset > (uintptr_t)(end - code))
16077db96d56Sopenharmony_ci                FAIL;
16087db96d56Sopenharmony_ci            code += offset;
16097db96d56Sopenharmony_ci            break;
16107db96d56Sopenharmony_ci
16117db96d56Sopenharmony_ci        case SRE_OP_CATEGORY:
16127db96d56Sopenharmony_ci            GET_ARG;
16137db96d56Sopenharmony_ci            switch (arg) {
16147db96d56Sopenharmony_ci            case SRE_CATEGORY_DIGIT:
16157db96d56Sopenharmony_ci            case SRE_CATEGORY_NOT_DIGIT:
16167db96d56Sopenharmony_ci            case SRE_CATEGORY_SPACE:
16177db96d56Sopenharmony_ci            case SRE_CATEGORY_NOT_SPACE:
16187db96d56Sopenharmony_ci            case SRE_CATEGORY_WORD:
16197db96d56Sopenharmony_ci            case SRE_CATEGORY_NOT_WORD:
16207db96d56Sopenharmony_ci            case SRE_CATEGORY_LINEBREAK:
16217db96d56Sopenharmony_ci            case SRE_CATEGORY_NOT_LINEBREAK:
16227db96d56Sopenharmony_ci            case SRE_CATEGORY_LOC_WORD:
16237db96d56Sopenharmony_ci            case SRE_CATEGORY_LOC_NOT_WORD:
16247db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_DIGIT:
16257db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_NOT_DIGIT:
16267db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_SPACE:
16277db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_NOT_SPACE:
16287db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_WORD:
16297db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_NOT_WORD:
16307db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_LINEBREAK:
16317db96d56Sopenharmony_ci            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
16327db96d56Sopenharmony_ci                break;
16337db96d56Sopenharmony_ci            default:
16347db96d56Sopenharmony_ci                FAIL;
16357db96d56Sopenharmony_ci            }
16367db96d56Sopenharmony_ci            break;
16377db96d56Sopenharmony_ci
16387db96d56Sopenharmony_ci        default:
16397db96d56Sopenharmony_ci            FAIL;
16407db96d56Sopenharmony_ci
16417db96d56Sopenharmony_ci        }
16427db96d56Sopenharmony_ci    }
16437db96d56Sopenharmony_ci
16447db96d56Sopenharmony_ci    return 0;
16457db96d56Sopenharmony_ci}
16467db96d56Sopenharmony_ci
16477db96d56Sopenharmony_ci/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
16487db96d56Sopenharmony_cistatic int
16497db96d56Sopenharmony_ci_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
16507db96d56Sopenharmony_ci{
16517db96d56Sopenharmony_ci    /* Some variables are manipulated by the macros above */
16527db96d56Sopenharmony_ci    SRE_CODE op;
16537db96d56Sopenharmony_ci    SRE_CODE arg;
16547db96d56Sopenharmony_ci    SRE_CODE skip;
16557db96d56Sopenharmony_ci
16567db96d56Sopenharmony_ci    VTRACE(("code=%p, end=%p\n", code, end));
16577db96d56Sopenharmony_ci
16587db96d56Sopenharmony_ci    if (code > end)
16597db96d56Sopenharmony_ci        FAIL;
16607db96d56Sopenharmony_ci
16617db96d56Sopenharmony_ci    while (code < end) {
16627db96d56Sopenharmony_ci        GET_OP;
16637db96d56Sopenharmony_ci        switch (op) {
16647db96d56Sopenharmony_ci
16657db96d56Sopenharmony_ci        case SRE_OP_MARK:
16667db96d56Sopenharmony_ci            /* We don't check whether marks are properly nested; the
16677db96d56Sopenharmony_ci               sre_match() code is robust even if they don't, and the worst
16687db96d56Sopenharmony_ci               you can get is nonsensical match results. */
16697db96d56Sopenharmony_ci            GET_ARG;
16707db96d56Sopenharmony_ci            if (arg > 2 * (size_t)groups + 1) {
16717db96d56Sopenharmony_ci                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
16727db96d56Sopenharmony_ci                FAIL;
16737db96d56Sopenharmony_ci            }
16747db96d56Sopenharmony_ci            break;
16757db96d56Sopenharmony_ci
16767db96d56Sopenharmony_ci        case SRE_OP_LITERAL:
16777db96d56Sopenharmony_ci        case SRE_OP_NOT_LITERAL:
16787db96d56Sopenharmony_ci        case SRE_OP_LITERAL_IGNORE:
16797db96d56Sopenharmony_ci        case SRE_OP_NOT_LITERAL_IGNORE:
16807db96d56Sopenharmony_ci        case SRE_OP_LITERAL_UNI_IGNORE:
16817db96d56Sopenharmony_ci        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
16827db96d56Sopenharmony_ci        case SRE_OP_LITERAL_LOC_IGNORE:
16837db96d56Sopenharmony_ci        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
16847db96d56Sopenharmony_ci            GET_ARG;
16857db96d56Sopenharmony_ci            /* The arg is just a character, nothing to check */
16867db96d56Sopenharmony_ci            break;
16877db96d56Sopenharmony_ci
16887db96d56Sopenharmony_ci        case SRE_OP_SUCCESS:
16897db96d56Sopenharmony_ci        case SRE_OP_FAILURE:
16907db96d56Sopenharmony_ci            /* Nothing to check; these normally end the matching process */
16917db96d56Sopenharmony_ci            break;
16927db96d56Sopenharmony_ci
16937db96d56Sopenharmony_ci        case SRE_OP_AT:
16947db96d56Sopenharmony_ci            GET_ARG;
16957db96d56Sopenharmony_ci            switch (arg) {
16967db96d56Sopenharmony_ci            case SRE_AT_BEGINNING:
16977db96d56Sopenharmony_ci            case SRE_AT_BEGINNING_STRING:
16987db96d56Sopenharmony_ci            case SRE_AT_BEGINNING_LINE:
16997db96d56Sopenharmony_ci            case SRE_AT_END:
17007db96d56Sopenharmony_ci            case SRE_AT_END_LINE:
17017db96d56Sopenharmony_ci            case SRE_AT_END_STRING:
17027db96d56Sopenharmony_ci            case SRE_AT_BOUNDARY:
17037db96d56Sopenharmony_ci            case SRE_AT_NON_BOUNDARY:
17047db96d56Sopenharmony_ci            case SRE_AT_LOC_BOUNDARY:
17057db96d56Sopenharmony_ci            case SRE_AT_LOC_NON_BOUNDARY:
17067db96d56Sopenharmony_ci            case SRE_AT_UNI_BOUNDARY:
17077db96d56Sopenharmony_ci            case SRE_AT_UNI_NON_BOUNDARY:
17087db96d56Sopenharmony_ci                break;
17097db96d56Sopenharmony_ci            default:
17107db96d56Sopenharmony_ci                FAIL;
17117db96d56Sopenharmony_ci            }
17127db96d56Sopenharmony_ci            break;
17137db96d56Sopenharmony_ci
17147db96d56Sopenharmony_ci        case SRE_OP_ANY:
17157db96d56Sopenharmony_ci        case SRE_OP_ANY_ALL:
17167db96d56Sopenharmony_ci            /* These have no operands */
17177db96d56Sopenharmony_ci            break;
17187db96d56Sopenharmony_ci
17197db96d56Sopenharmony_ci        case SRE_OP_IN:
17207db96d56Sopenharmony_ci        case SRE_OP_IN_IGNORE:
17217db96d56Sopenharmony_ci        case SRE_OP_IN_UNI_IGNORE:
17227db96d56Sopenharmony_ci        case SRE_OP_IN_LOC_IGNORE:
17237db96d56Sopenharmony_ci            GET_SKIP;
17247db96d56Sopenharmony_ci            /* Stop 1 before the end; we check the FAILURE below */
17257db96d56Sopenharmony_ci            if (_validate_charset(code, code+skip-2))
17267db96d56Sopenharmony_ci                FAIL;
17277db96d56Sopenharmony_ci            if (code[skip-2] != SRE_OP_FAILURE)
17287db96d56Sopenharmony_ci                FAIL;
17297db96d56Sopenharmony_ci            code += skip-1;
17307db96d56Sopenharmony_ci            break;
17317db96d56Sopenharmony_ci
17327db96d56Sopenharmony_ci        case SRE_OP_INFO:
17337db96d56Sopenharmony_ci            {
17347db96d56Sopenharmony_ci                /* A minimal info field is
17357db96d56Sopenharmony_ci                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
17367db96d56Sopenharmony_ci                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
17377db96d56Sopenharmony_ci                   more follows. */
17387db96d56Sopenharmony_ci                SRE_CODE flags, i;
17397db96d56Sopenharmony_ci                SRE_CODE *newcode;
17407db96d56Sopenharmony_ci                GET_SKIP;
17417db96d56Sopenharmony_ci                newcode = code+skip-1;
17427db96d56Sopenharmony_ci                GET_ARG; flags = arg;
17437db96d56Sopenharmony_ci                GET_ARG;
17447db96d56Sopenharmony_ci                GET_ARG;
17457db96d56Sopenharmony_ci                /* Check that only valid flags are present */
17467db96d56Sopenharmony_ci                if ((flags & ~(SRE_INFO_PREFIX |
17477db96d56Sopenharmony_ci                               SRE_INFO_LITERAL |
17487db96d56Sopenharmony_ci                               SRE_INFO_CHARSET)) != 0)
17497db96d56Sopenharmony_ci                    FAIL;
17507db96d56Sopenharmony_ci                /* PREFIX and CHARSET are mutually exclusive */
17517db96d56Sopenharmony_ci                if ((flags & SRE_INFO_PREFIX) &&
17527db96d56Sopenharmony_ci                    (flags & SRE_INFO_CHARSET))
17537db96d56Sopenharmony_ci                    FAIL;
17547db96d56Sopenharmony_ci                /* LITERAL implies PREFIX */
17557db96d56Sopenharmony_ci                if ((flags & SRE_INFO_LITERAL) &&
17567db96d56Sopenharmony_ci                    !(flags & SRE_INFO_PREFIX))
17577db96d56Sopenharmony_ci                    FAIL;
17587db96d56Sopenharmony_ci                /* Validate the prefix */
17597db96d56Sopenharmony_ci                if (flags & SRE_INFO_PREFIX) {
17607db96d56Sopenharmony_ci                    SRE_CODE prefix_len;
17617db96d56Sopenharmony_ci                    GET_ARG; prefix_len = arg;
17627db96d56Sopenharmony_ci                    GET_ARG;
17637db96d56Sopenharmony_ci                    /* Here comes the prefix string */
17647db96d56Sopenharmony_ci                    if (prefix_len > (uintptr_t)(newcode - code))
17657db96d56Sopenharmony_ci                        FAIL;
17667db96d56Sopenharmony_ci                    code += prefix_len;
17677db96d56Sopenharmony_ci                    /* And here comes the overlap table */
17687db96d56Sopenharmony_ci                    if (prefix_len > (uintptr_t)(newcode - code))
17697db96d56Sopenharmony_ci                        FAIL;
17707db96d56Sopenharmony_ci                    /* Each overlap value should be < prefix_len */
17717db96d56Sopenharmony_ci                    for (i = 0; i < prefix_len; i++) {
17727db96d56Sopenharmony_ci                        if (code[i] >= prefix_len)
17737db96d56Sopenharmony_ci                            FAIL;
17747db96d56Sopenharmony_ci                    }
17757db96d56Sopenharmony_ci                    code += prefix_len;
17767db96d56Sopenharmony_ci                }
17777db96d56Sopenharmony_ci                /* Validate the charset */
17787db96d56Sopenharmony_ci                if (flags & SRE_INFO_CHARSET) {
17797db96d56Sopenharmony_ci                    if (_validate_charset(code, newcode-1))
17807db96d56Sopenharmony_ci                        FAIL;
17817db96d56Sopenharmony_ci                    if (newcode[-1] != SRE_OP_FAILURE)
17827db96d56Sopenharmony_ci                        FAIL;
17837db96d56Sopenharmony_ci                    code = newcode;
17847db96d56Sopenharmony_ci                }
17857db96d56Sopenharmony_ci                else if (code != newcode) {
17867db96d56Sopenharmony_ci                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
17877db96d56Sopenharmony_ci                    FAIL;
17887db96d56Sopenharmony_ci                }
17897db96d56Sopenharmony_ci            }
17907db96d56Sopenharmony_ci            break;
17917db96d56Sopenharmony_ci
17927db96d56Sopenharmony_ci        case SRE_OP_BRANCH:
17937db96d56Sopenharmony_ci            {
17947db96d56Sopenharmony_ci                SRE_CODE *target = NULL;
17957db96d56Sopenharmony_ci                for (;;) {
17967db96d56Sopenharmony_ci                    GET_SKIP;
17977db96d56Sopenharmony_ci                    if (skip == 0)
17987db96d56Sopenharmony_ci                        break;
17997db96d56Sopenharmony_ci                    /* Stop 2 before the end; we check the JUMP below */
18007db96d56Sopenharmony_ci                    if (_validate_inner(code, code+skip-3, groups))
18017db96d56Sopenharmony_ci                        FAIL;
18027db96d56Sopenharmony_ci                    code += skip-3;
18037db96d56Sopenharmony_ci                    /* Check that it ends with a JUMP, and that each JUMP
18047db96d56Sopenharmony_ci                       has the same target */
18057db96d56Sopenharmony_ci                    GET_OP;
18067db96d56Sopenharmony_ci                    if (op != SRE_OP_JUMP)
18077db96d56Sopenharmony_ci                        FAIL;
18087db96d56Sopenharmony_ci                    GET_SKIP;
18097db96d56Sopenharmony_ci                    if (target == NULL)
18107db96d56Sopenharmony_ci                        target = code+skip-1;
18117db96d56Sopenharmony_ci                    else if (code+skip-1 != target)
18127db96d56Sopenharmony_ci                        FAIL;
18137db96d56Sopenharmony_ci                }
18147db96d56Sopenharmony_ci                if (code != target)
18157db96d56Sopenharmony_ci                    FAIL;
18167db96d56Sopenharmony_ci            }
18177db96d56Sopenharmony_ci            break;
18187db96d56Sopenharmony_ci
18197db96d56Sopenharmony_ci        case SRE_OP_REPEAT_ONE:
18207db96d56Sopenharmony_ci        case SRE_OP_MIN_REPEAT_ONE:
18217db96d56Sopenharmony_ci        case SRE_OP_POSSESSIVE_REPEAT_ONE:
18227db96d56Sopenharmony_ci            {
18237db96d56Sopenharmony_ci                SRE_CODE min, max;
18247db96d56Sopenharmony_ci                GET_SKIP;
18257db96d56Sopenharmony_ci                GET_ARG; min = arg;
18267db96d56Sopenharmony_ci                GET_ARG; max = arg;
18277db96d56Sopenharmony_ci                if (min > max)
18287db96d56Sopenharmony_ci                    FAIL;
18297db96d56Sopenharmony_ci                if (max > SRE_MAXREPEAT)
18307db96d56Sopenharmony_ci                    FAIL;
18317db96d56Sopenharmony_ci                if (_validate_inner(code, code+skip-4, groups))
18327db96d56Sopenharmony_ci                    FAIL;
18337db96d56Sopenharmony_ci                code += skip-4;
18347db96d56Sopenharmony_ci                GET_OP;
18357db96d56Sopenharmony_ci                if (op != SRE_OP_SUCCESS)
18367db96d56Sopenharmony_ci                    FAIL;
18377db96d56Sopenharmony_ci            }
18387db96d56Sopenharmony_ci            break;
18397db96d56Sopenharmony_ci
18407db96d56Sopenharmony_ci        case SRE_OP_REPEAT:
18417db96d56Sopenharmony_ci        case SRE_OP_POSSESSIVE_REPEAT:
18427db96d56Sopenharmony_ci            {
18437db96d56Sopenharmony_ci                SRE_CODE op1 = op, min, max;
18447db96d56Sopenharmony_ci                GET_SKIP;
18457db96d56Sopenharmony_ci                GET_ARG; min = arg;
18467db96d56Sopenharmony_ci                GET_ARG; max = arg;
18477db96d56Sopenharmony_ci                if (min > max)
18487db96d56Sopenharmony_ci                    FAIL;
18497db96d56Sopenharmony_ci                if (max > SRE_MAXREPEAT)
18507db96d56Sopenharmony_ci                    FAIL;
18517db96d56Sopenharmony_ci                if (_validate_inner(code, code+skip-3, groups))
18527db96d56Sopenharmony_ci                    FAIL;
18537db96d56Sopenharmony_ci                code += skip-3;
18547db96d56Sopenharmony_ci                GET_OP;
18557db96d56Sopenharmony_ci                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
18567db96d56Sopenharmony_ci                    if (op != SRE_OP_SUCCESS)
18577db96d56Sopenharmony_ci                        FAIL;
18587db96d56Sopenharmony_ci                }
18597db96d56Sopenharmony_ci                else {
18607db96d56Sopenharmony_ci                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
18617db96d56Sopenharmony_ci                        FAIL;
18627db96d56Sopenharmony_ci                }
18637db96d56Sopenharmony_ci            }
18647db96d56Sopenharmony_ci            break;
18657db96d56Sopenharmony_ci
18667db96d56Sopenharmony_ci        case SRE_OP_ATOMIC_GROUP:
18677db96d56Sopenharmony_ci            {
18687db96d56Sopenharmony_ci                GET_SKIP;
18697db96d56Sopenharmony_ci                if (_validate_inner(code, code+skip-2, groups))
18707db96d56Sopenharmony_ci                    FAIL;
18717db96d56Sopenharmony_ci                code += skip-2;
18727db96d56Sopenharmony_ci                GET_OP;
18737db96d56Sopenharmony_ci                if (op != SRE_OP_SUCCESS)
18747db96d56Sopenharmony_ci                    FAIL;
18757db96d56Sopenharmony_ci            }
18767db96d56Sopenharmony_ci            break;
18777db96d56Sopenharmony_ci
18787db96d56Sopenharmony_ci        case SRE_OP_GROUPREF:
18797db96d56Sopenharmony_ci        case SRE_OP_GROUPREF_IGNORE:
18807db96d56Sopenharmony_ci        case SRE_OP_GROUPREF_UNI_IGNORE:
18817db96d56Sopenharmony_ci        case SRE_OP_GROUPREF_LOC_IGNORE:
18827db96d56Sopenharmony_ci            GET_ARG;
18837db96d56Sopenharmony_ci            if (arg >= (size_t)groups)
18847db96d56Sopenharmony_ci                FAIL;
18857db96d56Sopenharmony_ci            break;
18867db96d56Sopenharmony_ci
18877db96d56Sopenharmony_ci        case SRE_OP_GROUPREF_EXISTS:
18887db96d56Sopenharmony_ci            /* The regex syntax for this is: '(?(group)then|else)', where
18897db96d56Sopenharmony_ci               'group' is either an integer group number or a group name,
18907db96d56Sopenharmony_ci               'then' and 'else' are sub-regexes, and 'else' is optional. */
18917db96d56Sopenharmony_ci            GET_ARG;
18927db96d56Sopenharmony_ci            if (arg >= (size_t)groups)
18937db96d56Sopenharmony_ci                FAIL;
18947db96d56Sopenharmony_ci            GET_SKIP_ADJ(1);
18957db96d56Sopenharmony_ci            code--; /* The skip is relative to the first arg! */
18967db96d56Sopenharmony_ci            /* There are two possibilities here: if there is both a 'then'
18977db96d56Sopenharmony_ci               part and an 'else' part, the generated code looks like:
18987db96d56Sopenharmony_ci
18997db96d56Sopenharmony_ci               GROUPREF_EXISTS
19007db96d56Sopenharmony_ci               <group>
19017db96d56Sopenharmony_ci               <skipyes>
19027db96d56Sopenharmony_ci               ...then part...
19037db96d56Sopenharmony_ci               JUMP
19047db96d56Sopenharmony_ci               <skipno>
19057db96d56Sopenharmony_ci               (<skipyes> jumps here)
19067db96d56Sopenharmony_ci               ...else part...
19077db96d56Sopenharmony_ci               (<skipno> jumps here)
19087db96d56Sopenharmony_ci
19097db96d56Sopenharmony_ci               If there is only a 'then' part, it looks like:
19107db96d56Sopenharmony_ci
19117db96d56Sopenharmony_ci               GROUPREF_EXISTS
19127db96d56Sopenharmony_ci               <group>
19137db96d56Sopenharmony_ci               <skip>
19147db96d56Sopenharmony_ci               ...then part...
19157db96d56Sopenharmony_ci               (<skip> jumps here)
19167db96d56Sopenharmony_ci
19177db96d56Sopenharmony_ci               There is no direct way to decide which it is, and we don't want
19187db96d56Sopenharmony_ci               to allow arbitrary jumps anywhere in the code; so we just look
19197db96d56Sopenharmony_ci               for a JUMP opcode preceding our skip target.
19207db96d56Sopenharmony_ci            */
19217db96d56Sopenharmony_ci            VTRACE(("then part:\n"));
19227db96d56Sopenharmony_ci            int rc = _validate_inner(code+1, code+skip-1, groups);
19237db96d56Sopenharmony_ci            if (rc == 1) {
19247db96d56Sopenharmony_ci                VTRACE(("else part:\n"));
19257db96d56Sopenharmony_ci                code += skip-2; /* Position after JUMP, at <skipno> */
19267db96d56Sopenharmony_ci                GET_SKIP;
19277db96d56Sopenharmony_ci                rc = _validate_inner(code, code+skip-1, groups);
19287db96d56Sopenharmony_ci            }
19297db96d56Sopenharmony_ci            if (rc)
19307db96d56Sopenharmony_ci                FAIL;
19317db96d56Sopenharmony_ci            code += skip-1;
19327db96d56Sopenharmony_ci            break;
19337db96d56Sopenharmony_ci
19347db96d56Sopenharmony_ci        case SRE_OP_ASSERT:
19357db96d56Sopenharmony_ci        case SRE_OP_ASSERT_NOT:
19367db96d56Sopenharmony_ci            GET_SKIP;
19377db96d56Sopenharmony_ci            GET_ARG; /* 0 for lookahead, width for lookbehind */
19387db96d56Sopenharmony_ci            code--; /* Back up over arg to simplify math below */
19397db96d56Sopenharmony_ci            if (arg & 0x80000000)
19407db96d56Sopenharmony_ci                FAIL; /* Width too large */
19417db96d56Sopenharmony_ci            /* Stop 1 before the end; we check the SUCCESS below */
19427db96d56Sopenharmony_ci            if (_validate_inner(code+1, code+skip-2, groups))
19437db96d56Sopenharmony_ci                FAIL;
19447db96d56Sopenharmony_ci            code += skip-2;
19457db96d56Sopenharmony_ci            GET_OP;
19467db96d56Sopenharmony_ci            if (op != SRE_OP_SUCCESS)
19477db96d56Sopenharmony_ci                FAIL;
19487db96d56Sopenharmony_ci            break;
19497db96d56Sopenharmony_ci
19507db96d56Sopenharmony_ci        case SRE_OP_JUMP:
19517db96d56Sopenharmony_ci            if (code + 1 != end)
19527db96d56Sopenharmony_ci                FAIL;
19537db96d56Sopenharmony_ci            VTRACE(("JUMP: %d\n", __LINE__));
19547db96d56Sopenharmony_ci            return 1;
19557db96d56Sopenharmony_ci
19567db96d56Sopenharmony_ci        default:
19577db96d56Sopenharmony_ci            FAIL;
19587db96d56Sopenharmony_ci
19597db96d56Sopenharmony_ci        }
19607db96d56Sopenharmony_ci    }
19617db96d56Sopenharmony_ci
19627db96d56Sopenharmony_ci    VTRACE(("okay\n"));
19637db96d56Sopenharmony_ci    return 0;
19647db96d56Sopenharmony_ci}
19657db96d56Sopenharmony_ci
19667db96d56Sopenharmony_cistatic int
19677db96d56Sopenharmony_ci_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19687db96d56Sopenharmony_ci{
19697db96d56Sopenharmony_ci    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
19707db96d56Sopenharmony_ci        code >= end || end[-1] != SRE_OP_SUCCESS)
19717db96d56Sopenharmony_ci        FAIL;
19727db96d56Sopenharmony_ci    return _validate_inner(code, end-1, groups);
19737db96d56Sopenharmony_ci}
19747db96d56Sopenharmony_ci
19757db96d56Sopenharmony_cistatic int
19767db96d56Sopenharmony_ci_validate(PatternObject *self)
19777db96d56Sopenharmony_ci{
19787db96d56Sopenharmony_ci    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
19797db96d56Sopenharmony_ci    {
19807db96d56Sopenharmony_ci        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
19817db96d56Sopenharmony_ci        return 0;
19827db96d56Sopenharmony_ci    }
19837db96d56Sopenharmony_ci    else
19847db96d56Sopenharmony_ci        VTRACE(("Success!\n"));
19857db96d56Sopenharmony_ci    return 1;
19867db96d56Sopenharmony_ci}
19877db96d56Sopenharmony_ci
19887db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
19897db96d56Sopenharmony_ci/* match methods */
19907db96d56Sopenharmony_ci
/* Traversal function for match objects: hands each object reference held
 * by the match to the supplied visitor.
 *
 * The parameters must keep the names `visit` and `arg` because the
 * Py_VISIT macro refers to them by name.  Returns 0 once every member has
 * been visited.
 */
static int
match_traverse(MatchObject *self, visitproc visit, void *arg)
{
    /* the type object itself is visited as well */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->string);
    Py_VISIT(self->regs);
    Py_VISIT(self->pattern);
    return 0;
}
20007db96d56Sopenharmony_ci
/* Drop the object references held by a match object (string, regs and
 * pattern) using Py_CLEAR.  Always returns 0.
 *
 * Also called from match_dealloc() to release the members.
 */
static int
match_clear(MatchObject *self)
{
    Py_CLEAR(self->string);
    Py_CLEAR(self->regs);
    Py_CLEAR(self->pattern);
    return 0;
}
20097db96d56Sopenharmony_ci
20107db96d56Sopenharmony_cistatic void
20117db96d56Sopenharmony_cimatch_dealloc(MatchObject* self)
20127db96d56Sopenharmony_ci{
20137db96d56Sopenharmony_ci    PyTypeObject *tp = Py_TYPE(self);
20147db96d56Sopenharmony_ci
20157db96d56Sopenharmony_ci    PyObject_GC_UnTrack(self);
20167db96d56Sopenharmony_ci    (void)match_clear(self);
20177db96d56Sopenharmony_ci    tp->tp_free(self);
20187db96d56Sopenharmony_ci    Py_DECREF(tp);
20197db96d56Sopenharmony_ci}
20207db96d56Sopenharmony_ci
20217db96d56Sopenharmony_cistatic PyObject*
20227db96d56Sopenharmony_cimatch_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
20237db96d56Sopenharmony_ci{
20247db96d56Sopenharmony_ci    Py_ssize_t length;
20257db96d56Sopenharmony_ci    int isbytes, charsize;
20267db96d56Sopenharmony_ci    Py_buffer view;
20277db96d56Sopenharmony_ci    PyObject *result;
20287db96d56Sopenharmony_ci    const void* ptr;
20297db96d56Sopenharmony_ci    Py_ssize_t i, j;
20307db96d56Sopenharmony_ci
20317db96d56Sopenharmony_ci    assert(0 <= index && index < self->groups);
20327db96d56Sopenharmony_ci    index *= 2;
20337db96d56Sopenharmony_ci
20347db96d56Sopenharmony_ci    if (self->string == Py_None || self->mark[index] < 0) {
20357db96d56Sopenharmony_ci        /* return default value if the string or group is undefined */
20367db96d56Sopenharmony_ci        Py_INCREF(def);
20377db96d56Sopenharmony_ci        return def;
20387db96d56Sopenharmony_ci    }
20397db96d56Sopenharmony_ci
20407db96d56Sopenharmony_ci    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
20417db96d56Sopenharmony_ci    if (ptr == NULL)
20427db96d56Sopenharmony_ci        return NULL;
20437db96d56Sopenharmony_ci
20447db96d56Sopenharmony_ci    i = self->mark[index];
20457db96d56Sopenharmony_ci    j = self->mark[index+1];
20467db96d56Sopenharmony_ci    i = Py_MIN(i, length);
20477db96d56Sopenharmony_ci    j = Py_MIN(j, length);
20487db96d56Sopenharmony_ci    result = getslice(isbytes, ptr, self->string, i, j);
20497db96d56Sopenharmony_ci    if (isbytes && view.buf != NULL)
20507db96d56Sopenharmony_ci        PyBuffer_Release(&view);
20517db96d56Sopenharmony_ci    return result;
20527db96d56Sopenharmony_ci}
20537db96d56Sopenharmony_ci
20547db96d56Sopenharmony_cistatic Py_ssize_t
20557db96d56Sopenharmony_cimatch_getindex(MatchObject* self, PyObject* index)
20567db96d56Sopenharmony_ci{
20577db96d56Sopenharmony_ci    Py_ssize_t i;
20587db96d56Sopenharmony_ci
20597db96d56Sopenharmony_ci    if (index == NULL)
20607db96d56Sopenharmony_ci        /* Default value */
20617db96d56Sopenharmony_ci        return 0;
20627db96d56Sopenharmony_ci
20637db96d56Sopenharmony_ci    if (PyIndex_Check(index)) {
20647db96d56Sopenharmony_ci        i = PyNumber_AsSsize_t(index, NULL);
20657db96d56Sopenharmony_ci    }
20667db96d56Sopenharmony_ci    else {
20677db96d56Sopenharmony_ci        i = -1;
20687db96d56Sopenharmony_ci
20697db96d56Sopenharmony_ci        if (self->pattern->groupindex) {
20707db96d56Sopenharmony_ci            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
20717db96d56Sopenharmony_ci            if (index && PyLong_Check(index)) {
20727db96d56Sopenharmony_ci                i = PyLong_AsSsize_t(index);
20737db96d56Sopenharmony_ci            }
20747db96d56Sopenharmony_ci        }
20757db96d56Sopenharmony_ci    }
20767db96d56Sopenharmony_ci    if (i < 0 || i >= self->groups) {
20777db96d56Sopenharmony_ci        /* raise IndexError if we were given a bad group number */
20787db96d56Sopenharmony_ci        if (!PyErr_Occurred()) {
20797db96d56Sopenharmony_ci            PyErr_SetString(PyExc_IndexError, "no such group");
20807db96d56Sopenharmony_ci        }
20817db96d56Sopenharmony_ci        return -1;
20827db96d56Sopenharmony_ci    }
20837db96d56Sopenharmony_ci
20847db96d56Sopenharmony_ci    return i;
20857db96d56Sopenharmony_ci}
20867db96d56Sopenharmony_ci
20877db96d56Sopenharmony_cistatic PyObject*
20887db96d56Sopenharmony_cimatch_getslice(MatchObject* self, PyObject* index, PyObject* def)
20897db96d56Sopenharmony_ci{
20907db96d56Sopenharmony_ci    Py_ssize_t i = match_getindex(self, index);
20917db96d56Sopenharmony_ci
20927db96d56Sopenharmony_ci    if (i < 0) {
20937db96d56Sopenharmony_ci        return NULL;
20947db96d56Sopenharmony_ci    }
20957db96d56Sopenharmony_ci
20967db96d56Sopenharmony_ci    return match_getslice_by_index(self, i, def);
20977db96d56Sopenharmony_ci}
20987db96d56Sopenharmony_ci
20997db96d56Sopenharmony_ci/*[clinic input]
21007db96d56Sopenharmony_ci_sre.SRE_Match.expand
21017db96d56Sopenharmony_ci
21027db96d56Sopenharmony_ci    template: object
21037db96d56Sopenharmony_ci
21047db96d56Sopenharmony_ciReturn the string obtained by doing backslash substitution on the string template, as done by the sub() method.
21057db96d56Sopenharmony_ci[clinic start generated code]*/
21067db96d56Sopenharmony_ci
21077db96d56Sopenharmony_cistatic PyObject *
21087db96d56Sopenharmony_ci_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
21097db96d56Sopenharmony_ci/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
21107db96d56Sopenharmony_ci{
21117db96d56Sopenharmony_ci    /* delegate to Python code */
21127db96d56Sopenharmony_ci    return call(
21137db96d56Sopenharmony_ci        SRE_PY_MODULE, "_expand",
21147db96d56Sopenharmony_ci        PyTuple_Pack(3, self->pattern, self, template)
21157db96d56Sopenharmony_ci        );
21167db96d56Sopenharmony_ci}
21177db96d56Sopenharmony_ci
21187db96d56Sopenharmony_cistatic PyObject*
21197db96d56Sopenharmony_cimatch_group(MatchObject* self, PyObject* args)
21207db96d56Sopenharmony_ci{
21217db96d56Sopenharmony_ci    PyObject* result;
21227db96d56Sopenharmony_ci    Py_ssize_t i, size;
21237db96d56Sopenharmony_ci
21247db96d56Sopenharmony_ci    size = PyTuple_GET_SIZE(args);
21257db96d56Sopenharmony_ci
21267db96d56Sopenharmony_ci    switch (size) {
21277db96d56Sopenharmony_ci    case 0:
21287db96d56Sopenharmony_ci        result = match_getslice(self, _PyLong_GetZero(), Py_None);
21297db96d56Sopenharmony_ci        break;
21307db96d56Sopenharmony_ci    case 1:
21317db96d56Sopenharmony_ci        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
21327db96d56Sopenharmony_ci        break;
21337db96d56Sopenharmony_ci    default:
21347db96d56Sopenharmony_ci        /* fetch multiple items */
21357db96d56Sopenharmony_ci        result = PyTuple_New(size);
21367db96d56Sopenharmony_ci        if (!result)
21377db96d56Sopenharmony_ci            return NULL;
21387db96d56Sopenharmony_ci        for (i = 0; i < size; i++) {
21397db96d56Sopenharmony_ci            PyObject* item = match_getslice(
21407db96d56Sopenharmony_ci                self, PyTuple_GET_ITEM(args, i), Py_None
21417db96d56Sopenharmony_ci                );
21427db96d56Sopenharmony_ci            if (!item) {
21437db96d56Sopenharmony_ci                Py_DECREF(result);
21447db96d56Sopenharmony_ci                return NULL;
21457db96d56Sopenharmony_ci            }
21467db96d56Sopenharmony_ci            PyTuple_SET_ITEM(result, i, item);
21477db96d56Sopenharmony_ci        }
21487db96d56Sopenharmony_ci        break;
21497db96d56Sopenharmony_ci    }
21507db96d56Sopenharmony_ci    return result;
21517db96d56Sopenharmony_ci}
21527db96d56Sopenharmony_ci
21537db96d56Sopenharmony_cistatic PyObject*
21547db96d56Sopenharmony_cimatch_getitem(MatchObject* self, PyObject* name)
21557db96d56Sopenharmony_ci{
21567db96d56Sopenharmony_ci    return match_getslice(self, name, Py_None);
21577db96d56Sopenharmony_ci}
21587db96d56Sopenharmony_ci
21597db96d56Sopenharmony_ci/*[clinic input]
21607db96d56Sopenharmony_ci_sre.SRE_Match.groups
21617db96d56Sopenharmony_ci
21627db96d56Sopenharmony_ci    default: object = None
21637db96d56Sopenharmony_ci        Is used for groups that did not participate in the match.
21647db96d56Sopenharmony_ci
21657db96d56Sopenharmony_ciReturn a tuple containing all the subgroups of the match, from 1.
21667db96d56Sopenharmony_ci[clinic start generated code]*/
21677db96d56Sopenharmony_ci
21687db96d56Sopenharmony_cistatic PyObject *
21697db96d56Sopenharmony_ci_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
21707db96d56Sopenharmony_ci/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
21717db96d56Sopenharmony_ci{
21727db96d56Sopenharmony_ci    PyObject* result;
21737db96d56Sopenharmony_ci    Py_ssize_t index;
21747db96d56Sopenharmony_ci
21757db96d56Sopenharmony_ci    result = PyTuple_New(self->groups-1);
21767db96d56Sopenharmony_ci    if (!result)
21777db96d56Sopenharmony_ci        return NULL;
21787db96d56Sopenharmony_ci
21797db96d56Sopenharmony_ci    for (index = 1; index < self->groups; index++) {
21807db96d56Sopenharmony_ci        PyObject* item;
21817db96d56Sopenharmony_ci        item = match_getslice_by_index(self, index, default_value);
21827db96d56Sopenharmony_ci        if (!item) {
21837db96d56Sopenharmony_ci            Py_DECREF(result);
21847db96d56Sopenharmony_ci            return NULL;
21857db96d56Sopenharmony_ci        }
21867db96d56Sopenharmony_ci        PyTuple_SET_ITEM(result, index-1, item);
21877db96d56Sopenharmony_ci    }
21887db96d56Sopenharmony_ci
21897db96d56Sopenharmony_ci    return result;
21907db96d56Sopenharmony_ci}
21917db96d56Sopenharmony_ci
21927db96d56Sopenharmony_ci/*[clinic input]
21937db96d56Sopenharmony_ci_sre.SRE_Match.groupdict
21947db96d56Sopenharmony_ci
21957db96d56Sopenharmony_ci    default: object = None
21967db96d56Sopenharmony_ci        Is used for groups that did not participate in the match.
21977db96d56Sopenharmony_ci
21987db96d56Sopenharmony_ciReturn a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
21997db96d56Sopenharmony_ci[clinic start generated code]*/
22007db96d56Sopenharmony_ci
22017db96d56Sopenharmony_cistatic PyObject *
22027db96d56Sopenharmony_ci_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
22037db96d56Sopenharmony_ci/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
22047db96d56Sopenharmony_ci{
22057db96d56Sopenharmony_ci    PyObject *result;
22067db96d56Sopenharmony_ci    PyObject *key;
22077db96d56Sopenharmony_ci    PyObject *value;
22087db96d56Sopenharmony_ci    Py_ssize_t pos = 0;
22097db96d56Sopenharmony_ci    Py_hash_t hash;
22107db96d56Sopenharmony_ci
22117db96d56Sopenharmony_ci    result = PyDict_New();
22127db96d56Sopenharmony_ci    if (!result || !self->pattern->groupindex)
22137db96d56Sopenharmony_ci        return result;
22147db96d56Sopenharmony_ci
22157db96d56Sopenharmony_ci    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
22167db96d56Sopenharmony_ci        int status;
22177db96d56Sopenharmony_ci        Py_INCREF(key);
22187db96d56Sopenharmony_ci        value = match_getslice(self, key, default_value);
22197db96d56Sopenharmony_ci        if (!value) {
22207db96d56Sopenharmony_ci            Py_DECREF(key);
22217db96d56Sopenharmony_ci            goto failed;
22227db96d56Sopenharmony_ci        }
22237db96d56Sopenharmony_ci        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
22247db96d56Sopenharmony_ci        Py_DECREF(value);
22257db96d56Sopenharmony_ci        Py_DECREF(key);
22267db96d56Sopenharmony_ci        if (status < 0)
22277db96d56Sopenharmony_ci            goto failed;
22287db96d56Sopenharmony_ci    }
22297db96d56Sopenharmony_ci
22307db96d56Sopenharmony_ci    return result;
22317db96d56Sopenharmony_ci
22327db96d56Sopenharmony_cifailed:
22337db96d56Sopenharmony_ci    Py_DECREF(result);
22347db96d56Sopenharmony_ci    return NULL;
22357db96d56Sopenharmony_ci}
22367db96d56Sopenharmony_ci
22377db96d56Sopenharmony_ci/*[clinic input]
22387db96d56Sopenharmony_ci_sre.SRE_Match.start -> Py_ssize_t
22397db96d56Sopenharmony_ci
22407db96d56Sopenharmony_ci    group: object(c_default="NULL") = 0
22417db96d56Sopenharmony_ci    /
22427db96d56Sopenharmony_ci
22437db96d56Sopenharmony_ciReturn index of the start of the substring matched by group.
22447db96d56Sopenharmony_ci[clinic start generated code]*/
22457db96d56Sopenharmony_ci
22467db96d56Sopenharmony_cistatic Py_ssize_t
22477db96d56Sopenharmony_ci_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
22487db96d56Sopenharmony_ci/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
22497db96d56Sopenharmony_ci{
22507db96d56Sopenharmony_ci    Py_ssize_t index = match_getindex(self, group);
22517db96d56Sopenharmony_ci
22527db96d56Sopenharmony_ci    if (index < 0) {
22537db96d56Sopenharmony_ci        return -1;
22547db96d56Sopenharmony_ci    }
22557db96d56Sopenharmony_ci
22567db96d56Sopenharmony_ci    /* mark is -1 if group is undefined */
22577db96d56Sopenharmony_ci    return self->mark[index*2];
22587db96d56Sopenharmony_ci}
22597db96d56Sopenharmony_ci
22607db96d56Sopenharmony_ci/*[clinic input]
22617db96d56Sopenharmony_ci_sre.SRE_Match.end -> Py_ssize_t
22627db96d56Sopenharmony_ci
22637db96d56Sopenharmony_ci    group: object(c_default="NULL") = 0
22647db96d56Sopenharmony_ci    /
22657db96d56Sopenharmony_ci
22667db96d56Sopenharmony_ciReturn index of the end of the substring matched by group.
22677db96d56Sopenharmony_ci[clinic start generated code]*/
22687db96d56Sopenharmony_ci
22697db96d56Sopenharmony_cistatic Py_ssize_t
22707db96d56Sopenharmony_ci_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
22717db96d56Sopenharmony_ci/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
22727db96d56Sopenharmony_ci{
22737db96d56Sopenharmony_ci    Py_ssize_t index = match_getindex(self, group);
22747db96d56Sopenharmony_ci
22757db96d56Sopenharmony_ci    if (index < 0) {
22767db96d56Sopenharmony_ci        return -1;
22777db96d56Sopenharmony_ci    }
22787db96d56Sopenharmony_ci
22797db96d56Sopenharmony_ci    /* mark is -1 if group is undefined */
22807db96d56Sopenharmony_ci    return self->mark[index*2+1];
22817db96d56Sopenharmony_ci}
22827db96d56Sopenharmony_ci
22837db96d56Sopenharmony_ciLOCAL(PyObject*)
22847db96d56Sopenharmony_ci_pair(Py_ssize_t i1, Py_ssize_t i2)
22857db96d56Sopenharmony_ci{
22867db96d56Sopenharmony_ci    PyObject* pair;
22877db96d56Sopenharmony_ci    PyObject* item;
22887db96d56Sopenharmony_ci
22897db96d56Sopenharmony_ci    pair = PyTuple_New(2);
22907db96d56Sopenharmony_ci    if (!pair)
22917db96d56Sopenharmony_ci        return NULL;
22927db96d56Sopenharmony_ci
22937db96d56Sopenharmony_ci    item = PyLong_FromSsize_t(i1);
22947db96d56Sopenharmony_ci    if (!item)
22957db96d56Sopenharmony_ci        goto error;
22967db96d56Sopenharmony_ci    PyTuple_SET_ITEM(pair, 0, item);
22977db96d56Sopenharmony_ci
22987db96d56Sopenharmony_ci    item = PyLong_FromSsize_t(i2);
22997db96d56Sopenharmony_ci    if (!item)
23007db96d56Sopenharmony_ci        goto error;
23017db96d56Sopenharmony_ci    PyTuple_SET_ITEM(pair, 1, item);
23027db96d56Sopenharmony_ci
23037db96d56Sopenharmony_ci    return pair;
23047db96d56Sopenharmony_ci
23057db96d56Sopenharmony_ci  error:
23067db96d56Sopenharmony_ci    Py_DECREF(pair);
23077db96d56Sopenharmony_ci    return NULL;
23087db96d56Sopenharmony_ci}
23097db96d56Sopenharmony_ci
23107db96d56Sopenharmony_ci/*[clinic input]
23117db96d56Sopenharmony_ci_sre.SRE_Match.span
23127db96d56Sopenharmony_ci
23137db96d56Sopenharmony_ci    group: object(c_default="NULL") = 0
23147db96d56Sopenharmony_ci    /
23157db96d56Sopenharmony_ci
23167db96d56Sopenharmony_ciFor match object m, return the 2-tuple (m.start(group), m.end(group)).
23177db96d56Sopenharmony_ci[clinic start generated code]*/
23187db96d56Sopenharmony_ci
23197db96d56Sopenharmony_cistatic PyObject *
23207db96d56Sopenharmony_ci_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
23217db96d56Sopenharmony_ci/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
23227db96d56Sopenharmony_ci{
23237db96d56Sopenharmony_ci    Py_ssize_t index = match_getindex(self, group);
23247db96d56Sopenharmony_ci
23257db96d56Sopenharmony_ci    if (index < 0) {
23267db96d56Sopenharmony_ci        return NULL;
23277db96d56Sopenharmony_ci    }
23287db96d56Sopenharmony_ci
23297db96d56Sopenharmony_ci    /* marks are -1 if group is undefined */
23307db96d56Sopenharmony_ci    return _pair(self->mark[index*2], self->mark[index*2+1]);
23317db96d56Sopenharmony_ci}
23327db96d56Sopenharmony_ci
23337db96d56Sopenharmony_cistatic PyObject*
23347db96d56Sopenharmony_cimatch_regs(MatchObject* self)
23357db96d56Sopenharmony_ci{
23367db96d56Sopenharmony_ci    PyObject* regs;
23377db96d56Sopenharmony_ci    PyObject* item;
23387db96d56Sopenharmony_ci    Py_ssize_t index;
23397db96d56Sopenharmony_ci
23407db96d56Sopenharmony_ci    regs = PyTuple_New(self->groups);
23417db96d56Sopenharmony_ci    if (!regs)
23427db96d56Sopenharmony_ci        return NULL;
23437db96d56Sopenharmony_ci
23447db96d56Sopenharmony_ci    for (index = 0; index < self->groups; index++) {
23457db96d56Sopenharmony_ci        item = _pair(self->mark[index*2], self->mark[index*2+1]);
23467db96d56Sopenharmony_ci        if (!item) {
23477db96d56Sopenharmony_ci            Py_DECREF(regs);
23487db96d56Sopenharmony_ci            return NULL;
23497db96d56Sopenharmony_ci        }
23507db96d56Sopenharmony_ci        PyTuple_SET_ITEM(regs, index, item);
23517db96d56Sopenharmony_ci    }
23527db96d56Sopenharmony_ci
23537db96d56Sopenharmony_ci    Py_INCREF(regs);
23547db96d56Sopenharmony_ci    self->regs = regs;
23557db96d56Sopenharmony_ci
23567db96d56Sopenharmony_ci    return regs;
23577db96d56Sopenharmony_ci}
23587db96d56Sopenharmony_ci
23597db96d56Sopenharmony_ci/*[clinic input]
23607db96d56Sopenharmony_ci_sre.SRE_Match.__copy__
23617db96d56Sopenharmony_ci
23627db96d56Sopenharmony_ci[clinic start generated code]*/
23637db96d56Sopenharmony_ci
23647db96d56Sopenharmony_cistatic PyObject *
23657db96d56Sopenharmony_ci_sre_SRE_Match___copy___impl(MatchObject *self)
23667db96d56Sopenharmony_ci/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
23677db96d56Sopenharmony_ci{
23687db96d56Sopenharmony_ci    Py_INCREF(self);
23697db96d56Sopenharmony_ci    return (PyObject *)self;
23707db96d56Sopenharmony_ci}
23717db96d56Sopenharmony_ci
23727db96d56Sopenharmony_ci/*[clinic input]
23737db96d56Sopenharmony_ci_sre.SRE_Match.__deepcopy__
23747db96d56Sopenharmony_ci
23757db96d56Sopenharmony_ci    memo: object
23767db96d56Sopenharmony_ci    /
23777db96d56Sopenharmony_ci
23787db96d56Sopenharmony_ci[clinic start generated code]*/
23797db96d56Sopenharmony_ci
23807db96d56Sopenharmony_cistatic PyObject *
23817db96d56Sopenharmony_ci_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
23827db96d56Sopenharmony_ci/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
23837db96d56Sopenharmony_ci{
23847db96d56Sopenharmony_ci    Py_INCREF(self);
23857db96d56Sopenharmony_ci    return (PyObject *)self;
23867db96d56Sopenharmony_ci}
23877db96d56Sopenharmony_ci
23887db96d56Sopenharmony_ciPyDoc_STRVAR(match_doc,
23897db96d56Sopenharmony_ci"The result of re.match() and re.search().\n\
23907db96d56Sopenharmony_ciMatch objects always have a boolean value of True.");
23917db96d56Sopenharmony_ci
23927db96d56Sopenharmony_ciPyDoc_STRVAR(match_group_doc,
23937db96d56Sopenharmony_ci"group([group1, ...]) -> str or tuple.\n\
23947db96d56Sopenharmony_ci    Return subgroup(s) of the match by indices or names.\n\
23957db96d56Sopenharmony_ci    For 0 returns the entire match.");
23967db96d56Sopenharmony_ci
23977db96d56Sopenharmony_cistatic PyObject *
23987db96d56Sopenharmony_cimatch_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
23997db96d56Sopenharmony_ci{
24007db96d56Sopenharmony_ci    if (self->lastindex >= 0)
24017db96d56Sopenharmony_ci        return PyLong_FromSsize_t(self->lastindex);
24027db96d56Sopenharmony_ci    Py_RETURN_NONE;
24037db96d56Sopenharmony_ci}
24047db96d56Sopenharmony_ci
24057db96d56Sopenharmony_cistatic PyObject *
24067db96d56Sopenharmony_cimatch_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
24077db96d56Sopenharmony_ci{
24087db96d56Sopenharmony_ci    if (self->pattern->indexgroup &&
24097db96d56Sopenharmony_ci        self->lastindex >= 0 &&
24107db96d56Sopenharmony_ci        self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
24117db96d56Sopenharmony_ci    {
24127db96d56Sopenharmony_ci        PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
24137db96d56Sopenharmony_ci                                            self->lastindex);
24147db96d56Sopenharmony_ci        Py_INCREF(result);
24157db96d56Sopenharmony_ci        return result;
24167db96d56Sopenharmony_ci    }
24177db96d56Sopenharmony_ci    Py_RETURN_NONE;
24187db96d56Sopenharmony_ci}
24197db96d56Sopenharmony_ci
24207db96d56Sopenharmony_cistatic PyObject *
24217db96d56Sopenharmony_cimatch_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
24227db96d56Sopenharmony_ci{
24237db96d56Sopenharmony_ci    if (self->regs) {
24247db96d56Sopenharmony_ci        Py_INCREF(self->regs);
24257db96d56Sopenharmony_ci        return self->regs;
24267db96d56Sopenharmony_ci    } else
24277db96d56Sopenharmony_ci        return match_regs(self);
24287db96d56Sopenharmony_ci}
24297db96d56Sopenharmony_ci
24307db96d56Sopenharmony_cistatic PyObject *
24317db96d56Sopenharmony_cimatch_repr(MatchObject *self)
24327db96d56Sopenharmony_ci{
24337db96d56Sopenharmony_ci    PyObject *result;
24347db96d56Sopenharmony_ci    PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
24357db96d56Sopenharmony_ci    if (group0 == NULL)
24367db96d56Sopenharmony_ci        return NULL;
24377db96d56Sopenharmony_ci    result = PyUnicode_FromFormat(
24387db96d56Sopenharmony_ci            "<%s object; span=(%zd, %zd), match=%.50R>",
24397db96d56Sopenharmony_ci            Py_TYPE(self)->tp_name,
24407db96d56Sopenharmony_ci            self->mark[0], self->mark[1], group0);
24417db96d56Sopenharmony_ci    Py_DECREF(group0);
24427db96d56Sopenharmony_ci    return result;
24437db96d56Sopenharmony_ci}
24447db96d56Sopenharmony_ci
24457db96d56Sopenharmony_ci
24467db96d56Sopenharmony_cistatic PyObject*
24477db96d56Sopenharmony_cipattern_new_match(_sremodulestate* module_state,
24487db96d56Sopenharmony_ci                  PatternObject* pattern,
24497db96d56Sopenharmony_ci                  SRE_STATE* state,
24507db96d56Sopenharmony_ci                  Py_ssize_t status)
24517db96d56Sopenharmony_ci{
24527db96d56Sopenharmony_ci    /* create match object (from state object) */
24537db96d56Sopenharmony_ci
24547db96d56Sopenharmony_ci    MatchObject* match;
24557db96d56Sopenharmony_ci    Py_ssize_t i, j;
24567db96d56Sopenharmony_ci    char* base;
24577db96d56Sopenharmony_ci    int n;
24587db96d56Sopenharmony_ci
24597db96d56Sopenharmony_ci    if (status > 0) {
24607db96d56Sopenharmony_ci
24617db96d56Sopenharmony_ci        /* create match object (with room for extra group marks) */
24627db96d56Sopenharmony_ci        /* coverity[ampersand_in_size] */
24637db96d56Sopenharmony_ci        match = PyObject_GC_NewVar(MatchObject,
24647db96d56Sopenharmony_ci                                   module_state->Match_Type,
24657db96d56Sopenharmony_ci                                   2*(pattern->groups+1));
24667db96d56Sopenharmony_ci        if (!match)
24677db96d56Sopenharmony_ci            return NULL;
24687db96d56Sopenharmony_ci
24697db96d56Sopenharmony_ci        Py_INCREF(pattern);
24707db96d56Sopenharmony_ci        match->pattern = pattern;
24717db96d56Sopenharmony_ci
24727db96d56Sopenharmony_ci        Py_INCREF(state->string);
24737db96d56Sopenharmony_ci        match->string = state->string;
24747db96d56Sopenharmony_ci
24757db96d56Sopenharmony_ci        match->regs = NULL;
24767db96d56Sopenharmony_ci        match->groups = pattern->groups+1;
24777db96d56Sopenharmony_ci
24787db96d56Sopenharmony_ci        /* fill in group slices */
24797db96d56Sopenharmony_ci
24807db96d56Sopenharmony_ci        base = (char*) state->beginning;
24817db96d56Sopenharmony_ci        n = state->charsize;
24827db96d56Sopenharmony_ci
24837db96d56Sopenharmony_ci        match->mark[0] = ((char*) state->start - base) / n;
24847db96d56Sopenharmony_ci        match->mark[1] = ((char*) state->ptr - base) / n;
24857db96d56Sopenharmony_ci
24867db96d56Sopenharmony_ci        for (i = j = 0; i < pattern->groups; i++, j+=2)
24877db96d56Sopenharmony_ci            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
24887db96d56Sopenharmony_ci                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
24897db96d56Sopenharmony_ci                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
24907db96d56Sopenharmony_ci
24917db96d56Sopenharmony_ci                /* check wrong span */
24927db96d56Sopenharmony_ci                if (match->mark[j+2] > match->mark[j+3]) {
24937db96d56Sopenharmony_ci                    PyErr_SetString(PyExc_SystemError,
24947db96d56Sopenharmony_ci                                    "The span of capturing group is wrong,"
24957db96d56Sopenharmony_ci                                    " please report a bug for the re module.");
24967db96d56Sopenharmony_ci                    Py_DECREF(match);
24977db96d56Sopenharmony_ci                    return NULL;
24987db96d56Sopenharmony_ci                }
24997db96d56Sopenharmony_ci            } else
25007db96d56Sopenharmony_ci                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
25017db96d56Sopenharmony_ci
25027db96d56Sopenharmony_ci        match->pos = state->pos;
25037db96d56Sopenharmony_ci        match->endpos = state->endpos;
25047db96d56Sopenharmony_ci
25057db96d56Sopenharmony_ci        match->lastindex = state->lastindex;
25067db96d56Sopenharmony_ci
25077db96d56Sopenharmony_ci        PyObject_GC_Track(match);
25087db96d56Sopenharmony_ci        return (PyObject*) match;
25097db96d56Sopenharmony_ci
25107db96d56Sopenharmony_ci    } else if (status == 0) {
25117db96d56Sopenharmony_ci
25127db96d56Sopenharmony_ci        /* no match */
25137db96d56Sopenharmony_ci        Py_RETURN_NONE;
25147db96d56Sopenharmony_ci
25157db96d56Sopenharmony_ci    }
25167db96d56Sopenharmony_ci
25177db96d56Sopenharmony_ci    /* internal error */
25187db96d56Sopenharmony_ci    pattern_error(status);
25197db96d56Sopenharmony_ci    return NULL;
25207db96d56Sopenharmony_ci}
25217db96d56Sopenharmony_ci
25227db96d56Sopenharmony_ci
25237db96d56Sopenharmony_ci/* -------------------------------------------------------------------- */
25247db96d56Sopenharmony_ci/* scanner methods (experimental) */
25257db96d56Sopenharmony_ci
/* GC traverse slot for scanner objects: reports the scanner's type object
   and its pattern reference to the visitor.  Returns 0 unless a visit
   call makes Py_VISIT return early. */
static int
scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->pattern);
    return 0;
}
25337db96d56Sopenharmony_ci
/* GC clear slot for scanner objects: drops the reference to the pattern
   and leaves self->pattern set to NULL.  Always returns 0. */
static int
scanner_clear(ScannerObject *self)
{
    Py_CLEAR(self->pattern);
    return 0;
}
25407db96d56Sopenharmony_ci
25417db96d56Sopenharmony_cistatic void
25427db96d56Sopenharmony_ciscanner_dealloc(ScannerObject* self)
25437db96d56Sopenharmony_ci{
25447db96d56Sopenharmony_ci    PyTypeObject *tp = Py_TYPE(self);
25457db96d56Sopenharmony_ci
25467db96d56Sopenharmony_ci    PyObject_GC_UnTrack(self);
25477db96d56Sopenharmony_ci    state_fini(&self->state);
25487db96d56Sopenharmony_ci    (void)scanner_clear(self);
25497db96d56Sopenharmony_ci    tp->tp_free(self);
25507db96d56Sopenharmony_ci    Py_DECREF(tp);
25517db96d56Sopenharmony_ci}
25527db96d56Sopenharmony_ci
25537db96d56Sopenharmony_cistatic int
25547db96d56Sopenharmony_ciscanner_begin(ScannerObject* self)
25557db96d56Sopenharmony_ci{
25567db96d56Sopenharmony_ci    if (self->executing) {
25577db96d56Sopenharmony_ci        PyErr_SetString(PyExc_ValueError,
25587db96d56Sopenharmony_ci                        "regular expression scanner already executing");
25597db96d56Sopenharmony_ci        return 0;
25607db96d56Sopenharmony_ci    }
25617db96d56Sopenharmony_ci    self->executing = 1;
25627db96d56Sopenharmony_ci    return 1;
25637db96d56Sopenharmony_ci}
25647db96d56Sopenharmony_ci
/* Counterpart of scanner_begin(): clears the executing flag.  The flag
   is asserted to be set on entry. */
static void
scanner_end(ScannerObject* self)
{
    assert(self->executing);
    self->executing = 0;
}
25717db96d56Sopenharmony_ci
25727db96d56Sopenharmony_ci/*[clinic input]
25737db96d56Sopenharmony_ci_sre.SRE_Scanner.match
25747db96d56Sopenharmony_ci
25757db96d56Sopenharmony_ci    cls: defining_class
25767db96d56Sopenharmony_ci    /
25777db96d56Sopenharmony_ci
25787db96d56Sopenharmony_ci[clinic start generated code]*/
25797db96d56Sopenharmony_ci
25807db96d56Sopenharmony_cistatic PyObject *
25817db96d56Sopenharmony_ci_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
25827db96d56Sopenharmony_ci/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
25837db96d56Sopenharmony_ci{
25847db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
25857db96d56Sopenharmony_ci    SRE_STATE* state = &self->state;
25867db96d56Sopenharmony_ci    PyObject* match;
25877db96d56Sopenharmony_ci    Py_ssize_t status;
25887db96d56Sopenharmony_ci
25897db96d56Sopenharmony_ci    if (!scanner_begin(self)) {
25907db96d56Sopenharmony_ci        return NULL;
25917db96d56Sopenharmony_ci    }
25927db96d56Sopenharmony_ci    if (state->start == NULL) {
25937db96d56Sopenharmony_ci        scanner_end(self);
25947db96d56Sopenharmony_ci        Py_RETURN_NONE;
25957db96d56Sopenharmony_ci    }
25967db96d56Sopenharmony_ci
25977db96d56Sopenharmony_ci    state_reset(state);
25987db96d56Sopenharmony_ci
25997db96d56Sopenharmony_ci    state->ptr = state->start;
26007db96d56Sopenharmony_ci
26017db96d56Sopenharmony_ci    status = sre_match(state, PatternObject_GetCode(self->pattern));
26027db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
26037db96d56Sopenharmony_ci        scanner_end(self);
26047db96d56Sopenharmony_ci        return NULL;
26057db96d56Sopenharmony_ci    }
26067db96d56Sopenharmony_ci
26077db96d56Sopenharmony_ci    match = pattern_new_match(module_state, (PatternObject*) self->pattern,
26087db96d56Sopenharmony_ci                              state, status);
26097db96d56Sopenharmony_ci
26107db96d56Sopenharmony_ci    if (status == 0)
26117db96d56Sopenharmony_ci        state->start = NULL;
26127db96d56Sopenharmony_ci    else {
26137db96d56Sopenharmony_ci        state->must_advance = (state->ptr == state->start);
26147db96d56Sopenharmony_ci        state->start = state->ptr;
26157db96d56Sopenharmony_ci    }
26167db96d56Sopenharmony_ci
26177db96d56Sopenharmony_ci    scanner_end(self);
26187db96d56Sopenharmony_ci    return match;
26197db96d56Sopenharmony_ci}
26207db96d56Sopenharmony_ci
26217db96d56Sopenharmony_ci
26227db96d56Sopenharmony_ci/*[clinic input]
26237db96d56Sopenharmony_ci_sre.SRE_Scanner.search
26247db96d56Sopenharmony_ci
26257db96d56Sopenharmony_ci    cls: defining_class
26267db96d56Sopenharmony_ci    /
26277db96d56Sopenharmony_ci
26287db96d56Sopenharmony_ci[clinic start generated code]*/
26297db96d56Sopenharmony_ci
26307db96d56Sopenharmony_cistatic PyObject *
26317db96d56Sopenharmony_ci_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
26327db96d56Sopenharmony_ci/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
26337db96d56Sopenharmony_ci{
26347db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
26357db96d56Sopenharmony_ci    SRE_STATE* state = &self->state;
26367db96d56Sopenharmony_ci    PyObject* match;
26377db96d56Sopenharmony_ci    Py_ssize_t status;
26387db96d56Sopenharmony_ci
26397db96d56Sopenharmony_ci    if (!scanner_begin(self)) {
26407db96d56Sopenharmony_ci        return NULL;
26417db96d56Sopenharmony_ci    }
26427db96d56Sopenharmony_ci    if (state->start == NULL) {
26437db96d56Sopenharmony_ci        scanner_end(self);
26447db96d56Sopenharmony_ci        Py_RETURN_NONE;
26457db96d56Sopenharmony_ci    }
26467db96d56Sopenharmony_ci
26477db96d56Sopenharmony_ci    state_reset(state);
26487db96d56Sopenharmony_ci
26497db96d56Sopenharmony_ci    state->ptr = state->start;
26507db96d56Sopenharmony_ci
26517db96d56Sopenharmony_ci    status = sre_search(state, PatternObject_GetCode(self->pattern));
26527db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
26537db96d56Sopenharmony_ci        scanner_end(self);
26547db96d56Sopenharmony_ci        return NULL;
26557db96d56Sopenharmony_ci    }
26567db96d56Sopenharmony_ci
26577db96d56Sopenharmony_ci    match = pattern_new_match(module_state, (PatternObject*) self->pattern,
26587db96d56Sopenharmony_ci                              state, status);
26597db96d56Sopenharmony_ci
26607db96d56Sopenharmony_ci    if (status == 0)
26617db96d56Sopenharmony_ci        state->start = NULL;
26627db96d56Sopenharmony_ci    else {
26637db96d56Sopenharmony_ci        state->must_advance = (state->ptr == state->start);
26647db96d56Sopenharmony_ci        state->start = state->ptr;
26657db96d56Sopenharmony_ci    }
26667db96d56Sopenharmony_ci
26677db96d56Sopenharmony_ci    scanner_end(self);
26687db96d56Sopenharmony_ci    return match;
26697db96d56Sopenharmony_ci}
26707db96d56Sopenharmony_ci
26717db96d56Sopenharmony_cistatic PyObject *
26727db96d56Sopenharmony_cipattern_scanner(_sremodulestate *module_state,
26737db96d56Sopenharmony_ci                PatternObject *self,
26747db96d56Sopenharmony_ci                PyObject *string,
26757db96d56Sopenharmony_ci                Py_ssize_t pos,
26767db96d56Sopenharmony_ci                Py_ssize_t endpos)
26777db96d56Sopenharmony_ci{
26787db96d56Sopenharmony_ci    ScannerObject* scanner;
26797db96d56Sopenharmony_ci
26807db96d56Sopenharmony_ci    /* create scanner object */
26817db96d56Sopenharmony_ci    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
26827db96d56Sopenharmony_ci    if (!scanner)
26837db96d56Sopenharmony_ci        return NULL;
26847db96d56Sopenharmony_ci    scanner->pattern = NULL;
26857db96d56Sopenharmony_ci    scanner->executing = 0;
26867db96d56Sopenharmony_ci
26877db96d56Sopenharmony_ci    /* create search state object */
26887db96d56Sopenharmony_ci    if (!state_init(&scanner->state, self, string, pos, endpos)) {
26897db96d56Sopenharmony_ci        Py_DECREF(scanner);
26907db96d56Sopenharmony_ci        return NULL;
26917db96d56Sopenharmony_ci    }
26927db96d56Sopenharmony_ci
26937db96d56Sopenharmony_ci    Py_INCREF(self);
26947db96d56Sopenharmony_ci    scanner->pattern = (PyObject*) self;
26957db96d56Sopenharmony_ci
26967db96d56Sopenharmony_ci    PyObject_GC_Track(scanner);
26977db96d56Sopenharmony_ci    return (PyObject*) scanner;
26987db96d56Sopenharmony_ci}
26997db96d56Sopenharmony_ci
27007db96d56Sopenharmony_cistatic Py_hash_t
27017db96d56Sopenharmony_cipattern_hash(PatternObject *self)
27027db96d56Sopenharmony_ci{
27037db96d56Sopenharmony_ci    Py_hash_t hash, hash2;
27047db96d56Sopenharmony_ci
27057db96d56Sopenharmony_ci    hash = PyObject_Hash(self->pattern);
27067db96d56Sopenharmony_ci    if (hash == -1) {
27077db96d56Sopenharmony_ci        return -1;
27087db96d56Sopenharmony_ci    }
27097db96d56Sopenharmony_ci
27107db96d56Sopenharmony_ci    hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
27117db96d56Sopenharmony_ci    hash ^= hash2;
27127db96d56Sopenharmony_ci
27137db96d56Sopenharmony_ci    hash ^= self->flags;
27147db96d56Sopenharmony_ci    hash ^= self->isbytes;
27157db96d56Sopenharmony_ci    hash ^= self->codesize;
27167db96d56Sopenharmony_ci
27177db96d56Sopenharmony_ci    if (hash == -1) {
27187db96d56Sopenharmony_ci        hash = -2;
27197db96d56Sopenharmony_ci    }
27207db96d56Sopenharmony_ci    return hash;
27217db96d56Sopenharmony_ci}
27227db96d56Sopenharmony_ci
27237db96d56Sopenharmony_cistatic PyObject*
27247db96d56Sopenharmony_cipattern_richcompare(PyObject *lefto, PyObject *righto, int op)
27257db96d56Sopenharmony_ci{
27267db96d56Sopenharmony_ci    PyTypeObject *tp = Py_TYPE(lefto);
27277db96d56Sopenharmony_ci    _sremodulestate *module_state = get_sre_module_state_by_class(tp);
27287db96d56Sopenharmony_ci    PatternObject *left, *right;
27297db96d56Sopenharmony_ci    int cmp;
27307db96d56Sopenharmony_ci
27317db96d56Sopenharmony_ci    if (op != Py_EQ && op != Py_NE) {
27327db96d56Sopenharmony_ci        Py_RETURN_NOTIMPLEMENTED;
27337db96d56Sopenharmony_ci    }
27347db96d56Sopenharmony_ci
27357db96d56Sopenharmony_ci    if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
27367db96d56Sopenharmony_ci    {
27377db96d56Sopenharmony_ci        Py_RETURN_NOTIMPLEMENTED;
27387db96d56Sopenharmony_ci    }
27397db96d56Sopenharmony_ci
27407db96d56Sopenharmony_ci    if (lefto == righto) {
27417db96d56Sopenharmony_ci        /* a pattern is equal to itself */
27427db96d56Sopenharmony_ci        return PyBool_FromLong(op == Py_EQ);
27437db96d56Sopenharmony_ci    }
27447db96d56Sopenharmony_ci
27457db96d56Sopenharmony_ci    left = (PatternObject *)lefto;
27467db96d56Sopenharmony_ci    right = (PatternObject *)righto;
27477db96d56Sopenharmony_ci
27487db96d56Sopenharmony_ci    cmp = (left->flags == right->flags
27497db96d56Sopenharmony_ci           && left->isbytes == right->isbytes
27507db96d56Sopenharmony_ci           && left->codesize == right->codesize);
27517db96d56Sopenharmony_ci    if (cmp) {
27527db96d56Sopenharmony_ci        /* Compare the code and the pattern because the same pattern can
27537db96d56Sopenharmony_ci           produce different codes depending on the locale used to compile the
27547db96d56Sopenharmony_ci           pattern when the re.LOCALE flag is used. Don't compare groups,
27557db96d56Sopenharmony_ci           indexgroup nor groupindex: they are derivated from the pattern. */
27567db96d56Sopenharmony_ci        cmp = (memcmp(left->code, right->code,
27577db96d56Sopenharmony_ci                      sizeof(left->code[0]) * left->codesize) == 0);
27587db96d56Sopenharmony_ci    }
27597db96d56Sopenharmony_ci    if (cmp) {
27607db96d56Sopenharmony_ci        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
27617db96d56Sopenharmony_ci                                       Py_EQ);
27627db96d56Sopenharmony_ci        if (cmp < 0) {
27637db96d56Sopenharmony_ci            return NULL;
27647db96d56Sopenharmony_ci        }
27657db96d56Sopenharmony_ci    }
27667db96d56Sopenharmony_ci    if (op == Py_NE) {
27677db96d56Sopenharmony_ci        cmp = !cmp;
27687db96d56Sopenharmony_ci    }
27697db96d56Sopenharmony_ci    return PyBool_FromLong(cmp);
27707db96d56Sopenharmony_ci}
27717db96d56Sopenharmony_ci
27727db96d56Sopenharmony_ci#include "clinic/sre.c.h"
27737db96d56Sopenharmony_ci
/* Method table of the Pattern type (referenced from pattern_slots).
   The *_METHODDEF entries are macros, presumably supplied by the
   clinic/sre.c.h include above; __class_getitem__ is the only entry
   spelled out by hand. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
27907db96d56Sopenharmony_ci
27917db96d56Sopenharmony_cistatic PyGetSetDef pattern_getset[] = {
27927db96d56Sopenharmony_ci    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
27937db96d56Sopenharmony_ci      "A dictionary mapping group names to group numbers."},
27947db96d56Sopenharmony_ci    {NULL}  /* Sentinel */
27957db96d56Sopenharmony_ci};
27967db96d56Sopenharmony_ci
27977db96d56Sopenharmony_ci#define PAT_OFF(x) offsetof(PatternObject, x)
27987db96d56Sopenharmony_cistatic PyMemberDef pattern_members[] = {
27997db96d56Sopenharmony_ci    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
28007db96d56Sopenharmony_ci     "The pattern string from which the RE object was compiled."},
28017db96d56Sopenharmony_ci    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
28027db96d56Sopenharmony_ci     "The regex matching flags."},
28037db96d56Sopenharmony_ci    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
28047db96d56Sopenharmony_ci     "The number of capturing groups in the pattern."},
28057db96d56Sopenharmony_ci    {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY},
28067db96d56Sopenharmony_ci    {NULL}  /* Sentinel */
28077db96d56Sopenharmony_ci};
28087db96d56Sopenharmony_ci
28097db96d56Sopenharmony_cistatic PyType_Slot pattern_slots[] = {
28107db96d56Sopenharmony_ci    {Py_tp_dealloc, (destructor)pattern_dealloc},
28117db96d56Sopenharmony_ci    {Py_tp_repr, (reprfunc)pattern_repr},
28127db96d56Sopenharmony_ci    {Py_tp_hash, (hashfunc)pattern_hash},
28137db96d56Sopenharmony_ci    {Py_tp_doc, (void *)pattern_doc},
28147db96d56Sopenharmony_ci    {Py_tp_richcompare, pattern_richcompare},
28157db96d56Sopenharmony_ci    {Py_tp_methods, pattern_methods},
28167db96d56Sopenharmony_ci    {Py_tp_members, pattern_members},
28177db96d56Sopenharmony_ci    {Py_tp_getset, pattern_getset},
28187db96d56Sopenharmony_ci    {Py_tp_traverse, pattern_traverse},
28197db96d56Sopenharmony_ci    {Py_tp_clear, pattern_clear},
28207db96d56Sopenharmony_ci    {0, NULL},
28217db96d56Sopenharmony_ci};
28227db96d56Sopenharmony_ci
/* Specification of the "re.Pattern" heap type, which sre_exec() creates
   through CREATE_TYPE.  Instances carry a variable-length tail measured
   in SRE_CODE units (itemsize). */
static PyType_Spec pattern_spec = {
    .name = "re.Pattern",
    .basicsize = sizeof(PatternObject),
    .itemsize = sizeof(SRE_CODE),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = pattern_slots,
};
28317db96d56Sopenharmony_ci
/* Method table of the Match type (referenced from match_slots).
   "group" and "__class_getitem__" are spelled out by hand; the
   *_METHODDEF entries are macros, presumably supplied by the
   clinic/sre.c.h include. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
28467db96d56Sopenharmony_ci
28477db96d56Sopenharmony_cistatic PyGetSetDef match_getset[] = {
28487db96d56Sopenharmony_ci    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
28497db96d56Sopenharmony_ci     "The integer index of the last matched capturing group."},
28507db96d56Sopenharmony_ci    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
28517db96d56Sopenharmony_ci     "The name of the last matched capturing group."},
28527db96d56Sopenharmony_ci    {"regs",      (getter)match_regs_get,      (setter)NULL},
28537db96d56Sopenharmony_ci    {NULL}
28547db96d56Sopenharmony_ci};
28557db96d56Sopenharmony_ci
28567db96d56Sopenharmony_ci#define MATCH_OFF(x) offsetof(MatchObject, x)
28577db96d56Sopenharmony_cistatic PyMemberDef match_members[] = {
28587db96d56Sopenharmony_ci    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
28597db96d56Sopenharmony_ci     "The string passed to match() or search()."},
28607db96d56Sopenharmony_ci    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
28617db96d56Sopenharmony_ci     "The regular expression object."},
28627db96d56Sopenharmony_ci    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
28637db96d56Sopenharmony_ci     "The index into the string at which the RE engine started looking for a match."},
28647db96d56Sopenharmony_ci    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
28657db96d56Sopenharmony_ci     "The index into the string beyond which the RE engine will not go."},
28667db96d56Sopenharmony_ci    {NULL}
28677db96d56Sopenharmony_ci};
28687db96d56Sopenharmony_ci
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
28717db96d56Sopenharmony_cistatic PyType_Slot match_slots[] = {
28727db96d56Sopenharmony_ci    {Py_tp_dealloc, match_dealloc},
28737db96d56Sopenharmony_ci    {Py_tp_repr, match_repr},
28747db96d56Sopenharmony_ci    {Py_tp_doc, (void *)match_doc},
28757db96d56Sopenharmony_ci    {Py_tp_methods, match_methods},
28767db96d56Sopenharmony_ci    {Py_tp_members, match_members},
28777db96d56Sopenharmony_ci    {Py_tp_getset, match_getset},
28787db96d56Sopenharmony_ci    {Py_tp_traverse, match_traverse},
28797db96d56Sopenharmony_ci    {Py_tp_clear, match_clear},
28807db96d56Sopenharmony_ci
28817db96d56Sopenharmony_ci    /* As mapping.
28827db96d56Sopenharmony_ci     *
28837db96d56Sopenharmony_ci     * Match objects do not support length or assignment, but do support
28847db96d56Sopenharmony_ci     * __getitem__.
28857db96d56Sopenharmony_ci     */
28867db96d56Sopenharmony_ci    {Py_mp_subscript, match_getitem},
28877db96d56Sopenharmony_ci
28887db96d56Sopenharmony_ci    {0, NULL},
28897db96d56Sopenharmony_ci};
28907db96d56Sopenharmony_ci
/* Specification of the "re.Match" heap type, which sre_exec() creates
   through CREATE_TYPE.  Instances carry a variable-length tail of
   Py_ssize_t items (itemsize); pattern_new_match() allocates
   2*(groups+1) of them for the mark array. */
static PyType_Spec match_spec = {
    .name = "re.Match",
    .basicsize = sizeof(MatchObject),
    .itemsize = sizeof(Py_ssize_t),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = match_slots,
};
28997db96d56Sopenharmony_ci
/* Method table of the scanner type (referenced from scanner_slots).
   Both entries are *_METHODDEF macros, presumably supplied by the
   clinic/sre.c.h include. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
29057db96d56Sopenharmony_ci
29067db96d56Sopenharmony_ci#define SCAN_OFF(x) offsetof(ScannerObject, x)
29077db96d56Sopenharmony_cistatic PyMemberDef scanner_members[] = {
29087db96d56Sopenharmony_ci    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
29097db96d56Sopenharmony_ci    {NULL}  /* Sentinel */
29107db96d56Sopenharmony_ci};
29117db96d56Sopenharmony_ci
29127db96d56Sopenharmony_cistatic PyType_Slot scanner_slots[] = {
29137db96d56Sopenharmony_ci    {Py_tp_dealloc, scanner_dealloc},
29147db96d56Sopenharmony_ci    {Py_tp_methods, scanner_methods},
29157db96d56Sopenharmony_ci    {Py_tp_members, scanner_members},
29167db96d56Sopenharmony_ci    {Py_tp_traverse, scanner_traverse},
29177db96d56Sopenharmony_ci    {Py_tp_clear, scanner_clear},
29187db96d56Sopenharmony_ci    {0, NULL},
29197db96d56Sopenharmony_ci};
29207db96d56Sopenharmony_ci
/* Specification of the scanner heap type, which sre_exec() creates
   through CREATE_TYPE.  The type name is assembled from the SRE_MODULE
   string macro; no itemsize is given. */
static PyType_Spec scanner_spec = {
    .name = "_" SRE_MODULE ".SRE_Scanner",
    .basicsize = sizeof(ScannerObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = scanner_slots,
};
29287db96d56Sopenharmony_ci
/* Function table built entirely from *_METHODDEF macros.
   NOTE(review): presumably the module-level function table passed to the
   module definition, which is outside this excerpt -- confirm. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
29387db96d56Sopenharmony_ci
29397db96d56Sopenharmony_cistatic int
29407db96d56Sopenharmony_cisre_traverse(PyObject *module, visitproc visit, void *arg)
29417db96d56Sopenharmony_ci{
29427db96d56Sopenharmony_ci    _sremodulestate *state = get_sre_module_state(module);
29437db96d56Sopenharmony_ci
29447db96d56Sopenharmony_ci    Py_VISIT(state->Pattern_Type);
29457db96d56Sopenharmony_ci    Py_VISIT(state->Match_Type);
29467db96d56Sopenharmony_ci    Py_VISIT(state->Scanner_Type);
29477db96d56Sopenharmony_ci
29487db96d56Sopenharmony_ci    return 0;
29497db96d56Sopenharmony_ci}
29507db96d56Sopenharmony_ci
29517db96d56Sopenharmony_cistatic int
29527db96d56Sopenharmony_cisre_clear(PyObject *module)
29537db96d56Sopenharmony_ci{
29547db96d56Sopenharmony_ci    _sremodulestate *state = get_sre_module_state(module);
29557db96d56Sopenharmony_ci
29567db96d56Sopenharmony_ci    Py_CLEAR(state->Pattern_Type);
29577db96d56Sopenharmony_ci    Py_CLEAR(state->Match_Type);
29587db96d56Sopenharmony_ci    Py_CLEAR(state->Scanner_Type);
29597db96d56Sopenharmony_ci
29607db96d56Sopenharmony_ci    return 0;
29617db96d56Sopenharmony_ci}
29627db96d56Sopenharmony_ci
/* Free function for the module: releases the per-module state by
   delegating to sre_clear(); its return value is ignored.
   NOTE(review): presumably installed as the module's m_free hook -- the
   module definition is outside this excerpt. */
static void
sre_free(void *module)
{
    sre_clear((PyObject *)module);
}
29687db96d56Sopenharmony_ci
/* Create a heap type from `spec`, associated with module `m`, and store
   it in `type`.  On failure, jumps to the `error` label, which the
   enclosing function must provide. */
#define CREATE_TYPE(m, type, spec)                                  \
do {                                                                \
    type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
    if (type == NULL) {                                             \
        goto error;                                                 \
    }                                                               \
} while (0)
29767db96d56Sopenharmony_ci
/* Add `value` to `module` under `name` as an integer object built with
 * PyLong_FromUnsignedLong().  The temporary reference is released whether
 * or not the add succeeds.  On any failure control jumps to the `error`
 * label, which the enclosing function must provide. */
#define ADD_ULONG_CONSTANT(module, name, value)               \
    do {                                                      \
        PyObject *o = PyLong_FromUnsignedLong(value);         \
        if (o == NULL) {                                      \
            goto error;                                       \
        }                                                     \
        int res = PyModule_AddObjectRef(module, name, o);     \
        Py_DECREF(o);                                         \
        if (res < 0) {                                        \
            goto error;                                       \
        }                                                     \
    } while (0)
29887db96d56Sopenharmony_ci
29897db96d56Sopenharmony_cistatic int
29907db96d56Sopenharmony_cisre_exec(PyObject *m)
29917db96d56Sopenharmony_ci{
29927db96d56Sopenharmony_ci    _sremodulestate *state;
29937db96d56Sopenharmony_ci
29947db96d56Sopenharmony_ci    /* Create heap types */
29957db96d56Sopenharmony_ci    state = get_sre_module_state(m);
29967db96d56Sopenharmony_ci    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
29977db96d56Sopenharmony_ci    CREATE_TYPE(m, state->Match_Type, &match_spec);
29987db96d56Sopenharmony_ci    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
29997db96d56Sopenharmony_ci
30007db96d56Sopenharmony_ci    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
30017db96d56Sopenharmony_ci        goto error;
30027db96d56Sopenharmony_ci    }
30037db96d56Sopenharmony_ci
30047db96d56Sopenharmony_ci    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
30057db96d56Sopenharmony_ci        goto error;
30067db96d56Sopenharmony_ci    }
30077db96d56Sopenharmony_ci
30087db96d56Sopenharmony_ci    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
30097db96d56Sopenharmony_ci    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
30107db96d56Sopenharmony_ci
30117db96d56Sopenharmony_ci    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
30127db96d56Sopenharmony_ci        goto error;
30137db96d56Sopenharmony_ci    }
30147db96d56Sopenharmony_ci
30157db96d56Sopenharmony_ci    return 0;
30167db96d56Sopenharmony_ci
30177db96d56Sopenharmony_cierror:
30187db96d56Sopenharmony_ci    return -1;
30197db96d56Sopenharmony_ci}
30207db96d56Sopenharmony_ci
30217db96d56Sopenharmony_cistatic PyModuleDef_Slot sre_slots[] = {
30227db96d56Sopenharmony_ci    {Py_mod_exec, sre_exec},
30237db96d56Sopenharmony_ci    {0, NULL},
30247db96d56Sopenharmony_ci};
30257db96d56Sopenharmony_ci
30267db96d56Sopenharmony_cistatic struct PyModuleDef sremodule = {
30277db96d56Sopenharmony_ci    .m_base = PyModuleDef_HEAD_INIT,
30287db96d56Sopenharmony_ci    .m_name = "_" SRE_MODULE,
30297db96d56Sopenharmony_ci    .m_size = sizeof(_sremodulestate),
30307db96d56Sopenharmony_ci    .m_methods = _functions,
30317db96d56Sopenharmony_ci    .m_slots = sre_slots,
30327db96d56Sopenharmony_ci    .m_traverse = sre_traverse,
30337db96d56Sopenharmony_ci    .m_free = sre_free,
30347db96d56Sopenharmony_ci    .m_clear = sre_clear,
30357db96d56Sopenharmony_ci};
30367db96d56Sopenharmony_ci
30377db96d56Sopenharmony_ciPyMODINIT_FUNC
30387db96d56Sopenharmony_ciPyInit__sre(void)
30397db96d56Sopenharmony_ci{
30407db96d56Sopenharmony_ci    return PyModuleDef_Init(&sremodule);
30417db96d56Sopenharmony_ci}
30427db96d56Sopenharmony_ci
30437db96d56Sopenharmony_ci/* vim:ts=4:sw=4:et
30447db96d56Sopenharmony_ci*/
3045