xref: /third_party/python/Modules/_sre/sre.c (revision 7db96d56)
1/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl   created (based on existing template matcher code)
8 * 2000-03-06 fl   first alpha, sort of
9 * 2000-08-01 fl   fixes for 1.6b1
10 * 2000-08-07 fl   use PyOS_CheckStack() if available
11 * 2000-09-20 fl   added expand method
12 * 2001-03-20 fl   lots of fixes for 2.1b2
13 * 2001-04-15 fl   export copyright as Python attribute, not global
14 * 2001-04-28 fl   added __copy__ methods (work in progress)
15 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl   added sub/subn primitive
20 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl   fixed empty sub/subn return type
23 * 2003-04-18 mvl  fully support 4-byte codes
24 * 2003-10-17 gn   implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license.  For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
/* Version/copyright banner string for the SRE engine (file-local). */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41#define PY_SSIZE_T_CLEAN
42
43#include "Python.h"
44#include "pycore_long.h"          // _PyLong_GetZero()
45#include "pycore_moduleobject.h"  // _PyModule_GetState()
46#include "structmember.h"         // PyMemberDef
47
48#include "sre.h"
49
50#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
51
52#include <ctype.h>
53
54/* name of this module, minus the leading underscore */
55#if !defined(SRE_MODULE)
56#define SRE_MODULE "sre"
57#endif
58
59#define SRE_PY_MODULE "re"
60
61/* defining this one enables tracing */
62#undef VERBOSE
63
64/* -------------------------------------------------------------------- */
65
/* LOCAL(type) introduces a file-local inline helper that returns `type`;
   the MSVC variant additionally requests the __fastcall convention. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* error codes: negative status values reported by the matching engine */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE takes one parenthesized printf argument list, e.g.
   TRACE(("fmt %d\n", x)); it expands to nothing unless VERBOSE is defined. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
87
88/* -------------------------------------------------------------------- */
89/* search engine state */
90
/* ASCII-only character predicates.  Each range comparison is evaluated
   first, so the Py_IS* macro is only consulted for code points at or below
   the stated bound. */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
/* word character: ASCII alphanumeric or underscore */
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
99
/* Lower-case `ch` with Py_TOLOWER when it is below 128; larger code points
   are returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c when N+1 is a power of two
 * (N is 255 here); this form avoids warnings when c's type supports only
 * numbers < N+1 */
/* isalnum() is consulted only for values 0..255; anything larger is
 * reported as not alphanumeric */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* locale word character: alphanumeric per isalnum(), or an underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
110
/* Lower-case `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower(ch);
}
115
/* Upper-case `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper(ch);
}
120
121/* unicode-specific character predicates */
122
/* Thin aliases for the Py_UNICODE_* classification macros. */
/* "digit" here is tested with Py_UNICODE_ISDECIMAL */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* word character: Unicode alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
128
/* Return the Py_UNICODE_TOLOWER mapping of `ch` as an unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
133
/* Return the Py_UNICODE_TOUPPER mapping of `ch` as an unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
138
139LOCAL(int)
140sre_category(SRE_CODE category, unsigned int ch)
141{
142    switch (category) {
143
144    case SRE_CATEGORY_DIGIT:
145        return SRE_IS_DIGIT(ch);
146    case SRE_CATEGORY_NOT_DIGIT:
147        return !SRE_IS_DIGIT(ch);
148    case SRE_CATEGORY_SPACE:
149        return SRE_IS_SPACE(ch);
150    case SRE_CATEGORY_NOT_SPACE:
151        return !SRE_IS_SPACE(ch);
152    case SRE_CATEGORY_WORD:
153        return SRE_IS_WORD(ch);
154    case SRE_CATEGORY_NOT_WORD:
155        return !SRE_IS_WORD(ch);
156    case SRE_CATEGORY_LINEBREAK:
157        return SRE_IS_LINEBREAK(ch);
158    case SRE_CATEGORY_NOT_LINEBREAK:
159        return !SRE_IS_LINEBREAK(ch);
160
161    case SRE_CATEGORY_LOC_WORD:
162        return SRE_LOC_IS_WORD(ch);
163    case SRE_CATEGORY_LOC_NOT_WORD:
164        return !SRE_LOC_IS_WORD(ch);
165
166    case SRE_CATEGORY_UNI_DIGIT:
167        return SRE_UNI_IS_DIGIT(ch);
168    case SRE_CATEGORY_UNI_NOT_DIGIT:
169        return !SRE_UNI_IS_DIGIT(ch);
170    case SRE_CATEGORY_UNI_SPACE:
171        return SRE_UNI_IS_SPACE(ch);
172    case SRE_CATEGORY_UNI_NOT_SPACE:
173        return !SRE_UNI_IS_SPACE(ch);
174    case SRE_CATEGORY_UNI_WORD:
175        return SRE_UNI_IS_WORD(ch);
176    case SRE_CATEGORY_UNI_NOT_WORD:
177        return !SRE_UNI_IS_WORD(ch);
178    case SRE_CATEGORY_UNI_LINEBREAK:
179        return SRE_UNI_IS_LINEBREAK(ch);
180    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
181        return !SRE_UNI_IS_LINEBREAK(ch);
182    }
183    return 0;
184}
185
186LOCAL(int)
187char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
188{
189    return ch == pattern
190        || (SRE_CODE) sre_lower_locale(ch) == pattern
191        || (SRE_CODE) sre_upper_locale(ch) == pattern;
192}
193
194
195/* helpers */
196
197static void
198data_stack_dealloc(SRE_STATE* state)
199{
200    if (state->data_stack) {
201        PyMem_Free(state->data_stack);
202        state->data_stack = NULL;
203    }
204    state->data_stack_size = state->data_stack_base = 0;
205}
206
207static int
208data_stack_grow(SRE_STATE* state, Py_ssize_t size)
209{
210    Py_ssize_t minsize, cursize;
211    minsize = state->data_stack_base+size;
212    cursize = state->data_stack_size;
213    if (cursize < minsize) {
214        void* stack;
215        cursize = minsize+minsize/4+1024;
216        TRACE(("allocate/grow stack %zd\n", cursize));
217        stack = PyMem_Realloc(state->data_stack, cursize);
218        if (!stack) {
219            data_stack_dealloc(state);
220            return SRE_ERROR_MEMORY;
221        }
222        state->data_stack = (char *)stack;
223        state->data_stack_size = cursize;
224    }
225    return 0;
226}
227
228/* generate 8-bit version */
229
230#define SRE_CHAR Py_UCS1
231#define SIZEOF_SRE_CHAR 1
232#define SRE(F) sre_ucs1_##F
233#include "sre_lib.h"
234
235/* generate 16-bit unicode version */
236
237#define SRE_CHAR Py_UCS2
238#define SIZEOF_SRE_CHAR 2
239#define SRE(F) sre_ucs2_##F
240#include "sre_lib.h"
241
242/* generate 32-bit unicode version */
243
244#define SRE_CHAR Py_UCS4
245#define SIZEOF_SRE_CHAR 4
246#define SRE(F) sre_ucs4_##F
247#include "sre_lib.h"
248
249/* -------------------------------------------------------------------- */
250/* factories and destructors */
251
252/* module state */
253typedef struct {
254    PyTypeObject *Pattern_Type;
255    PyTypeObject *Match_Type;
256    PyTypeObject *Scanner_Type;
257} _sremodulestate;
258
259static _sremodulestate *
260get_sre_module_state(PyObject *m)
261{
262    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
263    assert(state);
264    return state;
265}
266
/* forward declaration of the module definition */
static struct PyModuleDef sremodule;
/* fetch the module state via the module that PyType_GetModule(cls) returns */
#define get_sre_module_state_by_class(cls) \
    (get_sre_module_state(PyType_GetModule(cls)))

/* see sre.h for object declarations */
/* forward declarations of helpers that are used before their definitions */
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
274
275/*[clinic input]
276module _sre
277class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
278class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
279class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
280[clinic start generated code]*/
281/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
282
283/*[clinic input]
284_sre.getcodesize -> int
285[clinic start generated code]*/
286
287static int
288_sre_getcodesize_impl(PyObject *module)
289/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
290{
291    return sizeof(SRE_CODE);
292}
293
294/*[clinic input]
295_sre.ascii_iscased -> bool
296
297    character: int
298    /
299
300[clinic start generated code]*/
301
302static int
303_sre_ascii_iscased_impl(PyObject *module, int character)
304/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
305{
306    unsigned int ch = (unsigned int)character;
307    return ch < 128 && Py_ISALPHA(ch);
308}
309
310/*[clinic input]
311_sre.unicode_iscased -> bool
312
313    character: int
314    /
315
316[clinic start generated code]*/
317
318static int
319_sre_unicode_iscased_impl(PyObject *module, int character)
320/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
321{
322    unsigned int ch = (unsigned int)character;
323    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
324}
325
326/*[clinic input]
327_sre.ascii_tolower -> int
328
329    character: int
330    /
331
332[clinic start generated code]*/
333
334static int
335_sre_ascii_tolower_impl(PyObject *module, int character)
336/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
337{
338    return sre_lower_ascii(character);
339}
340
341/*[clinic input]
342_sre.unicode_tolower -> int
343
344    character: int
345    /
346
347[clinic start generated code]*/
348
349static int
350_sre_unicode_tolower_impl(PyObject *module, int character)
351/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
352{
353    return sre_lower_unicode(character);
354}
355
356LOCAL(void)
357state_reset(SRE_STATE* state)
358{
359    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
360    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
361
362    state->lastmark = -1;
363    state->lastindex = -1;
364
365    state->repeat = NULL;
366
367    data_stack_dealloc(state);
368}
369
370static const void*
371getstring(PyObject* string, Py_ssize_t* p_length,
372          int* p_isbytes, int* p_charsize,
373          Py_buffer *view)
374{
375    /* given a python object, return a data pointer, a length (in
376       characters), and a character size.  return NULL if the object
377       is not a string (or not compatible) */
378
379    /* Unicode objects do not support the buffer API. So, get the data
380       directly instead. */
381    if (PyUnicode_Check(string)) {
382        if (PyUnicode_READY(string) == -1)
383            return NULL;
384        *p_length = PyUnicode_GET_LENGTH(string);
385        *p_charsize = PyUnicode_KIND(string);
386        *p_isbytes = 0;
387        return PyUnicode_DATA(string);
388    }
389
390    /* get pointer to byte string buffer */
391    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
392        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
393                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
394        return NULL;
395    }
396
397    *p_length = view->len;
398    *p_charsize = 1;
399    *p_isbytes = 1;
400
401    if (view->buf == NULL) {
402        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
403        PyBuffer_Release(view);
404        view->buf = NULL;
405        return NULL;
406    }
407    return view->buf;
408}
409
410LOCAL(PyObject*)
411state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
412           Py_ssize_t start, Py_ssize_t end)
413{
414    /* prepare state object */
415
416    Py_ssize_t length;
417    int isbytes, charsize;
418    const void* ptr;
419
420    memset(state, 0, sizeof(SRE_STATE));
421
422    state->mark = PyMem_New(const void *, pattern->groups * 2);
423    if (!state->mark) {
424        PyErr_NoMemory();
425        goto err;
426    }
427    state->lastmark = -1;
428    state->lastindex = -1;
429
430    state->buffer.buf = NULL;
431    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
432    if (!ptr)
433        goto err;
434
435    if (isbytes && pattern->isbytes == 0) {
436        PyErr_SetString(PyExc_TypeError,
437                        "cannot use a string pattern on a bytes-like object");
438        goto err;
439    }
440    if (!isbytes && pattern->isbytes > 0) {
441        PyErr_SetString(PyExc_TypeError,
442                        "cannot use a bytes pattern on a string-like object");
443        goto err;
444    }
445
446    /* adjust boundaries */
447    if (start < 0)
448        start = 0;
449    else if (start > length)
450        start = length;
451
452    if (end < 0)
453        end = 0;
454    else if (end > length)
455        end = length;
456
457    state->isbytes = isbytes;
458    state->charsize = charsize;
459    state->match_all = 0;
460    state->must_advance = 0;
461
462    state->beginning = ptr;
463
464    state->start = (void*) ((char*) ptr + start * state->charsize);
465    state->end = (void*) ((char*) ptr + end * state->charsize);
466
467    Py_INCREF(string);
468    state->string = string;
469    state->pos = start;
470    state->endpos = end;
471
472    return string;
473  err:
474    /* We add an explicit cast here because MSVC has a bug when
475       compiling C code where it believes that `const void**` cannot be
476       safely casted to `void*`, see bpo-39943 for details. */
477    PyMem_Free((void*) state->mark);
478    state->mark = NULL;
479    if (state->buffer.buf)
480        PyBuffer_Release(&state->buffer);
481    return NULL;
482}
483
484LOCAL(void)
485state_fini(SRE_STATE* state)
486{
487    if (state->buffer.buf)
488        PyBuffer_Release(&state->buffer);
489    Py_XDECREF(state->string);
490    data_stack_dealloc(state);
491    /* See above PyMem_Del for why we explicitly cast here. */
492    PyMem_Free((void*) state->mark);
493    state->mark = NULL;
494}
495
/* Convert the pointer `member` into a character index measured from
   (state)->beginning: the byte distance divided by (state)->charsize. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
499
500LOCAL(PyObject*)
501getslice(int isbytes, const void *ptr,
502         PyObject* string, Py_ssize_t start, Py_ssize_t end)
503{
504    if (isbytes) {
505        if (PyBytes_CheckExact(string) &&
506            start == 0 && end == PyBytes_GET_SIZE(string)) {
507            Py_INCREF(string);
508            return string;
509        }
510        return PyBytes_FromStringAndSize(
511                (const char *)ptr + start, end - start);
512    }
513    else {
514        return PyUnicode_Substring(string, start, end);
515    }
516}
517
518LOCAL(PyObject*)
519state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
520{
521    Py_ssize_t i, j;
522
523    index = (index - 1) * 2;
524
525    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
526        if (empty)
527            /* want empty string */
528            i = j = 0;
529        else {
530            Py_RETURN_NONE;
531        }
532    } else {
533        i = STATE_OFFSET(state, state->mark[index]);
534        j = STATE_OFFSET(state, state->mark[index+1]);
535
536        /* check wrong span */
537        if (i > j) {
538            PyErr_SetString(PyExc_SystemError,
539                            "The span of capturing group is wrong,"
540                            " please report a bug for the re module.");
541            return NULL;
542        }
543    }
544
545    return getslice(state->isbytes, state->beginning, string, i, j);
546}
547
548static void
549pattern_error(Py_ssize_t status)
550{
551    switch (status) {
552    case SRE_ERROR_RECURSION_LIMIT:
553        /* This error code seems to be unused. */
554        PyErr_SetString(
555            PyExc_RecursionError,
556            "maximum recursion limit exceeded"
557            );
558        break;
559    case SRE_ERROR_MEMORY:
560        PyErr_NoMemory();
561        break;
562    case SRE_ERROR_INTERRUPTED:
563    /* An exception has already been raised, so let it fly */
564        break;
565    default:
566        /* other error codes indicate compiler/engine bugs */
567        PyErr_SetString(
568            PyExc_RuntimeError,
569            "internal error in regular expression engine"
570            );
571    }
572}
573
/* GC traverse function for pattern objects: visits the object's type and
   the three object references it holds. */
static int
pattern_traverse(PatternObject *self, visitproc visit, void *arg)
{
    /* the type itself is visited as well */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->groupindex);
    Py_VISIT(self->indexgroup);
    Py_VISIT(self->pattern);
    return 0;
}
583
/* Drop the pattern object's references to groupindex, indexgroup and
   pattern, leaving each field NULL.  Always returns 0. */
static int
pattern_clear(PatternObject *self)
{
    Py_CLEAR(self->groupindex);
    Py_CLEAR(self->indexgroup);
    Py_CLEAR(self->pattern);
    return 0;
}
592
593static void
594pattern_dealloc(PatternObject* self)
595{
596    PyTypeObject *tp = Py_TYPE(self);
597
598    PyObject_GC_UnTrack(self);
599    if (self->weakreflist != NULL) {
600        PyObject_ClearWeakRefs((PyObject *) self);
601    }
602    (void)pattern_clear(self);
603    tp->tp_free(self);
604    Py_DECREF(tp);
605}
606
607LOCAL(Py_ssize_t)
608sre_match(SRE_STATE* state, SRE_CODE* pattern)
609{
610    if (state->charsize == 1)
611        return sre_ucs1_match(state, pattern, 1);
612    if (state->charsize == 2)
613        return sre_ucs2_match(state, pattern, 1);
614    assert(state->charsize == 4);
615    return sre_ucs4_match(state, pattern, 1);
616}
617
618LOCAL(Py_ssize_t)
619sre_search(SRE_STATE* state, SRE_CODE* pattern)
620{
621    if (state->charsize == 1)
622        return sre_ucs1_search(state, pattern);
623    if (state->charsize == 2)
624        return sre_ucs2_search(state, pattern);
625    assert(state->charsize == 4);
626    return sre_ucs4_search(state, pattern);
627}
628
629/*[clinic input]
630_sre.SRE_Pattern.match
631
632    cls: defining_class
633    /
634    string: object
635    pos: Py_ssize_t = 0
636    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
637
638Matches zero or more characters at the beginning of the string.
639[clinic start generated code]*/
640
641static PyObject *
642_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
643                            PyObject *string, Py_ssize_t pos,
644                            Py_ssize_t endpos)
645/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
646{
647    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
648    SRE_STATE state;
649    Py_ssize_t status;
650    PyObject *match;
651
652    if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
653        return NULL;
654
655    state.ptr = state.start;
656
657    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
658
659    status = sre_match(&state, PatternObject_GetCode(self));
660
661    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
662    if (PyErr_Occurred()) {
663        state_fini(&state);
664        return NULL;
665    }
666
667    match = pattern_new_match(module_state, self, &state, status);
668    state_fini(&state);
669    return match;
670}
671
672/*[clinic input]
673_sre.SRE_Pattern.fullmatch
674
675    cls: defining_class
676    /
677    string: object
678    pos: Py_ssize_t = 0
679    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
680
681Matches against all of the string.
682[clinic start generated code]*/
683
684static PyObject *
685_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
686                                PyObject *string, Py_ssize_t pos,
687                                Py_ssize_t endpos)
688/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
689{
690    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
691    SRE_STATE state;
692    Py_ssize_t status;
693    PyObject *match;
694
695    if (!state_init(&state, self, string, pos, endpos))
696        return NULL;
697
698    state.ptr = state.start;
699
700    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
701
702    state.match_all = 1;
703    status = sre_match(&state, PatternObject_GetCode(self));
704
705    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
706    if (PyErr_Occurred()) {
707        state_fini(&state);
708        return NULL;
709    }
710
711    match = pattern_new_match(module_state, self, &state, status);
712    state_fini(&state);
713    return match;
714}
715
716/*[clinic input]
717_sre.SRE_Pattern.search
718
719    cls: defining_class
720    /
721    string: object
722    pos: Py_ssize_t = 0
723    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
724
725Scan through string looking for a match, and return a corresponding match object instance.
726
727Return None if no position in the string matches.
728[clinic start generated code]*/
729
730static PyObject *
731_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
732                             PyObject *string, Py_ssize_t pos,
733                             Py_ssize_t endpos)
734/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
735{
736    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
737    SRE_STATE state;
738    Py_ssize_t status;
739    PyObject *match;
740
741    if (!state_init(&state, self, string, pos, endpos))
742        return NULL;
743
744    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
745
746    status = sre_search(&state, PatternObject_GetCode(self));
747
748    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
749
750    if (PyErr_Occurred()) {
751        state_fini(&state);
752        return NULL;
753    }
754
755    match = pattern_new_match(module_state, self, &state, status);
756    state_fini(&state);
757    return match;
758}
759
760static PyObject*
761call(const char* module, const char* function, PyObject* args)
762{
763    PyObject* name;
764    PyObject* mod;
765    PyObject* func;
766    PyObject* result;
767
768    if (!args)
769        return NULL;
770    name = PyUnicode_FromString(module);
771    if (!name)
772        return NULL;
773    mod = PyImport_Import(name);
774    Py_DECREF(name);
775    if (!mod)
776        return NULL;
777    func = PyObject_GetAttrString(mod, function);
778    Py_DECREF(mod);
779    if (!func)
780        return NULL;
781    result = PyObject_CallObject(func, args);
782    Py_DECREF(func);
783    Py_DECREF(args);
784    return result;
785}
786
787/*[clinic input]
788_sre.SRE_Pattern.findall
789
790    string: object
791    pos: Py_ssize_t = 0
792    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
793
794Return a list of all non-overlapping matches of pattern in string.
795[clinic start generated code]*/
796
797static PyObject *
798_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
799                              Py_ssize_t pos, Py_ssize_t endpos)
800/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
801{
802    SRE_STATE state;
803    PyObject* list;
804    Py_ssize_t status;
805    Py_ssize_t i, b, e;
806
807    if (!state_init(&state, self, string, pos, endpos))
808        return NULL;
809
810    list = PyList_New(0);
811    if (!list) {
812        state_fini(&state);
813        return NULL;
814    }
815
816    while (state.start <= state.end) {
817
818        PyObject* item;
819
820        state_reset(&state);
821
822        state.ptr = state.start;
823
824        status = sre_search(&state, PatternObject_GetCode(self));
825        if (PyErr_Occurred())
826            goto error;
827
828        if (status <= 0) {
829            if (status == 0)
830                break;
831            pattern_error(status);
832            goto error;
833        }
834
835        /* don't bother to build a match object */
836        switch (self->groups) {
837        case 0:
838            b = STATE_OFFSET(&state, state.start);
839            e = STATE_OFFSET(&state, state.ptr);
840            item = getslice(state.isbytes, state.beginning,
841                            string, b, e);
842            if (!item)
843                goto error;
844            break;
845        case 1:
846            item = state_getslice(&state, 1, string, 1);
847            if (!item)
848                goto error;
849            break;
850        default:
851            item = PyTuple_New(self->groups);
852            if (!item)
853                goto error;
854            for (i = 0; i < self->groups; i++) {
855                PyObject* o = state_getslice(&state, i+1, string, 1);
856                if (!o) {
857                    Py_DECREF(item);
858                    goto error;
859                }
860                PyTuple_SET_ITEM(item, i, o);
861            }
862            break;
863        }
864
865        status = PyList_Append(list, item);
866        Py_DECREF(item);
867        if (status < 0)
868            goto error;
869
870        state.must_advance = (state.ptr == state.start);
871        state.start = state.ptr;
872    }
873
874    state_fini(&state);
875    return list;
876
877error:
878    Py_DECREF(list);
879    state_fini(&state);
880    return NULL;
881
882}
883
884/*[clinic input]
885_sre.SRE_Pattern.finditer
886
887    cls: defining_class
888    /
889    string: object
890    pos: Py_ssize_t = 0
891    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
892
893Return an iterator over all non-overlapping matches for the RE pattern in string.
894
895For each match, the iterator returns a match object.
896[clinic start generated code]*/
897
898static PyObject *
899_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
900                               PyObject *string, Py_ssize_t pos,
901                               Py_ssize_t endpos)
902/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
903{
904    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
905    PyObject* scanner;
906    PyObject* search;
907    PyObject* iterator;
908
909    scanner = pattern_scanner(module_state, self, string, pos, endpos);
910    if (!scanner)
911        return NULL;
912
913    search = PyObject_GetAttrString(scanner, "search");
914    Py_DECREF(scanner);
915    if (!search)
916        return NULL;
917
918    iterator = PyCallIter_New(search, Py_None);
919    Py_DECREF(search);
920
921    return iterator;
922}
923
924/*[clinic input]
925_sre.SRE_Pattern.scanner
926
927    cls: defining_class
928    /
929    string: object
930    pos: Py_ssize_t = 0
931    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
932
933[clinic start generated code]*/
934
935static PyObject *
936_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
937                              PyObject *string, Py_ssize_t pos,
938                              Py_ssize_t endpos)
939/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
940{
941    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
942
943    return pattern_scanner(module_state, self, string, pos, endpos);
944}
945
946/*[clinic input]
947_sre.SRE_Pattern.split
948
949    string: object
950    maxsplit: Py_ssize_t = 0
951
952Split string by the occurrences of pattern.
953[clinic start generated code]*/
954
955static PyObject *
956_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
957                            Py_ssize_t maxsplit)
958/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
959{
960    SRE_STATE state;
961    PyObject* list;
962    PyObject* item;
963    Py_ssize_t status;
964    Py_ssize_t n;
965    Py_ssize_t i;
966    const void* last;
967
968    assert(self->codesize != 0);
969
970    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
971        return NULL;
972
973    list = PyList_New(0);
974    if (!list) {
975        state_fini(&state);
976        return NULL;
977    }
978
979    n = 0;
980    last = state.start;
981
982    while (!maxsplit || n < maxsplit) {
983
984        state_reset(&state);
985
986        state.ptr = state.start;
987
988        status = sre_search(&state, PatternObject_GetCode(self));
989        if (PyErr_Occurred())
990            goto error;
991
992        if (status <= 0) {
993            if (status == 0)
994                break;
995            pattern_error(status);
996            goto error;
997        }
998
999        /* get segment before this match */
1000        item = getslice(state.isbytes, state.beginning,
1001            string, STATE_OFFSET(&state, last),
1002            STATE_OFFSET(&state, state.start)
1003            );
1004        if (!item)
1005            goto error;
1006        status = PyList_Append(list, item);
1007        Py_DECREF(item);
1008        if (status < 0)
1009            goto error;
1010
1011        /* add groups (if any) */
1012        for (i = 0; i < self->groups; i++) {
1013            item = state_getslice(&state, i+1, string, 0);
1014            if (!item)
1015                goto error;
1016            status = PyList_Append(list, item);
1017            Py_DECREF(item);
1018            if (status < 0)
1019                goto error;
1020        }
1021
1022        n = n + 1;
1023        state.must_advance = (state.ptr == state.start);
1024        last = state.start = state.ptr;
1025
1026    }
1027
1028    /* get segment following last match (even if empty) */
1029    item = getslice(state.isbytes, state.beginning,
1030        string, STATE_OFFSET(&state, last), state.endpos
1031        );
1032    if (!item)
1033        goto error;
1034    status = PyList_Append(list, item);
1035    Py_DECREF(item);
1036    if (status < 0)
1037        goto error;
1038
1039    state_fini(&state);
1040    return list;
1041
1042error:
1043    Py_DECREF(list);
1044    state_fini(&state);
1045    return NULL;
1046
1047}
1048
1049static PyObject*
1050pattern_subx(_sremodulestate* module_state,
1051             PatternObject* self,
1052             PyObject* ptemplate,
1053             PyObject* string,
1054             Py_ssize_t count,
1055             Py_ssize_t subn)
1056{
1057    SRE_STATE state;
1058    PyObject* list;
1059    PyObject* joiner;
1060    PyObject* item;
1061    PyObject* filter;
1062    PyObject* match;
1063    const void* ptr;
1064    Py_ssize_t status;
1065    Py_ssize_t n;
1066    Py_ssize_t i, b, e;
1067    int isbytes, charsize;
1068    int filter_is_callable;
1069    Py_buffer view;
1070
1071    if (PyCallable_Check(ptemplate)) {
1072        /* sub/subn takes either a function or a template */
1073        filter = ptemplate;
1074        Py_INCREF(filter);
1075        filter_is_callable = 1;
1076    } else {
1077        /* if not callable, check if it's a literal string */
1078        int literal;
1079        view.buf = NULL;
1080        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1081        if (ptr) {
1082            if (charsize == 1)
1083                literal = memchr(ptr, '\\', n) == NULL;
1084            else
1085                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1086        } else {
1087            PyErr_Clear();
1088            literal = 0;
1089        }
1090        if (view.buf)
1091            PyBuffer_Release(&view);
1092        if (literal) {
1093            filter = ptemplate;
1094            Py_INCREF(filter);
1095            filter_is_callable = 0;
1096        } else {
1097            /* not a literal; hand it over to the template compiler */
1098            filter = call(
1099                SRE_PY_MODULE, "_subx",
1100                PyTuple_Pack(2, self, ptemplate)
1101                );
1102            if (!filter)
1103                return NULL;
1104            filter_is_callable = PyCallable_Check(filter);
1105        }
1106    }
1107
1108    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1109        Py_DECREF(filter);
1110        return NULL;
1111    }
1112
1113    list = PyList_New(0);
1114    if (!list) {
1115        Py_DECREF(filter);
1116        state_fini(&state);
1117        return NULL;
1118    }
1119
1120    n = i = 0;
1121
1122    while (!count || n < count) {
1123
1124        state_reset(&state);
1125
1126        state.ptr = state.start;
1127
1128        status = sre_search(&state, PatternObject_GetCode(self));
1129        if (PyErr_Occurred())
1130            goto error;
1131
1132        if (status <= 0) {
1133            if (status == 0)
1134                break;
1135            pattern_error(status);
1136            goto error;
1137        }
1138
1139        b = STATE_OFFSET(&state, state.start);
1140        e = STATE_OFFSET(&state, state.ptr);
1141
1142        if (i < b) {
1143            /* get segment before this match */
1144            item = getslice(state.isbytes, state.beginning,
1145                string, i, b);
1146            if (!item)
1147                goto error;
1148            status = PyList_Append(list, item);
1149            Py_DECREF(item);
1150            if (status < 0)
1151                goto error;
1152
1153        }
1154
1155        if (filter_is_callable) {
1156            /* pass match object through filter */
1157            match = pattern_new_match(module_state, self, &state, 1);
1158            if (!match)
1159                goto error;
1160            item = PyObject_CallOneArg(filter, match);
1161            Py_DECREF(match);
1162            if (!item)
1163                goto error;
1164        } else {
1165            /* filter is literal string */
1166            item = filter;
1167            Py_INCREF(item);
1168        }
1169
1170        /* add to list */
1171        if (item != Py_None) {
1172            status = PyList_Append(list, item);
1173            Py_DECREF(item);
1174            if (status < 0)
1175                goto error;
1176        }
1177
1178        i = e;
1179        n = n + 1;
1180        state.must_advance = (state.ptr == state.start);
1181        state.start = state.ptr;
1182    }
1183
1184    /* get segment following last match */
1185    if (i < state.endpos) {
1186        item = getslice(state.isbytes, state.beginning,
1187                        string, i, state.endpos);
1188        if (!item)
1189            goto error;
1190        status = PyList_Append(list, item);
1191        Py_DECREF(item);
1192        if (status < 0)
1193            goto error;
1194    }
1195
1196    state_fini(&state);
1197
1198    Py_DECREF(filter);
1199
1200    /* convert list to single string (also removes list) */
1201    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1202    if (!joiner) {
1203        Py_DECREF(list);
1204        return NULL;
1205    }
1206    if (PyList_GET_SIZE(list) == 0) {
1207        Py_DECREF(list);
1208        item = joiner;
1209    }
1210    else {
1211        if (state.isbytes)
1212            item = _PyBytes_Join(joiner, list);
1213        else
1214            item = PyUnicode_Join(joiner, list);
1215        Py_DECREF(joiner);
1216        Py_DECREF(list);
1217        if (!item)
1218            return NULL;
1219    }
1220
1221    if (subn)
1222        return Py_BuildValue("Nn", item, n);
1223
1224    return item;
1225
1226error:
1227    Py_DECREF(list);
1228    state_fini(&state);
1229    Py_DECREF(filter);
1230    return NULL;
1231
1232}
1233
1234/*[clinic input]
1235_sre.SRE_Pattern.sub
1236
1237    cls: defining_class
1238    /
1239    repl: object
1240    string: object
1241    count: Py_ssize_t = 0
1242
1243Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1244[clinic start generated code]*/
1245
1246static PyObject *
1247_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1248                          PyObject *repl, PyObject *string, Py_ssize_t count)
1249/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1250{
1251    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1252
1253    return pattern_subx(module_state, self, repl, string, count, 0);
1254}
1255
1256/*[clinic input]
1257_sre.SRE_Pattern.subn
1258
1259    cls: defining_class
1260    /
1261    repl: object
1262    string: object
1263    count: Py_ssize_t = 0
1264
1265Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1270                           PyObject *repl, PyObject *string,
1271                           Py_ssize_t count)
1272/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1273{
1274    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1275
1276    return pattern_subx(module_state, self, repl, string, count, 1);
1277}
1278
1279/*[clinic input]
1280_sre.SRE_Pattern.__copy__
1281
1282[clinic start generated code]*/
1283
1284static PyObject *
1285_sre_SRE_Pattern___copy___impl(PatternObject *self)
1286/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1287{
1288    Py_INCREF(self);
1289    return (PyObject *)self;
1290}
1291
1292/*[clinic input]
1293_sre.SRE_Pattern.__deepcopy__
1294
1295    memo: object
1296    /
1297
1298[clinic start generated code]*/
1299
1300static PyObject *
1301_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1302/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1303{
1304    Py_INCREF(self);
1305    return (PyObject *)self;
1306}
1307
1308static PyObject *
1309pattern_repr(PatternObject *obj)
1310{
1311    static const struct {
1312        const char *name;
1313        int value;
1314    } flag_names[] = {
1315        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1316        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1317        {"re.LOCALE", SRE_FLAG_LOCALE},
1318        {"re.MULTILINE", SRE_FLAG_MULTILINE},
1319        {"re.DOTALL", SRE_FLAG_DOTALL},
1320        {"re.UNICODE", SRE_FLAG_UNICODE},
1321        {"re.VERBOSE", SRE_FLAG_VERBOSE},
1322        {"re.DEBUG", SRE_FLAG_DEBUG},
1323        {"re.ASCII", SRE_FLAG_ASCII},
1324    };
1325    PyObject *result = NULL;
1326    PyObject *flag_items;
1327    size_t i;
1328    int flags = obj->flags;
1329
1330    /* Omit re.UNICODE for valid string patterns. */
1331    if (obj->isbytes == 0 &&
1332        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1333         SRE_FLAG_UNICODE)
1334        flags &= ~SRE_FLAG_UNICODE;
1335
1336    flag_items = PyList_New(0);
1337    if (!flag_items)
1338        return NULL;
1339
1340    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1341        if (flags & flag_names[i].value) {
1342            PyObject *item = PyUnicode_FromString(flag_names[i].name);
1343            if (!item)
1344                goto done;
1345
1346            if (PyList_Append(flag_items, item) < 0) {
1347                Py_DECREF(item);
1348                goto done;
1349            }
1350            Py_DECREF(item);
1351            flags &= ~flag_names[i].value;
1352        }
1353    }
1354    if (flags) {
1355        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1356        if (!item)
1357            goto done;
1358
1359        if (PyList_Append(flag_items, item) < 0) {
1360            Py_DECREF(item);
1361            goto done;
1362        }
1363        Py_DECREF(item);
1364    }
1365
1366    if (PyList_Size(flag_items) > 0) {
1367        PyObject *flags_result;
1368        PyObject *sep = PyUnicode_FromString("|");
1369        if (!sep)
1370            goto done;
1371        flags_result = PyUnicode_Join(sep, flag_items);
1372        Py_DECREF(sep);
1373        if (!flags_result)
1374            goto done;
1375        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1376                                      obj->pattern, flags_result);
1377        Py_DECREF(flags_result);
1378    }
1379    else {
1380        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1381    }
1382
1383done:
1384    Py_DECREF(flag_items);
1385    return result;
1386}
1387
1388PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1389
1390/* PatternObject's 'groupindex' method. */
1391static PyObject *
1392pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1393{
1394    if (self->groupindex == NULL)
1395        return PyDict_New();
1396    return PyDictProxy_New(self->groupindex);
1397}
1398
1399static int _validate(PatternObject *self); /* Forward */
1400
1401/*[clinic input]
1402_sre.compile
1403
1404    pattern: object
1405    flags: int
1406    code: object(subclass_of='&PyList_Type')
1407    groups: Py_ssize_t
1408    groupindex: object(subclass_of='&PyDict_Type')
1409    indexgroup: object(subclass_of='&PyTuple_Type')
1410
1411[clinic start generated code]*/
1412
1413static PyObject *
1414_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1415                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1416                  PyObject *indexgroup)
1417/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1418{
1419    /* "compile" pattern descriptor to pattern object */
1420
1421    _sremodulestate *module_state = get_sre_module_state(module);
1422    PatternObject* self;
1423    Py_ssize_t i, n;
1424
1425    n = PyList_GET_SIZE(code);
1426    /* coverity[ampersand_in_size] */
1427    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1428    if (!self)
1429        return NULL;
1430    self->weakreflist = NULL;
1431    self->pattern = NULL;
1432    self->groupindex = NULL;
1433    self->indexgroup = NULL;
1434
1435    self->codesize = n;
1436
1437    for (i = 0; i < n; i++) {
1438        PyObject *o = PyList_GET_ITEM(code, i);
1439        unsigned long value = PyLong_AsUnsignedLong(o);
1440        self->code[i] = (SRE_CODE) value;
1441        if ((unsigned long) self->code[i] != value) {
1442            PyErr_SetString(PyExc_OverflowError,
1443                            "regular expression code size limit exceeded");
1444            break;
1445        }
1446    }
1447    PyObject_GC_Track(self);
1448
1449    if (PyErr_Occurred()) {
1450        Py_DECREF(self);
1451        return NULL;
1452    }
1453
1454    if (pattern == Py_None) {
1455        self->isbytes = -1;
1456    }
1457    else {
1458        Py_ssize_t p_length;
1459        int charsize;
1460        Py_buffer view;
1461        view.buf = NULL;
1462        if (!getstring(pattern, &p_length, &self->isbytes,
1463                       &charsize, &view)) {
1464            Py_DECREF(self);
1465            return NULL;
1466        }
1467        if (view.buf)
1468            PyBuffer_Release(&view);
1469    }
1470
1471    Py_INCREF(pattern);
1472    self->pattern = pattern;
1473
1474    self->flags = flags;
1475
1476    self->groups = groups;
1477
1478    if (PyDict_GET_SIZE(groupindex) > 0) {
1479        Py_INCREF(groupindex);
1480        self->groupindex = groupindex;
1481        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1482            Py_INCREF(indexgroup);
1483            self->indexgroup = indexgroup;
1484        }
1485    }
1486
1487    if (!_validate(self)) {
1488        Py_DECREF(self);
1489        return NULL;
1490    }
1491
1492    return (PyObject*) self;
1493}
1494
1495/* -------------------------------------------------------------------- */
1496/* Code validation */
1497
1498/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1500   for conformance with the code patterns generated there.
1501
1502   The nice thing about the generated code is that it is position-independent:
1503   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1504   the target of a later jump is always earlier than the target of an earlier
1505   jump.  IOW, this is okay:
1506
1507   J---------J-------T--------T
1508    \         \_____/        /
1509     \______________________/
1510
1511   but this is not:
1512
1513   J---------J-------T--------T
1514    \_________\_____/        /
1515               \____________/
1516
1517   It also helps that SRE_CODE is always an unsigned type.
1518*/
1519
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns -1 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the variables 'code' and 'end' to be in scope,
   together with 'op', 'arg' or 'skip' respectively.  Each one FAILs when
   'code' has reached 'end'; otherwise it stores the word at 'code' and
   advances 'code' by one. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Additionally FAILs when the skip, reduced by 'adj', would lead past
   'end'.  The skip is measured from the position of the skip word itself. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1560
1561static int
1562_validate_charset(SRE_CODE *code, SRE_CODE *end)
1563{
1564    /* Some variables are manipulated by the macros above */
1565    SRE_CODE op;
1566    SRE_CODE arg;
1567    SRE_CODE offset;
1568    int i;
1569
1570    while (code < end) {
1571        GET_OP;
1572        switch (op) {
1573
1574        case SRE_OP_NEGATE:
1575            break;
1576
1577        case SRE_OP_LITERAL:
1578            GET_ARG;
1579            break;
1580
1581        case SRE_OP_RANGE:
1582        case SRE_OP_RANGE_UNI_IGNORE:
1583            GET_ARG;
1584            GET_ARG;
1585            break;
1586
1587        case SRE_OP_CHARSET:
1588            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1589            if (offset > (uintptr_t)(end - code))
1590                FAIL;
1591            code += offset;
1592            break;
1593
1594        case SRE_OP_BIGCHARSET:
1595            GET_ARG; /* Number of blocks */
1596            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1597            if (offset > (uintptr_t)(end - code))
1598                FAIL;
1599            /* Make sure that each byte points to a valid block */
1600            for (i = 0; i < 256; i++) {
1601                if (((unsigned char *)code)[i] >= arg)
1602                    FAIL;
1603            }
1604            code += offset;
1605            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1606            if (offset > (uintptr_t)(end - code))
1607                FAIL;
1608            code += offset;
1609            break;
1610
1611        case SRE_OP_CATEGORY:
1612            GET_ARG;
1613            switch (arg) {
1614            case SRE_CATEGORY_DIGIT:
1615            case SRE_CATEGORY_NOT_DIGIT:
1616            case SRE_CATEGORY_SPACE:
1617            case SRE_CATEGORY_NOT_SPACE:
1618            case SRE_CATEGORY_WORD:
1619            case SRE_CATEGORY_NOT_WORD:
1620            case SRE_CATEGORY_LINEBREAK:
1621            case SRE_CATEGORY_NOT_LINEBREAK:
1622            case SRE_CATEGORY_LOC_WORD:
1623            case SRE_CATEGORY_LOC_NOT_WORD:
1624            case SRE_CATEGORY_UNI_DIGIT:
1625            case SRE_CATEGORY_UNI_NOT_DIGIT:
1626            case SRE_CATEGORY_UNI_SPACE:
1627            case SRE_CATEGORY_UNI_NOT_SPACE:
1628            case SRE_CATEGORY_UNI_WORD:
1629            case SRE_CATEGORY_UNI_NOT_WORD:
1630            case SRE_CATEGORY_UNI_LINEBREAK:
1631            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1632                break;
1633            default:
1634                FAIL;
1635            }
1636            break;
1637
1638        default:
1639            FAIL;
1640
1641        }
1642    }
1643
1644    return 0;
1645}
1646
1647/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1648static int
1649_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1650{
1651    /* Some variables are manipulated by the macros above */
1652    SRE_CODE op;
1653    SRE_CODE arg;
1654    SRE_CODE skip;
1655
1656    VTRACE(("code=%p, end=%p\n", code, end));
1657
1658    if (code > end)
1659        FAIL;
1660
1661    while (code < end) {
1662        GET_OP;
1663        switch (op) {
1664
1665        case SRE_OP_MARK:
1666            /* We don't check whether marks are properly nested; the
1667               sre_match() code is robust even if they don't, and the worst
1668               you can get is nonsensical match results. */
1669            GET_ARG;
1670            if (arg > 2 * (size_t)groups + 1) {
1671                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1672                FAIL;
1673            }
1674            break;
1675
1676        case SRE_OP_LITERAL:
1677        case SRE_OP_NOT_LITERAL:
1678        case SRE_OP_LITERAL_IGNORE:
1679        case SRE_OP_NOT_LITERAL_IGNORE:
1680        case SRE_OP_LITERAL_UNI_IGNORE:
1681        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1682        case SRE_OP_LITERAL_LOC_IGNORE:
1683        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1684            GET_ARG;
1685            /* The arg is just a character, nothing to check */
1686            break;
1687
1688        case SRE_OP_SUCCESS:
1689        case SRE_OP_FAILURE:
1690            /* Nothing to check; these normally end the matching process */
1691            break;
1692
1693        case SRE_OP_AT:
1694            GET_ARG;
1695            switch (arg) {
1696            case SRE_AT_BEGINNING:
1697            case SRE_AT_BEGINNING_STRING:
1698            case SRE_AT_BEGINNING_LINE:
1699            case SRE_AT_END:
1700            case SRE_AT_END_LINE:
1701            case SRE_AT_END_STRING:
1702            case SRE_AT_BOUNDARY:
1703            case SRE_AT_NON_BOUNDARY:
1704            case SRE_AT_LOC_BOUNDARY:
1705            case SRE_AT_LOC_NON_BOUNDARY:
1706            case SRE_AT_UNI_BOUNDARY:
1707            case SRE_AT_UNI_NON_BOUNDARY:
1708                break;
1709            default:
1710                FAIL;
1711            }
1712            break;
1713
1714        case SRE_OP_ANY:
1715        case SRE_OP_ANY_ALL:
1716            /* These have no operands */
1717            break;
1718
1719        case SRE_OP_IN:
1720        case SRE_OP_IN_IGNORE:
1721        case SRE_OP_IN_UNI_IGNORE:
1722        case SRE_OP_IN_LOC_IGNORE:
1723            GET_SKIP;
1724            /* Stop 1 before the end; we check the FAILURE below */
1725            if (_validate_charset(code, code+skip-2))
1726                FAIL;
1727            if (code[skip-2] != SRE_OP_FAILURE)
1728                FAIL;
1729            code += skip-1;
1730            break;
1731
1732        case SRE_OP_INFO:
1733            {
1734                /* A minimal info field is
1735                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1736                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1737                   more follows. */
1738                SRE_CODE flags, i;
1739                SRE_CODE *newcode;
1740                GET_SKIP;
1741                newcode = code+skip-1;
1742                GET_ARG; flags = arg;
1743                GET_ARG;
1744                GET_ARG;
1745                /* Check that only valid flags are present */
1746                if ((flags & ~(SRE_INFO_PREFIX |
1747                               SRE_INFO_LITERAL |
1748                               SRE_INFO_CHARSET)) != 0)
1749                    FAIL;
1750                /* PREFIX and CHARSET are mutually exclusive */
1751                if ((flags & SRE_INFO_PREFIX) &&
1752                    (flags & SRE_INFO_CHARSET))
1753                    FAIL;
1754                /* LITERAL implies PREFIX */
1755                if ((flags & SRE_INFO_LITERAL) &&
1756                    !(flags & SRE_INFO_PREFIX))
1757                    FAIL;
1758                /* Validate the prefix */
1759                if (flags & SRE_INFO_PREFIX) {
1760                    SRE_CODE prefix_len;
1761                    GET_ARG; prefix_len = arg;
1762                    GET_ARG;
1763                    /* Here comes the prefix string */
1764                    if (prefix_len > (uintptr_t)(newcode - code))
1765                        FAIL;
1766                    code += prefix_len;
1767                    /* And here comes the overlap table */
1768                    if (prefix_len > (uintptr_t)(newcode - code))
1769                        FAIL;
1770                    /* Each overlap value should be < prefix_len */
1771                    for (i = 0; i < prefix_len; i++) {
1772                        if (code[i] >= prefix_len)
1773                            FAIL;
1774                    }
1775                    code += prefix_len;
1776                }
1777                /* Validate the charset */
1778                if (flags & SRE_INFO_CHARSET) {
1779                    if (_validate_charset(code, newcode-1))
1780                        FAIL;
1781                    if (newcode[-1] != SRE_OP_FAILURE)
1782                        FAIL;
1783                    code = newcode;
1784                }
1785                else if (code != newcode) {
1786                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
1787                    FAIL;
1788                }
1789            }
1790            break;
1791
1792        case SRE_OP_BRANCH:
1793            {
1794                SRE_CODE *target = NULL;
1795                for (;;) {
1796                    GET_SKIP;
1797                    if (skip == 0)
1798                        break;
1799                    /* Stop 2 before the end; we check the JUMP below */
1800                    if (_validate_inner(code, code+skip-3, groups))
1801                        FAIL;
1802                    code += skip-3;
1803                    /* Check that it ends with a JUMP, and that each JUMP
1804                       has the same target */
1805                    GET_OP;
1806                    if (op != SRE_OP_JUMP)
1807                        FAIL;
1808                    GET_SKIP;
1809                    if (target == NULL)
1810                        target = code+skip-1;
1811                    else if (code+skip-1 != target)
1812                        FAIL;
1813                }
1814                if (code != target)
1815                    FAIL;
1816            }
1817            break;
1818
1819        case SRE_OP_REPEAT_ONE:
1820        case SRE_OP_MIN_REPEAT_ONE:
1821        case SRE_OP_POSSESSIVE_REPEAT_ONE:
1822            {
1823                SRE_CODE min, max;
1824                GET_SKIP;
1825                GET_ARG; min = arg;
1826                GET_ARG; max = arg;
1827                if (min > max)
1828                    FAIL;
1829                if (max > SRE_MAXREPEAT)
1830                    FAIL;
1831                if (_validate_inner(code, code+skip-4, groups))
1832                    FAIL;
1833                code += skip-4;
1834                GET_OP;
1835                if (op != SRE_OP_SUCCESS)
1836                    FAIL;
1837            }
1838            break;
1839
1840        case SRE_OP_REPEAT:
1841        case SRE_OP_POSSESSIVE_REPEAT:
1842            {
1843                SRE_CODE op1 = op, min, max;
1844                GET_SKIP;
1845                GET_ARG; min = arg;
1846                GET_ARG; max = arg;
1847                if (min > max)
1848                    FAIL;
1849                if (max > SRE_MAXREPEAT)
1850                    FAIL;
1851                if (_validate_inner(code, code+skip-3, groups))
1852                    FAIL;
1853                code += skip-3;
1854                GET_OP;
1855                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
1856                    if (op != SRE_OP_SUCCESS)
1857                        FAIL;
1858                }
1859                else {
1860                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1861                        FAIL;
1862                }
1863            }
1864            break;
1865
1866        case SRE_OP_ATOMIC_GROUP:
1867            {
1868                GET_SKIP;
1869                if (_validate_inner(code, code+skip-2, groups))
1870                    FAIL;
1871                code += skip-2;
1872                GET_OP;
1873                if (op != SRE_OP_SUCCESS)
1874                    FAIL;
1875            }
1876            break;
1877
1878        case SRE_OP_GROUPREF:
1879        case SRE_OP_GROUPREF_IGNORE:
1880        case SRE_OP_GROUPREF_UNI_IGNORE:
1881        case SRE_OP_GROUPREF_LOC_IGNORE:
1882            GET_ARG;
1883            if (arg >= (size_t)groups)
1884                FAIL;
1885            break;
1886
1887        case SRE_OP_GROUPREF_EXISTS:
1888            /* The regex syntax for this is: '(?(group)then|else)', where
1889               'group' is either an integer group number or a group name,
1890               'then' and 'else' are sub-regexes, and 'else' is optional. */
1891            GET_ARG;
1892            if (arg >= (size_t)groups)
1893                FAIL;
1894            GET_SKIP_ADJ(1);
1895            code--; /* The skip is relative to the first arg! */
1896            /* There are two possibilities here: if there is both a 'then'
1897               part and an 'else' part, the generated code looks like:
1898
1899               GROUPREF_EXISTS
1900               <group>
1901               <skipyes>
1902               ...then part...
1903               JUMP
1904               <skipno>
1905               (<skipyes> jumps here)
1906               ...else part...
1907               (<skipno> jumps here)
1908
1909               If there is only a 'then' part, it looks like:
1910
1911               GROUPREF_EXISTS
1912               <group>
1913               <skip>
1914               ...then part...
1915               (<skip> jumps here)
1916
1917               There is no direct way to decide which it is, and we don't want
1918               to allow arbitrary jumps anywhere in the code; so we just look
1919               for a JUMP opcode preceding our skip target.
1920            */
1921            VTRACE(("then part:\n"));
1922            int rc = _validate_inner(code+1, code+skip-1, groups);
1923            if (rc == 1) {
1924                VTRACE(("else part:\n"));
1925                code += skip-2; /* Position after JUMP, at <skipno> */
1926                GET_SKIP;
1927                rc = _validate_inner(code, code+skip-1, groups);
1928            }
1929            if (rc)
1930                FAIL;
1931            code += skip-1;
1932            break;
1933
1934        case SRE_OP_ASSERT:
1935        case SRE_OP_ASSERT_NOT:
1936            GET_SKIP;
1937            GET_ARG; /* 0 for lookahead, width for lookbehind */
1938            code--; /* Back up over arg to simplify math below */
1939            if (arg & 0x80000000)
1940                FAIL; /* Width too large */
1941            /* Stop 1 before the end; we check the SUCCESS below */
1942            if (_validate_inner(code+1, code+skip-2, groups))
1943                FAIL;
1944            code += skip-2;
1945            GET_OP;
1946            if (op != SRE_OP_SUCCESS)
1947                FAIL;
1948            break;
1949
1950        case SRE_OP_JUMP:
1951            if (code + 1 != end)
1952                FAIL;
1953            VTRACE(("JUMP: %d\n", __LINE__));
1954            return 1;
1955
1956        default:
1957            FAIL;
1958
1959        }
1960    }
1961
1962    VTRACE(("okay\n"));
1963    return 0;
1964}
1965
1966static int
1967_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1968{
1969    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1970        code >= end || end[-1] != SRE_OP_SUCCESS)
1971        FAIL;
1972    return _validate_inner(code, end-1, groups);
1973}
1974
1975static int
1976_validate(PatternObject *self)
1977{
1978    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
1979    {
1980        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1981        return 0;
1982    }
1983    else
1984        VTRACE(("Success!\n"));
1985    return 1;
1986}
1987
1988/* -------------------------------------------------------------------- */
1989/* match methods */
1990
1991static int
1992match_traverse(MatchObject *self, visitproc visit, void *arg)
1993{
1994    Py_VISIT(Py_TYPE(self));
1995    Py_VISIT(self->string);
1996    Py_VISIT(self->regs);
1997    Py_VISIT(self->pattern);
1998    return 0;
1999}
2000
2001static int
2002match_clear(MatchObject *self)
2003{
2004    Py_CLEAR(self->string);
2005    Py_CLEAR(self->regs);
2006    Py_CLEAR(self->pattern);
2007    return 0;
2008}
2009
2010static void
2011match_dealloc(MatchObject* self)
2012{
2013    PyTypeObject *tp = Py_TYPE(self);
2014
2015    PyObject_GC_UnTrack(self);
2016    (void)match_clear(self);
2017    tp->tp_free(self);
2018    Py_DECREF(tp);
2019}
2020
2021static PyObject*
2022match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2023{
2024    Py_ssize_t length;
2025    int isbytes, charsize;
2026    Py_buffer view;
2027    PyObject *result;
2028    const void* ptr;
2029    Py_ssize_t i, j;
2030
2031    assert(0 <= index && index < self->groups);
2032    index *= 2;
2033
2034    if (self->string == Py_None || self->mark[index] < 0) {
2035        /* return default value if the string or group is undefined */
2036        Py_INCREF(def);
2037        return def;
2038    }
2039
2040    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2041    if (ptr == NULL)
2042        return NULL;
2043
2044    i = self->mark[index];
2045    j = self->mark[index+1];
2046    i = Py_MIN(i, length);
2047    j = Py_MIN(j, length);
2048    result = getslice(isbytes, ptr, self->string, i, j);
2049    if (isbytes && view.buf != NULL)
2050        PyBuffer_Release(&view);
2051    return result;
2052}
2053
2054static Py_ssize_t
2055match_getindex(MatchObject* self, PyObject* index)
2056{
2057    Py_ssize_t i;
2058
2059    if (index == NULL)
2060        /* Default value */
2061        return 0;
2062
2063    if (PyIndex_Check(index)) {
2064        i = PyNumber_AsSsize_t(index, NULL);
2065    }
2066    else {
2067        i = -1;
2068
2069        if (self->pattern->groupindex) {
2070            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2071            if (index && PyLong_Check(index)) {
2072                i = PyLong_AsSsize_t(index);
2073            }
2074        }
2075    }
2076    if (i < 0 || i >= self->groups) {
2077        /* raise IndexError if we were given a bad group number */
2078        if (!PyErr_Occurred()) {
2079            PyErr_SetString(PyExc_IndexError, "no such group");
2080        }
2081        return -1;
2082    }
2083
2084    return i;
2085}
2086
2087static PyObject*
2088match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2089{
2090    Py_ssize_t i = match_getindex(self, index);
2091
2092    if (i < 0) {
2093        return NULL;
2094    }
2095
2096    return match_getslice_by_index(self, i, def);
2097}
2098
2099/*[clinic input]
2100_sre.SRE_Match.expand
2101
2102    template: object
2103
2104Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2105[clinic start generated code]*/
2106
2107static PyObject *
2108_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2109/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2110{
2111    /* delegate to Python code */
2112    return call(
2113        SRE_PY_MODULE, "_expand",
2114        PyTuple_Pack(3, self->pattern, self, template)
2115        );
2116}
2117
2118static PyObject*
2119match_group(MatchObject* self, PyObject* args)
2120{
2121    PyObject* result;
2122    Py_ssize_t i, size;
2123
2124    size = PyTuple_GET_SIZE(args);
2125
2126    switch (size) {
2127    case 0:
2128        result = match_getslice(self, _PyLong_GetZero(), Py_None);
2129        break;
2130    case 1:
2131        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2132        break;
2133    default:
2134        /* fetch multiple items */
2135        result = PyTuple_New(size);
2136        if (!result)
2137            return NULL;
2138        for (i = 0; i < size; i++) {
2139            PyObject* item = match_getslice(
2140                self, PyTuple_GET_ITEM(args, i), Py_None
2141                );
2142            if (!item) {
2143                Py_DECREF(result);
2144                return NULL;
2145            }
2146            PyTuple_SET_ITEM(result, i, item);
2147        }
2148        break;
2149    }
2150    return result;
2151}
2152
2153static PyObject*
2154match_getitem(MatchObject* self, PyObject* name)
2155{
2156    return match_getslice(self, name, Py_None);
2157}
2158
2159/*[clinic input]
2160_sre.SRE_Match.groups
2161
2162    default: object = None
2163        Is used for groups that did not participate in the match.
2164
2165Return a tuple containing all the subgroups of the match, from 1.
2166[clinic start generated code]*/
2167
2168static PyObject *
2169_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2170/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2171{
2172    PyObject* result;
2173    Py_ssize_t index;
2174
2175    result = PyTuple_New(self->groups-1);
2176    if (!result)
2177        return NULL;
2178
2179    for (index = 1; index < self->groups; index++) {
2180        PyObject* item;
2181        item = match_getslice_by_index(self, index, default_value);
2182        if (!item) {
2183            Py_DECREF(result);
2184            return NULL;
2185        }
2186        PyTuple_SET_ITEM(result, index-1, item);
2187    }
2188
2189    return result;
2190}
2191
2192/*[clinic input]
2193_sre.SRE_Match.groupdict
2194
2195    default: object = None
2196        Is used for groups that did not participate in the match.
2197
2198Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2199[clinic start generated code]*/
2200
2201static PyObject *
2202_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2203/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2204{
2205    PyObject *result;
2206    PyObject *key;
2207    PyObject *value;
2208    Py_ssize_t pos = 0;
2209    Py_hash_t hash;
2210
2211    result = PyDict_New();
2212    if (!result || !self->pattern->groupindex)
2213        return result;
2214
2215    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2216        int status;
2217        Py_INCREF(key);
2218        value = match_getslice(self, key, default_value);
2219        if (!value) {
2220            Py_DECREF(key);
2221            goto failed;
2222        }
2223        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2224        Py_DECREF(value);
2225        Py_DECREF(key);
2226        if (status < 0)
2227            goto failed;
2228    }
2229
2230    return result;
2231
2232failed:
2233    Py_DECREF(result);
2234    return NULL;
2235}
2236
2237/*[clinic input]
2238_sre.SRE_Match.start -> Py_ssize_t
2239
2240    group: object(c_default="NULL") = 0
2241    /
2242
2243Return index of the start of the substring matched by group.
2244[clinic start generated code]*/
2245
2246static Py_ssize_t
2247_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2248/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2249{
2250    Py_ssize_t index = match_getindex(self, group);
2251
2252    if (index < 0) {
2253        return -1;
2254    }
2255
2256    /* mark is -1 if group is undefined */
2257    return self->mark[index*2];
2258}
2259
2260/*[clinic input]
2261_sre.SRE_Match.end -> Py_ssize_t
2262
2263    group: object(c_default="NULL") = 0
2264    /
2265
2266Return index of the end of the substring matched by group.
2267[clinic start generated code]*/
2268
2269static Py_ssize_t
2270_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2271/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2272{
2273    Py_ssize_t index = match_getindex(self, group);
2274
2275    if (index < 0) {
2276        return -1;
2277    }
2278
2279    /* mark is -1 if group is undefined */
2280    return self->mark[index*2+1];
2281}
2282
2283LOCAL(PyObject*)
2284_pair(Py_ssize_t i1, Py_ssize_t i2)
2285{
2286    PyObject* pair;
2287    PyObject* item;
2288
2289    pair = PyTuple_New(2);
2290    if (!pair)
2291        return NULL;
2292
2293    item = PyLong_FromSsize_t(i1);
2294    if (!item)
2295        goto error;
2296    PyTuple_SET_ITEM(pair, 0, item);
2297
2298    item = PyLong_FromSsize_t(i2);
2299    if (!item)
2300        goto error;
2301    PyTuple_SET_ITEM(pair, 1, item);
2302
2303    return pair;
2304
2305  error:
2306    Py_DECREF(pair);
2307    return NULL;
2308}
2309
2310/*[clinic input]
2311_sre.SRE_Match.span
2312
2313    group: object(c_default="NULL") = 0
2314    /
2315
2316For match object m, return the 2-tuple (m.start(group), m.end(group)).
2317[clinic start generated code]*/
2318
2319static PyObject *
2320_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2321/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2322{
2323    Py_ssize_t index = match_getindex(self, group);
2324
2325    if (index < 0) {
2326        return NULL;
2327    }
2328
2329    /* marks are -1 if group is undefined */
2330    return _pair(self->mark[index*2], self->mark[index*2+1]);
2331}
2332
2333static PyObject*
2334match_regs(MatchObject* self)
2335{
2336    PyObject* regs;
2337    PyObject* item;
2338    Py_ssize_t index;
2339
2340    regs = PyTuple_New(self->groups);
2341    if (!regs)
2342        return NULL;
2343
2344    for (index = 0; index < self->groups; index++) {
2345        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2346        if (!item) {
2347            Py_DECREF(regs);
2348            return NULL;
2349        }
2350        PyTuple_SET_ITEM(regs, index, item);
2351    }
2352
2353    Py_INCREF(regs);
2354    self->regs = regs;
2355
2356    return regs;
2357}
2358
2359/*[clinic input]
2360_sre.SRE_Match.__copy__
2361
2362[clinic start generated code]*/
2363
2364static PyObject *
2365_sre_SRE_Match___copy___impl(MatchObject *self)
2366/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2367{
2368    Py_INCREF(self);
2369    return (PyObject *)self;
2370}
2371
2372/*[clinic input]
2373_sre.SRE_Match.__deepcopy__
2374
2375    memo: object
2376    /
2377
2378[clinic start generated code]*/
2379
2380static PyObject *
2381_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2382/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2383{
2384    Py_INCREF(self);
2385    return (PyObject *)self;
2386}
2387
2388PyDoc_STRVAR(match_doc,
2389"The result of re.match() and re.search().\n\
2390Match objects always have a boolean value of True.");
2391
2392PyDoc_STRVAR(match_group_doc,
2393"group([group1, ...]) -> str or tuple.\n\
2394    Return subgroup(s) of the match by indices or names.\n\
2395    For 0 returns the entire match.");
2396
2397static PyObject *
2398match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2399{
2400    if (self->lastindex >= 0)
2401        return PyLong_FromSsize_t(self->lastindex);
2402    Py_RETURN_NONE;
2403}
2404
2405static PyObject *
2406match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2407{
2408    if (self->pattern->indexgroup &&
2409        self->lastindex >= 0 &&
2410        self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2411    {
2412        PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2413                                            self->lastindex);
2414        Py_INCREF(result);
2415        return result;
2416    }
2417    Py_RETURN_NONE;
2418}
2419
2420static PyObject *
2421match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2422{
2423    if (self->regs) {
2424        Py_INCREF(self->regs);
2425        return self->regs;
2426    } else
2427        return match_regs(self);
2428}
2429
2430static PyObject *
2431match_repr(MatchObject *self)
2432{
2433    PyObject *result;
2434    PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2435    if (group0 == NULL)
2436        return NULL;
2437    result = PyUnicode_FromFormat(
2438            "<%s object; span=(%zd, %zd), match=%.50R>",
2439            Py_TYPE(self)->tp_name,
2440            self->mark[0], self->mark[1], group0);
2441    Py_DECREF(group0);
2442    return result;
2443}
2444
2445
2446static PyObject*
2447pattern_new_match(_sremodulestate* module_state,
2448                  PatternObject* pattern,
2449                  SRE_STATE* state,
2450                  Py_ssize_t status)
2451{
2452    /* create match object (from state object) */
2453
2454    MatchObject* match;
2455    Py_ssize_t i, j;
2456    char* base;
2457    int n;
2458
2459    if (status > 0) {
2460
2461        /* create match object (with room for extra group marks) */
2462        /* coverity[ampersand_in_size] */
2463        match = PyObject_GC_NewVar(MatchObject,
2464                                   module_state->Match_Type,
2465                                   2*(pattern->groups+1));
2466        if (!match)
2467            return NULL;
2468
2469        Py_INCREF(pattern);
2470        match->pattern = pattern;
2471
2472        Py_INCREF(state->string);
2473        match->string = state->string;
2474
2475        match->regs = NULL;
2476        match->groups = pattern->groups+1;
2477
2478        /* fill in group slices */
2479
2480        base = (char*) state->beginning;
2481        n = state->charsize;
2482
2483        match->mark[0] = ((char*) state->start - base) / n;
2484        match->mark[1] = ((char*) state->ptr - base) / n;
2485
2486        for (i = j = 0; i < pattern->groups; i++, j+=2)
2487            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2488                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2489                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2490
2491                /* check wrong span */
2492                if (match->mark[j+2] > match->mark[j+3]) {
2493                    PyErr_SetString(PyExc_SystemError,
2494                                    "The span of capturing group is wrong,"
2495                                    " please report a bug for the re module.");
2496                    Py_DECREF(match);
2497                    return NULL;
2498                }
2499            } else
2500                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2501
2502        match->pos = state->pos;
2503        match->endpos = state->endpos;
2504
2505        match->lastindex = state->lastindex;
2506
2507        PyObject_GC_Track(match);
2508        return (PyObject*) match;
2509
2510    } else if (status == 0) {
2511
2512        /* no match */
2513        Py_RETURN_NONE;
2514
2515    }
2516
2517    /* internal error */
2518    pattern_error(status);
2519    return NULL;
2520}
2521
2522
2523/* -------------------------------------------------------------------- */
2524/* scanner methods (experimental) */
2525
2526static int
2527scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
2528{
2529    Py_VISIT(Py_TYPE(self));
2530    Py_VISIT(self->pattern);
2531    return 0;
2532}
2533
2534static int
2535scanner_clear(ScannerObject *self)
2536{
2537    Py_CLEAR(self->pattern);
2538    return 0;
2539}
2540
2541static void
2542scanner_dealloc(ScannerObject* self)
2543{
2544    PyTypeObject *tp = Py_TYPE(self);
2545
2546    PyObject_GC_UnTrack(self);
2547    state_fini(&self->state);
2548    (void)scanner_clear(self);
2549    tp->tp_free(self);
2550    Py_DECREF(tp);
2551}
2552
2553static int
2554scanner_begin(ScannerObject* self)
2555{
2556    if (self->executing) {
2557        PyErr_SetString(PyExc_ValueError,
2558                        "regular expression scanner already executing");
2559        return 0;
2560    }
2561    self->executing = 1;
2562    return 1;
2563}
2564
2565static void
2566scanner_end(ScannerObject* self)
2567{
2568    assert(self->executing);
2569    self->executing = 0;
2570}
2571
2572/*[clinic input]
2573_sre.SRE_Scanner.match
2574
2575    cls: defining_class
2576    /
2577
2578[clinic start generated code]*/
2579
2580static PyObject *
2581_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2582/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2583{
2584    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2585    SRE_STATE* state = &self->state;
2586    PyObject* match;
2587    Py_ssize_t status;
2588
2589    if (!scanner_begin(self)) {
2590        return NULL;
2591    }
2592    if (state->start == NULL) {
2593        scanner_end(self);
2594        Py_RETURN_NONE;
2595    }
2596
2597    state_reset(state);
2598
2599    state->ptr = state->start;
2600
2601    status = sre_match(state, PatternObject_GetCode(self->pattern));
2602    if (PyErr_Occurred()) {
2603        scanner_end(self);
2604        return NULL;
2605    }
2606
2607    match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2608                              state, status);
2609
2610    if (status == 0)
2611        state->start = NULL;
2612    else {
2613        state->must_advance = (state->ptr == state->start);
2614        state->start = state->ptr;
2615    }
2616
2617    scanner_end(self);
2618    return match;
2619}
2620
2621
2622/*[clinic input]
2623_sre.SRE_Scanner.search
2624
2625    cls: defining_class
2626    /
2627
2628[clinic start generated code]*/
2629
2630static PyObject *
2631_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2632/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2633{
2634    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2635    SRE_STATE* state = &self->state;
2636    PyObject* match;
2637    Py_ssize_t status;
2638
2639    if (!scanner_begin(self)) {
2640        return NULL;
2641    }
2642    if (state->start == NULL) {
2643        scanner_end(self);
2644        Py_RETURN_NONE;
2645    }
2646
2647    state_reset(state);
2648
2649    state->ptr = state->start;
2650
2651    status = sre_search(state, PatternObject_GetCode(self->pattern));
2652    if (PyErr_Occurred()) {
2653        scanner_end(self);
2654        return NULL;
2655    }
2656
2657    match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2658                              state, status);
2659
2660    if (status == 0)
2661        state->start = NULL;
2662    else {
2663        state->must_advance = (state->ptr == state->start);
2664        state->start = state->ptr;
2665    }
2666
2667    scanner_end(self);
2668    return match;
2669}
2670
2671static PyObject *
2672pattern_scanner(_sremodulestate *module_state,
2673                PatternObject *self,
2674                PyObject *string,
2675                Py_ssize_t pos,
2676                Py_ssize_t endpos)
2677{
2678    ScannerObject* scanner;
2679
2680    /* create scanner object */
2681    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2682    if (!scanner)
2683        return NULL;
2684    scanner->pattern = NULL;
2685    scanner->executing = 0;
2686
2687    /* create search state object */
2688    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2689        Py_DECREF(scanner);
2690        return NULL;
2691    }
2692
2693    Py_INCREF(self);
2694    scanner->pattern = (PyObject*) self;
2695
2696    PyObject_GC_Track(scanner);
2697    return (PyObject*) scanner;
2698}
2699
2700static Py_hash_t
2701pattern_hash(PatternObject *self)
2702{
2703    Py_hash_t hash, hash2;
2704
2705    hash = PyObject_Hash(self->pattern);
2706    if (hash == -1) {
2707        return -1;
2708    }
2709
2710    hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2711    hash ^= hash2;
2712
2713    hash ^= self->flags;
2714    hash ^= self->isbytes;
2715    hash ^= self->codesize;
2716
2717    if (hash == -1) {
2718        hash = -2;
2719    }
2720    return hash;
2721}
2722
2723static PyObject*
2724pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2725{
2726    PyTypeObject *tp = Py_TYPE(lefto);
2727    _sremodulestate *module_state = get_sre_module_state_by_class(tp);
2728    PatternObject *left, *right;
2729    int cmp;
2730
2731    if (op != Py_EQ && op != Py_NE) {
2732        Py_RETURN_NOTIMPLEMENTED;
2733    }
2734
2735    if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
2736    {
2737        Py_RETURN_NOTIMPLEMENTED;
2738    }
2739
2740    if (lefto == righto) {
2741        /* a pattern is equal to itself */
2742        return PyBool_FromLong(op == Py_EQ);
2743    }
2744
2745    left = (PatternObject *)lefto;
2746    right = (PatternObject *)righto;
2747
2748    cmp = (left->flags == right->flags
2749           && left->isbytes == right->isbytes
2750           && left->codesize == right->codesize);
2751    if (cmp) {
2752        /* Compare the code and the pattern because the same pattern can
2753           produce different codes depending on the locale used to compile the
2754           pattern when the re.LOCALE flag is used. Don't compare groups,
2755           indexgroup nor groupindex: they are derivated from the pattern. */
2756        cmp = (memcmp(left->code, right->code,
2757                      sizeof(left->code[0]) * left->codesize) == 0);
2758    }
2759    if (cmp) {
2760        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2761                                       Py_EQ);
2762        if (cmp < 0) {
2763            return NULL;
2764        }
2765    }
2766    if (op == Py_NE) {
2767        cmp = !cmp;
2768    }
2769    return PyBool_FromLong(cmp);
2770}
2771
2772#include "clinic/sre.c.h"
2773
2774static PyMethodDef pattern_methods[] = {
2775    _SRE_SRE_PATTERN_MATCH_METHODDEF
2776    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2777    _SRE_SRE_PATTERN_SEARCH_METHODDEF
2778    _SRE_SRE_PATTERN_SUB_METHODDEF
2779    _SRE_SRE_PATTERN_SUBN_METHODDEF
2780    _SRE_SRE_PATTERN_FINDALL_METHODDEF
2781    _SRE_SRE_PATTERN_SPLIT_METHODDEF
2782    _SRE_SRE_PATTERN_FINDITER_METHODDEF
2783    _SRE_SRE_PATTERN_SCANNER_METHODDEF
2784    _SRE_SRE_PATTERN___COPY___METHODDEF
2785    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2786    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
2787     PyDoc_STR("See PEP 585")},
2788    {NULL, NULL}
2789};
2790
2791static PyGetSetDef pattern_getset[] = {
2792    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2793      "A dictionary mapping group names to group numbers."},
2794    {NULL}  /* Sentinel */
2795};
2796
2797#define PAT_OFF(x) offsetof(PatternObject, x)
2798static PyMemberDef pattern_members[] = {
2799    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
2800     "The pattern string from which the RE object was compiled."},
2801    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
2802     "The regex matching flags."},
2803    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
2804     "The number of capturing groups in the pattern."},
2805    {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY},
2806    {NULL}  /* Sentinel */
2807};
2808
2809static PyType_Slot pattern_slots[] = {
2810    {Py_tp_dealloc, (destructor)pattern_dealloc},
2811    {Py_tp_repr, (reprfunc)pattern_repr},
2812    {Py_tp_hash, (hashfunc)pattern_hash},
2813    {Py_tp_doc, (void *)pattern_doc},
2814    {Py_tp_richcompare, pattern_richcompare},
2815    {Py_tp_methods, pattern_methods},
2816    {Py_tp_members, pattern_members},
2817    {Py_tp_getset, pattern_getset},
2818    {Py_tp_traverse, pattern_traverse},
2819    {Py_tp_clear, pattern_clear},
2820    {0, NULL},
2821};
2822
2823static PyType_Spec pattern_spec = {
2824    .name = "re.Pattern",
2825    .basicsize = sizeof(PatternObject),
2826    .itemsize = sizeof(SRE_CODE),
2827    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
2828              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
2829    .slots = pattern_slots,
2830};
2831
2832static PyMethodDef match_methods[] = {
2833    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2834    _SRE_SRE_MATCH_START_METHODDEF
2835    _SRE_SRE_MATCH_END_METHODDEF
2836    _SRE_SRE_MATCH_SPAN_METHODDEF
2837    _SRE_SRE_MATCH_GROUPS_METHODDEF
2838    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2839    _SRE_SRE_MATCH_EXPAND_METHODDEF
2840    _SRE_SRE_MATCH___COPY___METHODDEF
2841    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2842    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
2843     PyDoc_STR("See PEP 585")},
2844    {NULL, NULL}
2845};
2846
2847static PyGetSetDef match_getset[] = {
2848    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2849     "The integer index of the last matched capturing group."},
2850    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2851     "The name of the last matched capturing group."},
2852    {"regs",      (getter)match_regs_get,      (setter)NULL},
2853    {NULL}
2854};
2855
2856#define MATCH_OFF(x) offsetof(MatchObject, x)
2857static PyMemberDef match_members[] = {
2858    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
2859     "The string passed to match() or search()."},
2860    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
2861     "The regular expression object."},
2862    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
2863     "The index into the string at which the RE engine started looking for a match."},
2864    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
2865     "The index into the string beyond which the RE engine will not go."},
2866    {NULL}
2867};
2868
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2871static PyType_Slot match_slots[] = {
2872    {Py_tp_dealloc, match_dealloc},
2873    {Py_tp_repr, match_repr},
2874    {Py_tp_doc, (void *)match_doc},
2875    {Py_tp_methods, match_methods},
2876    {Py_tp_members, match_members},
2877    {Py_tp_getset, match_getset},
2878    {Py_tp_traverse, match_traverse},
2879    {Py_tp_clear, match_clear},
2880
2881    /* As mapping.
2882     *
2883     * Match objects do not support length or assignment, but do support
2884     * __getitem__.
2885     */
2886    {Py_mp_subscript, match_getitem},
2887
2888    {0, NULL},
2889};
2890
2891static PyType_Spec match_spec = {
2892    .name = "re.Match",
2893    .basicsize = sizeof(MatchObject),
2894    .itemsize = sizeof(Py_ssize_t),
2895    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
2896              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
2897    .slots = match_slots,
2898};
2899
2900static PyMethodDef scanner_methods[] = {
2901    _SRE_SRE_SCANNER_MATCH_METHODDEF
2902    _SRE_SRE_SCANNER_SEARCH_METHODDEF
2903    {NULL, NULL}
2904};
2905
2906#define SCAN_OFF(x) offsetof(ScannerObject, x)
2907static PyMemberDef scanner_members[] = {
2908    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2909    {NULL}  /* Sentinel */
2910};
2911
2912static PyType_Slot scanner_slots[] = {
2913    {Py_tp_dealloc, scanner_dealloc},
2914    {Py_tp_methods, scanner_methods},
2915    {Py_tp_members, scanner_members},
2916    {Py_tp_traverse, scanner_traverse},
2917    {Py_tp_clear, scanner_clear},
2918    {0, NULL},
2919};
2920
2921static PyType_Spec scanner_spec = {
2922    .name = "_" SRE_MODULE ".SRE_Scanner",
2923    .basicsize = sizeof(ScannerObject),
2924    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
2925              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
2926    .slots = scanner_slots,
2927};
2928
2929static PyMethodDef _functions[] = {
2930    _SRE_COMPILE_METHODDEF
2931    _SRE_GETCODESIZE_METHODDEF
2932    _SRE_ASCII_ISCASED_METHODDEF
2933    _SRE_UNICODE_ISCASED_METHODDEF
2934    _SRE_ASCII_TOLOWER_METHODDEF
2935    _SRE_UNICODE_TOLOWER_METHODDEF
2936    {NULL, NULL}
2937};
2938
2939static int
2940sre_traverse(PyObject *module, visitproc visit, void *arg)
2941{
2942    _sremodulestate *state = get_sre_module_state(module);
2943
2944    Py_VISIT(state->Pattern_Type);
2945    Py_VISIT(state->Match_Type);
2946    Py_VISIT(state->Scanner_Type);
2947
2948    return 0;
2949}
2950
2951static int
2952sre_clear(PyObject *module)
2953{
2954    _sremodulestate *state = get_sre_module_state(module);
2955
2956    Py_CLEAR(state->Pattern_Type);
2957    Py_CLEAR(state->Match_Type);
2958    Py_CLEAR(state->Scanner_Type);
2959
2960    return 0;
2961}
2962
2963static void
2964sre_free(void *module)
2965{
2966    sre_clear((PyObject *)module);
2967}
2968
/* Create a heap type from `spec`, bound to module `m`, and store it in
   `type`.  On failure, jumps to the `error` label that must exist in the
   calling function. */
#define CREATE_TYPE(m, type, spec)                                          \
    do {                                                                    \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((m), (spec), NULL); \
        if ((type) == NULL) {                                               \
            goto error;                                                     \
        }                                                                   \
    } while (0)
2976
/* Add the unsigned long `value` to `module` under `name`.  The temporary
   int object is released whether or not the insertion succeeds.  On
   failure, jumps to the `error` label that must exist in the calling
   function. */
#define ADD_ULONG_CONSTANT(module, name, value)                             \
    do {                                                                    \
        PyObject *const_obj = PyLong_FromUnsignedLong(value);               \
        if (const_obj == NULL) {                                            \
            goto error;                                                     \
        }                                                                   \
        int add_status = PyModule_AddObjectRef((module), (name), const_obj); \
        Py_DECREF(const_obj);                                               \
        if (add_status < 0) {                                               \
            goto error;                                                     \
        }                                                                   \
    } while (0)
2988
2989static int
2990sre_exec(PyObject *m)
2991{
2992    _sremodulestate *state;
2993
2994    /* Create heap types */
2995    state = get_sre_module_state(m);
2996    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
2997    CREATE_TYPE(m, state->Match_Type, &match_spec);
2998    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
2999
3000    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3001        goto error;
3002    }
3003
3004    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3005        goto error;
3006    }
3007
3008    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3009    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3010
3011    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3012        goto error;
3013    }
3014
3015    return 0;
3016
3017error:
3018    return -1;
3019}
3020
3021static PyModuleDef_Slot sre_slots[] = {
3022    {Py_mod_exec, sre_exec},
3023    {0, NULL},
3024};
3025
3026static struct PyModuleDef sremodule = {
3027    .m_base = PyModuleDef_HEAD_INIT,
3028    .m_name = "_" SRE_MODULE,
3029    .m_size = sizeof(_sremodulestate),
3030    .m_methods = _functions,
3031    .m_slots = sre_slots,
3032    .m_traverse = sre_traverse,
3033    .m_free = sre_free,
3034    .m_clear = sre_clear,
3035};
3036
3037PyMODINIT_FUNC
3038PyInit__sre(void)
3039{
3040    return PyModuleDef_Init(&sremodule);
3041}
3042
3043/* vim:ts=4:sw=4:et
3044*/
3045