xref: /third_party/python/Modules/_sre/sre_lib.h (revision 7db96d56)
1/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7 *
8 * See the sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17{
18    /* check if pointer is at given position */
19
20    Py_ssize_t thisp, thatp;
21
22    switch (at) {
23
24    case SRE_AT_BEGINNING:
25    case SRE_AT_BEGINNING_STRING:
26        return ((void*) ptr == state->beginning);
27
28    case SRE_AT_BEGINNING_LINE:
29        return ((void*) ptr == state->beginning ||
30                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32    case SRE_AT_END:
33        return (((SRE_CHAR *)state->end - ptr == 1 &&
34                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35                ((void*) ptr == state->end));
36
37    case SRE_AT_END_LINE:
38        return ((void*) ptr == state->end ||
39                SRE_IS_LINEBREAK((int) ptr[0]));
40
41    case SRE_AT_END_STRING:
42        return ((void*) ptr == state->end);
43
44    case SRE_AT_BOUNDARY:
45        if (state->beginning == state->end)
46            return 0;
47        thatp = ((void*) ptr > state->beginning) ?
48            SRE_IS_WORD((int) ptr[-1]) : 0;
49        thisp = ((void*) ptr < state->end) ?
50            SRE_IS_WORD((int) ptr[0]) : 0;
51        return thisp != thatp;
52
53    case SRE_AT_NON_BOUNDARY:
54        if (state->beginning == state->end)
55            return 0;
56        thatp = ((void*) ptr > state->beginning) ?
57            SRE_IS_WORD((int) ptr[-1]) : 0;
58        thisp = ((void*) ptr < state->end) ?
59            SRE_IS_WORD((int) ptr[0]) : 0;
60        return thisp == thatp;
61
62    case SRE_AT_LOC_BOUNDARY:
63        if (state->beginning == state->end)
64            return 0;
65        thatp = ((void*) ptr > state->beginning) ?
66            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67        thisp = ((void*) ptr < state->end) ?
68            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69        return thisp != thatp;
70
71    case SRE_AT_LOC_NON_BOUNDARY:
72        if (state->beginning == state->end)
73            return 0;
74        thatp = ((void*) ptr > state->beginning) ?
75            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76        thisp = ((void*) ptr < state->end) ?
77            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78        return thisp == thatp;
79
80    case SRE_AT_UNI_BOUNDARY:
81        if (state->beginning == state->end)
82            return 0;
83        thatp = ((void*) ptr > state->beginning) ?
84            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85        thisp = ((void*) ptr < state->end) ?
86            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87        return thisp != thatp;
88
89    case SRE_AT_UNI_NON_BOUNDARY:
90        if (state->beginning == state->end)
91            return 0;
92        thatp = ((void*) ptr > state->beginning) ?
93            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94        thisp = ((void*) ptr < state->end) ?
95            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96        return thisp == thatp;
97
98    }
99
100    return 0;
101}
102
103LOCAL(int)
104SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
105{
106    /* check if character is a member of the given set */
107
108    int ok = 1;
109
110    for (;;) {
111        switch (*set++) {
112
113        case SRE_OP_FAILURE:
114            return !ok;
115
116        case SRE_OP_LITERAL:
117            /* <LITERAL> <code> */
118            if (ch == set[0])
119                return ok;
120            set++;
121            break;
122
123        case SRE_OP_CATEGORY:
124            /* <CATEGORY> <code> */
125            if (sre_category(set[0], (int) ch))
126                return ok;
127            set++;
128            break;
129
130        case SRE_OP_CHARSET:
131            /* <CHARSET> <bitmap> */
132            if (ch < 256 &&
133                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                return ok;
135            set += 256/SRE_CODE_BITS;
136            break;
137
138        case SRE_OP_RANGE:
139            /* <RANGE> <lower> <upper> */
140            if (set[0] <= ch && ch <= set[1])
141                return ok;
142            set += 2;
143            break;
144
145        case SRE_OP_RANGE_UNI_IGNORE:
146            /* <RANGE_UNI_IGNORE> <lower> <upper> */
147        {
148            SRE_CODE uch;
149            /* ch is already lower cased */
150            if (set[0] <= ch && ch <= set[1])
151                return ok;
152            uch = sre_upper_unicode(ch);
153            if (set[0] <= uch && uch <= set[1])
154                return ok;
155            set += 2;
156            break;
157        }
158
159        case SRE_OP_NEGATE:
160            ok = !ok;
161            break;
162
163        case SRE_OP_BIGCHARSET:
164            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165        {
166            Py_ssize_t count, block;
167            count = *(set++);
168
169            if (ch < 0x10000u)
170                block = ((unsigned char*)set)[ch >> 8];
171            else
172                block = -1;
173            set += 256/sizeof(SRE_CODE);
174            if (block >=0 &&
175                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                    (1u << (ch & (SRE_CODE_BITS-1)))))
177                return ok;
178            set += count * (256/SRE_CODE_BITS);
179            break;
180        }
181
182        default:
183            /* internal error -- there's not much we can do about it
184               here, so let's just pretend it didn't match... */
185            return 0;
186        }
187    }
188}
189
190LOCAL(int)
191SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
192{
193    SRE_CODE lo, up;
194    lo = sre_lower_locale(ch);
195    if (SRE(charset)(state, set, lo))
196       return 1;
197
198    up = sre_upper_locale(ch);
199    return up != lo && SRE(charset)(state, set, up);
200}
201
202LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
203
204LOCAL(Py_ssize_t)
205SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
206{
207    SRE_CODE chr;
208    SRE_CHAR c;
209    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
210    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
211    Py_ssize_t i;
212
213    /* adjust end */
214    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215        end = ptr + maxcount;
216
217    switch (pattern[0]) {
218
219    case SRE_OP_IN:
220        /* repeated set */
221        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223            ptr++;
224        break;
225
226    case SRE_OP_ANY:
227        /* repeated dot wildcard. */
228        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230            ptr++;
231        break;
232
233    case SRE_OP_ANY_ALL:
234        /* repeated dot wildcard.  skip to the end of the target
235           string, and backtrack from there */
236        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237        ptr = end;
238        break;
239
240    case SRE_OP_LITERAL:
241        /* repeated literal */
242        chr = pattern[1];
243        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244        c = (SRE_CHAR) chr;
245#if SIZEOF_SRE_CHAR < 4
246        if ((SRE_CODE) c != chr)
247            ; /* literal can't match: doesn't fit in char width */
248        else
249#endif
250        while (ptr < end && *ptr == c)
251            ptr++;
252        break;
253
254    case SRE_OP_LITERAL_IGNORE:
255        /* repeated literal */
256        chr = pattern[1];
257        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259            ptr++;
260        break;
261
262    case SRE_OP_LITERAL_UNI_IGNORE:
263        /* repeated literal */
264        chr = pattern[1];
265        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267            ptr++;
268        break;
269
270    case SRE_OP_LITERAL_LOC_IGNORE:
271        /* repeated literal */
272        chr = pattern[1];
273        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274        while (ptr < end && char_loc_ignore(chr, *ptr))
275            ptr++;
276        break;
277
278    case SRE_OP_NOT_LITERAL:
279        /* repeated non-literal */
280        chr = pattern[1];
281        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282        c = (SRE_CHAR) chr;
283#if SIZEOF_SRE_CHAR < 4
284        if ((SRE_CODE) c != chr)
285            ptr = end; /* literal can't match: doesn't fit in char width */
286        else
287#endif
288        while (ptr < end && *ptr != c)
289            ptr++;
290        break;
291
292    case SRE_OP_NOT_LITERAL_IGNORE:
293        /* repeated non-literal */
294        chr = pattern[1];
295        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297            ptr++;
298        break;
299
300    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301        /* repeated non-literal */
302        chr = pattern[1];
303        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305            ptr++;
306        break;
307
308    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309        /* repeated non-literal */
310        chr = pattern[1];
311        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312        while (ptr < end && !char_loc_ignore(chr, *ptr))
313            ptr++;
314        break;
315
316    default:
317        /* repeated single character pattern */
318        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319        while ((SRE_CHAR*) state->ptr < end) {
320            i = SRE(match)(state, pattern, 0);
321            if (i < 0)
322                return i;
323            if (!i)
324                break;
325        }
326        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
327               (SRE_CHAR*) state->ptr - ptr));
328        return (SRE_CHAR*) state->ptr - ptr;
329    }
330
331    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
332           ptr - (SRE_CHAR*) state->ptr));
333    return ptr - (SRE_CHAR*) state->ptr;
334}
335
336/* The macros below should be used to protect recursive SRE(match)()
337 * calls that *failed* and do *not* return immediately (IOW, those
338 * that will backtrack). Explaining:
339 *
340 * - Recursive SRE(match)() returned true: that's usually a success
341 *   (besides atypical cases like ASSERT_NOT), therefore there's no
342 *   reason to restore lastmark;
343 *
344 * - Recursive SRE(match)() returned false but the current SRE(match)()
345 *   is returning to the caller: If the current SRE(match)() is the
346 *   top function of the recursion, returning false will be a matching
347 *   failure, and it doesn't matter where lastmark is pointing to.
348 *   If it's *not* the top function, it will be a recursive SRE(match)()
349 *   failure by itself, and the calling SRE(match)() will have to deal
350 *   with the failure by the same rules explained here (it will restore
351 *   lastmark by itself if necessary);
352 *
353 * - Recursive SRE(match)() returned false, and will continue the
354 *   outside 'for' loop: must be protected when breaking, since the next
355 *   OP could potentially depend on lastmark;
356 *
357 * - Recursive SRE(match)() returned false, and will be called again
358 *   inside a local for/while loop: must be protected between each
359 *   loop iteration, since the recursive SRE(match)() could do anything,
360 *   and could potentially depend on lastmark.
361 *
362 * For more information, check the discussion at SF patch #712900.
363 */
364#define LASTMARK_SAVE()     \
365    do { \
366        ctx->lastmark = state->lastmark; \
367        ctx->lastindex = state->lastindex; \
368    } while (0)
369#define LASTMARK_RESTORE()  \
370    do { \
371        state->lastmark = ctx->lastmark; \
372        state->lastindex = ctx->lastindex; \
373    } while (0)
374
375#define RETURN_ERROR(i) do { return i; } while(0)
376#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
377#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
378
379#define RETURN_ON_ERROR(i) \
380    do { if (i < 0) RETURN_ERROR(i); } while (0)
381#define RETURN_ON_SUCCESS(i) \
382    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
383#define RETURN_ON_FAILURE(i) \
384    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
385
386#define DATA_STACK_ALLOC(state, type, ptr) \
387do { \
388    alloc_pos = state->data_stack_base; \
389    TRACE(("allocating %s in %zd (%zd)\n", \
390           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
391    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
392        int j = data_stack_grow(state, sizeof(type)); \
393        if (j < 0) return j; \
394        if (ctx_pos != -1) \
395            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
396    } \
397    ptr = (type*)(state->data_stack+alloc_pos); \
398    state->data_stack_base += sizeof(type); \
399} while (0)
400
401#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
402do { \
403    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
404    ptr = (type*)(state->data_stack+pos); \
405} while (0)
406
407#define DATA_STACK_PUSH(state, data, size) \
408do { \
409    TRACE(("copy data in %p to %zd (%zd)\n", \
410           data, state->data_stack_base, size)); \
411    if (size > state->data_stack_size - state->data_stack_base) { \
412        int j = data_stack_grow(state, size); \
413        if (j < 0) return j; \
414        if (ctx_pos != -1) \
415            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
416    } \
417    memcpy(state->data_stack+state->data_stack_base, data, size); \
418    state->data_stack_base += size; \
419} while (0)
420
421/* We add an explicit cast to memcpy here because MSVC has a bug when
422   compiling C code where it believes that `const void**` cannot be
423   safely casted to `void*`, see bpo-39943 for details. */
424#define DATA_STACK_POP(state, data, size, discard) \
425do { \
426    TRACE(("copy data to %p from %zd (%zd)\n", \
427           data, state->data_stack_base-size, size)); \
428    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
429    if (discard) \
430        state->data_stack_base -= size; \
431} while (0)
432
433#define DATA_STACK_POP_DISCARD(state, size) \
434do { \
435    TRACE(("discard data from %zd (%zd)\n", \
436           state->data_stack_base-size, size)); \
437    state->data_stack_base -= size; \
438} while(0)
439
440#define DATA_PUSH(x) \
441    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
442#define DATA_POP(x) \
443    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
444#define DATA_POP_DISCARD(x) \
445    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
446#define DATA_ALLOC(t,p) \
447    DATA_STACK_ALLOC(state, t, p)
448#define DATA_LOOKUP_AT(t,p,pos) \
449    DATA_STACK_LOOKUP_AT(state,t,p,pos)
450
451#define MARK_PUSH(lastmark) \
452    do if (lastmark >= 0) { \
453        size_t _marks_size = (lastmark+1) * sizeof(void*); \
454        DATA_STACK_PUSH(state, state->mark, _marks_size); \
455    } while (0)
456#define MARK_POP(lastmark) \
457    do if (lastmark >= 0) { \
458        size_t _marks_size = (lastmark+1) * sizeof(void*); \
459        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
460    } while (0)
461#define MARK_POP_KEEP(lastmark) \
462    do if (lastmark >= 0) { \
463        size_t _marks_size = (lastmark+1) * sizeof(void*); \
464        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
465    } while (0)
466#define MARK_POP_DISCARD(lastmark) \
467    do if (lastmark >= 0) { \
468        size_t _marks_size = (lastmark+1) * sizeof(void*); \
469        DATA_STACK_POP_DISCARD(state, _marks_size); \
470    } while (0)
471
472#define JUMP_NONE            0
473#define JUMP_MAX_UNTIL_1     1
474#define JUMP_MAX_UNTIL_2     2
475#define JUMP_MAX_UNTIL_3     3
476#define JUMP_MIN_UNTIL_1     4
477#define JUMP_MIN_UNTIL_2     5
478#define JUMP_MIN_UNTIL_3     6
479#define JUMP_REPEAT          7
480#define JUMP_REPEAT_ONE_1    8
481#define JUMP_REPEAT_ONE_2    9
482#define JUMP_MIN_REPEAT_ONE  10
483#define JUMP_BRANCH          11
484#define JUMP_ASSERT          12
485#define JUMP_ASSERT_NOT      13
486#define JUMP_POSS_REPEAT_1   14
487#define JUMP_POSS_REPEAT_2   15
488#define JUMP_ATOMIC_GROUP    16
489
490#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
491    ctx->pattern = pattern; \
492    ctx->ptr = ptr; \
493    DATA_ALLOC(SRE(match_context), nextctx); \
494    nextctx->pattern = nextpattern; \
495    nextctx->toplevel = toplevel_; \
496    nextctx->jump = jumpvalue; \
497    nextctx->last_ctx_pos = ctx_pos; \
498    pattern = nextpattern; \
499    ctx_pos = alloc_pos; \
500    ctx = nextctx; \
501    goto entrance; \
502    jumplabel: \
503    pattern = ctx->pattern; \
504    ptr = ctx->ptr;
505
506#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
507    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
508
509#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
510    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
511
512typedef struct {
513    Py_ssize_t count;
514    union {
515        SRE_CODE chr;
516        SRE_REPEAT* rep;
517    } u;
518    int lastmark;
519    int lastindex;
520    const SRE_CODE* pattern;
521    const SRE_CHAR* ptr;
522    int toplevel;
523    int jump;
524    Py_ssize_t last_ctx_pos;
525} SRE(match_context);
526
527#define MAYBE_CHECK_SIGNALS                                        \
528    do {                                                           \
529        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
530            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
531        }                                                          \
532    } while (0)
533
534#ifdef HAVE_COMPUTED_GOTOS
535    #ifndef USE_COMPUTED_GOTOS
536    #define USE_COMPUTED_GOTOS 1
537    #endif
538#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
539    #error "Computed gotos are not supported on this compiler."
540#else
541    #undef USE_COMPUTED_GOTOS
542    #define USE_COMPUTED_GOTOS 0
543#endif
544
545#if USE_COMPUTED_GOTOS
546    #define TARGET(OP) TARGET_ ## OP
547    #define DISPATCH                       \
548        do {                               \
549            MAYBE_CHECK_SIGNALS;           \
550            goto *sre_targets[*pattern++]; \
551        } while (0)
552#else
553    #define TARGET(OP) case OP
554    #define DISPATCH goto dispatch
555#endif
556
557/* check if string matches the given pattern.  returns <0 for
558   error, 0 for failure, and 1 for success */
559LOCAL(Py_ssize_t)
560SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
561{
562    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
563    Py_ssize_t alloc_pos, ctx_pos = -1;
564    Py_ssize_t ret = 0;
565    int jump;
566    unsigned int sigcount=0;
567
568    SRE(match_context)* ctx;
569    SRE(match_context)* nextctx;
570
571    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
572
573    DATA_ALLOC(SRE(match_context), ctx);
574    ctx->last_ctx_pos = -1;
575    ctx->jump = JUMP_NONE;
576    ctx->toplevel = toplevel;
577    ctx_pos = alloc_pos;
578
579#if USE_COMPUTED_GOTOS
580#include "sre_targets.h"
581#endif
582
583entrance:
584
585    ;  // Fashion statement.
586    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
587
588    if (pattern[0] == SRE_OP_INFO) {
589        /* optimization info block */
590        /* <INFO> <1=skip> <2=flags> <3=min> ... */
591        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
592            TRACE(("reject (got %zd chars, need %zd)\n",
593                   end - ptr, (Py_ssize_t) pattern[3]));
594            RETURN_FAILURE;
595        }
596        pattern += pattern[1] + 1;
597    }
598
599#if USE_COMPUTED_GOTOS
600    DISPATCH;
601#else
602dispatch:
603    MAYBE_CHECK_SIGNALS;
604    switch (*pattern++)
605#endif
606    {
607
608        TARGET(SRE_OP_MARK):
609            /* set mark */
610            /* <MARK> <gid> */
611            TRACE(("|%p|%p|MARK %d\n", pattern,
612                   ptr, pattern[0]));
613            {
614                int i = pattern[0];
615                if (i & 1)
616                    state->lastindex = i/2 + 1;
617                if (i > state->lastmark) {
618                    /* state->lastmark is the highest valid index in the
619                       state->mark array.  If it is increased by more than 1,
620                       the intervening marks must be set to NULL to signal
621                       that these marks have not been encountered. */
622                    int j = state->lastmark + 1;
623                    while (j < i)
624                        state->mark[j++] = NULL;
625                    state->lastmark = i;
626                }
627                state->mark[i] = ptr;
628            }
629            pattern++;
630            DISPATCH;
631
632        TARGET(SRE_OP_LITERAL):
633            /* match literal string */
634            /* <LITERAL> <code> */
635            TRACE(("|%p|%p|LITERAL %d\n", pattern,
636                   ptr, *pattern));
637            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
638                RETURN_FAILURE;
639            pattern++;
640            ptr++;
641            DISPATCH;
642
643        TARGET(SRE_OP_NOT_LITERAL):
644            /* match anything that is not literal character */
645            /* <NOT_LITERAL> <code> */
646            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
647                   ptr, *pattern));
648            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
649                RETURN_FAILURE;
650            pattern++;
651            ptr++;
652            DISPATCH;
653
654        TARGET(SRE_OP_SUCCESS):
655            /* end of pattern */
656            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
657            if (ctx->toplevel &&
658                ((state->match_all && ptr != state->end) ||
659                 (state->must_advance && ptr == state->start)))
660            {
661                RETURN_FAILURE;
662            }
663            state->ptr = ptr;
664            RETURN_SUCCESS;
665
666        TARGET(SRE_OP_AT):
667            /* match at given position */
668            /* <AT> <code> */
669            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
670            if (!SRE(at)(state, ptr, *pattern))
671                RETURN_FAILURE;
672            pattern++;
673            DISPATCH;
674
675        TARGET(SRE_OP_CATEGORY):
676            /* match at given category */
677            /* <CATEGORY> <code> */
678            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
679                   ptr, *pattern));
680            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
681                RETURN_FAILURE;
682            pattern++;
683            ptr++;
684            DISPATCH;
685
686        TARGET(SRE_OP_ANY):
687            /* match anything (except a newline) */
688            /* <ANY> */
689            TRACE(("|%p|%p|ANY\n", pattern, ptr));
690            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
691                RETURN_FAILURE;
692            ptr++;
693            DISPATCH;
694
695        TARGET(SRE_OP_ANY_ALL):
696            /* match anything */
697            /* <ANY_ALL> */
698            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
699            if (ptr >= end)
700                RETURN_FAILURE;
701            ptr++;
702            DISPATCH;
703
704        TARGET(SRE_OP_IN):
705            /* match set member (or non_member) */
706            /* <IN> <skip> <set> */
707            TRACE(("|%p|%p|IN\n", pattern, ptr));
708            if (ptr >= end ||
709                !SRE(charset)(state, pattern + 1, *ptr))
710                RETURN_FAILURE;
711            pattern += pattern[0];
712            ptr++;
713            DISPATCH;
714
715        TARGET(SRE_OP_LITERAL_IGNORE):
716            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
717                   pattern, ptr, pattern[0]));
718            if (ptr >= end ||
719                sre_lower_ascii(*ptr) != *pattern)
720                RETURN_FAILURE;
721            pattern++;
722            ptr++;
723            DISPATCH;
724
725        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
726            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
727                   pattern, ptr, pattern[0]));
728            if (ptr >= end ||
729                sre_lower_unicode(*ptr) != *pattern)
730                RETURN_FAILURE;
731            pattern++;
732            ptr++;
733            DISPATCH;
734
735        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
736            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
737                   pattern, ptr, pattern[0]));
738            if (ptr >= end
739                || !char_loc_ignore(*pattern, *ptr))
740                RETURN_FAILURE;
741            pattern++;
742            ptr++;
743            DISPATCH;
744
745        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
746            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
747                   pattern, ptr, *pattern));
748            if (ptr >= end ||
749                sre_lower_ascii(*ptr) == *pattern)
750                RETURN_FAILURE;
751            pattern++;
752            ptr++;
753            DISPATCH;
754
755        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
756            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
757                   pattern, ptr, *pattern));
758            if (ptr >= end ||
759                sre_lower_unicode(*ptr) == *pattern)
760                RETURN_FAILURE;
761            pattern++;
762            ptr++;
763            DISPATCH;
764
765        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
766            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
767                   pattern, ptr, *pattern));
768            if (ptr >= end
769                || char_loc_ignore(*pattern, *ptr))
770                RETURN_FAILURE;
771            pattern++;
772            ptr++;
773            DISPATCH;
774
775        TARGET(SRE_OP_IN_IGNORE):
776            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
777            if (ptr >= end
778                || !SRE(charset)(state, pattern+1,
779                                 (SRE_CODE)sre_lower_ascii(*ptr)))
780                RETURN_FAILURE;
781            pattern += pattern[0];
782            ptr++;
783            DISPATCH;
784
785        TARGET(SRE_OP_IN_UNI_IGNORE):
786            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
787            if (ptr >= end
788                || !SRE(charset)(state, pattern+1,
789                                 (SRE_CODE)sre_lower_unicode(*ptr)))
790                RETURN_FAILURE;
791            pattern += pattern[0];
792            ptr++;
793            DISPATCH;
794
795        TARGET(SRE_OP_IN_LOC_IGNORE):
796            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
797            if (ptr >= end
798                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
799                RETURN_FAILURE;
800            pattern += pattern[0];
801            ptr++;
802            DISPATCH;
803
804        TARGET(SRE_OP_JUMP):
805        TARGET(SRE_OP_INFO):
806            /* jump forward */
807            /* <JUMP> <offset> */
808            TRACE(("|%p|%p|JUMP %d\n", pattern,
809                   ptr, pattern[0]));
810            pattern += pattern[0];
811            DISPATCH;
812
813        TARGET(SRE_OP_BRANCH):
814            /* alternation */
815            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
816            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
817            LASTMARK_SAVE();
818            if (state->repeat)
819                MARK_PUSH(ctx->lastmark);
820            for (; pattern[0]; pattern += pattern[0]) {
821                if (pattern[1] == SRE_OP_LITERAL &&
822                    (ptr >= end ||
823                     (SRE_CODE) *ptr != pattern[2]))
824                    continue;
825                if (pattern[1] == SRE_OP_IN &&
826                    (ptr >= end ||
827                     !SRE(charset)(state, pattern + 3,
828                                   (SRE_CODE) *ptr)))
829                    continue;
830                state->ptr = ptr;
831                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
832                if (ret) {
833                    if (state->repeat)
834                        MARK_POP_DISCARD(ctx->lastmark);
835                    RETURN_ON_ERROR(ret);
836                    RETURN_SUCCESS;
837                }
838                if (state->repeat)
839                    MARK_POP_KEEP(ctx->lastmark);
840                LASTMARK_RESTORE();
841            }
842            if (state->repeat)
843                MARK_POP_DISCARD(ctx->lastmark);
844            RETURN_FAILURE;
845
846        TARGET(SRE_OP_REPEAT_ONE):
847            /* match repeated sequence (maximizing regexp) */
848
849            /* this operator only works if the repeated item is
850               exactly one character wide, and we're not already
851               collecting backtracking points.  for other cases,
852               use the MAX_REPEAT operator */
853
854            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
855
856            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
857                   pattern[1], pattern[2]));
858
859            if ((Py_ssize_t) pattern[1] > end - ptr)
860                RETURN_FAILURE; /* cannot match */
861
862            state->ptr = ptr;
863
864            ret = SRE(count)(state, pattern+3, pattern[2]);
865            RETURN_ON_ERROR(ret);
866            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
867            ctx->count = ret;
868            ptr += ctx->count;
869
870            /* when we arrive here, count contains the number of
871               matches, and ptr points to the tail of the target
872               string.  check if the rest of the pattern matches,
873               and backtrack if not. */
874
875            if (ctx->count < (Py_ssize_t) pattern[1])
876                RETURN_FAILURE;
877
878            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
879                ptr == state->end &&
880                !(ctx->toplevel && state->must_advance && ptr == state->start))
881            {
882                /* tail is empty.  we're finished */
883                state->ptr = ptr;
884                RETURN_SUCCESS;
885            }
886
887            LASTMARK_SAVE();
888            if (state->repeat)
889                MARK_PUSH(ctx->lastmark);
890
891            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
892                /* tail starts with a literal. skip positions where
893                   the rest of the pattern cannot possibly match */
894                ctx->u.chr = pattern[pattern[0]+1];
895                for (;;) {
896                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
897                           (ptr >= end || *ptr != ctx->u.chr)) {
898                        ptr--;
899                        ctx->count--;
900                    }
901                    if (ctx->count < (Py_ssize_t) pattern[1])
902                        break;
903                    state->ptr = ptr;
904                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
905                            pattern+pattern[0]);
906                    if (ret) {
907                        if (state->repeat)
908                            MARK_POP_DISCARD(ctx->lastmark);
909                        RETURN_ON_ERROR(ret);
910                        RETURN_SUCCESS;
911                    }
912                    if (state->repeat)
913                        MARK_POP_KEEP(ctx->lastmark);
914                    LASTMARK_RESTORE();
915
916                    ptr--;
917                    ctx->count--;
918                }
919                if (state->repeat)
920                    MARK_POP_DISCARD(ctx->lastmark);
921            } else {
922                /* general case */
923                while (ctx->count >= (Py_ssize_t) pattern[1]) {
924                    state->ptr = ptr;
925                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
926                            pattern+pattern[0]);
927                    if (ret) {
928                        if (state->repeat)
929                            MARK_POP_DISCARD(ctx->lastmark);
930                        RETURN_ON_ERROR(ret);
931                        RETURN_SUCCESS;
932                    }
933                    if (state->repeat)
934                        MARK_POP_KEEP(ctx->lastmark);
935                    LASTMARK_RESTORE();
936
937                    ptr--;
938                    ctx->count--;
939                }
940                if (state->repeat)
941                    MARK_POP_DISCARD(ctx->lastmark);
942            }
943            RETURN_FAILURE;
944
945        TARGET(SRE_OP_MIN_REPEAT_ONE):
946            /* match repeated sequence (minimizing regexp) */
947
948            /* this operator only works if the repeated item is
949               exactly one character wide, and we're not already
950               collecting backtracking points.  for other cases,
951               use the MIN_REPEAT operator */
952
953            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
954
955            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
956                   pattern[1], pattern[2]));
957
958            if ((Py_ssize_t) pattern[1] > end - ptr)
959                RETURN_FAILURE; /* cannot match */
960
961            state->ptr = ptr;
962
963            if (pattern[1] == 0)
964                ctx->count = 0;
965            else {
966                /* count using pattern min as the maximum */
967                ret = SRE(count)(state, pattern+3, pattern[1]);
968                RETURN_ON_ERROR(ret);
969                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
970                if (ret < (Py_ssize_t) pattern[1])
971                    /* didn't match minimum number of times */
972                    RETURN_FAILURE;
973                /* advance past minimum matches of repeat */
974                ctx->count = ret;
975                ptr += ctx->count;
976            }
977
978            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
979                !(ctx->toplevel &&
980                  ((state->match_all && ptr != state->end) ||
981                   (state->must_advance && ptr == state->start))))
982            {
983                /* tail is empty.  we're finished */
984                state->ptr = ptr;
985                RETURN_SUCCESS;
986
987            } else {
988                /* general case */
989                LASTMARK_SAVE();
990                if (state->repeat)
991                    MARK_PUSH(ctx->lastmark);
992
993                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
994                       || ctx->count <= (Py_ssize_t)pattern[2]) {
995                    state->ptr = ptr;
996                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
997                            pattern+pattern[0]);
998                    if (ret) {
999                        if (state->repeat)
1000                            MARK_POP_DISCARD(ctx->lastmark);
1001                        RETURN_ON_ERROR(ret);
1002                        RETURN_SUCCESS;
1003                    }
1004                    if (state->repeat)
1005                        MARK_POP_KEEP(ctx->lastmark);
1006                    LASTMARK_RESTORE();
1007
1008                    state->ptr = ptr;
1009                    ret = SRE(count)(state, pattern+3, 1);
1010                    RETURN_ON_ERROR(ret);
1011                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1012                    if (ret == 0)
1013                        break;
1014                    assert(ret == 1);
1015                    ptr++;
1016                    ctx->count++;
1017                }
1018                if (state->repeat)
1019                    MARK_POP_DISCARD(ctx->lastmark);
1020            }
1021            RETURN_FAILURE;
1022
1023        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1024            /* match repeated sequence (maximizing regexp) without
1025               backtracking */
1026
1027            /* this operator only works if the repeated item is
1028               exactly one character wide, and we're not already
1029               collecting backtracking points.  for other cases,
1030               use the MAX_REPEAT operator */
1031
1032            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1033               tail */
1034
1035            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1036                   ptr, pattern[1], pattern[2]));
1037
1038            if (ptr + pattern[1] > end) {
1039                RETURN_FAILURE; /* cannot match */
1040            }
1041
1042            state->ptr = ptr;
1043
1044            ret = SRE(count)(state, pattern + 3, pattern[2]);
1045            RETURN_ON_ERROR(ret);
1046            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1047            ctx->count = ret;
1048            ptr += ctx->count;
1049
1050            /* when we arrive here, count contains the number of
1051               matches, and ptr points to the tail of the target
1052               string.  check if the rest of the pattern matches,
1053               and fail if not. */
1054
1055            /* Test for not enough repetitions in match */
1056            if (ctx->count < (Py_ssize_t) pattern[1]) {
1057                RETURN_FAILURE;
1058            }
1059
1060            /* Update the pattern to point to the next op code */
1061            pattern += pattern[0];
1062
1063            /* Let the tail be evaluated separately and consider this
1064               match successful. */
1065            if (*pattern == SRE_OP_SUCCESS &&
1066                ptr == state->end &&
1067                !(ctx->toplevel && state->must_advance && ptr == state->start))
1068            {
1069                /* tail is empty.  we're finished */
1070                state->ptr = ptr;
1071                RETURN_SUCCESS;
1072            }
1073
1074            /* Attempt to match the rest of the string */
1075            DISPATCH;
1076
1077        TARGET(SRE_OP_REPEAT):
1078            /* create repeat context.  all the hard work is done
1079               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1080            /* <REPEAT> <skip> <1=min> <2=max>
1081               <3=repeat_index> item <UNTIL> tail */
1082            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1083                   pattern[1], pattern[2]));
1084
1085            /* install new repeat context */
1086            /* TODO(https://github.com/python/cpython/issues/67877): Fix this
1087             * potential memory leak. */
1088            ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
1089            if (!ctx->u.rep) {
1090                PyErr_NoMemory();
1091                RETURN_FAILURE;
1092            }
1093            ctx->u.rep->count = -1;
1094            ctx->u.rep->pattern = pattern;
1095            ctx->u.rep->prev = state->repeat;
1096            ctx->u.rep->last_ptr = NULL;
1097            state->repeat = ctx->u.rep;
1098
1099            state->ptr = ptr;
1100            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1101            state->repeat = ctx->u.rep->prev;
1102            PyObject_Free(ctx->u.rep);
1103
1104            if (ret) {
1105                RETURN_ON_ERROR(ret);
1106                RETURN_SUCCESS;
1107            }
1108            RETURN_FAILURE;
1109
1110        TARGET(SRE_OP_MAX_UNTIL):
1111            /* maximizing repeat */
1112            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1113
1114            /* FIXME: we probably need to deal with zero-width
1115               matches in here... */
1116
1117            ctx->u.rep = state->repeat;
1118            if (!ctx->u.rep)
1119                RETURN_ERROR(SRE_ERROR_STATE);
1120
1121            state->ptr = ptr;
1122
1123            ctx->count = ctx->u.rep->count+1;
1124
1125            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1126                   ptr, ctx->count));
1127
1128            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1129                /* not enough matches */
1130                ctx->u.rep->count = ctx->count;
1131                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1132                        ctx->u.rep->pattern+3);
1133                if (ret) {
1134                    RETURN_ON_ERROR(ret);
1135                    RETURN_SUCCESS;
1136                }
1137                ctx->u.rep->count = ctx->count-1;
1138                state->ptr = ptr;
1139                RETURN_FAILURE;
1140            }
1141
1142            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1143                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1144                state->ptr != ctx->u.rep->last_ptr) {
1145                /* we may have enough matches, but if we can
1146                   match another item, do so */
1147                ctx->u.rep->count = ctx->count;
1148                LASTMARK_SAVE();
1149                MARK_PUSH(ctx->lastmark);
1150                /* zero-width match protection */
1151                DATA_PUSH(&ctx->u.rep->last_ptr);
1152                ctx->u.rep->last_ptr = state->ptr;
1153                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1154                        ctx->u.rep->pattern+3);
1155                DATA_POP(&ctx->u.rep->last_ptr);
1156                if (ret) {
1157                    MARK_POP_DISCARD(ctx->lastmark);
1158                    RETURN_ON_ERROR(ret);
1159                    RETURN_SUCCESS;
1160                }
1161                MARK_POP(ctx->lastmark);
1162                LASTMARK_RESTORE();
1163                ctx->u.rep->count = ctx->count-1;
1164                state->ptr = ptr;
1165            }
1166
1167            /* cannot match more repeated items here.  make sure the
1168               tail matches */
1169            state->repeat = ctx->u.rep->prev;
1170            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1171            state->repeat = ctx->u.rep; // restore repeat before return
1172
1173            RETURN_ON_SUCCESS(ret);
1174            state->ptr = ptr;
1175            RETURN_FAILURE;
1176
1177        TARGET(SRE_OP_MIN_UNTIL):
1178            /* minimizing repeat */
1179            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1180
1181            ctx->u.rep = state->repeat;
1182            if (!ctx->u.rep)
1183                RETURN_ERROR(SRE_ERROR_STATE);
1184
1185            state->ptr = ptr;
1186
1187            ctx->count = ctx->u.rep->count+1;
1188
1189            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1190                   ptr, ctx->count, ctx->u.rep->pattern));
1191
1192            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1193                /* not enough matches */
1194                ctx->u.rep->count = ctx->count;
1195                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1196                        ctx->u.rep->pattern+3);
1197                if (ret) {
1198                    RETURN_ON_ERROR(ret);
1199                    RETURN_SUCCESS;
1200                }
1201                ctx->u.rep->count = ctx->count-1;
1202                state->ptr = ptr;
1203                RETURN_FAILURE;
1204            }
1205
1206            /* see if the tail matches */
1207            state->repeat = ctx->u.rep->prev;
1208
1209            LASTMARK_SAVE();
1210            if (state->repeat)
1211                MARK_PUSH(ctx->lastmark);
1212
1213            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1214            SRE_REPEAT *repeat_of_tail = state->repeat;
1215            state->repeat = ctx->u.rep; // restore repeat before return
1216
1217            if (ret) {
1218                if (repeat_of_tail)
1219                    MARK_POP_DISCARD(ctx->lastmark);
1220                RETURN_ON_ERROR(ret);
1221                RETURN_SUCCESS;
1222            }
1223            if (repeat_of_tail)
1224                MARK_POP(ctx->lastmark);
1225            LASTMARK_RESTORE();
1226
1227            state->ptr = ptr;
1228
1229            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1230                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1231                state->ptr == ctx->u.rep->last_ptr)
1232                RETURN_FAILURE;
1233
1234            ctx->u.rep->count = ctx->count;
1235            /* zero-width match protection */
1236            DATA_PUSH(&ctx->u.rep->last_ptr);
1237            ctx->u.rep->last_ptr = state->ptr;
1238            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1239                    ctx->u.rep->pattern+3);
1240            DATA_POP(&ctx->u.rep->last_ptr);
1241            if (ret) {
1242                RETURN_ON_ERROR(ret);
1243                RETURN_SUCCESS;
1244            }
1245            ctx->u.rep->count = ctx->count-1;
1246            state->ptr = ptr;
1247            RETURN_FAILURE;
1248
1249        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1250            /* create possessive repeat contexts. */
1251            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1252               <SUCCESS> tail */
1253            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1254                   ptr, pattern[1], pattern[2]));
1255
1256            /* Set the global Input pointer to this context's Input
1257               pointer */
1258            state->ptr = ptr;
1259
1260            /* Initialize Count to 0 */
1261            ctx->count = 0;
1262
1263            /* Check for minimum required matches. */
1264            while (ctx->count < (Py_ssize_t)pattern[1]) {
1265                /* not enough matches */
1266                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1267                         &pattern[3]);
1268                if (ret) {
1269                    RETURN_ON_ERROR(ret);
1270                    ctx->count++;
1271                }
1272                else {
1273                    state->ptr = ptr;
1274                    RETURN_FAILURE;
1275                }
1276            }
1277
1278            /* Clear the context's Input stream pointer so that it
1279               doesn't match the global state so that the while loop can
1280               be entered. */
1281            ptr = NULL;
1282
1283            /* Keep trying to parse the <pattern> sub-pattern until the
1284               end is reached, creating a new context each time. */
1285            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1286                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1287                   state->ptr != ptr) {
1288                /* Save the Capture Group Marker state into the current
1289                   Context and back up the current highest number
1290                   Capture Group marker. */
1291                LASTMARK_SAVE();
1292                MARK_PUSH(ctx->lastmark);
1293
1294                /* zero-width match protection */
1295                /* Set the context's Input Stream pointer to be the
1296                   current Input Stream pointer from the global
1297                   state.  When the loop reaches the next iteration,
1298                   the context will then store the last known good
1299                   position with the global state holding the Input
1300                   Input Stream position that has been updated with
1301                   the most recent match.  Thus, if state's Input
1302                   stream remains the same as the one stored in the
1303                   current Context, we know we have successfully
1304                   matched an empty string and that all subsequent
1305                   matches will also be the empty string until the
1306                   maximum number of matches are counted, and because
1307                   of this, we could immediately stop at that point and
1308                   consider this match successful. */
1309                ptr = state->ptr;
1310
1311                /* We have not reached the maximin matches, so try to
1312                   match once more. */
1313                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1314                         &pattern[3]);
1315
1316                /* Check to see if the last attempted match
1317                   succeeded. */
1318                if (ret) {
1319                    /* Drop the saved highest number Capture Group
1320                       marker saved above and use the newly updated
1321                       value. */
1322                    MARK_POP_DISCARD(ctx->lastmark);
1323                    RETURN_ON_ERROR(ret);
1324
1325                    /* Success, increment the count. */
1326                    ctx->count++;
1327                }
1328                /* Last attempted match failed. */
1329                else {
1330                    /* Restore the previously saved highest number
1331                       Capture Group marker since the last iteration
1332                       did not match, then restore that to the global
1333                       state. */
1334                    MARK_POP(ctx->lastmark);
1335                    LASTMARK_RESTORE();
1336
1337                    /* We have sufficient matches, so exit loop. */
1338                    break;
1339                }
1340            }
1341
1342            /* Evaluate Tail */
1343            /* Jump to end of pattern indicated by skip, and then skip
1344               the SUCCESS op code that follows it. */
1345            pattern += pattern[0] + 1;
1346            ptr = state->ptr;
1347            DISPATCH;
1348
1349        TARGET(SRE_OP_ATOMIC_GROUP):
1350            /* Atomic Group Sub Pattern */
1351            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1352            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1353
1354            /* Set the global Input pointer to this context's Input
1355               pointer */
1356            state->ptr = ptr;
1357
1358            /* Evaluate the Atomic Group in a new context, terminating
1359               when the end of the group, represented by a SUCCESS op
1360               code, is reached. */
1361            /* Group Pattern begins at an offset of 1 code. */
1362            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1363                     &pattern[1]);
1364
1365            /* Test Exit Condition */
1366            RETURN_ON_ERROR(ret);
1367
1368            if (ret == 0) {
1369                /* Atomic Group failed to Match. */
1370                state->ptr = ptr;
1371                RETURN_FAILURE;
1372            }
1373
1374            /* Evaluate Tail */
1375            /* Jump to end of pattern indicated by skip, and then skip
1376               the SUCCESS op code that follows it. */
1377            pattern += pattern[0];
1378            ptr = state->ptr;
1379            DISPATCH;
1380
1381        TARGET(SRE_OP_GROUPREF):
1382            /* match backreference */
1383            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1384                   ptr, pattern[0]));
1385            {
1386                int groupref = pattern[0] * 2;
1387                if (groupref >= state->lastmark) {
1388                    RETURN_FAILURE;
1389                } else {
1390                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1391                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1392                    if (!p || !e || e < p)
1393                        RETURN_FAILURE;
1394                    while (p < e) {
1395                        if (ptr >= end || *ptr != *p)
1396                            RETURN_FAILURE;
1397                        p++;
1398                        ptr++;
1399                    }
1400                }
1401            }
1402            pattern++;
1403            DISPATCH;
1404
1405        TARGET(SRE_OP_GROUPREF_IGNORE):
1406            /* match backreference */
1407            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1408                   ptr, pattern[0]));
1409            {
1410                int groupref = pattern[0] * 2;
1411                if (groupref >= state->lastmark) {
1412                    RETURN_FAILURE;
1413                } else {
1414                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1415                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1416                    if (!p || !e || e < p)
1417                        RETURN_FAILURE;
1418                    while (p < e) {
1419                        if (ptr >= end ||
1420                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1421                            RETURN_FAILURE;
1422                        p++;
1423                        ptr++;
1424                    }
1425                }
1426            }
1427            pattern++;
1428            DISPATCH;
1429
1430        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1431            /* match backreference */
1432            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1433                   ptr, pattern[0]));
1434            {
1435                int groupref = pattern[0] * 2;
1436                if (groupref >= state->lastmark) {
1437                    RETURN_FAILURE;
1438                } else {
1439                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1440                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1441                    if (!p || !e || e < p)
1442                        RETURN_FAILURE;
1443                    while (p < e) {
1444                        if (ptr >= end ||
1445                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1446                            RETURN_FAILURE;
1447                        p++;
1448                        ptr++;
1449                    }
1450                }
1451            }
1452            pattern++;
1453            DISPATCH;
1454
1455        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1456            /* match backreference */
1457            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1458                   ptr, pattern[0]));
1459            {
1460                int groupref = pattern[0] * 2;
1461                if (groupref >= state->lastmark) {
1462                    RETURN_FAILURE;
1463                } else {
1464                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1465                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1466                    if (!p || !e || e < p)
1467                        RETURN_FAILURE;
1468                    while (p < e) {
1469                        if (ptr >= end ||
1470                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1471                            RETURN_FAILURE;
1472                        p++;
1473                        ptr++;
1474                    }
1475                }
1476            }
1477            pattern++;
1478            DISPATCH;
1479
1480        TARGET(SRE_OP_GROUPREF_EXISTS):
1481            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1482                   ptr, pattern[0]));
1483            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1484            {
1485                int groupref = pattern[0] * 2;
1486                if (groupref >= state->lastmark) {
1487                    pattern += pattern[1];
1488                    DISPATCH;
1489                } else {
1490                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1491                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1492                    if (!p || !e || e < p) {
1493                        pattern += pattern[1];
1494                        DISPATCH;
1495                    }
1496                }
1497            }
1498            pattern += 2;
1499            DISPATCH;
1500
1501        TARGET(SRE_OP_ASSERT):
1502            /* assert subpattern */
1503            /* <ASSERT> <skip> <back> <pattern> */
1504            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1505                   ptr, pattern[1]));
1506            if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1])
1507                RETURN_FAILURE;
1508            state->ptr = ptr - pattern[1];
1509            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1510            RETURN_ON_FAILURE(ret);
1511            pattern += pattern[0];
1512            DISPATCH;
1513
1514        TARGET(SRE_OP_ASSERT_NOT):
1515            /* assert not subpattern */
1516            /* <ASSERT_NOT> <skip> <back> <pattern> */
1517            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1518                   ptr, pattern[1]));
1519            if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) {
1520                state->ptr = ptr - pattern[1];
1521                LASTMARK_SAVE();
1522                if (state->repeat)
1523                    MARK_PUSH(ctx->lastmark);
1524
1525                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1526                if (ret) {
1527                    if (state->repeat)
1528                        MARK_POP_DISCARD(ctx->lastmark);
1529                    RETURN_ON_ERROR(ret);
1530                    RETURN_FAILURE;
1531                }
1532                if (state->repeat)
1533                    MARK_POP(ctx->lastmark);
1534                LASTMARK_RESTORE();
1535            }
1536            pattern += pattern[0];
1537            DISPATCH;
1538
1539        TARGET(SRE_OP_FAILURE):
1540            /* immediate failure */
1541            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1542            RETURN_FAILURE;
1543
1544#if !USE_COMPUTED_GOTOS
1545        default:
1546#endif
1547        // Also any unused opcodes:
1548        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1549        TARGET(SRE_OP_SUBPATTERN):
1550        TARGET(SRE_OP_RANGE):
1551        TARGET(SRE_OP_NEGATE):
1552        TARGET(SRE_OP_BIGCHARSET):
1553        TARGET(SRE_OP_CHARSET):
1554            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1555                   pattern[-1]));
1556            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1557
1558    }
1559
1560exit:
1561    ctx_pos = ctx->last_ctx_pos;
1562    jump = ctx->jump;
1563    DATA_POP_DISCARD(ctx);
1564    if (ctx_pos == -1)
1565        return ret;
1566    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1567
1568    switch (jump) {
1569        case JUMP_MAX_UNTIL_2:
1570            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1571            goto jump_max_until_2;
1572        case JUMP_MAX_UNTIL_3:
1573            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1574            goto jump_max_until_3;
1575        case JUMP_MIN_UNTIL_2:
1576            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1577            goto jump_min_until_2;
1578        case JUMP_MIN_UNTIL_3:
1579            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1580            goto jump_min_until_3;
1581        case JUMP_BRANCH:
1582            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1583            goto jump_branch;
1584        case JUMP_MAX_UNTIL_1:
1585            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1586            goto jump_max_until_1;
1587        case JUMP_MIN_UNTIL_1:
1588            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1589            goto jump_min_until_1;
1590        case JUMP_POSS_REPEAT_1:
1591            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1592            goto jump_poss_repeat_1;
1593        case JUMP_POSS_REPEAT_2:
1594            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1595            goto jump_poss_repeat_2;
1596        case JUMP_REPEAT:
1597            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1598            goto jump_repeat;
1599        case JUMP_REPEAT_ONE_1:
1600            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1601            goto jump_repeat_one_1;
1602        case JUMP_REPEAT_ONE_2:
1603            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1604            goto jump_repeat_one_2;
1605        case JUMP_MIN_REPEAT_ONE:
1606            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1607            goto jump_min_repeat_one;
1608        case JUMP_ATOMIC_GROUP:
1609            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1610            goto jump_atomic_group;
1611        case JUMP_ASSERT:
1612            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1613            goto jump_assert;
1614        case JUMP_ASSERT_NOT:
1615            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1616            goto jump_assert_not;
1617        case JUMP_NONE:
1618            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1619                   ptr, ret));
1620            break;
1621    }
1622
1623    return ret; /* should never get here */
1624}
1625
1626/* need to reset capturing groups between two SRE(match) callings in loops */
1627#define RESET_CAPTURE_GROUP() \
1628    do { state->lastmark = state->lastindex = -1; } while (0)
1629
1630LOCAL(Py_ssize_t)
1631SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1632{
1633    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1634    SRE_CHAR* end = (SRE_CHAR *)state->end;
1635    Py_ssize_t status = 0;
1636    Py_ssize_t prefix_len = 0;
1637    Py_ssize_t prefix_skip = 0;
1638    SRE_CODE* prefix = NULL;
1639    SRE_CODE* charset = NULL;
1640    SRE_CODE* overlap = NULL;
1641    int flags = 0;
1642
1643    if (ptr > end)
1644        return 0;
1645
1646    if (pattern[0] == SRE_OP_INFO) {
1647        /* optimization info block */
1648        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1649
1650        flags = pattern[2];
1651
1652        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1653            TRACE(("reject (got %u chars, need %u)\n",
1654                   (unsigned int)(end - ptr), pattern[3]));
1655            return 0;
1656        }
1657        if (pattern[3] > 1) {
1658            /* adjust end point (but make sure we leave at least one
1659               character in there, so literal search will work) */
1660            end -= pattern[3] - 1;
1661            if (end <= ptr)
1662                end = ptr;
1663        }
1664
1665        if (flags & SRE_INFO_PREFIX) {
1666            /* pattern starts with a known prefix */
1667            /* <length> <skip> <prefix data> <overlap data> */
1668            prefix_len = pattern[5];
1669            prefix_skip = pattern[6];
1670            prefix = pattern + 7;
1671            overlap = prefix + prefix_len - 1;
1672        } else if (flags & SRE_INFO_CHARSET)
1673            /* pattern starts with a character from a known set */
1674            /* <charset> */
1675            charset = pattern + 5;
1676
1677        pattern += 1 + pattern[1];
1678    }
1679
1680    TRACE(("prefix = %p %zd %zd\n",
1681           prefix, prefix_len, prefix_skip));
1682    TRACE(("charset = %p\n", charset));
1683
1684    if (prefix_len == 1) {
1685        /* pattern starts with a literal character */
1686        SRE_CHAR c = (SRE_CHAR) prefix[0];
1687#if SIZEOF_SRE_CHAR < 4
1688        if ((SRE_CODE) c != prefix[0])
1689            return 0; /* literal can't match: doesn't fit in char width */
1690#endif
1691        end = (SRE_CHAR *)state->end;
1692        state->must_advance = 0;
1693        while (ptr < end) {
1694            while (*ptr != c) {
1695                if (++ptr >= end)
1696                    return 0;
1697            }
1698            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1699            state->start = ptr;
1700            state->ptr = ptr + prefix_skip;
1701            if (flags & SRE_INFO_LITERAL)
1702                return 1; /* we got all of it */
1703            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1704            if (status != 0)
1705                return status;
1706            ++ptr;
1707            RESET_CAPTURE_GROUP();
1708        }
1709        return 0;
1710    }
1711
1712    if (prefix_len > 1) {
1713        /* pattern starts with a known prefix.  use the overlap
1714           table to skip forward as fast as we possibly can */
1715        Py_ssize_t i = 0;
1716
1717        end = (SRE_CHAR *)state->end;
1718        if (prefix_len > end - ptr)
1719            return 0;
1720#if SIZEOF_SRE_CHAR < 4
1721        for (i = 0; i < prefix_len; i++)
1722            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1723                return 0; /* literal can't match: doesn't fit in char width */
1724#endif
1725        while (ptr < end) {
1726            SRE_CHAR c = (SRE_CHAR) prefix[0];
1727            while (*ptr++ != c) {
1728                if (ptr >= end)
1729                    return 0;
1730            }
1731            if (ptr >= end)
1732                return 0;
1733
1734            i = 1;
1735            state->must_advance = 0;
1736            do {
1737                if (*ptr == (SRE_CHAR) prefix[i]) {
1738                    if (++i != prefix_len) {
1739                        if (++ptr >= end)
1740                            return 0;
1741                        continue;
1742                    }
1743                    /* found a potential match */
1744                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1745                    state->start = ptr - (prefix_len - 1);
1746                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1747                    if (flags & SRE_INFO_LITERAL)
1748                        return 1; /* we got all of it */
1749                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1750                    if (status != 0)
1751                        return status;
1752                    /* close but no cigar -- try again */
1753                    if (++ptr >= end)
1754                        return 0;
1755                    RESET_CAPTURE_GROUP();
1756                }
1757                i = overlap[i];
1758            } while (i != 0);
1759        }
1760        return 0;
1761    }
1762
1763    if (charset) {
1764        /* pattern starts with a character from a known set */
1765        end = (SRE_CHAR *)state->end;
1766        state->must_advance = 0;
1767        for (;;) {
1768            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1769                ptr++;
1770            if (ptr >= end)
1771                return 0;
1772            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1773            state->start = ptr;
1774            state->ptr = ptr;
1775            status = SRE(match)(state, pattern, 0);
1776            if (status != 0)
1777                break;
1778            ptr++;
1779            RESET_CAPTURE_GROUP();
1780        }
1781    } else {
1782        /* general case */
1783        assert(ptr <= end);
1784        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1785        state->start = state->ptr = ptr;
1786        status = SRE(match)(state, pattern, 1);
1787        state->must_advance = 0;
1788        if (status == 0 && pattern[0] == SRE_OP_AT &&
1789            (pattern[1] == SRE_AT_BEGINNING ||
1790             pattern[1] == SRE_AT_BEGINNING_STRING))
1791        {
1792            state->start = state->ptr = ptr = end;
1793            return 0;
1794        }
1795        while (status == 0 && ptr < end) {
1796            ptr++;
1797            RESET_CAPTURE_GROUP();
1798            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1799            state->start = state->ptr = ptr;
1800            status = SRE(match)(state, pattern, 0);
1801        }
1802    }
1803
1804    return status;
1805}
1806
1807#undef SRE_CHAR
1808#undef SIZEOF_SRE_CHAR
1809#undef SRE
1810
1811/* vim:ts=4:sw=4:et
1812*/
1813