17db96d56Sopenharmony_ci/* 27db96d56Sopenharmony_ci * Secret Labs' Regular Expression Engine 37db96d56Sopenharmony_ci * 47db96d56Sopenharmony_ci * regular expression matching engine 57db96d56Sopenharmony_ci * 67db96d56Sopenharmony_ci * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 77db96d56Sopenharmony_ci * 87db96d56Sopenharmony_ci * See the sre.c file for information on usage and redistribution. 97db96d56Sopenharmony_ci */ 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci/* String matching engine */ 127db96d56Sopenharmony_ci 137db96d56Sopenharmony_ci/* This file is included three times, with different character settings */ 147db96d56Sopenharmony_ci 157db96d56Sopenharmony_ciLOCAL(int) 167db96d56Sopenharmony_ciSRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) 177db96d56Sopenharmony_ci{ 187db96d56Sopenharmony_ci /* check if pointer is at given position */ 197db96d56Sopenharmony_ci 207db96d56Sopenharmony_ci Py_ssize_t thisp, thatp; 217db96d56Sopenharmony_ci 227db96d56Sopenharmony_ci switch (at) { 237db96d56Sopenharmony_ci 247db96d56Sopenharmony_ci case SRE_AT_BEGINNING: 257db96d56Sopenharmony_ci case SRE_AT_BEGINNING_STRING: 267db96d56Sopenharmony_ci return ((void*) ptr == state->beginning); 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ci case SRE_AT_BEGINNING_LINE: 297db96d56Sopenharmony_ci return ((void*) ptr == state->beginning || 307db96d56Sopenharmony_ci SRE_IS_LINEBREAK((int) ptr[-1])); 317db96d56Sopenharmony_ci 327db96d56Sopenharmony_ci case SRE_AT_END: 337db96d56Sopenharmony_ci return (((SRE_CHAR *)state->end - ptr == 1 && 347db96d56Sopenharmony_ci SRE_IS_LINEBREAK((int) ptr[0])) || 357db96d56Sopenharmony_ci ((void*) ptr == state->end)); 367db96d56Sopenharmony_ci 377db96d56Sopenharmony_ci case SRE_AT_END_LINE: 387db96d56Sopenharmony_ci return ((void*) ptr == state->end || 397db96d56Sopenharmony_ci SRE_IS_LINEBREAK((int) ptr[0])); 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci case SRE_AT_END_STRING: 427db96d56Sopenharmony_ci return ((void*) ptr == state->end); 437db96d56Sopenharmony_ci 447db96d56Sopenharmony_ci case SRE_AT_BOUNDARY: 457db96d56Sopenharmony_ci if (state->beginning == state->end) 467db96d56Sopenharmony_ci return 0; 477db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 487db96d56Sopenharmony_ci SRE_IS_WORD((int) ptr[-1]) : 0; 497db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 507db96d56Sopenharmony_ci SRE_IS_WORD((int) ptr[0]) : 0; 517db96d56Sopenharmony_ci return thisp != thatp; 527db96d56Sopenharmony_ci 537db96d56Sopenharmony_ci case SRE_AT_NON_BOUNDARY: 547db96d56Sopenharmony_ci if (state->beginning == state->end) 557db96d56Sopenharmony_ci return 0; 567db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 577db96d56Sopenharmony_ci SRE_IS_WORD((int) ptr[-1]) : 0; 587db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 597db96d56Sopenharmony_ci SRE_IS_WORD((int) ptr[0]) : 0; 607db96d56Sopenharmony_ci return thisp == thatp; 617db96d56Sopenharmony_ci 627db96d56Sopenharmony_ci case SRE_AT_LOC_BOUNDARY: 637db96d56Sopenharmony_ci if (state->beginning == state->end) 647db96d56Sopenharmony_ci return 0; 657db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 667db96d56Sopenharmony_ci SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 677db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 687db96d56Sopenharmony_ci SRE_LOC_IS_WORD((int) ptr[0]) : 0; 697db96d56Sopenharmony_ci return thisp != thatp; 707db96d56Sopenharmony_ci 717db96d56Sopenharmony_ci case SRE_AT_LOC_NON_BOUNDARY: 727db96d56Sopenharmony_ci if (state->beginning == state->end) 737db96d56Sopenharmony_ci return 0; 747db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 757db96d56Sopenharmony_ci SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 767db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 777db96d56Sopenharmony_ci SRE_LOC_IS_WORD((int) ptr[0]) : 0; 787db96d56Sopenharmony_ci return thisp == thatp; 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ci case SRE_AT_UNI_BOUNDARY: 817db96d56Sopenharmony_ci if (state->beginning == state->end) 827db96d56Sopenharmony_ci return 0; 837db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 847db96d56Sopenharmony_ci SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 857db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 867db96d56Sopenharmony_ci SRE_UNI_IS_WORD((int) ptr[0]) : 0; 877db96d56Sopenharmony_ci return thisp != thatp; 887db96d56Sopenharmony_ci 897db96d56Sopenharmony_ci case SRE_AT_UNI_NON_BOUNDARY: 907db96d56Sopenharmony_ci if (state->beginning == state->end) 917db96d56Sopenharmony_ci return 0; 927db96d56Sopenharmony_ci thatp = ((void*) ptr > state->beginning) ? 937db96d56Sopenharmony_ci SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 947db96d56Sopenharmony_ci thisp = ((void*) ptr < state->end) ? 957db96d56Sopenharmony_ci SRE_UNI_IS_WORD((int) ptr[0]) : 0; 967db96d56Sopenharmony_ci return thisp == thatp; 977db96d56Sopenharmony_ci 987db96d56Sopenharmony_ci } 997db96d56Sopenharmony_ci 1007db96d56Sopenharmony_ci return 0; 1017db96d56Sopenharmony_ci} 1027db96d56Sopenharmony_ci 1037db96d56Sopenharmony_ciLOCAL(int) 1047db96d56Sopenharmony_ciSRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) 1057db96d56Sopenharmony_ci{ 1067db96d56Sopenharmony_ci /* check if character is a member of the given set */ 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci int ok = 1; 1097db96d56Sopenharmony_ci 1107db96d56Sopenharmony_ci for (;;) { 1117db96d56Sopenharmony_ci switch (*set++) { 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci case SRE_OP_FAILURE: 1147db96d56Sopenharmony_ci return !ok; 1157db96d56Sopenharmony_ci 1167db96d56Sopenharmony_ci case SRE_OP_LITERAL: 1177db96d56Sopenharmony_ci /* <LITERAL> <code> */ 1187db96d56Sopenharmony_ci if (ch == set[0]) 1197db96d56Sopenharmony_ci return ok; 1207db96d56Sopenharmony_ci set++; 1217db96d56Sopenharmony_ci break; 1227db96d56Sopenharmony_ci 1237db96d56Sopenharmony_ci case SRE_OP_CATEGORY: 1247db96d56Sopenharmony_ci /* <CATEGORY> <code> */ 1257db96d56Sopenharmony_ci if (sre_category(set[0], (int) ch)) 1267db96d56Sopenharmony_ci return ok; 1277db96d56Sopenharmony_ci set++; 1287db96d56Sopenharmony_ci break; 1297db96d56Sopenharmony_ci 1307db96d56Sopenharmony_ci case SRE_OP_CHARSET: 1317db96d56Sopenharmony_ci /* <CHARSET> <bitmap> */ 1327db96d56Sopenharmony_ci if (ch < 256 && 1337db96d56Sopenharmony_ci (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) 1347db96d56Sopenharmony_ci return ok; 1357db96d56Sopenharmony_ci set += 256/SRE_CODE_BITS; 1367db96d56Sopenharmony_ci break; 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci case SRE_OP_RANGE: 1397db96d56Sopenharmony_ci /* <RANGE> <lower> <upper> */ 1407db96d56Sopenharmony_ci if (set[0] <= ch && ch <= set[1]) 1417db96d56Sopenharmony_ci return ok; 1427db96d56Sopenharmony_ci set += 2; 1437db96d56Sopenharmony_ci break; 1447db96d56Sopenharmony_ci 1457db96d56Sopenharmony_ci case SRE_OP_RANGE_UNI_IGNORE: 1467db96d56Sopenharmony_ci /* <RANGE_UNI_IGNORE> <lower> <upper> */ 1477db96d56Sopenharmony_ci { 1487db96d56Sopenharmony_ci SRE_CODE uch; 1497db96d56Sopenharmony_ci /* ch is already lower cased */ 1507db96d56Sopenharmony_ci if (set[0] <= ch && ch <= set[1]) 1517db96d56Sopenharmony_ci return ok; 1527db96d56Sopenharmony_ci uch = sre_upper_unicode(ch); 1537db96d56Sopenharmony_ci if (set[0] <= uch && uch <= set[1]) 1547db96d56Sopenharmony_ci return ok; 1557db96d56Sopenharmony_ci set += 2; 1567db96d56Sopenharmony_ci break; 1577db96d56Sopenharmony_ci } 1587db96d56Sopenharmony_ci 1597db96d56Sopenharmony_ci case SRE_OP_NEGATE: 1607db96d56Sopenharmony_ci ok = !ok; 1617db96d56Sopenharmony_ci break; 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci case SRE_OP_BIGCHARSET: 1647db96d56Sopenharmony_ci /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ 1657db96d56Sopenharmony_ci { 1667db96d56Sopenharmony_ci Py_ssize_t count, block; 1677db96d56Sopenharmony_ci count = *(set++); 1687db96d56Sopenharmony_ci 1697db96d56Sopenharmony_ci if (ch < 0x10000u) 1707db96d56Sopenharmony_ci block = ((unsigned char*)set)[ch >> 8]; 1717db96d56Sopenharmony_ci else 1727db96d56Sopenharmony_ci block = -1; 1737db96d56Sopenharmony_ci set += 256/sizeof(SRE_CODE); 1747db96d56Sopenharmony_ci if (block >=0 && 1757db96d56Sopenharmony_ci (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & 1767db96d56Sopenharmony_ci (1u << (ch & (SRE_CODE_BITS-1))))) 1777db96d56Sopenharmony_ci return ok; 1787db96d56Sopenharmony_ci set += count * (256/SRE_CODE_BITS); 1797db96d56Sopenharmony_ci break; 1807db96d56Sopenharmony_ci } 1817db96d56Sopenharmony_ci 1827db96d56Sopenharmony_ci default: 1837db96d56Sopenharmony_ci /* internal error -- there's not much we can do about it 1847db96d56Sopenharmony_ci here, so let's just pretend it didn't match... */ 1857db96d56Sopenharmony_ci return 0; 1867db96d56Sopenharmony_ci } 1877db96d56Sopenharmony_ci } 1887db96d56Sopenharmony_ci} 1897db96d56Sopenharmony_ci 1907db96d56Sopenharmony_ciLOCAL(int) 1917db96d56Sopenharmony_ciSRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) 1927db96d56Sopenharmony_ci{ 1937db96d56Sopenharmony_ci SRE_CODE lo, up; 1947db96d56Sopenharmony_ci lo = sre_lower_locale(ch); 1957db96d56Sopenharmony_ci if (SRE(charset)(state, set, lo)) 1967db96d56Sopenharmony_ci return 1; 1977db96d56Sopenharmony_ci 1987db96d56Sopenharmony_ci up = sre_upper_locale(ch); 1997db96d56Sopenharmony_ci return up != lo && SRE(charset)(state, set, up); 2007db96d56Sopenharmony_ci} 2017db96d56Sopenharmony_ci 2027db96d56Sopenharmony_ciLOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); 2037db96d56Sopenharmony_ci 2047db96d56Sopenharmony_ciLOCAL(Py_ssize_t) 2057db96d56Sopenharmony_ciSRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) 2067db96d56Sopenharmony_ci{ 2077db96d56Sopenharmony_ci SRE_CODE chr; 2087db96d56Sopenharmony_ci SRE_CHAR c; 2097db96d56Sopenharmony_ci const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; 2107db96d56Sopenharmony_ci const SRE_CHAR* end = (const SRE_CHAR *)state->end; 2117db96d56Sopenharmony_ci Py_ssize_t i; 2127db96d56Sopenharmony_ci 2137db96d56Sopenharmony_ci /* adjust end */ 2147db96d56Sopenharmony_ci if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) 2157db96d56Sopenharmony_ci end = ptr + maxcount; 2167db96d56Sopenharmony_ci 2177db96d56Sopenharmony_ci switch (pattern[0]) { 2187db96d56Sopenharmony_ci 2197db96d56Sopenharmony_ci case SRE_OP_IN: 2207db96d56Sopenharmony_ci /* repeated set */ 2217db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); 2227db96d56Sopenharmony_ci while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) 2237db96d56Sopenharmony_ci ptr++; 2247db96d56Sopenharmony_ci break; 2257db96d56Sopenharmony_ci 2267db96d56Sopenharmony_ci case SRE_OP_ANY: 2277db96d56Sopenharmony_ci /* repeated dot wildcard. */ 2287db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); 2297db96d56Sopenharmony_ci while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) 2307db96d56Sopenharmony_ci ptr++; 2317db96d56Sopenharmony_ci break; 2327db96d56Sopenharmony_ci 2337db96d56Sopenharmony_ci case SRE_OP_ANY_ALL: 2347db96d56Sopenharmony_ci /* repeated dot wildcard. skip to the end of the target 2357db96d56Sopenharmony_ci string, and backtrack from there */ 2367db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); 2377db96d56Sopenharmony_ci ptr = end; 2387db96d56Sopenharmony_ci break; 2397db96d56Sopenharmony_ci 2407db96d56Sopenharmony_ci case SRE_OP_LITERAL: 2417db96d56Sopenharmony_ci /* repeated literal */ 2427db96d56Sopenharmony_ci chr = pattern[1]; 2437db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); 2447db96d56Sopenharmony_ci c = (SRE_CHAR) chr; 2457db96d56Sopenharmony_ci#if SIZEOF_SRE_CHAR < 4 2467db96d56Sopenharmony_ci if ((SRE_CODE) c != chr) 2477db96d56Sopenharmony_ci ; /* literal can't match: doesn't fit in char width */ 2487db96d56Sopenharmony_ci else 2497db96d56Sopenharmony_ci#endif 2507db96d56Sopenharmony_ci while (ptr < end && *ptr == c) 2517db96d56Sopenharmony_ci ptr++; 2527db96d56Sopenharmony_ci break; 2537db96d56Sopenharmony_ci 2547db96d56Sopenharmony_ci case SRE_OP_LITERAL_IGNORE: 2557db96d56Sopenharmony_ci /* repeated literal */ 2567db96d56Sopenharmony_ci chr = pattern[1]; 2577db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); 2587db96d56Sopenharmony_ci while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) 2597db96d56Sopenharmony_ci ptr++; 2607db96d56Sopenharmony_ci break; 2617db96d56Sopenharmony_ci 2627db96d56Sopenharmony_ci case SRE_OP_LITERAL_UNI_IGNORE: 2637db96d56Sopenharmony_ci /* repeated literal */ 2647db96d56Sopenharmony_ci chr = pattern[1]; 2657db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); 2667db96d56Sopenharmony_ci while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) 2677db96d56Sopenharmony_ci ptr++; 2687db96d56Sopenharmony_ci break; 2697db96d56Sopenharmony_ci 2707db96d56Sopenharmony_ci case SRE_OP_LITERAL_LOC_IGNORE: 2717db96d56Sopenharmony_ci /* repeated literal */ 2727db96d56Sopenharmony_ci chr = pattern[1]; 2737db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); 2747db96d56Sopenharmony_ci while (ptr < end && char_loc_ignore(chr, *ptr)) 2757db96d56Sopenharmony_ci ptr++; 2767db96d56Sopenharmony_ci break; 2777db96d56Sopenharmony_ci 2787db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL: 2797db96d56Sopenharmony_ci /* repeated non-literal */ 2807db96d56Sopenharmony_ci chr = pattern[1]; 2817db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); 2827db96d56Sopenharmony_ci c = (SRE_CHAR) chr; 2837db96d56Sopenharmony_ci#if SIZEOF_SRE_CHAR < 4 2847db96d56Sopenharmony_ci if ((SRE_CODE) c != chr) 2857db96d56Sopenharmony_ci ptr = end; /* literal can't match: doesn't fit in char width */ 2867db96d56Sopenharmony_ci else 2877db96d56Sopenharmony_ci#endif 2887db96d56Sopenharmony_ci while (ptr < end && *ptr != c) 2897db96d56Sopenharmony_ci ptr++; 2907db96d56Sopenharmony_ci break; 2917db96d56Sopenharmony_ci 2927db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_IGNORE: 2937db96d56Sopenharmony_ci /* repeated non-literal */ 2947db96d56Sopenharmony_ci chr = pattern[1]; 2957db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); 2967db96d56Sopenharmony_ci while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) 2977db96d56Sopenharmony_ci ptr++; 2987db96d56Sopenharmony_ci break; 2997db96d56Sopenharmony_ci 3007db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_UNI_IGNORE: 3017db96d56Sopenharmony_ci /* repeated non-literal */ 3027db96d56Sopenharmony_ci chr = pattern[1]; 3037db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); 3047db96d56Sopenharmony_ci while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) 3057db96d56Sopenharmony_ci ptr++; 3067db96d56Sopenharmony_ci break; 3077db96d56Sopenharmony_ci 3087db96d56Sopenharmony_ci case SRE_OP_NOT_LITERAL_LOC_IGNORE: 3097db96d56Sopenharmony_ci /* repeated non-literal */ 3107db96d56Sopenharmony_ci chr = pattern[1]; 3117db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); 3127db96d56Sopenharmony_ci while (ptr < end && !char_loc_ignore(chr, *ptr)) 3137db96d56Sopenharmony_ci ptr++; 3147db96d56Sopenharmony_ci break; 3157db96d56Sopenharmony_ci 3167db96d56Sopenharmony_ci default: 3177db96d56Sopenharmony_ci /* repeated single character pattern */ 3187db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); 3197db96d56Sopenharmony_ci while ((SRE_CHAR*) state->ptr < end) { 3207db96d56Sopenharmony_ci i = SRE(match)(state, pattern, 0); 3217db96d56Sopenharmony_ci if (i < 0) 3227db96d56Sopenharmony_ci return i; 3237db96d56Sopenharmony_ci if (!i) 3247db96d56Sopenharmony_ci break; 3257db96d56Sopenharmony_ci } 3267db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, 3277db96d56Sopenharmony_ci (SRE_CHAR*) state->ptr - ptr)); 3287db96d56Sopenharmony_ci return (SRE_CHAR*) state->ptr - ptr; 3297db96d56Sopenharmony_ci } 3307db96d56Sopenharmony_ci 3317db96d56Sopenharmony_ci TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, 3327db96d56Sopenharmony_ci ptr - (SRE_CHAR*) state->ptr)); 3337db96d56Sopenharmony_ci return ptr - (SRE_CHAR*) state->ptr; 3347db96d56Sopenharmony_ci} 3357db96d56Sopenharmony_ci 3367db96d56Sopenharmony_ci/* The macros below should be used to protect recursive SRE(match)() 3377db96d56Sopenharmony_ci * calls that *failed* and do *not* return immediately (IOW, those 3387db96d56Sopenharmony_ci * that will backtrack). Explaining: 3397db96d56Sopenharmony_ci * 3407db96d56Sopenharmony_ci * - Recursive SRE(match)() returned true: that's usually a success 3417db96d56Sopenharmony_ci * (besides atypical cases like ASSERT_NOT), therefore there's no 3427db96d56Sopenharmony_ci * reason to restore lastmark; 3437db96d56Sopenharmony_ci * 3447db96d56Sopenharmony_ci * - Recursive SRE(match)() returned false but the current SRE(match)() 3457db96d56Sopenharmony_ci * is returning to the caller: If the current SRE(match)() is the 3467db96d56Sopenharmony_ci * top function of the recursion, returning false will be a matching 3477db96d56Sopenharmony_ci * failure, and it doesn't matter where lastmark is pointing to. 3487db96d56Sopenharmony_ci * If it's *not* the top function, it will be a recursive SRE(match)() 3497db96d56Sopenharmony_ci * failure by itself, and the calling SRE(match)() will have to deal 3507db96d56Sopenharmony_ci * with the failure by the same rules explained here (it will restore 3517db96d56Sopenharmony_ci * lastmark by itself if necessary); 3527db96d56Sopenharmony_ci * 3537db96d56Sopenharmony_ci * - Recursive SRE(match)() returned false, and will continue the 3547db96d56Sopenharmony_ci * outside 'for' loop: must be protected when breaking, since the next 3557db96d56Sopenharmony_ci * OP could potentially depend on lastmark; 3567db96d56Sopenharmony_ci * 3577db96d56Sopenharmony_ci * - Recursive SRE(match)() returned false, and will be called again 3587db96d56Sopenharmony_ci * inside a local for/while loop: must be protected between each 3597db96d56Sopenharmony_ci * loop iteration, since the recursive SRE(match)() could do anything, 3607db96d56Sopenharmony_ci * and could potentially depend on lastmark. 3617db96d56Sopenharmony_ci * 3627db96d56Sopenharmony_ci * For more information, check the discussion at SF patch #712900. 3637db96d56Sopenharmony_ci */ 3647db96d56Sopenharmony_ci#define LASTMARK_SAVE() \ 3657db96d56Sopenharmony_ci do { \ 3667db96d56Sopenharmony_ci ctx->lastmark = state->lastmark; \ 3677db96d56Sopenharmony_ci ctx->lastindex = state->lastindex; \ 3687db96d56Sopenharmony_ci } while (0) 3697db96d56Sopenharmony_ci#define LASTMARK_RESTORE() \ 3707db96d56Sopenharmony_ci do { \ 3717db96d56Sopenharmony_ci state->lastmark = ctx->lastmark; \ 3727db96d56Sopenharmony_ci state->lastindex = ctx->lastindex; \ 3737db96d56Sopenharmony_ci } while (0) 3747db96d56Sopenharmony_ci 3757db96d56Sopenharmony_ci#define RETURN_ERROR(i) do { return i; } while(0) 3767db96d56Sopenharmony_ci#define RETURN_FAILURE do { ret = 0; goto exit; } while(0) 3777db96d56Sopenharmony_ci#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) 3787db96d56Sopenharmony_ci 3797db96d56Sopenharmony_ci#define RETURN_ON_ERROR(i) \ 3807db96d56Sopenharmony_ci do { if (i < 0) RETURN_ERROR(i); } while (0) 3817db96d56Sopenharmony_ci#define RETURN_ON_SUCCESS(i) \ 3827db96d56Sopenharmony_ci do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) 3837db96d56Sopenharmony_ci#define RETURN_ON_FAILURE(i) \ 3847db96d56Sopenharmony_ci do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) 3857db96d56Sopenharmony_ci 3867db96d56Sopenharmony_ci#define DATA_STACK_ALLOC(state, type, ptr) \ 3877db96d56Sopenharmony_cido { \ 3887db96d56Sopenharmony_ci alloc_pos = state->data_stack_base; \ 3897db96d56Sopenharmony_ci TRACE(("allocating %s in %zd (%zd)\n", \ 3907db96d56Sopenharmony_ci Py_STRINGIFY(type), alloc_pos, sizeof(type))); \ 3917db96d56Sopenharmony_ci if (sizeof(type) > state->data_stack_size - alloc_pos) { \ 3927db96d56Sopenharmony_ci int j = data_stack_grow(state, sizeof(type)); \ 3937db96d56Sopenharmony_ci if (j < 0) return j; \ 3947db96d56Sopenharmony_ci if (ctx_pos != -1) \ 3957db96d56Sopenharmony_ci DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 3967db96d56Sopenharmony_ci } \ 3977db96d56Sopenharmony_ci ptr = (type*)(state->data_stack+alloc_pos); \ 3987db96d56Sopenharmony_ci state->data_stack_base += sizeof(type); \ 3997db96d56Sopenharmony_ci} while (0) 4007db96d56Sopenharmony_ci 4017db96d56Sopenharmony_ci#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ 4027db96d56Sopenharmony_cido { \ 4037db96d56Sopenharmony_ci TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \ 4047db96d56Sopenharmony_ci ptr = (type*)(state->data_stack+pos); \ 4057db96d56Sopenharmony_ci} while (0) 4067db96d56Sopenharmony_ci 4077db96d56Sopenharmony_ci#define DATA_STACK_PUSH(state, data, size) \ 4087db96d56Sopenharmony_cido { \ 4097db96d56Sopenharmony_ci TRACE(("copy data in %p to %zd (%zd)\n", \ 4107db96d56Sopenharmony_ci data, state->data_stack_base, size)); \ 4117db96d56Sopenharmony_ci if (size > state->data_stack_size - state->data_stack_base) { \ 4127db96d56Sopenharmony_ci int j = data_stack_grow(state, size); \ 4137db96d56Sopenharmony_ci if (j < 0) return j; \ 4147db96d56Sopenharmony_ci if (ctx_pos != -1) \ 4157db96d56Sopenharmony_ci DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 4167db96d56Sopenharmony_ci } \ 4177db96d56Sopenharmony_ci memcpy(state->data_stack+state->data_stack_base, data, size); \ 4187db96d56Sopenharmony_ci state->data_stack_base += size; \ 4197db96d56Sopenharmony_ci} while (0) 4207db96d56Sopenharmony_ci 4217db96d56Sopenharmony_ci/* We add an explicit cast to memcpy here because MSVC has a bug when 4227db96d56Sopenharmony_ci compiling C code where it believes that `const void**` cannot be 4237db96d56Sopenharmony_ci safely casted to `void*`, see bpo-39943 for details. */ 4247db96d56Sopenharmony_ci#define DATA_STACK_POP(state, data, size, discard) \ 4257db96d56Sopenharmony_cido { \ 4267db96d56Sopenharmony_ci TRACE(("copy data to %p from %zd (%zd)\n", \ 4277db96d56Sopenharmony_ci data, state->data_stack_base-size, size)); \ 4287db96d56Sopenharmony_ci memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \ 4297db96d56Sopenharmony_ci if (discard) \ 4307db96d56Sopenharmony_ci state->data_stack_base -= size; \ 4317db96d56Sopenharmony_ci} while (0) 4327db96d56Sopenharmony_ci 4337db96d56Sopenharmony_ci#define DATA_STACK_POP_DISCARD(state, size) \ 4347db96d56Sopenharmony_cido { \ 4357db96d56Sopenharmony_ci TRACE(("discard data from %zd (%zd)\n", \ 4367db96d56Sopenharmony_ci state->data_stack_base-size, size)); \ 4377db96d56Sopenharmony_ci state->data_stack_base -= size; \ 4387db96d56Sopenharmony_ci} while(0) 4397db96d56Sopenharmony_ci 4407db96d56Sopenharmony_ci#define DATA_PUSH(x) \ 4417db96d56Sopenharmony_ci DATA_STACK_PUSH(state, (x), sizeof(*(x))) 4427db96d56Sopenharmony_ci#define DATA_POP(x) \ 4437db96d56Sopenharmony_ci DATA_STACK_POP(state, (x), sizeof(*(x)), 1) 4447db96d56Sopenharmony_ci#define DATA_POP_DISCARD(x) \ 4457db96d56Sopenharmony_ci DATA_STACK_POP_DISCARD(state, sizeof(*(x))) 4467db96d56Sopenharmony_ci#define DATA_ALLOC(t,p) \ 4477db96d56Sopenharmony_ci DATA_STACK_ALLOC(state, t, p) 4487db96d56Sopenharmony_ci#define DATA_LOOKUP_AT(t,p,pos) \ 4497db96d56Sopenharmony_ci DATA_STACK_LOOKUP_AT(state,t,p,pos) 4507db96d56Sopenharmony_ci 4517db96d56Sopenharmony_ci#define MARK_PUSH(lastmark) \ 4527db96d56Sopenharmony_ci do if (lastmark >= 0) { \ 4537db96d56Sopenharmony_ci size_t _marks_size = (lastmark+1) * sizeof(void*); \ 4547db96d56Sopenharmony_ci DATA_STACK_PUSH(state, state->mark, _marks_size); \ 4557db96d56Sopenharmony_ci } while (0) 4567db96d56Sopenharmony_ci#define MARK_POP(lastmark) \ 4577db96d56Sopenharmony_ci do if (lastmark >= 0) { \ 4587db96d56Sopenharmony_ci size_t _marks_size = (lastmark+1) * sizeof(void*); \ 4597db96d56Sopenharmony_ci DATA_STACK_POP(state, state->mark, _marks_size, 1); \ 4607db96d56Sopenharmony_ci } while (0) 4617db96d56Sopenharmony_ci#define MARK_POP_KEEP(lastmark) \ 4627db96d56Sopenharmony_ci do if (lastmark >= 0) { \ 4637db96d56Sopenharmony_ci size_t _marks_size = (lastmark+1) * sizeof(void*); \ 4647db96d56Sopenharmony_ci DATA_STACK_POP(state, state->mark, _marks_size, 0); \ 4657db96d56Sopenharmony_ci } while (0) 4667db96d56Sopenharmony_ci#define MARK_POP_DISCARD(lastmark) \ 4677db96d56Sopenharmony_ci do if (lastmark >= 0) { \ 4687db96d56Sopenharmony_ci size_t _marks_size = (lastmark+1) * sizeof(void*); \ 4697db96d56Sopenharmony_ci DATA_STACK_POP_DISCARD(state, _marks_size); \ 4707db96d56Sopenharmony_ci } while (0) 4717db96d56Sopenharmony_ci 4727db96d56Sopenharmony_ci#define JUMP_NONE 0 4737db96d56Sopenharmony_ci#define JUMP_MAX_UNTIL_1 1 4747db96d56Sopenharmony_ci#define JUMP_MAX_UNTIL_2 2 4757db96d56Sopenharmony_ci#define JUMP_MAX_UNTIL_3 3 4767db96d56Sopenharmony_ci#define JUMP_MIN_UNTIL_1 4 4777db96d56Sopenharmony_ci#define JUMP_MIN_UNTIL_2 5 4787db96d56Sopenharmony_ci#define JUMP_MIN_UNTIL_3 6 4797db96d56Sopenharmony_ci#define JUMP_REPEAT 7 4807db96d56Sopenharmony_ci#define JUMP_REPEAT_ONE_1 8 4817db96d56Sopenharmony_ci#define JUMP_REPEAT_ONE_2 9 4827db96d56Sopenharmony_ci#define JUMP_MIN_REPEAT_ONE 10 4837db96d56Sopenharmony_ci#define JUMP_BRANCH 11 4847db96d56Sopenharmony_ci#define JUMP_ASSERT 12 4857db96d56Sopenharmony_ci#define JUMP_ASSERT_NOT 13 4867db96d56Sopenharmony_ci#define JUMP_POSS_REPEAT_1 14 4877db96d56Sopenharmony_ci#define JUMP_POSS_REPEAT_2 15 4887db96d56Sopenharmony_ci#define JUMP_ATOMIC_GROUP 16 4897db96d56Sopenharmony_ci 4907db96d56Sopenharmony_ci#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \ 4917db96d56Sopenharmony_ci ctx->pattern = pattern; \ 4927db96d56Sopenharmony_ci ctx->ptr = ptr; \ 4937db96d56Sopenharmony_ci DATA_ALLOC(SRE(match_context), nextctx); \ 4947db96d56Sopenharmony_ci nextctx->pattern = nextpattern; \ 4957db96d56Sopenharmony_ci nextctx->toplevel = toplevel_; \ 4967db96d56Sopenharmony_ci nextctx->jump = jumpvalue; \ 4977db96d56Sopenharmony_ci nextctx->last_ctx_pos = ctx_pos; \ 4987db96d56Sopenharmony_ci pattern = nextpattern; \ 4997db96d56Sopenharmony_ci ctx_pos = alloc_pos; \ 5007db96d56Sopenharmony_ci ctx = nextctx; \ 5017db96d56Sopenharmony_ci goto entrance; \ 5027db96d56Sopenharmony_ci jumplabel: \ 5037db96d56Sopenharmony_ci pattern = ctx->pattern; \ 5047db96d56Sopenharmony_ci ptr = ctx->ptr; 5057db96d56Sopenharmony_ci 5067db96d56Sopenharmony_ci#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ 5077db96d56Sopenharmony_ci DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel) 5087db96d56Sopenharmony_ci 5097db96d56Sopenharmony_ci#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ 5107db96d56Sopenharmony_ci DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) 5117db96d56Sopenharmony_ci 5127db96d56Sopenharmony_citypedef struct { 5137db96d56Sopenharmony_ci Py_ssize_t count; 5147db96d56Sopenharmony_ci union { 5157db96d56Sopenharmony_ci SRE_CODE chr; 5167db96d56Sopenharmony_ci SRE_REPEAT* rep; 5177db96d56Sopenharmony_ci } u; 5187db96d56Sopenharmony_ci int lastmark; 5197db96d56Sopenharmony_ci int lastindex; 5207db96d56Sopenharmony_ci const SRE_CODE* pattern; 5217db96d56Sopenharmony_ci const SRE_CHAR* ptr; 5227db96d56Sopenharmony_ci int toplevel; 5237db96d56Sopenharmony_ci int jump; 5247db96d56Sopenharmony_ci Py_ssize_t last_ctx_pos; 5257db96d56Sopenharmony_ci} SRE(match_context); 5267db96d56Sopenharmony_ci 5277db96d56Sopenharmony_ci#define MAYBE_CHECK_SIGNALS \ 5287db96d56Sopenharmony_ci do { \ 5297db96d56Sopenharmony_ci if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \ 5307db96d56Sopenharmony_ci RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ 5317db96d56Sopenharmony_ci } \ 5327db96d56Sopenharmony_ci } while (0) 5337db96d56Sopenharmony_ci 5347db96d56Sopenharmony_ci#ifdef HAVE_COMPUTED_GOTOS 5357db96d56Sopenharmony_ci #ifndef USE_COMPUTED_GOTOS 5367db96d56Sopenharmony_ci #define USE_COMPUTED_GOTOS 1 5377db96d56Sopenharmony_ci #endif 5387db96d56Sopenharmony_ci#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS 5397db96d56Sopenharmony_ci #error "Computed gotos are not supported on this compiler." 5407db96d56Sopenharmony_ci#else 5417db96d56Sopenharmony_ci #undef USE_COMPUTED_GOTOS 5427db96d56Sopenharmony_ci #define USE_COMPUTED_GOTOS 0 5437db96d56Sopenharmony_ci#endif 5447db96d56Sopenharmony_ci 5457db96d56Sopenharmony_ci#if USE_COMPUTED_GOTOS 5467db96d56Sopenharmony_ci #define TARGET(OP) TARGET_ ## OP 5477db96d56Sopenharmony_ci #define DISPATCH \ 5487db96d56Sopenharmony_ci do { \ 5497db96d56Sopenharmony_ci MAYBE_CHECK_SIGNALS; \ 5507db96d56Sopenharmony_ci goto *sre_targets[*pattern++]; \ 5517db96d56Sopenharmony_ci } while (0) 5527db96d56Sopenharmony_ci#else 5537db96d56Sopenharmony_ci #define TARGET(OP) case OP 5547db96d56Sopenharmony_ci #define DISPATCH goto dispatch 5557db96d56Sopenharmony_ci#endif 5567db96d56Sopenharmony_ci 5577db96d56Sopenharmony_ci/* check if string matches the given pattern. returns <0 for 5587db96d56Sopenharmony_ci error, 0 for failure, and 1 for success */ 5597db96d56Sopenharmony_ciLOCAL(Py_ssize_t) 5607db96d56Sopenharmony_ciSRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) 5617db96d56Sopenharmony_ci{ 5627db96d56Sopenharmony_ci const SRE_CHAR* end = (const SRE_CHAR *)state->end; 5637db96d56Sopenharmony_ci Py_ssize_t alloc_pos, ctx_pos = -1; 5647db96d56Sopenharmony_ci Py_ssize_t ret = 0; 5657db96d56Sopenharmony_ci int jump; 5667db96d56Sopenharmony_ci unsigned int sigcount=0; 5677db96d56Sopenharmony_ci 5687db96d56Sopenharmony_ci SRE(match_context)* ctx; 5697db96d56Sopenharmony_ci SRE(match_context)* nextctx; 5707db96d56Sopenharmony_ci 5717db96d56Sopenharmony_ci TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); 5727db96d56Sopenharmony_ci 5737db96d56Sopenharmony_ci DATA_ALLOC(SRE(match_context), ctx); 5747db96d56Sopenharmony_ci ctx->last_ctx_pos = -1; 5757db96d56Sopenharmony_ci ctx->jump = JUMP_NONE; 5767db96d56Sopenharmony_ci ctx->toplevel = toplevel; 5777db96d56Sopenharmony_ci ctx_pos = alloc_pos; 5787db96d56Sopenharmony_ci 5797db96d56Sopenharmony_ci#if USE_COMPUTED_GOTOS 5807db96d56Sopenharmony_ci#include "sre_targets.h" 5817db96d56Sopenharmony_ci#endif 5827db96d56Sopenharmony_ci 5837db96d56Sopenharmony_cientrance: 5847db96d56Sopenharmony_ci 5857db96d56Sopenharmony_ci ; // Fashion statement. 5867db96d56Sopenharmony_ci const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; 5877db96d56Sopenharmony_ci 5887db96d56Sopenharmony_ci if (pattern[0] == SRE_OP_INFO) { 5897db96d56Sopenharmony_ci /* optimization info block */ 5907db96d56Sopenharmony_ci /* <INFO> <1=skip> <2=flags> <3=min> ... */ 5917db96d56Sopenharmony_ci if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { 5927db96d56Sopenharmony_ci TRACE(("reject (got %zd chars, need %zd)\n", 5937db96d56Sopenharmony_ci end - ptr, (Py_ssize_t) pattern[3])); 5947db96d56Sopenharmony_ci RETURN_FAILURE; 5957db96d56Sopenharmony_ci } 5967db96d56Sopenharmony_ci pattern += pattern[1] + 1; 5977db96d56Sopenharmony_ci } 5987db96d56Sopenharmony_ci 5997db96d56Sopenharmony_ci#if USE_COMPUTED_GOTOS 6007db96d56Sopenharmony_ci DISPATCH; 6017db96d56Sopenharmony_ci#else 6027db96d56Sopenharmony_cidispatch: 6037db96d56Sopenharmony_ci MAYBE_CHECK_SIGNALS; 6047db96d56Sopenharmony_ci switch (*pattern++) 6057db96d56Sopenharmony_ci#endif 6067db96d56Sopenharmony_ci { 6077db96d56Sopenharmony_ci 6087db96d56Sopenharmony_ci TARGET(SRE_OP_MARK): 6097db96d56Sopenharmony_ci /* set mark */ 6107db96d56Sopenharmony_ci /* <MARK> <gid> */ 6117db96d56Sopenharmony_ci TRACE(("|%p|%p|MARK %d\n", pattern, 6127db96d56Sopenharmony_ci ptr, pattern[0])); 6137db96d56Sopenharmony_ci { 6147db96d56Sopenharmony_ci int i = pattern[0]; 6157db96d56Sopenharmony_ci if (i & 1) 6167db96d56Sopenharmony_ci state->lastindex = i/2 + 1; 6177db96d56Sopenharmony_ci if (i > state->lastmark) { 6187db96d56Sopenharmony_ci /* state->lastmark is the highest valid index in the 6197db96d56Sopenharmony_ci state->mark array. If it is increased by more than 1, 6207db96d56Sopenharmony_ci the intervening marks must be set to NULL to signal 6217db96d56Sopenharmony_ci that these marks have not been encountered. */ 6227db96d56Sopenharmony_ci int j = state->lastmark + 1; 6237db96d56Sopenharmony_ci while (j < i) 6247db96d56Sopenharmony_ci state->mark[j++] = NULL; 6257db96d56Sopenharmony_ci state->lastmark = i; 6267db96d56Sopenharmony_ci } 6277db96d56Sopenharmony_ci state->mark[i] = ptr; 6287db96d56Sopenharmony_ci } 6297db96d56Sopenharmony_ci pattern++; 6307db96d56Sopenharmony_ci DISPATCH; 6317db96d56Sopenharmony_ci 6327db96d56Sopenharmony_ci TARGET(SRE_OP_LITERAL): 6337db96d56Sopenharmony_ci /* match literal string */ 6347db96d56Sopenharmony_ci /* <LITERAL> <code> */ 6357db96d56Sopenharmony_ci TRACE(("|%p|%p|LITERAL %d\n", pattern, 6367db96d56Sopenharmony_ci ptr, *pattern)); 6377db96d56Sopenharmony_ci if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) 6387db96d56Sopenharmony_ci RETURN_FAILURE; 6397db96d56Sopenharmony_ci pattern++; 6407db96d56Sopenharmony_ci ptr++; 6417db96d56Sopenharmony_ci DISPATCH; 6427db96d56Sopenharmony_ci 6437db96d56Sopenharmony_ci TARGET(SRE_OP_NOT_LITERAL): 6447db96d56Sopenharmony_ci /* match anything that is not literal character */ 6457db96d56Sopenharmony_ci /* <NOT_LITERAL> <code> */ 6467db96d56Sopenharmony_ci TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, 6477db96d56Sopenharmony_ci ptr, *pattern)); 6487db96d56Sopenharmony_ci if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) 6497db96d56Sopenharmony_ci RETURN_FAILURE; 6507db96d56Sopenharmony_ci pattern++; 6517db96d56Sopenharmony_ci ptr++; 6527db96d56Sopenharmony_ci DISPATCH; 6537db96d56Sopenharmony_ci 6547db96d56Sopenharmony_ci TARGET(SRE_OP_SUCCESS): 6557db96d56Sopenharmony_ci /* end of pattern */ 6567db96d56Sopenharmony_ci TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); 6577db96d56Sopenharmony_ci if (ctx->toplevel && 6587db96d56Sopenharmony_ci ((state->match_all && ptr != state->end) || 6597db96d56Sopenharmony_ci (state->must_advance && ptr == state->start))) 6607db96d56Sopenharmony_ci { 6617db96d56Sopenharmony_ci RETURN_FAILURE; 6627db96d56Sopenharmony_ci } 6637db96d56Sopenharmony_ci state->ptr = ptr; 6647db96d56Sopenharmony_ci RETURN_SUCCESS; 6657db96d56Sopenharmony_ci 6667db96d56Sopenharmony_ci TARGET(SRE_OP_AT): 6677db96d56Sopenharmony_ci /* match at given position */ 6687db96d56Sopenharmony_ci /* <AT> <code> */ 6697db96d56Sopenharmony_ci TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); 6707db96d56Sopenharmony_ci if (!SRE(at)(state, ptr, *pattern)) 6717db96d56Sopenharmony_ci RETURN_FAILURE; 6727db96d56Sopenharmony_ci pattern++; 6737db96d56Sopenharmony_ci DISPATCH; 6747db96d56Sopenharmony_ci 6757db96d56Sopenharmony_ci TARGET(SRE_OP_CATEGORY): 6767db96d56Sopenharmony_ci /* match at given category */ 6777db96d56Sopenharmony_ci /* <CATEGORY> <code> */ 6787db96d56Sopenharmony_ci TRACE(("|%p|%p|CATEGORY %d\n", pattern, 6797db96d56Sopenharmony_ci ptr, *pattern)); 6807db96d56Sopenharmony_ci if (ptr >= end || !sre_category(pattern[0], ptr[0])) 6817db96d56Sopenharmony_ci RETURN_FAILURE; 6827db96d56Sopenharmony_ci pattern++; 6837db96d56Sopenharmony_ci ptr++; 6847db96d56Sopenharmony_ci DISPATCH; 6857db96d56Sopenharmony_ci 6867db96d56Sopenharmony_ci TARGET(SRE_OP_ANY): 6877db96d56Sopenharmony_ci /* match anything (except a newline) */ 6887db96d56Sopenharmony_ci /* <ANY> */ 6897db96d56Sopenharmony_ci TRACE(("|%p|%p|ANY\n", pattern, ptr)); 6907db96d56Sopenharmony_ci if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) 6917db96d56Sopenharmony_ci RETURN_FAILURE; 6927db96d56Sopenharmony_ci ptr++; 6937db96d56Sopenharmony_ci DISPATCH; 6947db96d56Sopenharmony_ci 6957db96d56Sopenharmony_ci TARGET(SRE_OP_ANY_ALL): 6967db96d56Sopenharmony_ci /* match anything */ 6977db96d56Sopenharmony_ci /* <ANY_ALL> */ 6987db96d56Sopenharmony_ci TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); 6997db96d56Sopenharmony_ci if (ptr >= end) 7007db96d56Sopenharmony_ci RETURN_FAILURE; 7017db96d56Sopenharmony_ci ptr++; 7027db96d56Sopenharmony_ci DISPATCH; 7037db96d56Sopenharmony_ci 7047db96d56Sopenharmony_ci TARGET(SRE_OP_IN): 7057db96d56Sopenharmony_ci /* match set member (or non_member) */ 7067db96d56Sopenharmony_ci /* <IN> <skip> <set> */ 7077db96d56Sopenharmony_ci TRACE(("|%p|%p|IN\n", pattern, ptr)); 7087db96d56Sopenharmony_ci if (ptr >= end || 7097db96d56Sopenharmony_ci !SRE(charset)(state, pattern + 1, *ptr)) 7107db96d56Sopenharmony_ci RETURN_FAILURE; 7117db96d56Sopenharmony_ci pattern += pattern[0]; 7127db96d56Sopenharmony_ci ptr++; 7137db96d56Sopenharmony_ci DISPATCH; 7147db96d56Sopenharmony_ci 7157db96d56Sopenharmony_ci TARGET(SRE_OP_LITERAL_IGNORE): 7167db96d56Sopenharmony_ci TRACE(("|%p|%p|LITERAL_IGNORE %d\n", 7177db96d56Sopenharmony_ci pattern, ptr, pattern[0])); 7187db96d56Sopenharmony_ci if (ptr >= end || 7197db96d56Sopenharmony_ci sre_lower_ascii(*ptr) != *pattern) 7207db96d56Sopenharmony_ci RETURN_FAILURE; 7217db96d56Sopenharmony_ci pattern++; 7227db96d56Sopenharmony_ci ptr++; 7237db96d56Sopenharmony_ci DISPATCH; 7247db96d56Sopenharmony_ci 7257db96d56Sopenharmony_ci TARGET(SRE_OP_LITERAL_UNI_IGNORE): 7267db96d56Sopenharmony_ci TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", 7277db96d56Sopenharmony_ci pattern, ptr, pattern[0])); 7287db96d56Sopenharmony_ci if (ptr >= end || 7297db96d56Sopenharmony_ci sre_lower_unicode(*ptr) != *pattern) 7307db96d56Sopenharmony_ci RETURN_FAILURE; 7317db96d56Sopenharmony_ci pattern++; 7327db96d56Sopenharmony_ci ptr++; 7337db96d56Sopenharmony_ci DISPATCH; 7347db96d56Sopenharmony_ci 7357db96d56Sopenharmony_ci TARGET(SRE_OP_LITERAL_LOC_IGNORE): 7367db96d56Sopenharmony_ci TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", 7377db96d56Sopenharmony_ci pattern, ptr, pattern[0])); 7387db96d56Sopenharmony_ci if (ptr >= end 7397db96d56Sopenharmony_ci || !char_loc_ignore(*pattern, *ptr)) 7407db96d56Sopenharmony_ci RETURN_FAILURE; 7417db96d56Sopenharmony_ci pattern++; 7427db96d56Sopenharmony_ci ptr++; 7437db96d56Sopenharmony_ci DISPATCH; 7447db96d56Sopenharmony_ci 7457db96d56Sopenharmony_ci TARGET(SRE_OP_NOT_LITERAL_IGNORE): 7467db96d56Sopenharmony_ci TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", 7477db96d56Sopenharmony_ci pattern, ptr, *pattern)); 7487db96d56Sopenharmony_ci if (ptr >= end || 7497db96d56Sopenharmony_ci sre_lower_ascii(*ptr) == *pattern) 7507db96d56Sopenharmony_ci RETURN_FAILURE; 7517db96d56Sopenharmony_ci pattern++; 7527db96d56Sopenharmony_ci ptr++; 7537db96d56Sopenharmony_ci DISPATCH; 7547db96d56Sopenharmony_ci 7557db96d56Sopenharmony_ci TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): 7567db96d56Sopenharmony_ci TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", 7577db96d56Sopenharmony_ci pattern, ptr, *pattern)); 7587db96d56Sopenharmony_ci if (ptr >= end || 7597db96d56Sopenharmony_ci sre_lower_unicode(*ptr) == *pattern) 7607db96d56Sopenharmony_ci RETURN_FAILURE; 7617db96d56Sopenharmony_ci pattern++; 7627db96d56Sopenharmony_ci ptr++; 7637db96d56Sopenharmony_ci DISPATCH; 7647db96d56Sopenharmony_ci 7657db96d56Sopenharmony_ci TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): 7667db96d56Sopenharmony_ci TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", 7677db96d56Sopenharmony_ci pattern, ptr, *pattern)); 7687db96d56Sopenharmony_ci if (ptr >= end 7697db96d56Sopenharmony_ci || char_loc_ignore(*pattern, *ptr)) 7707db96d56Sopenharmony_ci RETURN_FAILURE; 7717db96d56Sopenharmony_ci pattern++; 7727db96d56Sopenharmony_ci ptr++; 7737db96d56Sopenharmony_ci DISPATCH; 7747db96d56Sopenharmony_ci 7757db96d56Sopenharmony_ci TARGET(SRE_OP_IN_IGNORE): 7767db96d56Sopenharmony_ci TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); 7777db96d56Sopenharmony_ci if (ptr >= end 7787db96d56Sopenharmony_ci || !SRE(charset)(state, pattern+1, 7797db96d56Sopenharmony_ci (SRE_CODE)sre_lower_ascii(*ptr))) 7807db96d56Sopenharmony_ci RETURN_FAILURE; 7817db96d56Sopenharmony_ci pattern += pattern[0]; 7827db96d56Sopenharmony_ci ptr++; 7837db96d56Sopenharmony_ci DISPATCH; 7847db96d56Sopenharmony_ci 7857db96d56Sopenharmony_ci TARGET(SRE_OP_IN_UNI_IGNORE): 7867db96d56Sopenharmony_ci TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); 7877db96d56Sopenharmony_ci if (ptr >= end 7887db96d56Sopenharmony_ci || !SRE(charset)(state, pattern+1, 7897db96d56Sopenharmony_ci (SRE_CODE)sre_lower_unicode(*ptr))) 7907db96d56Sopenharmony_ci RETURN_FAILURE; 7917db96d56Sopenharmony_ci pattern += pattern[0]; 7927db96d56Sopenharmony_ci ptr++; 7937db96d56Sopenharmony_ci DISPATCH; 7947db96d56Sopenharmony_ci 7957db96d56Sopenharmony_ci TARGET(SRE_OP_IN_LOC_IGNORE): 7967db96d56Sopenharmony_ci TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); 7977db96d56Sopenharmony_ci if (ptr >= end 7987db96d56Sopenharmony_ci || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) 7997db96d56Sopenharmony_ci RETURN_FAILURE; 8007db96d56Sopenharmony_ci pattern += pattern[0]; 8017db96d56Sopenharmony_ci ptr++; 8027db96d56Sopenharmony_ci DISPATCH; 8037db96d56Sopenharmony_ci 8047db96d56Sopenharmony_ci TARGET(SRE_OP_JUMP): 8057db96d56Sopenharmony_ci TARGET(SRE_OP_INFO): 8067db96d56Sopenharmony_ci /* jump forward */ 8077db96d56Sopenharmony_ci /* <JUMP> <offset> */ 8087db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP %d\n", pattern, 8097db96d56Sopenharmony_ci ptr, pattern[0])); 8107db96d56Sopenharmony_ci pattern += pattern[0]; 8117db96d56Sopenharmony_ci DISPATCH; 8127db96d56Sopenharmony_ci 8137db96d56Sopenharmony_ci TARGET(SRE_OP_BRANCH): 8147db96d56Sopenharmony_ci /* alternation */ 8157db96d56Sopenharmony_ci /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ 8167db96d56Sopenharmony_ci TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); 8177db96d56Sopenharmony_ci LASTMARK_SAVE(); 8187db96d56Sopenharmony_ci if (state->repeat) 8197db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 8207db96d56Sopenharmony_ci for (; pattern[0]; pattern += pattern[0]) { 8217db96d56Sopenharmony_ci if (pattern[1] == SRE_OP_LITERAL && 8227db96d56Sopenharmony_ci (ptr >= end || 8237db96d56Sopenharmony_ci (SRE_CODE) *ptr != pattern[2])) 8247db96d56Sopenharmony_ci continue; 8257db96d56Sopenharmony_ci if (pattern[1] == SRE_OP_IN && 8267db96d56Sopenharmony_ci (ptr >= end || 8277db96d56Sopenharmony_ci !SRE(charset)(state, pattern + 3, 8287db96d56Sopenharmony_ci (SRE_CODE) *ptr))) 8297db96d56Sopenharmony_ci continue; 8307db96d56Sopenharmony_ci state->ptr = ptr; 8317db96d56Sopenharmony_ci DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); 8327db96d56Sopenharmony_ci if (ret) { 8337db96d56Sopenharmony_ci if (state->repeat) 8347db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 8357db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 8367db96d56Sopenharmony_ci RETURN_SUCCESS; 8377db96d56Sopenharmony_ci } 8387db96d56Sopenharmony_ci if (state->repeat) 8397db96d56Sopenharmony_ci MARK_POP_KEEP(ctx->lastmark); 8407db96d56Sopenharmony_ci LASTMARK_RESTORE(); 8417db96d56Sopenharmony_ci } 8427db96d56Sopenharmony_ci if (state->repeat) 8437db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 8447db96d56Sopenharmony_ci RETURN_FAILURE; 8457db96d56Sopenharmony_ci 8467db96d56Sopenharmony_ci TARGET(SRE_OP_REPEAT_ONE): 8477db96d56Sopenharmony_ci /* match repeated sequence (maximizing regexp) */ 8487db96d56Sopenharmony_ci 8497db96d56Sopenharmony_ci /* this operator only works if the repeated item is 8507db96d56Sopenharmony_ci exactly one character wide, and we're not already 8517db96d56Sopenharmony_ci collecting backtracking points. for other cases, 8527db96d56Sopenharmony_ci use the MAX_REPEAT operator */ 8537db96d56Sopenharmony_ci 8547db96d56Sopenharmony_ci /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 8557db96d56Sopenharmony_ci 8567db96d56Sopenharmony_ci TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, 8577db96d56Sopenharmony_ci pattern[1], pattern[2])); 8587db96d56Sopenharmony_ci 8597db96d56Sopenharmony_ci if ((Py_ssize_t) pattern[1] > end - ptr) 8607db96d56Sopenharmony_ci RETURN_FAILURE; /* cannot match */ 8617db96d56Sopenharmony_ci 8627db96d56Sopenharmony_ci state->ptr = ptr; 8637db96d56Sopenharmony_ci 8647db96d56Sopenharmony_ci ret = SRE(count)(state, pattern+3, pattern[2]); 8657db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 8667db96d56Sopenharmony_ci DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 8677db96d56Sopenharmony_ci ctx->count = ret; 8687db96d56Sopenharmony_ci ptr += ctx->count; 8697db96d56Sopenharmony_ci 8707db96d56Sopenharmony_ci /* when we arrive here, count contains the number of 8717db96d56Sopenharmony_ci matches, and ptr points to the tail of the target 8727db96d56Sopenharmony_ci string. check if the rest of the pattern matches, 8737db96d56Sopenharmony_ci and backtrack if not. */ 8747db96d56Sopenharmony_ci 8757db96d56Sopenharmony_ci if (ctx->count < (Py_ssize_t) pattern[1]) 8767db96d56Sopenharmony_ci RETURN_FAILURE; 8777db96d56Sopenharmony_ci 8787db96d56Sopenharmony_ci if (pattern[pattern[0]] == SRE_OP_SUCCESS && 8797db96d56Sopenharmony_ci ptr == state->end && 8807db96d56Sopenharmony_ci !(ctx->toplevel && state->must_advance && ptr == state->start)) 8817db96d56Sopenharmony_ci { 8827db96d56Sopenharmony_ci /* tail is empty. we're finished */ 8837db96d56Sopenharmony_ci state->ptr = ptr; 8847db96d56Sopenharmony_ci RETURN_SUCCESS; 8857db96d56Sopenharmony_ci } 8867db96d56Sopenharmony_ci 8877db96d56Sopenharmony_ci LASTMARK_SAVE(); 8887db96d56Sopenharmony_ci if (state->repeat) 8897db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 8907db96d56Sopenharmony_ci 8917db96d56Sopenharmony_ci if (pattern[pattern[0]] == SRE_OP_LITERAL) { 8927db96d56Sopenharmony_ci /* tail starts with a literal. skip positions where 8937db96d56Sopenharmony_ci the rest of the pattern cannot possibly match */ 8947db96d56Sopenharmony_ci ctx->u.chr = pattern[pattern[0]+1]; 8957db96d56Sopenharmony_ci for (;;) { 8967db96d56Sopenharmony_ci while (ctx->count >= (Py_ssize_t) pattern[1] && 8977db96d56Sopenharmony_ci (ptr >= end || *ptr != ctx->u.chr)) { 8987db96d56Sopenharmony_ci ptr--; 8997db96d56Sopenharmony_ci ctx->count--; 9007db96d56Sopenharmony_ci } 9017db96d56Sopenharmony_ci if (ctx->count < (Py_ssize_t) pattern[1]) 9027db96d56Sopenharmony_ci break; 9037db96d56Sopenharmony_ci state->ptr = ptr; 9047db96d56Sopenharmony_ci DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, 9057db96d56Sopenharmony_ci pattern+pattern[0]); 9067db96d56Sopenharmony_ci if (ret) { 9077db96d56Sopenharmony_ci if (state->repeat) 9087db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 9097db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 9107db96d56Sopenharmony_ci RETURN_SUCCESS; 9117db96d56Sopenharmony_ci } 9127db96d56Sopenharmony_ci if (state->repeat) 9137db96d56Sopenharmony_ci MARK_POP_KEEP(ctx->lastmark); 9147db96d56Sopenharmony_ci LASTMARK_RESTORE(); 9157db96d56Sopenharmony_ci 9167db96d56Sopenharmony_ci ptr--; 9177db96d56Sopenharmony_ci ctx->count--; 9187db96d56Sopenharmony_ci } 9197db96d56Sopenharmony_ci if (state->repeat) 9207db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 9217db96d56Sopenharmony_ci } else { 9227db96d56Sopenharmony_ci /* general case */ 9237db96d56Sopenharmony_ci while (ctx->count >= (Py_ssize_t) pattern[1]) { 9247db96d56Sopenharmony_ci state->ptr = ptr; 9257db96d56Sopenharmony_ci DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, 9267db96d56Sopenharmony_ci pattern+pattern[0]); 9277db96d56Sopenharmony_ci if (ret) { 9287db96d56Sopenharmony_ci if (state->repeat) 9297db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 9307db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 9317db96d56Sopenharmony_ci RETURN_SUCCESS; 9327db96d56Sopenharmony_ci } 9337db96d56Sopenharmony_ci if (state->repeat) 9347db96d56Sopenharmony_ci MARK_POP_KEEP(ctx->lastmark); 9357db96d56Sopenharmony_ci LASTMARK_RESTORE(); 9367db96d56Sopenharmony_ci 9377db96d56Sopenharmony_ci ptr--; 9387db96d56Sopenharmony_ci ctx->count--; 9397db96d56Sopenharmony_ci } 9407db96d56Sopenharmony_ci if (state->repeat) 9417db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 9427db96d56Sopenharmony_ci } 9437db96d56Sopenharmony_ci RETURN_FAILURE; 9447db96d56Sopenharmony_ci 9457db96d56Sopenharmony_ci TARGET(SRE_OP_MIN_REPEAT_ONE): 9467db96d56Sopenharmony_ci /* match repeated sequence (minimizing regexp) */ 9477db96d56Sopenharmony_ci 9487db96d56Sopenharmony_ci /* this operator only works if the repeated item is 9497db96d56Sopenharmony_ci exactly one character wide, and we're not already 9507db96d56Sopenharmony_ci collecting backtracking points. for other cases, 9517db96d56Sopenharmony_ci use the MIN_REPEAT operator */ 9527db96d56Sopenharmony_ci 9537db96d56Sopenharmony_ci /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_ci TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, 9567db96d56Sopenharmony_ci pattern[1], pattern[2])); 9577db96d56Sopenharmony_ci 9587db96d56Sopenharmony_ci if ((Py_ssize_t) pattern[1] > end - ptr) 9597db96d56Sopenharmony_ci RETURN_FAILURE; /* cannot match */ 9607db96d56Sopenharmony_ci 9617db96d56Sopenharmony_ci state->ptr = ptr; 9627db96d56Sopenharmony_ci 9637db96d56Sopenharmony_ci if (pattern[1] == 0) 9647db96d56Sopenharmony_ci ctx->count = 0; 9657db96d56Sopenharmony_ci else { 9667db96d56Sopenharmony_ci /* count using pattern min as the maximum */ 9677db96d56Sopenharmony_ci ret = SRE(count)(state, pattern+3, pattern[1]); 9687db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 9697db96d56Sopenharmony_ci DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 9707db96d56Sopenharmony_ci if (ret < (Py_ssize_t) pattern[1]) 9717db96d56Sopenharmony_ci /* didn't match minimum number of times */ 9727db96d56Sopenharmony_ci RETURN_FAILURE; 9737db96d56Sopenharmony_ci /* advance past minimum matches of repeat */ 9747db96d56Sopenharmony_ci ctx->count = ret; 9757db96d56Sopenharmony_ci ptr += ctx->count; 9767db96d56Sopenharmony_ci } 9777db96d56Sopenharmony_ci 9787db96d56Sopenharmony_ci if (pattern[pattern[0]] == SRE_OP_SUCCESS && 9797db96d56Sopenharmony_ci !(ctx->toplevel && 9807db96d56Sopenharmony_ci ((state->match_all && ptr != state->end) || 9817db96d56Sopenharmony_ci (state->must_advance && ptr == state->start)))) 9827db96d56Sopenharmony_ci { 9837db96d56Sopenharmony_ci /* tail is empty. we're finished */ 9847db96d56Sopenharmony_ci state->ptr = ptr; 9857db96d56Sopenharmony_ci RETURN_SUCCESS; 9867db96d56Sopenharmony_ci 9877db96d56Sopenharmony_ci } else { 9887db96d56Sopenharmony_ci /* general case */ 9897db96d56Sopenharmony_ci LASTMARK_SAVE(); 9907db96d56Sopenharmony_ci if (state->repeat) 9917db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 9927db96d56Sopenharmony_ci 9937db96d56Sopenharmony_ci while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT 9947db96d56Sopenharmony_ci || ctx->count <= (Py_ssize_t)pattern[2]) { 9957db96d56Sopenharmony_ci state->ptr = ptr; 9967db96d56Sopenharmony_ci DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, 9977db96d56Sopenharmony_ci pattern+pattern[0]); 9987db96d56Sopenharmony_ci if (ret) { 9997db96d56Sopenharmony_ci if (state->repeat) 10007db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 10017db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 10027db96d56Sopenharmony_ci RETURN_SUCCESS; 10037db96d56Sopenharmony_ci } 10047db96d56Sopenharmony_ci if (state->repeat) 10057db96d56Sopenharmony_ci MARK_POP_KEEP(ctx->lastmark); 10067db96d56Sopenharmony_ci LASTMARK_RESTORE(); 10077db96d56Sopenharmony_ci 10087db96d56Sopenharmony_ci state->ptr = ptr; 10097db96d56Sopenharmony_ci ret = SRE(count)(state, pattern+3, 1); 10107db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 10117db96d56Sopenharmony_ci DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 10127db96d56Sopenharmony_ci if (ret == 0) 10137db96d56Sopenharmony_ci break; 10147db96d56Sopenharmony_ci assert(ret == 1); 10157db96d56Sopenharmony_ci ptr++; 10167db96d56Sopenharmony_ci ctx->count++; 10177db96d56Sopenharmony_ci } 10187db96d56Sopenharmony_ci if (state->repeat) 10197db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 10207db96d56Sopenharmony_ci } 10217db96d56Sopenharmony_ci RETURN_FAILURE; 10227db96d56Sopenharmony_ci 10237db96d56Sopenharmony_ci TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): 10247db96d56Sopenharmony_ci /* match repeated sequence (maximizing regexp) without 10257db96d56Sopenharmony_ci backtracking */ 10267db96d56Sopenharmony_ci 10277db96d56Sopenharmony_ci /* this operator only works if the repeated item is 10287db96d56Sopenharmony_ci exactly one character wide, and we're not already 10297db96d56Sopenharmony_ci collecting backtracking points. for other cases, 10307db96d56Sopenharmony_ci use the MAX_REPEAT operator */ 10317db96d56Sopenharmony_ci 10327db96d56Sopenharmony_ci /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> 10337db96d56Sopenharmony_ci tail */ 10347db96d56Sopenharmony_ci 10357db96d56Sopenharmony_ci TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, 10367db96d56Sopenharmony_ci ptr, pattern[1], pattern[2])); 10377db96d56Sopenharmony_ci 10387db96d56Sopenharmony_ci if (ptr + pattern[1] > end) { 10397db96d56Sopenharmony_ci RETURN_FAILURE; /* cannot match */ 10407db96d56Sopenharmony_ci } 10417db96d56Sopenharmony_ci 10427db96d56Sopenharmony_ci state->ptr = ptr; 10437db96d56Sopenharmony_ci 10447db96d56Sopenharmony_ci ret = SRE(count)(state, pattern + 3, pattern[2]); 10457db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 10467db96d56Sopenharmony_ci DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 10477db96d56Sopenharmony_ci ctx->count = ret; 10487db96d56Sopenharmony_ci ptr += ctx->count; 10497db96d56Sopenharmony_ci 10507db96d56Sopenharmony_ci /* when we arrive here, count contains the number of 10517db96d56Sopenharmony_ci matches, and ptr points to the tail of the target 10527db96d56Sopenharmony_ci string. check if the rest of the pattern matches, 10537db96d56Sopenharmony_ci and fail if not. */ 10547db96d56Sopenharmony_ci 10557db96d56Sopenharmony_ci /* Test for not enough repetitions in match */ 10567db96d56Sopenharmony_ci if (ctx->count < (Py_ssize_t) pattern[1]) { 10577db96d56Sopenharmony_ci RETURN_FAILURE; 10587db96d56Sopenharmony_ci } 10597db96d56Sopenharmony_ci 10607db96d56Sopenharmony_ci /* Update the pattern to point to the next op code */ 10617db96d56Sopenharmony_ci pattern += pattern[0]; 10627db96d56Sopenharmony_ci 10637db96d56Sopenharmony_ci /* Let the tail be evaluated separately and consider this 10647db96d56Sopenharmony_ci match successful. */ 10657db96d56Sopenharmony_ci if (*pattern == SRE_OP_SUCCESS && 10667db96d56Sopenharmony_ci ptr == state->end && 10677db96d56Sopenharmony_ci !(ctx->toplevel && state->must_advance && ptr == state->start)) 10687db96d56Sopenharmony_ci { 10697db96d56Sopenharmony_ci /* tail is empty. we're finished */ 10707db96d56Sopenharmony_ci state->ptr = ptr; 10717db96d56Sopenharmony_ci RETURN_SUCCESS; 10727db96d56Sopenharmony_ci } 10737db96d56Sopenharmony_ci 10747db96d56Sopenharmony_ci /* Attempt to match the rest of the string */ 10757db96d56Sopenharmony_ci DISPATCH; 10767db96d56Sopenharmony_ci 10777db96d56Sopenharmony_ci TARGET(SRE_OP_REPEAT): 10787db96d56Sopenharmony_ci /* create repeat context. all the hard work is done 10797db96d56Sopenharmony_ci by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ 10807db96d56Sopenharmony_ci /* <REPEAT> <skip> <1=min> <2=max> 10817db96d56Sopenharmony_ci <3=repeat_index> item <UNTIL> tail */ 10827db96d56Sopenharmony_ci TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, 10837db96d56Sopenharmony_ci pattern[1], pattern[2])); 10847db96d56Sopenharmony_ci 10857db96d56Sopenharmony_ci /* install new repeat context */ 10867db96d56Sopenharmony_ci /* TODO(https://github.com/python/cpython/issues/67877): Fix this 10877db96d56Sopenharmony_ci * potential memory leak. */ 10887db96d56Sopenharmony_ci ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); 10897db96d56Sopenharmony_ci if (!ctx->u.rep) { 10907db96d56Sopenharmony_ci PyErr_NoMemory(); 10917db96d56Sopenharmony_ci RETURN_FAILURE; 10927db96d56Sopenharmony_ci } 10937db96d56Sopenharmony_ci ctx->u.rep->count = -1; 10947db96d56Sopenharmony_ci ctx->u.rep->pattern = pattern; 10957db96d56Sopenharmony_ci ctx->u.rep->prev = state->repeat; 10967db96d56Sopenharmony_ci ctx->u.rep->last_ptr = NULL; 10977db96d56Sopenharmony_ci state->repeat = ctx->u.rep; 10987db96d56Sopenharmony_ci 10997db96d56Sopenharmony_ci state->ptr = ptr; 11007db96d56Sopenharmony_ci DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); 11017db96d56Sopenharmony_ci state->repeat = ctx->u.rep->prev; 11027db96d56Sopenharmony_ci PyObject_Free(ctx->u.rep); 11037db96d56Sopenharmony_ci 11047db96d56Sopenharmony_ci if (ret) { 11057db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 11067db96d56Sopenharmony_ci RETURN_SUCCESS; 11077db96d56Sopenharmony_ci } 11087db96d56Sopenharmony_ci RETURN_FAILURE; 11097db96d56Sopenharmony_ci 11107db96d56Sopenharmony_ci TARGET(SRE_OP_MAX_UNTIL): 11117db96d56Sopenharmony_ci /* maximizing repeat */ 11127db96d56Sopenharmony_ci /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ 11137db96d56Sopenharmony_ci 11147db96d56Sopenharmony_ci /* FIXME: we probably need to deal with zero-width 11157db96d56Sopenharmony_ci matches in here... */ 11167db96d56Sopenharmony_ci 11177db96d56Sopenharmony_ci ctx->u.rep = state->repeat; 11187db96d56Sopenharmony_ci if (!ctx->u.rep) 11197db96d56Sopenharmony_ci RETURN_ERROR(SRE_ERROR_STATE); 11207db96d56Sopenharmony_ci 11217db96d56Sopenharmony_ci state->ptr = ptr; 11227db96d56Sopenharmony_ci 11237db96d56Sopenharmony_ci ctx->count = ctx->u.rep->count+1; 11247db96d56Sopenharmony_ci 11257db96d56Sopenharmony_ci TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, 11267db96d56Sopenharmony_ci ptr, ctx->count)); 11277db96d56Sopenharmony_ci 11287db96d56Sopenharmony_ci if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 11297db96d56Sopenharmony_ci /* not enough matches */ 11307db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count; 11317db96d56Sopenharmony_ci DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, 11327db96d56Sopenharmony_ci ctx->u.rep->pattern+3); 11337db96d56Sopenharmony_ci if (ret) { 11347db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 11357db96d56Sopenharmony_ci RETURN_SUCCESS; 11367db96d56Sopenharmony_ci } 11377db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count-1; 11387db96d56Sopenharmony_ci state->ptr = ptr; 11397db96d56Sopenharmony_ci RETURN_FAILURE; 11407db96d56Sopenharmony_ci } 11417db96d56Sopenharmony_ci 11427db96d56Sopenharmony_ci if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || 11437db96d56Sopenharmony_ci ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && 11447db96d56Sopenharmony_ci state->ptr != ctx->u.rep->last_ptr) { 11457db96d56Sopenharmony_ci /* we may have enough matches, but if we can 11467db96d56Sopenharmony_ci match another item, do so */ 11477db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count; 11487db96d56Sopenharmony_ci LASTMARK_SAVE(); 11497db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 11507db96d56Sopenharmony_ci /* zero-width match protection */ 11517db96d56Sopenharmony_ci DATA_PUSH(&ctx->u.rep->last_ptr); 11527db96d56Sopenharmony_ci ctx->u.rep->last_ptr = state->ptr; 11537db96d56Sopenharmony_ci DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, 11547db96d56Sopenharmony_ci ctx->u.rep->pattern+3); 11557db96d56Sopenharmony_ci DATA_POP(&ctx->u.rep->last_ptr); 11567db96d56Sopenharmony_ci if (ret) { 11577db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 11587db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 11597db96d56Sopenharmony_ci RETURN_SUCCESS; 11607db96d56Sopenharmony_ci } 11617db96d56Sopenharmony_ci MARK_POP(ctx->lastmark); 11627db96d56Sopenharmony_ci LASTMARK_RESTORE(); 11637db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count-1; 11647db96d56Sopenharmony_ci state->ptr = ptr; 11657db96d56Sopenharmony_ci } 11667db96d56Sopenharmony_ci 11677db96d56Sopenharmony_ci /* cannot match more repeated items here. make sure the 11687db96d56Sopenharmony_ci tail matches */ 11697db96d56Sopenharmony_ci state->repeat = ctx->u.rep->prev; 11707db96d56Sopenharmony_ci DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); 11717db96d56Sopenharmony_ci state->repeat = ctx->u.rep; // restore repeat before return 11727db96d56Sopenharmony_ci 11737db96d56Sopenharmony_ci RETURN_ON_SUCCESS(ret); 11747db96d56Sopenharmony_ci state->ptr = ptr; 11757db96d56Sopenharmony_ci RETURN_FAILURE; 11767db96d56Sopenharmony_ci 11777db96d56Sopenharmony_ci TARGET(SRE_OP_MIN_UNTIL): 11787db96d56Sopenharmony_ci /* minimizing repeat */ 11797db96d56Sopenharmony_ci /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ 11807db96d56Sopenharmony_ci 11817db96d56Sopenharmony_ci ctx->u.rep = state->repeat; 11827db96d56Sopenharmony_ci if (!ctx->u.rep) 11837db96d56Sopenharmony_ci RETURN_ERROR(SRE_ERROR_STATE); 11847db96d56Sopenharmony_ci 11857db96d56Sopenharmony_ci state->ptr = ptr; 11867db96d56Sopenharmony_ci 11877db96d56Sopenharmony_ci ctx->count = ctx->u.rep->count+1; 11887db96d56Sopenharmony_ci 11897db96d56Sopenharmony_ci TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, 11907db96d56Sopenharmony_ci ptr, ctx->count, ctx->u.rep->pattern)); 11917db96d56Sopenharmony_ci 11927db96d56Sopenharmony_ci if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 11937db96d56Sopenharmony_ci /* not enough matches */ 11947db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count; 11957db96d56Sopenharmony_ci DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, 11967db96d56Sopenharmony_ci ctx->u.rep->pattern+3); 11977db96d56Sopenharmony_ci if (ret) { 11987db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 11997db96d56Sopenharmony_ci RETURN_SUCCESS; 12007db96d56Sopenharmony_ci } 12017db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count-1; 12027db96d56Sopenharmony_ci state->ptr = ptr; 12037db96d56Sopenharmony_ci RETURN_FAILURE; 12047db96d56Sopenharmony_ci } 12057db96d56Sopenharmony_ci 12067db96d56Sopenharmony_ci /* see if the tail matches */ 12077db96d56Sopenharmony_ci state->repeat = ctx->u.rep->prev; 12087db96d56Sopenharmony_ci 12097db96d56Sopenharmony_ci LASTMARK_SAVE(); 12107db96d56Sopenharmony_ci if (state->repeat) 12117db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 12127db96d56Sopenharmony_ci 12137db96d56Sopenharmony_ci DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); 12147db96d56Sopenharmony_ci SRE_REPEAT *repeat_of_tail = state->repeat; 12157db96d56Sopenharmony_ci state->repeat = ctx->u.rep; // restore repeat before return 12167db96d56Sopenharmony_ci 12177db96d56Sopenharmony_ci if (ret) { 12187db96d56Sopenharmony_ci if (repeat_of_tail) 12197db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 12207db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 12217db96d56Sopenharmony_ci RETURN_SUCCESS; 12227db96d56Sopenharmony_ci } 12237db96d56Sopenharmony_ci if (repeat_of_tail) 12247db96d56Sopenharmony_ci MARK_POP(ctx->lastmark); 12257db96d56Sopenharmony_ci LASTMARK_RESTORE(); 12267db96d56Sopenharmony_ci 12277db96d56Sopenharmony_ci state->ptr = ptr; 12287db96d56Sopenharmony_ci 12297db96d56Sopenharmony_ci if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] 12307db96d56Sopenharmony_ci && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || 12317db96d56Sopenharmony_ci state->ptr == ctx->u.rep->last_ptr) 12327db96d56Sopenharmony_ci RETURN_FAILURE; 12337db96d56Sopenharmony_ci 12347db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count; 12357db96d56Sopenharmony_ci /* zero-width match protection */ 12367db96d56Sopenharmony_ci DATA_PUSH(&ctx->u.rep->last_ptr); 12377db96d56Sopenharmony_ci ctx->u.rep->last_ptr = state->ptr; 12387db96d56Sopenharmony_ci DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, 12397db96d56Sopenharmony_ci ctx->u.rep->pattern+3); 12407db96d56Sopenharmony_ci DATA_POP(&ctx->u.rep->last_ptr); 12417db96d56Sopenharmony_ci if (ret) { 12427db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 12437db96d56Sopenharmony_ci RETURN_SUCCESS; 12447db96d56Sopenharmony_ci } 12457db96d56Sopenharmony_ci ctx->u.rep->count = ctx->count-1; 12467db96d56Sopenharmony_ci state->ptr = ptr; 12477db96d56Sopenharmony_ci RETURN_FAILURE; 12487db96d56Sopenharmony_ci 12497db96d56Sopenharmony_ci TARGET(SRE_OP_POSSESSIVE_REPEAT): 12507db96d56Sopenharmony_ci /* create possessive repeat contexts. */ 12517db96d56Sopenharmony_ci /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern 12527db96d56Sopenharmony_ci <SUCCESS> tail */ 12537db96d56Sopenharmony_ci TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, 12547db96d56Sopenharmony_ci ptr, pattern[1], pattern[2])); 12557db96d56Sopenharmony_ci 12567db96d56Sopenharmony_ci /* Set the global Input pointer to this context's Input 12577db96d56Sopenharmony_ci pointer */ 12587db96d56Sopenharmony_ci state->ptr = ptr; 12597db96d56Sopenharmony_ci 12607db96d56Sopenharmony_ci /* Initialize Count to 0 */ 12617db96d56Sopenharmony_ci ctx->count = 0; 12627db96d56Sopenharmony_ci 12637db96d56Sopenharmony_ci /* Check for minimum required matches. */ 12647db96d56Sopenharmony_ci while (ctx->count < (Py_ssize_t)pattern[1]) { 12657db96d56Sopenharmony_ci /* not enough matches */ 12667db96d56Sopenharmony_ci DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, 12677db96d56Sopenharmony_ci &pattern[3]); 12687db96d56Sopenharmony_ci if (ret) { 12697db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 12707db96d56Sopenharmony_ci ctx->count++; 12717db96d56Sopenharmony_ci } 12727db96d56Sopenharmony_ci else { 12737db96d56Sopenharmony_ci state->ptr = ptr; 12747db96d56Sopenharmony_ci RETURN_FAILURE; 12757db96d56Sopenharmony_ci } 12767db96d56Sopenharmony_ci } 12777db96d56Sopenharmony_ci 12787db96d56Sopenharmony_ci /* Clear the context's Input stream pointer so that it 12797db96d56Sopenharmony_ci doesn't match the global state so that the while loop can 12807db96d56Sopenharmony_ci be entered. */ 12817db96d56Sopenharmony_ci ptr = NULL; 12827db96d56Sopenharmony_ci 12837db96d56Sopenharmony_ci /* Keep trying to parse the <pattern> sub-pattern until the 12847db96d56Sopenharmony_ci end is reached, creating a new context each time. */ 12857db96d56Sopenharmony_ci while ((ctx->count < (Py_ssize_t)pattern[2] || 12867db96d56Sopenharmony_ci (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && 12877db96d56Sopenharmony_ci state->ptr != ptr) { 12887db96d56Sopenharmony_ci /* Save the Capture Group Marker state into the current 12897db96d56Sopenharmony_ci Context and back up the current highest number 12907db96d56Sopenharmony_ci Capture Group marker. */ 12917db96d56Sopenharmony_ci LASTMARK_SAVE(); 12927db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 12937db96d56Sopenharmony_ci 12947db96d56Sopenharmony_ci /* zero-width match protection */ 12957db96d56Sopenharmony_ci /* Set the context's Input Stream pointer to be the 12967db96d56Sopenharmony_ci current Input Stream pointer from the global 12977db96d56Sopenharmony_ci state. When the loop reaches the next iteration, 12987db96d56Sopenharmony_ci the context will then store the last known good 12997db96d56Sopenharmony_ci position with the global state holding the Input 13007db96d56Sopenharmony_ci Input Stream position that has been updated with 13017db96d56Sopenharmony_ci the most recent match. Thus, if state's Input 13027db96d56Sopenharmony_ci stream remains the same as the one stored in the 13037db96d56Sopenharmony_ci current Context, we know we have successfully 13047db96d56Sopenharmony_ci matched an empty string and that all subsequent 13057db96d56Sopenharmony_ci matches will also be the empty string until the 13067db96d56Sopenharmony_ci maximum number of matches are counted, and because 13077db96d56Sopenharmony_ci of this, we could immediately stop at that point and 13087db96d56Sopenharmony_ci consider this match successful. */ 13097db96d56Sopenharmony_ci ptr = state->ptr; 13107db96d56Sopenharmony_ci 13117db96d56Sopenharmony_ci /* We have not reached the maximin matches, so try to 13127db96d56Sopenharmony_ci match once more. */ 13137db96d56Sopenharmony_ci DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, 13147db96d56Sopenharmony_ci &pattern[3]); 13157db96d56Sopenharmony_ci 13167db96d56Sopenharmony_ci /* Check to see if the last attempted match 13177db96d56Sopenharmony_ci succeeded. */ 13187db96d56Sopenharmony_ci if (ret) { 13197db96d56Sopenharmony_ci /* Drop the saved highest number Capture Group 13207db96d56Sopenharmony_ci marker saved above and use the newly updated 13217db96d56Sopenharmony_ci value. */ 13227db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 13237db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 13247db96d56Sopenharmony_ci 13257db96d56Sopenharmony_ci /* Success, increment the count. */ 13267db96d56Sopenharmony_ci ctx->count++; 13277db96d56Sopenharmony_ci } 13287db96d56Sopenharmony_ci /* Last attempted match failed. */ 13297db96d56Sopenharmony_ci else { 13307db96d56Sopenharmony_ci /* Restore the previously saved highest number 13317db96d56Sopenharmony_ci Capture Group marker since the last iteration 13327db96d56Sopenharmony_ci did not match, then restore that to the global 13337db96d56Sopenharmony_ci state. */ 13347db96d56Sopenharmony_ci MARK_POP(ctx->lastmark); 13357db96d56Sopenharmony_ci LASTMARK_RESTORE(); 13367db96d56Sopenharmony_ci 13377db96d56Sopenharmony_ci /* We have sufficient matches, so exit loop. */ 13387db96d56Sopenharmony_ci break; 13397db96d56Sopenharmony_ci } 13407db96d56Sopenharmony_ci } 13417db96d56Sopenharmony_ci 13427db96d56Sopenharmony_ci /* Evaluate Tail */ 13437db96d56Sopenharmony_ci /* Jump to end of pattern indicated by skip, and then skip 13447db96d56Sopenharmony_ci the SUCCESS op code that follows it. */ 13457db96d56Sopenharmony_ci pattern += pattern[0] + 1; 13467db96d56Sopenharmony_ci ptr = state->ptr; 13477db96d56Sopenharmony_ci DISPATCH; 13487db96d56Sopenharmony_ci 13497db96d56Sopenharmony_ci TARGET(SRE_OP_ATOMIC_GROUP): 13507db96d56Sopenharmony_ci /* Atomic Group Sub Pattern */ 13517db96d56Sopenharmony_ci /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ 13527db96d56Sopenharmony_ci TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); 13537db96d56Sopenharmony_ci 13547db96d56Sopenharmony_ci /* Set the global Input pointer to this context's Input 13557db96d56Sopenharmony_ci pointer */ 13567db96d56Sopenharmony_ci state->ptr = ptr; 13577db96d56Sopenharmony_ci 13587db96d56Sopenharmony_ci /* Evaluate the Atomic Group in a new context, terminating 13597db96d56Sopenharmony_ci when the end of the group, represented by a SUCCESS op 13607db96d56Sopenharmony_ci code, is reached. */ 13617db96d56Sopenharmony_ci /* Group Pattern begins at an offset of 1 code. */ 13627db96d56Sopenharmony_ci DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, 13637db96d56Sopenharmony_ci &pattern[1]); 13647db96d56Sopenharmony_ci 13657db96d56Sopenharmony_ci /* Test Exit Condition */ 13667db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 13677db96d56Sopenharmony_ci 13687db96d56Sopenharmony_ci if (ret == 0) { 13697db96d56Sopenharmony_ci /* Atomic Group failed to Match. */ 13707db96d56Sopenharmony_ci state->ptr = ptr; 13717db96d56Sopenharmony_ci RETURN_FAILURE; 13727db96d56Sopenharmony_ci } 13737db96d56Sopenharmony_ci 13747db96d56Sopenharmony_ci /* Evaluate Tail */ 13757db96d56Sopenharmony_ci /* Jump to end of pattern indicated by skip, and then skip 13767db96d56Sopenharmony_ci the SUCCESS op code that follows it. */ 13777db96d56Sopenharmony_ci pattern += pattern[0]; 13787db96d56Sopenharmony_ci ptr = state->ptr; 13797db96d56Sopenharmony_ci DISPATCH; 13807db96d56Sopenharmony_ci 13817db96d56Sopenharmony_ci TARGET(SRE_OP_GROUPREF): 13827db96d56Sopenharmony_ci /* match backreference */ 13837db96d56Sopenharmony_ci TRACE(("|%p|%p|GROUPREF %d\n", pattern, 13847db96d56Sopenharmony_ci ptr, pattern[0])); 13857db96d56Sopenharmony_ci { 13867db96d56Sopenharmony_ci int groupref = pattern[0] * 2; 13877db96d56Sopenharmony_ci if (groupref >= state->lastmark) { 13887db96d56Sopenharmony_ci RETURN_FAILURE; 13897db96d56Sopenharmony_ci } else { 13907db96d56Sopenharmony_ci SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 13917db96d56Sopenharmony_ci SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 13927db96d56Sopenharmony_ci if (!p || !e || e < p) 13937db96d56Sopenharmony_ci RETURN_FAILURE; 13947db96d56Sopenharmony_ci while (p < e) { 13957db96d56Sopenharmony_ci if (ptr >= end || *ptr != *p) 13967db96d56Sopenharmony_ci RETURN_FAILURE; 13977db96d56Sopenharmony_ci p++; 13987db96d56Sopenharmony_ci ptr++; 13997db96d56Sopenharmony_ci } 14007db96d56Sopenharmony_ci } 14017db96d56Sopenharmony_ci } 14027db96d56Sopenharmony_ci pattern++; 14037db96d56Sopenharmony_ci DISPATCH; 14047db96d56Sopenharmony_ci 14057db96d56Sopenharmony_ci TARGET(SRE_OP_GROUPREF_IGNORE): 14067db96d56Sopenharmony_ci /* match backreference */ 14077db96d56Sopenharmony_ci TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, 14087db96d56Sopenharmony_ci ptr, pattern[0])); 14097db96d56Sopenharmony_ci { 14107db96d56Sopenharmony_ci int groupref = pattern[0] * 2; 14117db96d56Sopenharmony_ci if (groupref >= state->lastmark) { 14127db96d56Sopenharmony_ci RETURN_FAILURE; 14137db96d56Sopenharmony_ci } else { 14147db96d56Sopenharmony_ci SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 14157db96d56Sopenharmony_ci SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 14167db96d56Sopenharmony_ci if (!p || !e || e < p) 14177db96d56Sopenharmony_ci RETURN_FAILURE; 14187db96d56Sopenharmony_ci while (p < e) { 14197db96d56Sopenharmony_ci if (ptr >= end || 14207db96d56Sopenharmony_ci sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) 14217db96d56Sopenharmony_ci RETURN_FAILURE; 14227db96d56Sopenharmony_ci p++; 14237db96d56Sopenharmony_ci ptr++; 14247db96d56Sopenharmony_ci } 14257db96d56Sopenharmony_ci } 14267db96d56Sopenharmony_ci } 14277db96d56Sopenharmony_ci pattern++; 14287db96d56Sopenharmony_ci DISPATCH; 14297db96d56Sopenharmony_ci 14307db96d56Sopenharmony_ci TARGET(SRE_OP_GROUPREF_UNI_IGNORE): 14317db96d56Sopenharmony_ci /* match backreference */ 14327db96d56Sopenharmony_ci TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, 14337db96d56Sopenharmony_ci ptr, pattern[0])); 14347db96d56Sopenharmony_ci { 14357db96d56Sopenharmony_ci int groupref = pattern[0] * 2; 14367db96d56Sopenharmony_ci if (groupref >= state->lastmark) { 14377db96d56Sopenharmony_ci RETURN_FAILURE; 14387db96d56Sopenharmony_ci } else { 14397db96d56Sopenharmony_ci SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 14407db96d56Sopenharmony_ci SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 14417db96d56Sopenharmony_ci if (!p || !e || e < p) 14427db96d56Sopenharmony_ci RETURN_FAILURE; 14437db96d56Sopenharmony_ci while (p < e) { 14447db96d56Sopenharmony_ci if (ptr >= end || 14457db96d56Sopenharmony_ci sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) 14467db96d56Sopenharmony_ci RETURN_FAILURE; 14477db96d56Sopenharmony_ci p++; 14487db96d56Sopenharmony_ci ptr++; 14497db96d56Sopenharmony_ci } 14507db96d56Sopenharmony_ci } 14517db96d56Sopenharmony_ci } 14527db96d56Sopenharmony_ci pattern++; 14537db96d56Sopenharmony_ci DISPATCH; 14547db96d56Sopenharmony_ci 14557db96d56Sopenharmony_ci TARGET(SRE_OP_GROUPREF_LOC_IGNORE): 14567db96d56Sopenharmony_ci /* match backreference */ 14577db96d56Sopenharmony_ci TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, 14587db96d56Sopenharmony_ci ptr, pattern[0])); 14597db96d56Sopenharmony_ci { 14607db96d56Sopenharmony_ci int groupref = pattern[0] * 2; 14617db96d56Sopenharmony_ci if (groupref >= state->lastmark) { 14627db96d56Sopenharmony_ci RETURN_FAILURE; 14637db96d56Sopenharmony_ci } else { 14647db96d56Sopenharmony_ci SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 14657db96d56Sopenharmony_ci SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 14667db96d56Sopenharmony_ci if (!p || !e || e < p) 14677db96d56Sopenharmony_ci RETURN_FAILURE; 14687db96d56Sopenharmony_ci while (p < e) { 14697db96d56Sopenharmony_ci if (ptr >= end || 14707db96d56Sopenharmony_ci sre_lower_locale(*ptr) != sre_lower_locale(*p)) 14717db96d56Sopenharmony_ci RETURN_FAILURE; 14727db96d56Sopenharmony_ci p++; 14737db96d56Sopenharmony_ci ptr++; 14747db96d56Sopenharmony_ci } 14757db96d56Sopenharmony_ci } 14767db96d56Sopenharmony_ci } 14777db96d56Sopenharmony_ci pattern++; 14787db96d56Sopenharmony_ci DISPATCH; 14797db96d56Sopenharmony_ci 14807db96d56Sopenharmony_ci TARGET(SRE_OP_GROUPREF_EXISTS): 14817db96d56Sopenharmony_ci TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, 14827db96d56Sopenharmony_ci ptr, pattern[0])); 14837db96d56Sopenharmony_ci /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ 14847db96d56Sopenharmony_ci { 14857db96d56Sopenharmony_ci int groupref = pattern[0] * 2; 14867db96d56Sopenharmony_ci if (groupref >= state->lastmark) { 14877db96d56Sopenharmony_ci pattern += pattern[1]; 14887db96d56Sopenharmony_ci DISPATCH; 14897db96d56Sopenharmony_ci } else { 14907db96d56Sopenharmony_ci SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 14917db96d56Sopenharmony_ci SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 14927db96d56Sopenharmony_ci if (!p || !e || e < p) { 14937db96d56Sopenharmony_ci pattern += pattern[1]; 14947db96d56Sopenharmony_ci DISPATCH; 14957db96d56Sopenharmony_ci } 14967db96d56Sopenharmony_ci } 14977db96d56Sopenharmony_ci } 14987db96d56Sopenharmony_ci pattern += 2; 14997db96d56Sopenharmony_ci DISPATCH; 15007db96d56Sopenharmony_ci 15017db96d56Sopenharmony_ci TARGET(SRE_OP_ASSERT): 15027db96d56Sopenharmony_ci /* assert subpattern */ 15037db96d56Sopenharmony_ci /* <ASSERT> <skip> <back> <pattern> */ 15047db96d56Sopenharmony_ci TRACE(("|%p|%p|ASSERT %d\n", pattern, 15057db96d56Sopenharmony_ci ptr, pattern[1])); 15067db96d56Sopenharmony_ci if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1]) 15077db96d56Sopenharmony_ci RETURN_FAILURE; 15087db96d56Sopenharmony_ci state->ptr = ptr - pattern[1]; 15097db96d56Sopenharmony_ci DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); 15107db96d56Sopenharmony_ci RETURN_ON_FAILURE(ret); 15117db96d56Sopenharmony_ci pattern += pattern[0]; 15127db96d56Sopenharmony_ci DISPATCH; 15137db96d56Sopenharmony_ci 15147db96d56Sopenharmony_ci TARGET(SRE_OP_ASSERT_NOT): 15157db96d56Sopenharmony_ci /* assert not subpattern */ 15167db96d56Sopenharmony_ci /* <ASSERT_NOT> <skip> <back> <pattern> */ 15177db96d56Sopenharmony_ci TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, 15187db96d56Sopenharmony_ci ptr, pattern[1])); 15197db96d56Sopenharmony_ci if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) { 15207db96d56Sopenharmony_ci state->ptr = ptr - pattern[1]; 15217db96d56Sopenharmony_ci LASTMARK_SAVE(); 15227db96d56Sopenharmony_ci if (state->repeat) 15237db96d56Sopenharmony_ci MARK_PUSH(ctx->lastmark); 15247db96d56Sopenharmony_ci 15257db96d56Sopenharmony_ci DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); 15267db96d56Sopenharmony_ci if (ret) { 15277db96d56Sopenharmony_ci if (state->repeat) 15287db96d56Sopenharmony_ci MARK_POP_DISCARD(ctx->lastmark); 15297db96d56Sopenharmony_ci RETURN_ON_ERROR(ret); 15307db96d56Sopenharmony_ci RETURN_FAILURE; 15317db96d56Sopenharmony_ci } 15327db96d56Sopenharmony_ci if (state->repeat) 15337db96d56Sopenharmony_ci MARK_POP(ctx->lastmark); 15347db96d56Sopenharmony_ci LASTMARK_RESTORE(); 15357db96d56Sopenharmony_ci } 15367db96d56Sopenharmony_ci pattern += pattern[0]; 15377db96d56Sopenharmony_ci DISPATCH; 15387db96d56Sopenharmony_ci 15397db96d56Sopenharmony_ci TARGET(SRE_OP_FAILURE): 15407db96d56Sopenharmony_ci /* immediate failure */ 15417db96d56Sopenharmony_ci TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); 15427db96d56Sopenharmony_ci RETURN_FAILURE; 15437db96d56Sopenharmony_ci 15447db96d56Sopenharmony_ci#if !USE_COMPUTED_GOTOS 15457db96d56Sopenharmony_ci default: 15467db96d56Sopenharmony_ci#endif 15477db96d56Sopenharmony_ci // Also any unused opcodes: 15487db96d56Sopenharmony_ci TARGET(SRE_OP_RANGE_UNI_IGNORE): 15497db96d56Sopenharmony_ci TARGET(SRE_OP_SUBPATTERN): 15507db96d56Sopenharmony_ci TARGET(SRE_OP_RANGE): 15517db96d56Sopenharmony_ci TARGET(SRE_OP_NEGATE): 15527db96d56Sopenharmony_ci TARGET(SRE_OP_BIGCHARSET): 15537db96d56Sopenharmony_ci TARGET(SRE_OP_CHARSET): 15547db96d56Sopenharmony_ci TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, 15557db96d56Sopenharmony_ci pattern[-1])); 15567db96d56Sopenharmony_ci RETURN_ERROR(SRE_ERROR_ILLEGAL); 15577db96d56Sopenharmony_ci 15587db96d56Sopenharmony_ci } 15597db96d56Sopenharmony_ci 15607db96d56Sopenharmony_ciexit: 15617db96d56Sopenharmony_ci ctx_pos = ctx->last_ctx_pos; 15627db96d56Sopenharmony_ci jump = ctx->jump; 15637db96d56Sopenharmony_ci DATA_POP_DISCARD(ctx); 15647db96d56Sopenharmony_ci if (ctx_pos == -1) 15657db96d56Sopenharmony_ci return ret; 15667db96d56Sopenharmony_ci DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 15677db96d56Sopenharmony_ci 15687db96d56Sopenharmony_ci switch (jump) { 15697db96d56Sopenharmony_ci case JUMP_MAX_UNTIL_2: 15707db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); 15717db96d56Sopenharmony_ci goto jump_max_until_2; 15727db96d56Sopenharmony_ci case JUMP_MAX_UNTIL_3: 15737db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); 15747db96d56Sopenharmony_ci goto jump_max_until_3; 15757db96d56Sopenharmony_ci case JUMP_MIN_UNTIL_2: 15767db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); 15777db96d56Sopenharmony_ci goto jump_min_until_2; 15787db96d56Sopenharmony_ci case JUMP_MIN_UNTIL_3: 15797db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); 15807db96d56Sopenharmony_ci goto jump_min_until_3; 15817db96d56Sopenharmony_ci case JUMP_BRANCH: 15827db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); 15837db96d56Sopenharmony_ci goto jump_branch; 15847db96d56Sopenharmony_ci case JUMP_MAX_UNTIL_1: 15857db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); 15867db96d56Sopenharmony_ci goto jump_max_until_1; 15877db96d56Sopenharmony_ci case JUMP_MIN_UNTIL_1: 15887db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); 15897db96d56Sopenharmony_ci goto jump_min_until_1; 15907db96d56Sopenharmony_ci case JUMP_POSS_REPEAT_1: 15917db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); 15927db96d56Sopenharmony_ci goto jump_poss_repeat_1; 15937db96d56Sopenharmony_ci case JUMP_POSS_REPEAT_2: 15947db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); 15957db96d56Sopenharmony_ci goto jump_poss_repeat_2; 15967db96d56Sopenharmony_ci case JUMP_REPEAT: 15977db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); 15987db96d56Sopenharmony_ci goto jump_repeat; 15997db96d56Sopenharmony_ci case JUMP_REPEAT_ONE_1: 16007db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); 16017db96d56Sopenharmony_ci goto jump_repeat_one_1; 16027db96d56Sopenharmony_ci case JUMP_REPEAT_ONE_2: 16037db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); 16047db96d56Sopenharmony_ci goto jump_repeat_one_2; 16057db96d56Sopenharmony_ci case JUMP_MIN_REPEAT_ONE: 16067db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); 16077db96d56Sopenharmony_ci goto jump_min_repeat_one; 16087db96d56Sopenharmony_ci case JUMP_ATOMIC_GROUP: 16097db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); 16107db96d56Sopenharmony_ci goto jump_atomic_group; 16117db96d56Sopenharmony_ci case JUMP_ASSERT: 16127db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); 16137db96d56Sopenharmony_ci goto jump_assert; 16147db96d56Sopenharmony_ci case JUMP_ASSERT_NOT: 16157db96d56Sopenharmony_ci TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); 16167db96d56Sopenharmony_ci goto jump_assert_not; 16177db96d56Sopenharmony_ci case JUMP_NONE: 16187db96d56Sopenharmony_ci TRACE(("|%p|%p|RETURN %zd\n", pattern, 16197db96d56Sopenharmony_ci ptr, ret)); 16207db96d56Sopenharmony_ci break; 16217db96d56Sopenharmony_ci } 16227db96d56Sopenharmony_ci 16237db96d56Sopenharmony_ci return ret; /* should never get here */ 16247db96d56Sopenharmony_ci} 16257db96d56Sopenharmony_ci 16267db96d56Sopenharmony_ci/* need to reset capturing groups between two SRE(match) callings in loops */ 16277db96d56Sopenharmony_ci#define RESET_CAPTURE_GROUP() \ 16287db96d56Sopenharmony_ci do { state->lastmark = state->lastindex = -1; } while (0) 16297db96d56Sopenharmony_ci 16307db96d56Sopenharmony_ciLOCAL(Py_ssize_t) 16317db96d56Sopenharmony_ciSRE(search)(SRE_STATE* state, SRE_CODE* pattern) 16327db96d56Sopenharmony_ci{ 16337db96d56Sopenharmony_ci SRE_CHAR* ptr = (SRE_CHAR *)state->start; 16347db96d56Sopenharmony_ci SRE_CHAR* end = (SRE_CHAR *)state->end; 16357db96d56Sopenharmony_ci Py_ssize_t status = 0; 16367db96d56Sopenharmony_ci Py_ssize_t prefix_len = 0; 16377db96d56Sopenharmony_ci Py_ssize_t prefix_skip = 0; 16387db96d56Sopenharmony_ci SRE_CODE* prefix = NULL; 16397db96d56Sopenharmony_ci SRE_CODE* charset = NULL; 16407db96d56Sopenharmony_ci SRE_CODE* overlap = NULL; 16417db96d56Sopenharmony_ci int flags = 0; 16427db96d56Sopenharmony_ci 16437db96d56Sopenharmony_ci if (ptr > end) 16447db96d56Sopenharmony_ci return 0; 16457db96d56Sopenharmony_ci 16467db96d56Sopenharmony_ci if (pattern[0] == SRE_OP_INFO) { 16477db96d56Sopenharmony_ci /* optimization info block */ 16487db96d56Sopenharmony_ci /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ 16497db96d56Sopenharmony_ci 16507db96d56Sopenharmony_ci flags = pattern[2]; 16517db96d56Sopenharmony_ci 16527db96d56Sopenharmony_ci if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { 16537db96d56Sopenharmony_ci TRACE(("reject (got %u chars, need %u)\n", 16547db96d56Sopenharmony_ci (unsigned int)(end - ptr), pattern[3])); 16557db96d56Sopenharmony_ci return 0; 16567db96d56Sopenharmony_ci } 16577db96d56Sopenharmony_ci if (pattern[3] > 1) { 16587db96d56Sopenharmony_ci /* adjust end point (but make sure we leave at least one 16597db96d56Sopenharmony_ci character in there, so literal search will work) */ 16607db96d56Sopenharmony_ci end -= pattern[3] - 1; 16617db96d56Sopenharmony_ci if (end <= ptr) 16627db96d56Sopenharmony_ci end = ptr; 16637db96d56Sopenharmony_ci } 16647db96d56Sopenharmony_ci 16657db96d56Sopenharmony_ci if (flags & SRE_INFO_PREFIX) { 16667db96d56Sopenharmony_ci /* pattern starts with a known prefix */ 16677db96d56Sopenharmony_ci /* <length> <skip> <prefix data> <overlap data> */ 16687db96d56Sopenharmony_ci prefix_len = pattern[5]; 16697db96d56Sopenharmony_ci prefix_skip = pattern[6]; 16707db96d56Sopenharmony_ci prefix = pattern + 7; 16717db96d56Sopenharmony_ci overlap = prefix + prefix_len - 1; 16727db96d56Sopenharmony_ci } else if (flags & SRE_INFO_CHARSET) 16737db96d56Sopenharmony_ci /* pattern starts with a character from a known set */ 16747db96d56Sopenharmony_ci /* <charset> */ 16757db96d56Sopenharmony_ci charset = pattern + 5; 16767db96d56Sopenharmony_ci 16777db96d56Sopenharmony_ci pattern += 1 + pattern[1]; 16787db96d56Sopenharmony_ci } 16797db96d56Sopenharmony_ci 16807db96d56Sopenharmony_ci TRACE(("prefix = %p %zd %zd\n", 16817db96d56Sopenharmony_ci prefix, prefix_len, prefix_skip)); 16827db96d56Sopenharmony_ci TRACE(("charset = %p\n", charset)); 16837db96d56Sopenharmony_ci 16847db96d56Sopenharmony_ci if (prefix_len == 1) { 16857db96d56Sopenharmony_ci /* pattern starts with a literal character */ 16867db96d56Sopenharmony_ci SRE_CHAR c = (SRE_CHAR) prefix[0]; 16877db96d56Sopenharmony_ci#if SIZEOF_SRE_CHAR < 4 16887db96d56Sopenharmony_ci if ((SRE_CODE) c != prefix[0]) 16897db96d56Sopenharmony_ci return 0; /* literal can't match: doesn't fit in char width */ 16907db96d56Sopenharmony_ci#endif 16917db96d56Sopenharmony_ci end = (SRE_CHAR *)state->end; 16927db96d56Sopenharmony_ci state->must_advance = 0; 16937db96d56Sopenharmony_ci while (ptr < end) { 16947db96d56Sopenharmony_ci while (*ptr != c) { 16957db96d56Sopenharmony_ci if (++ptr >= end) 16967db96d56Sopenharmony_ci return 0; 16977db96d56Sopenharmony_ci } 16987db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); 16997db96d56Sopenharmony_ci state->start = ptr; 17007db96d56Sopenharmony_ci state->ptr = ptr + prefix_skip; 17017db96d56Sopenharmony_ci if (flags & SRE_INFO_LITERAL) 17027db96d56Sopenharmony_ci return 1; /* we got all of it */ 17037db96d56Sopenharmony_ci status = SRE(match)(state, pattern + 2*prefix_skip, 0); 17047db96d56Sopenharmony_ci if (status != 0) 17057db96d56Sopenharmony_ci return status; 17067db96d56Sopenharmony_ci ++ptr; 17077db96d56Sopenharmony_ci RESET_CAPTURE_GROUP(); 17087db96d56Sopenharmony_ci } 17097db96d56Sopenharmony_ci return 0; 17107db96d56Sopenharmony_ci } 17117db96d56Sopenharmony_ci 17127db96d56Sopenharmony_ci if (prefix_len > 1) { 17137db96d56Sopenharmony_ci /* pattern starts with a known prefix. use the overlap 17147db96d56Sopenharmony_ci table to skip forward as fast as we possibly can */ 17157db96d56Sopenharmony_ci Py_ssize_t i = 0; 17167db96d56Sopenharmony_ci 17177db96d56Sopenharmony_ci end = (SRE_CHAR *)state->end; 17187db96d56Sopenharmony_ci if (prefix_len > end - ptr) 17197db96d56Sopenharmony_ci return 0; 17207db96d56Sopenharmony_ci#if SIZEOF_SRE_CHAR < 4 17217db96d56Sopenharmony_ci for (i = 0; i < prefix_len; i++) 17227db96d56Sopenharmony_ci if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) 17237db96d56Sopenharmony_ci return 0; /* literal can't match: doesn't fit in char width */ 17247db96d56Sopenharmony_ci#endif 17257db96d56Sopenharmony_ci while (ptr < end) { 17267db96d56Sopenharmony_ci SRE_CHAR c = (SRE_CHAR) prefix[0]; 17277db96d56Sopenharmony_ci while (*ptr++ != c) { 17287db96d56Sopenharmony_ci if (ptr >= end) 17297db96d56Sopenharmony_ci return 0; 17307db96d56Sopenharmony_ci } 17317db96d56Sopenharmony_ci if (ptr >= end) 17327db96d56Sopenharmony_ci return 0; 17337db96d56Sopenharmony_ci 17347db96d56Sopenharmony_ci i = 1; 17357db96d56Sopenharmony_ci state->must_advance = 0; 17367db96d56Sopenharmony_ci do { 17377db96d56Sopenharmony_ci if (*ptr == (SRE_CHAR) prefix[i]) { 17387db96d56Sopenharmony_ci if (++i != prefix_len) { 17397db96d56Sopenharmony_ci if (++ptr >= end) 17407db96d56Sopenharmony_ci return 0; 17417db96d56Sopenharmony_ci continue; 17427db96d56Sopenharmony_ci } 17437db96d56Sopenharmony_ci /* found a potential match */ 17447db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); 17457db96d56Sopenharmony_ci state->start = ptr - (prefix_len - 1); 17467db96d56Sopenharmony_ci state->ptr = ptr - (prefix_len - prefix_skip - 1); 17477db96d56Sopenharmony_ci if (flags & SRE_INFO_LITERAL) 17487db96d56Sopenharmony_ci return 1; /* we got all of it */ 17497db96d56Sopenharmony_ci status = SRE(match)(state, pattern + 2*prefix_skip, 0); 17507db96d56Sopenharmony_ci if (status != 0) 17517db96d56Sopenharmony_ci return status; 17527db96d56Sopenharmony_ci /* close but no cigar -- try again */ 17537db96d56Sopenharmony_ci if (++ptr >= end) 17547db96d56Sopenharmony_ci return 0; 17557db96d56Sopenharmony_ci RESET_CAPTURE_GROUP(); 17567db96d56Sopenharmony_ci } 17577db96d56Sopenharmony_ci i = overlap[i]; 17587db96d56Sopenharmony_ci } while (i != 0); 17597db96d56Sopenharmony_ci } 17607db96d56Sopenharmony_ci return 0; 17617db96d56Sopenharmony_ci } 17627db96d56Sopenharmony_ci 17637db96d56Sopenharmony_ci if (charset) { 17647db96d56Sopenharmony_ci /* pattern starts with a character from a known set */ 17657db96d56Sopenharmony_ci end = (SRE_CHAR *)state->end; 17667db96d56Sopenharmony_ci state->must_advance = 0; 17677db96d56Sopenharmony_ci for (;;) { 17687db96d56Sopenharmony_ci while (ptr < end && !SRE(charset)(state, charset, *ptr)) 17697db96d56Sopenharmony_ci ptr++; 17707db96d56Sopenharmony_ci if (ptr >= end) 17717db96d56Sopenharmony_ci return 0; 17727db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); 17737db96d56Sopenharmony_ci state->start = ptr; 17747db96d56Sopenharmony_ci state->ptr = ptr; 17757db96d56Sopenharmony_ci status = SRE(match)(state, pattern, 0); 17767db96d56Sopenharmony_ci if (status != 0) 17777db96d56Sopenharmony_ci break; 17787db96d56Sopenharmony_ci ptr++; 17797db96d56Sopenharmony_ci RESET_CAPTURE_GROUP(); 17807db96d56Sopenharmony_ci } 17817db96d56Sopenharmony_ci } else { 17827db96d56Sopenharmony_ci /* general case */ 17837db96d56Sopenharmony_ci assert(ptr <= end); 17847db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); 17857db96d56Sopenharmony_ci state->start = state->ptr = ptr; 17867db96d56Sopenharmony_ci status = SRE(match)(state, pattern, 1); 17877db96d56Sopenharmony_ci state->must_advance = 0; 17887db96d56Sopenharmony_ci if (status == 0 && pattern[0] == SRE_OP_AT && 17897db96d56Sopenharmony_ci (pattern[1] == SRE_AT_BEGINNING || 17907db96d56Sopenharmony_ci pattern[1] == SRE_AT_BEGINNING_STRING)) 17917db96d56Sopenharmony_ci { 17927db96d56Sopenharmony_ci state->start = state->ptr = ptr = end; 17937db96d56Sopenharmony_ci return 0; 17947db96d56Sopenharmony_ci } 17957db96d56Sopenharmony_ci while (status == 0 && ptr < end) { 17967db96d56Sopenharmony_ci ptr++; 17977db96d56Sopenharmony_ci RESET_CAPTURE_GROUP(); 17987db96d56Sopenharmony_ci TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); 17997db96d56Sopenharmony_ci state->start = state->ptr = ptr; 18007db96d56Sopenharmony_ci status = SRE(match)(state, pattern, 0); 18017db96d56Sopenharmony_ci } 18027db96d56Sopenharmony_ci } 18037db96d56Sopenharmony_ci 18047db96d56Sopenharmony_ci return status; 18057db96d56Sopenharmony_ci} 18067db96d56Sopenharmony_ci 18077db96d56Sopenharmony_ci#undef SRE_CHAR 18087db96d56Sopenharmony_ci#undef SIZEOF_SRE_CHAR 18097db96d56Sopenharmony_ci#undef SRE 18107db96d56Sopenharmony_ci 18117db96d56Sopenharmony_ci/* vim:ts=4:sw=4:et 18127db96d56Sopenharmony_ci*/ 1813