1570af302Sopenharmony_ci/* 2570af302Sopenharmony_ci regexec.c - TRE POSIX compatible matching functions (and more). 3570af302Sopenharmony_ci 4570af302Sopenharmony_ci Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 5570af302Sopenharmony_ci All rights reserved. 6570af302Sopenharmony_ci 7570af302Sopenharmony_ci Redistribution and use in source and binary forms, with or without 8570af302Sopenharmony_ci modification, are permitted provided that the following conditions 9570af302Sopenharmony_ci are met: 10570af302Sopenharmony_ci 11570af302Sopenharmony_ci 1. Redistributions of source code must retain the above copyright 12570af302Sopenharmony_ci notice, this list of conditions and the following disclaimer. 13570af302Sopenharmony_ci 14570af302Sopenharmony_ci 2. Redistributions in binary form must reproduce the above copyright 15570af302Sopenharmony_ci notice, this list of conditions and the following disclaimer in the 16570af302Sopenharmony_ci documentation and/or other materials provided with the distribution. 17570af302Sopenharmony_ci 18570af302Sopenharmony_ci THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 19570af302Sopenharmony_ci ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20570af302Sopenharmony_ci LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21570af302Sopenharmony_ci A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22570af302Sopenharmony_ci HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23570af302Sopenharmony_ci SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24570af302Sopenharmony_ci LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25570af302Sopenharmony_ci DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26570af302Sopenharmony_ci THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27570af302Sopenharmony_ci (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28570af302Sopenharmony_ci OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29570af302Sopenharmony_ci 30570af302Sopenharmony_ci*/ 31570af302Sopenharmony_ci 32570af302Sopenharmony_ci#include <stdlib.h> 33570af302Sopenharmony_ci#include <string.h> 34570af302Sopenharmony_ci#include <wchar.h> 35570af302Sopenharmony_ci#include <wctype.h> 36570af302Sopenharmony_ci#include <limits.h> 37570af302Sopenharmony_ci#include <stdint.h> 38570af302Sopenharmony_ci 39570af302Sopenharmony_ci#include <regex.h> 40570af302Sopenharmony_ci 41570af302Sopenharmony_ci#include "tre.h" 42570af302Sopenharmony_ci 43570af302Sopenharmony_ci#include <assert.h> 44570af302Sopenharmony_ci 45570af302Sopenharmony_cistatic void 46570af302Sopenharmony_citre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 47570af302Sopenharmony_ci const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo); 48570af302Sopenharmony_ci 49570af302Sopenharmony_ci/*********************************************************************** 50570af302Sopenharmony_ci from tre-match-utils.h 51570af302Sopenharmony_ci***********************************************************************/ 52570af302Sopenharmony_ci 53570af302Sopenharmony_ci#define GET_NEXT_WCHAR() do { \ 54570af302Sopenharmony_ci prev_c = next_c; pos += pos_add_next; \ 55570af302Sopenharmony_ci if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ 56570af302Sopenharmony_ci if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ 57570af302Sopenharmony_ci else pos_add_next++; \ 58570af302Sopenharmony_ci } \ 59570af302Sopenharmony_ci str_byte += pos_add_next; \ 60570af302Sopenharmony_ci } while (0) 61570af302Sopenharmony_ci 62570af302Sopenharmony_ci#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) 63570af302Sopenharmony_ci 64570af302Sopenharmony_ci#define CHECK_ASSERTIONS(assertions) \ 65570af302Sopenharmony_ci (((assertions & ASSERT_AT_BOL) \ 66570af302Sopenharmony_ci && (pos > 0 || reg_notbol) \ 67570af302Sopenharmony_ci && (prev_c != L'\n' || !reg_newline)) \ 68570af302Sopenharmony_ci || ((assertions & ASSERT_AT_EOL) \ 69570af302Sopenharmony_ci && (next_c != L'\0' || reg_noteol) \ 70570af302Sopenharmony_ci && (next_c != L'\n' || !reg_newline)) \ 71570af302Sopenharmony_ci || ((assertions & ASSERT_AT_BOW) \ 72570af302Sopenharmony_ci && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ 73570af302Sopenharmony_ci || ((assertions & ASSERT_AT_EOW) \ 74570af302Sopenharmony_ci && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ 75570af302Sopenharmony_ci || ((assertions & ASSERT_AT_WB) \ 76570af302Sopenharmony_ci && (pos != 0 && next_c != L'\0' \ 77570af302Sopenharmony_ci && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ 78570af302Sopenharmony_ci || ((assertions & ASSERT_AT_WB_NEG) \ 79570af302Sopenharmony_ci && (pos == 0 || next_c == L'\0' \ 80570af302Sopenharmony_ci || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) 81570af302Sopenharmony_ci 82570af302Sopenharmony_ci#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ 83570af302Sopenharmony_ci (((trans_i->assertions & ASSERT_CHAR_CLASS) \ 84570af302Sopenharmony_ci && !(tnfa->cflags & REG_ICASE) \ 85570af302Sopenharmony_ci && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ 86570af302Sopenharmony_ci || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ 87570af302Sopenharmony_ci && (tnfa->cflags & REG_ICASE) \ 88570af302Sopenharmony_ci && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ 89570af302Sopenharmony_ci && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ 90570af302Sopenharmony_ci || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ 91570af302Sopenharmony_ci && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ 92570af302Sopenharmony_ci tnfa->cflags & REG_ICASE))) 93570af302Sopenharmony_ci 94570af302Sopenharmony_ci 95570af302Sopenharmony_ci 96570af302Sopenharmony_ci 97570af302Sopenharmony_ci/* Returns 1 if `t1' wins `t2', 0 otherwise. */ 98570af302Sopenharmony_cistatic int 99570af302Sopenharmony_citre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, 100570af302Sopenharmony_ci regoff_t *t1, regoff_t *t2) 101570af302Sopenharmony_ci{ 102570af302Sopenharmony_ci int i; 103570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 104570af302Sopenharmony_ci { 105570af302Sopenharmony_ci if (tag_directions[i] == TRE_TAG_MINIMIZE) 106570af302Sopenharmony_ci { 107570af302Sopenharmony_ci if (t1[i] < t2[i]) 108570af302Sopenharmony_ci return 1; 109570af302Sopenharmony_ci if (t1[i] > t2[i]) 110570af302Sopenharmony_ci return 0; 111570af302Sopenharmony_ci } 112570af302Sopenharmony_ci else 113570af302Sopenharmony_ci { 114570af302Sopenharmony_ci if (t1[i] > t2[i]) 115570af302Sopenharmony_ci return 1; 116570af302Sopenharmony_ci if (t1[i] < t2[i]) 117570af302Sopenharmony_ci return 0; 118570af302Sopenharmony_ci } 119570af302Sopenharmony_ci } 120570af302Sopenharmony_ci /* assert(0);*/ 121570af302Sopenharmony_ci return 0; 122570af302Sopenharmony_ci} 123570af302Sopenharmony_ci 124570af302Sopenharmony_cistatic int 125570af302Sopenharmony_citre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) 126570af302Sopenharmony_ci{ 127570af302Sopenharmony_ci while (*classes != (tre_ctype_t)0) 128570af302Sopenharmony_ci if ((!icase && tre_isctype(wc, *classes)) 129570af302Sopenharmony_ci || (icase && (tre_isctype(tre_toupper(wc), *classes) 130570af302Sopenharmony_ci || tre_isctype(tre_tolower(wc), *classes)))) 131570af302Sopenharmony_ci return 1; /* Match. */ 132570af302Sopenharmony_ci else 133570af302Sopenharmony_ci classes++; 134570af302Sopenharmony_ci return 0; /* No match. */ 135570af302Sopenharmony_ci} 136570af302Sopenharmony_ci 137570af302Sopenharmony_ci 138570af302Sopenharmony_ci/*********************************************************************** 139570af302Sopenharmony_ci from tre-match-parallel.c 140570af302Sopenharmony_ci***********************************************************************/ 141570af302Sopenharmony_ci 142570af302Sopenharmony_ci/* 143570af302Sopenharmony_ci This algorithm searches for matches basically by reading characters 144570af302Sopenharmony_ci in the searched string one by one, starting at the beginning. All 145570af302Sopenharmony_ci matching paths in the TNFA are traversed in parallel. When two or 146570af302Sopenharmony_ci more paths reach the same state, exactly one is chosen according to 147570af302Sopenharmony_ci tag ordering rules; if returning submatches is not required it does 148570af302Sopenharmony_ci not matter which path is chosen. 149570af302Sopenharmony_ci 150570af302Sopenharmony_ci The worst case time required for finding the leftmost and longest 151570af302Sopenharmony_ci match, or determining that there is no match, is always linearly 152570af302Sopenharmony_ci dependent on the length of the text being searched. 153570af302Sopenharmony_ci 154570af302Sopenharmony_ci This algorithm cannot handle TNFAs with back referencing nodes. 155570af302Sopenharmony_ci See `tre-match-backtrack.c'. 156570af302Sopenharmony_ci*/ 157570af302Sopenharmony_ci 158570af302Sopenharmony_citypedef struct { 159570af302Sopenharmony_ci tre_tnfa_transition_t *state; 160570af302Sopenharmony_ci regoff_t *tags; 161570af302Sopenharmony_ci} tre_tnfa_reach_t; 162570af302Sopenharmony_ci 163570af302Sopenharmony_citypedef struct { 164570af302Sopenharmony_ci regoff_t pos; 165570af302Sopenharmony_ci regoff_t **tags; 166570af302Sopenharmony_ci} tre_reach_pos_t; 167570af302Sopenharmony_ci 168570af302Sopenharmony_ci 169570af302Sopenharmony_cistatic reg_errcode_t 170570af302Sopenharmony_citre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, 171570af302Sopenharmony_ci regoff_t *match_tags, int eflags, 172570af302Sopenharmony_ci regoff_t *match_end_ofs) 173570af302Sopenharmony_ci{ 174570af302Sopenharmony_ci /* State variables required by GET_NEXT_WCHAR. */ 175570af302Sopenharmony_ci tre_char_t prev_c = 0, next_c = 0; 176570af302Sopenharmony_ci const char *str_byte = string; 177570af302Sopenharmony_ci regoff_t pos = -1; 178570af302Sopenharmony_ci regoff_t pos_add_next = 1; 179570af302Sopenharmony_ci#ifdef TRE_MBSTATE 180570af302Sopenharmony_ci mbstate_t mbstate; 181570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 182570af302Sopenharmony_ci int reg_notbol = eflags & REG_NOTBOL; 183570af302Sopenharmony_ci int reg_noteol = eflags & REG_NOTEOL; 184570af302Sopenharmony_ci int reg_newline = tnfa->cflags & REG_NEWLINE; 185570af302Sopenharmony_ci reg_errcode_t ret; 186570af302Sopenharmony_ci 187570af302Sopenharmony_ci char *buf; 188570af302Sopenharmony_ci tre_tnfa_transition_t *trans_i; 189570af302Sopenharmony_ci tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; 190570af302Sopenharmony_ci tre_reach_pos_t *reach_pos; 191570af302Sopenharmony_ci int *tag_i; 192570af302Sopenharmony_ci int num_tags, i; 193570af302Sopenharmony_ci 194570af302Sopenharmony_ci regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */ 195570af302Sopenharmony_ci int new_match = 0; 196570af302Sopenharmony_ci regoff_t *tmp_tags = NULL; 197570af302Sopenharmony_ci regoff_t *tmp_iptr; 198570af302Sopenharmony_ci 199570af302Sopenharmony_ci#ifdef TRE_MBSTATE 200570af302Sopenharmony_ci memset(&mbstate, '\0', sizeof(mbstate)); 201570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 202570af302Sopenharmony_ci 203570af302Sopenharmony_ci if (!match_tags) 204570af302Sopenharmony_ci num_tags = 0; 205570af302Sopenharmony_ci else 206570af302Sopenharmony_ci num_tags = tnfa->num_tags; 207570af302Sopenharmony_ci 208570af302Sopenharmony_ci /* Allocate memory for temporary data required for matching. This needs to 209570af302Sopenharmony_ci be done for every matching operation to be thread safe. This allocates 210570af302Sopenharmony_ci everything in a single large block with calloc(). */ 211570af302Sopenharmony_ci { 212570af302Sopenharmony_ci size_t tbytes, rbytes, pbytes, xbytes, total_bytes; 213570af302Sopenharmony_ci char *tmp_buf; 214570af302Sopenharmony_ci 215570af302Sopenharmony_ci /* Ensure that tbytes and xbytes*num_states cannot overflow, and that 216570af302Sopenharmony_ci * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */ 217570af302Sopenharmony_ci if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states)) 218570af302Sopenharmony_ci return REG_ESPACE; 219570af302Sopenharmony_ci 220570af302Sopenharmony_ci /* Likewise check rbytes. */ 221570af302Sopenharmony_ci if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next))) 222570af302Sopenharmony_ci return REG_ESPACE; 223570af302Sopenharmony_ci 224570af302Sopenharmony_ci /* Likewise check pbytes. */ 225570af302Sopenharmony_ci if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos))) 226570af302Sopenharmony_ci return REG_ESPACE; 227570af302Sopenharmony_ci 228570af302Sopenharmony_ci /* Compute the length of the block we need. */ 229570af302Sopenharmony_ci tbytes = sizeof(*tmp_tags) * num_tags; 230570af302Sopenharmony_ci rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); 231570af302Sopenharmony_ci pbytes = sizeof(*reach_pos) * tnfa->num_states; 232570af302Sopenharmony_ci xbytes = sizeof(regoff_t) * num_tags; 233570af302Sopenharmony_ci total_bytes = 234570af302Sopenharmony_ci (sizeof(long) - 1) * 4 /* for alignment paddings */ 235570af302Sopenharmony_ci + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; 236570af302Sopenharmony_ci 237570af302Sopenharmony_ci /* Allocate the memory. */ 238570af302Sopenharmony_ci buf = calloc(total_bytes, 1); 239570af302Sopenharmony_ci if (buf == NULL) 240570af302Sopenharmony_ci return REG_ESPACE; 241570af302Sopenharmony_ci 242570af302Sopenharmony_ci /* Get the various pointers within tmp_buf (properly aligned). */ 243570af302Sopenharmony_ci tmp_tags = (void *)buf; 244570af302Sopenharmony_ci tmp_buf = buf + tbytes; 245570af302Sopenharmony_ci tmp_buf += ALIGN(tmp_buf, long); 246570af302Sopenharmony_ci reach_next = (void *)tmp_buf; 247570af302Sopenharmony_ci tmp_buf += rbytes; 248570af302Sopenharmony_ci tmp_buf += ALIGN(tmp_buf, long); 249570af302Sopenharmony_ci reach = (void *)tmp_buf; 250570af302Sopenharmony_ci tmp_buf += rbytes; 251570af302Sopenharmony_ci tmp_buf += ALIGN(tmp_buf, long); 252570af302Sopenharmony_ci reach_pos = (void *)tmp_buf; 253570af302Sopenharmony_ci tmp_buf += pbytes; 254570af302Sopenharmony_ci tmp_buf += ALIGN(tmp_buf, long); 255570af302Sopenharmony_ci for (i = 0; i < tnfa->num_states; i++) 256570af302Sopenharmony_ci { 257570af302Sopenharmony_ci reach[i].tags = (void *)tmp_buf; 258570af302Sopenharmony_ci tmp_buf += xbytes; 259570af302Sopenharmony_ci reach_next[i].tags = (void *)tmp_buf; 260570af302Sopenharmony_ci tmp_buf += xbytes; 261570af302Sopenharmony_ci } 262570af302Sopenharmony_ci } 263570af302Sopenharmony_ci 264570af302Sopenharmony_ci for (i = 0; i < tnfa->num_states; i++) 265570af302Sopenharmony_ci reach_pos[i].pos = -1; 266570af302Sopenharmony_ci 267570af302Sopenharmony_ci GET_NEXT_WCHAR(); 268570af302Sopenharmony_ci pos = 0; 269570af302Sopenharmony_ci 270570af302Sopenharmony_ci reach_next_i = reach_next; 271570af302Sopenharmony_ci while (1) 272570af302Sopenharmony_ci { 273570af302Sopenharmony_ci /* If no match found yet, add the initial states to `reach_next'. */ 274570af302Sopenharmony_ci if (match_eo < 0) 275570af302Sopenharmony_ci { 276570af302Sopenharmony_ci trans_i = tnfa->initial; 277570af302Sopenharmony_ci while (trans_i->state != NULL) 278570af302Sopenharmony_ci { 279570af302Sopenharmony_ci if (reach_pos[trans_i->state_id].pos < pos) 280570af302Sopenharmony_ci { 281570af302Sopenharmony_ci if (trans_i->assertions 282570af302Sopenharmony_ci && CHECK_ASSERTIONS(trans_i->assertions)) 283570af302Sopenharmony_ci { 284570af302Sopenharmony_ci trans_i++; 285570af302Sopenharmony_ci continue; 286570af302Sopenharmony_ci } 287570af302Sopenharmony_ci 288570af302Sopenharmony_ci reach_next_i->state = trans_i->state; 289570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 290570af302Sopenharmony_ci reach_next_i->tags[i] = -1; 291570af302Sopenharmony_ci tag_i = trans_i->tags; 292570af302Sopenharmony_ci if (tag_i) 293570af302Sopenharmony_ci while (*tag_i >= 0) 294570af302Sopenharmony_ci { 295570af302Sopenharmony_ci if (*tag_i < num_tags) 296570af302Sopenharmony_ci reach_next_i->tags[*tag_i] = pos; 297570af302Sopenharmony_ci tag_i++; 298570af302Sopenharmony_ci } 299570af302Sopenharmony_ci if (reach_next_i->state == tnfa->final) 300570af302Sopenharmony_ci { 301570af302Sopenharmony_ci match_eo = pos; 302570af302Sopenharmony_ci new_match = 1; 303570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 304570af302Sopenharmony_ci match_tags[i] = reach_next_i->tags[i]; 305570af302Sopenharmony_ci } 306570af302Sopenharmony_ci reach_pos[trans_i->state_id].pos = pos; 307570af302Sopenharmony_ci reach_pos[trans_i->state_id].tags = &reach_next_i->tags; 308570af302Sopenharmony_ci reach_next_i++; 309570af302Sopenharmony_ci } 310570af302Sopenharmony_ci trans_i++; 311570af302Sopenharmony_ci } 312570af302Sopenharmony_ci reach_next_i->state = NULL; 313570af302Sopenharmony_ci } 314570af302Sopenharmony_ci else 315570af302Sopenharmony_ci { 316570af302Sopenharmony_ci if (num_tags == 0 || reach_next_i == reach_next) 317570af302Sopenharmony_ci /* We have found a match. */ 318570af302Sopenharmony_ci break; 319570af302Sopenharmony_ci } 320570af302Sopenharmony_ci 321570af302Sopenharmony_ci /* Check for end of string. */ 322570af302Sopenharmony_ci if (!next_c) break; 323570af302Sopenharmony_ci 324570af302Sopenharmony_ci GET_NEXT_WCHAR(); 325570af302Sopenharmony_ci 326570af302Sopenharmony_ci /* Swap `reach' and `reach_next'. */ 327570af302Sopenharmony_ci reach_i = reach; 328570af302Sopenharmony_ci reach = reach_next; 329570af302Sopenharmony_ci reach_next = reach_i; 330570af302Sopenharmony_ci 331570af302Sopenharmony_ci /* For each state in `reach', weed out states that don't fulfill the 332570af302Sopenharmony_ci minimal matching conditions. */ 333570af302Sopenharmony_ci if (tnfa->num_minimals && new_match) 334570af302Sopenharmony_ci { 335570af302Sopenharmony_ci new_match = 0; 336570af302Sopenharmony_ci reach_next_i = reach_next; 337570af302Sopenharmony_ci for (reach_i = reach; reach_i->state; reach_i++) 338570af302Sopenharmony_ci { 339570af302Sopenharmony_ci int skip = 0; 340570af302Sopenharmony_ci for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) 341570af302Sopenharmony_ci { 342570af302Sopenharmony_ci int end = tnfa->minimal_tags[i]; 343570af302Sopenharmony_ci int start = tnfa->minimal_tags[i + 1]; 344570af302Sopenharmony_ci if (end >= num_tags) 345570af302Sopenharmony_ci { 346570af302Sopenharmony_ci skip = 1; 347570af302Sopenharmony_ci break; 348570af302Sopenharmony_ci } 349570af302Sopenharmony_ci else if (reach_i->tags[start] == match_tags[start] 350570af302Sopenharmony_ci && reach_i->tags[end] < match_tags[end]) 351570af302Sopenharmony_ci { 352570af302Sopenharmony_ci skip = 1; 353570af302Sopenharmony_ci break; 354570af302Sopenharmony_ci } 355570af302Sopenharmony_ci } 356570af302Sopenharmony_ci if (!skip) 357570af302Sopenharmony_ci { 358570af302Sopenharmony_ci reach_next_i->state = reach_i->state; 359570af302Sopenharmony_ci tmp_iptr = reach_next_i->tags; 360570af302Sopenharmony_ci reach_next_i->tags = reach_i->tags; 361570af302Sopenharmony_ci reach_i->tags = tmp_iptr; 362570af302Sopenharmony_ci reach_next_i++; 363570af302Sopenharmony_ci } 364570af302Sopenharmony_ci } 365570af302Sopenharmony_ci reach_next_i->state = NULL; 366570af302Sopenharmony_ci 367570af302Sopenharmony_ci /* Swap `reach' and `reach_next'. */ 368570af302Sopenharmony_ci reach_i = reach; 369570af302Sopenharmony_ci reach = reach_next; 370570af302Sopenharmony_ci reach_next = reach_i; 371570af302Sopenharmony_ci } 372570af302Sopenharmony_ci 373570af302Sopenharmony_ci /* For each state in `reach' see if there is a transition leaving with 374570af302Sopenharmony_ci the current input symbol to a state not yet in `reach_next', and 375570af302Sopenharmony_ci add the destination states to `reach_next'. */ 376570af302Sopenharmony_ci reach_next_i = reach_next; 377570af302Sopenharmony_ci for (reach_i = reach; reach_i->state; reach_i++) 378570af302Sopenharmony_ci { 379570af302Sopenharmony_ci for (trans_i = reach_i->state; trans_i->state; trans_i++) 380570af302Sopenharmony_ci { 381570af302Sopenharmony_ci /* Does this transition match the input symbol? */ 382570af302Sopenharmony_ci if (trans_i->code_min <= (tre_cint_t)prev_c && 383570af302Sopenharmony_ci trans_i->code_max >= (tre_cint_t)prev_c) 384570af302Sopenharmony_ci { 385570af302Sopenharmony_ci if (trans_i->assertions 386570af302Sopenharmony_ci && (CHECK_ASSERTIONS(trans_i->assertions) 387570af302Sopenharmony_ci || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) 388570af302Sopenharmony_ci { 389570af302Sopenharmony_ci continue; 390570af302Sopenharmony_ci } 391570af302Sopenharmony_ci 392570af302Sopenharmony_ci /* Compute the tags after this transition. */ 393570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 394570af302Sopenharmony_ci tmp_tags[i] = reach_i->tags[i]; 395570af302Sopenharmony_ci tag_i = trans_i->tags; 396570af302Sopenharmony_ci if (tag_i != NULL) 397570af302Sopenharmony_ci while (*tag_i >= 0) 398570af302Sopenharmony_ci { 399570af302Sopenharmony_ci if (*tag_i < num_tags) 400570af302Sopenharmony_ci tmp_tags[*tag_i] = pos; 401570af302Sopenharmony_ci tag_i++; 402570af302Sopenharmony_ci } 403570af302Sopenharmony_ci 404570af302Sopenharmony_ci if (reach_pos[trans_i->state_id].pos < pos) 405570af302Sopenharmony_ci { 406570af302Sopenharmony_ci /* Found an unvisited node. */ 407570af302Sopenharmony_ci reach_next_i->state = trans_i->state; 408570af302Sopenharmony_ci tmp_iptr = reach_next_i->tags; 409570af302Sopenharmony_ci reach_next_i->tags = tmp_tags; 410570af302Sopenharmony_ci tmp_tags = tmp_iptr; 411570af302Sopenharmony_ci reach_pos[trans_i->state_id].pos = pos; 412570af302Sopenharmony_ci reach_pos[trans_i->state_id].tags = &reach_next_i->tags; 413570af302Sopenharmony_ci 414570af302Sopenharmony_ci if (reach_next_i->state == tnfa->final 415570af302Sopenharmony_ci && (match_eo == -1 416570af302Sopenharmony_ci || (num_tags > 0 417570af302Sopenharmony_ci && reach_next_i->tags[0] <= match_tags[0]))) 418570af302Sopenharmony_ci { 419570af302Sopenharmony_ci match_eo = pos; 420570af302Sopenharmony_ci new_match = 1; 421570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 422570af302Sopenharmony_ci match_tags[i] = reach_next_i->tags[i]; 423570af302Sopenharmony_ci } 424570af302Sopenharmony_ci reach_next_i++; 425570af302Sopenharmony_ci 426570af302Sopenharmony_ci } 427570af302Sopenharmony_ci else 428570af302Sopenharmony_ci { 429570af302Sopenharmony_ci assert(reach_pos[trans_i->state_id].pos == pos); 430570af302Sopenharmony_ci /* Another path has also reached this state. We choose 431570af302Sopenharmony_ci the winner by examining the tag values for both 432570af302Sopenharmony_ci paths. */ 433570af302Sopenharmony_ci if (tre_tag_order(num_tags, tnfa->tag_directions, 434570af302Sopenharmony_ci tmp_tags, 435570af302Sopenharmony_ci *reach_pos[trans_i->state_id].tags)) 436570af302Sopenharmony_ci { 437570af302Sopenharmony_ci /* The new path wins. */ 438570af302Sopenharmony_ci tmp_iptr = *reach_pos[trans_i->state_id].tags; 439570af302Sopenharmony_ci *reach_pos[trans_i->state_id].tags = tmp_tags; 440570af302Sopenharmony_ci if (trans_i->state == tnfa->final) 441570af302Sopenharmony_ci { 442570af302Sopenharmony_ci match_eo = pos; 443570af302Sopenharmony_ci new_match = 1; 444570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 445570af302Sopenharmony_ci match_tags[i] = tmp_tags[i]; 446570af302Sopenharmony_ci } 447570af302Sopenharmony_ci tmp_tags = tmp_iptr; 448570af302Sopenharmony_ci } 449570af302Sopenharmony_ci } 450570af302Sopenharmony_ci } 451570af302Sopenharmony_ci } 452570af302Sopenharmony_ci } 453570af302Sopenharmony_ci reach_next_i->state = NULL; 454570af302Sopenharmony_ci } 455570af302Sopenharmony_ci 456570af302Sopenharmony_ci *match_end_ofs = match_eo; 457570af302Sopenharmony_ci ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 458570af302Sopenharmony_cierror_exit: 459570af302Sopenharmony_ci xfree(buf); 460570af302Sopenharmony_ci return ret; 461570af302Sopenharmony_ci} 462570af302Sopenharmony_ci 463570af302Sopenharmony_ci 464570af302Sopenharmony_ci 465570af302Sopenharmony_ci/*********************************************************************** 466570af302Sopenharmony_ci from tre-match-backtrack.c 467570af302Sopenharmony_ci***********************************************************************/ 468570af302Sopenharmony_ci 469570af302Sopenharmony_ci/* 470570af302Sopenharmony_ci This matcher is for regexps that use back referencing. Regexp matching 471570af302Sopenharmony_ci with back referencing is an NP-complete problem on the number of back 472570af302Sopenharmony_ci references. The easiest way to match them is to use a backtracking 473570af302Sopenharmony_ci routine which basically goes through all possible paths in the TNFA 474570af302Sopenharmony_ci and chooses the one which results in the best (leftmost and longest) 475570af302Sopenharmony_ci match. This can be spectacularly expensive and may run out of stack 476570af302Sopenharmony_ci space, but there really is no better known generic algorithm. Quoting 477570af302Sopenharmony_ci Henry Spencer from comp.compilers: 478570af302Sopenharmony_ci <URL: http://compilers.iecc.com/comparch/article/93-03-102> 479570af302Sopenharmony_ci 480570af302Sopenharmony_ci POSIX.2 REs require longest match, which is really exciting to 481570af302Sopenharmony_ci implement since the obsolete ("basic") variant also includes 482570af302Sopenharmony_ci \<digit>. I haven't found a better way of tackling this than doing 483570af302Sopenharmony_ci a preliminary match using a DFA (or simulation) on a modified RE 484570af302Sopenharmony_ci that just replicates subREs for \<digit>, and then doing a 485570af302Sopenharmony_ci backtracking match to determine whether the subRE matches were 486570af302Sopenharmony_ci right. This can be rather slow, but I console myself with the 487570af302Sopenharmony_ci thought that people who use \<digit> deserve very slow execution. 488570af302Sopenharmony_ci (Pun unintentional but very appropriate.) 489570af302Sopenharmony_ci 490570af302Sopenharmony_ci*/ 491570af302Sopenharmony_ci 492570af302Sopenharmony_citypedef struct { 493570af302Sopenharmony_ci regoff_t pos; 494570af302Sopenharmony_ci const char *str_byte; 495570af302Sopenharmony_ci tre_tnfa_transition_t *state; 496570af302Sopenharmony_ci int state_id; 497570af302Sopenharmony_ci int next_c; 498570af302Sopenharmony_ci regoff_t *tags; 499570af302Sopenharmony_ci#ifdef TRE_MBSTATE 500570af302Sopenharmony_ci mbstate_t mbstate; 501570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 502570af302Sopenharmony_ci} tre_backtrack_item_t; 503570af302Sopenharmony_ci 504570af302Sopenharmony_citypedef struct tre_backtrack_struct { 505570af302Sopenharmony_ci tre_backtrack_item_t item; 506570af302Sopenharmony_ci struct tre_backtrack_struct *prev; 507570af302Sopenharmony_ci struct tre_backtrack_struct *next; 508570af302Sopenharmony_ci} *tre_backtrack_t; 509570af302Sopenharmony_ci 510570af302Sopenharmony_ci#ifdef TRE_MBSTATE 511570af302Sopenharmony_ci#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) 512570af302Sopenharmony_ci#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate 513570af302Sopenharmony_ci#else /* !TRE_MBSTATE */ 514570af302Sopenharmony_ci#define BT_STACK_MBSTATE_IN 515570af302Sopenharmony_ci#define BT_STACK_MBSTATE_OUT 516570af302Sopenharmony_ci#endif /* !TRE_MBSTATE */ 517570af302Sopenharmony_ci 518570af302Sopenharmony_ci#define tre_bt_mem_new tre_mem_new 519570af302Sopenharmony_ci#define tre_bt_mem_alloc tre_mem_alloc 520570af302Sopenharmony_ci#define tre_bt_mem_destroy tre_mem_destroy 521570af302Sopenharmony_ci 522570af302Sopenharmony_ci 523570af302Sopenharmony_ci#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ 524570af302Sopenharmony_ci do \ 525570af302Sopenharmony_ci { \ 526570af302Sopenharmony_ci int i; \ 527570af302Sopenharmony_ci if (!stack->next) \ 528570af302Sopenharmony_ci { \ 529570af302Sopenharmony_ci tre_backtrack_t s; \ 530570af302Sopenharmony_ci s = tre_bt_mem_alloc(mem, sizeof(*s)); \ 531570af302Sopenharmony_ci if (!s) \ 532570af302Sopenharmony_ci { \ 533570af302Sopenharmony_ci tre_bt_mem_destroy(mem); \ 534570af302Sopenharmony_ci if (tags) \ 535570af302Sopenharmony_ci xfree(tags); \ 536570af302Sopenharmony_ci if (pmatch) \ 537570af302Sopenharmony_ci xfree(pmatch); \ 538570af302Sopenharmony_ci if (states_seen) \ 539570af302Sopenharmony_ci xfree(states_seen); \ 540570af302Sopenharmony_ci return REG_ESPACE; \ 541570af302Sopenharmony_ci } \ 542570af302Sopenharmony_ci s->prev = stack; \ 543570af302Sopenharmony_ci s->next = NULL; \ 544570af302Sopenharmony_ci s->item.tags = tre_bt_mem_alloc(mem, \ 545570af302Sopenharmony_ci sizeof(*tags) * tnfa->num_tags); \ 546570af302Sopenharmony_ci if (!s->item.tags) \ 547570af302Sopenharmony_ci { \ 548570af302Sopenharmony_ci tre_bt_mem_destroy(mem); \ 549570af302Sopenharmony_ci if (tags) \ 550570af302Sopenharmony_ci xfree(tags); \ 551570af302Sopenharmony_ci if (pmatch) \ 552570af302Sopenharmony_ci xfree(pmatch); \ 553570af302Sopenharmony_ci if (states_seen) \ 554570af302Sopenharmony_ci xfree(states_seen); \ 555570af302Sopenharmony_ci return REG_ESPACE; \ 556570af302Sopenharmony_ci } \ 557570af302Sopenharmony_ci stack->next = s; \ 558570af302Sopenharmony_ci stack = s; \ 559570af302Sopenharmony_ci } \ 560570af302Sopenharmony_ci else \ 561570af302Sopenharmony_ci stack = stack->next; \ 562570af302Sopenharmony_ci stack->item.pos = (_pos); \ 563570af302Sopenharmony_ci stack->item.str_byte = (_str_byte); \ 564570af302Sopenharmony_ci stack->item.state = (_state); \ 565570af302Sopenharmony_ci stack->item.state_id = (_state_id); \ 566570af302Sopenharmony_ci stack->item.next_c = (_next_c); \ 567570af302Sopenharmony_ci for (i = 0; i < tnfa->num_tags; i++) \ 568570af302Sopenharmony_ci stack->item.tags[i] = (_tags)[i]; \ 569570af302Sopenharmony_ci BT_STACK_MBSTATE_IN; \ 570570af302Sopenharmony_ci } \ 571570af302Sopenharmony_ci while (0) 572570af302Sopenharmony_ci 573570af302Sopenharmony_ci#define BT_STACK_POP() \ 574570af302Sopenharmony_ci do \ 575570af302Sopenharmony_ci { \ 576570af302Sopenharmony_ci int i; \ 577570af302Sopenharmony_ci assert(stack->prev); \ 578570af302Sopenharmony_ci pos = stack->item.pos; \ 579570af302Sopenharmony_ci str_byte = stack->item.str_byte; \ 580570af302Sopenharmony_ci state = stack->item.state; \ 581570af302Sopenharmony_ci next_c = stack->item.next_c; \ 582570af302Sopenharmony_ci for (i = 0; i < tnfa->num_tags; i++) \ 583570af302Sopenharmony_ci tags[i] = stack->item.tags[i]; \ 584570af302Sopenharmony_ci BT_STACK_MBSTATE_OUT; \ 585570af302Sopenharmony_ci stack = stack->prev; \ 586570af302Sopenharmony_ci } \ 587570af302Sopenharmony_ci while (0) 588570af302Sopenharmony_ci 589570af302Sopenharmony_ci#undef MIN 590570af302Sopenharmony_ci#define MIN(a, b) ((a) <= (b) ? (a) : (b)) 591570af302Sopenharmony_ci 592570af302Sopenharmony_cistatic reg_errcode_t 593570af302Sopenharmony_citre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 594570af302Sopenharmony_ci regoff_t *match_tags, int eflags, regoff_t *match_end_ofs) 595570af302Sopenharmony_ci{ 596570af302Sopenharmony_ci /* State variables required by GET_NEXT_WCHAR. */ 597570af302Sopenharmony_ci tre_char_t prev_c = 0, next_c = 0; 598570af302Sopenharmony_ci const char *str_byte = string; 599570af302Sopenharmony_ci regoff_t pos = 0; 600570af302Sopenharmony_ci regoff_t pos_add_next = 1; 601570af302Sopenharmony_ci#ifdef TRE_MBSTATE 602570af302Sopenharmony_ci mbstate_t mbstate; 603570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 604570af302Sopenharmony_ci int reg_notbol = eflags & REG_NOTBOL; 605570af302Sopenharmony_ci int reg_noteol = eflags & REG_NOTEOL; 606570af302Sopenharmony_ci int reg_newline = tnfa->cflags & REG_NEWLINE; 607570af302Sopenharmony_ci 608570af302Sopenharmony_ci /* These are used to remember the necessary values of the above 609570af302Sopenharmony_ci variables to return to the position where the current search 610570af302Sopenharmony_ci started from. */ 611570af302Sopenharmony_ci int next_c_start; 612570af302Sopenharmony_ci const char *str_byte_start; 613570af302Sopenharmony_ci regoff_t pos_start = -1; 614570af302Sopenharmony_ci#ifdef TRE_MBSTATE 615570af302Sopenharmony_ci mbstate_t mbstate_start; 616570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 617570af302Sopenharmony_ci 618570af302Sopenharmony_ci /* End offset of best match so far, or -1 if no match found yet. */ 619570af302Sopenharmony_ci regoff_t match_eo = -1; 620570af302Sopenharmony_ci /* Tag arrays. */ 621570af302Sopenharmony_ci int *next_tags; 622570af302Sopenharmony_ci regoff_t *tags = NULL; 623570af302Sopenharmony_ci /* Current TNFA state. */ 624570af302Sopenharmony_ci tre_tnfa_transition_t *state; 625570af302Sopenharmony_ci int *states_seen = NULL; 626570af302Sopenharmony_ci 627570af302Sopenharmony_ci /* Memory allocator to for allocating the backtracking stack. */ 628570af302Sopenharmony_ci tre_mem_t mem = tre_bt_mem_new(); 629570af302Sopenharmony_ci 630570af302Sopenharmony_ci /* The backtracking stack. */ 631570af302Sopenharmony_ci tre_backtrack_t stack; 632570af302Sopenharmony_ci 633570af302Sopenharmony_ci tre_tnfa_transition_t *trans_i; 634570af302Sopenharmony_ci regmatch_t *pmatch = NULL; 635570af302Sopenharmony_ci int ret; 636570af302Sopenharmony_ci 637570af302Sopenharmony_ci#ifdef TRE_MBSTATE 638570af302Sopenharmony_ci memset(&mbstate, '\0', sizeof(mbstate)); 639570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 640570af302Sopenharmony_ci 641570af302Sopenharmony_ci if (!mem) 642570af302Sopenharmony_ci return REG_ESPACE; 643570af302Sopenharmony_ci stack = tre_bt_mem_alloc(mem, sizeof(*stack)); 644570af302Sopenharmony_ci if (!stack) 645570af302Sopenharmony_ci { 646570af302Sopenharmony_ci ret = REG_ESPACE; 647570af302Sopenharmony_ci goto error_exit; 648570af302Sopenharmony_ci } 649570af302Sopenharmony_ci stack->prev = NULL; 650570af302Sopenharmony_ci stack->next = NULL; 651570af302Sopenharmony_ci 652570af302Sopenharmony_ci if (tnfa->num_tags) 653570af302Sopenharmony_ci { 654570af302Sopenharmony_ci tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 655570af302Sopenharmony_ci if (!tags) 656570af302Sopenharmony_ci { 657570af302Sopenharmony_ci ret = REG_ESPACE; 658570af302Sopenharmony_ci goto error_exit; 659570af302Sopenharmony_ci } 660570af302Sopenharmony_ci } 661570af302Sopenharmony_ci if (tnfa->num_submatches) 662570af302Sopenharmony_ci { 663570af302Sopenharmony_ci pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); 664570af302Sopenharmony_ci if (!pmatch) 665570af302Sopenharmony_ci { 666570af302Sopenharmony_ci ret = REG_ESPACE; 667570af302Sopenharmony_ci goto error_exit; 668570af302Sopenharmony_ci } 669570af302Sopenharmony_ci } 670570af302Sopenharmony_ci if (tnfa->num_states) 671570af302Sopenharmony_ci { 672570af302Sopenharmony_ci states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); 673570af302Sopenharmony_ci if (!states_seen) 674570af302Sopenharmony_ci { 675570af302Sopenharmony_ci ret = REG_ESPACE; 676570af302Sopenharmony_ci goto error_exit; 677570af302Sopenharmony_ci } 678570af302Sopenharmony_ci } 679570af302Sopenharmony_ci 680570af302Sopenharmony_ci retry: 681570af302Sopenharmony_ci { 682570af302Sopenharmony_ci int i; 683570af302Sopenharmony_ci for (i = 0; i < tnfa->num_tags; i++) 684570af302Sopenharmony_ci { 685570af302Sopenharmony_ci tags[i] = -1; 686570af302Sopenharmony_ci if (match_tags) 687570af302Sopenharmony_ci match_tags[i] = -1; 688570af302Sopenharmony_ci } 689570af302Sopenharmony_ci for (i = 0; i < tnfa->num_states; i++) 690570af302Sopenharmony_ci states_seen[i] = 0; 691570af302Sopenharmony_ci } 692570af302Sopenharmony_ci 693570af302Sopenharmony_ci state = NULL; 694570af302Sopenharmony_ci pos = pos_start; 695570af302Sopenharmony_ci GET_NEXT_WCHAR(); 696570af302Sopenharmony_ci pos_start = pos; 697570af302Sopenharmony_ci next_c_start = next_c; 698570af302Sopenharmony_ci str_byte_start = str_byte; 699570af302Sopenharmony_ci#ifdef TRE_MBSTATE 700570af302Sopenharmony_ci mbstate_start = mbstate; 701570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 702570af302Sopenharmony_ci 703570af302Sopenharmony_ci /* Handle initial states. */ 704570af302Sopenharmony_ci next_tags = NULL; 705570af302Sopenharmony_ci for (trans_i = tnfa->initial; trans_i->state; trans_i++) 706570af302Sopenharmony_ci { 707570af302Sopenharmony_ci if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) 708570af302Sopenharmony_ci { 709570af302Sopenharmony_ci continue; 710570af302Sopenharmony_ci } 711570af302Sopenharmony_ci if (state == NULL) 712570af302Sopenharmony_ci { 713570af302Sopenharmony_ci /* Start from this state. */ 714570af302Sopenharmony_ci state = trans_i->state; 715570af302Sopenharmony_ci next_tags = trans_i->tags; 716570af302Sopenharmony_ci } 717570af302Sopenharmony_ci else 718570af302Sopenharmony_ci { 719570af302Sopenharmony_ci /* Backtrack to this state. */ 720570af302Sopenharmony_ci BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, 721570af302Sopenharmony_ci trans_i->state_id, next_c, tags, mbstate); 722570af302Sopenharmony_ci { 723570af302Sopenharmony_ci int *tmp = trans_i->tags; 724570af302Sopenharmony_ci if (tmp) 725570af302Sopenharmony_ci while (*tmp >= 0) 726570af302Sopenharmony_ci stack->item.tags[*tmp++] = pos; 727570af302Sopenharmony_ci } 728570af302Sopenharmony_ci } 729570af302Sopenharmony_ci } 730570af302Sopenharmony_ci 731570af302Sopenharmony_ci if (next_tags) 732570af302Sopenharmony_ci for (; *next_tags >= 0; next_tags++) 733570af302Sopenharmony_ci tags[*next_tags] = pos; 734570af302Sopenharmony_ci 735570af302Sopenharmony_ci 736570af302Sopenharmony_ci if (state == NULL) 737570af302Sopenharmony_ci goto backtrack; 738570af302Sopenharmony_ci 739570af302Sopenharmony_ci while (1) 740570af302Sopenharmony_ci { 741570af302Sopenharmony_ci tre_tnfa_transition_t *next_state; 742570af302Sopenharmony_ci int empty_br_match; 743570af302Sopenharmony_ci 744570af302Sopenharmony_ci if (state == tnfa->final) 745570af302Sopenharmony_ci { 746570af302Sopenharmony_ci if (match_eo < pos 747570af302Sopenharmony_ci || (match_eo == pos 748570af302Sopenharmony_ci && match_tags 749570af302Sopenharmony_ci && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, 750570af302Sopenharmony_ci tags, match_tags))) 751570af302Sopenharmony_ci { 752570af302Sopenharmony_ci int i; 753570af302Sopenharmony_ci /* This match wins the previous match. */ 754570af302Sopenharmony_ci match_eo = pos; 755570af302Sopenharmony_ci if (match_tags) 756570af302Sopenharmony_ci for (i = 0; i < tnfa->num_tags; i++) 757570af302Sopenharmony_ci match_tags[i] = tags[i]; 758570af302Sopenharmony_ci } 759570af302Sopenharmony_ci /* Our TNFAs never have transitions leaving from the final state, 760570af302Sopenharmony_ci so we jump right to backtracking. */ 761570af302Sopenharmony_ci goto backtrack; 762570af302Sopenharmony_ci } 763570af302Sopenharmony_ci 764570af302Sopenharmony_ci /* Go to the next character in the input string. */ 765570af302Sopenharmony_ci empty_br_match = 0; 766570af302Sopenharmony_ci trans_i = state; 767570af302Sopenharmony_ci if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) 768570af302Sopenharmony_ci { 769570af302Sopenharmony_ci /* This is a back reference state. All transitions leaving from 770570af302Sopenharmony_ci this state have the same back reference "assertion". Instead 771570af302Sopenharmony_ci of reading the next character, we match the back reference. */ 772570af302Sopenharmony_ci regoff_t so, eo; 773570af302Sopenharmony_ci int bt = trans_i->u.backref; 774570af302Sopenharmony_ci regoff_t bt_len; 775570af302Sopenharmony_ci int result; 776570af302Sopenharmony_ci 777570af302Sopenharmony_ci /* Get the substring we need to match against. Remember to 778570af302Sopenharmony_ci turn off REG_NOSUB temporarily. */ 779570af302Sopenharmony_ci tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, 780570af302Sopenharmony_ci tnfa, tags, pos); 781570af302Sopenharmony_ci so = pmatch[bt].rm_so; 782570af302Sopenharmony_ci eo = pmatch[bt].rm_eo; 783570af302Sopenharmony_ci bt_len = eo - so; 784570af302Sopenharmony_ci 785570af302Sopenharmony_ci result = strncmp((const char*)string + so, str_byte - 1, 786570af302Sopenharmony_ci (size_t)bt_len); 787570af302Sopenharmony_ci 788570af302Sopenharmony_ci if (result == 0) 789570af302Sopenharmony_ci { 790570af302Sopenharmony_ci /* Back reference matched. Check for infinite loop. */ 791570af302Sopenharmony_ci if (bt_len == 0) 792570af302Sopenharmony_ci empty_br_match = 1; 793570af302Sopenharmony_ci if (empty_br_match && states_seen[trans_i->state_id]) 794570af302Sopenharmony_ci { 795570af302Sopenharmony_ci goto backtrack; 796570af302Sopenharmony_ci } 797570af302Sopenharmony_ci 798570af302Sopenharmony_ci states_seen[trans_i->state_id] = empty_br_match; 799570af302Sopenharmony_ci 800570af302Sopenharmony_ci /* Advance in input string and resync `prev_c', `next_c' 801570af302Sopenharmony_ci and pos. */ 802570af302Sopenharmony_ci str_byte += bt_len - 1; 803570af302Sopenharmony_ci pos += bt_len - 1; 804570af302Sopenharmony_ci GET_NEXT_WCHAR(); 805570af302Sopenharmony_ci } 806570af302Sopenharmony_ci else 807570af302Sopenharmony_ci { 808570af302Sopenharmony_ci goto backtrack; 809570af302Sopenharmony_ci } 810570af302Sopenharmony_ci } 811570af302Sopenharmony_ci else 812570af302Sopenharmony_ci { 813570af302Sopenharmony_ci /* Check for end of string. */ 814570af302Sopenharmony_ci if (next_c == L'\0') 815570af302Sopenharmony_ci goto backtrack; 816570af302Sopenharmony_ci 817570af302Sopenharmony_ci /* Read the next character. */ 818570af302Sopenharmony_ci GET_NEXT_WCHAR(); 819570af302Sopenharmony_ci } 820570af302Sopenharmony_ci 821570af302Sopenharmony_ci next_state = NULL; 822570af302Sopenharmony_ci for (trans_i = state; trans_i->state; trans_i++) 823570af302Sopenharmony_ci { 824570af302Sopenharmony_ci if (trans_i->code_min <= (tre_cint_t)prev_c 825570af302Sopenharmony_ci && trans_i->code_max >= (tre_cint_t)prev_c) 826570af302Sopenharmony_ci { 827570af302Sopenharmony_ci if (trans_i->assertions 828570af302Sopenharmony_ci && (CHECK_ASSERTIONS(trans_i->assertions) 829570af302Sopenharmony_ci || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) 830570af302Sopenharmony_ci { 831570af302Sopenharmony_ci continue; 832570af302Sopenharmony_ci } 833570af302Sopenharmony_ci 834570af302Sopenharmony_ci if (next_state == NULL) 835570af302Sopenharmony_ci { 836570af302Sopenharmony_ci /* First matching transition. */ 837570af302Sopenharmony_ci next_state = trans_i->state; 838570af302Sopenharmony_ci next_tags = trans_i->tags; 839570af302Sopenharmony_ci } 840570af302Sopenharmony_ci else 841570af302Sopenharmony_ci { 842570af302Sopenharmony_ci /* Second matching transition. We may need to backtrack here 843570af302Sopenharmony_ci to take this transition instead of the first one, so we 844570af302Sopenharmony_ci push this transition in the backtracking stack so we can 845570af302Sopenharmony_ci jump back here if needed. */ 846570af302Sopenharmony_ci BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, 847570af302Sopenharmony_ci trans_i->state_id, next_c, tags, mbstate); 848570af302Sopenharmony_ci { 849570af302Sopenharmony_ci int *tmp; 850570af302Sopenharmony_ci for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) 851570af302Sopenharmony_ci stack->item.tags[*tmp] = pos; 852570af302Sopenharmony_ci } 853570af302Sopenharmony_ci#if 0 /* XXX - it's important not to look at all transitions here to keep 854570af302Sopenharmony_ci the stack small! */ 855570af302Sopenharmony_ci break; 856570af302Sopenharmony_ci#endif 857570af302Sopenharmony_ci } 858570af302Sopenharmony_ci } 859570af302Sopenharmony_ci } 860570af302Sopenharmony_ci 861570af302Sopenharmony_ci if (next_state != NULL) 862570af302Sopenharmony_ci { 863570af302Sopenharmony_ci /* Matching transitions were found. Take the first one. */ 864570af302Sopenharmony_ci state = next_state; 865570af302Sopenharmony_ci 866570af302Sopenharmony_ci /* Update the tag values. */ 867570af302Sopenharmony_ci if (next_tags) 868570af302Sopenharmony_ci while (*next_tags >= 0) 869570af302Sopenharmony_ci tags[*next_tags++] = pos; 870570af302Sopenharmony_ci } 871570af302Sopenharmony_ci else 872570af302Sopenharmony_ci { 873570af302Sopenharmony_ci backtrack: 874570af302Sopenharmony_ci /* A matching transition was not found. Try to backtrack. */ 875570af302Sopenharmony_ci if (stack->prev) 876570af302Sopenharmony_ci { 877570af302Sopenharmony_ci if (stack->item.state->assertions & ASSERT_BACKREF) 878570af302Sopenharmony_ci { 879570af302Sopenharmony_ci states_seen[stack->item.state_id] = 0; 880570af302Sopenharmony_ci } 881570af302Sopenharmony_ci 882570af302Sopenharmony_ci BT_STACK_POP(); 883570af302Sopenharmony_ci } 884570af302Sopenharmony_ci else if (match_eo < 0) 885570af302Sopenharmony_ci { 886570af302Sopenharmony_ci /* Try starting from a later position in the input string. */ 887570af302Sopenharmony_ci /* Check for end of string. */ 888570af302Sopenharmony_ci if (next_c == L'\0') 889570af302Sopenharmony_ci { 890570af302Sopenharmony_ci break; 891570af302Sopenharmony_ci } 892570af302Sopenharmony_ci next_c = next_c_start; 893570af302Sopenharmony_ci#ifdef TRE_MBSTATE 894570af302Sopenharmony_ci mbstate = mbstate_start; 895570af302Sopenharmony_ci#endif /* TRE_MBSTATE */ 896570af302Sopenharmony_ci str_byte = str_byte_start; 897570af302Sopenharmony_ci goto retry; 898570af302Sopenharmony_ci } 899570af302Sopenharmony_ci else 900570af302Sopenharmony_ci { 901570af302Sopenharmony_ci break; 902570af302Sopenharmony_ci } 903570af302Sopenharmony_ci } 904570af302Sopenharmony_ci } 905570af302Sopenharmony_ci 906570af302Sopenharmony_ci ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 907570af302Sopenharmony_ci *match_end_ofs = match_eo; 908570af302Sopenharmony_ci 909570af302Sopenharmony_ci error_exit: 910570af302Sopenharmony_ci tre_bt_mem_destroy(mem); 911570af302Sopenharmony_ci#ifndef TRE_USE_ALLOCA 912570af302Sopenharmony_ci if (tags) 913570af302Sopenharmony_ci xfree(tags); 914570af302Sopenharmony_ci if (pmatch) 915570af302Sopenharmony_ci xfree(pmatch); 916570af302Sopenharmony_ci if (states_seen) 917570af302Sopenharmony_ci xfree(states_seen); 918570af302Sopenharmony_ci#endif /* !TRE_USE_ALLOCA */ 919570af302Sopenharmony_ci 920570af302Sopenharmony_ci return ret; 921570af302Sopenharmony_ci} 922570af302Sopenharmony_ci 923570af302Sopenharmony_ci/*********************************************************************** 924570af302Sopenharmony_ci from regexec.c 925570af302Sopenharmony_ci***********************************************************************/ 926570af302Sopenharmony_ci 927570af302Sopenharmony_ci/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match 928570af302Sopenharmony_ci endpoint values. */ 929570af302Sopenharmony_cistatic void 930570af302Sopenharmony_citre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 931570af302Sopenharmony_ci const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo) 932570af302Sopenharmony_ci{ 933570af302Sopenharmony_ci tre_submatch_data_t *submatch_data; 934570af302Sopenharmony_ci unsigned int i, j; 935570af302Sopenharmony_ci int *parents; 936570af302Sopenharmony_ci 937570af302Sopenharmony_ci i = 0; 938570af302Sopenharmony_ci if (match_eo >= 0 && !(cflags & REG_NOSUB)) 939570af302Sopenharmony_ci { 940570af302Sopenharmony_ci /* Construct submatch offsets from the tags. */ 941570af302Sopenharmony_ci submatch_data = tnfa->submatch_data; 942570af302Sopenharmony_ci while (i < tnfa->num_submatches && i < nmatch) 943570af302Sopenharmony_ci { 944570af302Sopenharmony_ci if (submatch_data[i].so_tag == tnfa->end_tag) 945570af302Sopenharmony_ci pmatch[i].rm_so = match_eo; 946570af302Sopenharmony_ci else 947570af302Sopenharmony_ci pmatch[i].rm_so = tags[submatch_data[i].so_tag]; 948570af302Sopenharmony_ci 949570af302Sopenharmony_ci if (submatch_data[i].eo_tag == tnfa->end_tag) 950570af302Sopenharmony_ci pmatch[i].rm_eo = match_eo; 951570af302Sopenharmony_ci else 952570af302Sopenharmony_ci pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; 953570af302Sopenharmony_ci 954570af302Sopenharmony_ci /* If either of the endpoints were not used, this submatch 955570af302Sopenharmony_ci was not part of the match. */ 956570af302Sopenharmony_ci if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) 957570af302Sopenharmony_ci pmatch[i].rm_so = pmatch[i].rm_eo = -1; 958570af302Sopenharmony_ci 959570af302Sopenharmony_ci i++; 960570af302Sopenharmony_ci } 961570af302Sopenharmony_ci /* Reset all submatches that are not within all of their parent 962570af302Sopenharmony_ci submatches. */ 963570af302Sopenharmony_ci i = 0; 964570af302Sopenharmony_ci while (i < tnfa->num_submatches && i < nmatch) 965570af302Sopenharmony_ci { 966570af302Sopenharmony_ci if (pmatch[i].rm_eo == -1) 967570af302Sopenharmony_ci assert(pmatch[i].rm_so == -1); 968570af302Sopenharmony_ci assert(pmatch[i].rm_so <= pmatch[i].rm_eo); 969570af302Sopenharmony_ci 970570af302Sopenharmony_ci parents = submatch_data[i].parents; 971570af302Sopenharmony_ci if (parents != NULL) 972570af302Sopenharmony_ci for (j = 0; parents[j] >= 0; j++) 973570af302Sopenharmony_ci { 974570af302Sopenharmony_ci if (pmatch[i].rm_so < pmatch[parents[j]].rm_so 975570af302Sopenharmony_ci || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) 976570af302Sopenharmony_ci pmatch[i].rm_so = pmatch[i].rm_eo = -1; 977570af302Sopenharmony_ci } 978570af302Sopenharmony_ci i++; 979570af302Sopenharmony_ci } 980570af302Sopenharmony_ci } 981570af302Sopenharmony_ci 982570af302Sopenharmony_ci while (i < nmatch) 983570af302Sopenharmony_ci { 984570af302Sopenharmony_ci pmatch[i].rm_so = -1; 985570af302Sopenharmony_ci pmatch[i].rm_eo = -1; 986570af302Sopenharmony_ci i++; 987570af302Sopenharmony_ci } 988570af302Sopenharmony_ci} 989570af302Sopenharmony_ci 990570af302Sopenharmony_ci 991570af302Sopenharmony_ci/* 992570af302Sopenharmony_ci Wrapper functions for POSIX compatible regexp matching. 993570af302Sopenharmony_ci*/ 994570af302Sopenharmony_ci 995570af302Sopenharmony_ciint 996570af302Sopenharmony_ciregexec(const regex_t *restrict preg, const char *restrict string, 997570af302Sopenharmony_ci size_t nmatch, regmatch_t pmatch[restrict], int eflags) 998570af302Sopenharmony_ci{ 999570af302Sopenharmony_ci tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 1000570af302Sopenharmony_ci reg_errcode_t status; 1001570af302Sopenharmony_ci regoff_t *tags = NULL, eo; 1002570af302Sopenharmony_ci if (tnfa->cflags & REG_NOSUB) nmatch = 0; 1003570af302Sopenharmony_ci if (tnfa->num_tags > 0 && nmatch > 0) 1004570af302Sopenharmony_ci { 1005570af302Sopenharmony_ci tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 1006570af302Sopenharmony_ci if (tags == NULL) 1007570af302Sopenharmony_ci return REG_ESPACE; 1008570af302Sopenharmony_ci } 1009570af302Sopenharmony_ci 1010570af302Sopenharmony_ci /* Dispatch to the appropriate matcher. */ 1011570af302Sopenharmony_ci if (tnfa->have_backrefs) 1012570af302Sopenharmony_ci { 1013570af302Sopenharmony_ci /* The regex has back references, use the backtracking matcher. */ 1014570af302Sopenharmony_ci status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); 1015570af302Sopenharmony_ci } 1016570af302Sopenharmony_ci else 1017570af302Sopenharmony_ci { 1018570af302Sopenharmony_ci /* Exact matching, no back references, use the parallel matcher. */ 1019570af302Sopenharmony_ci status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); 1020570af302Sopenharmony_ci } 1021570af302Sopenharmony_ci 1022570af302Sopenharmony_ci if (status == REG_OK) 1023570af302Sopenharmony_ci /* A match was found, so fill the submatch registers. */ 1024570af302Sopenharmony_ci tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); 1025570af302Sopenharmony_ci if (tags) 1026570af302Sopenharmony_ci xfree(tags); 1027570af302Sopenharmony_ci return status; 1028570af302Sopenharmony_ci} 1029