1570af302Sopenharmony_ci/* 2570af302Sopenharmony_ci regcomp.c - TRE POSIX compatible regex compilation functions. 3570af302Sopenharmony_ci 4570af302Sopenharmony_ci Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 5570af302Sopenharmony_ci All rights reserved. 6570af302Sopenharmony_ci 7570af302Sopenharmony_ci Redistribution and use in source and binary forms, with or without 8570af302Sopenharmony_ci modification, are permitted provided that the following conditions 9570af302Sopenharmony_ci are met: 10570af302Sopenharmony_ci 11570af302Sopenharmony_ci 1. Redistributions of source code must retain the above copyright 12570af302Sopenharmony_ci notice, this list of conditions and the following disclaimer. 13570af302Sopenharmony_ci 14570af302Sopenharmony_ci 2. Redistributions in binary form must reproduce the above copyright 15570af302Sopenharmony_ci notice, this list of conditions and the following disclaimer in the 16570af302Sopenharmony_ci documentation and/or other materials provided with the distribution. 17570af302Sopenharmony_ci 18570af302Sopenharmony_ci THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 19570af302Sopenharmony_ci ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20570af302Sopenharmony_ci LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21570af302Sopenharmony_ci A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22570af302Sopenharmony_ci HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23570af302Sopenharmony_ci SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24570af302Sopenharmony_ci LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25570af302Sopenharmony_ci DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26570af302Sopenharmony_ci THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27570af302Sopenharmony_ci (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28570af302Sopenharmony_ci OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29570af302Sopenharmony_ci 30570af302Sopenharmony_ci*/ 31570af302Sopenharmony_ci 32570af302Sopenharmony_ci#include <string.h> 33570af302Sopenharmony_ci#include <stdlib.h> 34570af302Sopenharmony_ci#include <regex.h> 35570af302Sopenharmony_ci#include <limits.h> 36570af302Sopenharmony_ci#include <stdint.h> 37570af302Sopenharmony_ci#include <ctype.h> 38570af302Sopenharmony_ci 39570af302Sopenharmony_ci#include "tre.h" 40570af302Sopenharmony_ci 41570af302Sopenharmony_ci#include <assert.h> 42570af302Sopenharmony_ci 43570af302Sopenharmony_ci/*********************************************************************** 44570af302Sopenharmony_ci from tre-compile.h 45570af302Sopenharmony_ci***********************************************************************/ 46570af302Sopenharmony_ci 47570af302Sopenharmony_citypedef struct { 48570af302Sopenharmony_ci int position; 49570af302Sopenharmony_ci int code_min; 50570af302Sopenharmony_ci int code_max; 51570af302Sopenharmony_ci int *tags; 52570af302Sopenharmony_ci int assertions; 53570af302Sopenharmony_ci tre_ctype_t class; 54570af302Sopenharmony_ci tre_ctype_t *neg_classes; 55570af302Sopenharmony_ci int backref; 56570af302Sopenharmony_ci} tre_pos_and_tags_t; 57570af302Sopenharmony_ci 58570af302Sopenharmony_ci 59570af302Sopenharmony_ci/*********************************************************************** 60570af302Sopenharmony_ci from tre-ast.c and tre-ast.h 61570af302Sopenharmony_ci***********************************************************************/ 62570af302Sopenharmony_ci 63570af302Sopenharmony_ci/* The different AST node types. */ 64570af302Sopenharmony_citypedef enum { 65570af302Sopenharmony_ci LITERAL, 66570af302Sopenharmony_ci CATENATION, 67570af302Sopenharmony_ci ITERATION, 68570af302Sopenharmony_ci UNION 69570af302Sopenharmony_ci} tre_ast_type_t; 70570af302Sopenharmony_ci 71570af302Sopenharmony_ci/* Special subtypes of TRE_LITERAL. */ 72570af302Sopenharmony_ci#define EMPTY -1 /* Empty leaf (denotes empty string). */ 73570af302Sopenharmony_ci#define ASSERTION -2 /* Assertion leaf. */ 74570af302Sopenharmony_ci#define TAG -3 /* Tag leaf. */ 75570af302Sopenharmony_ci#define BACKREF -4 /* Back reference leaf. */ 76570af302Sopenharmony_ci 77570af302Sopenharmony_ci#define IS_SPECIAL(x) ((x)->code_min < 0) 78570af302Sopenharmony_ci#define IS_EMPTY(x) ((x)->code_min == EMPTY) 79570af302Sopenharmony_ci#define IS_ASSERTION(x) ((x)->code_min == ASSERTION) 80570af302Sopenharmony_ci#define IS_TAG(x) ((x)->code_min == TAG) 81570af302Sopenharmony_ci#define IS_BACKREF(x) ((x)->code_min == BACKREF) 82570af302Sopenharmony_ci 83570af302Sopenharmony_ci 84570af302Sopenharmony_ci/* A generic AST node. All AST nodes consist of this node on the top 85570af302Sopenharmony_ci level with `obj' pointing to the actual content. */ 86570af302Sopenharmony_citypedef struct { 87570af302Sopenharmony_ci tre_ast_type_t type; /* Type of the node. */ 88570af302Sopenharmony_ci void *obj; /* Pointer to actual node. */ 89570af302Sopenharmony_ci int nullable; 90570af302Sopenharmony_ci int submatch_id; 91570af302Sopenharmony_ci int num_submatches; 92570af302Sopenharmony_ci int num_tags; 93570af302Sopenharmony_ci tre_pos_and_tags_t *firstpos; 94570af302Sopenharmony_ci tre_pos_and_tags_t *lastpos; 95570af302Sopenharmony_ci} tre_ast_node_t; 96570af302Sopenharmony_ci 97570af302Sopenharmony_ci 98570af302Sopenharmony_ci/* A "literal" node. These are created for assertions, back references, 99570af302Sopenharmony_ci tags, matching parameter settings, and all expressions that match one 100570af302Sopenharmony_ci character. */ 101570af302Sopenharmony_citypedef struct { 102570af302Sopenharmony_ci long code_min; 103570af302Sopenharmony_ci long code_max; 104570af302Sopenharmony_ci int position; 105570af302Sopenharmony_ci tre_ctype_t class; 106570af302Sopenharmony_ci tre_ctype_t *neg_classes; 107570af302Sopenharmony_ci} tre_literal_t; 108570af302Sopenharmony_ci 109570af302Sopenharmony_ci/* A "catenation" node. These are created when two regexps are concatenated. 110570af302Sopenharmony_ci If there are more than one subexpressions in sequence, the `left' part 111570af302Sopenharmony_ci holds all but the last, and `right' part holds the last subexpression 112570af302Sopenharmony_ci (catenation is left associative). */ 113570af302Sopenharmony_citypedef struct { 114570af302Sopenharmony_ci tre_ast_node_t *left; 115570af302Sopenharmony_ci tre_ast_node_t *right; 116570af302Sopenharmony_ci} tre_catenation_t; 117570af302Sopenharmony_ci 118570af302Sopenharmony_ci/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" 119570af302Sopenharmony_ci operators. */ 120570af302Sopenharmony_citypedef struct { 121570af302Sopenharmony_ci /* Subexpression to match. */ 122570af302Sopenharmony_ci tre_ast_node_t *arg; 123570af302Sopenharmony_ci /* Minimum number of consecutive matches. */ 124570af302Sopenharmony_ci int min; 125570af302Sopenharmony_ci /* Maximum number of consecutive matches. */ 126570af302Sopenharmony_ci int max; 127570af302Sopenharmony_ci /* If 0, match as many characters as possible, if 1 match as few as 128570af302Sopenharmony_ci possible. Note that this does not always mean the same thing as 129570af302Sopenharmony_ci matching as many/few repetitions as possible. */ 130570af302Sopenharmony_ci unsigned int minimal:1; 131570af302Sopenharmony_ci} tre_iteration_t; 132570af302Sopenharmony_ci 133570af302Sopenharmony_ci/* An "union" node. These are created for the "|" operator. */ 134570af302Sopenharmony_citypedef struct { 135570af302Sopenharmony_ci tre_ast_node_t *left; 136570af302Sopenharmony_ci tre_ast_node_t *right; 137570af302Sopenharmony_ci} tre_union_t; 138570af302Sopenharmony_ci 139570af302Sopenharmony_ci 140570af302Sopenharmony_cistatic tre_ast_node_t * 141570af302Sopenharmony_citre_ast_new_node(tre_mem_t mem, int type, void *obj) 142570af302Sopenharmony_ci{ 143570af302Sopenharmony_ci tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node); 144570af302Sopenharmony_ci if (!node || !obj) 145570af302Sopenharmony_ci return 0; 146570af302Sopenharmony_ci node->obj = obj; 147570af302Sopenharmony_ci node->type = type; 148570af302Sopenharmony_ci node->nullable = -1; 149570af302Sopenharmony_ci node->submatch_id = -1; 150570af302Sopenharmony_ci return node; 151570af302Sopenharmony_ci} 152570af302Sopenharmony_ci 153570af302Sopenharmony_cistatic tre_ast_node_t * 154570af302Sopenharmony_citre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position) 155570af302Sopenharmony_ci{ 156570af302Sopenharmony_ci tre_ast_node_t *node; 157570af302Sopenharmony_ci tre_literal_t *lit; 158570af302Sopenharmony_ci 159570af302Sopenharmony_ci lit = tre_mem_calloc(mem, sizeof *lit); 160570af302Sopenharmony_ci node = tre_ast_new_node(mem, LITERAL, lit); 161570af302Sopenharmony_ci if (!node) 162570af302Sopenharmony_ci return 0; 163570af302Sopenharmony_ci lit->code_min = code_min; 164570af302Sopenharmony_ci lit->code_max = code_max; 165570af302Sopenharmony_ci lit->position = position; 166570af302Sopenharmony_ci return node; 167570af302Sopenharmony_ci} 168570af302Sopenharmony_ci 169570af302Sopenharmony_cistatic tre_ast_node_t * 170570af302Sopenharmony_citre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal) 171570af302Sopenharmony_ci{ 172570af302Sopenharmony_ci tre_ast_node_t *node; 173570af302Sopenharmony_ci tre_iteration_t *iter; 174570af302Sopenharmony_ci 175570af302Sopenharmony_ci iter = tre_mem_calloc(mem, sizeof *iter); 176570af302Sopenharmony_ci node = tre_ast_new_node(mem, ITERATION, iter); 177570af302Sopenharmony_ci if (!node) 178570af302Sopenharmony_ci return 0; 179570af302Sopenharmony_ci iter->arg = arg; 180570af302Sopenharmony_ci iter->min = min; 181570af302Sopenharmony_ci iter->max = max; 182570af302Sopenharmony_ci iter->minimal = minimal; 183570af302Sopenharmony_ci node->num_submatches = arg->num_submatches; 184570af302Sopenharmony_ci return node; 185570af302Sopenharmony_ci} 186570af302Sopenharmony_ci 187570af302Sopenharmony_cistatic tre_ast_node_t * 188570af302Sopenharmony_citre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) 189570af302Sopenharmony_ci{ 190570af302Sopenharmony_ci tre_ast_node_t *node; 191570af302Sopenharmony_ci tre_union_t *un; 192570af302Sopenharmony_ci 193570af302Sopenharmony_ci if (!left) 194570af302Sopenharmony_ci return right; 195570af302Sopenharmony_ci un = tre_mem_calloc(mem, sizeof *un); 196570af302Sopenharmony_ci node = tre_ast_new_node(mem, UNION, un); 197570af302Sopenharmony_ci if (!node || !right) 198570af302Sopenharmony_ci return 0; 199570af302Sopenharmony_ci un->left = left; 200570af302Sopenharmony_ci un->right = right; 201570af302Sopenharmony_ci node->num_submatches = left->num_submatches + right->num_submatches; 202570af302Sopenharmony_ci return node; 203570af302Sopenharmony_ci} 204570af302Sopenharmony_ci 205570af302Sopenharmony_cistatic tre_ast_node_t * 206570af302Sopenharmony_citre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) 207570af302Sopenharmony_ci{ 208570af302Sopenharmony_ci tre_ast_node_t *node; 209570af302Sopenharmony_ci tre_catenation_t *cat; 210570af302Sopenharmony_ci 211570af302Sopenharmony_ci if (!left) 212570af302Sopenharmony_ci return right; 213570af302Sopenharmony_ci cat = tre_mem_calloc(mem, sizeof *cat); 214570af302Sopenharmony_ci node = tre_ast_new_node(mem, CATENATION, cat); 215570af302Sopenharmony_ci if (!node) 216570af302Sopenharmony_ci return 0; 217570af302Sopenharmony_ci cat->left = left; 218570af302Sopenharmony_ci cat->right = right; 219570af302Sopenharmony_ci node->num_submatches = left->num_submatches + right->num_submatches; 220570af302Sopenharmony_ci return node; 221570af302Sopenharmony_ci} 222570af302Sopenharmony_ci 223570af302Sopenharmony_ci 224570af302Sopenharmony_ci/*********************************************************************** 225570af302Sopenharmony_ci from tre-stack.c and tre-stack.h 226570af302Sopenharmony_ci***********************************************************************/ 227570af302Sopenharmony_ci 228570af302Sopenharmony_citypedef struct tre_stack_rec tre_stack_t; 229570af302Sopenharmony_ci 230570af302Sopenharmony_ci/* Creates a new stack object. `size' is initial size in bytes, `max_size' 231570af302Sopenharmony_ci is maximum size, and `increment' specifies how much more space will be 232570af302Sopenharmony_ci allocated with realloc() if all space gets used up. Returns the stack 233570af302Sopenharmony_ci object or NULL if out of memory. */ 234570af302Sopenharmony_cistatic tre_stack_t * 235570af302Sopenharmony_citre_stack_new(int size, int max_size, int increment); 236570af302Sopenharmony_ci 237570af302Sopenharmony_ci/* Frees the stack object. */ 238570af302Sopenharmony_cistatic void 239570af302Sopenharmony_citre_stack_destroy(tre_stack_t *s); 240570af302Sopenharmony_ci 241570af302Sopenharmony_ci/* Returns the current number of objects in the stack. */ 242570af302Sopenharmony_cistatic int 243570af302Sopenharmony_citre_stack_num_objects(tre_stack_t *s); 244570af302Sopenharmony_ci 245570af302Sopenharmony_ci/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes 246570af302Sopenharmony_ci `value' on top of stack `s'. Returns REG_ESPACE if out of memory. 247570af302Sopenharmony_ci This tries to realloc() more space before failing if maximum size 248570af302Sopenharmony_ci has not yet been reached. Returns REG_OK if successful. */ 249570af302Sopenharmony_ci#define declare_pushf(typetag, type) \ 250570af302Sopenharmony_ci static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value) 251570af302Sopenharmony_ci 252570af302Sopenharmony_cideclare_pushf(voidptr, void *); 253570af302Sopenharmony_cideclare_pushf(int, int); 254570af302Sopenharmony_ci 255570af302Sopenharmony_ci/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost 256570af302Sopenharmony_ci element off of stack `s' and returns it. The stack must not be 257570af302Sopenharmony_ci empty. */ 258570af302Sopenharmony_ci#define declare_popf(typetag, type) \ 259570af302Sopenharmony_ci static type tre_stack_pop_ ## typetag(tre_stack_t *s) 260570af302Sopenharmony_ci 261570af302Sopenharmony_cideclare_popf(voidptr, void *); 262570af302Sopenharmony_cideclare_popf(int, int); 263570af302Sopenharmony_ci 264570af302Sopenharmony_ci/* Just to save some typing. */ 265570af302Sopenharmony_ci#define STACK_PUSH(s, typetag, value) \ 266570af302Sopenharmony_ci do \ 267570af302Sopenharmony_ci { \ 268570af302Sopenharmony_ci status = tre_stack_push_ ## typetag(s, value); \ 269570af302Sopenharmony_ci } \ 270570af302Sopenharmony_ci while (/*CONSTCOND*/0) 271570af302Sopenharmony_ci 272570af302Sopenharmony_ci#define STACK_PUSHX(s, typetag, value) \ 273570af302Sopenharmony_ci { \ 274570af302Sopenharmony_ci status = tre_stack_push_ ## typetag(s, value); \ 275570af302Sopenharmony_ci if (status != REG_OK) \ 276570af302Sopenharmony_ci break; \ 277570af302Sopenharmony_ci } 278570af302Sopenharmony_ci 279570af302Sopenharmony_ci#define STACK_PUSHR(s, typetag, value) \ 280570af302Sopenharmony_ci { \ 281570af302Sopenharmony_ci reg_errcode_t _status; \ 282570af302Sopenharmony_ci _status = tre_stack_push_ ## typetag(s, value); \ 283570af302Sopenharmony_ci if (_status != REG_OK) \ 284570af302Sopenharmony_ci return _status; \ 285570af302Sopenharmony_ci } 286570af302Sopenharmony_ci 287570af302Sopenharmony_ciunion tre_stack_item { 288570af302Sopenharmony_ci void *voidptr_value; 289570af302Sopenharmony_ci int int_value; 290570af302Sopenharmony_ci}; 291570af302Sopenharmony_ci 292570af302Sopenharmony_cistruct tre_stack_rec { 293570af302Sopenharmony_ci int size; 294570af302Sopenharmony_ci int max_size; 295570af302Sopenharmony_ci int increment; 296570af302Sopenharmony_ci int ptr; 297570af302Sopenharmony_ci union tre_stack_item *stack; 298570af302Sopenharmony_ci}; 299570af302Sopenharmony_ci 300570af302Sopenharmony_ci 301570af302Sopenharmony_cistatic tre_stack_t * 302570af302Sopenharmony_citre_stack_new(int size, int max_size, int increment) 303570af302Sopenharmony_ci{ 304570af302Sopenharmony_ci tre_stack_t *s; 305570af302Sopenharmony_ci 306570af302Sopenharmony_ci s = xmalloc(sizeof(*s)); 307570af302Sopenharmony_ci if (s != NULL) 308570af302Sopenharmony_ci { 309570af302Sopenharmony_ci s->stack = xmalloc(sizeof(*s->stack) * size); 310570af302Sopenharmony_ci if (s->stack == NULL) 311570af302Sopenharmony_ci { 312570af302Sopenharmony_ci xfree(s); 313570af302Sopenharmony_ci return NULL; 314570af302Sopenharmony_ci } 315570af302Sopenharmony_ci s->size = size; 316570af302Sopenharmony_ci s->max_size = max_size; 317570af302Sopenharmony_ci s->increment = increment; 318570af302Sopenharmony_ci s->ptr = 0; 319570af302Sopenharmony_ci } 320570af302Sopenharmony_ci return s; 321570af302Sopenharmony_ci} 322570af302Sopenharmony_ci 323570af302Sopenharmony_cistatic void 324570af302Sopenharmony_citre_stack_destroy(tre_stack_t *s) 325570af302Sopenharmony_ci{ 326570af302Sopenharmony_ci xfree(s->stack); 327570af302Sopenharmony_ci xfree(s); 328570af302Sopenharmony_ci} 329570af302Sopenharmony_ci 330570af302Sopenharmony_cistatic int 331570af302Sopenharmony_citre_stack_num_objects(tre_stack_t *s) 332570af302Sopenharmony_ci{ 333570af302Sopenharmony_ci return s->ptr; 334570af302Sopenharmony_ci} 335570af302Sopenharmony_ci 336570af302Sopenharmony_cistatic reg_errcode_t 337570af302Sopenharmony_citre_stack_push(tre_stack_t *s, union tre_stack_item value) 338570af302Sopenharmony_ci{ 339570af302Sopenharmony_ci if (s->ptr < s->size) 340570af302Sopenharmony_ci { 341570af302Sopenharmony_ci s->stack[s->ptr] = value; 342570af302Sopenharmony_ci s->ptr++; 343570af302Sopenharmony_ci } 344570af302Sopenharmony_ci else 345570af302Sopenharmony_ci { 346570af302Sopenharmony_ci if (s->size >= s->max_size) 347570af302Sopenharmony_ci { 348570af302Sopenharmony_ci return REG_ESPACE; 349570af302Sopenharmony_ci } 350570af302Sopenharmony_ci else 351570af302Sopenharmony_ci { 352570af302Sopenharmony_ci union tre_stack_item *new_buffer; 353570af302Sopenharmony_ci int new_size; 354570af302Sopenharmony_ci new_size = s->size + s->increment; 355570af302Sopenharmony_ci if (new_size > s->max_size) 356570af302Sopenharmony_ci new_size = s->max_size; 357570af302Sopenharmony_ci new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); 358570af302Sopenharmony_ci if (new_buffer == NULL) 359570af302Sopenharmony_ci { 360570af302Sopenharmony_ci return REG_ESPACE; 361570af302Sopenharmony_ci } 362570af302Sopenharmony_ci assert(new_size > s->size); 363570af302Sopenharmony_ci s->size = new_size; 364570af302Sopenharmony_ci s->stack = new_buffer; 365570af302Sopenharmony_ci tre_stack_push(s, value); 366570af302Sopenharmony_ci } 367570af302Sopenharmony_ci } 368570af302Sopenharmony_ci return REG_OK; 369570af302Sopenharmony_ci} 370570af302Sopenharmony_ci 371570af302Sopenharmony_ci#define define_pushf(typetag, type) \ 372570af302Sopenharmony_ci declare_pushf(typetag, type) { \ 373570af302Sopenharmony_ci union tre_stack_item item; \ 374570af302Sopenharmony_ci item.typetag ## _value = value; \ 375570af302Sopenharmony_ci return tre_stack_push(s, item); \ 376570af302Sopenharmony_ci} 377570af302Sopenharmony_ci 378570af302Sopenharmony_cidefine_pushf(int, int) 379570af302Sopenharmony_cidefine_pushf(voidptr, void *) 380570af302Sopenharmony_ci 381570af302Sopenharmony_ci#define define_popf(typetag, type) \ 382570af302Sopenharmony_ci declare_popf(typetag, type) { \ 383570af302Sopenharmony_ci return s->stack[--s->ptr].typetag ## _value; \ 384570af302Sopenharmony_ci } 385570af302Sopenharmony_ci 386570af302Sopenharmony_cidefine_popf(int, int) 387570af302Sopenharmony_cidefine_popf(voidptr, void *) 388570af302Sopenharmony_ci 389570af302Sopenharmony_ci 390570af302Sopenharmony_ci/*********************************************************************** 391570af302Sopenharmony_ci from tre-parse.c and tre-parse.h 392570af302Sopenharmony_ci***********************************************************************/ 393570af302Sopenharmony_ci 394570af302Sopenharmony_ci/* Parse context. */ 395570af302Sopenharmony_citypedef struct { 396570af302Sopenharmony_ci /* Memory allocator. The AST is allocated using this. */ 397570af302Sopenharmony_ci tre_mem_t mem; 398570af302Sopenharmony_ci /* Stack used for keeping track of regexp syntax. */ 399570af302Sopenharmony_ci tre_stack_t *stack; 400570af302Sopenharmony_ci /* The parsed node after a parse function returns. */ 401570af302Sopenharmony_ci tre_ast_node_t *n; 402570af302Sopenharmony_ci /* Position in the regexp pattern after a parse function returns. */ 403570af302Sopenharmony_ci const char *s; 404570af302Sopenharmony_ci /* The first character of the last subexpression parsed. */ 405570af302Sopenharmony_ci const char *start; 406570af302Sopenharmony_ci /* Current submatch ID. */ 407570af302Sopenharmony_ci int submatch_id; 408570af302Sopenharmony_ci /* Current position (number of literal). */ 409570af302Sopenharmony_ci int position; 410570af302Sopenharmony_ci /* The highest back reference or -1 if none seen so far. */ 411570af302Sopenharmony_ci int max_backref; 412570af302Sopenharmony_ci /* Compilation flags. */ 413570af302Sopenharmony_ci int cflags; 414570af302Sopenharmony_ci} tre_parse_ctx_t; 415570af302Sopenharmony_ci 416570af302Sopenharmony_ci/* Some macros for expanding \w, \s, etc. */ 417570af302Sopenharmony_cistatic const struct { 418570af302Sopenharmony_ci char c; 419570af302Sopenharmony_ci const char *expansion; 420570af302Sopenharmony_ci} tre_macros[] = { 421570af302Sopenharmony_ci {'t', "\t"}, {'n', "\n"}, {'r', "\r"}, 422570af302Sopenharmony_ci {'f', "\f"}, {'a', "\a"}, {'e', "\033"}, 423570af302Sopenharmony_ci {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"}, 424570af302Sopenharmony_ci {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"}, 425570af302Sopenharmony_ci { 0, 0 } 426570af302Sopenharmony_ci}; 427570af302Sopenharmony_ci 428570af302Sopenharmony_ci/* Expands a macro delimited by `regex' and `regex_end' to `buf', which 429570af302Sopenharmony_ci must have at least `len' items. Sets buf[0] to zero if the there 430570af302Sopenharmony_ci is no match in `tre_macros'. */ 431570af302Sopenharmony_cistatic const char *tre_expand_macro(const char *s) 432570af302Sopenharmony_ci{ 433570af302Sopenharmony_ci int i; 434570af302Sopenharmony_ci for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++); 435570af302Sopenharmony_ci return tre_macros[i].expansion; 436570af302Sopenharmony_ci} 437570af302Sopenharmony_ci 438570af302Sopenharmony_cistatic int 439570af302Sopenharmony_citre_compare_lit(const void *a, const void *b) 440570af302Sopenharmony_ci{ 441570af302Sopenharmony_ci const tre_literal_t *const *la = a; 442570af302Sopenharmony_ci const tre_literal_t *const *lb = b; 443570af302Sopenharmony_ci /* assumes the range of valid code_min is < INT_MAX */ 444570af302Sopenharmony_ci return la[0]->code_min - lb[0]->code_min; 445570af302Sopenharmony_ci} 446570af302Sopenharmony_ci 447570af302Sopenharmony_cistruct literals { 448570af302Sopenharmony_ci tre_mem_t mem; 449570af302Sopenharmony_ci tre_literal_t **a; 450570af302Sopenharmony_ci int len; 451570af302Sopenharmony_ci int cap; 452570af302Sopenharmony_ci}; 453570af302Sopenharmony_ci 454570af302Sopenharmony_cistatic tre_literal_t *tre_new_lit(struct literals *p) 455570af302Sopenharmony_ci{ 456570af302Sopenharmony_ci tre_literal_t **a; 457570af302Sopenharmony_ci if (p->len >= p->cap) { 458570af302Sopenharmony_ci if (p->cap >= 1<<15) 459570af302Sopenharmony_ci return 0; 460570af302Sopenharmony_ci p->cap *= 2; 461570af302Sopenharmony_ci a = xrealloc(p->a, p->cap * sizeof *p->a); 462570af302Sopenharmony_ci if (!a) 463570af302Sopenharmony_ci return 0; 464570af302Sopenharmony_ci p->a = a; 465570af302Sopenharmony_ci } 466570af302Sopenharmony_ci a = p->a + p->len++; 467570af302Sopenharmony_ci *a = tre_mem_calloc(p->mem, sizeof **a); 468570af302Sopenharmony_ci return *a; 469570af302Sopenharmony_ci} 470570af302Sopenharmony_ci 471570af302Sopenharmony_cistatic int add_icase_literals(struct literals *ls, int min, int max) 472570af302Sopenharmony_ci{ 473570af302Sopenharmony_ci tre_literal_t *lit; 474570af302Sopenharmony_ci int b, e, c; 475570af302Sopenharmony_ci for (c=min; c<=max; ) { 476570af302Sopenharmony_ci /* assumes islower(c) and isupper(c) are exclusive 477570af302Sopenharmony_ci and toupper(c)!=c if islower(c). 478570af302Sopenharmony_ci multiple opposite case characters are not supported */ 479570af302Sopenharmony_ci if (tre_islower(c)) { 480570af302Sopenharmony_ci b = e = tre_toupper(c); 481570af302Sopenharmony_ci for (c++, e++; c<=max; c++, e++) 482570af302Sopenharmony_ci if (tre_toupper(c) != e) break; 483570af302Sopenharmony_ci } else if (tre_isupper(c)) { 484570af302Sopenharmony_ci b = e = tre_tolower(c); 485570af302Sopenharmony_ci for (c++, e++; c<=max; c++, e++) 486570af302Sopenharmony_ci if (tre_tolower(c) != e) break; 487570af302Sopenharmony_ci } else { 488570af302Sopenharmony_ci c++; 489570af302Sopenharmony_ci continue; 490570af302Sopenharmony_ci } 491570af302Sopenharmony_ci lit = tre_new_lit(ls); 492570af302Sopenharmony_ci if (!lit) 493570af302Sopenharmony_ci return -1; 494570af302Sopenharmony_ci lit->code_min = b; 495570af302Sopenharmony_ci lit->code_max = e-1; 496570af302Sopenharmony_ci lit->position = -1; 497570af302Sopenharmony_ci } 498570af302Sopenharmony_ci return 0; 499570af302Sopenharmony_ci} 500570af302Sopenharmony_ci 501570af302Sopenharmony_ci 502570af302Sopenharmony_ci/* Maximum number of character classes in a negated bracket expression. */ 503570af302Sopenharmony_ci#define MAX_NEG_CLASSES 64 504570af302Sopenharmony_ci 505570af302Sopenharmony_cistruct neg { 506570af302Sopenharmony_ci int negate; 507570af302Sopenharmony_ci int len; 508570af302Sopenharmony_ci tre_ctype_t a[MAX_NEG_CLASSES]; 509570af302Sopenharmony_ci}; 510570af302Sopenharmony_ci 511570af302Sopenharmony_ci// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges 512570af302Sopenharmony_ci 513570af302Sopenharmony_ci/* 514570af302Sopenharmony_cibracket grammar: 515570af302Sopenharmony_ciBracket = '[' List ']' | '[^' List ']' 516570af302Sopenharmony_ciList = Term | List Term 517570af302Sopenharmony_ciTerm = Char | Range | Chclass | Eqclass 518570af302Sopenharmony_ciRange = Char '-' Char | Char '-' '-' 519570af302Sopenharmony_ciChar = Coll | coll_single 520570af302Sopenharmony_ciMeta = ']' | '-' 521570af302Sopenharmony_ciColl = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]' 522570af302Sopenharmony_ciEqclass = '[=' coll_single '=]' | '[=' coll_multi '=]' 523570af302Sopenharmony_ciChclass = '[:' class ':]' 524570af302Sopenharmony_ci 525570af302Sopenharmony_cicoll_single is a single char collating element but it can be 526570af302Sopenharmony_ci '-' only at the beginning or end of a List and 527570af302Sopenharmony_ci ']' only at the beginning of a List and 528570af302Sopenharmony_ci '^' anywhere except after the openning '[' 529570af302Sopenharmony_ci*/ 530570af302Sopenharmony_ci 531570af302Sopenharmony_cistatic reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg) 532570af302Sopenharmony_ci{ 533570af302Sopenharmony_ci const char *start = s; 534570af302Sopenharmony_ci tre_ctype_t class; 535570af302Sopenharmony_ci int min, max; 536570af302Sopenharmony_ci wchar_t wc; 537570af302Sopenharmony_ci int len; 538570af302Sopenharmony_ci 539570af302Sopenharmony_ci for (;;) { 540570af302Sopenharmony_ci class = 0; 541570af302Sopenharmony_ci len = mbtowc(&wc, s, -1); 542570af302Sopenharmony_ci if (len <= 0) 543570af302Sopenharmony_ci return *s ? REG_BADPAT : REG_EBRACK; 544570af302Sopenharmony_ci if (*s == ']' && s != start) { 545570af302Sopenharmony_ci ctx->s = s+1; 546570af302Sopenharmony_ci return REG_OK; 547570af302Sopenharmony_ci } 548570af302Sopenharmony_ci if (*s == '-' && s != start && s[1] != ']' && 549570af302Sopenharmony_ci /* extension: [a-z--@] is accepted as [a-z]|[--@] */ 550570af302Sopenharmony_ci (s[1] != '-' || s[2] == ']')) 551570af302Sopenharmony_ci return REG_ERANGE; 552570af302Sopenharmony_ci if (*s == '[' && (s[1] == '.' || s[1] == '=')) 553570af302Sopenharmony_ci /* collating symbols and equivalence classes are not supported */ 554570af302Sopenharmony_ci return REG_ECOLLATE; 555570af302Sopenharmony_ci if (*s == '[' && s[1] == ':') { 556570af302Sopenharmony_ci char tmp[CHARCLASS_NAME_MAX+1]; 557570af302Sopenharmony_ci s += 2; 558570af302Sopenharmony_ci for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) { 559570af302Sopenharmony_ci if (s[len] == ':') { 560570af302Sopenharmony_ci memcpy(tmp, s, len); 561570af302Sopenharmony_ci tmp[len] = 0; 562570af302Sopenharmony_ci class = tre_ctype(tmp); 563570af302Sopenharmony_ci break; 564570af302Sopenharmony_ci } 565570af302Sopenharmony_ci } 566570af302Sopenharmony_ci if (!class || s[len+1] != ']') 567570af302Sopenharmony_ci return REG_ECTYPE; 568570af302Sopenharmony_ci min = 0; 569570af302Sopenharmony_ci max = TRE_CHAR_MAX; 570570af302Sopenharmony_ci s += len+2; 571570af302Sopenharmony_ci } else { 572570af302Sopenharmony_ci min = max = wc; 573570af302Sopenharmony_ci s += len; 574570af302Sopenharmony_ci if (*s == '-' && s[1] != ']') { 575570af302Sopenharmony_ci s++; 576570af302Sopenharmony_ci len = mbtowc(&wc, s, -1); 577570af302Sopenharmony_ci max = wc; 578570af302Sopenharmony_ci /* XXX - Should use collation order instead of 579570af302Sopenharmony_ci encoding values in character ranges. */ 580570af302Sopenharmony_ci if (len <= 0 || min > max) 581570af302Sopenharmony_ci return REG_ERANGE; 582570af302Sopenharmony_ci s += len; 583570af302Sopenharmony_ci } 584570af302Sopenharmony_ci } 585570af302Sopenharmony_ci 586570af302Sopenharmony_ci if (class && neg->negate) { 587570af302Sopenharmony_ci if (neg->len >= MAX_NEG_CLASSES) 588570af302Sopenharmony_ci return REG_ESPACE; 589570af302Sopenharmony_ci neg->a[neg->len++] = class; 590570af302Sopenharmony_ci } else { 591570af302Sopenharmony_ci tre_literal_t *lit = tre_new_lit(ls); 592570af302Sopenharmony_ci if (!lit) 593570af302Sopenharmony_ci return REG_ESPACE; 594570af302Sopenharmony_ci lit->code_min = min; 595570af302Sopenharmony_ci lit->code_max = max; 596570af302Sopenharmony_ci lit->class = class; 597570af302Sopenharmony_ci lit->position = -1; 598570af302Sopenharmony_ci 599570af302Sopenharmony_ci /* Add opposite-case codepoints if REG_ICASE is present. 600570af302Sopenharmony_ci It seems that POSIX requires that bracket negation 601570af302Sopenharmony_ci should happen before case-folding, but most practical 602570af302Sopenharmony_ci implementations do it the other way around. Changing 603570af302Sopenharmony_ci the order would need efficient representation of 604570af302Sopenharmony_ci case-fold ranges and bracket range sets even with 605570af302Sopenharmony_ci simple patterns so this is ok for now. */ 606570af302Sopenharmony_ci if (ctx->cflags & REG_ICASE && !class) 607570af302Sopenharmony_ci if (add_icase_literals(ls, min, max)) 608570af302Sopenharmony_ci return REG_ESPACE; 609570af302Sopenharmony_ci } 610570af302Sopenharmony_ci } 611570af302Sopenharmony_ci} 612570af302Sopenharmony_ci 613570af302Sopenharmony_cistatic reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s) 614570af302Sopenharmony_ci{ 615570af302Sopenharmony_ci int i, max, min, negmax, negmin; 616570af302Sopenharmony_ci tre_ast_node_t *node = 0, *n; 617570af302Sopenharmony_ci tre_ctype_t *nc = 0; 618570af302Sopenharmony_ci tre_literal_t *lit; 619570af302Sopenharmony_ci struct literals ls; 620570af302Sopenharmony_ci struct neg neg; 621570af302Sopenharmony_ci reg_errcode_t err; 622570af302Sopenharmony_ci 623570af302Sopenharmony_ci ls.mem = ctx->mem; 624570af302Sopenharmony_ci ls.len = 0; 625570af302Sopenharmony_ci ls.cap = 32; 626570af302Sopenharmony_ci ls.a = xmalloc(ls.cap * sizeof *ls.a); 627570af302Sopenharmony_ci if (!ls.a) 628570af302Sopenharmony_ci return REG_ESPACE; 629570af302Sopenharmony_ci neg.len = 0; 630570af302Sopenharmony_ci neg.negate = *s == '^'; 631570af302Sopenharmony_ci if (neg.negate) 632570af302Sopenharmony_ci s++; 633570af302Sopenharmony_ci 634570af302Sopenharmony_ci err = parse_bracket_terms(ctx, s, &ls, &neg); 635570af302Sopenharmony_ci if (err != REG_OK) 636570af302Sopenharmony_ci goto parse_bracket_done; 637570af302Sopenharmony_ci 638570af302Sopenharmony_ci if (neg.negate) { 639570af302Sopenharmony_ci /* 640570af302Sopenharmony_ci * With REG_NEWLINE, POSIX requires that newlines are not matched by 641570af302Sopenharmony_ci * any form of a non-matching list. 642570af302Sopenharmony_ci */ 643570af302Sopenharmony_ci if (ctx->cflags & REG_NEWLINE) { 644570af302Sopenharmony_ci lit = tre_new_lit(&ls); 645570af302Sopenharmony_ci if (!lit) { 646570af302Sopenharmony_ci err = REG_ESPACE; 647570af302Sopenharmony_ci goto parse_bracket_done; 648570af302Sopenharmony_ci } 649570af302Sopenharmony_ci lit->code_min = '\n'; 650570af302Sopenharmony_ci lit->code_max = '\n'; 651570af302Sopenharmony_ci lit->position = -1; 652570af302Sopenharmony_ci } 653570af302Sopenharmony_ci /* Sort the array if we need to negate it. */ 654570af302Sopenharmony_ci qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit); 655570af302Sopenharmony_ci /* extra lit for the last negated range */ 656570af302Sopenharmony_ci lit = tre_new_lit(&ls); 657570af302Sopenharmony_ci if (!lit) { 658570af302Sopenharmony_ci err = REG_ESPACE; 659570af302Sopenharmony_ci goto parse_bracket_done; 660570af302Sopenharmony_ci } 661570af302Sopenharmony_ci lit->code_min = TRE_CHAR_MAX+1; 662570af302Sopenharmony_ci lit->code_max = TRE_CHAR_MAX+1; 663570af302Sopenharmony_ci lit->position = -1; 664570af302Sopenharmony_ci /* negated classes */ 665570af302Sopenharmony_ci if (neg.len) { 666570af302Sopenharmony_ci nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a); 667570af302Sopenharmony_ci if (!nc) { 668570af302Sopenharmony_ci err = REG_ESPACE; 669570af302Sopenharmony_ci goto parse_bracket_done; 670570af302Sopenharmony_ci } 671570af302Sopenharmony_ci memcpy(nc, neg.a, neg.len*sizeof *neg.a); 672570af302Sopenharmony_ci nc[neg.len] = 0; 673570af302Sopenharmony_ci } 674570af302Sopenharmony_ci } 675570af302Sopenharmony_ci 676570af302Sopenharmony_ci /* Build a union of the items in the array, negated if necessary. */ 677570af302Sopenharmony_ci negmax = negmin = 0; 678570af302Sopenharmony_ci for (i = 0; i < ls.len; i++) { 679570af302Sopenharmony_ci lit = ls.a[i]; 680570af302Sopenharmony_ci min = lit->code_min; 681570af302Sopenharmony_ci max = lit->code_max; 682570af302Sopenharmony_ci if (neg.negate) { 683570af302Sopenharmony_ci if (min <= negmin) { 684570af302Sopenharmony_ci /* Overlap. */ 685570af302Sopenharmony_ci negmin = MAX(max + 1, negmin); 686570af302Sopenharmony_ci continue; 687570af302Sopenharmony_ci } 688570af302Sopenharmony_ci negmax = min - 1; 689570af302Sopenharmony_ci lit->code_min = negmin; 690570af302Sopenharmony_ci lit->code_max = negmax; 691570af302Sopenharmony_ci negmin = max + 1; 692570af302Sopenharmony_ci } 693570af302Sopenharmony_ci lit->position = ctx->position; 694570af302Sopenharmony_ci lit->neg_classes = nc; 695570af302Sopenharmony_ci n = tre_ast_new_node(ctx->mem, LITERAL, lit); 696570af302Sopenharmony_ci node = tre_ast_new_union(ctx->mem, node, n); 697570af302Sopenharmony_ci if (!node) { 698570af302Sopenharmony_ci err = REG_ESPACE; 699570af302Sopenharmony_ci break; 700570af302Sopenharmony_ci } 701570af302Sopenharmony_ci } 702570af302Sopenharmony_ci 703570af302Sopenharmony_ciparse_bracket_done: 704570af302Sopenharmony_ci xfree(ls.a); 705570af302Sopenharmony_ci ctx->position++; 706570af302Sopenharmony_ci ctx->n = node; 707570af302Sopenharmony_ci return err; 708570af302Sopenharmony_ci} 709570af302Sopenharmony_ci 710570af302Sopenharmony_cistatic const char *parse_dup_count(const char *s, int *n) 711570af302Sopenharmony_ci{ 712570af302Sopenharmony_ci *n = -1; 713570af302Sopenharmony_ci if (!isdigit(*s)) 714570af302Sopenharmony_ci return s; 715570af302Sopenharmony_ci *n = 0; 716570af302Sopenharmony_ci for (;;) { 717570af302Sopenharmony_ci *n = 10 * *n + (*s - '0'); 718570af302Sopenharmony_ci s++; 719570af302Sopenharmony_ci if (!isdigit(*s) || *n > RE_DUP_MAX) 720570af302Sopenharmony_ci break; 721570af302Sopenharmony_ci } 722570af302Sopenharmony_ci return s; 723570af302Sopenharmony_ci} 724570af302Sopenharmony_ci 725570af302Sopenharmony_cistatic const char *parse_dup(const char *s, int ere, int *pmin, int *pmax) 726570af302Sopenharmony_ci{ 727570af302Sopenharmony_ci int min, max; 728570af302Sopenharmony_ci 729570af302Sopenharmony_ci s = parse_dup_count(s, &min); 730570af302Sopenharmony_ci if (*s == ',') 731570af302Sopenharmony_ci s = parse_dup_count(s+1, &max); 732570af302Sopenharmony_ci else 733570af302Sopenharmony_ci max = min; 734570af302Sopenharmony_ci 735570af302Sopenharmony_ci if ( 736570af302Sopenharmony_ci (max < min && max >= 0) || 737570af302Sopenharmony_ci max > RE_DUP_MAX || 738570af302Sopenharmony_ci min > RE_DUP_MAX || 739570af302Sopenharmony_ci min < 0 || 740570af302Sopenharmony_ci (!ere && *s++ != '\\') || 741570af302Sopenharmony_ci *s++ != '}' 742570af302Sopenharmony_ci ) 743570af302Sopenharmony_ci return 0; 744570af302Sopenharmony_ci *pmin = min; 745570af302Sopenharmony_ci *pmax = max; 746570af302Sopenharmony_ci return s; 747570af302Sopenharmony_ci} 748570af302Sopenharmony_ci 749570af302Sopenharmony_cistatic int hexval(unsigned c) 750570af302Sopenharmony_ci{ 751570af302Sopenharmony_ci if (c-'0'<10) return c-'0'; 752570af302Sopenharmony_ci c |= 32; 753570af302Sopenharmony_ci if (c-'a'<6) return c-'a'+10; 754570af302Sopenharmony_ci return -1; 755570af302Sopenharmony_ci} 756570af302Sopenharmony_ci 757570af302Sopenharmony_cistatic reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid) 758570af302Sopenharmony_ci{ 759570af302Sopenharmony_ci if (node->submatch_id >= 0) { 760570af302Sopenharmony_ci tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 761570af302Sopenharmony_ci if (!n) 762570af302Sopenharmony_ci return REG_ESPACE; 763570af302Sopenharmony_ci n = tre_ast_new_catenation(ctx->mem, n, node); 764570af302Sopenharmony_ci if (!n) 765570af302Sopenharmony_ci return REG_ESPACE; 766570af302Sopenharmony_ci n->num_submatches = node->num_submatches; 767570af302Sopenharmony_ci node = n; 768570af302Sopenharmony_ci } 769570af302Sopenharmony_ci node->submatch_id = subid; 770570af302Sopenharmony_ci node->num_submatches++; 771570af302Sopenharmony_ci ctx->n = node; 772570af302Sopenharmony_ci return REG_OK; 773570af302Sopenharmony_ci} 774570af302Sopenharmony_ci 775570af302Sopenharmony_ci/* 776570af302Sopenharmony_ciBRE grammar: 777570af302Sopenharmony_ciRegex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$' 778570af302Sopenharmony_ciBranch = Atom | Branch Atom 779570af302Sopenharmony_ciAtom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref 780570af302Sopenharmony_ciDup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}' 781570af302Sopenharmony_ci 782570af302Sopenharmony_ci(leading ^ and trailing $ in a sub expr may be an anchor or literal as well) 783570af302Sopenharmony_ci 784570af302Sopenharmony_ciERE grammar: 785570af302Sopenharmony_ciRegex = Branch | Regex '|' Branch 786570af302Sopenharmony_ciBranch = Atom | Branch Atom 787570af302Sopenharmony_ciAtom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$' 788570af302Sopenharmony_ciDup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}' 789570af302Sopenharmony_ci 790570af302Sopenharmony_ci(a*+?, ^*, $+, \X, {, (|a) are unspecified) 791570af302Sopenharmony_ci*/ 792570af302Sopenharmony_ci 793570af302Sopenharmony_cistatic reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) 794570af302Sopenharmony_ci{ 795570af302Sopenharmony_ci int len, ere = ctx->cflags & REG_EXTENDED; 796570af302Sopenharmony_ci const char *p; 797570af302Sopenharmony_ci tre_ast_node_t *node; 798570af302Sopenharmony_ci wchar_t wc; 799570af302Sopenharmony_ci switch (*s) { 800570af302Sopenharmony_ci case '[': 801570af302Sopenharmony_ci return parse_bracket(ctx, s+1); 802570af302Sopenharmony_ci case '\\': 803570af302Sopenharmony_ci p = tre_expand_macro(s+1); 804570af302Sopenharmony_ci if (p) { 805570af302Sopenharmony_ci /* assume \X expansion is a single atom */ 806570af302Sopenharmony_ci reg_errcode_t err = parse_atom(ctx, p); 807570af302Sopenharmony_ci ctx->s = s+2; 808570af302Sopenharmony_ci return err; 809570af302Sopenharmony_ci } 810570af302Sopenharmony_ci /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */ 811570af302Sopenharmony_ci switch (*++s) { 812570af302Sopenharmony_ci case 0: 813570af302Sopenharmony_ci return REG_EESCAPE; 814570af302Sopenharmony_ci case 'b': 815570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1); 816570af302Sopenharmony_ci break; 817570af302Sopenharmony_ci case 'B': 818570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1); 819570af302Sopenharmony_ci break; 820570af302Sopenharmony_ci case '<': 821570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1); 822570af302Sopenharmony_ci break; 823570af302Sopenharmony_ci case '>': 824570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1); 825570af302Sopenharmony_ci break; 826570af302Sopenharmony_ci case 'x': 827570af302Sopenharmony_ci s++; 828570af302Sopenharmony_ci int i, v = 0, c; 829570af302Sopenharmony_ci len = 2; 830570af302Sopenharmony_ci if (*s == '{') { 831570af302Sopenharmony_ci len = 8; 832570af302Sopenharmony_ci s++; 833570af302Sopenharmony_ci } 834570af302Sopenharmony_ci for (i=0; i<len && v<0x110000; i++) { 835570af302Sopenharmony_ci c = hexval(s[i]); 836570af302Sopenharmony_ci if (c < 0) break; 837570af302Sopenharmony_ci v = 16*v + c; 838570af302Sopenharmony_ci } 839570af302Sopenharmony_ci s += i; 840570af302Sopenharmony_ci if (len == 8) { 841570af302Sopenharmony_ci if (*s != '}') 842570af302Sopenharmony_ci return REG_EBRACE; 843570af302Sopenharmony_ci s++; 844570af302Sopenharmony_ci } 845570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++); 846570af302Sopenharmony_ci s--; 847570af302Sopenharmony_ci break; 848570af302Sopenharmony_ci case '{': 849570af302Sopenharmony_ci case '+': 850570af302Sopenharmony_ci case '?': 851570af302Sopenharmony_ci /* extension: treat \+, \? as repetitions in BRE */ 852570af302Sopenharmony_ci /* reject repetitions after empty expression in BRE */ 853570af302Sopenharmony_ci if (!ere) 854570af302Sopenharmony_ci return REG_BADRPT; 855570af302Sopenharmony_ci case '|': 856570af302Sopenharmony_ci /* extension: treat \| as alternation in BRE */ 857570af302Sopenharmony_ci if (!ere) { 858570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 859570af302Sopenharmony_ci s--; 860570af302Sopenharmony_ci goto end; 861570af302Sopenharmony_ci } 862570af302Sopenharmony_ci /* fallthrough */ 863570af302Sopenharmony_ci default: 864570af302Sopenharmony_ci if (!ere && (unsigned)*s-'1' < 9) { 865570af302Sopenharmony_ci /* back reference */ 866570af302Sopenharmony_ci int val = *s - '0'; 867570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++); 868570af302Sopenharmony_ci ctx->max_backref = MAX(val, ctx->max_backref); 869570af302Sopenharmony_ci } else { 870570af302Sopenharmony_ci /* extension: accept unknown escaped char 871570af302Sopenharmony_ci as a literal */ 872570af302Sopenharmony_ci goto parse_literal; 873570af302Sopenharmony_ci } 874570af302Sopenharmony_ci } 875570af302Sopenharmony_ci s++; 876570af302Sopenharmony_ci break; 877570af302Sopenharmony_ci case '.': 878570af302Sopenharmony_ci if (ctx->cflags & REG_NEWLINE) { 879570af302Sopenharmony_ci tre_ast_node_t *tmp1, *tmp2; 880570af302Sopenharmony_ci tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++); 881570af302Sopenharmony_ci tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++); 882570af302Sopenharmony_ci if (tmp1 && tmp2) 883570af302Sopenharmony_ci node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 884570af302Sopenharmony_ci else 885570af302Sopenharmony_ci node = 0; 886570af302Sopenharmony_ci } else { 887570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++); 888570af302Sopenharmony_ci } 889570af302Sopenharmony_ci s++; 890570af302Sopenharmony_ci break; 891570af302Sopenharmony_ci case '^': 892570af302Sopenharmony_ci /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */ 893570af302Sopenharmony_ci if (!ere && s != ctx->start) 894570af302Sopenharmony_ci goto parse_literal; 895570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); 896570af302Sopenharmony_ci s++; 897570af302Sopenharmony_ci break; 898570af302Sopenharmony_ci case '$': 899570af302Sopenharmony_ci /* '$' is special everywhere in EREs, and at the end of a BRE subexpression. */ 900570af302Sopenharmony_ci if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|'))) 901570af302Sopenharmony_ci goto parse_literal; 902570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1); 903570af302Sopenharmony_ci s++; 904570af302Sopenharmony_ci break; 905570af302Sopenharmony_ci case '*': 906570af302Sopenharmony_ci case '{': 907570af302Sopenharmony_ci case '+': 908570af302Sopenharmony_ci case '?': 909570af302Sopenharmony_ci /* reject repetitions after empty expression in ERE */ 910570af302Sopenharmony_ci if (ere) 911570af302Sopenharmony_ci return REG_BADRPT; 912570af302Sopenharmony_ci case '|': 913570af302Sopenharmony_ci if (!ere) 914570af302Sopenharmony_ci goto parse_literal; 915570af302Sopenharmony_ci case 0: 916570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 917570af302Sopenharmony_ci break; 918570af302Sopenharmony_ci default: 919570af302Sopenharmony_ciparse_literal: 920570af302Sopenharmony_ci len = mbtowc(&wc, s, -1); 921570af302Sopenharmony_ci if (len < 0) 922570af302Sopenharmony_ci return REG_BADPAT; 923570af302Sopenharmony_ci if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) { 924570af302Sopenharmony_ci tre_ast_node_t *tmp1, *tmp2; 925570af302Sopenharmony_ci /* multiple opposite case characters are not supported */ 926570af302Sopenharmony_ci tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position); 927570af302Sopenharmony_ci tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position); 928570af302Sopenharmony_ci if (tmp1 && tmp2) 929570af302Sopenharmony_ci node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 930570af302Sopenharmony_ci else 931570af302Sopenharmony_ci node = 0; 932570af302Sopenharmony_ci } else { 933570af302Sopenharmony_ci node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position); 934570af302Sopenharmony_ci } 935570af302Sopenharmony_ci ctx->position++; 936570af302Sopenharmony_ci s += len; 937570af302Sopenharmony_ci break; 938570af302Sopenharmony_ci } 939570af302Sopenharmony_ciend: 940570af302Sopenharmony_ci if (!node) 941570af302Sopenharmony_ci return REG_ESPACE; 942570af302Sopenharmony_ci ctx->n = node; 943570af302Sopenharmony_ci ctx->s = s; 944570af302Sopenharmony_ci return REG_OK; 945570af302Sopenharmony_ci} 946570af302Sopenharmony_ci 947570af302Sopenharmony_ci#define PUSHPTR(err, s, v) do { \ 948570af302Sopenharmony_ci if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \ 949570af302Sopenharmony_ci return err; \ 950570af302Sopenharmony_ci} while(0) 951570af302Sopenharmony_ci 952570af302Sopenharmony_ci#define PUSHINT(err, s, v) do { \ 953570af302Sopenharmony_ci if ((err = tre_stack_push_int(s, v)) != REG_OK) \ 954570af302Sopenharmony_ci return err; \ 955570af302Sopenharmony_ci} while(0) 956570af302Sopenharmony_ci 957570af302Sopenharmony_cistatic reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) 958570af302Sopenharmony_ci{ 959570af302Sopenharmony_ci tre_ast_node_t *nbranch=0, *nunion=0; 960570af302Sopenharmony_ci int ere = ctx->cflags & REG_EXTENDED; 961570af302Sopenharmony_ci const char *s = ctx->start; 962570af302Sopenharmony_ci int subid = 0; 963570af302Sopenharmony_ci int depth = 0; 964570af302Sopenharmony_ci reg_errcode_t err; 965570af302Sopenharmony_ci tre_stack_t *stack = ctx->stack; 966570af302Sopenharmony_ci 967570af302Sopenharmony_ci PUSHINT(err, stack, subid++); 968570af302Sopenharmony_ci for (;;) { 969570af302Sopenharmony_ci if ((!ere && *s == '\\' && s[1] == '(') || 970570af302Sopenharmony_ci (ere && *s == '(')) { 971570af302Sopenharmony_ci PUSHPTR(err, stack, nunion); 972570af302Sopenharmony_ci PUSHPTR(err, stack, nbranch); 973570af302Sopenharmony_ci PUSHINT(err, stack, subid++); 974570af302Sopenharmony_ci s++; 975570af302Sopenharmony_ci if (!ere) 976570af302Sopenharmony_ci s++; 977570af302Sopenharmony_ci depth++; 978570af302Sopenharmony_ci nbranch = nunion = 0; 979570af302Sopenharmony_ci ctx->start = s; 980570af302Sopenharmony_ci continue; 981570af302Sopenharmony_ci } 982570af302Sopenharmony_ci if ((!ere && *s == '\\' && s[1] == ')') || 983570af302Sopenharmony_ci (ere && *s == ')' && depth)) { 984570af302Sopenharmony_ci ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 985570af302Sopenharmony_ci if (!ctx->n) 986570af302Sopenharmony_ci return REG_ESPACE; 987570af302Sopenharmony_ci } else { 988570af302Sopenharmony_ci err = parse_atom(ctx, s); 989570af302Sopenharmony_ci if (err != REG_OK) 990570af302Sopenharmony_ci return err; 991570af302Sopenharmony_ci s = ctx->s; 992570af302Sopenharmony_ci } 993570af302Sopenharmony_ci 994570af302Sopenharmony_ci parse_iter: 995570af302Sopenharmony_ci for (;;) { 996570af302Sopenharmony_ci int min, max; 997570af302Sopenharmony_ci 998570af302Sopenharmony_ci if (*s!='\\' && *s!='*') { 999570af302Sopenharmony_ci if (!ere) 1000570af302Sopenharmony_ci break; 1001570af302Sopenharmony_ci if (*s!='+' && *s!='?' && *s!='{') 1002570af302Sopenharmony_ci break; 1003570af302Sopenharmony_ci } 1004570af302Sopenharmony_ci if (*s=='\\' && ere) 1005570af302Sopenharmony_ci break; 1006570af302Sopenharmony_ci /* extension: treat \+, \? as repetitions in BRE */ 1007570af302Sopenharmony_ci if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{') 1008570af302Sopenharmony_ci break; 1009570af302Sopenharmony_ci if (*s=='\\') 1010570af302Sopenharmony_ci s++; 1011570af302Sopenharmony_ci 1012570af302Sopenharmony_ci /* handle ^* at the start of a BRE. */ 1013570af302Sopenharmony_ci if (!ere && s==ctx->start+1 && s[-1]=='^') 1014570af302Sopenharmony_ci break; 1015570af302Sopenharmony_ci 1016570af302Sopenharmony_ci /* extension: multiple consecutive *+?{,} is unspecified, 1017570af302Sopenharmony_ci but (a+)+ has to be supported so accepting a++ makes 1018570af302Sopenharmony_ci sense, note however that the RE_DUP_MAX limit can be 1019570af302Sopenharmony_ci circumvented: (a{255}){255} uses a lot of memory.. */ 1020570af302Sopenharmony_ci if (*s=='{') { 1021570af302Sopenharmony_ci s = parse_dup(s+1, ere, &min, &max); 1022570af302Sopenharmony_ci if (!s) 1023570af302Sopenharmony_ci return REG_BADBR; 1024570af302Sopenharmony_ci } else { 1025570af302Sopenharmony_ci min=0; 1026570af302Sopenharmony_ci max=-1; 1027570af302Sopenharmony_ci if (*s == '+') 1028570af302Sopenharmony_ci min = 1; 1029570af302Sopenharmony_ci if (*s == '?') 1030570af302Sopenharmony_ci max = 1; 1031570af302Sopenharmony_ci s++; 1032570af302Sopenharmony_ci } 1033570af302Sopenharmony_ci if (max == 0) 1034570af302Sopenharmony_ci ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 1035570af302Sopenharmony_ci else 1036570af302Sopenharmony_ci ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); 1037570af302Sopenharmony_ci if (!ctx->n) 1038570af302Sopenharmony_ci return REG_ESPACE; 1039570af302Sopenharmony_ci } 1040570af302Sopenharmony_ci 1041570af302Sopenharmony_ci nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); 1042570af302Sopenharmony_ci if ((ere && *s == '|') || 1043570af302Sopenharmony_ci (ere && *s == ')' && depth) || 1044570af302Sopenharmony_ci (!ere && *s == '\\' && s[1] == ')') || 1045570af302Sopenharmony_ci /* extension: treat \| as alternation in BRE */ 1046570af302Sopenharmony_ci (!ere && *s == '\\' && s[1] == '|') || 1047570af302Sopenharmony_ci !*s) { 1048570af302Sopenharmony_ci /* extension: empty branch is unspecified (), (|a), (a|) 1049570af302Sopenharmony_ci here they are not rejected but match on empty string */ 1050570af302Sopenharmony_ci int c = *s; 1051570af302Sopenharmony_ci nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); 1052570af302Sopenharmony_ci nbranch = 0; 1053570af302Sopenharmony_ci 1054570af302Sopenharmony_ci if (c == '\\' && s[1] == '|') { 1055570af302Sopenharmony_ci s+=2; 1056570af302Sopenharmony_ci ctx->start = s; 1057570af302Sopenharmony_ci } else if (c == '|') { 1058570af302Sopenharmony_ci s++; 1059570af302Sopenharmony_ci ctx->start = s; 1060570af302Sopenharmony_ci } else { 1061570af302Sopenharmony_ci if (c == '\\') { 1062570af302Sopenharmony_ci if (!depth) return REG_EPAREN; 1063570af302Sopenharmony_ci s+=2; 1064570af302Sopenharmony_ci } else if (c == ')') 1065570af302Sopenharmony_ci s++; 1066570af302Sopenharmony_ci depth--; 1067570af302Sopenharmony_ci err = marksub(ctx, nunion, tre_stack_pop_int(stack)); 1068570af302Sopenharmony_ci if (err != REG_OK) 1069570af302Sopenharmony_ci return err; 1070570af302Sopenharmony_ci if (!c && depth<0) { 1071570af302Sopenharmony_ci ctx->submatch_id = subid; 1072570af302Sopenharmony_ci return REG_OK; 1073570af302Sopenharmony_ci } 1074570af302Sopenharmony_ci if (!c || depth<0) 1075570af302Sopenharmony_ci return REG_EPAREN; 1076570af302Sopenharmony_ci nbranch = tre_stack_pop_voidptr(stack); 1077570af302Sopenharmony_ci nunion = tre_stack_pop_voidptr(stack); 1078570af302Sopenharmony_ci goto parse_iter; 1079570af302Sopenharmony_ci } 1080570af302Sopenharmony_ci } 1081570af302Sopenharmony_ci } 1082570af302Sopenharmony_ci} 1083570af302Sopenharmony_ci 1084570af302Sopenharmony_ci 1085570af302Sopenharmony_ci/*********************************************************************** 1086570af302Sopenharmony_ci from tre-compile.c 1087570af302Sopenharmony_ci***********************************************************************/ 1088570af302Sopenharmony_ci 1089570af302Sopenharmony_ci 1090570af302Sopenharmony_ci/* 1091570af302Sopenharmony_ci TODO: 1092570af302Sopenharmony_ci - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive 1093570af302Sopenharmony_ci function calls. 1094570af302Sopenharmony_ci*/ 1095570af302Sopenharmony_ci 1096570af302Sopenharmony_ci/* 1097570af302Sopenharmony_ci Algorithms to setup tags so that submatch addressing can be done. 1098570af302Sopenharmony_ci*/ 1099570af302Sopenharmony_ci 1100570af302Sopenharmony_ci 1101570af302Sopenharmony_ci/* Inserts a catenation node to the root of the tree given in `node'. 1102570af302Sopenharmony_ci As the left child a new tag with number `tag_id' to `node' is added, 1103570af302Sopenharmony_ci and the right child is the old root. */ 1104570af302Sopenharmony_cistatic reg_errcode_t 1105570af302Sopenharmony_citre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 1106570af302Sopenharmony_ci{ 1107570af302Sopenharmony_ci tre_catenation_t *c; 1108570af302Sopenharmony_ci 1109570af302Sopenharmony_ci c = tre_mem_alloc(mem, sizeof(*c)); 1110570af302Sopenharmony_ci if (c == NULL) 1111570af302Sopenharmony_ci return REG_ESPACE; 1112570af302Sopenharmony_ci c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); 1113570af302Sopenharmony_ci if (c->left == NULL) 1114570af302Sopenharmony_ci return REG_ESPACE; 1115570af302Sopenharmony_ci c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 1116570af302Sopenharmony_ci if (c->right == NULL) 1117570af302Sopenharmony_ci return REG_ESPACE; 1118570af302Sopenharmony_ci 1119570af302Sopenharmony_ci c->right->obj = node->obj; 1120570af302Sopenharmony_ci c->right->type = node->type; 1121570af302Sopenharmony_ci c->right->nullable = -1; 1122570af302Sopenharmony_ci c->right->submatch_id = -1; 1123570af302Sopenharmony_ci c->right->firstpos = NULL; 1124570af302Sopenharmony_ci c->right->lastpos = NULL; 1125570af302Sopenharmony_ci c->right->num_tags = 0; 1126570af302Sopenharmony_ci c->right->num_submatches = 0; 1127570af302Sopenharmony_ci node->obj = c; 1128570af302Sopenharmony_ci node->type = CATENATION; 1129570af302Sopenharmony_ci return REG_OK; 1130570af302Sopenharmony_ci} 1131570af302Sopenharmony_ci 1132570af302Sopenharmony_ci/* Inserts a catenation node to the root of the tree given in `node'. 1133570af302Sopenharmony_ci As the right child a new tag with number `tag_id' to `node' is added, 1134570af302Sopenharmony_ci and the left child is the old root. */ 1135570af302Sopenharmony_cistatic reg_errcode_t 1136570af302Sopenharmony_citre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 1137570af302Sopenharmony_ci{ 1138570af302Sopenharmony_ci tre_catenation_t *c; 1139570af302Sopenharmony_ci 1140570af302Sopenharmony_ci c = tre_mem_alloc(mem, sizeof(*c)); 1141570af302Sopenharmony_ci if (c == NULL) 1142570af302Sopenharmony_ci return REG_ESPACE; 1143570af302Sopenharmony_ci c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); 1144570af302Sopenharmony_ci if (c->right == NULL) 1145570af302Sopenharmony_ci return REG_ESPACE; 1146570af302Sopenharmony_ci c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 1147570af302Sopenharmony_ci if (c->left == NULL) 1148570af302Sopenharmony_ci return REG_ESPACE; 1149570af302Sopenharmony_ci 1150570af302Sopenharmony_ci c->left->obj = node->obj; 1151570af302Sopenharmony_ci c->left->type = node->type; 1152570af302Sopenharmony_ci c->left->nullable = -1; 1153570af302Sopenharmony_ci c->left->submatch_id = -1; 1154570af302Sopenharmony_ci c->left->firstpos = NULL; 1155570af302Sopenharmony_ci c->left->lastpos = NULL; 1156570af302Sopenharmony_ci c->left->num_tags = 0; 1157570af302Sopenharmony_ci c->left->num_submatches = 0; 1158570af302Sopenharmony_ci node->obj = c; 1159570af302Sopenharmony_ci node->type = CATENATION; 1160570af302Sopenharmony_ci return REG_OK; 1161570af302Sopenharmony_ci} 1162570af302Sopenharmony_ci 1163570af302Sopenharmony_citypedef enum { 1164570af302Sopenharmony_ci ADDTAGS_RECURSE, 1165570af302Sopenharmony_ci ADDTAGS_AFTER_ITERATION, 1166570af302Sopenharmony_ci ADDTAGS_AFTER_UNION_LEFT, 1167570af302Sopenharmony_ci ADDTAGS_AFTER_UNION_RIGHT, 1168570af302Sopenharmony_ci ADDTAGS_AFTER_CAT_LEFT, 1169570af302Sopenharmony_ci ADDTAGS_AFTER_CAT_RIGHT, 1170570af302Sopenharmony_ci ADDTAGS_SET_SUBMATCH_END 1171570af302Sopenharmony_ci} tre_addtags_symbol_t; 1172570af302Sopenharmony_ci 1173570af302Sopenharmony_ci 1174570af302Sopenharmony_citypedef struct { 1175570af302Sopenharmony_ci int tag; 1176570af302Sopenharmony_ci int next_tag; 1177570af302Sopenharmony_ci} tre_tag_states_t; 1178570af302Sopenharmony_ci 1179570af302Sopenharmony_ci 1180570af302Sopenharmony_ci/* Go through `regset' and set submatch data for submatches that are 1181570af302Sopenharmony_ci using this tag. */ 1182570af302Sopenharmony_cistatic void 1183570af302Sopenharmony_citre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag) 1184570af302Sopenharmony_ci{ 1185570af302Sopenharmony_ci int i; 1186570af302Sopenharmony_ci 1187570af302Sopenharmony_ci for (i = 0; regset[i] >= 0; i++) 1188570af302Sopenharmony_ci { 1189570af302Sopenharmony_ci int id = regset[i] / 2; 1190570af302Sopenharmony_ci int start = !(regset[i] % 2); 1191570af302Sopenharmony_ci if (start) 1192570af302Sopenharmony_ci tnfa->submatch_data[id].so_tag = tag; 1193570af302Sopenharmony_ci else 1194570af302Sopenharmony_ci tnfa->submatch_data[id].eo_tag = tag; 1195570af302Sopenharmony_ci } 1196570af302Sopenharmony_ci regset[0] = -1; 1197570af302Sopenharmony_ci} 1198570af302Sopenharmony_ci 1199570af302Sopenharmony_ci 1200570af302Sopenharmony_ci/* Adds tags to appropriate locations in the parse tree in `tree', so that 1201570af302Sopenharmony_ci subexpressions marked for submatch addressing can be traced. */ 1202570af302Sopenharmony_cistatic reg_errcode_t 1203570af302Sopenharmony_citre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, 1204570af302Sopenharmony_ci tre_tnfa_t *tnfa) 1205570af302Sopenharmony_ci{ 1206570af302Sopenharmony_ci reg_errcode_t status = REG_OK; 1207570af302Sopenharmony_ci tre_addtags_symbol_t symbol; 1208570af302Sopenharmony_ci tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ 1209570af302Sopenharmony_ci int bottom = tre_stack_num_objects(stack); 1210570af302Sopenharmony_ci /* True for first pass (counting number of needed tags) */ 1211570af302Sopenharmony_ci int first_pass = (mem == NULL || tnfa == NULL); 1212570af302Sopenharmony_ci int *regset, *orig_regset; 1213570af302Sopenharmony_ci int num_tags = 0; /* Total number of tags. */ 1214570af302Sopenharmony_ci int num_minimals = 0; /* Number of special minimal tags. */ 1215570af302Sopenharmony_ci int tag = 0; /* The tag that is to be added next. */ 1216570af302Sopenharmony_ci int next_tag = 1; /* Next tag to use after this one. */ 1217570af302Sopenharmony_ci int *parents; /* Stack of submatches the current submatch is 1218570af302Sopenharmony_ci contained in. */ 1219570af302Sopenharmony_ci int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ 1220570af302Sopenharmony_ci tre_tag_states_t *saved_states; 1221570af302Sopenharmony_ci 1222570af302Sopenharmony_ci tre_tag_direction_t direction = TRE_TAG_MINIMIZE; 1223570af302Sopenharmony_ci if (!first_pass) 1224570af302Sopenharmony_ci { 1225570af302Sopenharmony_ci tnfa->end_tag = 0; 1226570af302Sopenharmony_ci tnfa->minimal_tags[0] = -1; 1227570af302Sopenharmony_ci } 1228570af302Sopenharmony_ci 1229570af302Sopenharmony_ci regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); 1230570af302Sopenharmony_ci if (regset == NULL) 1231570af302Sopenharmony_ci return REG_ESPACE; 1232570af302Sopenharmony_ci regset[0] = -1; 1233570af302Sopenharmony_ci orig_regset = regset; 1234570af302Sopenharmony_ci 1235570af302Sopenharmony_ci parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); 1236570af302Sopenharmony_ci if (parents == NULL) 1237570af302Sopenharmony_ci { 1238570af302Sopenharmony_ci xfree(regset); 1239570af302Sopenharmony_ci return REG_ESPACE; 1240570af302Sopenharmony_ci } 1241570af302Sopenharmony_ci parents[0] = -1; 1242570af302Sopenharmony_ci 1243570af302Sopenharmony_ci saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); 1244570af302Sopenharmony_ci if (saved_states == NULL) 1245570af302Sopenharmony_ci { 1246570af302Sopenharmony_ci xfree(regset); 1247570af302Sopenharmony_ci xfree(parents); 1248570af302Sopenharmony_ci return REG_ESPACE; 1249570af302Sopenharmony_ci } 1250570af302Sopenharmony_ci else 1251570af302Sopenharmony_ci { 1252570af302Sopenharmony_ci unsigned int i; 1253570af302Sopenharmony_ci for (i = 0; i <= tnfa->num_submatches; i++) 1254570af302Sopenharmony_ci saved_states[i].tag = -1; 1255570af302Sopenharmony_ci } 1256570af302Sopenharmony_ci 1257570af302Sopenharmony_ci STACK_PUSH(stack, voidptr, node); 1258570af302Sopenharmony_ci STACK_PUSH(stack, int, ADDTAGS_RECURSE); 1259570af302Sopenharmony_ci 1260570af302Sopenharmony_ci while (tre_stack_num_objects(stack) > bottom) 1261570af302Sopenharmony_ci { 1262570af302Sopenharmony_ci if (status != REG_OK) 1263570af302Sopenharmony_ci break; 1264570af302Sopenharmony_ci 1265570af302Sopenharmony_ci symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); 1266570af302Sopenharmony_ci switch (symbol) 1267570af302Sopenharmony_ci { 1268570af302Sopenharmony_ci 1269570af302Sopenharmony_ci case ADDTAGS_SET_SUBMATCH_END: 1270570af302Sopenharmony_ci { 1271570af302Sopenharmony_ci int id = tre_stack_pop_int(stack); 1272570af302Sopenharmony_ci int i; 1273570af302Sopenharmony_ci 1274570af302Sopenharmony_ci /* Add end of this submatch to regset. */ 1275570af302Sopenharmony_ci for (i = 0; regset[i] >= 0; i++); 1276570af302Sopenharmony_ci regset[i] = id * 2 + 1; 1277570af302Sopenharmony_ci regset[i + 1] = -1; 1278570af302Sopenharmony_ci 1279570af302Sopenharmony_ci /* Pop this submatch from the parents stack. */ 1280570af302Sopenharmony_ci for (i = 0; parents[i] >= 0; i++); 1281570af302Sopenharmony_ci parents[i - 1] = -1; 1282570af302Sopenharmony_ci break; 1283570af302Sopenharmony_ci } 1284570af302Sopenharmony_ci 1285570af302Sopenharmony_ci case ADDTAGS_RECURSE: 1286570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1287570af302Sopenharmony_ci 1288570af302Sopenharmony_ci if (node->submatch_id >= 0) 1289570af302Sopenharmony_ci { 1290570af302Sopenharmony_ci int id = node->submatch_id; 1291570af302Sopenharmony_ci int i; 1292570af302Sopenharmony_ci 1293570af302Sopenharmony_ci 1294570af302Sopenharmony_ci /* Add start of this submatch to regset. */ 1295570af302Sopenharmony_ci for (i = 0; regset[i] >= 0; i++); 1296570af302Sopenharmony_ci regset[i] = id * 2; 1297570af302Sopenharmony_ci regset[i + 1] = -1; 1298570af302Sopenharmony_ci 1299570af302Sopenharmony_ci if (!first_pass) 1300570af302Sopenharmony_ci { 1301570af302Sopenharmony_ci for (i = 0; parents[i] >= 0; i++); 1302570af302Sopenharmony_ci tnfa->submatch_data[id].parents = NULL; 1303570af302Sopenharmony_ci if (i > 0) 1304570af302Sopenharmony_ci { 1305570af302Sopenharmony_ci int *p = xmalloc(sizeof(*p) * (i + 1)); 1306570af302Sopenharmony_ci if (p == NULL) 1307570af302Sopenharmony_ci { 1308570af302Sopenharmony_ci status = REG_ESPACE; 1309570af302Sopenharmony_ci break; 1310570af302Sopenharmony_ci } 1311570af302Sopenharmony_ci assert(tnfa->submatch_data[id].parents == NULL); 1312570af302Sopenharmony_ci tnfa->submatch_data[id].parents = p; 1313570af302Sopenharmony_ci for (i = 0; parents[i] >= 0; i++) 1314570af302Sopenharmony_ci p[i] = parents[i]; 1315570af302Sopenharmony_ci p[i] = -1; 1316570af302Sopenharmony_ci } 1317570af302Sopenharmony_ci } 1318570af302Sopenharmony_ci 1319570af302Sopenharmony_ci /* Add end of this submatch to regset after processing this 1320570af302Sopenharmony_ci node. */ 1321570af302Sopenharmony_ci STACK_PUSHX(stack, int, node->submatch_id); 1322570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); 1323570af302Sopenharmony_ci } 1324570af302Sopenharmony_ci 1325570af302Sopenharmony_ci switch (node->type) 1326570af302Sopenharmony_ci { 1327570af302Sopenharmony_ci case LITERAL: 1328570af302Sopenharmony_ci { 1329570af302Sopenharmony_ci tre_literal_t *lit = node->obj; 1330570af302Sopenharmony_ci 1331570af302Sopenharmony_ci if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1332570af302Sopenharmony_ci { 1333570af302Sopenharmony_ci int i; 1334570af302Sopenharmony_ci if (regset[0] >= 0) 1335570af302Sopenharmony_ci { 1336570af302Sopenharmony_ci /* Regset is not empty, so add a tag before the 1337570af302Sopenharmony_ci literal or backref. */ 1338570af302Sopenharmony_ci if (!first_pass) 1339570af302Sopenharmony_ci { 1340570af302Sopenharmony_ci status = tre_add_tag_left(mem, node, tag); 1341570af302Sopenharmony_ci tnfa->tag_directions[tag] = direction; 1342570af302Sopenharmony_ci if (minimal_tag >= 0) 1343570af302Sopenharmony_ci { 1344570af302Sopenharmony_ci for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1345570af302Sopenharmony_ci tnfa->minimal_tags[i] = tag; 1346570af302Sopenharmony_ci tnfa->minimal_tags[i + 1] = minimal_tag; 1347570af302Sopenharmony_ci tnfa->minimal_tags[i + 2] = -1; 1348570af302Sopenharmony_ci minimal_tag = -1; 1349570af302Sopenharmony_ci num_minimals++; 1350570af302Sopenharmony_ci } 1351570af302Sopenharmony_ci tre_purge_regset(regset, tnfa, tag); 1352570af302Sopenharmony_ci } 1353570af302Sopenharmony_ci else 1354570af302Sopenharmony_ci { 1355570af302Sopenharmony_ci node->num_tags = 1; 1356570af302Sopenharmony_ci } 1357570af302Sopenharmony_ci 1358570af302Sopenharmony_ci regset[0] = -1; 1359570af302Sopenharmony_ci tag = next_tag; 1360570af302Sopenharmony_ci num_tags++; 1361570af302Sopenharmony_ci next_tag++; 1362570af302Sopenharmony_ci } 1363570af302Sopenharmony_ci } 1364570af302Sopenharmony_ci else 1365570af302Sopenharmony_ci { 1366570af302Sopenharmony_ci assert(!IS_TAG(lit)); 1367570af302Sopenharmony_ci } 1368570af302Sopenharmony_ci break; 1369570af302Sopenharmony_ci } 1370570af302Sopenharmony_ci case CATENATION: 1371570af302Sopenharmony_ci { 1372570af302Sopenharmony_ci tre_catenation_t *cat = node->obj; 1373570af302Sopenharmony_ci tre_ast_node_t *left = cat->left; 1374570af302Sopenharmony_ci tre_ast_node_t *right = cat->right; 1375570af302Sopenharmony_ci int reserved_tag = -1; 1376570af302Sopenharmony_ci 1377570af302Sopenharmony_ci 1378570af302Sopenharmony_ci /* After processing right child. */ 1379570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, node); 1380570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); 1381570af302Sopenharmony_ci 1382570af302Sopenharmony_ci /* Process right child. */ 1383570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, right); 1384570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1385570af302Sopenharmony_ci 1386570af302Sopenharmony_ci /* After processing left child. */ 1387570af302Sopenharmony_ci STACK_PUSHX(stack, int, next_tag + left->num_tags); 1388570af302Sopenharmony_ci if (left->num_tags > 0 && right->num_tags > 0) 1389570af302Sopenharmony_ci { 1390570af302Sopenharmony_ci /* Reserve the next tag to the right child. */ 1391570af302Sopenharmony_ci reserved_tag = next_tag; 1392570af302Sopenharmony_ci next_tag++; 1393570af302Sopenharmony_ci } 1394570af302Sopenharmony_ci STACK_PUSHX(stack, int, reserved_tag); 1395570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); 1396570af302Sopenharmony_ci 1397570af302Sopenharmony_ci /* Process left child. */ 1398570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, left); 1399570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1400570af302Sopenharmony_ci 1401570af302Sopenharmony_ci } 1402570af302Sopenharmony_ci break; 1403570af302Sopenharmony_ci case ITERATION: 1404570af302Sopenharmony_ci { 1405570af302Sopenharmony_ci tre_iteration_t *iter = node->obj; 1406570af302Sopenharmony_ci 1407570af302Sopenharmony_ci if (first_pass) 1408570af302Sopenharmony_ci { 1409570af302Sopenharmony_ci STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); 1410570af302Sopenharmony_ci } 1411570af302Sopenharmony_ci else 1412570af302Sopenharmony_ci { 1413570af302Sopenharmony_ci STACK_PUSHX(stack, int, tag); 1414570af302Sopenharmony_ci STACK_PUSHX(stack, int, iter->minimal); 1415570af302Sopenharmony_ci } 1416570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, node); 1417570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); 1418570af302Sopenharmony_ci 1419570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, iter->arg); 1420570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1421570af302Sopenharmony_ci 1422570af302Sopenharmony_ci /* Regset is not empty, so add a tag here. */ 1423570af302Sopenharmony_ci if (regset[0] >= 0 || iter->minimal) 1424570af302Sopenharmony_ci { 1425570af302Sopenharmony_ci if (!first_pass) 1426570af302Sopenharmony_ci { 1427570af302Sopenharmony_ci int i; 1428570af302Sopenharmony_ci status = tre_add_tag_left(mem, node, tag); 1429570af302Sopenharmony_ci if (iter->minimal) 1430570af302Sopenharmony_ci tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; 1431570af302Sopenharmony_ci else 1432570af302Sopenharmony_ci tnfa->tag_directions[tag] = direction; 1433570af302Sopenharmony_ci if (minimal_tag >= 0) 1434570af302Sopenharmony_ci { 1435570af302Sopenharmony_ci for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1436570af302Sopenharmony_ci tnfa->minimal_tags[i] = tag; 1437570af302Sopenharmony_ci tnfa->minimal_tags[i + 1] = minimal_tag; 1438570af302Sopenharmony_ci tnfa->minimal_tags[i + 2] = -1; 1439570af302Sopenharmony_ci minimal_tag = -1; 1440570af302Sopenharmony_ci num_minimals++; 1441570af302Sopenharmony_ci } 1442570af302Sopenharmony_ci tre_purge_regset(regset, tnfa, tag); 1443570af302Sopenharmony_ci } 1444570af302Sopenharmony_ci 1445570af302Sopenharmony_ci regset[0] = -1; 1446570af302Sopenharmony_ci tag = next_tag; 1447570af302Sopenharmony_ci num_tags++; 1448570af302Sopenharmony_ci next_tag++; 1449570af302Sopenharmony_ci } 1450570af302Sopenharmony_ci direction = TRE_TAG_MINIMIZE; 1451570af302Sopenharmony_ci } 1452570af302Sopenharmony_ci break; 1453570af302Sopenharmony_ci case UNION: 1454570af302Sopenharmony_ci { 1455570af302Sopenharmony_ci tre_union_t *uni = node->obj; 1456570af302Sopenharmony_ci tre_ast_node_t *left = uni->left; 1457570af302Sopenharmony_ci tre_ast_node_t *right = uni->right; 1458570af302Sopenharmony_ci int left_tag; 1459570af302Sopenharmony_ci int right_tag; 1460570af302Sopenharmony_ci 1461570af302Sopenharmony_ci if (regset[0] >= 0) 1462570af302Sopenharmony_ci { 1463570af302Sopenharmony_ci left_tag = next_tag; 1464570af302Sopenharmony_ci right_tag = next_tag + 1; 1465570af302Sopenharmony_ci } 1466570af302Sopenharmony_ci else 1467570af302Sopenharmony_ci { 1468570af302Sopenharmony_ci left_tag = tag; 1469570af302Sopenharmony_ci right_tag = next_tag; 1470570af302Sopenharmony_ci } 1471570af302Sopenharmony_ci 1472570af302Sopenharmony_ci /* After processing right child. */ 1473570af302Sopenharmony_ci STACK_PUSHX(stack, int, right_tag); 1474570af302Sopenharmony_ci STACK_PUSHX(stack, int, left_tag); 1475570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, regset); 1476570af302Sopenharmony_ci STACK_PUSHX(stack, int, regset[0] >= 0); 1477570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, node); 1478570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, right); 1479570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, left); 1480570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); 1481570af302Sopenharmony_ci 1482570af302Sopenharmony_ci /* Process right child. */ 1483570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, right); 1484570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1485570af302Sopenharmony_ci 1486570af302Sopenharmony_ci /* After processing left child. */ 1487570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); 1488570af302Sopenharmony_ci 1489570af302Sopenharmony_ci /* Process left child. */ 1490570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, left); 1491570af302Sopenharmony_ci STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1492570af302Sopenharmony_ci 1493570af302Sopenharmony_ci /* Regset is not empty, so add a tag here. */ 1494570af302Sopenharmony_ci if (regset[0] >= 0) 1495570af302Sopenharmony_ci { 1496570af302Sopenharmony_ci if (!first_pass) 1497570af302Sopenharmony_ci { 1498570af302Sopenharmony_ci int i; 1499570af302Sopenharmony_ci status = tre_add_tag_left(mem, node, tag); 1500570af302Sopenharmony_ci tnfa->tag_directions[tag] = direction; 1501570af302Sopenharmony_ci if (minimal_tag >= 0) 1502570af302Sopenharmony_ci { 1503570af302Sopenharmony_ci for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1504570af302Sopenharmony_ci tnfa->minimal_tags[i] = tag; 1505570af302Sopenharmony_ci tnfa->minimal_tags[i + 1] = minimal_tag; 1506570af302Sopenharmony_ci tnfa->minimal_tags[i + 2] = -1; 1507570af302Sopenharmony_ci minimal_tag = -1; 1508570af302Sopenharmony_ci num_minimals++; 1509570af302Sopenharmony_ci } 1510570af302Sopenharmony_ci tre_purge_regset(regset, tnfa, tag); 1511570af302Sopenharmony_ci } 1512570af302Sopenharmony_ci 1513570af302Sopenharmony_ci regset[0] = -1; 1514570af302Sopenharmony_ci tag = next_tag; 1515570af302Sopenharmony_ci num_tags++; 1516570af302Sopenharmony_ci next_tag++; 1517570af302Sopenharmony_ci } 1518570af302Sopenharmony_ci 1519570af302Sopenharmony_ci if (node->num_submatches > 0) 1520570af302Sopenharmony_ci { 1521570af302Sopenharmony_ci /* The next two tags are reserved for markers. */ 1522570af302Sopenharmony_ci next_tag++; 1523570af302Sopenharmony_ci tag = next_tag; 1524570af302Sopenharmony_ci next_tag++; 1525570af302Sopenharmony_ci } 1526570af302Sopenharmony_ci 1527570af302Sopenharmony_ci break; 1528570af302Sopenharmony_ci } 1529570af302Sopenharmony_ci } 1530570af302Sopenharmony_ci 1531570af302Sopenharmony_ci if (node->submatch_id >= 0) 1532570af302Sopenharmony_ci { 1533570af302Sopenharmony_ci int i; 1534570af302Sopenharmony_ci /* Push this submatch on the parents stack. */ 1535570af302Sopenharmony_ci for (i = 0; parents[i] >= 0; i++); 1536570af302Sopenharmony_ci parents[i] = node->submatch_id; 1537570af302Sopenharmony_ci parents[i + 1] = -1; 1538570af302Sopenharmony_ci } 1539570af302Sopenharmony_ci 1540570af302Sopenharmony_ci break; /* end case: ADDTAGS_RECURSE */ 1541570af302Sopenharmony_ci 1542570af302Sopenharmony_ci case ADDTAGS_AFTER_ITERATION: 1543570af302Sopenharmony_ci { 1544570af302Sopenharmony_ci int minimal = 0; 1545570af302Sopenharmony_ci int enter_tag; 1546570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1547570af302Sopenharmony_ci if (first_pass) 1548570af302Sopenharmony_ci { 1549570af302Sopenharmony_ci node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags 1550570af302Sopenharmony_ci + tre_stack_pop_int(stack); 1551570af302Sopenharmony_ci minimal_tag = -1; 1552570af302Sopenharmony_ci } 1553570af302Sopenharmony_ci else 1554570af302Sopenharmony_ci { 1555570af302Sopenharmony_ci minimal = tre_stack_pop_int(stack); 1556570af302Sopenharmony_ci enter_tag = tre_stack_pop_int(stack); 1557570af302Sopenharmony_ci if (minimal) 1558570af302Sopenharmony_ci minimal_tag = enter_tag; 1559570af302Sopenharmony_ci } 1560570af302Sopenharmony_ci 1561570af302Sopenharmony_ci if (!first_pass) 1562570af302Sopenharmony_ci { 1563570af302Sopenharmony_ci if (minimal) 1564570af302Sopenharmony_ci direction = TRE_TAG_MINIMIZE; 1565570af302Sopenharmony_ci else 1566570af302Sopenharmony_ci direction = TRE_TAG_MAXIMIZE; 1567570af302Sopenharmony_ci } 1568570af302Sopenharmony_ci break; 1569570af302Sopenharmony_ci } 1570570af302Sopenharmony_ci 1571570af302Sopenharmony_ci case ADDTAGS_AFTER_CAT_LEFT: 1572570af302Sopenharmony_ci { 1573570af302Sopenharmony_ci int new_tag = tre_stack_pop_int(stack); 1574570af302Sopenharmony_ci next_tag = tre_stack_pop_int(stack); 1575570af302Sopenharmony_ci if (new_tag >= 0) 1576570af302Sopenharmony_ci { 1577570af302Sopenharmony_ci tag = new_tag; 1578570af302Sopenharmony_ci } 1579570af302Sopenharmony_ci break; 1580570af302Sopenharmony_ci } 1581570af302Sopenharmony_ci 1582570af302Sopenharmony_ci case ADDTAGS_AFTER_CAT_RIGHT: 1583570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1584570af302Sopenharmony_ci if (first_pass) 1585570af302Sopenharmony_ci node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags 1586570af302Sopenharmony_ci + ((tre_catenation_t *)node->obj)->right->num_tags; 1587570af302Sopenharmony_ci break; 1588570af302Sopenharmony_ci 1589570af302Sopenharmony_ci case ADDTAGS_AFTER_UNION_LEFT: 1590570af302Sopenharmony_ci /* Lift the bottom of the `regset' array so that when processing 1591570af302Sopenharmony_ci the right operand the items currently in the array are 1592570af302Sopenharmony_ci invisible. The original bottom was saved at ADDTAGS_UNION and 1593570af302Sopenharmony_ci will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ 1594570af302Sopenharmony_ci while (*regset >= 0) 1595570af302Sopenharmony_ci regset++; 1596570af302Sopenharmony_ci break; 1597570af302Sopenharmony_ci 1598570af302Sopenharmony_ci case ADDTAGS_AFTER_UNION_RIGHT: 1599570af302Sopenharmony_ci { 1600570af302Sopenharmony_ci int added_tags, tag_left, tag_right; 1601570af302Sopenharmony_ci tre_ast_node_t *left = tre_stack_pop_voidptr(stack); 1602570af302Sopenharmony_ci tre_ast_node_t *right = tre_stack_pop_voidptr(stack); 1603570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1604570af302Sopenharmony_ci added_tags = tre_stack_pop_int(stack); 1605570af302Sopenharmony_ci if (first_pass) 1606570af302Sopenharmony_ci { 1607570af302Sopenharmony_ci node->num_tags = ((tre_union_t *)node->obj)->left->num_tags 1608570af302Sopenharmony_ci + ((tre_union_t *)node->obj)->right->num_tags + added_tags 1609570af302Sopenharmony_ci + ((node->num_submatches > 0) ? 2 : 0); 1610570af302Sopenharmony_ci } 1611570af302Sopenharmony_ci regset = tre_stack_pop_voidptr(stack); 1612570af302Sopenharmony_ci tag_left = tre_stack_pop_int(stack); 1613570af302Sopenharmony_ci tag_right = tre_stack_pop_int(stack); 1614570af302Sopenharmony_ci 1615570af302Sopenharmony_ci /* Add tags after both children, the left child gets a smaller 1616570af302Sopenharmony_ci tag than the right child. This guarantees that we prefer 1617570af302Sopenharmony_ci the left child over the right child. */ 1618570af302Sopenharmony_ci /* XXX - This is not always necessary (if the children have 1619570af302Sopenharmony_ci tags which must be seen for every match of that child). */ 1620570af302Sopenharmony_ci /* XXX - Check if this is the only place where tre_add_tag_right 1621570af302Sopenharmony_ci is used. If so, use tre_add_tag_left (putting the tag before 1622570af302Sopenharmony_ci the child as opposed after the child) and throw away 1623570af302Sopenharmony_ci tre_add_tag_right. */ 1624570af302Sopenharmony_ci if (node->num_submatches > 0) 1625570af302Sopenharmony_ci { 1626570af302Sopenharmony_ci if (!first_pass) 1627570af302Sopenharmony_ci { 1628570af302Sopenharmony_ci status = tre_add_tag_right(mem, left, tag_left); 1629570af302Sopenharmony_ci tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; 1630570af302Sopenharmony_ci if (status == REG_OK) 1631570af302Sopenharmony_ci status = tre_add_tag_right(mem, right, tag_right); 1632570af302Sopenharmony_ci tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; 1633570af302Sopenharmony_ci } 1634570af302Sopenharmony_ci num_tags += 2; 1635570af302Sopenharmony_ci } 1636570af302Sopenharmony_ci direction = TRE_TAG_MAXIMIZE; 1637570af302Sopenharmony_ci break; 1638570af302Sopenharmony_ci } 1639570af302Sopenharmony_ci 1640570af302Sopenharmony_ci default: 1641570af302Sopenharmony_ci assert(0); 1642570af302Sopenharmony_ci break; 1643570af302Sopenharmony_ci 1644570af302Sopenharmony_ci } /* end switch(symbol) */ 1645570af302Sopenharmony_ci } /* end while(tre_stack_num_objects(stack) > bottom) */ 1646570af302Sopenharmony_ci 1647570af302Sopenharmony_ci if (!first_pass) 1648570af302Sopenharmony_ci tre_purge_regset(regset, tnfa, tag); 1649570af302Sopenharmony_ci 1650570af302Sopenharmony_ci if (!first_pass && minimal_tag >= 0) 1651570af302Sopenharmony_ci { 1652570af302Sopenharmony_ci int i; 1653570af302Sopenharmony_ci for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1654570af302Sopenharmony_ci tnfa->minimal_tags[i] = tag; 1655570af302Sopenharmony_ci tnfa->minimal_tags[i + 1] = minimal_tag; 1656570af302Sopenharmony_ci tnfa->minimal_tags[i + 2] = -1; 1657570af302Sopenharmony_ci minimal_tag = -1; 1658570af302Sopenharmony_ci num_minimals++; 1659570af302Sopenharmony_ci } 1660570af302Sopenharmony_ci 1661570af302Sopenharmony_ci assert(tree->num_tags == num_tags); 1662570af302Sopenharmony_ci tnfa->end_tag = num_tags; 1663570af302Sopenharmony_ci tnfa->num_tags = num_tags; 1664570af302Sopenharmony_ci tnfa->num_minimals = num_minimals; 1665570af302Sopenharmony_ci xfree(orig_regset); 1666570af302Sopenharmony_ci xfree(parents); 1667570af302Sopenharmony_ci xfree(saved_states); 1668570af302Sopenharmony_ci return status; 1669570af302Sopenharmony_ci} 1670570af302Sopenharmony_ci 1671570af302Sopenharmony_ci 1672570af302Sopenharmony_ci 1673570af302Sopenharmony_ci/* 1674570af302Sopenharmony_ci AST to TNFA compilation routines. 1675570af302Sopenharmony_ci*/ 1676570af302Sopenharmony_ci 1677570af302Sopenharmony_citypedef enum { 1678570af302Sopenharmony_ci COPY_RECURSE, 1679570af302Sopenharmony_ci COPY_SET_RESULT_PTR 1680570af302Sopenharmony_ci} tre_copyast_symbol_t; 1681570af302Sopenharmony_ci 1682570af302Sopenharmony_ci/* Flags for tre_copy_ast(). */ 1683570af302Sopenharmony_ci#define COPY_REMOVE_TAGS 1 1684570af302Sopenharmony_ci#define COPY_MAXIMIZE_FIRST_TAG 2 1685570af302Sopenharmony_ci 1686570af302Sopenharmony_cistatic reg_errcode_t 1687570af302Sopenharmony_citre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 1688570af302Sopenharmony_ci int flags, int *pos_add, tre_tag_direction_t *tag_directions, 1689570af302Sopenharmony_ci tre_ast_node_t **copy, int *max_pos) 1690570af302Sopenharmony_ci{ 1691570af302Sopenharmony_ci reg_errcode_t status = REG_OK; 1692570af302Sopenharmony_ci int bottom = tre_stack_num_objects(stack); 1693570af302Sopenharmony_ci int num_copied = 0; 1694570af302Sopenharmony_ci int first_tag = 1; 1695570af302Sopenharmony_ci tre_ast_node_t **result = copy; 1696570af302Sopenharmony_ci tre_copyast_symbol_t symbol; 1697570af302Sopenharmony_ci 1698570af302Sopenharmony_ci STACK_PUSH(stack, voidptr, ast); 1699570af302Sopenharmony_ci STACK_PUSH(stack, int, COPY_RECURSE); 1700570af302Sopenharmony_ci 1701570af302Sopenharmony_ci while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 1702570af302Sopenharmony_ci { 1703570af302Sopenharmony_ci tre_ast_node_t *node; 1704570af302Sopenharmony_ci if (status != REG_OK) 1705570af302Sopenharmony_ci break; 1706570af302Sopenharmony_ci 1707570af302Sopenharmony_ci symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); 1708570af302Sopenharmony_ci switch (symbol) 1709570af302Sopenharmony_ci { 1710570af302Sopenharmony_ci case COPY_SET_RESULT_PTR: 1711570af302Sopenharmony_ci result = tre_stack_pop_voidptr(stack); 1712570af302Sopenharmony_ci break; 1713570af302Sopenharmony_ci case COPY_RECURSE: 1714570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1715570af302Sopenharmony_ci switch (node->type) 1716570af302Sopenharmony_ci { 1717570af302Sopenharmony_ci case LITERAL: 1718570af302Sopenharmony_ci { 1719570af302Sopenharmony_ci tre_literal_t *lit = node->obj; 1720570af302Sopenharmony_ci int pos = lit->position; 1721570af302Sopenharmony_ci int min = lit->code_min; 1722570af302Sopenharmony_ci int max = lit->code_max; 1723570af302Sopenharmony_ci if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1724570af302Sopenharmony_ci { 1725570af302Sopenharmony_ci /* XXX - e.g. [ab] has only one position but two 1726570af302Sopenharmony_ci nodes, so we are creating holes in the state space 1727570af302Sopenharmony_ci here. Not fatal, just wastes memory. */ 1728570af302Sopenharmony_ci pos += *pos_add; 1729570af302Sopenharmony_ci num_copied++; 1730570af302Sopenharmony_ci } 1731570af302Sopenharmony_ci else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) 1732570af302Sopenharmony_ci { 1733570af302Sopenharmony_ci /* Change this tag to empty. */ 1734570af302Sopenharmony_ci min = EMPTY; 1735570af302Sopenharmony_ci max = pos = -1; 1736570af302Sopenharmony_ci } 1737570af302Sopenharmony_ci else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) 1738570af302Sopenharmony_ci && first_tag) 1739570af302Sopenharmony_ci { 1740570af302Sopenharmony_ci /* Maximize the first tag. */ 1741570af302Sopenharmony_ci tag_directions[max] = TRE_TAG_MAXIMIZE; 1742570af302Sopenharmony_ci first_tag = 0; 1743570af302Sopenharmony_ci } 1744570af302Sopenharmony_ci *result = tre_ast_new_literal(mem, min, max, pos); 1745570af302Sopenharmony_ci if (*result == NULL) 1746570af302Sopenharmony_ci status = REG_ESPACE; 1747570af302Sopenharmony_ci else { 1748570af302Sopenharmony_ci tre_literal_t *p = (*result)->obj; 1749570af302Sopenharmony_ci p->class = lit->class; 1750570af302Sopenharmony_ci p->neg_classes = lit->neg_classes; 1751570af302Sopenharmony_ci } 1752570af302Sopenharmony_ci 1753570af302Sopenharmony_ci if (pos > *max_pos) 1754570af302Sopenharmony_ci *max_pos = pos; 1755570af302Sopenharmony_ci break; 1756570af302Sopenharmony_ci } 1757570af302Sopenharmony_ci case UNION: 1758570af302Sopenharmony_ci { 1759570af302Sopenharmony_ci tre_union_t *uni = node->obj; 1760570af302Sopenharmony_ci tre_union_t *tmp; 1761570af302Sopenharmony_ci *result = tre_ast_new_union(mem, uni->left, uni->right); 1762570af302Sopenharmony_ci if (*result == NULL) 1763570af302Sopenharmony_ci { 1764570af302Sopenharmony_ci status = REG_ESPACE; 1765570af302Sopenharmony_ci break; 1766570af302Sopenharmony_ci } 1767570af302Sopenharmony_ci tmp = (*result)->obj; 1768570af302Sopenharmony_ci result = &tmp->left; 1769570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->right); 1770570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_RECURSE); 1771570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, &tmp->right); 1772570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); 1773570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->left); 1774570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_RECURSE); 1775570af302Sopenharmony_ci break; 1776570af302Sopenharmony_ci } 1777570af302Sopenharmony_ci case CATENATION: 1778570af302Sopenharmony_ci { 1779570af302Sopenharmony_ci tre_catenation_t *cat = node->obj; 1780570af302Sopenharmony_ci tre_catenation_t *tmp; 1781570af302Sopenharmony_ci *result = tre_ast_new_catenation(mem, cat->left, cat->right); 1782570af302Sopenharmony_ci if (*result == NULL) 1783570af302Sopenharmony_ci { 1784570af302Sopenharmony_ci status = REG_ESPACE; 1785570af302Sopenharmony_ci break; 1786570af302Sopenharmony_ci } 1787570af302Sopenharmony_ci tmp = (*result)->obj; 1788570af302Sopenharmony_ci tmp->left = NULL; 1789570af302Sopenharmony_ci tmp->right = NULL; 1790570af302Sopenharmony_ci result = &tmp->left; 1791570af302Sopenharmony_ci 1792570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->right); 1793570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_RECURSE); 1794570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, &tmp->right); 1795570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); 1796570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->left); 1797570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_RECURSE); 1798570af302Sopenharmony_ci break; 1799570af302Sopenharmony_ci } 1800570af302Sopenharmony_ci case ITERATION: 1801570af302Sopenharmony_ci { 1802570af302Sopenharmony_ci tre_iteration_t *iter = node->obj; 1803570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, iter->arg); 1804570af302Sopenharmony_ci STACK_PUSHX(stack, int, COPY_RECURSE); 1805570af302Sopenharmony_ci *result = tre_ast_new_iter(mem, iter->arg, iter->min, 1806570af302Sopenharmony_ci iter->max, iter->minimal); 1807570af302Sopenharmony_ci if (*result == NULL) 1808570af302Sopenharmony_ci { 1809570af302Sopenharmony_ci status = REG_ESPACE; 1810570af302Sopenharmony_ci break; 1811570af302Sopenharmony_ci } 1812570af302Sopenharmony_ci iter = (*result)->obj; 1813570af302Sopenharmony_ci result = &iter->arg; 1814570af302Sopenharmony_ci break; 1815570af302Sopenharmony_ci } 1816570af302Sopenharmony_ci default: 1817570af302Sopenharmony_ci assert(0); 1818570af302Sopenharmony_ci break; 1819570af302Sopenharmony_ci } 1820570af302Sopenharmony_ci break; 1821570af302Sopenharmony_ci } 1822570af302Sopenharmony_ci } 1823570af302Sopenharmony_ci *pos_add += num_copied; 1824570af302Sopenharmony_ci return status; 1825570af302Sopenharmony_ci} 1826570af302Sopenharmony_ci 1827570af302Sopenharmony_citypedef enum { 1828570af302Sopenharmony_ci EXPAND_RECURSE, 1829570af302Sopenharmony_ci EXPAND_AFTER_ITER 1830570af302Sopenharmony_ci} tre_expand_ast_symbol_t; 1831570af302Sopenharmony_ci 1832570af302Sopenharmony_ci/* Expands each iteration node that has a finite nonzero minimum or maximum 1833570af302Sopenharmony_ci iteration count to a catenated sequence of copies of the node. */ 1834570af302Sopenharmony_cistatic reg_errcode_t 1835570af302Sopenharmony_citre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 1836570af302Sopenharmony_ci int *position, tre_tag_direction_t *tag_directions) 1837570af302Sopenharmony_ci{ 1838570af302Sopenharmony_ci reg_errcode_t status = REG_OK; 1839570af302Sopenharmony_ci int bottom = tre_stack_num_objects(stack); 1840570af302Sopenharmony_ci int pos_add = 0; 1841570af302Sopenharmony_ci int pos_add_total = 0; 1842570af302Sopenharmony_ci int max_pos = 0; 1843570af302Sopenharmony_ci int iter_depth = 0; 1844570af302Sopenharmony_ci 1845570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ast); 1846570af302Sopenharmony_ci STACK_PUSHR(stack, int, EXPAND_RECURSE); 1847570af302Sopenharmony_ci while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 1848570af302Sopenharmony_ci { 1849570af302Sopenharmony_ci tre_ast_node_t *node; 1850570af302Sopenharmony_ci tre_expand_ast_symbol_t symbol; 1851570af302Sopenharmony_ci 1852570af302Sopenharmony_ci if (status != REG_OK) 1853570af302Sopenharmony_ci break; 1854570af302Sopenharmony_ci 1855570af302Sopenharmony_ci symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); 1856570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 1857570af302Sopenharmony_ci switch (symbol) 1858570af302Sopenharmony_ci { 1859570af302Sopenharmony_ci case EXPAND_RECURSE: 1860570af302Sopenharmony_ci switch (node->type) 1861570af302Sopenharmony_ci { 1862570af302Sopenharmony_ci case LITERAL: 1863570af302Sopenharmony_ci { 1864570af302Sopenharmony_ci tre_literal_t *lit= node->obj; 1865570af302Sopenharmony_ci if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1866570af302Sopenharmony_ci { 1867570af302Sopenharmony_ci lit->position += pos_add; 1868570af302Sopenharmony_ci if (lit->position > max_pos) 1869570af302Sopenharmony_ci max_pos = lit->position; 1870570af302Sopenharmony_ci } 1871570af302Sopenharmony_ci break; 1872570af302Sopenharmony_ci } 1873570af302Sopenharmony_ci case UNION: 1874570af302Sopenharmony_ci { 1875570af302Sopenharmony_ci tre_union_t *uni = node->obj; 1876570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->right); 1877570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_RECURSE); 1878570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->left); 1879570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_RECURSE); 1880570af302Sopenharmony_ci break; 1881570af302Sopenharmony_ci } 1882570af302Sopenharmony_ci case CATENATION: 1883570af302Sopenharmony_ci { 1884570af302Sopenharmony_ci tre_catenation_t *cat = node->obj; 1885570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->right); 1886570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_RECURSE); 1887570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->left); 1888570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_RECURSE); 1889570af302Sopenharmony_ci break; 1890570af302Sopenharmony_ci } 1891570af302Sopenharmony_ci case ITERATION: 1892570af302Sopenharmony_ci { 1893570af302Sopenharmony_ci tre_iteration_t *iter = node->obj; 1894570af302Sopenharmony_ci STACK_PUSHX(stack, int, pos_add); 1895570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, node); 1896570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); 1897570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, iter->arg); 1898570af302Sopenharmony_ci STACK_PUSHX(stack, int, EXPAND_RECURSE); 1899570af302Sopenharmony_ci /* If we are going to expand this node at EXPAND_AFTER_ITER 1900570af302Sopenharmony_ci then don't increase the `pos' fields of the nodes now, it 1901570af302Sopenharmony_ci will get done when expanding. */ 1902570af302Sopenharmony_ci if (iter->min > 1 || iter->max > 1) 1903570af302Sopenharmony_ci pos_add = 0; 1904570af302Sopenharmony_ci iter_depth++; 1905570af302Sopenharmony_ci break; 1906570af302Sopenharmony_ci } 1907570af302Sopenharmony_ci default: 1908570af302Sopenharmony_ci assert(0); 1909570af302Sopenharmony_ci break; 1910570af302Sopenharmony_ci } 1911570af302Sopenharmony_ci break; 1912570af302Sopenharmony_ci case EXPAND_AFTER_ITER: 1913570af302Sopenharmony_ci { 1914570af302Sopenharmony_ci tre_iteration_t *iter = node->obj; 1915570af302Sopenharmony_ci int pos_add_last; 1916570af302Sopenharmony_ci pos_add = tre_stack_pop_int(stack); 1917570af302Sopenharmony_ci pos_add_last = pos_add; 1918570af302Sopenharmony_ci if (iter->min > 1 || iter->max > 1) 1919570af302Sopenharmony_ci { 1920570af302Sopenharmony_ci tre_ast_node_t *seq1 = NULL, *seq2 = NULL; 1921570af302Sopenharmony_ci int j; 1922570af302Sopenharmony_ci int pos_add_save = pos_add; 1923570af302Sopenharmony_ci 1924570af302Sopenharmony_ci /* Create a catenated sequence of copies of the node. */ 1925570af302Sopenharmony_ci for (j = 0; j < iter->min; j++) 1926570af302Sopenharmony_ci { 1927570af302Sopenharmony_ci tre_ast_node_t *copy; 1928570af302Sopenharmony_ci /* Remove tags from all but the last copy. */ 1929570af302Sopenharmony_ci int flags = ((j + 1 < iter->min) 1930570af302Sopenharmony_ci ? COPY_REMOVE_TAGS 1931570af302Sopenharmony_ci : COPY_MAXIMIZE_FIRST_TAG); 1932570af302Sopenharmony_ci pos_add_save = pos_add; 1933570af302Sopenharmony_ci status = tre_copy_ast(mem, stack, iter->arg, flags, 1934570af302Sopenharmony_ci &pos_add, tag_directions, ©, 1935570af302Sopenharmony_ci &max_pos); 1936570af302Sopenharmony_ci if (status != REG_OK) 1937570af302Sopenharmony_ci return status; 1938570af302Sopenharmony_ci if (seq1 != NULL) 1939570af302Sopenharmony_ci seq1 = tre_ast_new_catenation(mem, seq1, copy); 1940570af302Sopenharmony_ci else 1941570af302Sopenharmony_ci seq1 = copy; 1942570af302Sopenharmony_ci if (seq1 == NULL) 1943570af302Sopenharmony_ci return REG_ESPACE; 1944570af302Sopenharmony_ci } 1945570af302Sopenharmony_ci 1946570af302Sopenharmony_ci if (iter->max == -1) 1947570af302Sopenharmony_ci { 1948570af302Sopenharmony_ci /* No upper limit. */ 1949570af302Sopenharmony_ci pos_add_save = pos_add; 1950570af302Sopenharmony_ci status = tre_copy_ast(mem, stack, iter->arg, 0, 1951570af302Sopenharmony_ci &pos_add, NULL, &seq2, &max_pos); 1952570af302Sopenharmony_ci if (status != REG_OK) 1953570af302Sopenharmony_ci return status; 1954570af302Sopenharmony_ci seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); 1955570af302Sopenharmony_ci if (seq2 == NULL) 1956570af302Sopenharmony_ci return REG_ESPACE; 1957570af302Sopenharmony_ci } 1958570af302Sopenharmony_ci else 1959570af302Sopenharmony_ci { 1960570af302Sopenharmony_ci for (j = iter->min; j < iter->max; j++) 1961570af302Sopenharmony_ci { 1962570af302Sopenharmony_ci tre_ast_node_t *tmp, *copy; 1963570af302Sopenharmony_ci pos_add_save = pos_add; 1964570af302Sopenharmony_ci status = tre_copy_ast(mem, stack, iter->arg, 0, 1965570af302Sopenharmony_ci &pos_add, NULL, ©, &max_pos); 1966570af302Sopenharmony_ci if (status != REG_OK) 1967570af302Sopenharmony_ci return status; 1968570af302Sopenharmony_ci if (seq2 != NULL) 1969570af302Sopenharmony_ci seq2 = tre_ast_new_catenation(mem, copy, seq2); 1970570af302Sopenharmony_ci else 1971570af302Sopenharmony_ci seq2 = copy; 1972570af302Sopenharmony_ci if (seq2 == NULL) 1973570af302Sopenharmony_ci return REG_ESPACE; 1974570af302Sopenharmony_ci tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); 1975570af302Sopenharmony_ci if (tmp == NULL) 1976570af302Sopenharmony_ci return REG_ESPACE; 1977570af302Sopenharmony_ci seq2 = tre_ast_new_union(mem, tmp, seq2); 1978570af302Sopenharmony_ci if (seq2 == NULL) 1979570af302Sopenharmony_ci return REG_ESPACE; 1980570af302Sopenharmony_ci } 1981570af302Sopenharmony_ci } 1982570af302Sopenharmony_ci 1983570af302Sopenharmony_ci pos_add = pos_add_save; 1984570af302Sopenharmony_ci if (seq1 == NULL) 1985570af302Sopenharmony_ci seq1 = seq2; 1986570af302Sopenharmony_ci else if (seq2 != NULL) 1987570af302Sopenharmony_ci seq1 = tre_ast_new_catenation(mem, seq1, seq2); 1988570af302Sopenharmony_ci if (seq1 == NULL) 1989570af302Sopenharmony_ci return REG_ESPACE; 1990570af302Sopenharmony_ci node->obj = seq1->obj; 1991570af302Sopenharmony_ci node->type = seq1->type; 1992570af302Sopenharmony_ci } 1993570af302Sopenharmony_ci 1994570af302Sopenharmony_ci iter_depth--; 1995570af302Sopenharmony_ci pos_add_total += pos_add - pos_add_last; 1996570af302Sopenharmony_ci if (iter_depth == 0) 1997570af302Sopenharmony_ci pos_add = pos_add_total; 1998570af302Sopenharmony_ci 1999570af302Sopenharmony_ci break; 2000570af302Sopenharmony_ci } 2001570af302Sopenharmony_ci default: 2002570af302Sopenharmony_ci assert(0); 2003570af302Sopenharmony_ci break; 2004570af302Sopenharmony_ci } 2005570af302Sopenharmony_ci } 2006570af302Sopenharmony_ci 2007570af302Sopenharmony_ci *position += pos_add_total; 2008570af302Sopenharmony_ci 2009570af302Sopenharmony_ci /* `max_pos' should never be larger than `*position' if the above 2010570af302Sopenharmony_ci code works, but just an extra safeguard let's make sure 2011570af302Sopenharmony_ci `*position' is set large enough so enough memory will be 2012570af302Sopenharmony_ci allocated for the transition table. */ 2013570af302Sopenharmony_ci if (max_pos > *position) 2014570af302Sopenharmony_ci *position = max_pos; 2015570af302Sopenharmony_ci 2016570af302Sopenharmony_ci return status; 2017570af302Sopenharmony_ci} 2018570af302Sopenharmony_ci 2019570af302Sopenharmony_cistatic tre_pos_and_tags_t * 2020570af302Sopenharmony_citre_set_empty(tre_mem_t mem) 2021570af302Sopenharmony_ci{ 2022570af302Sopenharmony_ci tre_pos_and_tags_t *new_set; 2023570af302Sopenharmony_ci 2024570af302Sopenharmony_ci new_set = tre_mem_calloc(mem, sizeof(*new_set)); 2025570af302Sopenharmony_ci if (new_set == NULL) 2026570af302Sopenharmony_ci return NULL; 2027570af302Sopenharmony_ci 2028570af302Sopenharmony_ci new_set[0].position = -1; 2029570af302Sopenharmony_ci new_set[0].code_min = -1; 2030570af302Sopenharmony_ci new_set[0].code_max = -1; 2031570af302Sopenharmony_ci 2032570af302Sopenharmony_ci return new_set; 2033570af302Sopenharmony_ci} 2034570af302Sopenharmony_ci 2035570af302Sopenharmony_cistatic tre_pos_and_tags_t * 2036570af302Sopenharmony_citre_set_one(tre_mem_t mem, int position, int code_min, int code_max, 2037570af302Sopenharmony_ci tre_ctype_t class, tre_ctype_t *neg_classes, int backref) 2038570af302Sopenharmony_ci{ 2039570af302Sopenharmony_ci tre_pos_and_tags_t *new_set; 2040570af302Sopenharmony_ci 2041570af302Sopenharmony_ci new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); 2042570af302Sopenharmony_ci if (new_set == NULL) 2043570af302Sopenharmony_ci return NULL; 2044570af302Sopenharmony_ci 2045570af302Sopenharmony_ci new_set[0].position = position; 2046570af302Sopenharmony_ci new_set[0].code_min = code_min; 2047570af302Sopenharmony_ci new_set[0].code_max = code_max; 2048570af302Sopenharmony_ci new_set[0].class = class; 2049570af302Sopenharmony_ci new_set[0].neg_classes = neg_classes; 2050570af302Sopenharmony_ci new_set[0].backref = backref; 2051570af302Sopenharmony_ci new_set[1].position = -1; 2052570af302Sopenharmony_ci new_set[1].code_min = -1; 2053570af302Sopenharmony_ci new_set[1].code_max = -1; 2054570af302Sopenharmony_ci 2055570af302Sopenharmony_ci return new_set; 2056570af302Sopenharmony_ci} 2057570af302Sopenharmony_ci 2058570af302Sopenharmony_cistatic tre_pos_and_tags_t * 2059570af302Sopenharmony_citre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, 2060570af302Sopenharmony_ci int *tags, int assertions) 2061570af302Sopenharmony_ci{ 2062570af302Sopenharmony_ci int s1, s2, i, j; 2063570af302Sopenharmony_ci tre_pos_and_tags_t *new_set; 2064570af302Sopenharmony_ci int *new_tags; 2065570af302Sopenharmony_ci int num_tags; 2066570af302Sopenharmony_ci 2067570af302Sopenharmony_ci for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); 2068570af302Sopenharmony_ci for (s1 = 0; set1[s1].position >= 0; s1++); 2069570af302Sopenharmony_ci for (s2 = 0; set2[s2].position >= 0; s2++); 2070570af302Sopenharmony_ci new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); 2071570af302Sopenharmony_ci if (!new_set ) 2072570af302Sopenharmony_ci return NULL; 2073570af302Sopenharmony_ci 2074570af302Sopenharmony_ci for (s1 = 0; set1[s1].position >= 0; s1++) 2075570af302Sopenharmony_ci { 2076570af302Sopenharmony_ci new_set[s1].position = set1[s1].position; 2077570af302Sopenharmony_ci new_set[s1].code_min = set1[s1].code_min; 2078570af302Sopenharmony_ci new_set[s1].code_max = set1[s1].code_max; 2079570af302Sopenharmony_ci new_set[s1].assertions = set1[s1].assertions | assertions; 2080570af302Sopenharmony_ci new_set[s1].class = set1[s1].class; 2081570af302Sopenharmony_ci new_set[s1].neg_classes = set1[s1].neg_classes; 2082570af302Sopenharmony_ci new_set[s1].backref = set1[s1].backref; 2083570af302Sopenharmony_ci if (set1[s1].tags == NULL && tags == NULL) 2084570af302Sopenharmony_ci new_set[s1].tags = NULL; 2085570af302Sopenharmony_ci else 2086570af302Sopenharmony_ci { 2087570af302Sopenharmony_ci for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); 2088570af302Sopenharmony_ci new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) 2089570af302Sopenharmony_ci * (i + num_tags + 1))); 2090570af302Sopenharmony_ci if (new_tags == NULL) 2091570af302Sopenharmony_ci return NULL; 2092570af302Sopenharmony_ci for (j = 0; j < i; j++) 2093570af302Sopenharmony_ci new_tags[j] = set1[s1].tags[j]; 2094570af302Sopenharmony_ci for (i = 0; i < num_tags; i++) 2095570af302Sopenharmony_ci new_tags[j + i] = tags[i]; 2096570af302Sopenharmony_ci new_tags[j + i] = -1; 2097570af302Sopenharmony_ci new_set[s1].tags = new_tags; 2098570af302Sopenharmony_ci } 2099570af302Sopenharmony_ci } 2100570af302Sopenharmony_ci 2101570af302Sopenharmony_ci for (s2 = 0; set2[s2].position >= 0; s2++) 2102570af302Sopenharmony_ci { 2103570af302Sopenharmony_ci new_set[s1 + s2].position = set2[s2].position; 2104570af302Sopenharmony_ci new_set[s1 + s2].code_min = set2[s2].code_min; 2105570af302Sopenharmony_ci new_set[s1 + s2].code_max = set2[s2].code_max; 2106570af302Sopenharmony_ci /* XXX - why not | assertions here as well? */ 2107570af302Sopenharmony_ci new_set[s1 + s2].assertions = set2[s2].assertions; 2108570af302Sopenharmony_ci new_set[s1 + s2].class = set2[s2].class; 2109570af302Sopenharmony_ci new_set[s1 + s2].neg_classes = set2[s2].neg_classes; 2110570af302Sopenharmony_ci new_set[s1 + s2].backref = set2[s2].backref; 2111570af302Sopenharmony_ci if (set2[s2].tags == NULL) 2112570af302Sopenharmony_ci new_set[s1 + s2].tags = NULL; 2113570af302Sopenharmony_ci else 2114570af302Sopenharmony_ci { 2115570af302Sopenharmony_ci for (i = 0; set2[s2].tags[i] >= 0; i++); 2116570af302Sopenharmony_ci new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); 2117570af302Sopenharmony_ci if (new_tags == NULL) 2118570af302Sopenharmony_ci return NULL; 2119570af302Sopenharmony_ci for (j = 0; j < i; j++) 2120570af302Sopenharmony_ci new_tags[j] = set2[s2].tags[j]; 2121570af302Sopenharmony_ci new_tags[j] = -1; 2122570af302Sopenharmony_ci new_set[s1 + s2].tags = new_tags; 2123570af302Sopenharmony_ci } 2124570af302Sopenharmony_ci } 2125570af302Sopenharmony_ci new_set[s1 + s2].position = -1; 2126570af302Sopenharmony_ci return new_set; 2127570af302Sopenharmony_ci} 2128570af302Sopenharmony_ci 2129570af302Sopenharmony_ci/* Finds the empty path through `node' which is the one that should be 2130570af302Sopenharmony_ci taken according to POSIX.2 rules, and adds the tags on that path to 2131570af302Sopenharmony_ci `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is 2132570af302Sopenharmony_ci set to the number of tags seen on the path. */ 2133570af302Sopenharmony_cistatic reg_errcode_t 2134570af302Sopenharmony_citre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, 2135570af302Sopenharmony_ci int *assertions, int *num_tags_seen) 2136570af302Sopenharmony_ci{ 2137570af302Sopenharmony_ci tre_literal_t *lit; 2138570af302Sopenharmony_ci tre_union_t *uni; 2139570af302Sopenharmony_ci tre_catenation_t *cat; 2140570af302Sopenharmony_ci tre_iteration_t *iter; 2141570af302Sopenharmony_ci int i; 2142570af302Sopenharmony_ci int bottom = tre_stack_num_objects(stack); 2143570af302Sopenharmony_ci reg_errcode_t status = REG_OK; 2144570af302Sopenharmony_ci if (num_tags_seen) 2145570af302Sopenharmony_ci *num_tags_seen = 0; 2146570af302Sopenharmony_ci 2147570af302Sopenharmony_ci status = tre_stack_push_voidptr(stack, node); 2148570af302Sopenharmony_ci 2149570af302Sopenharmony_ci /* Walk through the tree recursively. */ 2150570af302Sopenharmony_ci while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 2151570af302Sopenharmony_ci { 2152570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 2153570af302Sopenharmony_ci 2154570af302Sopenharmony_ci switch (node->type) 2155570af302Sopenharmony_ci { 2156570af302Sopenharmony_ci case LITERAL: 2157570af302Sopenharmony_ci lit = (tre_literal_t *)node->obj; 2158570af302Sopenharmony_ci switch (lit->code_min) 2159570af302Sopenharmony_ci { 2160570af302Sopenharmony_ci case TAG: 2161570af302Sopenharmony_ci if (lit->code_max >= 0) 2162570af302Sopenharmony_ci { 2163570af302Sopenharmony_ci if (tags != NULL) 2164570af302Sopenharmony_ci { 2165570af302Sopenharmony_ci /* Add the tag to `tags'. */ 2166570af302Sopenharmony_ci for (i = 0; tags[i] >= 0; i++) 2167570af302Sopenharmony_ci if (tags[i] == lit->code_max) 2168570af302Sopenharmony_ci break; 2169570af302Sopenharmony_ci if (tags[i] < 0) 2170570af302Sopenharmony_ci { 2171570af302Sopenharmony_ci tags[i] = lit->code_max; 2172570af302Sopenharmony_ci tags[i + 1] = -1; 2173570af302Sopenharmony_ci } 2174570af302Sopenharmony_ci } 2175570af302Sopenharmony_ci if (num_tags_seen) 2176570af302Sopenharmony_ci (*num_tags_seen)++; 2177570af302Sopenharmony_ci } 2178570af302Sopenharmony_ci break; 2179570af302Sopenharmony_ci case ASSERTION: 2180570af302Sopenharmony_ci assert(lit->code_max >= 1 2181570af302Sopenharmony_ci || lit->code_max <= ASSERT_LAST); 2182570af302Sopenharmony_ci if (assertions != NULL) 2183570af302Sopenharmony_ci *assertions |= lit->code_max; 2184570af302Sopenharmony_ci break; 2185570af302Sopenharmony_ci case EMPTY: 2186570af302Sopenharmony_ci break; 2187570af302Sopenharmony_ci default: 2188570af302Sopenharmony_ci assert(0); 2189570af302Sopenharmony_ci break; 2190570af302Sopenharmony_ci } 2191570af302Sopenharmony_ci break; 2192570af302Sopenharmony_ci 2193570af302Sopenharmony_ci case UNION: 2194570af302Sopenharmony_ci /* Subexpressions starting earlier take priority over ones 2195570af302Sopenharmony_ci starting later, so we prefer the left subexpression over the 2196570af302Sopenharmony_ci right subexpression. */ 2197570af302Sopenharmony_ci uni = (tre_union_t *)node->obj; 2198570af302Sopenharmony_ci if (uni->left->nullable) 2199570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->left) 2200570af302Sopenharmony_ci else if (uni->right->nullable) 2201570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, uni->right) 2202570af302Sopenharmony_ci else 2203570af302Sopenharmony_ci assert(0); 2204570af302Sopenharmony_ci break; 2205570af302Sopenharmony_ci 2206570af302Sopenharmony_ci case CATENATION: 2207570af302Sopenharmony_ci /* The path must go through both children. */ 2208570af302Sopenharmony_ci cat = (tre_catenation_t *)node->obj; 2209570af302Sopenharmony_ci assert(cat->left->nullable); 2210570af302Sopenharmony_ci assert(cat->right->nullable); 2211570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->left); 2212570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, cat->right); 2213570af302Sopenharmony_ci break; 2214570af302Sopenharmony_ci 2215570af302Sopenharmony_ci case ITERATION: 2216570af302Sopenharmony_ci /* A match with an empty string is preferred over no match at 2217570af302Sopenharmony_ci all, so we go through the argument if possible. */ 2218570af302Sopenharmony_ci iter = (tre_iteration_t *)node->obj; 2219570af302Sopenharmony_ci if (iter->arg->nullable) 2220570af302Sopenharmony_ci STACK_PUSHX(stack, voidptr, iter->arg); 2221570af302Sopenharmony_ci break; 2222570af302Sopenharmony_ci 2223570af302Sopenharmony_ci default: 2224570af302Sopenharmony_ci assert(0); 2225570af302Sopenharmony_ci break; 2226570af302Sopenharmony_ci } 2227570af302Sopenharmony_ci } 2228570af302Sopenharmony_ci 2229570af302Sopenharmony_ci return status; 2230570af302Sopenharmony_ci} 2231570af302Sopenharmony_ci 2232570af302Sopenharmony_ci 2233570af302Sopenharmony_citypedef enum { 2234570af302Sopenharmony_ci NFL_RECURSE, 2235570af302Sopenharmony_ci NFL_POST_UNION, 2236570af302Sopenharmony_ci NFL_POST_CATENATION, 2237570af302Sopenharmony_ci NFL_POST_ITERATION 2238570af302Sopenharmony_ci} tre_nfl_stack_symbol_t; 2239570af302Sopenharmony_ci 2240570af302Sopenharmony_ci 2241570af302Sopenharmony_ci/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for 2242570af302Sopenharmony_ci the nodes of the AST `tree'. */ 2243570af302Sopenharmony_cistatic reg_errcode_t 2244570af302Sopenharmony_citre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) 2245570af302Sopenharmony_ci{ 2246570af302Sopenharmony_ci int bottom = tre_stack_num_objects(stack); 2247570af302Sopenharmony_ci 2248570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, tree); 2249570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2250570af302Sopenharmony_ci 2251570af302Sopenharmony_ci while (tre_stack_num_objects(stack) > bottom) 2252570af302Sopenharmony_ci { 2253570af302Sopenharmony_ci tre_nfl_stack_symbol_t symbol; 2254570af302Sopenharmony_ci tre_ast_node_t *node; 2255570af302Sopenharmony_ci 2256570af302Sopenharmony_ci symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack); 2257570af302Sopenharmony_ci node = tre_stack_pop_voidptr(stack); 2258570af302Sopenharmony_ci switch (symbol) 2259570af302Sopenharmony_ci { 2260570af302Sopenharmony_ci case NFL_RECURSE: 2261570af302Sopenharmony_ci switch (node->type) 2262570af302Sopenharmony_ci { 2263570af302Sopenharmony_ci case LITERAL: 2264570af302Sopenharmony_ci { 2265570af302Sopenharmony_ci tre_literal_t *lit = (tre_literal_t *)node->obj; 2266570af302Sopenharmony_ci if (IS_BACKREF(lit)) 2267570af302Sopenharmony_ci { 2268570af302Sopenharmony_ci /* Back references: nullable = false, firstpos = {i}, 2269570af302Sopenharmony_ci lastpos = {i}. */ 2270570af302Sopenharmony_ci node->nullable = 0; 2271570af302Sopenharmony_ci node->firstpos = tre_set_one(mem, lit->position, 0, 2272570af302Sopenharmony_ci TRE_CHAR_MAX, 0, NULL, -1); 2273570af302Sopenharmony_ci if (!node->firstpos) 2274570af302Sopenharmony_ci return REG_ESPACE; 2275570af302Sopenharmony_ci node->lastpos = tre_set_one(mem, lit->position, 0, 2276570af302Sopenharmony_ci TRE_CHAR_MAX, 0, NULL, 2277570af302Sopenharmony_ci (int)lit->code_max); 2278570af302Sopenharmony_ci if (!node->lastpos) 2279570af302Sopenharmony_ci return REG_ESPACE; 2280570af302Sopenharmony_ci } 2281570af302Sopenharmony_ci else if (lit->code_min < 0) 2282570af302Sopenharmony_ci { 2283570af302Sopenharmony_ci /* Tags, empty strings, params, and zero width assertions: 2284570af302Sopenharmony_ci nullable = true, firstpos = {}, and lastpos = {}. */ 2285570af302Sopenharmony_ci node->nullable = 1; 2286570af302Sopenharmony_ci node->firstpos = tre_set_empty(mem); 2287570af302Sopenharmony_ci if (!node->firstpos) 2288570af302Sopenharmony_ci return REG_ESPACE; 2289570af302Sopenharmony_ci node->lastpos = tre_set_empty(mem); 2290570af302Sopenharmony_ci if (!node->lastpos) 2291570af302Sopenharmony_ci return REG_ESPACE; 2292570af302Sopenharmony_ci } 2293570af302Sopenharmony_ci else 2294570af302Sopenharmony_ci { 2295570af302Sopenharmony_ci /* Literal at position i: nullable = false, firstpos = {i}, 2296570af302Sopenharmony_ci lastpos = {i}. */ 2297570af302Sopenharmony_ci node->nullable = 0; 2298570af302Sopenharmony_ci node->firstpos = 2299570af302Sopenharmony_ci tre_set_one(mem, lit->position, (int)lit->code_min, 2300570af302Sopenharmony_ci (int)lit->code_max, 0, NULL, -1); 2301570af302Sopenharmony_ci if (!node->firstpos) 2302570af302Sopenharmony_ci return REG_ESPACE; 2303570af302Sopenharmony_ci node->lastpos = tre_set_one(mem, lit->position, 2304570af302Sopenharmony_ci (int)lit->code_min, 2305570af302Sopenharmony_ci (int)lit->code_max, 2306570af302Sopenharmony_ci lit->class, lit->neg_classes, 2307570af302Sopenharmony_ci -1); 2308570af302Sopenharmony_ci if (!node->lastpos) 2309570af302Sopenharmony_ci return REG_ESPACE; 2310570af302Sopenharmony_ci } 2311570af302Sopenharmony_ci break; 2312570af302Sopenharmony_ci } 2313570af302Sopenharmony_ci 2314570af302Sopenharmony_ci case UNION: 2315570af302Sopenharmony_ci /* Compute the attributes for the two subtrees, and after that 2316570af302Sopenharmony_ci for this node. */ 2317570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, node); 2318570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_POST_UNION); 2319570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); 2320570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2321570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); 2322570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2323570af302Sopenharmony_ci break; 2324570af302Sopenharmony_ci 2325570af302Sopenharmony_ci case CATENATION: 2326570af302Sopenharmony_ci /* Compute the attributes for the two subtrees, and after that 2327570af302Sopenharmony_ci for this node. */ 2328570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, node); 2329570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_POST_CATENATION); 2330570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right); 2331570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2332570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left); 2333570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2334570af302Sopenharmony_ci break; 2335570af302Sopenharmony_ci 2336570af302Sopenharmony_ci case ITERATION: 2337570af302Sopenharmony_ci /* Compute the attributes for the subtree, and after that for 2338570af302Sopenharmony_ci this node. */ 2339570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, node); 2340570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_POST_ITERATION); 2341570af302Sopenharmony_ci STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); 2342570af302Sopenharmony_ci STACK_PUSHR(stack, int, NFL_RECURSE); 2343570af302Sopenharmony_ci break; 2344570af302Sopenharmony_ci } 2345570af302Sopenharmony_ci break; /* end case: NFL_RECURSE */ 2346570af302Sopenharmony_ci 2347570af302Sopenharmony_ci case NFL_POST_UNION: 2348570af302Sopenharmony_ci { 2349570af302Sopenharmony_ci tre_union_t *uni = (tre_union_t *)node->obj; 2350570af302Sopenharmony_ci node->nullable = uni->left->nullable || uni->right->nullable; 2351570af302Sopenharmony_ci node->firstpos = tre_set_union(mem, uni->left->firstpos, 2352570af302Sopenharmony_ci uni->right->firstpos, NULL, 0); 2353570af302Sopenharmony_ci if (!node->firstpos) 2354570af302Sopenharmony_ci return REG_ESPACE; 2355570af302Sopenharmony_ci node->lastpos = tre_set_union(mem, uni->left->lastpos, 2356570af302Sopenharmony_ci uni->right->lastpos, NULL, 0); 2357570af302Sopenharmony_ci if (!node->lastpos) 2358570af302Sopenharmony_ci return REG_ESPACE; 2359570af302Sopenharmony_ci break; 2360570af302Sopenharmony_ci } 2361570af302Sopenharmony_ci 2362570af302Sopenharmony_ci case NFL_POST_ITERATION: 2363570af302Sopenharmony_ci { 2364570af302Sopenharmony_ci tre_iteration_t *iter = (tre_iteration_t *)node->obj; 2365570af302Sopenharmony_ci 2366570af302Sopenharmony_ci if (iter->min == 0 || iter->arg->nullable) 2367570af302Sopenharmony_ci node->nullable = 1; 2368570af302Sopenharmony_ci else 2369570af302Sopenharmony_ci node->nullable = 0; 2370570af302Sopenharmony_ci node->firstpos = iter->arg->firstpos; 2371570af302Sopenharmony_ci node->lastpos = iter->arg->lastpos; 2372570af302Sopenharmony_ci break; 2373570af302Sopenharmony_ci } 2374570af302Sopenharmony_ci 2375570af302Sopenharmony_ci case NFL_POST_CATENATION: 2376570af302Sopenharmony_ci { 2377570af302Sopenharmony_ci int num_tags, *tags, assertions; 2378570af302Sopenharmony_ci reg_errcode_t status; 2379570af302Sopenharmony_ci tre_catenation_t *cat = node->obj; 2380570af302Sopenharmony_ci node->nullable = cat->left->nullable && cat->right->nullable; 2381570af302Sopenharmony_ci 2382570af302Sopenharmony_ci /* Compute firstpos. */ 2383570af302Sopenharmony_ci if (cat->left->nullable) 2384570af302Sopenharmony_ci { 2385570af302Sopenharmony_ci /* The left side matches the empty string. Make a first pass 2386570af302Sopenharmony_ci with tre_match_empty() to get the number of tags and 2387570af302Sopenharmony_ci parameters. */ 2388570af302Sopenharmony_ci status = tre_match_empty(stack, cat->left, 2389570af302Sopenharmony_ci NULL, NULL, &num_tags); 2390570af302Sopenharmony_ci if (status != REG_OK) 2391570af302Sopenharmony_ci return status; 2392570af302Sopenharmony_ci /* Allocate arrays for the tags and parameters. */ 2393570af302Sopenharmony_ci tags = xmalloc(sizeof(*tags) * (num_tags + 1)); 2394570af302Sopenharmony_ci if (!tags) 2395570af302Sopenharmony_ci return REG_ESPACE; 2396570af302Sopenharmony_ci tags[0] = -1; 2397570af302Sopenharmony_ci assertions = 0; 2398570af302Sopenharmony_ci /* Second pass with tre_mach_empty() to get the list of 2399570af302Sopenharmony_ci tags and parameters. */ 2400570af302Sopenharmony_ci status = tre_match_empty(stack, cat->left, tags, 2401570af302Sopenharmony_ci &assertions, NULL); 2402570af302Sopenharmony_ci if (status != REG_OK) 2403570af302Sopenharmony_ci { 2404570af302Sopenharmony_ci xfree(tags); 2405570af302Sopenharmony_ci return status; 2406570af302Sopenharmony_ci } 2407570af302Sopenharmony_ci node->firstpos = 2408570af302Sopenharmony_ci tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, 2409570af302Sopenharmony_ci tags, assertions); 2410570af302Sopenharmony_ci xfree(tags); 2411570af302Sopenharmony_ci if (!node->firstpos) 2412570af302Sopenharmony_ci return REG_ESPACE; 2413570af302Sopenharmony_ci } 2414570af302Sopenharmony_ci else 2415570af302Sopenharmony_ci { 2416570af302Sopenharmony_ci node->firstpos = cat->left->firstpos; 2417570af302Sopenharmony_ci } 2418570af302Sopenharmony_ci 2419570af302Sopenharmony_ci /* Compute lastpos. */ 2420570af302Sopenharmony_ci if (cat->right->nullable) 2421570af302Sopenharmony_ci { 2422570af302Sopenharmony_ci /* The right side matches the empty string. Make a first pass 2423570af302Sopenharmony_ci with tre_match_empty() to get the number of tags and 2424570af302Sopenharmony_ci parameters. */ 2425570af302Sopenharmony_ci status = tre_match_empty(stack, cat->right, 2426570af302Sopenharmony_ci NULL, NULL, &num_tags); 2427570af302Sopenharmony_ci if (status != REG_OK) 2428570af302Sopenharmony_ci return status; 2429570af302Sopenharmony_ci /* Allocate arrays for the tags and parameters. */ 2430570af302Sopenharmony_ci tags = xmalloc(sizeof(int) * (num_tags + 1)); 2431570af302Sopenharmony_ci if (!tags) 2432570af302Sopenharmony_ci return REG_ESPACE; 2433570af302Sopenharmony_ci tags[0] = -1; 2434570af302Sopenharmony_ci assertions = 0; 2435570af302Sopenharmony_ci /* Second pass with tre_mach_empty() to get the list of 2436570af302Sopenharmony_ci tags and parameters. */ 2437570af302Sopenharmony_ci status = tre_match_empty(stack, cat->right, tags, 2438570af302Sopenharmony_ci &assertions, NULL); 2439570af302Sopenharmony_ci if (status != REG_OK) 2440570af302Sopenharmony_ci { 2441570af302Sopenharmony_ci xfree(tags); 2442570af302Sopenharmony_ci return status; 2443570af302Sopenharmony_ci } 2444570af302Sopenharmony_ci node->lastpos = 2445570af302Sopenharmony_ci tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, 2446570af302Sopenharmony_ci tags, assertions); 2447570af302Sopenharmony_ci xfree(tags); 2448570af302Sopenharmony_ci if (!node->lastpos) 2449570af302Sopenharmony_ci return REG_ESPACE; 2450570af302Sopenharmony_ci } 2451570af302Sopenharmony_ci else 2452570af302Sopenharmony_ci { 2453570af302Sopenharmony_ci node->lastpos = cat->right->lastpos; 2454570af302Sopenharmony_ci } 2455570af302Sopenharmony_ci break; 2456570af302Sopenharmony_ci } 2457570af302Sopenharmony_ci 2458570af302Sopenharmony_ci default: 2459570af302Sopenharmony_ci assert(0); 2460570af302Sopenharmony_ci break; 2461570af302Sopenharmony_ci } 2462570af302Sopenharmony_ci } 2463570af302Sopenharmony_ci 2464570af302Sopenharmony_ci return REG_OK; 2465570af302Sopenharmony_ci} 2466570af302Sopenharmony_ci 2467570af302Sopenharmony_ci 2468570af302Sopenharmony_ci/* Adds a transition from each position in `p1' to each position in `p2'. */ 2469570af302Sopenharmony_cistatic reg_errcode_t 2470570af302Sopenharmony_citre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, 2471570af302Sopenharmony_ci tre_tnfa_transition_t *transitions, 2472570af302Sopenharmony_ci int *counts, int *offs) 2473570af302Sopenharmony_ci{ 2474570af302Sopenharmony_ci tre_pos_and_tags_t *orig_p2 = p2; 2475570af302Sopenharmony_ci tre_tnfa_transition_t *trans; 2476570af302Sopenharmony_ci int i, j, k, l, dup, prev_p2_pos; 2477570af302Sopenharmony_ci 2478570af302Sopenharmony_ci if (transitions != NULL) 2479570af302Sopenharmony_ci while (p1->position >= 0) 2480570af302Sopenharmony_ci { 2481570af302Sopenharmony_ci p2 = orig_p2; 2482570af302Sopenharmony_ci prev_p2_pos = -1; 2483570af302Sopenharmony_ci while (p2->position >= 0) 2484570af302Sopenharmony_ci { 2485570af302Sopenharmony_ci /* Optimization: if this position was already handled, skip it. */ 2486570af302Sopenharmony_ci if (p2->position == prev_p2_pos) 2487570af302Sopenharmony_ci { 2488570af302Sopenharmony_ci p2++; 2489570af302Sopenharmony_ci continue; 2490570af302Sopenharmony_ci } 2491570af302Sopenharmony_ci prev_p2_pos = p2->position; 2492570af302Sopenharmony_ci /* Set `trans' to point to the next unused transition from 2493570af302Sopenharmony_ci position `p1->position'. */ 2494570af302Sopenharmony_ci trans = transitions + offs[p1->position]; 2495570af302Sopenharmony_ci while (trans->state != NULL) 2496570af302Sopenharmony_ci { 2497570af302Sopenharmony_ci#if 0 2498570af302Sopenharmony_ci /* If we find a previous transition from `p1->position' to 2499570af302Sopenharmony_ci `p2->position', it is overwritten. This can happen only 2500570af302Sopenharmony_ci if there are nested loops in the regexp, like in "((a)*)*". 2501570af302Sopenharmony_ci In POSIX.2 repetition using the outer loop is always 2502570af302Sopenharmony_ci preferred over using the inner loop. Therefore the 2503570af302Sopenharmony_ci transition for the inner loop is useless and can be thrown 2504570af302Sopenharmony_ci away. */ 2505570af302Sopenharmony_ci /* XXX - The same position is used for all nodes in a bracket 2506570af302Sopenharmony_ci expression, so this optimization cannot be used (it will 2507570af302Sopenharmony_ci break bracket expressions) unless I figure out a way to 2508570af302Sopenharmony_ci detect it here. */ 2509570af302Sopenharmony_ci if (trans->state_id == p2->position) 2510570af302Sopenharmony_ci { 2511570af302Sopenharmony_ci break; 2512570af302Sopenharmony_ci } 2513570af302Sopenharmony_ci#endif 2514570af302Sopenharmony_ci trans++; 2515570af302Sopenharmony_ci } 2516570af302Sopenharmony_ci 2517570af302Sopenharmony_ci if (trans->state == NULL) 2518570af302Sopenharmony_ci (trans + 1)->state = NULL; 2519570af302Sopenharmony_ci /* Use the character ranges, assertions, etc. from `p1' for 2520570af302Sopenharmony_ci the transition from `p1' to `p2'. */ 2521570af302Sopenharmony_ci trans->code_min = p1->code_min; 2522570af302Sopenharmony_ci trans->code_max = p1->code_max; 2523570af302Sopenharmony_ci trans->state = transitions + offs[p2->position]; 2524570af302Sopenharmony_ci trans->state_id = p2->position; 2525570af302Sopenharmony_ci trans->assertions = p1->assertions | p2->assertions 2526570af302Sopenharmony_ci | (p1->class ? ASSERT_CHAR_CLASS : 0) 2527570af302Sopenharmony_ci | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); 2528570af302Sopenharmony_ci if (p1->backref >= 0) 2529570af302Sopenharmony_ci { 2530570af302Sopenharmony_ci assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); 2531570af302Sopenharmony_ci assert(p2->backref < 0); 2532570af302Sopenharmony_ci trans->u.backref = p1->backref; 2533570af302Sopenharmony_ci trans->assertions |= ASSERT_BACKREF; 2534570af302Sopenharmony_ci } 2535570af302Sopenharmony_ci else 2536570af302Sopenharmony_ci trans->u.class = p1->class; 2537570af302Sopenharmony_ci if (p1->neg_classes != NULL) 2538570af302Sopenharmony_ci { 2539570af302Sopenharmony_ci for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); 2540570af302Sopenharmony_ci trans->neg_classes = 2541570af302Sopenharmony_ci xmalloc(sizeof(*trans->neg_classes) * (i + 1)); 2542570af302Sopenharmony_ci if (trans->neg_classes == NULL) 2543570af302Sopenharmony_ci return REG_ESPACE; 2544570af302Sopenharmony_ci for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) 2545570af302Sopenharmony_ci trans->neg_classes[i] = p1->neg_classes[i]; 2546570af302Sopenharmony_ci trans->neg_classes[i] = (tre_ctype_t)0; 2547570af302Sopenharmony_ci } 2548570af302Sopenharmony_ci else 2549570af302Sopenharmony_ci trans->neg_classes = NULL; 2550570af302Sopenharmony_ci 2551570af302Sopenharmony_ci /* Find out how many tags this transition has. */ 2552570af302Sopenharmony_ci i = 0; 2553570af302Sopenharmony_ci if (p1->tags != NULL) 2554570af302Sopenharmony_ci while(p1->tags[i] >= 0) 2555570af302Sopenharmony_ci i++; 2556570af302Sopenharmony_ci j = 0; 2557570af302Sopenharmony_ci if (p2->tags != NULL) 2558570af302Sopenharmony_ci while(p2->tags[j] >= 0) 2559570af302Sopenharmony_ci j++; 2560570af302Sopenharmony_ci 2561570af302Sopenharmony_ci /* If we are overwriting a transition, free the old tag array. */ 2562570af302Sopenharmony_ci if (trans->tags != NULL) 2563570af302Sopenharmony_ci xfree(trans->tags); 2564570af302Sopenharmony_ci trans->tags = NULL; 2565570af302Sopenharmony_ci 2566570af302Sopenharmony_ci /* If there were any tags, allocate an array and fill it. */ 2567570af302Sopenharmony_ci if (i + j > 0) 2568570af302Sopenharmony_ci { 2569570af302Sopenharmony_ci trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); 2570570af302Sopenharmony_ci if (!trans->tags) 2571570af302Sopenharmony_ci return REG_ESPACE; 2572570af302Sopenharmony_ci i = 0; 2573570af302Sopenharmony_ci if (p1->tags != NULL) 2574570af302Sopenharmony_ci while(p1->tags[i] >= 0) 2575570af302Sopenharmony_ci { 2576570af302Sopenharmony_ci trans->tags[i] = p1->tags[i]; 2577570af302Sopenharmony_ci i++; 2578570af302Sopenharmony_ci } 2579570af302Sopenharmony_ci l = i; 2580570af302Sopenharmony_ci j = 0; 2581570af302Sopenharmony_ci if (p2->tags != NULL) 2582570af302Sopenharmony_ci while (p2->tags[j] >= 0) 2583570af302Sopenharmony_ci { 2584570af302Sopenharmony_ci /* Don't add duplicates. */ 2585570af302Sopenharmony_ci dup = 0; 2586570af302Sopenharmony_ci for (k = 0; k < i; k++) 2587570af302Sopenharmony_ci if (trans->tags[k] == p2->tags[j]) 2588570af302Sopenharmony_ci { 2589570af302Sopenharmony_ci dup = 1; 2590570af302Sopenharmony_ci break; 2591570af302Sopenharmony_ci } 2592570af302Sopenharmony_ci if (!dup) 2593570af302Sopenharmony_ci trans->tags[l++] = p2->tags[j]; 2594570af302Sopenharmony_ci j++; 2595570af302Sopenharmony_ci } 2596570af302Sopenharmony_ci trans->tags[l] = -1; 2597570af302Sopenharmony_ci } 2598570af302Sopenharmony_ci 2599570af302Sopenharmony_ci p2++; 2600570af302Sopenharmony_ci } 2601570af302Sopenharmony_ci p1++; 2602570af302Sopenharmony_ci } 2603570af302Sopenharmony_ci else 2604570af302Sopenharmony_ci /* Compute a maximum limit for the number of transitions leaving 2605570af302Sopenharmony_ci from each state. */ 2606570af302Sopenharmony_ci while (p1->position >= 0) 2607570af302Sopenharmony_ci { 2608570af302Sopenharmony_ci p2 = orig_p2; 2609570af302Sopenharmony_ci while (p2->position >= 0) 2610570af302Sopenharmony_ci { 2611570af302Sopenharmony_ci counts[p1->position]++; 2612570af302Sopenharmony_ci p2++; 2613570af302Sopenharmony_ci } 2614570af302Sopenharmony_ci p1++; 2615570af302Sopenharmony_ci } 2616570af302Sopenharmony_ci return REG_OK; 2617570af302Sopenharmony_ci} 2618570af302Sopenharmony_ci 2619570af302Sopenharmony_ci/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are 2620570af302Sopenharmony_ci labelled with one character range (there are no transitions on empty 2621570af302Sopenharmony_ci strings). The TNFA takes O(n^2) space in the worst case, `n' is size of 2622570af302Sopenharmony_ci the regexp. */ 2623570af302Sopenharmony_cistatic reg_errcode_t 2624570af302Sopenharmony_citre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, 2625570af302Sopenharmony_ci int *counts, int *offs) 2626570af302Sopenharmony_ci{ 2627570af302Sopenharmony_ci tre_union_t *uni; 2628570af302Sopenharmony_ci tre_catenation_t *cat; 2629570af302Sopenharmony_ci tre_iteration_t *iter; 2630570af302Sopenharmony_ci reg_errcode_t errcode = REG_OK; 2631570af302Sopenharmony_ci 2632570af302Sopenharmony_ci /* XXX - recurse using a stack!. */ 2633570af302Sopenharmony_ci switch (node->type) 2634570af302Sopenharmony_ci { 2635570af302Sopenharmony_ci case LITERAL: 2636570af302Sopenharmony_ci break; 2637570af302Sopenharmony_ci case UNION: 2638570af302Sopenharmony_ci uni = (tre_union_t *)node->obj; 2639570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); 2640570af302Sopenharmony_ci if (errcode != REG_OK) 2641570af302Sopenharmony_ci return errcode; 2642570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); 2643570af302Sopenharmony_ci break; 2644570af302Sopenharmony_ci 2645570af302Sopenharmony_ci case CATENATION: 2646570af302Sopenharmony_ci cat = (tre_catenation_t *)node->obj; 2647570af302Sopenharmony_ci /* Add a transition from each position in cat->left->lastpos 2648570af302Sopenharmony_ci to each position in cat->right->firstpos. */ 2649570af302Sopenharmony_ci errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, 2650570af302Sopenharmony_ci transitions, counts, offs); 2651570af302Sopenharmony_ci if (errcode != REG_OK) 2652570af302Sopenharmony_ci return errcode; 2653570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); 2654570af302Sopenharmony_ci if (errcode != REG_OK) 2655570af302Sopenharmony_ci return errcode; 2656570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); 2657570af302Sopenharmony_ci break; 2658570af302Sopenharmony_ci 2659570af302Sopenharmony_ci case ITERATION: 2660570af302Sopenharmony_ci iter = (tre_iteration_t *)node->obj; 2661570af302Sopenharmony_ci assert(iter->max == -1 || iter->max == 1); 2662570af302Sopenharmony_ci 2663570af302Sopenharmony_ci if (iter->max == -1) 2664570af302Sopenharmony_ci { 2665570af302Sopenharmony_ci assert(iter->min == 0 || iter->min == 1); 2666570af302Sopenharmony_ci /* Add a transition from each last position in the iterated 2667570af302Sopenharmony_ci expression to each first position. */ 2668570af302Sopenharmony_ci errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, 2669570af302Sopenharmony_ci transitions, counts, offs); 2670570af302Sopenharmony_ci if (errcode != REG_OK) 2671570af302Sopenharmony_ci return errcode; 2672570af302Sopenharmony_ci } 2673570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); 2674570af302Sopenharmony_ci break; 2675570af302Sopenharmony_ci } 2676570af302Sopenharmony_ci return errcode; 2677570af302Sopenharmony_ci} 2678570af302Sopenharmony_ci 2679570af302Sopenharmony_ci 2680570af302Sopenharmony_ci#define ERROR_EXIT(err) \ 2681570af302Sopenharmony_ci do \ 2682570af302Sopenharmony_ci { \ 2683570af302Sopenharmony_ci errcode = err; \ 2684570af302Sopenharmony_ci if (/*CONSTCOND*/1) \ 2685570af302Sopenharmony_ci goto error_exit; \ 2686570af302Sopenharmony_ci } \ 2687570af302Sopenharmony_ci while (/*CONSTCOND*/0) 2688570af302Sopenharmony_ci 2689570af302Sopenharmony_ci 2690570af302Sopenharmony_ciint 2691570af302Sopenharmony_ciregcomp(regex_t *restrict preg, const char *restrict regex, int cflags) 2692570af302Sopenharmony_ci{ 2693570af302Sopenharmony_ci tre_stack_t *stack; 2694570af302Sopenharmony_ci tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; 2695570af302Sopenharmony_ci tre_pos_and_tags_t *p; 2696570af302Sopenharmony_ci int *counts = NULL, *offs = NULL; 2697570af302Sopenharmony_ci int i, add = 0; 2698570af302Sopenharmony_ci tre_tnfa_transition_t *transitions, *initial; 2699570af302Sopenharmony_ci tre_tnfa_t *tnfa = NULL; 2700570af302Sopenharmony_ci tre_submatch_data_t *submatch_data; 2701570af302Sopenharmony_ci tre_tag_direction_t *tag_directions = NULL; 2702570af302Sopenharmony_ci reg_errcode_t errcode; 2703570af302Sopenharmony_ci tre_mem_t mem; 2704570af302Sopenharmony_ci 2705570af302Sopenharmony_ci /* Parse context. */ 2706570af302Sopenharmony_ci tre_parse_ctx_t parse_ctx; 2707570af302Sopenharmony_ci 2708570af302Sopenharmony_ci /* Allocate a stack used throughout the compilation process for various 2709570af302Sopenharmony_ci purposes. */ 2710570af302Sopenharmony_ci stack = tre_stack_new(512, 1024000, 128); 2711570af302Sopenharmony_ci if (!stack) 2712570af302Sopenharmony_ci return REG_ESPACE; 2713570af302Sopenharmony_ci /* Allocate a fast memory allocator. */ 2714570af302Sopenharmony_ci mem = tre_mem_new(); 2715570af302Sopenharmony_ci if (!mem) 2716570af302Sopenharmony_ci { 2717570af302Sopenharmony_ci tre_stack_destroy(stack); 2718570af302Sopenharmony_ci return REG_ESPACE; 2719570af302Sopenharmony_ci } 2720570af302Sopenharmony_ci 2721570af302Sopenharmony_ci /* Parse the regexp. */ 2722570af302Sopenharmony_ci memset(&parse_ctx, 0, sizeof(parse_ctx)); 2723570af302Sopenharmony_ci parse_ctx.mem = mem; 2724570af302Sopenharmony_ci parse_ctx.stack = stack; 2725570af302Sopenharmony_ci parse_ctx.start = regex; 2726570af302Sopenharmony_ci parse_ctx.cflags = cflags; 2727570af302Sopenharmony_ci parse_ctx.max_backref = -1; 2728570af302Sopenharmony_ci errcode = tre_parse(&parse_ctx); 2729570af302Sopenharmony_ci if (errcode != REG_OK) 2730570af302Sopenharmony_ci ERROR_EXIT(errcode); 2731570af302Sopenharmony_ci preg->re_nsub = parse_ctx.submatch_id - 1; 2732570af302Sopenharmony_ci tree = parse_ctx.n; 2733570af302Sopenharmony_ci 2734570af302Sopenharmony_ci#ifdef TRE_DEBUG 2735570af302Sopenharmony_ci tre_ast_print(tree); 2736570af302Sopenharmony_ci#endif /* TRE_DEBUG */ 2737570af302Sopenharmony_ci 2738570af302Sopenharmony_ci /* Referring to nonexistent subexpressions is illegal. */ 2739570af302Sopenharmony_ci if (parse_ctx.max_backref > (int)preg->re_nsub) 2740570af302Sopenharmony_ci ERROR_EXIT(REG_ESUBREG); 2741570af302Sopenharmony_ci 2742570af302Sopenharmony_ci /* Allocate the TNFA struct. */ 2743570af302Sopenharmony_ci tnfa = xcalloc(1, sizeof(tre_tnfa_t)); 2744570af302Sopenharmony_ci if (tnfa == NULL) 2745570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2746570af302Sopenharmony_ci tnfa->have_backrefs = parse_ctx.max_backref >= 0; 2747570af302Sopenharmony_ci tnfa->have_approx = 0; 2748570af302Sopenharmony_ci tnfa->num_submatches = parse_ctx.submatch_id; 2749570af302Sopenharmony_ci 2750570af302Sopenharmony_ci /* Set up tags for submatch addressing. If REG_NOSUB is set and the 2751570af302Sopenharmony_ci regexp does not have back references, this can be skipped. */ 2752570af302Sopenharmony_ci if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) 2753570af302Sopenharmony_ci { 2754570af302Sopenharmony_ci 2755570af302Sopenharmony_ci /* Figure out how many tags we will need. */ 2756570af302Sopenharmony_ci errcode = tre_add_tags(NULL, stack, tree, tnfa); 2757570af302Sopenharmony_ci if (errcode != REG_OK) 2758570af302Sopenharmony_ci ERROR_EXIT(errcode); 2759570af302Sopenharmony_ci 2760570af302Sopenharmony_ci if (tnfa->num_tags > 0) 2761570af302Sopenharmony_ci { 2762570af302Sopenharmony_ci tag_directions = xmalloc(sizeof(*tag_directions) 2763570af302Sopenharmony_ci * (tnfa->num_tags + 1)); 2764570af302Sopenharmony_ci if (tag_directions == NULL) 2765570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2766570af302Sopenharmony_ci tnfa->tag_directions = tag_directions; 2767570af302Sopenharmony_ci memset(tag_directions, -1, 2768570af302Sopenharmony_ci sizeof(*tag_directions) * (tnfa->num_tags + 1)); 2769570af302Sopenharmony_ci } 2770570af302Sopenharmony_ci tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1, 2771570af302Sopenharmony_ci sizeof(*tnfa->minimal_tags)); 2772570af302Sopenharmony_ci if (tnfa->minimal_tags == NULL) 2773570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2774570af302Sopenharmony_ci 2775570af302Sopenharmony_ci submatch_data = xcalloc((unsigned)parse_ctx.submatch_id, 2776570af302Sopenharmony_ci sizeof(*submatch_data)); 2777570af302Sopenharmony_ci if (submatch_data == NULL) 2778570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2779570af302Sopenharmony_ci tnfa->submatch_data = submatch_data; 2780570af302Sopenharmony_ci 2781570af302Sopenharmony_ci errcode = tre_add_tags(mem, stack, tree, tnfa); 2782570af302Sopenharmony_ci if (errcode != REG_OK) 2783570af302Sopenharmony_ci ERROR_EXIT(errcode); 2784570af302Sopenharmony_ci 2785570af302Sopenharmony_ci } 2786570af302Sopenharmony_ci 2787570af302Sopenharmony_ci /* Expand iteration nodes. */ 2788570af302Sopenharmony_ci errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, 2789570af302Sopenharmony_ci tag_directions); 2790570af302Sopenharmony_ci if (errcode != REG_OK) 2791570af302Sopenharmony_ci ERROR_EXIT(errcode); 2792570af302Sopenharmony_ci 2793570af302Sopenharmony_ci /* Add a dummy node for the final state. 2794570af302Sopenharmony_ci XXX - For certain patterns this dummy node can be optimized away, 2795570af302Sopenharmony_ci for example "a*" or "ab*". Figure out a simple way to detect 2796570af302Sopenharmony_ci this possibility. */ 2797570af302Sopenharmony_ci tmp_ast_l = tree; 2798570af302Sopenharmony_ci tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); 2799570af302Sopenharmony_ci if (tmp_ast_r == NULL) 2800570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2801570af302Sopenharmony_ci 2802570af302Sopenharmony_ci tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); 2803570af302Sopenharmony_ci if (tree == NULL) 2804570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2805570af302Sopenharmony_ci 2806570af302Sopenharmony_ci errcode = tre_compute_nfl(mem, stack, tree); 2807570af302Sopenharmony_ci if (errcode != REG_OK) 2808570af302Sopenharmony_ci ERROR_EXIT(errcode); 2809570af302Sopenharmony_ci 2810570af302Sopenharmony_ci counts = xmalloc(sizeof(int) * parse_ctx.position); 2811570af302Sopenharmony_ci if (counts == NULL) 2812570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2813570af302Sopenharmony_ci 2814570af302Sopenharmony_ci offs = xmalloc(sizeof(int) * parse_ctx.position); 2815570af302Sopenharmony_ci if (offs == NULL) 2816570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2817570af302Sopenharmony_ci 2818570af302Sopenharmony_ci for (i = 0; i < parse_ctx.position; i++) 2819570af302Sopenharmony_ci counts[i] = 0; 2820570af302Sopenharmony_ci tre_ast_to_tnfa(tree, NULL, counts, NULL); 2821570af302Sopenharmony_ci 2822570af302Sopenharmony_ci add = 0; 2823570af302Sopenharmony_ci for (i = 0; i < parse_ctx.position; i++) 2824570af302Sopenharmony_ci { 2825570af302Sopenharmony_ci offs[i] = add; 2826570af302Sopenharmony_ci add += counts[i] + 1; 2827570af302Sopenharmony_ci counts[i] = 0; 2828570af302Sopenharmony_ci } 2829570af302Sopenharmony_ci transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); 2830570af302Sopenharmony_ci if (transitions == NULL) 2831570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2832570af302Sopenharmony_ci tnfa->transitions = transitions; 2833570af302Sopenharmony_ci tnfa->num_transitions = add; 2834570af302Sopenharmony_ci 2835570af302Sopenharmony_ci errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); 2836570af302Sopenharmony_ci if (errcode != REG_OK) 2837570af302Sopenharmony_ci ERROR_EXIT(errcode); 2838570af302Sopenharmony_ci 2839570af302Sopenharmony_ci tnfa->firstpos_chars = NULL; 2840570af302Sopenharmony_ci 2841570af302Sopenharmony_ci p = tree->firstpos; 2842570af302Sopenharmony_ci i = 0; 2843570af302Sopenharmony_ci while (p->position >= 0) 2844570af302Sopenharmony_ci { 2845570af302Sopenharmony_ci i++; 2846570af302Sopenharmony_ci p++; 2847570af302Sopenharmony_ci } 2848570af302Sopenharmony_ci 2849570af302Sopenharmony_ci initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); 2850570af302Sopenharmony_ci if (initial == NULL) 2851570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2852570af302Sopenharmony_ci tnfa->initial = initial; 2853570af302Sopenharmony_ci 2854570af302Sopenharmony_ci i = 0; 2855570af302Sopenharmony_ci for (p = tree->firstpos; p->position >= 0; p++) 2856570af302Sopenharmony_ci { 2857570af302Sopenharmony_ci initial[i].state = transitions + offs[p->position]; 2858570af302Sopenharmony_ci initial[i].state_id = p->position; 2859570af302Sopenharmony_ci initial[i].tags = NULL; 2860570af302Sopenharmony_ci /* Copy the arrays p->tags, and p->params, they are allocated 2861570af302Sopenharmony_ci from a tre_mem object. */ 2862570af302Sopenharmony_ci if (p->tags) 2863570af302Sopenharmony_ci { 2864570af302Sopenharmony_ci int j; 2865570af302Sopenharmony_ci for (j = 0; p->tags[j] >= 0; j++); 2866570af302Sopenharmony_ci initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); 2867570af302Sopenharmony_ci if (!initial[i].tags) 2868570af302Sopenharmony_ci ERROR_EXIT(REG_ESPACE); 2869570af302Sopenharmony_ci memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); 2870570af302Sopenharmony_ci } 2871570af302Sopenharmony_ci initial[i].assertions = p->assertions; 2872570af302Sopenharmony_ci i++; 2873570af302Sopenharmony_ci } 2874570af302Sopenharmony_ci initial[i].state = NULL; 2875570af302Sopenharmony_ci 2876570af302Sopenharmony_ci tnfa->num_transitions = add; 2877570af302Sopenharmony_ci tnfa->final = transitions + offs[tree->lastpos[0].position]; 2878570af302Sopenharmony_ci tnfa->num_states = parse_ctx.position; 2879570af302Sopenharmony_ci tnfa->cflags = cflags; 2880570af302Sopenharmony_ci 2881570af302Sopenharmony_ci tre_mem_destroy(mem); 2882570af302Sopenharmony_ci tre_stack_destroy(stack); 2883570af302Sopenharmony_ci xfree(counts); 2884570af302Sopenharmony_ci xfree(offs); 2885570af302Sopenharmony_ci 2886570af302Sopenharmony_ci preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2887570af302Sopenharmony_ci return REG_OK; 2888570af302Sopenharmony_ci 2889570af302Sopenharmony_ci error_exit: 2890570af302Sopenharmony_ci /* Free everything that was allocated and return the error code. */ 2891570af302Sopenharmony_ci tre_mem_destroy(mem); 2892570af302Sopenharmony_ci if (stack != NULL) 2893570af302Sopenharmony_ci tre_stack_destroy(stack); 2894570af302Sopenharmony_ci if (counts != NULL) 2895570af302Sopenharmony_ci xfree(counts); 2896570af302Sopenharmony_ci if (offs != NULL) 2897570af302Sopenharmony_ci xfree(offs); 2898570af302Sopenharmony_ci preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2899570af302Sopenharmony_ci regfree(preg); 2900570af302Sopenharmony_ci return errcode; 2901570af302Sopenharmony_ci} 2902570af302Sopenharmony_ci 2903570af302Sopenharmony_ci 2904570af302Sopenharmony_ci 2905570af302Sopenharmony_ci 2906570af302Sopenharmony_civoid 2907570af302Sopenharmony_ciregfree(regex_t *preg) 2908570af302Sopenharmony_ci{ 2909570af302Sopenharmony_ci tre_tnfa_t *tnfa; 2910570af302Sopenharmony_ci unsigned int i; 2911570af302Sopenharmony_ci tre_tnfa_transition_t *trans; 2912570af302Sopenharmony_ci 2913570af302Sopenharmony_ci tnfa = (void *)preg->TRE_REGEX_T_FIELD; 2914570af302Sopenharmony_ci if (!tnfa) 2915570af302Sopenharmony_ci return; 2916570af302Sopenharmony_ci 2917570af302Sopenharmony_ci for (i = 0; i < tnfa->num_transitions; i++) 2918570af302Sopenharmony_ci if (tnfa->transitions[i].state) 2919570af302Sopenharmony_ci { 2920570af302Sopenharmony_ci if (tnfa->transitions[i].tags) 2921570af302Sopenharmony_ci xfree(tnfa->transitions[i].tags); 2922570af302Sopenharmony_ci if (tnfa->transitions[i].neg_classes) 2923570af302Sopenharmony_ci xfree(tnfa->transitions[i].neg_classes); 2924570af302Sopenharmony_ci } 2925570af302Sopenharmony_ci if (tnfa->transitions) 2926570af302Sopenharmony_ci xfree(tnfa->transitions); 2927570af302Sopenharmony_ci 2928570af302Sopenharmony_ci if (tnfa->initial) 2929570af302Sopenharmony_ci { 2930570af302Sopenharmony_ci for (trans = tnfa->initial; trans->state; trans++) 2931570af302Sopenharmony_ci { 2932570af302Sopenharmony_ci if (trans->tags) 2933570af302Sopenharmony_ci xfree(trans->tags); 2934570af302Sopenharmony_ci } 2935570af302Sopenharmony_ci xfree(tnfa->initial); 2936570af302Sopenharmony_ci } 2937570af302Sopenharmony_ci 2938570af302Sopenharmony_ci if (tnfa->submatch_data) 2939570af302Sopenharmony_ci { 2940570af302Sopenharmony_ci for (i = 0; i < tnfa->num_submatches; i++) 2941570af302Sopenharmony_ci if (tnfa->submatch_data[i].parents) 2942570af302Sopenharmony_ci xfree(tnfa->submatch_data[i].parents); 2943570af302Sopenharmony_ci xfree(tnfa->submatch_data); 2944570af302Sopenharmony_ci } 2945570af302Sopenharmony_ci 2946570af302Sopenharmony_ci if (tnfa->tag_directions) 2947570af302Sopenharmony_ci xfree(tnfa->tag_directions); 2948570af302Sopenharmony_ci if (tnfa->firstpos_chars) 2949570af302Sopenharmony_ci xfree(tnfa->firstpos_chars); 2950570af302Sopenharmony_ci if (tnfa->minimal_tags) 2951570af302Sopenharmony_ci xfree(tnfa->minimal_tags); 2952570af302Sopenharmony_ci xfree(tnfa); 2953570af302Sopenharmony_ci} 2954