1570af302Sopenharmony_ci/*
2570af302Sopenharmony_ci  regcomp.c - TRE POSIX compatible regex compilation functions.
3570af302Sopenharmony_ci
4570af302Sopenharmony_ci  Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5570af302Sopenharmony_ci  All rights reserved.
6570af302Sopenharmony_ci
7570af302Sopenharmony_ci  Redistribution and use in source and binary forms, with or without
8570af302Sopenharmony_ci  modification, are permitted provided that the following conditions
9570af302Sopenharmony_ci  are met:
10570af302Sopenharmony_ci
11570af302Sopenharmony_ci    1. Redistributions of source code must retain the above copyright
12570af302Sopenharmony_ci       notice, this list of conditions and the following disclaimer.
13570af302Sopenharmony_ci
14570af302Sopenharmony_ci    2. Redistributions in binary form must reproduce the above copyright
15570af302Sopenharmony_ci       notice, this list of conditions and the following disclaimer in the
16570af302Sopenharmony_ci       documentation and/or other materials provided with the distribution.
17570af302Sopenharmony_ci
18570af302Sopenharmony_ci  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19570af302Sopenharmony_ci  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20570af302Sopenharmony_ci  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21570af302Sopenharmony_ci  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22570af302Sopenharmony_ci  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23570af302Sopenharmony_ci  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24570af302Sopenharmony_ci  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25570af302Sopenharmony_ci  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26570af302Sopenharmony_ci  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27570af302Sopenharmony_ci  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28570af302Sopenharmony_ci  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29570af302Sopenharmony_ci
30570af302Sopenharmony_ci*/
31570af302Sopenharmony_ci
32570af302Sopenharmony_ci#include <string.h>
33570af302Sopenharmony_ci#include <stdlib.h>
34570af302Sopenharmony_ci#include <regex.h>
35570af302Sopenharmony_ci#include <limits.h>
36570af302Sopenharmony_ci#include <stdint.h>
37570af302Sopenharmony_ci#include <ctype.h>
38570af302Sopenharmony_ci
39570af302Sopenharmony_ci#include "tre.h"
40570af302Sopenharmony_ci
41570af302Sopenharmony_ci#include <assert.h>
42570af302Sopenharmony_ci
43570af302Sopenharmony_ci/***********************************************************************
44570af302Sopenharmony_ci from tre-compile.h
45570af302Sopenharmony_ci***********************************************************************/
46570af302Sopenharmony_ci
47570af302Sopenharmony_citypedef struct {
48570af302Sopenharmony_ci  int position;
49570af302Sopenharmony_ci  int code_min;
50570af302Sopenharmony_ci  int code_max;
51570af302Sopenharmony_ci  int *tags;
52570af302Sopenharmony_ci  int assertions;
53570af302Sopenharmony_ci  tre_ctype_t class;
54570af302Sopenharmony_ci  tre_ctype_t *neg_classes;
55570af302Sopenharmony_ci  int backref;
56570af302Sopenharmony_ci} tre_pos_and_tags_t;
57570af302Sopenharmony_ci
58570af302Sopenharmony_ci
59570af302Sopenharmony_ci/***********************************************************************
60570af302Sopenharmony_ci from tre-ast.c and tre-ast.h
61570af302Sopenharmony_ci***********************************************************************/
62570af302Sopenharmony_ci
63570af302Sopenharmony_ci/* The different AST node types. */
64570af302Sopenharmony_citypedef enum {
65570af302Sopenharmony_ci  LITERAL,
66570af302Sopenharmony_ci  CATENATION,
67570af302Sopenharmony_ci  ITERATION,
68570af302Sopenharmony_ci  UNION
69570af302Sopenharmony_ci} tre_ast_type_t;
70570af302Sopenharmony_ci
71570af302Sopenharmony_ci/* Special subtypes of TRE_LITERAL. */
72570af302Sopenharmony_ci#define EMPTY	  -1   /* Empty leaf (denotes empty string). */
73570af302Sopenharmony_ci#define ASSERTION -2   /* Assertion leaf. */
74570af302Sopenharmony_ci#define TAG	  -3   /* Tag leaf. */
75570af302Sopenharmony_ci#define BACKREF	  -4   /* Back reference leaf. */
76570af302Sopenharmony_ci
77570af302Sopenharmony_ci#define IS_SPECIAL(x)	((x)->code_min < 0)
78570af302Sopenharmony_ci#define IS_EMPTY(x)	((x)->code_min == EMPTY)
79570af302Sopenharmony_ci#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80570af302Sopenharmony_ci#define IS_TAG(x)	((x)->code_min == TAG)
81570af302Sopenharmony_ci#define IS_BACKREF(x)	((x)->code_min == BACKREF)
82570af302Sopenharmony_ci
83570af302Sopenharmony_ci
84570af302Sopenharmony_ci/* A generic AST node.  All AST nodes consist of this node on the top
85570af302Sopenharmony_ci   level with `obj' pointing to the actual content. */
86570af302Sopenharmony_citypedef struct {
87570af302Sopenharmony_ci  tre_ast_type_t type;   /* Type of the node. */
88570af302Sopenharmony_ci  void *obj;             /* Pointer to actual node. */
89570af302Sopenharmony_ci  int nullable;
90570af302Sopenharmony_ci  int submatch_id;
91570af302Sopenharmony_ci  int num_submatches;
92570af302Sopenharmony_ci  int num_tags;
93570af302Sopenharmony_ci  tre_pos_and_tags_t *firstpos;
94570af302Sopenharmony_ci  tre_pos_and_tags_t *lastpos;
95570af302Sopenharmony_ci} tre_ast_node_t;
96570af302Sopenharmony_ci
97570af302Sopenharmony_ci
98570af302Sopenharmony_ci/* A "literal" node.  These are created for assertions, back references,
99570af302Sopenharmony_ci   tags, matching parameter settings, and all expressions that match one
100570af302Sopenharmony_ci   character. */
101570af302Sopenharmony_citypedef struct {
102570af302Sopenharmony_ci  long code_min;
103570af302Sopenharmony_ci  long code_max;
104570af302Sopenharmony_ci  int position;
105570af302Sopenharmony_ci  tre_ctype_t class;
106570af302Sopenharmony_ci  tre_ctype_t *neg_classes;
107570af302Sopenharmony_ci} tre_literal_t;
108570af302Sopenharmony_ci
109570af302Sopenharmony_ci/* A "catenation" node.	 These are created when two regexps are concatenated.
110570af302Sopenharmony_ci   If there are more than one subexpressions in sequence, the `left' part
111570af302Sopenharmony_ci   holds all but the last, and `right' part holds the last subexpression
112570af302Sopenharmony_ci   (catenation is left associative). */
113570af302Sopenharmony_citypedef struct {
114570af302Sopenharmony_ci  tre_ast_node_t *left;
115570af302Sopenharmony_ci  tre_ast_node_t *right;
116570af302Sopenharmony_ci} tre_catenation_t;
117570af302Sopenharmony_ci
118570af302Sopenharmony_ci/* An "iteration" node.	 These are created for the "*", "+", "?", and "{m,n}"
119570af302Sopenharmony_ci   operators. */
120570af302Sopenharmony_citypedef struct {
121570af302Sopenharmony_ci  /* Subexpression to match. */
122570af302Sopenharmony_ci  tre_ast_node_t *arg;
123570af302Sopenharmony_ci  /* Minimum number of consecutive matches. */
124570af302Sopenharmony_ci  int min;
125570af302Sopenharmony_ci  /* Maximum number of consecutive matches. */
126570af302Sopenharmony_ci  int max;
127570af302Sopenharmony_ci  /* If 0, match as many characters as possible, if 1 match as few as
128570af302Sopenharmony_ci     possible.	Note that this does not always mean the same thing as
129570af302Sopenharmony_ci     matching as many/few repetitions as possible. */
130570af302Sopenharmony_ci  unsigned int minimal:1;
131570af302Sopenharmony_ci} tre_iteration_t;
132570af302Sopenharmony_ci
133570af302Sopenharmony_ci/* An "union" node.  These are created for the "|" operator. */
134570af302Sopenharmony_citypedef struct {
135570af302Sopenharmony_ci  tre_ast_node_t *left;
136570af302Sopenharmony_ci  tre_ast_node_t *right;
137570af302Sopenharmony_ci} tre_union_t;
138570af302Sopenharmony_ci
139570af302Sopenharmony_ci
140570af302Sopenharmony_cistatic tre_ast_node_t *
141570af302Sopenharmony_citre_ast_new_node(tre_mem_t mem, int type, void *obj)
142570af302Sopenharmony_ci{
143570af302Sopenharmony_ci	tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
144570af302Sopenharmony_ci	if (!node || !obj)
145570af302Sopenharmony_ci		return 0;
146570af302Sopenharmony_ci	node->obj = obj;
147570af302Sopenharmony_ci	node->type = type;
148570af302Sopenharmony_ci	node->nullable = -1;
149570af302Sopenharmony_ci	node->submatch_id = -1;
150570af302Sopenharmony_ci	return node;
151570af302Sopenharmony_ci}
152570af302Sopenharmony_ci
153570af302Sopenharmony_cistatic tre_ast_node_t *
154570af302Sopenharmony_citre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155570af302Sopenharmony_ci{
156570af302Sopenharmony_ci	tre_ast_node_t *node;
157570af302Sopenharmony_ci	tre_literal_t *lit;
158570af302Sopenharmony_ci
159570af302Sopenharmony_ci	lit = tre_mem_calloc(mem, sizeof *lit);
160570af302Sopenharmony_ci	node = tre_ast_new_node(mem, LITERAL, lit);
161570af302Sopenharmony_ci	if (!node)
162570af302Sopenharmony_ci		return 0;
163570af302Sopenharmony_ci	lit->code_min = code_min;
164570af302Sopenharmony_ci	lit->code_max = code_max;
165570af302Sopenharmony_ci	lit->position = position;
166570af302Sopenharmony_ci	return node;
167570af302Sopenharmony_ci}
168570af302Sopenharmony_ci
169570af302Sopenharmony_cistatic tre_ast_node_t *
170570af302Sopenharmony_citre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171570af302Sopenharmony_ci{
172570af302Sopenharmony_ci	tre_ast_node_t *node;
173570af302Sopenharmony_ci	tre_iteration_t *iter;
174570af302Sopenharmony_ci
175570af302Sopenharmony_ci	iter = tre_mem_calloc(mem, sizeof *iter);
176570af302Sopenharmony_ci	node = tre_ast_new_node(mem, ITERATION, iter);
177570af302Sopenharmony_ci	if (!node)
178570af302Sopenharmony_ci		return 0;
179570af302Sopenharmony_ci	iter->arg = arg;
180570af302Sopenharmony_ci	iter->min = min;
181570af302Sopenharmony_ci	iter->max = max;
182570af302Sopenharmony_ci	iter->minimal = minimal;
183570af302Sopenharmony_ci	node->num_submatches = arg->num_submatches;
184570af302Sopenharmony_ci	return node;
185570af302Sopenharmony_ci}
186570af302Sopenharmony_ci
187570af302Sopenharmony_cistatic tre_ast_node_t *
188570af302Sopenharmony_citre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
189570af302Sopenharmony_ci{
190570af302Sopenharmony_ci	tre_ast_node_t *node;
191570af302Sopenharmony_ci	tre_union_t *un;
192570af302Sopenharmony_ci
193570af302Sopenharmony_ci	if (!left)
194570af302Sopenharmony_ci		return right;
195570af302Sopenharmony_ci	un = tre_mem_calloc(mem, sizeof *un);
196570af302Sopenharmony_ci	node = tre_ast_new_node(mem, UNION, un);
197570af302Sopenharmony_ci	if (!node || !right)
198570af302Sopenharmony_ci		return 0;
199570af302Sopenharmony_ci	un->left = left;
200570af302Sopenharmony_ci	un->right = right;
201570af302Sopenharmony_ci	node->num_submatches = left->num_submatches + right->num_submatches;
202570af302Sopenharmony_ci	return node;
203570af302Sopenharmony_ci}
204570af302Sopenharmony_ci
205570af302Sopenharmony_cistatic tre_ast_node_t *
206570af302Sopenharmony_citre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
207570af302Sopenharmony_ci{
208570af302Sopenharmony_ci	tre_ast_node_t *node;
209570af302Sopenharmony_ci	tre_catenation_t *cat;
210570af302Sopenharmony_ci
211570af302Sopenharmony_ci	if (!left)
212570af302Sopenharmony_ci		return right;
213570af302Sopenharmony_ci	cat = tre_mem_calloc(mem, sizeof *cat);
214570af302Sopenharmony_ci	node = tre_ast_new_node(mem, CATENATION, cat);
215570af302Sopenharmony_ci	if (!node)
216570af302Sopenharmony_ci		return 0;
217570af302Sopenharmony_ci	cat->left = left;
218570af302Sopenharmony_ci	cat->right = right;
219570af302Sopenharmony_ci	node->num_submatches = left->num_submatches + right->num_submatches;
220570af302Sopenharmony_ci	return node;
221570af302Sopenharmony_ci}
222570af302Sopenharmony_ci
223570af302Sopenharmony_ci
224570af302Sopenharmony_ci/***********************************************************************
225570af302Sopenharmony_ci from tre-stack.c and tre-stack.h
226570af302Sopenharmony_ci***********************************************************************/
227570af302Sopenharmony_ci
228570af302Sopenharmony_citypedef struct tre_stack_rec tre_stack_t;
229570af302Sopenharmony_ci
230570af302Sopenharmony_ci/* Creates a new stack object.	`size' is initial size in bytes, `max_size'
231570af302Sopenharmony_ci   is maximum size, and `increment' specifies how much more space will be
232570af302Sopenharmony_ci   allocated with realloc() if all space gets used up.	Returns the stack
233570af302Sopenharmony_ci   object or NULL if out of memory. */
234570af302Sopenharmony_cistatic tre_stack_t *
235570af302Sopenharmony_citre_stack_new(int size, int max_size, int increment);
236570af302Sopenharmony_ci
237570af302Sopenharmony_ci/* Frees the stack object. */
238570af302Sopenharmony_cistatic void
239570af302Sopenharmony_citre_stack_destroy(tre_stack_t *s);
240570af302Sopenharmony_ci
241570af302Sopenharmony_ci/* Returns the current number of objects in the stack. */
242570af302Sopenharmony_cistatic int
243570af302Sopenharmony_citre_stack_num_objects(tre_stack_t *s);
244570af302Sopenharmony_ci
245570af302Sopenharmony_ci/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246570af302Sopenharmony_ci   `value' on top of stack `s'.  Returns REG_ESPACE if out of memory.
247570af302Sopenharmony_ci   This tries to realloc() more space before failing if maximum size
248570af302Sopenharmony_ci   has not yet been reached.  Returns REG_OK if successful. */
249570af302Sopenharmony_ci#define declare_pushf(typetag, type)					      \
250570af302Sopenharmony_ci  static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251570af302Sopenharmony_ci
252570af302Sopenharmony_cideclare_pushf(voidptr, void *);
253570af302Sopenharmony_cideclare_pushf(int, int);
254570af302Sopenharmony_ci
255570af302Sopenharmony_ci/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256570af302Sopenharmony_ci   element off of stack `s' and returns it.  The stack must not be
257570af302Sopenharmony_ci   empty. */
258570af302Sopenharmony_ci#define declare_popf(typetag, type)		  \
259570af302Sopenharmony_ci  static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260570af302Sopenharmony_ci
261570af302Sopenharmony_cideclare_popf(voidptr, void *);
262570af302Sopenharmony_cideclare_popf(int, int);
263570af302Sopenharmony_ci
264570af302Sopenharmony_ci/* Just to save some typing. */
265570af302Sopenharmony_ci#define STACK_PUSH(s, typetag, value)					      \
266570af302Sopenharmony_ci  do									      \
267570af302Sopenharmony_ci    {									      \
268570af302Sopenharmony_ci      status = tre_stack_push_ ## typetag(s, value);			      \
269570af302Sopenharmony_ci    }									      \
270570af302Sopenharmony_ci  while (/*CONSTCOND*/0)
271570af302Sopenharmony_ci
272570af302Sopenharmony_ci#define STACK_PUSHX(s, typetag, value)					      \
273570af302Sopenharmony_ci  {									      \
274570af302Sopenharmony_ci    status = tre_stack_push_ ## typetag(s, value);			      \
275570af302Sopenharmony_ci    if (status != REG_OK)						      \
276570af302Sopenharmony_ci      break;								      \
277570af302Sopenharmony_ci  }
278570af302Sopenharmony_ci
279570af302Sopenharmony_ci#define STACK_PUSHR(s, typetag, value)					      \
280570af302Sopenharmony_ci  {									      \
281570af302Sopenharmony_ci    reg_errcode_t _status;						      \
282570af302Sopenharmony_ci    _status = tre_stack_push_ ## typetag(s, value);			      \
283570af302Sopenharmony_ci    if (_status != REG_OK)						      \
284570af302Sopenharmony_ci      return _status;							      \
285570af302Sopenharmony_ci  }
286570af302Sopenharmony_ci
287570af302Sopenharmony_ciunion tre_stack_item {
288570af302Sopenharmony_ci  void *voidptr_value;
289570af302Sopenharmony_ci  int int_value;
290570af302Sopenharmony_ci};
291570af302Sopenharmony_ci
292570af302Sopenharmony_cistruct tre_stack_rec {
293570af302Sopenharmony_ci  int size;
294570af302Sopenharmony_ci  int max_size;
295570af302Sopenharmony_ci  int increment;
296570af302Sopenharmony_ci  int ptr;
297570af302Sopenharmony_ci  union tre_stack_item *stack;
298570af302Sopenharmony_ci};
299570af302Sopenharmony_ci
300570af302Sopenharmony_ci
301570af302Sopenharmony_cistatic tre_stack_t *
302570af302Sopenharmony_citre_stack_new(int size, int max_size, int increment)
303570af302Sopenharmony_ci{
304570af302Sopenharmony_ci  tre_stack_t *s;
305570af302Sopenharmony_ci
306570af302Sopenharmony_ci  s = xmalloc(sizeof(*s));
307570af302Sopenharmony_ci  if (s != NULL)
308570af302Sopenharmony_ci    {
309570af302Sopenharmony_ci      s->stack = xmalloc(sizeof(*s->stack) * size);
310570af302Sopenharmony_ci      if (s->stack == NULL)
311570af302Sopenharmony_ci	{
312570af302Sopenharmony_ci	  xfree(s);
313570af302Sopenharmony_ci	  return NULL;
314570af302Sopenharmony_ci	}
315570af302Sopenharmony_ci      s->size = size;
316570af302Sopenharmony_ci      s->max_size = max_size;
317570af302Sopenharmony_ci      s->increment = increment;
318570af302Sopenharmony_ci      s->ptr = 0;
319570af302Sopenharmony_ci    }
320570af302Sopenharmony_ci  return s;
321570af302Sopenharmony_ci}
322570af302Sopenharmony_ci
323570af302Sopenharmony_cistatic void
324570af302Sopenharmony_citre_stack_destroy(tre_stack_t *s)
325570af302Sopenharmony_ci{
326570af302Sopenharmony_ci  xfree(s->stack);
327570af302Sopenharmony_ci  xfree(s);
328570af302Sopenharmony_ci}
329570af302Sopenharmony_ci
330570af302Sopenharmony_cistatic int
331570af302Sopenharmony_citre_stack_num_objects(tre_stack_t *s)
332570af302Sopenharmony_ci{
333570af302Sopenharmony_ci  return s->ptr;
334570af302Sopenharmony_ci}
335570af302Sopenharmony_ci
336570af302Sopenharmony_cistatic reg_errcode_t
337570af302Sopenharmony_citre_stack_push(tre_stack_t *s, union tre_stack_item value)
338570af302Sopenharmony_ci{
339570af302Sopenharmony_ci  if (s->ptr < s->size)
340570af302Sopenharmony_ci    {
341570af302Sopenharmony_ci      s->stack[s->ptr] = value;
342570af302Sopenharmony_ci      s->ptr++;
343570af302Sopenharmony_ci    }
344570af302Sopenharmony_ci  else
345570af302Sopenharmony_ci    {
346570af302Sopenharmony_ci      if (s->size >= s->max_size)
347570af302Sopenharmony_ci	{
348570af302Sopenharmony_ci	  return REG_ESPACE;
349570af302Sopenharmony_ci	}
350570af302Sopenharmony_ci      else
351570af302Sopenharmony_ci	{
352570af302Sopenharmony_ci	  union tre_stack_item *new_buffer;
353570af302Sopenharmony_ci	  int new_size;
354570af302Sopenharmony_ci	  new_size = s->size + s->increment;
355570af302Sopenharmony_ci	  if (new_size > s->max_size)
356570af302Sopenharmony_ci	    new_size = s->max_size;
357570af302Sopenharmony_ci	  new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358570af302Sopenharmony_ci	  if (new_buffer == NULL)
359570af302Sopenharmony_ci	    {
360570af302Sopenharmony_ci	      return REG_ESPACE;
361570af302Sopenharmony_ci	    }
362570af302Sopenharmony_ci	  assert(new_size > s->size);
363570af302Sopenharmony_ci	  s->size = new_size;
364570af302Sopenharmony_ci	  s->stack = new_buffer;
365570af302Sopenharmony_ci	  tre_stack_push(s, value);
366570af302Sopenharmony_ci	}
367570af302Sopenharmony_ci    }
368570af302Sopenharmony_ci  return REG_OK;
369570af302Sopenharmony_ci}
370570af302Sopenharmony_ci
371570af302Sopenharmony_ci#define define_pushf(typetag, type)  \
372570af302Sopenharmony_ci  declare_pushf(typetag, type) {     \
373570af302Sopenharmony_ci    union tre_stack_item item;	     \
374570af302Sopenharmony_ci    item.typetag ## _value = value;  \
375570af302Sopenharmony_ci    return tre_stack_push(s, item);  \
376570af302Sopenharmony_ci}
377570af302Sopenharmony_ci
378570af302Sopenharmony_cidefine_pushf(int, int)
379570af302Sopenharmony_cidefine_pushf(voidptr, void *)
380570af302Sopenharmony_ci
381570af302Sopenharmony_ci#define define_popf(typetag, type)		    \
382570af302Sopenharmony_ci  declare_popf(typetag, type) {			    \
383570af302Sopenharmony_ci    return s->stack[--s->ptr].typetag ## _value;    \
384570af302Sopenharmony_ci  }
385570af302Sopenharmony_ci
386570af302Sopenharmony_cidefine_popf(int, int)
387570af302Sopenharmony_cidefine_popf(voidptr, void *)
388570af302Sopenharmony_ci
389570af302Sopenharmony_ci
390570af302Sopenharmony_ci/***********************************************************************
391570af302Sopenharmony_ci from tre-parse.c and tre-parse.h
392570af302Sopenharmony_ci***********************************************************************/
393570af302Sopenharmony_ci
394570af302Sopenharmony_ci/* Parse context. */
395570af302Sopenharmony_citypedef struct {
396570af302Sopenharmony_ci	/* Memory allocator. The AST is allocated using this. */
397570af302Sopenharmony_ci	tre_mem_t mem;
398570af302Sopenharmony_ci	/* Stack used for keeping track of regexp syntax. */
399570af302Sopenharmony_ci	tre_stack_t *stack;
400570af302Sopenharmony_ci	/* The parsed node after a parse function returns. */
401570af302Sopenharmony_ci	tre_ast_node_t *n;
402570af302Sopenharmony_ci	/* Position in the regexp pattern after a parse function returns. */
403570af302Sopenharmony_ci	const char *s;
404570af302Sopenharmony_ci	/* The first character of the last subexpression parsed. */
405570af302Sopenharmony_ci	const char *start;
406570af302Sopenharmony_ci	/* Current submatch ID. */
407570af302Sopenharmony_ci	int submatch_id;
408570af302Sopenharmony_ci	/* Current position (number of literal). */
409570af302Sopenharmony_ci	int position;
410570af302Sopenharmony_ci	/* The highest back reference or -1 if none seen so far. */
411570af302Sopenharmony_ci	int max_backref;
412570af302Sopenharmony_ci	/* Compilation flags. */
413570af302Sopenharmony_ci	int cflags;
414570af302Sopenharmony_ci} tre_parse_ctx_t;
415570af302Sopenharmony_ci
416570af302Sopenharmony_ci/* Some macros for expanding \w, \s, etc. */
417570af302Sopenharmony_cistatic const struct {
418570af302Sopenharmony_ci	char c;
419570af302Sopenharmony_ci	const char *expansion;
420570af302Sopenharmony_ci} tre_macros[] = {
421570af302Sopenharmony_ci	{'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422570af302Sopenharmony_ci	{'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423570af302Sopenharmony_ci	{'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424570af302Sopenharmony_ci	{'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425570af302Sopenharmony_ci	{ 0, 0 }
426570af302Sopenharmony_ci};
427570af302Sopenharmony_ci
428570af302Sopenharmony_ci/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429570af302Sopenharmony_ci   must have at least `len' items.  Sets buf[0] to zero if the there
430570af302Sopenharmony_ci   is no match in `tre_macros'. */
431570af302Sopenharmony_cistatic const char *tre_expand_macro(const char *s)
432570af302Sopenharmony_ci{
433570af302Sopenharmony_ci	int i;
434570af302Sopenharmony_ci	for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435570af302Sopenharmony_ci	return tre_macros[i].expansion;
436570af302Sopenharmony_ci}
437570af302Sopenharmony_ci
438570af302Sopenharmony_cistatic int
439570af302Sopenharmony_citre_compare_lit(const void *a, const void *b)
440570af302Sopenharmony_ci{
441570af302Sopenharmony_ci	const tre_literal_t *const *la = a;
442570af302Sopenharmony_ci	const tre_literal_t *const *lb = b;
443570af302Sopenharmony_ci	/* assumes the range of valid code_min is < INT_MAX */
444570af302Sopenharmony_ci	return la[0]->code_min - lb[0]->code_min;
445570af302Sopenharmony_ci}
446570af302Sopenharmony_ci
447570af302Sopenharmony_cistruct literals {
448570af302Sopenharmony_ci	tre_mem_t mem;
449570af302Sopenharmony_ci	tre_literal_t **a;
450570af302Sopenharmony_ci	int len;
451570af302Sopenharmony_ci	int cap;
452570af302Sopenharmony_ci};
453570af302Sopenharmony_ci
454570af302Sopenharmony_cistatic tre_literal_t *tre_new_lit(struct literals *p)
455570af302Sopenharmony_ci{
456570af302Sopenharmony_ci	tre_literal_t **a;
457570af302Sopenharmony_ci	if (p->len >= p->cap) {
458570af302Sopenharmony_ci		if (p->cap >= 1<<15)
459570af302Sopenharmony_ci			return 0;
460570af302Sopenharmony_ci		p->cap *= 2;
461570af302Sopenharmony_ci		a = xrealloc(p->a, p->cap * sizeof *p->a);
462570af302Sopenharmony_ci		if (!a)
463570af302Sopenharmony_ci			return 0;
464570af302Sopenharmony_ci		p->a = a;
465570af302Sopenharmony_ci	}
466570af302Sopenharmony_ci	a = p->a + p->len++;
467570af302Sopenharmony_ci	*a = tre_mem_calloc(p->mem, sizeof **a);
468570af302Sopenharmony_ci	return *a;
469570af302Sopenharmony_ci}
470570af302Sopenharmony_ci
471570af302Sopenharmony_cistatic int add_icase_literals(struct literals *ls, int min, int max)
472570af302Sopenharmony_ci{
473570af302Sopenharmony_ci	tre_literal_t *lit;
474570af302Sopenharmony_ci	int b, e, c;
475570af302Sopenharmony_ci	for (c=min; c<=max; ) {
476570af302Sopenharmony_ci		/* assumes islower(c) and isupper(c) are exclusive
477570af302Sopenharmony_ci		   and toupper(c)!=c if islower(c).
478570af302Sopenharmony_ci		   multiple opposite case characters are not supported */
479570af302Sopenharmony_ci		if (tre_islower(c)) {
480570af302Sopenharmony_ci			b = e = tre_toupper(c);
481570af302Sopenharmony_ci			for (c++, e++; c<=max; c++, e++)
482570af302Sopenharmony_ci				if (tre_toupper(c) != e) break;
483570af302Sopenharmony_ci		} else if (tre_isupper(c)) {
484570af302Sopenharmony_ci			b = e = tre_tolower(c);
485570af302Sopenharmony_ci			for (c++, e++; c<=max; c++, e++)
486570af302Sopenharmony_ci				if (tre_tolower(c) != e) break;
487570af302Sopenharmony_ci		} else {
488570af302Sopenharmony_ci			c++;
489570af302Sopenharmony_ci			continue;
490570af302Sopenharmony_ci		}
491570af302Sopenharmony_ci		lit = tre_new_lit(ls);
492570af302Sopenharmony_ci		if (!lit)
493570af302Sopenharmony_ci			return -1;
494570af302Sopenharmony_ci		lit->code_min = b;
495570af302Sopenharmony_ci		lit->code_max = e-1;
496570af302Sopenharmony_ci		lit->position = -1;
497570af302Sopenharmony_ci	}
498570af302Sopenharmony_ci	return 0;
499570af302Sopenharmony_ci}
500570af302Sopenharmony_ci
501570af302Sopenharmony_ci
502570af302Sopenharmony_ci/* Maximum number of character classes in a negated bracket expression. */
503570af302Sopenharmony_ci#define MAX_NEG_CLASSES 64
504570af302Sopenharmony_ci
505570af302Sopenharmony_cistruct neg {
506570af302Sopenharmony_ci	int negate;
507570af302Sopenharmony_ci	int len;
508570af302Sopenharmony_ci	tre_ctype_t a[MAX_NEG_CLASSES];
509570af302Sopenharmony_ci};
510570af302Sopenharmony_ci
511570af302Sopenharmony_ci// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512570af302Sopenharmony_ci
513570af302Sopenharmony_ci/*
514570af302Sopenharmony_cibracket grammar:
515570af302Sopenharmony_ciBracket  =  '[' List ']'  |  '[^' List ']'
516570af302Sopenharmony_ciList     =  Term  |  List Term
517570af302Sopenharmony_ciTerm     =  Char  |  Range  |  Chclass  |  Eqclass
518570af302Sopenharmony_ciRange    =  Char '-' Char  |  Char '-' '-'
519570af302Sopenharmony_ciChar     =  Coll  |  coll_single
520570af302Sopenharmony_ciMeta     =  ']'  |  '-'
521570af302Sopenharmony_ciColl     =  '[.' coll_single '.]'  |  '[.' coll_multi '.]'  |  '[.' Meta '.]'
522570af302Sopenharmony_ciEqclass  =  '[=' coll_single '=]'  |  '[=' coll_multi '=]'
523570af302Sopenharmony_ciChclass  =  '[:' class ':]'
524570af302Sopenharmony_ci
525570af302Sopenharmony_cicoll_single is a single char collating element but it can be
526570af302Sopenharmony_ci '-' only at the beginning or end of a List and
527570af302Sopenharmony_ci ']' only at the beginning of a List and
528570af302Sopenharmony_ci '^' anywhere except after the openning '['
529570af302Sopenharmony_ci*/
530570af302Sopenharmony_ci
531570af302Sopenharmony_cistatic reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
532570af302Sopenharmony_ci{
533570af302Sopenharmony_ci	const char *start = s;
534570af302Sopenharmony_ci	tre_ctype_t class;
535570af302Sopenharmony_ci	int min, max;
536570af302Sopenharmony_ci	wchar_t wc;
537570af302Sopenharmony_ci	int len;
538570af302Sopenharmony_ci
539570af302Sopenharmony_ci	for (;;) {
540570af302Sopenharmony_ci		class = 0;
541570af302Sopenharmony_ci		len = mbtowc(&wc, s, -1);
542570af302Sopenharmony_ci		if (len <= 0)
543570af302Sopenharmony_ci			return *s ? REG_BADPAT : REG_EBRACK;
544570af302Sopenharmony_ci		if (*s == ']' && s != start) {
545570af302Sopenharmony_ci			ctx->s = s+1;
546570af302Sopenharmony_ci			return REG_OK;
547570af302Sopenharmony_ci		}
548570af302Sopenharmony_ci		if (*s == '-' && s != start && s[1] != ']' &&
549570af302Sopenharmony_ci		    /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550570af302Sopenharmony_ci		    (s[1] != '-' || s[2] == ']'))
551570af302Sopenharmony_ci			return REG_ERANGE;
552570af302Sopenharmony_ci		if (*s == '[' && (s[1] == '.' || s[1] == '='))
553570af302Sopenharmony_ci			/* collating symbols and equivalence classes are not supported */
554570af302Sopenharmony_ci			return REG_ECOLLATE;
555570af302Sopenharmony_ci		if (*s == '[' && s[1] == ':') {
556570af302Sopenharmony_ci			char tmp[CHARCLASS_NAME_MAX+1];
557570af302Sopenharmony_ci			s += 2;
558570af302Sopenharmony_ci			for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
559570af302Sopenharmony_ci				if (s[len] == ':') {
560570af302Sopenharmony_ci					memcpy(tmp, s, len);
561570af302Sopenharmony_ci					tmp[len] = 0;
562570af302Sopenharmony_ci					class = tre_ctype(tmp);
563570af302Sopenharmony_ci					break;
564570af302Sopenharmony_ci				}
565570af302Sopenharmony_ci			}
566570af302Sopenharmony_ci			if (!class || s[len+1] != ']')
567570af302Sopenharmony_ci				return REG_ECTYPE;
568570af302Sopenharmony_ci			min = 0;
569570af302Sopenharmony_ci			max = TRE_CHAR_MAX;
570570af302Sopenharmony_ci			s += len+2;
571570af302Sopenharmony_ci		} else {
572570af302Sopenharmony_ci			min = max = wc;
573570af302Sopenharmony_ci			s += len;
574570af302Sopenharmony_ci			if (*s == '-' && s[1] != ']') {
575570af302Sopenharmony_ci				s++;
576570af302Sopenharmony_ci				len = mbtowc(&wc, s, -1);
577570af302Sopenharmony_ci				max = wc;
578570af302Sopenharmony_ci				/* XXX - Should use collation order instead of
579570af302Sopenharmony_ci				   encoding values in character ranges. */
580570af302Sopenharmony_ci				if (len <= 0 || min > max)
581570af302Sopenharmony_ci					return REG_ERANGE;
582570af302Sopenharmony_ci				s += len;
583570af302Sopenharmony_ci			}
584570af302Sopenharmony_ci		}
585570af302Sopenharmony_ci
586570af302Sopenharmony_ci		if (class && neg->negate) {
587570af302Sopenharmony_ci			if (neg->len >= MAX_NEG_CLASSES)
588570af302Sopenharmony_ci				return REG_ESPACE;
589570af302Sopenharmony_ci			neg->a[neg->len++] = class;
590570af302Sopenharmony_ci		} else  {
591570af302Sopenharmony_ci			tre_literal_t *lit = tre_new_lit(ls);
592570af302Sopenharmony_ci			if (!lit)
593570af302Sopenharmony_ci				return REG_ESPACE;
594570af302Sopenharmony_ci			lit->code_min = min;
595570af302Sopenharmony_ci			lit->code_max = max;
596570af302Sopenharmony_ci			lit->class = class;
597570af302Sopenharmony_ci			lit->position = -1;
598570af302Sopenharmony_ci
599570af302Sopenharmony_ci			/* Add opposite-case codepoints if REG_ICASE is present.
600570af302Sopenharmony_ci			   It seems that POSIX requires that bracket negation
601570af302Sopenharmony_ci			   should happen before case-folding, but most practical
602570af302Sopenharmony_ci			   implementations do it the other way around. Changing
603570af302Sopenharmony_ci			   the order would need efficient representation of
604570af302Sopenharmony_ci			   case-fold ranges and bracket range sets even with
605570af302Sopenharmony_ci			   simple patterns so this is ok for now. */
606570af302Sopenharmony_ci			if (ctx->cflags & REG_ICASE && !class)
607570af302Sopenharmony_ci				if (add_icase_literals(ls, min, max))
608570af302Sopenharmony_ci					return REG_ESPACE;
609570af302Sopenharmony_ci		}
610570af302Sopenharmony_ci	}
611570af302Sopenharmony_ci}
612570af302Sopenharmony_ci
613570af302Sopenharmony_cistatic reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
614570af302Sopenharmony_ci{
615570af302Sopenharmony_ci	int i, max, min, negmax, negmin;
616570af302Sopenharmony_ci	tre_ast_node_t *node = 0, *n;
617570af302Sopenharmony_ci	tre_ctype_t *nc = 0;
618570af302Sopenharmony_ci	tre_literal_t *lit;
619570af302Sopenharmony_ci	struct literals ls;
620570af302Sopenharmony_ci	struct neg neg;
621570af302Sopenharmony_ci	reg_errcode_t err;
622570af302Sopenharmony_ci
623570af302Sopenharmony_ci	ls.mem = ctx->mem;
624570af302Sopenharmony_ci	ls.len = 0;
625570af302Sopenharmony_ci	ls.cap = 32;
626570af302Sopenharmony_ci	ls.a = xmalloc(ls.cap * sizeof *ls.a);
627570af302Sopenharmony_ci	if (!ls.a)
628570af302Sopenharmony_ci		return REG_ESPACE;
629570af302Sopenharmony_ci	neg.len = 0;
630570af302Sopenharmony_ci	neg.negate = *s == '^';
631570af302Sopenharmony_ci	if (neg.negate)
632570af302Sopenharmony_ci		s++;
633570af302Sopenharmony_ci
634570af302Sopenharmony_ci	err = parse_bracket_terms(ctx, s, &ls, &neg);
635570af302Sopenharmony_ci	if (err != REG_OK)
636570af302Sopenharmony_ci		goto parse_bracket_done;
637570af302Sopenharmony_ci
638570af302Sopenharmony_ci	if (neg.negate) {
639570af302Sopenharmony_ci		/*
640570af302Sopenharmony_ci		 * With REG_NEWLINE, POSIX requires that newlines are not matched by
641570af302Sopenharmony_ci		 * any form of a non-matching list.
642570af302Sopenharmony_ci		 */
643570af302Sopenharmony_ci		if (ctx->cflags & REG_NEWLINE) {
644570af302Sopenharmony_ci			lit = tre_new_lit(&ls);
645570af302Sopenharmony_ci			if (!lit) {
646570af302Sopenharmony_ci				err = REG_ESPACE;
647570af302Sopenharmony_ci				goto parse_bracket_done;
648570af302Sopenharmony_ci			}
649570af302Sopenharmony_ci			lit->code_min = '\n';
650570af302Sopenharmony_ci			lit->code_max = '\n';
651570af302Sopenharmony_ci			lit->position = -1;
652570af302Sopenharmony_ci		}
653570af302Sopenharmony_ci		/* Sort the array if we need to negate it. */
654570af302Sopenharmony_ci		qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
655570af302Sopenharmony_ci		/* extra lit for the last negated range */
656570af302Sopenharmony_ci		lit = tre_new_lit(&ls);
657570af302Sopenharmony_ci		if (!lit) {
658570af302Sopenharmony_ci			err = REG_ESPACE;
659570af302Sopenharmony_ci			goto parse_bracket_done;
660570af302Sopenharmony_ci		}
661570af302Sopenharmony_ci		lit->code_min = TRE_CHAR_MAX+1;
662570af302Sopenharmony_ci		lit->code_max = TRE_CHAR_MAX+1;
663570af302Sopenharmony_ci		lit->position = -1;
664570af302Sopenharmony_ci		/* negated classes */
665570af302Sopenharmony_ci		if (neg.len) {
666570af302Sopenharmony_ci			nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
667570af302Sopenharmony_ci			if (!nc) {
668570af302Sopenharmony_ci				err = REG_ESPACE;
669570af302Sopenharmony_ci				goto parse_bracket_done;
670570af302Sopenharmony_ci			}
671570af302Sopenharmony_ci			memcpy(nc, neg.a, neg.len*sizeof *neg.a);
672570af302Sopenharmony_ci			nc[neg.len] = 0;
673570af302Sopenharmony_ci		}
674570af302Sopenharmony_ci	}
675570af302Sopenharmony_ci
676570af302Sopenharmony_ci	/* Build a union of the items in the array, negated if necessary. */
677570af302Sopenharmony_ci	negmax = negmin = 0;
678570af302Sopenharmony_ci	for (i = 0; i < ls.len; i++) {
679570af302Sopenharmony_ci		lit = ls.a[i];
680570af302Sopenharmony_ci		min = lit->code_min;
681570af302Sopenharmony_ci		max = lit->code_max;
682570af302Sopenharmony_ci		if (neg.negate) {
683570af302Sopenharmony_ci			if (min <= negmin) {
684570af302Sopenharmony_ci				/* Overlap. */
685570af302Sopenharmony_ci				negmin = MAX(max + 1, negmin);
686570af302Sopenharmony_ci				continue;
687570af302Sopenharmony_ci			}
688570af302Sopenharmony_ci			negmax = min - 1;
689570af302Sopenharmony_ci			lit->code_min = negmin;
690570af302Sopenharmony_ci			lit->code_max = negmax;
691570af302Sopenharmony_ci			negmin = max + 1;
692570af302Sopenharmony_ci		}
693570af302Sopenharmony_ci		lit->position = ctx->position;
694570af302Sopenharmony_ci		lit->neg_classes = nc;
695570af302Sopenharmony_ci		n = tre_ast_new_node(ctx->mem, LITERAL, lit);
696570af302Sopenharmony_ci		node = tre_ast_new_union(ctx->mem, node, n);
697570af302Sopenharmony_ci		if (!node) {
698570af302Sopenharmony_ci			err = REG_ESPACE;
699570af302Sopenharmony_ci			break;
700570af302Sopenharmony_ci		}
701570af302Sopenharmony_ci	}
702570af302Sopenharmony_ci
703570af302Sopenharmony_ciparse_bracket_done:
704570af302Sopenharmony_ci	xfree(ls.a);
705570af302Sopenharmony_ci	ctx->position++;
706570af302Sopenharmony_ci	ctx->n = node;
707570af302Sopenharmony_ci	return err;
708570af302Sopenharmony_ci}
709570af302Sopenharmony_ci
710570af302Sopenharmony_cistatic const char *parse_dup_count(const char *s, int *n)
711570af302Sopenharmony_ci{
712570af302Sopenharmony_ci	*n = -1;
713570af302Sopenharmony_ci	if (!isdigit(*s))
714570af302Sopenharmony_ci		return s;
715570af302Sopenharmony_ci	*n = 0;
716570af302Sopenharmony_ci	for (;;) {
717570af302Sopenharmony_ci		*n = 10 * *n + (*s - '0');
718570af302Sopenharmony_ci		s++;
719570af302Sopenharmony_ci		if (!isdigit(*s) || *n > RE_DUP_MAX)
720570af302Sopenharmony_ci			break;
721570af302Sopenharmony_ci	}
722570af302Sopenharmony_ci	return s;
723570af302Sopenharmony_ci}
724570af302Sopenharmony_ci
725570af302Sopenharmony_cistatic const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
726570af302Sopenharmony_ci{
727570af302Sopenharmony_ci	int min, max;
728570af302Sopenharmony_ci
729570af302Sopenharmony_ci	s = parse_dup_count(s, &min);
730570af302Sopenharmony_ci	if (*s == ',')
731570af302Sopenharmony_ci		s = parse_dup_count(s+1, &max);
732570af302Sopenharmony_ci	else
733570af302Sopenharmony_ci		max = min;
734570af302Sopenharmony_ci
735570af302Sopenharmony_ci	if (
736570af302Sopenharmony_ci		(max < min && max >= 0) ||
737570af302Sopenharmony_ci		max > RE_DUP_MAX ||
738570af302Sopenharmony_ci		min > RE_DUP_MAX ||
739570af302Sopenharmony_ci		min < 0 ||
740570af302Sopenharmony_ci		(!ere && *s++ != '\\') ||
741570af302Sopenharmony_ci		*s++ != '}'
742570af302Sopenharmony_ci	)
743570af302Sopenharmony_ci		return 0;
744570af302Sopenharmony_ci	*pmin = min;
745570af302Sopenharmony_ci	*pmax = max;
746570af302Sopenharmony_ci	return s;
747570af302Sopenharmony_ci}
748570af302Sopenharmony_ci
749570af302Sopenharmony_cistatic int hexval(unsigned c)
750570af302Sopenharmony_ci{
751570af302Sopenharmony_ci	if (c-'0'<10) return c-'0';
752570af302Sopenharmony_ci	c |= 32;
753570af302Sopenharmony_ci	if (c-'a'<6) return c-'a'+10;
754570af302Sopenharmony_ci	return -1;
755570af302Sopenharmony_ci}
756570af302Sopenharmony_ci
757570af302Sopenharmony_cistatic reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
758570af302Sopenharmony_ci{
759570af302Sopenharmony_ci	if (node->submatch_id >= 0) {
760570af302Sopenharmony_ci		tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
761570af302Sopenharmony_ci		if (!n)
762570af302Sopenharmony_ci			return REG_ESPACE;
763570af302Sopenharmony_ci		n = tre_ast_new_catenation(ctx->mem, n, node);
764570af302Sopenharmony_ci		if (!n)
765570af302Sopenharmony_ci			return REG_ESPACE;
766570af302Sopenharmony_ci		n->num_submatches = node->num_submatches;
767570af302Sopenharmony_ci		node = n;
768570af302Sopenharmony_ci	}
769570af302Sopenharmony_ci	node->submatch_id = subid;
770570af302Sopenharmony_ci	node->num_submatches++;
771570af302Sopenharmony_ci	ctx->n = node;
772570af302Sopenharmony_ci	return REG_OK;
773570af302Sopenharmony_ci}
774570af302Sopenharmony_ci
775570af302Sopenharmony_ci/*
776570af302Sopenharmony_ciBRE grammar:
777570af302Sopenharmony_ciRegex  =  Branch  |  '^'  |  '$'  |  '^$'  |  '^' Branch  |  Branch '$'  |  '^' Branch '$'
778570af302Sopenharmony_ciBranch =  Atom  |  Branch Atom
779570af302Sopenharmony_ciAtom   =  char  |  quoted_char  |  '.'  |  Bracket  |  Atom Dup  |  '\(' Branch '\)'  |  back_ref
780570af302Sopenharmony_ciDup    =  '*'  |  '\{' Count '\}'  |  '\{' Count ',\}'  |  '\{' Count ',' Count '\}'
781570af302Sopenharmony_ci
782570af302Sopenharmony_ci(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
783570af302Sopenharmony_ci
784570af302Sopenharmony_ciERE grammar:
785570af302Sopenharmony_ciRegex  =  Branch  |  Regex '|' Branch
786570af302Sopenharmony_ciBranch =  Atom  |  Branch Atom
787570af302Sopenharmony_ciAtom   =  char  |  quoted_char  |  '.'  |  Bracket  |  Atom Dup  |  '(' Regex ')'  |  '^'  |  '$'
788570af302Sopenharmony_ciDup    =  '*'  |  '+'  |  '?'  |  '{' Count '}'  |  '{' Count ',}'  |  '{' Count ',' Count '}'
789570af302Sopenharmony_ci
790570af302Sopenharmony_ci(a*+?, ^*, $+, \X, {, (|a) are unspecified)
791570af302Sopenharmony_ci*/
792570af302Sopenharmony_ci
793570af302Sopenharmony_cistatic reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
794570af302Sopenharmony_ci{
795570af302Sopenharmony_ci	int len, ere = ctx->cflags & REG_EXTENDED;
796570af302Sopenharmony_ci	const char *p;
797570af302Sopenharmony_ci	tre_ast_node_t *node;
798570af302Sopenharmony_ci	wchar_t wc;
799570af302Sopenharmony_ci	switch (*s) {
800570af302Sopenharmony_ci	case '[':
801570af302Sopenharmony_ci		return parse_bracket(ctx, s+1);
802570af302Sopenharmony_ci	case '\\':
803570af302Sopenharmony_ci		p = tre_expand_macro(s+1);
804570af302Sopenharmony_ci		if (p) {
805570af302Sopenharmony_ci			/* assume \X expansion is a single atom */
806570af302Sopenharmony_ci			reg_errcode_t err = parse_atom(ctx, p);
807570af302Sopenharmony_ci			ctx->s = s+2;
808570af302Sopenharmony_ci			return err;
809570af302Sopenharmony_ci		}
810570af302Sopenharmony_ci		/* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
811570af302Sopenharmony_ci		switch (*++s) {
812570af302Sopenharmony_ci		case 0:
813570af302Sopenharmony_ci			return REG_EESCAPE;
814570af302Sopenharmony_ci		case 'b':
815570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
816570af302Sopenharmony_ci			break;
817570af302Sopenharmony_ci		case 'B':
818570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
819570af302Sopenharmony_ci			break;
820570af302Sopenharmony_ci		case '<':
821570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
822570af302Sopenharmony_ci			break;
823570af302Sopenharmony_ci		case '>':
824570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
825570af302Sopenharmony_ci			break;
826570af302Sopenharmony_ci		case 'x':
827570af302Sopenharmony_ci			s++;
828570af302Sopenharmony_ci			int i, v = 0, c;
829570af302Sopenharmony_ci			len = 2;
830570af302Sopenharmony_ci			if (*s == '{') {
831570af302Sopenharmony_ci				len = 8;
832570af302Sopenharmony_ci				s++;
833570af302Sopenharmony_ci			}
834570af302Sopenharmony_ci			for (i=0; i<len && v<0x110000; i++) {
835570af302Sopenharmony_ci				c = hexval(s[i]);
836570af302Sopenharmony_ci				if (c < 0) break;
837570af302Sopenharmony_ci				v = 16*v + c;
838570af302Sopenharmony_ci			}
839570af302Sopenharmony_ci			s += i;
840570af302Sopenharmony_ci			if (len == 8) {
841570af302Sopenharmony_ci				if (*s != '}')
842570af302Sopenharmony_ci					return REG_EBRACE;
843570af302Sopenharmony_ci				s++;
844570af302Sopenharmony_ci			}
845570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
846570af302Sopenharmony_ci			s--;
847570af302Sopenharmony_ci			break;
848570af302Sopenharmony_ci		case '{':
849570af302Sopenharmony_ci		case '+':
850570af302Sopenharmony_ci		case '?':
851570af302Sopenharmony_ci			/* extension: treat \+, \? as repetitions in BRE */
852570af302Sopenharmony_ci			/* reject repetitions after empty expression in BRE */
853570af302Sopenharmony_ci			if (!ere)
854570af302Sopenharmony_ci				return REG_BADRPT;
855570af302Sopenharmony_ci		case '|':
856570af302Sopenharmony_ci			/* extension: treat \| as alternation in BRE */
857570af302Sopenharmony_ci			if (!ere) {
858570af302Sopenharmony_ci				node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
859570af302Sopenharmony_ci				s--;
860570af302Sopenharmony_ci				goto end;
861570af302Sopenharmony_ci			}
862570af302Sopenharmony_ci			/* fallthrough */
863570af302Sopenharmony_ci		default:
864570af302Sopenharmony_ci			if (!ere && (unsigned)*s-'1' < 9) {
865570af302Sopenharmony_ci				/* back reference */
866570af302Sopenharmony_ci				int val = *s - '0';
867570af302Sopenharmony_ci				node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
868570af302Sopenharmony_ci				ctx->max_backref = MAX(val, ctx->max_backref);
869570af302Sopenharmony_ci			} else {
870570af302Sopenharmony_ci				/* extension: accept unknown escaped char
871570af302Sopenharmony_ci				   as a literal */
872570af302Sopenharmony_ci				goto parse_literal;
873570af302Sopenharmony_ci			}
874570af302Sopenharmony_ci		}
875570af302Sopenharmony_ci		s++;
876570af302Sopenharmony_ci		break;
877570af302Sopenharmony_ci	case '.':
878570af302Sopenharmony_ci		if (ctx->cflags & REG_NEWLINE) {
879570af302Sopenharmony_ci			tre_ast_node_t *tmp1, *tmp2;
880570af302Sopenharmony_ci			tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
881570af302Sopenharmony_ci			tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
882570af302Sopenharmony_ci			if (tmp1 && tmp2)
883570af302Sopenharmony_ci				node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
884570af302Sopenharmony_ci			else
885570af302Sopenharmony_ci				node = 0;
886570af302Sopenharmony_ci		} else {
887570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
888570af302Sopenharmony_ci		}
889570af302Sopenharmony_ci		s++;
890570af302Sopenharmony_ci		break;
891570af302Sopenharmony_ci	case '^':
892570af302Sopenharmony_ci		/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
893570af302Sopenharmony_ci		if (!ere && s != ctx->start)
894570af302Sopenharmony_ci			goto parse_literal;
895570af302Sopenharmony_ci		node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
896570af302Sopenharmony_ci		s++;
897570af302Sopenharmony_ci		break;
898570af302Sopenharmony_ci	case '$':
899570af302Sopenharmony_ci		/* '$' is special everywhere in EREs, and at the end of a BRE subexpression. */
900570af302Sopenharmony_ci		if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|')))
901570af302Sopenharmony_ci			goto parse_literal;
902570af302Sopenharmony_ci		node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
903570af302Sopenharmony_ci		s++;
904570af302Sopenharmony_ci		break;
905570af302Sopenharmony_ci	case '*':
906570af302Sopenharmony_ci	case '{':
907570af302Sopenharmony_ci	case '+':
908570af302Sopenharmony_ci	case '?':
909570af302Sopenharmony_ci		/* reject repetitions after empty expression in ERE */
910570af302Sopenharmony_ci		if (ere)
911570af302Sopenharmony_ci			return REG_BADRPT;
912570af302Sopenharmony_ci	case '|':
913570af302Sopenharmony_ci		if (!ere)
914570af302Sopenharmony_ci			goto parse_literal;
915570af302Sopenharmony_ci	case 0:
916570af302Sopenharmony_ci		node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
917570af302Sopenharmony_ci		break;
918570af302Sopenharmony_ci	default:
919570af302Sopenharmony_ciparse_literal:
920570af302Sopenharmony_ci		len = mbtowc(&wc, s, -1);
921570af302Sopenharmony_ci		if (len < 0)
922570af302Sopenharmony_ci			return REG_BADPAT;
923570af302Sopenharmony_ci		if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
924570af302Sopenharmony_ci			tre_ast_node_t *tmp1, *tmp2;
925570af302Sopenharmony_ci			/* multiple opposite case characters are not supported */
926570af302Sopenharmony_ci			tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
927570af302Sopenharmony_ci			tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
928570af302Sopenharmony_ci			if (tmp1 && tmp2)
929570af302Sopenharmony_ci				node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
930570af302Sopenharmony_ci			else
931570af302Sopenharmony_ci				node = 0;
932570af302Sopenharmony_ci		} else {
933570af302Sopenharmony_ci			node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
934570af302Sopenharmony_ci		}
935570af302Sopenharmony_ci		ctx->position++;
936570af302Sopenharmony_ci		s += len;
937570af302Sopenharmony_ci		break;
938570af302Sopenharmony_ci	}
939570af302Sopenharmony_ciend:
940570af302Sopenharmony_ci	if (!node)
941570af302Sopenharmony_ci		return REG_ESPACE;
942570af302Sopenharmony_ci	ctx->n = node;
943570af302Sopenharmony_ci	ctx->s = s;
944570af302Sopenharmony_ci	return REG_OK;
945570af302Sopenharmony_ci}
946570af302Sopenharmony_ci
947570af302Sopenharmony_ci#define PUSHPTR(err, s, v) do { \
948570af302Sopenharmony_ci	if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
949570af302Sopenharmony_ci		return err; \
950570af302Sopenharmony_ci} while(0)
951570af302Sopenharmony_ci
952570af302Sopenharmony_ci#define PUSHINT(err, s, v) do { \
953570af302Sopenharmony_ci	if ((err = tre_stack_push_int(s, v)) != REG_OK) \
954570af302Sopenharmony_ci		return err; \
955570af302Sopenharmony_ci} while(0)
956570af302Sopenharmony_ci
957570af302Sopenharmony_cistatic reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
958570af302Sopenharmony_ci{
959570af302Sopenharmony_ci	tre_ast_node_t *nbranch=0, *nunion=0;
960570af302Sopenharmony_ci	int ere = ctx->cflags & REG_EXTENDED;
961570af302Sopenharmony_ci	const char *s = ctx->start;
962570af302Sopenharmony_ci	int subid = 0;
963570af302Sopenharmony_ci	int depth = 0;
964570af302Sopenharmony_ci	reg_errcode_t err;
965570af302Sopenharmony_ci	tre_stack_t *stack = ctx->stack;
966570af302Sopenharmony_ci
967570af302Sopenharmony_ci	PUSHINT(err, stack, subid++);
968570af302Sopenharmony_ci	for (;;) {
969570af302Sopenharmony_ci		if ((!ere && *s == '\\' && s[1] == '(') ||
970570af302Sopenharmony_ci		    (ere && *s == '(')) {
971570af302Sopenharmony_ci			PUSHPTR(err, stack, nunion);
972570af302Sopenharmony_ci			PUSHPTR(err, stack, nbranch);
973570af302Sopenharmony_ci			PUSHINT(err, stack, subid++);
974570af302Sopenharmony_ci			s++;
975570af302Sopenharmony_ci			if (!ere)
976570af302Sopenharmony_ci				s++;
977570af302Sopenharmony_ci			depth++;
978570af302Sopenharmony_ci			nbranch = nunion = 0;
979570af302Sopenharmony_ci			ctx->start = s;
980570af302Sopenharmony_ci			continue;
981570af302Sopenharmony_ci		}
982570af302Sopenharmony_ci		if ((!ere && *s == '\\' && s[1] == ')') ||
983570af302Sopenharmony_ci		    (ere && *s == ')' && depth)) {
984570af302Sopenharmony_ci			ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
985570af302Sopenharmony_ci			if (!ctx->n)
986570af302Sopenharmony_ci				return REG_ESPACE;
987570af302Sopenharmony_ci		} else {
988570af302Sopenharmony_ci			err = parse_atom(ctx, s);
989570af302Sopenharmony_ci			if (err != REG_OK)
990570af302Sopenharmony_ci				return err;
991570af302Sopenharmony_ci			s = ctx->s;
992570af302Sopenharmony_ci		}
993570af302Sopenharmony_ci
994570af302Sopenharmony_ci	parse_iter:
995570af302Sopenharmony_ci		for (;;) {
996570af302Sopenharmony_ci			int min, max;
997570af302Sopenharmony_ci
998570af302Sopenharmony_ci			if (*s!='\\' && *s!='*') {
999570af302Sopenharmony_ci				if (!ere)
1000570af302Sopenharmony_ci					break;
1001570af302Sopenharmony_ci				if (*s!='+' && *s!='?' && *s!='{')
1002570af302Sopenharmony_ci					break;
1003570af302Sopenharmony_ci			}
1004570af302Sopenharmony_ci			if (*s=='\\' && ere)
1005570af302Sopenharmony_ci				break;
1006570af302Sopenharmony_ci			/* extension: treat \+, \? as repetitions in BRE */
1007570af302Sopenharmony_ci			if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
1008570af302Sopenharmony_ci				break;
1009570af302Sopenharmony_ci			if (*s=='\\')
1010570af302Sopenharmony_ci				s++;
1011570af302Sopenharmony_ci
1012570af302Sopenharmony_ci			/* handle ^* at the start of a BRE. */
1013570af302Sopenharmony_ci			if (!ere && s==ctx->start+1 && s[-1]=='^')
1014570af302Sopenharmony_ci				break;
1015570af302Sopenharmony_ci
1016570af302Sopenharmony_ci			/* extension: multiple consecutive *+?{,} is unspecified,
1017570af302Sopenharmony_ci			   but (a+)+ has to be supported so accepting a++ makes
1018570af302Sopenharmony_ci			   sense, note however that the RE_DUP_MAX limit can be
1019570af302Sopenharmony_ci			   circumvented: (a{255}){255} uses a lot of memory.. */
1020570af302Sopenharmony_ci			if (*s=='{') {
1021570af302Sopenharmony_ci				s = parse_dup(s+1, ere, &min, &max);
1022570af302Sopenharmony_ci				if (!s)
1023570af302Sopenharmony_ci					return REG_BADBR;
1024570af302Sopenharmony_ci			} else {
1025570af302Sopenharmony_ci				min=0;
1026570af302Sopenharmony_ci				max=-1;
1027570af302Sopenharmony_ci				if (*s == '+')
1028570af302Sopenharmony_ci					min = 1;
1029570af302Sopenharmony_ci				if (*s == '?')
1030570af302Sopenharmony_ci					max = 1;
1031570af302Sopenharmony_ci				s++;
1032570af302Sopenharmony_ci			}
1033570af302Sopenharmony_ci			if (max == 0)
1034570af302Sopenharmony_ci				ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1035570af302Sopenharmony_ci			else
1036570af302Sopenharmony_ci				ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1037570af302Sopenharmony_ci			if (!ctx->n)
1038570af302Sopenharmony_ci				return REG_ESPACE;
1039570af302Sopenharmony_ci		}
1040570af302Sopenharmony_ci
1041570af302Sopenharmony_ci		nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1042570af302Sopenharmony_ci		if ((ere && *s == '|') ||
1043570af302Sopenharmony_ci		    (ere && *s == ')' && depth) ||
1044570af302Sopenharmony_ci		    (!ere && *s == '\\' && s[1] == ')') ||
1045570af302Sopenharmony_ci		    /* extension: treat \| as alternation in BRE */
1046570af302Sopenharmony_ci		    (!ere && *s == '\\' && s[1] == '|') ||
1047570af302Sopenharmony_ci		    !*s) {
1048570af302Sopenharmony_ci			/* extension: empty branch is unspecified (), (|a), (a|)
1049570af302Sopenharmony_ci			   here they are not rejected but match on empty string */
1050570af302Sopenharmony_ci			int c = *s;
1051570af302Sopenharmony_ci			nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1052570af302Sopenharmony_ci			nbranch = 0;
1053570af302Sopenharmony_ci
1054570af302Sopenharmony_ci			if (c == '\\' && s[1] == '|') {
1055570af302Sopenharmony_ci				s+=2;
1056570af302Sopenharmony_ci				ctx->start = s;
1057570af302Sopenharmony_ci			} else if (c == '|') {
1058570af302Sopenharmony_ci				s++;
1059570af302Sopenharmony_ci				ctx->start = s;
1060570af302Sopenharmony_ci			} else {
1061570af302Sopenharmony_ci				if (c == '\\') {
1062570af302Sopenharmony_ci					if (!depth) return REG_EPAREN;
1063570af302Sopenharmony_ci					s+=2;
1064570af302Sopenharmony_ci				} else if (c == ')')
1065570af302Sopenharmony_ci					s++;
1066570af302Sopenharmony_ci				depth--;
1067570af302Sopenharmony_ci				err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1068570af302Sopenharmony_ci				if (err != REG_OK)
1069570af302Sopenharmony_ci					return err;
1070570af302Sopenharmony_ci				if (!c && depth<0) {
1071570af302Sopenharmony_ci					ctx->submatch_id = subid;
1072570af302Sopenharmony_ci					return REG_OK;
1073570af302Sopenharmony_ci				}
1074570af302Sopenharmony_ci				if (!c || depth<0)
1075570af302Sopenharmony_ci					return REG_EPAREN;
1076570af302Sopenharmony_ci				nbranch = tre_stack_pop_voidptr(stack);
1077570af302Sopenharmony_ci				nunion = tre_stack_pop_voidptr(stack);
1078570af302Sopenharmony_ci				goto parse_iter;
1079570af302Sopenharmony_ci			}
1080570af302Sopenharmony_ci		}
1081570af302Sopenharmony_ci	}
1082570af302Sopenharmony_ci}
1083570af302Sopenharmony_ci
1084570af302Sopenharmony_ci
1085570af302Sopenharmony_ci/***********************************************************************
1086570af302Sopenharmony_ci from tre-compile.c
1087570af302Sopenharmony_ci***********************************************************************/
1088570af302Sopenharmony_ci
1089570af302Sopenharmony_ci
1090570af302Sopenharmony_ci/*
1091570af302Sopenharmony_ci  TODO:
1092570af302Sopenharmony_ci   - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1093570af302Sopenharmony_ci     function calls.
1094570af302Sopenharmony_ci*/
1095570af302Sopenharmony_ci
1096570af302Sopenharmony_ci/*
1097570af302Sopenharmony_ci  Algorithms to setup tags so that submatch addressing can be done.
1098570af302Sopenharmony_ci*/
1099570af302Sopenharmony_ci
1100570af302Sopenharmony_ci
1101570af302Sopenharmony_ci/* Inserts a catenation node to the root of the tree given in `node'.
1102570af302Sopenharmony_ci   As the left child a new tag with number `tag_id' to `node' is added,
1103570af302Sopenharmony_ci   and the right child is the old root. */
1104570af302Sopenharmony_cistatic reg_errcode_t
1105570af302Sopenharmony_citre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1106570af302Sopenharmony_ci{
1107570af302Sopenharmony_ci  tre_catenation_t *c;
1108570af302Sopenharmony_ci
1109570af302Sopenharmony_ci  c = tre_mem_alloc(mem, sizeof(*c));
1110570af302Sopenharmony_ci  if (c == NULL)
1111570af302Sopenharmony_ci    return REG_ESPACE;
1112570af302Sopenharmony_ci  c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1113570af302Sopenharmony_ci  if (c->left == NULL)
1114570af302Sopenharmony_ci    return REG_ESPACE;
1115570af302Sopenharmony_ci  c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1116570af302Sopenharmony_ci  if (c->right == NULL)
1117570af302Sopenharmony_ci    return REG_ESPACE;
1118570af302Sopenharmony_ci
1119570af302Sopenharmony_ci  c->right->obj = node->obj;
1120570af302Sopenharmony_ci  c->right->type = node->type;
1121570af302Sopenharmony_ci  c->right->nullable = -1;
1122570af302Sopenharmony_ci  c->right->submatch_id = -1;
1123570af302Sopenharmony_ci  c->right->firstpos = NULL;
1124570af302Sopenharmony_ci  c->right->lastpos = NULL;
1125570af302Sopenharmony_ci  c->right->num_tags = 0;
1126570af302Sopenharmony_ci  c->right->num_submatches = 0;
1127570af302Sopenharmony_ci  node->obj = c;
1128570af302Sopenharmony_ci  node->type = CATENATION;
1129570af302Sopenharmony_ci  return REG_OK;
1130570af302Sopenharmony_ci}
1131570af302Sopenharmony_ci
1132570af302Sopenharmony_ci/* Inserts a catenation node to the root of the tree given in `node'.
1133570af302Sopenharmony_ci   As the right child a new tag with number `tag_id' to `node' is added,
1134570af302Sopenharmony_ci   and the left child is the old root. */
1135570af302Sopenharmony_cistatic reg_errcode_t
1136570af302Sopenharmony_citre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1137570af302Sopenharmony_ci{
1138570af302Sopenharmony_ci  tre_catenation_t *c;
1139570af302Sopenharmony_ci
1140570af302Sopenharmony_ci  c = tre_mem_alloc(mem, sizeof(*c));
1141570af302Sopenharmony_ci  if (c == NULL)
1142570af302Sopenharmony_ci    return REG_ESPACE;
1143570af302Sopenharmony_ci  c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1144570af302Sopenharmony_ci  if (c->right == NULL)
1145570af302Sopenharmony_ci    return REG_ESPACE;
1146570af302Sopenharmony_ci  c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1147570af302Sopenharmony_ci  if (c->left == NULL)
1148570af302Sopenharmony_ci    return REG_ESPACE;
1149570af302Sopenharmony_ci
1150570af302Sopenharmony_ci  c->left->obj = node->obj;
1151570af302Sopenharmony_ci  c->left->type = node->type;
1152570af302Sopenharmony_ci  c->left->nullable = -1;
1153570af302Sopenharmony_ci  c->left->submatch_id = -1;
1154570af302Sopenharmony_ci  c->left->firstpos = NULL;
1155570af302Sopenharmony_ci  c->left->lastpos = NULL;
1156570af302Sopenharmony_ci  c->left->num_tags = 0;
1157570af302Sopenharmony_ci  c->left->num_submatches = 0;
1158570af302Sopenharmony_ci  node->obj = c;
1159570af302Sopenharmony_ci  node->type = CATENATION;
1160570af302Sopenharmony_ci  return REG_OK;
1161570af302Sopenharmony_ci}
1162570af302Sopenharmony_ci
1163570af302Sopenharmony_citypedef enum {
1164570af302Sopenharmony_ci  ADDTAGS_RECURSE,
1165570af302Sopenharmony_ci  ADDTAGS_AFTER_ITERATION,
1166570af302Sopenharmony_ci  ADDTAGS_AFTER_UNION_LEFT,
1167570af302Sopenharmony_ci  ADDTAGS_AFTER_UNION_RIGHT,
1168570af302Sopenharmony_ci  ADDTAGS_AFTER_CAT_LEFT,
1169570af302Sopenharmony_ci  ADDTAGS_AFTER_CAT_RIGHT,
1170570af302Sopenharmony_ci  ADDTAGS_SET_SUBMATCH_END
1171570af302Sopenharmony_ci} tre_addtags_symbol_t;
1172570af302Sopenharmony_ci
1173570af302Sopenharmony_ci
1174570af302Sopenharmony_citypedef struct {
1175570af302Sopenharmony_ci  int tag;
1176570af302Sopenharmony_ci  int next_tag;
1177570af302Sopenharmony_ci} tre_tag_states_t;
1178570af302Sopenharmony_ci
1179570af302Sopenharmony_ci
1180570af302Sopenharmony_ci/* Go through `regset' and set submatch data for submatches that are
1181570af302Sopenharmony_ci   using this tag. */
1182570af302Sopenharmony_cistatic void
1183570af302Sopenharmony_citre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1184570af302Sopenharmony_ci{
1185570af302Sopenharmony_ci  int i;
1186570af302Sopenharmony_ci
1187570af302Sopenharmony_ci  for (i = 0; regset[i] >= 0; i++)
1188570af302Sopenharmony_ci    {
1189570af302Sopenharmony_ci      int id = regset[i] / 2;
1190570af302Sopenharmony_ci      int start = !(regset[i] % 2);
1191570af302Sopenharmony_ci      if (start)
1192570af302Sopenharmony_ci	tnfa->submatch_data[id].so_tag = tag;
1193570af302Sopenharmony_ci      else
1194570af302Sopenharmony_ci	tnfa->submatch_data[id].eo_tag = tag;
1195570af302Sopenharmony_ci    }
1196570af302Sopenharmony_ci  regset[0] = -1;
1197570af302Sopenharmony_ci}
1198570af302Sopenharmony_ci
1199570af302Sopenharmony_ci
1200570af302Sopenharmony_ci/* Adds tags to appropriate locations in the parse tree in `tree', so that
1201570af302Sopenharmony_ci   subexpressions marked for submatch addressing can be traced. */
1202570af302Sopenharmony_cistatic reg_errcode_t
1203570af302Sopenharmony_citre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1204570af302Sopenharmony_ci	     tre_tnfa_t *tnfa)
1205570af302Sopenharmony_ci{
1206570af302Sopenharmony_ci  reg_errcode_t status = REG_OK;
1207570af302Sopenharmony_ci  tre_addtags_symbol_t symbol;
1208570af302Sopenharmony_ci  tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1209570af302Sopenharmony_ci  int bottom = tre_stack_num_objects(stack);
1210570af302Sopenharmony_ci  /* True for first pass (counting number of needed tags) */
1211570af302Sopenharmony_ci  int first_pass = (mem == NULL || tnfa == NULL);
1212570af302Sopenharmony_ci  int *regset, *orig_regset;
1213570af302Sopenharmony_ci  int num_tags = 0; /* Total number of tags. */
1214570af302Sopenharmony_ci  int num_minimals = 0;	 /* Number of special minimal tags. */
1215570af302Sopenharmony_ci  int tag = 0;	    /* The tag that is to be added next. */
1216570af302Sopenharmony_ci  int next_tag = 1; /* Next tag to use after this one. */
1217570af302Sopenharmony_ci  int *parents;	    /* Stack of submatches the current submatch is
1218570af302Sopenharmony_ci		       contained in. */
1219570af302Sopenharmony_ci  int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1220570af302Sopenharmony_ci  tre_tag_states_t *saved_states;
1221570af302Sopenharmony_ci
1222570af302Sopenharmony_ci  tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1223570af302Sopenharmony_ci  if (!first_pass)
1224570af302Sopenharmony_ci    {
1225570af302Sopenharmony_ci      tnfa->end_tag = 0;
1226570af302Sopenharmony_ci      tnfa->minimal_tags[0] = -1;
1227570af302Sopenharmony_ci    }
1228570af302Sopenharmony_ci
1229570af302Sopenharmony_ci  regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1230570af302Sopenharmony_ci  if (regset == NULL)
1231570af302Sopenharmony_ci    return REG_ESPACE;
1232570af302Sopenharmony_ci  regset[0] = -1;
1233570af302Sopenharmony_ci  orig_regset = regset;
1234570af302Sopenharmony_ci
1235570af302Sopenharmony_ci  parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1236570af302Sopenharmony_ci  if (parents == NULL)
1237570af302Sopenharmony_ci    {
1238570af302Sopenharmony_ci      xfree(regset);
1239570af302Sopenharmony_ci      return REG_ESPACE;
1240570af302Sopenharmony_ci    }
1241570af302Sopenharmony_ci  parents[0] = -1;
1242570af302Sopenharmony_ci
1243570af302Sopenharmony_ci  saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1244570af302Sopenharmony_ci  if (saved_states == NULL)
1245570af302Sopenharmony_ci    {
1246570af302Sopenharmony_ci      xfree(regset);
1247570af302Sopenharmony_ci      xfree(parents);
1248570af302Sopenharmony_ci      return REG_ESPACE;
1249570af302Sopenharmony_ci    }
1250570af302Sopenharmony_ci  else
1251570af302Sopenharmony_ci    {
1252570af302Sopenharmony_ci      unsigned int i;
1253570af302Sopenharmony_ci      for (i = 0; i <= tnfa->num_submatches; i++)
1254570af302Sopenharmony_ci	saved_states[i].tag = -1;
1255570af302Sopenharmony_ci    }
1256570af302Sopenharmony_ci
1257570af302Sopenharmony_ci  STACK_PUSH(stack, voidptr, node);
1258570af302Sopenharmony_ci  STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1259570af302Sopenharmony_ci
1260570af302Sopenharmony_ci  while (tre_stack_num_objects(stack) > bottom)
1261570af302Sopenharmony_ci    {
1262570af302Sopenharmony_ci      if (status != REG_OK)
1263570af302Sopenharmony_ci	break;
1264570af302Sopenharmony_ci
1265570af302Sopenharmony_ci      symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1266570af302Sopenharmony_ci      switch (symbol)
1267570af302Sopenharmony_ci	{
1268570af302Sopenharmony_ci
1269570af302Sopenharmony_ci	case ADDTAGS_SET_SUBMATCH_END:
1270570af302Sopenharmony_ci	  {
1271570af302Sopenharmony_ci	    int id = tre_stack_pop_int(stack);
1272570af302Sopenharmony_ci	    int i;
1273570af302Sopenharmony_ci
1274570af302Sopenharmony_ci	    /* Add end of this submatch to regset. */
1275570af302Sopenharmony_ci	    for (i = 0; regset[i] >= 0; i++);
1276570af302Sopenharmony_ci	    regset[i] = id * 2 + 1;
1277570af302Sopenharmony_ci	    regset[i + 1] = -1;
1278570af302Sopenharmony_ci
1279570af302Sopenharmony_ci	    /* Pop this submatch from the parents stack. */
1280570af302Sopenharmony_ci	    for (i = 0; parents[i] >= 0; i++);
1281570af302Sopenharmony_ci	    parents[i - 1] = -1;
1282570af302Sopenharmony_ci	    break;
1283570af302Sopenharmony_ci	  }
1284570af302Sopenharmony_ci
1285570af302Sopenharmony_ci	case ADDTAGS_RECURSE:
1286570af302Sopenharmony_ci	  node = tre_stack_pop_voidptr(stack);
1287570af302Sopenharmony_ci
1288570af302Sopenharmony_ci	  if (node->submatch_id >= 0)
1289570af302Sopenharmony_ci	    {
1290570af302Sopenharmony_ci	      int id = node->submatch_id;
1291570af302Sopenharmony_ci	      int i;
1292570af302Sopenharmony_ci
1293570af302Sopenharmony_ci
1294570af302Sopenharmony_ci	      /* Add start of this submatch to regset. */
1295570af302Sopenharmony_ci	      for (i = 0; regset[i] >= 0; i++);
1296570af302Sopenharmony_ci	      regset[i] = id * 2;
1297570af302Sopenharmony_ci	      regset[i + 1] = -1;
1298570af302Sopenharmony_ci
1299570af302Sopenharmony_ci	      if (!first_pass)
1300570af302Sopenharmony_ci		{
1301570af302Sopenharmony_ci		  for (i = 0; parents[i] >= 0; i++);
1302570af302Sopenharmony_ci		  tnfa->submatch_data[id].parents = NULL;
1303570af302Sopenharmony_ci		  if (i > 0)
1304570af302Sopenharmony_ci		    {
1305570af302Sopenharmony_ci		      int *p = xmalloc(sizeof(*p) * (i + 1));
1306570af302Sopenharmony_ci		      if (p == NULL)
1307570af302Sopenharmony_ci			{
1308570af302Sopenharmony_ci			  status = REG_ESPACE;
1309570af302Sopenharmony_ci			  break;
1310570af302Sopenharmony_ci			}
1311570af302Sopenharmony_ci		      assert(tnfa->submatch_data[id].parents == NULL);
1312570af302Sopenharmony_ci		      tnfa->submatch_data[id].parents = p;
1313570af302Sopenharmony_ci		      for (i = 0; parents[i] >= 0; i++)
1314570af302Sopenharmony_ci			p[i] = parents[i];
1315570af302Sopenharmony_ci		      p[i] = -1;
1316570af302Sopenharmony_ci		    }
1317570af302Sopenharmony_ci		}
1318570af302Sopenharmony_ci
1319570af302Sopenharmony_ci	      /* Add end of this submatch to regset after processing this
1320570af302Sopenharmony_ci		 node. */
1321570af302Sopenharmony_ci	      STACK_PUSHX(stack, int, node->submatch_id);
1322570af302Sopenharmony_ci	      STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1323570af302Sopenharmony_ci	    }
1324570af302Sopenharmony_ci
1325570af302Sopenharmony_ci	  switch (node->type)
1326570af302Sopenharmony_ci	    {
1327570af302Sopenharmony_ci	    case LITERAL:
1328570af302Sopenharmony_ci	      {
1329570af302Sopenharmony_ci		tre_literal_t *lit = node->obj;
1330570af302Sopenharmony_ci
1331570af302Sopenharmony_ci		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1332570af302Sopenharmony_ci		  {
1333570af302Sopenharmony_ci		    int i;
1334570af302Sopenharmony_ci		    if (regset[0] >= 0)
1335570af302Sopenharmony_ci		      {
1336570af302Sopenharmony_ci			/* Regset is not empty, so add a tag before the
1337570af302Sopenharmony_ci			   literal or backref. */
1338570af302Sopenharmony_ci			if (!first_pass)
1339570af302Sopenharmony_ci			  {
1340570af302Sopenharmony_ci			    status = tre_add_tag_left(mem, node, tag);
1341570af302Sopenharmony_ci			    tnfa->tag_directions[tag] = direction;
1342570af302Sopenharmony_ci			    if (minimal_tag >= 0)
1343570af302Sopenharmony_ci			      {
1344570af302Sopenharmony_ci				for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1345570af302Sopenharmony_ci				tnfa->minimal_tags[i] = tag;
1346570af302Sopenharmony_ci				tnfa->minimal_tags[i + 1] = minimal_tag;
1347570af302Sopenharmony_ci				tnfa->minimal_tags[i + 2] = -1;
1348570af302Sopenharmony_ci				minimal_tag = -1;
1349570af302Sopenharmony_ci				num_minimals++;
1350570af302Sopenharmony_ci			      }
1351570af302Sopenharmony_ci			    tre_purge_regset(regset, tnfa, tag);
1352570af302Sopenharmony_ci			  }
1353570af302Sopenharmony_ci			else
1354570af302Sopenharmony_ci			  {
1355570af302Sopenharmony_ci			    node->num_tags = 1;
1356570af302Sopenharmony_ci			  }
1357570af302Sopenharmony_ci
1358570af302Sopenharmony_ci			regset[0] = -1;
1359570af302Sopenharmony_ci			tag = next_tag;
1360570af302Sopenharmony_ci			num_tags++;
1361570af302Sopenharmony_ci			next_tag++;
1362570af302Sopenharmony_ci		      }
1363570af302Sopenharmony_ci		  }
1364570af302Sopenharmony_ci		else
1365570af302Sopenharmony_ci		  {
1366570af302Sopenharmony_ci		    assert(!IS_TAG(lit));
1367570af302Sopenharmony_ci		  }
1368570af302Sopenharmony_ci		break;
1369570af302Sopenharmony_ci	      }
1370570af302Sopenharmony_ci	    case CATENATION:
1371570af302Sopenharmony_ci	      {
1372570af302Sopenharmony_ci		tre_catenation_t *cat = node->obj;
1373570af302Sopenharmony_ci		tre_ast_node_t *left = cat->left;
1374570af302Sopenharmony_ci		tre_ast_node_t *right = cat->right;
1375570af302Sopenharmony_ci		int reserved_tag = -1;
1376570af302Sopenharmony_ci
1377570af302Sopenharmony_ci
1378570af302Sopenharmony_ci		/* After processing right child. */
1379570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, node);
1380570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1381570af302Sopenharmony_ci
1382570af302Sopenharmony_ci		/* Process right child. */
1383570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, right);
1384570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1385570af302Sopenharmony_ci
1386570af302Sopenharmony_ci		/* After processing left child. */
1387570af302Sopenharmony_ci		STACK_PUSHX(stack, int, next_tag + left->num_tags);
1388570af302Sopenharmony_ci		if (left->num_tags > 0 && right->num_tags > 0)
1389570af302Sopenharmony_ci		  {
1390570af302Sopenharmony_ci		    /* Reserve the next tag to the right child. */
1391570af302Sopenharmony_ci		    reserved_tag = next_tag;
1392570af302Sopenharmony_ci		    next_tag++;
1393570af302Sopenharmony_ci		  }
1394570af302Sopenharmony_ci		STACK_PUSHX(stack, int, reserved_tag);
1395570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1396570af302Sopenharmony_ci
1397570af302Sopenharmony_ci		/* Process left child. */
1398570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, left);
1399570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1400570af302Sopenharmony_ci
1401570af302Sopenharmony_ci		}
1402570af302Sopenharmony_ci	      break;
1403570af302Sopenharmony_ci	    case ITERATION:
1404570af302Sopenharmony_ci	      {
1405570af302Sopenharmony_ci		tre_iteration_t *iter = node->obj;
1406570af302Sopenharmony_ci
1407570af302Sopenharmony_ci		if (first_pass)
1408570af302Sopenharmony_ci		  {
1409570af302Sopenharmony_ci		    STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1410570af302Sopenharmony_ci		  }
1411570af302Sopenharmony_ci		else
1412570af302Sopenharmony_ci		  {
1413570af302Sopenharmony_ci		    STACK_PUSHX(stack, int, tag);
1414570af302Sopenharmony_ci		    STACK_PUSHX(stack, int, iter->minimal);
1415570af302Sopenharmony_ci		  }
1416570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, node);
1417570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1418570af302Sopenharmony_ci
1419570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, iter->arg);
1420570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1421570af302Sopenharmony_ci
1422570af302Sopenharmony_ci		/* Regset is not empty, so add a tag here. */
1423570af302Sopenharmony_ci		if (regset[0] >= 0 || iter->minimal)
1424570af302Sopenharmony_ci		  {
1425570af302Sopenharmony_ci		    if (!first_pass)
1426570af302Sopenharmony_ci		      {
1427570af302Sopenharmony_ci			int i;
1428570af302Sopenharmony_ci			status = tre_add_tag_left(mem, node, tag);
1429570af302Sopenharmony_ci			if (iter->minimal)
1430570af302Sopenharmony_ci			  tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1431570af302Sopenharmony_ci			else
1432570af302Sopenharmony_ci			  tnfa->tag_directions[tag] = direction;
1433570af302Sopenharmony_ci			if (minimal_tag >= 0)
1434570af302Sopenharmony_ci			  {
1435570af302Sopenharmony_ci			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1436570af302Sopenharmony_ci			    tnfa->minimal_tags[i] = tag;
1437570af302Sopenharmony_ci			    tnfa->minimal_tags[i + 1] = minimal_tag;
1438570af302Sopenharmony_ci			    tnfa->minimal_tags[i + 2] = -1;
1439570af302Sopenharmony_ci			    minimal_tag = -1;
1440570af302Sopenharmony_ci			    num_minimals++;
1441570af302Sopenharmony_ci			  }
1442570af302Sopenharmony_ci			tre_purge_regset(regset, tnfa, tag);
1443570af302Sopenharmony_ci		      }
1444570af302Sopenharmony_ci
1445570af302Sopenharmony_ci		    regset[0] = -1;
1446570af302Sopenharmony_ci		    tag = next_tag;
1447570af302Sopenharmony_ci		    num_tags++;
1448570af302Sopenharmony_ci		    next_tag++;
1449570af302Sopenharmony_ci		  }
1450570af302Sopenharmony_ci		direction = TRE_TAG_MINIMIZE;
1451570af302Sopenharmony_ci	      }
1452570af302Sopenharmony_ci	      break;
1453570af302Sopenharmony_ci	    case UNION:
1454570af302Sopenharmony_ci	      {
1455570af302Sopenharmony_ci		tre_union_t *uni = node->obj;
1456570af302Sopenharmony_ci		tre_ast_node_t *left = uni->left;
1457570af302Sopenharmony_ci		tre_ast_node_t *right = uni->right;
1458570af302Sopenharmony_ci		int left_tag;
1459570af302Sopenharmony_ci		int right_tag;
1460570af302Sopenharmony_ci
1461570af302Sopenharmony_ci		if (regset[0] >= 0)
1462570af302Sopenharmony_ci		  {
1463570af302Sopenharmony_ci		    left_tag = next_tag;
1464570af302Sopenharmony_ci		    right_tag = next_tag + 1;
1465570af302Sopenharmony_ci		  }
1466570af302Sopenharmony_ci		else
1467570af302Sopenharmony_ci		  {
1468570af302Sopenharmony_ci		    left_tag = tag;
1469570af302Sopenharmony_ci		    right_tag = next_tag;
1470570af302Sopenharmony_ci		  }
1471570af302Sopenharmony_ci
1472570af302Sopenharmony_ci		/* After processing right child. */
1473570af302Sopenharmony_ci		STACK_PUSHX(stack, int, right_tag);
1474570af302Sopenharmony_ci		STACK_PUSHX(stack, int, left_tag);
1475570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, regset);
1476570af302Sopenharmony_ci		STACK_PUSHX(stack, int, regset[0] >= 0);
1477570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, node);
1478570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, right);
1479570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, left);
1480570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1481570af302Sopenharmony_ci
1482570af302Sopenharmony_ci		/* Process right child. */
1483570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, right);
1484570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1485570af302Sopenharmony_ci
1486570af302Sopenharmony_ci		/* After processing left child. */
1487570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1488570af302Sopenharmony_ci
1489570af302Sopenharmony_ci		/* Process left child. */
1490570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, left);
1491570af302Sopenharmony_ci		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1492570af302Sopenharmony_ci
1493570af302Sopenharmony_ci		/* Regset is not empty, so add a tag here. */
1494570af302Sopenharmony_ci		if (regset[0] >= 0)
1495570af302Sopenharmony_ci		  {
1496570af302Sopenharmony_ci		    if (!first_pass)
1497570af302Sopenharmony_ci		      {
1498570af302Sopenharmony_ci			int i;
1499570af302Sopenharmony_ci			status = tre_add_tag_left(mem, node, tag);
1500570af302Sopenharmony_ci			tnfa->tag_directions[tag] = direction;
1501570af302Sopenharmony_ci			if (minimal_tag >= 0)
1502570af302Sopenharmony_ci			  {
1503570af302Sopenharmony_ci			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1504570af302Sopenharmony_ci			    tnfa->minimal_tags[i] = tag;
1505570af302Sopenharmony_ci			    tnfa->minimal_tags[i + 1] = minimal_tag;
1506570af302Sopenharmony_ci			    tnfa->minimal_tags[i + 2] = -1;
1507570af302Sopenharmony_ci			    minimal_tag = -1;
1508570af302Sopenharmony_ci			    num_minimals++;
1509570af302Sopenharmony_ci			  }
1510570af302Sopenharmony_ci			tre_purge_regset(regset, tnfa, tag);
1511570af302Sopenharmony_ci		      }
1512570af302Sopenharmony_ci
1513570af302Sopenharmony_ci		    regset[0] = -1;
1514570af302Sopenharmony_ci		    tag = next_tag;
1515570af302Sopenharmony_ci		    num_tags++;
1516570af302Sopenharmony_ci		    next_tag++;
1517570af302Sopenharmony_ci		  }
1518570af302Sopenharmony_ci
1519570af302Sopenharmony_ci		if (node->num_submatches > 0)
1520570af302Sopenharmony_ci		  {
1521570af302Sopenharmony_ci		    /* The next two tags are reserved for markers. */
1522570af302Sopenharmony_ci		    next_tag++;
1523570af302Sopenharmony_ci		    tag = next_tag;
1524570af302Sopenharmony_ci		    next_tag++;
1525570af302Sopenharmony_ci		  }
1526570af302Sopenharmony_ci
1527570af302Sopenharmony_ci		break;
1528570af302Sopenharmony_ci	      }
1529570af302Sopenharmony_ci	    }
1530570af302Sopenharmony_ci
1531570af302Sopenharmony_ci	  if (node->submatch_id >= 0)
1532570af302Sopenharmony_ci	    {
1533570af302Sopenharmony_ci	      int i;
1534570af302Sopenharmony_ci	      /* Push this submatch on the parents stack. */
1535570af302Sopenharmony_ci	      for (i = 0; parents[i] >= 0; i++);
1536570af302Sopenharmony_ci	      parents[i] = node->submatch_id;
1537570af302Sopenharmony_ci	      parents[i + 1] = -1;
1538570af302Sopenharmony_ci	    }
1539570af302Sopenharmony_ci
1540570af302Sopenharmony_ci	  break; /* end case: ADDTAGS_RECURSE */
1541570af302Sopenharmony_ci
1542570af302Sopenharmony_ci	case ADDTAGS_AFTER_ITERATION:
1543570af302Sopenharmony_ci	  {
1544570af302Sopenharmony_ci	    int minimal = 0;
1545570af302Sopenharmony_ci	    int enter_tag;
1546570af302Sopenharmony_ci	    node = tre_stack_pop_voidptr(stack);
1547570af302Sopenharmony_ci	    if (first_pass)
1548570af302Sopenharmony_ci	      {
1549570af302Sopenharmony_ci		node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1550570af302Sopenharmony_ci		  + tre_stack_pop_int(stack);
1551570af302Sopenharmony_ci		minimal_tag = -1;
1552570af302Sopenharmony_ci	      }
1553570af302Sopenharmony_ci	    else
1554570af302Sopenharmony_ci	      {
1555570af302Sopenharmony_ci		minimal = tre_stack_pop_int(stack);
1556570af302Sopenharmony_ci		enter_tag = tre_stack_pop_int(stack);
1557570af302Sopenharmony_ci		if (minimal)
1558570af302Sopenharmony_ci		  minimal_tag = enter_tag;
1559570af302Sopenharmony_ci	      }
1560570af302Sopenharmony_ci
1561570af302Sopenharmony_ci	    if (!first_pass)
1562570af302Sopenharmony_ci	      {
1563570af302Sopenharmony_ci		if (minimal)
1564570af302Sopenharmony_ci		  direction = TRE_TAG_MINIMIZE;
1565570af302Sopenharmony_ci		else
1566570af302Sopenharmony_ci		  direction = TRE_TAG_MAXIMIZE;
1567570af302Sopenharmony_ci	      }
1568570af302Sopenharmony_ci	    break;
1569570af302Sopenharmony_ci	  }
1570570af302Sopenharmony_ci
1571570af302Sopenharmony_ci	case ADDTAGS_AFTER_CAT_LEFT:
1572570af302Sopenharmony_ci	  {
1573570af302Sopenharmony_ci	    int new_tag = tre_stack_pop_int(stack);
1574570af302Sopenharmony_ci	    next_tag = tre_stack_pop_int(stack);
1575570af302Sopenharmony_ci	    if (new_tag >= 0)
1576570af302Sopenharmony_ci	      {
1577570af302Sopenharmony_ci		tag = new_tag;
1578570af302Sopenharmony_ci	      }
1579570af302Sopenharmony_ci	    break;
1580570af302Sopenharmony_ci	  }
1581570af302Sopenharmony_ci
1582570af302Sopenharmony_ci	case ADDTAGS_AFTER_CAT_RIGHT:
1583570af302Sopenharmony_ci	  node = tre_stack_pop_voidptr(stack);
1584570af302Sopenharmony_ci	  if (first_pass)
1585570af302Sopenharmony_ci	    node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1586570af302Sopenharmony_ci	      + ((tre_catenation_t *)node->obj)->right->num_tags;
1587570af302Sopenharmony_ci	  break;
1588570af302Sopenharmony_ci
1589570af302Sopenharmony_ci	case ADDTAGS_AFTER_UNION_LEFT:
1590570af302Sopenharmony_ci	  /* Lift the bottom of the `regset' array so that when processing
1591570af302Sopenharmony_ci	     the right operand the items currently in the array are
1592570af302Sopenharmony_ci	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
1593570af302Sopenharmony_ci	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1594570af302Sopenharmony_ci	  while (*regset >= 0)
1595570af302Sopenharmony_ci	    regset++;
1596570af302Sopenharmony_ci	  break;
1597570af302Sopenharmony_ci
1598570af302Sopenharmony_ci	case ADDTAGS_AFTER_UNION_RIGHT:
1599570af302Sopenharmony_ci	  {
1600570af302Sopenharmony_ci	    int added_tags, tag_left, tag_right;
1601570af302Sopenharmony_ci	    tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1602570af302Sopenharmony_ci	    tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1603570af302Sopenharmony_ci	    node = tre_stack_pop_voidptr(stack);
1604570af302Sopenharmony_ci	    added_tags = tre_stack_pop_int(stack);
1605570af302Sopenharmony_ci	    if (first_pass)
1606570af302Sopenharmony_ci	      {
1607570af302Sopenharmony_ci		node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1608570af302Sopenharmony_ci		  + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1609570af302Sopenharmony_ci		  + ((node->num_submatches > 0) ? 2 : 0);
1610570af302Sopenharmony_ci	      }
1611570af302Sopenharmony_ci	    regset = tre_stack_pop_voidptr(stack);
1612570af302Sopenharmony_ci	    tag_left = tre_stack_pop_int(stack);
1613570af302Sopenharmony_ci	    tag_right = tre_stack_pop_int(stack);
1614570af302Sopenharmony_ci
1615570af302Sopenharmony_ci	    /* Add tags after both children, the left child gets a smaller
1616570af302Sopenharmony_ci	       tag than the right child.  This guarantees that we prefer
1617570af302Sopenharmony_ci	       the left child over the right child. */
1618570af302Sopenharmony_ci	    /* XXX - This is not always necessary (if the children have
1619570af302Sopenharmony_ci	       tags which must be seen for every match of that child). */
1620570af302Sopenharmony_ci	    /* XXX - Check if this is the only place where tre_add_tag_right
1621570af302Sopenharmony_ci	       is used.	 If so, use tre_add_tag_left (putting the tag before
1622570af302Sopenharmony_ci	       the child as opposed after the child) and throw away
1623570af302Sopenharmony_ci	       tre_add_tag_right. */
1624570af302Sopenharmony_ci	    if (node->num_submatches > 0)
1625570af302Sopenharmony_ci	      {
1626570af302Sopenharmony_ci		if (!first_pass)
1627570af302Sopenharmony_ci		  {
1628570af302Sopenharmony_ci		    status = tre_add_tag_right(mem, left, tag_left);
1629570af302Sopenharmony_ci		    tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1630570af302Sopenharmony_ci		    if (status == REG_OK)
1631570af302Sopenharmony_ci		      status = tre_add_tag_right(mem, right, tag_right);
1632570af302Sopenharmony_ci		    tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1633570af302Sopenharmony_ci		  }
1634570af302Sopenharmony_ci		num_tags += 2;
1635570af302Sopenharmony_ci	      }
1636570af302Sopenharmony_ci	    direction = TRE_TAG_MAXIMIZE;
1637570af302Sopenharmony_ci	    break;
1638570af302Sopenharmony_ci	  }
1639570af302Sopenharmony_ci
1640570af302Sopenharmony_ci	default:
1641570af302Sopenharmony_ci	  assert(0);
1642570af302Sopenharmony_ci	  break;
1643570af302Sopenharmony_ci
1644570af302Sopenharmony_ci	} /* end switch(symbol) */
1645570af302Sopenharmony_ci    } /* end while(tre_stack_num_objects(stack) > bottom) */
1646570af302Sopenharmony_ci
1647570af302Sopenharmony_ci  if (!first_pass)
1648570af302Sopenharmony_ci    tre_purge_regset(regset, tnfa, tag);
1649570af302Sopenharmony_ci
1650570af302Sopenharmony_ci  if (!first_pass && minimal_tag >= 0)
1651570af302Sopenharmony_ci    {
1652570af302Sopenharmony_ci      int i;
1653570af302Sopenharmony_ci      for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1654570af302Sopenharmony_ci      tnfa->minimal_tags[i] = tag;
1655570af302Sopenharmony_ci      tnfa->minimal_tags[i + 1] = minimal_tag;
1656570af302Sopenharmony_ci      tnfa->minimal_tags[i + 2] = -1;
1657570af302Sopenharmony_ci      minimal_tag = -1;
1658570af302Sopenharmony_ci      num_minimals++;
1659570af302Sopenharmony_ci    }
1660570af302Sopenharmony_ci
1661570af302Sopenharmony_ci  assert(tree->num_tags == num_tags);
1662570af302Sopenharmony_ci  tnfa->end_tag = num_tags;
1663570af302Sopenharmony_ci  tnfa->num_tags = num_tags;
1664570af302Sopenharmony_ci  tnfa->num_minimals = num_minimals;
1665570af302Sopenharmony_ci  xfree(orig_regset);
1666570af302Sopenharmony_ci  xfree(parents);
1667570af302Sopenharmony_ci  xfree(saved_states);
1668570af302Sopenharmony_ci  return status;
1669570af302Sopenharmony_ci}
1670570af302Sopenharmony_ci
1671570af302Sopenharmony_ci
1672570af302Sopenharmony_ci
1673570af302Sopenharmony_ci/*
1674570af302Sopenharmony_ci  AST to TNFA compilation routines.
1675570af302Sopenharmony_ci*/
1676570af302Sopenharmony_ci
1677570af302Sopenharmony_citypedef enum {
1678570af302Sopenharmony_ci  COPY_RECURSE,
1679570af302Sopenharmony_ci  COPY_SET_RESULT_PTR
1680570af302Sopenharmony_ci} tre_copyast_symbol_t;
1681570af302Sopenharmony_ci
1682570af302Sopenharmony_ci/* Flags for tre_copy_ast(). */
1683570af302Sopenharmony_ci#define COPY_REMOVE_TAGS	 1
1684570af302Sopenharmony_ci#define COPY_MAXIMIZE_FIRST_TAG	 2
1685570af302Sopenharmony_ci
1686570af302Sopenharmony_cistatic reg_errcode_t
1687570af302Sopenharmony_citre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1688570af302Sopenharmony_ci	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1689570af302Sopenharmony_ci	     tre_ast_node_t **copy, int *max_pos)
1690570af302Sopenharmony_ci{
1691570af302Sopenharmony_ci  reg_errcode_t status = REG_OK;
1692570af302Sopenharmony_ci  int bottom = tre_stack_num_objects(stack);
1693570af302Sopenharmony_ci  int num_copied = 0;
1694570af302Sopenharmony_ci  int first_tag = 1;
1695570af302Sopenharmony_ci  tre_ast_node_t **result = copy;
1696570af302Sopenharmony_ci  tre_copyast_symbol_t symbol;
1697570af302Sopenharmony_ci
1698570af302Sopenharmony_ci  STACK_PUSH(stack, voidptr, ast);
1699570af302Sopenharmony_ci  STACK_PUSH(stack, int, COPY_RECURSE);
1700570af302Sopenharmony_ci
1701570af302Sopenharmony_ci  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1702570af302Sopenharmony_ci    {
1703570af302Sopenharmony_ci      tre_ast_node_t *node;
1704570af302Sopenharmony_ci      if (status != REG_OK)
1705570af302Sopenharmony_ci	break;
1706570af302Sopenharmony_ci
1707570af302Sopenharmony_ci      symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1708570af302Sopenharmony_ci      switch (symbol)
1709570af302Sopenharmony_ci	{
1710570af302Sopenharmony_ci	case COPY_SET_RESULT_PTR:
1711570af302Sopenharmony_ci	  result = tre_stack_pop_voidptr(stack);
1712570af302Sopenharmony_ci	  break;
1713570af302Sopenharmony_ci	case COPY_RECURSE:
1714570af302Sopenharmony_ci	  node = tre_stack_pop_voidptr(stack);
1715570af302Sopenharmony_ci	  switch (node->type)
1716570af302Sopenharmony_ci	    {
1717570af302Sopenharmony_ci	    case LITERAL:
1718570af302Sopenharmony_ci	      {
1719570af302Sopenharmony_ci		tre_literal_t *lit = node->obj;
1720570af302Sopenharmony_ci		int pos = lit->position;
1721570af302Sopenharmony_ci		int min = lit->code_min;
1722570af302Sopenharmony_ci		int max = lit->code_max;
1723570af302Sopenharmony_ci		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1724570af302Sopenharmony_ci		  {
1725570af302Sopenharmony_ci		    /* XXX - e.g. [ab] has only one position but two
1726570af302Sopenharmony_ci		       nodes, so we are creating holes in the state space
1727570af302Sopenharmony_ci		       here.  Not fatal, just wastes memory. */
1728570af302Sopenharmony_ci		    pos += *pos_add;
1729570af302Sopenharmony_ci		    num_copied++;
1730570af302Sopenharmony_ci		  }
1731570af302Sopenharmony_ci		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1732570af302Sopenharmony_ci		  {
1733570af302Sopenharmony_ci		    /* Change this tag to empty. */
1734570af302Sopenharmony_ci		    min = EMPTY;
1735570af302Sopenharmony_ci		    max = pos = -1;
1736570af302Sopenharmony_ci		  }
1737570af302Sopenharmony_ci		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1738570af302Sopenharmony_ci			 && first_tag)
1739570af302Sopenharmony_ci		  {
1740570af302Sopenharmony_ci		    /* Maximize the first tag. */
1741570af302Sopenharmony_ci		    tag_directions[max] = TRE_TAG_MAXIMIZE;
1742570af302Sopenharmony_ci		    first_tag = 0;
1743570af302Sopenharmony_ci		  }
1744570af302Sopenharmony_ci		*result = tre_ast_new_literal(mem, min, max, pos);
1745570af302Sopenharmony_ci		if (*result == NULL)
1746570af302Sopenharmony_ci		  status = REG_ESPACE;
1747570af302Sopenharmony_ci		else {
1748570af302Sopenharmony_ci		  tre_literal_t *p = (*result)->obj;
1749570af302Sopenharmony_ci		  p->class = lit->class;
1750570af302Sopenharmony_ci		  p->neg_classes = lit->neg_classes;
1751570af302Sopenharmony_ci		}
1752570af302Sopenharmony_ci
1753570af302Sopenharmony_ci		if (pos > *max_pos)
1754570af302Sopenharmony_ci		  *max_pos = pos;
1755570af302Sopenharmony_ci		break;
1756570af302Sopenharmony_ci	      }
1757570af302Sopenharmony_ci	    case UNION:
1758570af302Sopenharmony_ci	      {
1759570af302Sopenharmony_ci		tre_union_t *uni = node->obj;
1760570af302Sopenharmony_ci		tre_union_t *tmp;
1761570af302Sopenharmony_ci		*result = tre_ast_new_union(mem, uni->left, uni->right);
1762570af302Sopenharmony_ci		if (*result == NULL)
1763570af302Sopenharmony_ci		  {
1764570af302Sopenharmony_ci		    status = REG_ESPACE;
1765570af302Sopenharmony_ci		    break;
1766570af302Sopenharmony_ci		  }
1767570af302Sopenharmony_ci		tmp = (*result)->obj;
1768570af302Sopenharmony_ci		result = &tmp->left;
1769570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, uni->right);
1770570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_RECURSE);
1771570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, &tmp->right);
1772570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1773570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, uni->left);
1774570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_RECURSE);
1775570af302Sopenharmony_ci		break;
1776570af302Sopenharmony_ci	      }
1777570af302Sopenharmony_ci	    case CATENATION:
1778570af302Sopenharmony_ci	      {
1779570af302Sopenharmony_ci		tre_catenation_t *cat = node->obj;
1780570af302Sopenharmony_ci		tre_catenation_t *tmp;
1781570af302Sopenharmony_ci		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
1782570af302Sopenharmony_ci		if (*result == NULL)
1783570af302Sopenharmony_ci		  {
1784570af302Sopenharmony_ci		    status = REG_ESPACE;
1785570af302Sopenharmony_ci		    break;
1786570af302Sopenharmony_ci		  }
1787570af302Sopenharmony_ci		tmp = (*result)->obj;
1788570af302Sopenharmony_ci		tmp->left = NULL;
1789570af302Sopenharmony_ci		tmp->right = NULL;
1790570af302Sopenharmony_ci		result = &tmp->left;
1791570af302Sopenharmony_ci
1792570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, cat->right);
1793570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_RECURSE);
1794570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, &tmp->right);
1795570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1796570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, cat->left);
1797570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_RECURSE);
1798570af302Sopenharmony_ci		break;
1799570af302Sopenharmony_ci	      }
1800570af302Sopenharmony_ci	    case ITERATION:
1801570af302Sopenharmony_ci	      {
1802570af302Sopenharmony_ci		tre_iteration_t *iter = node->obj;
1803570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, iter->arg);
1804570af302Sopenharmony_ci		STACK_PUSHX(stack, int, COPY_RECURSE);
1805570af302Sopenharmony_ci		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
1806570af302Sopenharmony_ci					   iter->max, iter->minimal);
1807570af302Sopenharmony_ci		if (*result == NULL)
1808570af302Sopenharmony_ci		  {
1809570af302Sopenharmony_ci		    status = REG_ESPACE;
1810570af302Sopenharmony_ci		    break;
1811570af302Sopenharmony_ci		  }
1812570af302Sopenharmony_ci		iter = (*result)->obj;
1813570af302Sopenharmony_ci		result = &iter->arg;
1814570af302Sopenharmony_ci		break;
1815570af302Sopenharmony_ci	      }
1816570af302Sopenharmony_ci	    default:
1817570af302Sopenharmony_ci	      assert(0);
1818570af302Sopenharmony_ci	      break;
1819570af302Sopenharmony_ci	    }
1820570af302Sopenharmony_ci	  break;
1821570af302Sopenharmony_ci	}
1822570af302Sopenharmony_ci    }
1823570af302Sopenharmony_ci  *pos_add += num_copied;
1824570af302Sopenharmony_ci  return status;
1825570af302Sopenharmony_ci}
1826570af302Sopenharmony_ci
1827570af302Sopenharmony_citypedef enum {
1828570af302Sopenharmony_ci  EXPAND_RECURSE,
1829570af302Sopenharmony_ci  EXPAND_AFTER_ITER
1830570af302Sopenharmony_ci} tre_expand_ast_symbol_t;
1831570af302Sopenharmony_ci
1832570af302Sopenharmony_ci/* Expands each iteration node that has a finite nonzero minimum or maximum
1833570af302Sopenharmony_ci   iteration count to a catenated sequence of copies of the node. */
1834570af302Sopenharmony_cistatic reg_errcode_t
1835570af302Sopenharmony_citre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1836570af302Sopenharmony_ci	       int *position, tre_tag_direction_t *tag_directions)
1837570af302Sopenharmony_ci{
1838570af302Sopenharmony_ci  reg_errcode_t status = REG_OK;
1839570af302Sopenharmony_ci  int bottom = tre_stack_num_objects(stack);
1840570af302Sopenharmony_ci  int pos_add = 0;
1841570af302Sopenharmony_ci  int pos_add_total = 0;
1842570af302Sopenharmony_ci  int max_pos = 0;
1843570af302Sopenharmony_ci  int iter_depth = 0;
1844570af302Sopenharmony_ci
1845570af302Sopenharmony_ci  STACK_PUSHR(stack, voidptr, ast);
1846570af302Sopenharmony_ci  STACK_PUSHR(stack, int, EXPAND_RECURSE);
1847570af302Sopenharmony_ci  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1848570af302Sopenharmony_ci    {
1849570af302Sopenharmony_ci      tre_ast_node_t *node;
1850570af302Sopenharmony_ci      tre_expand_ast_symbol_t symbol;
1851570af302Sopenharmony_ci
1852570af302Sopenharmony_ci      if (status != REG_OK)
1853570af302Sopenharmony_ci	break;
1854570af302Sopenharmony_ci
1855570af302Sopenharmony_ci      symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1856570af302Sopenharmony_ci      node = tre_stack_pop_voidptr(stack);
1857570af302Sopenharmony_ci      switch (symbol)
1858570af302Sopenharmony_ci	{
1859570af302Sopenharmony_ci	case EXPAND_RECURSE:
1860570af302Sopenharmony_ci	  switch (node->type)
1861570af302Sopenharmony_ci	    {
1862570af302Sopenharmony_ci	    case LITERAL:
1863570af302Sopenharmony_ci	      {
1864570af302Sopenharmony_ci		tre_literal_t *lit= node->obj;
1865570af302Sopenharmony_ci		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1866570af302Sopenharmony_ci		  {
1867570af302Sopenharmony_ci		    lit->position += pos_add;
1868570af302Sopenharmony_ci		    if (lit->position > max_pos)
1869570af302Sopenharmony_ci		      max_pos = lit->position;
1870570af302Sopenharmony_ci		  }
1871570af302Sopenharmony_ci		break;
1872570af302Sopenharmony_ci	      }
1873570af302Sopenharmony_ci	    case UNION:
1874570af302Sopenharmony_ci	      {
1875570af302Sopenharmony_ci		tre_union_t *uni = node->obj;
1876570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, uni->right);
1877570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1878570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, uni->left);
1879570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880570af302Sopenharmony_ci		break;
1881570af302Sopenharmony_ci	      }
1882570af302Sopenharmony_ci	    case CATENATION:
1883570af302Sopenharmony_ci	      {
1884570af302Sopenharmony_ci		tre_catenation_t *cat = node->obj;
1885570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, cat->right);
1886570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1887570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, cat->left);
1888570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1889570af302Sopenharmony_ci		break;
1890570af302Sopenharmony_ci	      }
1891570af302Sopenharmony_ci	    case ITERATION:
1892570af302Sopenharmony_ci	      {
1893570af302Sopenharmony_ci		tre_iteration_t *iter = node->obj;
1894570af302Sopenharmony_ci		STACK_PUSHX(stack, int, pos_add);
1895570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, node);
1896570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1897570af302Sopenharmony_ci		STACK_PUSHX(stack, voidptr, iter->arg);
1898570af302Sopenharmony_ci		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1899570af302Sopenharmony_ci		/* If we are going to expand this node at EXPAND_AFTER_ITER
1900570af302Sopenharmony_ci		   then don't increase the `pos' fields of the nodes now, it
1901570af302Sopenharmony_ci		   will get done when expanding. */
1902570af302Sopenharmony_ci		if (iter->min > 1 || iter->max > 1)
1903570af302Sopenharmony_ci		  pos_add = 0;
1904570af302Sopenharmony_ci		iter_depth++;
1905570af302Sopenharmony_ci		break;
1906570af302Sopenharmony_ci	      }
1907570af302Sopenharmony_ci	    default:
1908570af302Sopenharmony_ci	      assert(0);
1909570af302Sopenharmony_ci	      break;
1910570af302Sopenharmony_ci	    }
1911570af302Sopenharmony_ci	  break;
1912570af302Sopenharmony_ci	case EXPAND_AFTER_ITER:
1913570af302Sopenharmony_ci	  {
1914570af302Sopenharmony_ci	    tre_iteration_t *iter = node->obj;
1915570af302Sopenharmony_ci	    int pos_add_last;
1916570af302Sopenharmony_ci	    pos_add = tre_stack_pop_int(stack);
1917570af302Sopenharmony_ci	    pos_add_last = pos_add;
1918570af302Sopenharmony_ci	    if (iter->min > 1 || iter->max > 1)
1919570af302Sopenharmony_ci	      {
1920570af302Sopenharmony_ci		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1921570af302Sopenharmony_ci		int j;
1922570af302Sopenharmony_ci		int pos_add_save = pos_add;
1923570af302Sopenharmony_ci
1924570af302Sopenharmony_ci		/* Create a catenated sequence of copies of the node. */
1925570af302Sopenharmony_ci		for (j = 0; j < iter->min; j++)
1926570af302Sopenharmony_ci		  {
1927570af302Sopenharmony_ci		    tre_ast_node_t *copy;
1928570af302Sopenharmony_ci		    /* Remove tags from all but the last copy. */
1929570af302Sopenharmony_ci		    int flags = ((j + 1 < iter->min)
1930570af302Sopenharmony_ci				 ? COPY_REMOVE_TAGS
1931570af302Sopenharmony_ci				 : COPY_MAXIMIZE_FIRST_TAG);
1932570af302Sopenharmony_ci		    pos_add_save = pos_add;
1933570af302Sopenharmony_ci		    status = tre_copy_ast(mem, stack, iter->arg, flags,
1934570af302Sopenharmony_ci					  &pos_add, tag_directions, &copy,
1935570af302Sopenharmony_ci					  &max_pos);
1936570af302Sopenharmony_ci		    if (status != REG_OK)
1937570af302Sopenharmony_ci		      return status;
1938570af302Sopenharmony_ci		    if (seq1 != NULL)
1939570af302Sopenharmony_ci		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
1940570af302Sopenharmony_ci		    else
1941570af302Sopenharmony_ci		      seq1 = copy;
1942570af302Sopenharmony_ci		    if (seq1 == NULL)
1943570af302Sopenharmony_ci		      return REG_ESPACE;
1944570af302Sopenharmony_ci		  }
1945570af302Sopenharmony_ci
1946570af302Sopenharmony_ci		if (iter->max == -1)
1947570af302Sopenharmony_ci		  {
1948570af302Sopenharmony_ci		    /* No upper limit. */
1949570af302Sopenharmony_ci		    pos_add_save = pos_add;
1950570af302Sopenharmony_ci		    status = tre_copy_ast(mem, stack, iter->arg, 0,
1951570af302Sopenharmony_ci					  &pos_add, NULL, &seq2, &max_pos);
1952570af302Sopenharmony_ci		    if (status != REG_OK)
1953570af302Sopenharmony_ci		      return status;
1954570af302Sopenharmony_ci		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1955570af302Sopenharmony_ci		    if (seq2 == NULL)
1956570af302Sopenharmony_ci		      return REG_ESPACE;
1957570af302Sopenharmony_ci		  }
1958570af302Sopenharmony_ci		else
1959570af302Sopenharmony_ci		  {
1960570af302Sopenharmony_ci		    for (j = iter->min; j < iter->max; j++)
1961570af302Sopenharmony_ci		      {
1962570af302Sopenharmony_ci			tre_ast_node_t *tmp, *copy;
1963570af302Sopenharmony_ci			pos_add_save = pos_add;
1964570af302Sopenharmony_ci			status = tre_copy_ast(mem, stack, iter->arg, 0,
1965570af302Sopenharmony_ci					      &pos_add, NULL, &copy, &max_pos);
1966570af302Sopenharmony_ci			if (status != REG_OK)
1967570af302Sopenharmony_ci			  return status;
1968570af302Sopenharmony_ci			if (seq2 != NULL)
1969570af302Sopenharmony_ci			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
1970570af302Sopenharmony_ci			else
1971570af302Sopenharmony_ci			  seq2 = copy;
1972570af302Sopenharmony_ci			if (seq2 == NULL)
1973570af302Sopenharmony_ci			  return REG_ESPACE;
1974570af302Sopenharmony_ci			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1975570af302Sopenharmony_ci			if (tmp == NULL)
1976570af302Sopenharmony_ci			  return REG_ESPACE;
1977570af302Sopenharmony_ci			seq2 = tre_ast_new_union(mem, tmp, seq2);
1978570af302Sopenharmony_ci			if (seq2 == NULL)
1979570af302Sopenharmony_ci			  return REG_ESPACE;
1980570af302Sopenharmony_ci		      }
1981570af302Sopenharmony_ci		  }
1982570af302Sopenharmony_ci
1983570af302Sopenharmony_ci		pos_add = pos_add_save;
1984570af302Sopenharmony_ci		if (seq1 == NULL)
1985570af302Sopenharmony_ci		  seq1 = seq2;
1986570af302Sopenharmony_ci		else if (seq2 != NULL)
1987570af302Sopenharmony_ci		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1988570af302Sopenharmony_ci		if (seq1 == NULL)
1989570af302Sopenharmony_ci		  return REG_ESPACE;
1990570af302Sopenharmony_ci		node->obj = seq1->obj;
1991570af302Sopenharmony_ci		node->type = seq1->type;
1992570af302Sopenharmony_ci	      }
1993570af302Sopenharmony_ci
1994570af302Sopenharmony_ci	    iter_depth--;
1995570af302Sopenharmony_ci	    pos_add_total += pos_add - pos_add_last;
1996570af302Sopenharmony_ci	    if (iter_depth == 0)
1997570af302Sopenharmony_ci	      pos_add = pos_add_total;
1998570af302Sopenharmony_ci
1999570af302Sopenharmony_ci	    break;
2000570af302Sopenharmony_ci	  }
2001570af302Sopenharmony_ci	default:
2002570af302Sopenharmony_ci	  assert(0);
2003570af302Sopenharmony_ci	  break;
2004570af302Sopenharmony_ci	}
2005570af302Sopenharmony_ci    }
2006570af302Sopenharmony_ci
2007570af302Sopenharmony_ci  *position += pos_add_total;
2008570af302Sopenharmony_ci
2009570af302Sopenharmony_ci  /* `max_pos' should never be larger than `*position' if the above
2010570af302Sopenharmony_ci     code works, but just an extra safeguard let's make sure
2011570af302Sopenharmony_ci     `*position' is set large enough so enough memory will be
2012570af302Sopenharmony_ci     allocated for the transition table. */
2013570af302Sopenharmony_ci  if (max_pos > *position)
2014570af302Sopenharmony_ci    *position = max_pos;
2015570af302Sopenharmony_ci
2016570af302Sopenharmony_ci  return status;
2017570af302Sopenharmony_ci}
2018570af302Sopenharmony_ci
2019570af302Sopenharmony_cistatic tre_pos_and_tags_t *
2020570af302Sopenharmony_citre_set_empty(tre_mem_t mem)
2021570af302Sopenharmony_ci{
2022570af302Sopenharmony_ci  tre_pos_and_tags_t *new_set;
2023570af302Sopenharmony_ci
2024570af302Sopenharmony_ci  new_set = tre_mem_calloc(mem, sizeof(*new_set));
2025570af302Sopenharmony_ci  if (new_set == NULL)
2026570af302Sopenharmony_ci    return NULL;
2027570af302Sopenharmony_ci
2028570af302Sopenharmony_ci  new_set[0].position = -1;
2029570af302Sopenharmony_ci  new_set[0].code_min = -1;
2030570af302Sopenharmony_ci  new_set[0].code_max = -1;
2031570af302Sopenharmony_ci
2032570af302Sopenharmony_ci  return new_set;
2033570af302Sopenharmony_ci}
2034570af302Sopenharmony_ci
2035570af302Sopenharmony_cistatic tre_pos_and_tags_t *
2036570af302Sopenharmony_citre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2037570af302Sopenharmony_ci	    tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2038570af302Sopenharmony_ci{
2039570af302Sopenharmony_ci  tre_pos_and_tags_t *new_set;
2040570af302Sopenharmony_ci
2041570af302Sopenharmony_ci  new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2042570af302Sopenharmony_ci  if (new_set == NULL)
2043570af302Sopenharmony_ci    return NULL;
2044570af302Sopenharmony_ci
2045570af302Sopenharmony_ci  new_set[0].position = position;
2046570af302Sopenharmony_ci  new_set[0].code_min = code_min;
2047570af302Sopenharmony_ci  new_set[0].code_max = code_max;
2048570af302Sopenharmony_ci  new_set[0].class = class;
2049570af302Sopenharmony_ci  new_set[0].neg_classes = neg_classes;
2050570af302Sopenharmony_ci  new_set[0].backref = backref;
2051570af302Sopenharmony_ci  new_set[1].position = -1;
2052570af302Sopenharmony_ci  new_set[1].code_min = -1;
2053570af302Sopenharmony_ci  new_set[1].code_max = -1;
2054570af302Sopenharmony_ci
2055570af302Sopenharmony_ci  return new_set;
2056570af302Sopenharmony_ci}
2057570af302Sopenharmony_ci
2058570af302Sopenharmony_cistatic tre_pos_and_tags_t *
2059570af302Sopenharmony_citre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2060570af302Sopenharmony_ci	      int *tags, int assertions)
2061570af302Sopenharmony_ci{
2062570af302Sopenharmony_ci  int s1, s2, i, j;
2063570af302Sopenharmony_ci  tre_pos_and_tags_t *new_set;
2064570af302Sopenharmony_ci  int *new_tags;
2065570af302Sopenharmony_ci  int num_tags;
2066570af302Sopenharmony_ci
2067570af302Sopenharmony_ci  for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2068570af302Sopenharmony_ci  for (s1 = 0; set1[s1].position >= 0; s1++);
2069570af302Sopenharmony_ci  for (s2 = 0; set2[s2].position >= 0; s2++);
2070570af302Sopenharmony_ci  new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2071570af302Sopenharmony_ci  if (!new_set )
2072570af302Sopenharmony_ci    return NULL;
2073570af302Sopenharmony_ci
2074570af302Sopenharmony_ci  for (s1 = 0; set1[s1].position >= 0; s1++)
2075570af302Sopenharmony_ci    {
2076570af302Sopenharmony_ci      new_set[s1].position = set1[s1].position;
2077570af302Sopenharmony_ci      new_set[s1].code_min = set1[s1].code_min;
2078570af302Sopenharmony_ci      new_set[s1].code_max = set1[s1].code_max;
2079570af302Sopenharmony_ci      new_set[s1].assertions = set1[s1].assertions | assertions;
2080570af302Sopenharmony_ci      new_set[s1].class = set1[s1].class;
2081570af302Sopenharmony_ci      new_set[s1].neg_classes = set1[s1].neg_classes;
2082570af302Sopenharmony_ci      new_set[s1].backref = set1[s1].backref;
2083570af302Sopenharmony_ci      if (set1[s1].tags == NULL && tags == NULL)
2084570af302Sopenharmony_ci	new_set[s1].tags = NULL;
2085570af302Sopenharmony_ci      else
2086570af302Sopenharmony_ci	{
2087570af302Sopenharmony_ci	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2088570af302Sopenharmony_ci	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2089570af302Sopenharmony_ci					 * (i + num_tags + 1)));
2090570af302Sopenharmony_ci	  if (new_tags == NULL)
2091570af302Sopenharmony_ci	    return NULL;
2092570af302Sopenharmony_ci	  for (j = 0; j < i; j++)
2093570af302Sopenharmony_ci	    new_tags[j] = set1[s1].tags[j];
2094570af302Sopenharmony_ci	  for (i = 0; i < num_tags; i++)
2095570af302Sopenharmony_ci	    new_tags[j + i] = tags[i];
2096570af302Sopenharmony_ci	  new_tags[j + i] = -1;
2097570af302Sopenharmony_ci	  new_set[s1].tags = new_tags;
2098570af302Sopenharmony_ci	}
2099570af302Sopenharmony_ci    }
2100570af302Sopenharmony_ci
2101570af302Sopenharmony_ci  for (s2 = 0; set2[s2].position >= 0; s2++)
2102570af302Sopenharmony_ci    {
2103570af302Sopenharmony_ci      new_set[s1 + s2].position = set2[s2].position;
2104570af302Sopenharmony_ci      new_set[s1 + s2].code_min = set2[s2].code_min;
2105570af302Sopenharmony_ci      new_set[s1 + s2].code_max = set2[s2].code_max;
2106570af302Sopenharmony_ci      /* XXX - why not | assertions here as well? */
2107570af302Sopenharmony_ci      new_set[s1 + s2].assertions = set2[s2].assertions;
2108570af302Sopenharmony_ci      new_set[s1 + s2].class = set2[s2].class;
2109570af302Sopenharmony_ci      new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2110570af302Sopenharmony_ci      new_set[s1 + s2].backref = set2[s2].backref;
2111570af302Sopenharmony_ci      if (set2[s2].tags == NULL)
2112570af302Sopenharmony_ci	new_set[s1 + s2].tags = NULL;
2113570af302Sopenharmony_ci      else
2114570af302Sopenharmony_ci	{
2115570af302Sopenharmony_ci	  for (i = 0; set2[s2].tags[i] >= 0; i++);
2116570af302Sopenharmony_ci	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2117570af302Sopenharmony_ci	  if (new_tags == NULL)
2118570af302Sopenharmony_ci	    return NULL;
2119570af302Sopenharmony_ci	  for (j = 0; j < i; j++)
2120570af302Sopenharmony_ci	    new_tags[j] = set2[s2].tags[j];
2121570af302Sopenharmony_ci	  new_tags[j] = -1;
2122570af302Sopenharmony_ci	  new_set[s1 + s2].tags = new_tags;
2123570af302Sopenharmony_ci	}
2124570af302Sopenharmony_ci    }
2125570af302Sopenharmony_ci  new_set[s1 + s2].position = -1;
2126570af302Sopenharmony_ci  return new_set;
2127570af302Sopenharmony_ci}
2128570af302Sopenharmony_ci
2129570af302Sopenharmony_ci/* Finds the empty path through `node' which is the one that should be
2130570af302Sopenharmony_ci   taken according to POSIX.2 rules, and adds the tags on that path to
2131570af302Sopenharmony_ci   `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2132570af302Sopenharmony_ci   set to the number of tags seen on the path. */
2133570af302Sopenharmony_cistatic reg_errcode_t
2134570af302Sopenharmony_citre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2135570af302Sopenharmony_ci		int *assertions, int *num_tags_seen)
2136570af302Sopenharmony_ci{
2137570af302Sopenharmony_ci  tre_literal_t *lit;
2138570af302Sopenharmony_ci  tre_union_t *uni;
2139570af302Sopenharmony_ci  tre_catenation_t *cat;
2140570af302Sopenharmony_ci  tre_iteration_t *iter;
2141570af302Sopenharmony_ci  int i;
2142570af302Sopenharmony_ci  int bottom = tre_stack_num_objects(stack);
2143570af302Sopenharmony_ci  reg_errcode_t status = REG_OK;
2144570af302Sopenharmony_ci  if (num_tags_seen)
2145570af302Sopenharmony_ci    *num_tags_seen = 0;
2146570af302Sopenharmony_ci
2147570af302Sopenharmony_ci  status = tre_stack_push_voidptr(stack, node);
2148570af302Sopenharmony_ci
2149570af302Sopenharmony_ci  /* Walk through the tree recursively. */
2150570af302Sopenharmony_ci  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2151570af302Sopenharmony_ci    {
2152570af302Sopenharmony_ci      node = tre_stack_pop_voidptr(stack);
2153570af302Sopenharmony_ci
2154570af302Sopenharmony_ci      switch (node->type)
2155570af302Sopenharmony_ci	{
2156570af302Sopenharmony_ci	case LITERAL:
2157570af302Sopenharmony_ci	  lit = (tre_literal_t *)node->obj;
2158570af302Sopenharmony_ci	  switch (lit->code_min)
2159570af302Sopenharmony_ci	    {
2160570af302Sopenharmony_ci	    case TAG:
2161570af302Sopenharmony_ci	      if (lit->code_max >= 0)
2162570af302Sopenharmony_ci		{
2163570af302Sopenharmony_ci		  if (tags != NULL)
2164570af302Sopenharmony_ci		    {
2165570af302Sopenharmony_ci		      /* Add the tag to `tags'. */
2166570af302Sopenharmony_ci		      for (i = 0; tags[i] >= 0; i++)
2167570af302Sopenharmony_ci			if (tags[i] == lit->code_max)
2168570af302Sopenharmony_ci			  break;
2169570af302Sopenharmony_ci		      if (tags[i] < 0)
2170570af302Sopenharmony_ci			{
2171570af302Sopenharmony_ci			  tags[i] = lit->code_max;
2172570af302Sopenharmony_ci			  tags[i + 1] = -1;
2173570af302Sopenharmony_ci			}
2174570af302Sopenharmony_ci		    }
2175570af302Sopenharmony_ci		  if (num_tags_seen)
2176570af302Sopenharmony_ci		    (*num_tags_seen)++;
2177570af302Sopenharmony_ci		}
2178570af302Sopenharmony_ci	      break;
2179570af302Sopenharmony_ci	    case ASSERTION:
2180570af302Sopenharmony_ci	      assert(lit->code_max >= 1
2181570af302Sopenharmony_ci		     || lit->code_max <= ASSERT_LAST);
2182570af302Sopenharmony_ci	      if (assertions != NULL)
2183570af302Sopenharmony_ci		*assertions |= lit->code_max;
2184570af302Sopenharmony_ci	      break;
2185570af302Sopenharmony_ci	    case EMPTY:
2186570af302Sopenharmony_ci	      break;
2187570af302Sopenharmony_ci	    default:
2188570af302Sopenharmony_ci	      assert(0);
2189570af302Sopenharmony_ci	      break;
2190570af302Sopenharmony_ci	    }
2191570af302Sopenharmony_ci	  break;
2192570af302Sopenharmony_ci
2193570af302Sopenharmony_ci	case UNION:
2194570af302Sopenharmony_ci	  /* Subexpressions starting earlier take priority over ones
2195570af302Sopenharmony_ci	     starting later, so we prefer the left subexpression over the
2196570af302Sopenharmony_ci	     right subexpression. */
2197570af302Sopenharmony_ci	  uni = (tre_union_t *)node->obj;
2198570af302Sopenharmony_ci	  if (uni->left->nullable)
2199570af302Sopenharmony_ci	    STACK_PUSHX(stack, voidptr, uni->left)
2200570af302Sopenharmony_ci	  else if (uni->right->nullable)
2201570af302Sopenharmony_ci	    STACK_PUSHX(stack, voidptr, uni->right)
2202570af302Sopenharmony_ci	  else
2203570af302Sopenharmony_ci	    assert(0);
2204570af302Sopenharmony_ci	  break;
2205570af302Sopenharmony_ci
2206570af302Sopenharmony_ci	case CATENATION:
2207570af302Sopenharmony_ci	  /* The path must go through both children. */
2208570af302Sopenharmony_ci	  cat = (tre_catenation_t *)node->obj;
2209570af302Sopenharmony_ci	  assert(cat->left->nullable);
2210570af302Sopenharmony_ci	  assert(cat->right->nullable);
2211570af302Sopenharmony_ci	  STACK_PUSHX(stack, voidptr, cat->left);
2212570af302Sopenharmony_ci	  STACK_PUSHX(stack, voidptr, cat->right);
2213570af302Sopenharmony_ci	  break;
2214570af302Sopenharmony_ci
2215570af302Sopenharmony_ci	case ITERATION:
2216570af302Sopenharmony_ci	  /* A match with an empty string is preferred over no match at
2217570af302Sopenharmony_ci	     all, so we go through the argument if possible. */
2218570af302Sopenharmony_ci	  iter = (tre_iteration_t *)node->obj;
2219570af302Sopenharmony_ci	  if (iter->arg->nullable)
2220570af302Sopenharmony_ci	    STACK_PUSHX(stack, voidptr, iter->arg);
2221570af302Sopenharmony_ci	  break;
2222570af302Sopenharmony_ci
2223570af302Sopenharmony_ci	default:
2224570af302Sopenharmony_ci	  assert(0);
2225570af302Sopenharmony_ci	  break;
2226570af302Sopenharmony_ci	}
2227570af302Sopenharmony_ci    }
2228570af302Sopenharmony_ci
2229570af302Sopenharmony_ci  return status;
2230570af302Sopenharmony_ci}
2231570af302Sopenharmony_ci
2232570af302Sopenharmony_ci
2233570af302Sopenharmony_citypedef enum {
2234570af302Sopenharmony_ci  NFL_RECURSE,
2235570af302Sopenharmony_ci  NFL_POST_UNION,
2236570af302Sopenharmony_ci  NFL_POST_CATENATION,
2237570af302Sopenharmony_ci  NFL_POST_ITERATION
2238570af302Sopenharmony_ci} tre_nfl_stack_symbol_t;
2239570af302Sopenharmony_ci
2240570af302Sopenharmony_ci
2241570af302Sopenharmony_ci/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2242570af302Sopenharmony_ci   the nodes of the AST `tree'. */
2243570af302Sopenharmony_cistatic reg_errcode_t
2244570af302Sopenharmony_citre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2245570af302Sopenharmony_ci{
2246570af302Sopenharmony_ci  int bottom = tre_stack_num_objects(stack);
2247570af302Sopenharmony_ci
2248570af302Sopenharmony_ci  STACK_PUSHR(stack, voidptr, tree);
2249570af302Sopenharmony_ci  STACK_PUSHR(stack, int, NFL_RECURSE);
2250570af302Sopenharmony_ci
2251570af302Sopenharmony_ci  while (tre_stack_num_objects(stack) > bottom)
2252570af302Sopenharmony_ci    {
2253570af302Sopenharmony_ci      tre_nfl_stack_symbol_t symbol;
2254570af302Sopenharmony_ci      tre_ast_node_t *node;
2255570af302Sopenharmony_ci
2256570af302Sopenharmony_ci      symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2257570af302Sopenharmony_ci      node = tre_stack_pop_voidptr(stack);
2258570af302Sopenharmony_ci      switch (symbol)
2259570af302Sopenharmony_ci	{
2260570af302Sopenharmony_ci	case NFL_RECURSE:
2261570af302Sopenharmony_ci	  switch (node->type)
2262570af302Sopenharmony_ci	    {
2263570af302Sopenharmony_ci	    case LITERAL:
2264570af302Sopenharmony_ci	      {
2265570af302Sopenharmony_ci		tre_literal_t *lit = (tre_literal_t *)node->obj;
2266570af302Sopenharmony_ci		if (IS_BACKREF(lit))
2267570af302Sopenharmony_ci		  {
2268570af302Sopenharmony_ci		    /* Back references: nullable = false, firstpos = {i},
2269570af302Sopenharmony_ci		       lastpos = {i}. */
2270570af302Sopenharmony_ci		    node->nullable = 0;
2271570af302Sopenharmony_ci		    node->firstpos = tre_set_one(mem, lit->position, 0,
2272570af302Sopenharmony_ci					     TRE_CHAR_MAX, 0, NULL, -1);
2273570af302Sopenharmony_ci		    if (!node->firstpos)
2274570af302Sopenharmony_ci		      return REG_ESPACE;
2275570af302Sopenharmony_ci		    node->lastpos = tre_set_one(mem, lit->position, 0,
2276570af302Sopenharmony_ci						TRE_CHAR_MAX, 0, NULL,
2277570af302Sopenharmony_ci						(int)lit->code_max);
2278570af302Sopenharmony_ci		    if (!node->lastpos)
2279570af302Sopenharmony_ci		      return REG_ESPACE;
2280570af302Sopenharmony_ci		  }
2281570af302Sopenharmony_ci		else if (lit->code_min < 0)
2282570af302Sopenharmony_ci		  {
2283570af302Sopenharmony_ci		    /* Tags, empty strings, params, and zero width assertions:
2284570af302Sopenharmony_ci		       nullable = true, firstpos = {}, and lastpos = {}. */
2285570af302Sopenharmony_ci		    node->nullable = 1;
2286570af302Sopenharmony_ci		    node->firstpos = tre_set_empty(mem);
2287570af302Sopenharmony_ci		    if (!node->firstpos)
2288570af302Sopenharmony_ci		      return REG_ESPACE;
2289570af302Sopenharmony_ci		    node->lastpos = tre_set_empty(mem);
2290570af302Sopenharmony_ci		    if (!node->lastpos)
2291570af302Sopenharmony_ci		      return REG_ESPACE;
2292570af302Sopenharmony_ci		  }
2293570af302Sopenharmony_ci		else
2294570af302Sopenharmony_ci		  {
2295570af302Sopenharmony_ci		    /* Literal at position i: nullable = false, firstpos = {i},
2296570af302Sopenharmony_ci		       lastpos = {i}. */
2297570af302Sopenharmony_ci		    node->nullable = 0;
2298570af302Sopenharmony_ci		    node->firstpos =
2299570af302Sopenharmony_ci		      tre_set_one(mem, lit->position, (int)lit->code_min,
2300570af302Sopenharmony_ci				  (int)lit->code_max, 0, NULL, -1);
2301570af302Sopenharmony_ci		    if (!node->firstpos)
2302570af302Sopenharmony_ci		      return REG_ESPACE;
2303570af302Sopenharmony_ci		    node->lastpos = tre_set_one(mem, lit->position,
2304570af302Sopenharmony_ci						(int)lit->code_min,
2305570af302Sopenharmony_ci						(int)lit->code_max,
2306570af302Sopenharmony_ci						lit->class, lit->neg_classes,
2307570af302Sopenharmony_ci						-1);
2308570af302Sopenharmony_ci		    if (!node->lastpos)
2309570af302Sopenharmony_ci		      return REG_ESPACE;
2310570af302Sopenharmony_ci		  }
2311570af302Sopenharmony_ci		break;
2312570af302Sopenharmony_ci	      }
2313570af302Sopenharmony_ci
2314570af302Sopenharmony_ci	    case UNION:
2315570af302Sopenharmony_ci	      /* Compute the attributes for the two subtrees, and after that
2316570af302Sopenharmony_ci		 for this node. */
2317570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, node);
2318570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_POST_UNION);
2319570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2320570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_RECURSE);
2321570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2322570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_RECURSE);
2323570af302Sopenharmony_ci	      break;
2324570af302Sopenharmony_ci
2325570af302Sopenharmony_ci	    case CATENATION:
2326570af302Sopenharmony_ci	      /* Compute the attributes for the two subtrees, and after that
2327570af302Sopenharmony_ci		 for this node. */
2328570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, node);
2329570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2330570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2331570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_RECURSE);
2332570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2333570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_RECURSE);
2334570af302Sopenharmony_ci	      break;
2335570af302Sopenharmony_ci
2336570af302Sopenharmony_ci	    case ITERATION:
2337570af302Sopenharmony_ci	      /* Compute the attributes for the subtree, and after that for
2338570af302Sopenharmony_ci		 this node. */
2339570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, node);
2340570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2341570af302Sopenharmony_ci	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2342570af302Sopenharmony_ci	      STACK_PUSHR(stack, int, NFL_RECURSE);
2343570af302Sopenharmony_ci	      break;
2344570af302Sopenharmony_ci	    }
2345570af302Sopenharmony_ci	  break; /* end case: NFL_RECURSE */
2346570af302Sopenharmony_ci
2347570af302Sopenharmony_ci	case NFL_POST_UNION:
2348570af302Sopenharmony_ci	  {
2349570af302Sopenharmony_ci	    tre_union_t *uni = (tre_union_t *)node->obj;
2350570af302Sopenharmony_ci	    node->nullable = uni->left->nullable || uni->right->nullable;
2351570af302Sopenharmony_ci	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
2352570af302Sopenharmony_ci					   uni->right->firstpos, NULL, 0);
2353570af302Sopenharmony_ci	    if (!node->firstpos)
2354570af302Sopenharmony_ci	      return REG_ESPACE;
2355570af302Sopenharmony_ci	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
2356570af302Sopenharmony_ci					  uni->right->lastpos, NULL, 0);
2357570af302Sopenharmony_ci	    if (!node->lastpos)
2358570af302Sopenharmony_ci	      return REG_ESPACE;
2359570af302Sopenharmony_ci	    break;
2360570af302Sopenharmony_ci	  }
2361570af302Sopenharmony_ci
2362570af302Sopenharmony_ci	case NFL_POST_ITERATION:
2363570af302Sopenharmony_ci	  {
2364570af302Sopenharmony_ci	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2365570af302Sopenharmony_ci
2366570af302Sopenharmony_ci	    if (iter->min == 0 || iter->arg->nullable)
2367570af302Sopenharmony_ci	      node->nullable = 1;
2368570af302Sopenharmony_ci	    else
2369570af302Sopenharmony_ci	      node->nullable = 0;
2370570af302Sopenharmony_ci	    node->firstpos = iter->arg->firstpos;
2371570af302Sopenharmony_ci	    node->lastpos = iter->arg->lastpos;
2372570af302Sopenharmony_ci	    break;
2373570af302Sopenharmony_ci	  }
2374570af302Sopenharmony_ci
2375570af302Sopenharmony_ci	case NFL_POST_CATENATION:
2376570af302Sopenharmony_ci	  {
2377570af302Sopenharmony_ci	    int num_tags, *tags, assertions;
2378570af302Sopenharmony_ci	    reg_errcode_t status;
2379570af302Sopenharmony_ci	    tre_catenation_t *cat = node->obj;
2380570af302Sopenharmony_ci	    node->nullable = cat->left->nullable && cat->right->nullable;
2381570af302Sopenharmony_ci
2382570af302Sopenharmony_ci	    /* Compute firstpos. */
2383570af302Sopenharmony_ci	    if (cat->left->nullable)
2384570af302Sopenharmony_ci	      {
2385570af302Sopenharmony_ci		/* The left side matches the empty string.  Make a first pass
2386570af302Sopenharmony_ci		   with tre_match_empty() to get the number of tags and
2387570af302Sopenharmony_ci		   parameters. */
2388570af302Sopenharmony_ci		status = tre_match_empty(stack, cat->left,
2389570af302Sopenharmony_ci					 NULL, NULL, &num_tags);
2390570af302Sopenharmony_ci		if (status != REG_OK)
2391570af302Sopenharmony_ci		  return status;
2392570af302Sopenharmony_ci		/* Allocate arrays for the tags and parameters. */
2393570af302Sopenharmony_ci		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2394570af302Sopenharmony_ci		if (!tags)
2395570af302Sopenharmony_ci		  return REG_ESPACE;
2396570af302Sopenharmony_ci		tags[0] = -1;
2397570af302Sopenharmony_ci		assertions = 0;
2398570af302Sopenharmony_ci		/* Second pass with tre_mach_empty() to get the list of
2399570af302Sopenharmony_ci		   tags and parameters. */
2400570af302Sopenharmony_ci		status = tre_match_empty(stack, cat->left, tags,
2401570af302Sopenharmony_ci					 &assertions, NULL);
2402570af302Sopenharmony_ci		if (status != REG_OK)
2403570af302Sopenharmony_ci		  {
2404570af302Sopenharmony_ci		    xfree(tags);
2405570af302Sopenharmony_ci		    return status;
2406570af302Sopenharmony_ci		  }
2407570af302Sopenharmony_ci		node->firstpos =
2408570af302Sopenharmony_ci		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2409570af302Sopenharmony_ci				tags, assertions);
2410570af302Sopenharmony_ci		xfree(tags);
2411570af302Sopenharmony_ci		if (!node->firstpos)
2412570af302Sopenharmony_ci		  return REG_ESPACE;
2413570af302Sopenharmony_ci	      }
2414570af302Sopenharmony_ci	    else
2415570af302Sopenharmony_ci	      {
2416570af302Sopenharmony_ci		node->firstpos = cat->left->firstpos;
2417570af302Sopenharmony_ci	      }
2418570af302Sopenharmony_ci
2419570af302Sopenharmony_ci	    /* Compute lastpos. */
2420570af302Sopenharmony_ci	    if (cat->right->nullable)
2421570af302Sopenharmony_ci	      {
2422570af302Sopenharmony_ci		/* The right side matches the empty string.  Make a first pass
2423570af302Sopenharmony_ci		   with tre_match_empty() to get the number of tags and
2424570af302Sopenharmony_ci		   parameters. */
2425570af302Sopenharmony_ci		status = tre_match_empty(stack, cat->right,
2426570af302Sopenharmony_ci					 NULL, NULL, &num_tags);
2427570af302Sopenharmony_ci		if (status != REG_OK)
2428570af302Sopenharmony_ci		  return status;
2429570af302Sopenharmony_ci		/* Allocate arrays for the tags and parameters. */
2430570af302Sopenharmony_ci		tags = xmalloc(sizeof(int) * (num_tags + 1));
2431570af302Sopenharmony_ci		if (!tags)
2432570af302Sopenharmony_ci		  return REG_ESPACE;
2433570af302Sopenharmony_ci		tags[0] = -1;
2434570af302Sopenharmony_ci		assertions = 0;
2435570af302Sopenharmony_ci		/* Second pass with tre_mach_empty() to get the list of
2436570af302Sopenharmony_ci		   tags and parameters. */
2437570af302Sopenharmony_ci		status = tre_match_empty(stack, cat->right, tags,
2438570af302Sopenharmony_ci					 &assertions, NULL);
2439570af302Sopenharmony_ci		if (status != REG_OK)
2440570af302Sopenharmony_ci		  {
2441570af302Sopenharmony_ci		    xfree(tags);
2442570af302Sopenharmony_ci		    return status;
2443570af302Sopenharmony_ci		  }
2444570af302Sopenharmony_ci		node->lastpos =
2445570af302Sopenharmony_ci		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2446570af302Sopenharmony_ci				tags, assertions);
2447570af302Sopenharmony_ci		xfree(tags);
2448570af302Sopenharmony_ci		if (!node->lastpos)
2449570af302Sopenharmony_ci		  return REG_ESPACE;
2450570af302Sopenharmony_ci	      }
2451570af302Sopenharmony_ci	    else
2452570af302Sopenharmony_ci	      {
2453570af302Sopenharmony_ci		node->lastpos = cat->right->lastpos;
2454570af302Sopenharmony_ci	      }
2455570af302Sopenharmony_ci	    break;
2456570af302Sopenharmony_ci	  }
2457570af302Sopenharmony_ci
2458570af302Sopenharmony_ci	default:
2459570af302Sopenharmony_ci	  assert(0);
2460570af302Sopenharmony_ci	  break;
2461570af302Sopenharmony_ci	}
2462570af302Sopenharmony_ci    }
2463570af302Sopenharmony_ci
2464570af302Sopenharmony_ci  return REG_OK;
2465570af302Sopenharmony_ci}
2466570af302Sopenharmony_ci
2467570af302Sopenharmony_ci
2468570af302Sopenharmony_ci/* Adds a transition from each position in `p1' to each position in `p2'. */
2469570af302Sopenharmony_cistatic reg_errcode_t
2470570af302Sopenharmony_citre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2471570af302Sopenharmony_ci	       tre_tnfa_transition_t *transitions,
2472570af302Sopenharmony_ci	       int *counts, int *offs)
2473570af302Sopenharmony_ci{
2474570af302Sopenharmony_ci  tre_pos_and_tags_t *orig_p2 = p2;
2475570af302Sopenharmony_ci  tre_tnfa_transition_t *trans;
2476570af302Sopenharmony_ci  int i, j, k, l, dup, prev_p2_pos;
2477570af302Sopenharmony_ci
2478570af302Sopenharmony_ci  if (transitions != NULL)
2479570af302Sopenharmony_ci    while (p1->position >= 0)
2480570af302Sopenharmony_ci      {
2481570af302Sopenharmony_ci	p2 = orig_p2;
2482570af302Sopenharmony_ci	prev_p2_pos = -1;
2483570af302Sopenharmony_ci	while (p2->position >= 0)
2484570af302Sopenharmony_ci	  {
2485570af302Sopenharmony_ci	    /* Optimization: if this position was already handled, skip it. */
2486570af302Sopenharmony_ci	    if (p2->position == prev_p2_pos)
2487570af302Sopenharmony_ci	      {
2488570af302Sopenharmony_ci		p2++;
2489570af302Sopenharmony_ci		continue;
2490570af302Sopenharmony_ci	      }
2491570af302Sopenharmony_ci	    prev_p2_pos = p2->position;
2492570af302Sopenharmony_ci	    /* Set `trans' to point to the next unused transition from
2493570af302Sopenharmony_ci	       position `p1->position'. */
2494570af302Sopenharmony_ci	    trans = transitions + offs[p1->position];
2495570af302Sopenharmony_ci	    while (trans->state != NULL)
2496570af302Sopenharmony_ci	      {
2497570af302Sopenharmony_ci#if 0
2498570af302Sopenharmony_ci		/* If we find a previous transition from `p1->position' to
2499570af302Sopenharmony_ci		   `p2->position', it is overwritten.  This can happen only
2500570af302Sopenharmony_ci		   if there are nested loops in the regexp, like in "((a)*)*".
2501570af302Sopenharmony_ci		   In POSIX.2 repetition using the outer loop is always
2502570af302Sopenharmony_ci		   preferred over using the inner loop.	 Therefore the
2503570af302Sopenharmony_ci		   transition for the inner loop is useless and can be thrown
2504570af302Sopenharmony_ci		   away. */
2505570af302Sopenharmony_ci		/* XXX - The same position is used for all nodes in a bracket
2506570af302Sopenharmony_ci		   expression, so this optimization cannot be used (it will
2507570af302Sopenharmony_ci		   break bracket expressions) unless I figure out a way to
2508570af302Sopenharmony_ci		   detect it here. */
2509570af302Sopenharmony_ci		if (trans->state_id == p2->position)
2510570af302Sopenharmony_ci		  {
2511570af302Sopenharmony_ci		    break;
2512570af302Sopenharmony_ci		  }
2513570af302Sopenharmony_ci#endif
2514570af302Sopenharmony_ci		trans++;
2515570af302Sopenharmony_ci	      }
2516570af302Sopenharmony_ci
2517570af302Sopenharmony_ci	    if (trans->state == NULL)
2518570af302Sopenharmony_ci	      (trans + 1)->state = NULL;
2519570af302Sopenharmony_ci	    /* Use the character ranges, assertions, etc. from `p1' for
2520570af302Sopenharmony_ci	       the transition from `p1' to `p2'. */
2521570af302Sopenharmony_ci	    trans->code_min = p1->code_min;
2522570af302Sopenharmony_ci	    trans->code_max = p1->code_max;
2523570af302Sopenharmony_ci	    trans->state = transitions + offs[p2->position];
2524570af302Sopenharmony_ci	    trans->state_id = p2->position;
2525570af302Sopenharmony_ci	    trans->assertions = p1->assertions | p2->assertions
2526570af302Sopenharmony_ci	      | (p1->class ? ASSERT_CHAR_CLASS : 0)
2527570af302Sopenharmony_ci	      | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2528570af302Sopenharmony_ci	    if (p1->backref >= 0)
2529570af302Sopenharmony_ci	      {
2530570af302Sopenharmony_ci		assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2531570af302Sopenharmony_ci		assert(p2->backref < 0);
2532570af302Sopenharmony_ci		trans->u.backref = p1->backref;
2533570af302Sopenharmony_ci		trans->assertions |= ASSERT_BACKREF;
2534570af302Sopenharmony_ci	      }
2535570af302Sopenharmony_ci	    else
2536570af302Sopenharmony_ci	      trans->u.class = p1->class;
2537570af302Sopenharmony_ci	    if (p1->neg_classes != NULL)
2538570af302Sopenharmony_ci	      {
2539570af302Sopenharmony_ci		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2540570af302Sopenharmony_ci		trans->neg_classes =
2541570af302Sopenharmony_ci		  xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2542570af302Sopenharmony_ci		if (trans->neg_classes == NULL)
2543570af302Sopenharmony_ci		  return REG_ESPACE;
2544570af302Sopenharmony_ci		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2545570af302Sopenharmony_ci		  trans->neg_classes[i] = p1->neg_classes[i];
2546570af302Sopenharmony_ci		trans->neg_classes[i] = (tre_ctype_t)0;
2547570af302Sopenharmony_ci	      }
2548570af302Sopenharmony_ci	    else
2549570af302Sopenharmony_ci	      trans->neg_classes = NULL;
2550570af302Sopenharmony_ci
2551570af302Sopenharmony_ci	    /* Find out how many tags this transition has. */
2552570af302Sopenharmony_ci	    i = 0;
2553570af302Sopenharmony_ci	    if (p1->tags != NULL)
2554570af302Sopenharmony_ci	      while(p1->tags[i] >= 0)
2555570af302Sopenharmony_ci		i++;
2556570af302Sopenharmony_ci	    j = 0;
2557570af302Sopenharmony_ci	    if (p2->tags != NULL)
2558570af302Sopenharmony_ci	      while(p2->tags[j] >= 0)
2559570af302Sopenharmony_ci		j++;
2560570af302Sopenharmony_ci
2561570af302Sopenharmony_ci	    /* If we are overwriting a transition, free the old tag array. */
2562570af302Sopenharmony_ci	    if (trans->tags != NULL)
2563570af302Sopenharmony_ci	      xfree(trans->tags);
2564570af302Sopenharmony_ci	    trans->tags = NULL;
2565570af302Sopenharmony_ci
2566570af302Sopenharmony_ci	    /* If there were any tags, allocate an array and fill it. */
2567570af302Sopenharmony_ci	    if (i + j > 0)
2568570af302Sopenharmony_ci	      {
2569570af302Sopenharmony_ci		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2570570af302Sopenharmony_ci		if (!trans->tags)
2571570af302Sopenharmony_ci		  return REG_ESPACE;
2572570af302Sopenharmony_ci		i = 0;
2573570af302Sopenharmony_ci		if (p1->tags != NULL)
2574570af302Sopenharmony_ci		  while(p1->tags[i] >= 0)
2575570af302Sopenharmony_ci		    {
2576570af302Sopenharmony_ci		      trans->tags[i] = p1->tags[i];
2577570af302Sopenharmony_ci		      i++;
2578570af302Sopenharmony_ci		    }
2579570af302Sopenharmony_ci		l = i;
2580570af302Sopenharmony_ci		j = 0;
2581570af302Sopenharmony_ci		if (p2->tags != NULL)
2582570af302Sopenharmony_ci		  while (p2->tags[j] >= 0)
2583570af302Sopenharmony_ci		    {
2584570af302Sopenharmony_ci		      /* Don't add duplicates. */
2585570af302Sopenharmony_ci		      dup = 0;
2586570af302Sopenharmony_ci		      for (k = 0; k < i; k++)
2587570af302Sopenharmony_ci			if (trans->tags[k] == p2->tags[j])
2588570af302Sopenharmony_ci			  {
2589570af302Sopenharmony_ci			    dup = 1;
2590570af302Sopenharmony_ci			    break;
2591570af302Sopenharmony_ci			  }
2592570af302Sopenharmony_ci		      if (!dup)
2593570af302Sopenharmony_ci			trans->tags[l++] = p2->tags[j];
2594570af302Sopenharmony_ci		      j++;
2595570af302Sopenharmony_ci		    }
2596570af302Sopenharmony_ci		trans->tags[l] = -1;
2597570af302Sopenharmony_ci	      }
2598570af302Sopenharmony_ci
2599570af302Sopenharmony_ci	    p2++;
2600570af302Sopenharmony_ci	  }
2601570af302Sopenharmony_ci	p1++;
2602570af302Sopenharmony_ci      }
2603570af302Sopenharmony_ci  else
2604570af302Sopenharmony_ci    /* Compute a maximum limit for the number of transitions leaving
2605570af302Sopenharmony_ci       from each state. */
2606570af302Sopenharmony_ci    while (p1->position >= 0)
2607570af302Sopenharmony_ci      {
2608570af302Sopenharmony_ci	p2 = orig_p2;
2609570af302Sopenharmony_ci	while (p2->position >= 0)
2610570af302Sopenharmony_ci	  {
2611570af302Sopenharmony_ci	    counts[p1->position]++;
2612570af302Sopenharmony_ci	    p2++;
2613570af302Sopenharmony_ci	  }
2614570af302Sopenharmony_ci	p1++;
2615570af302Sopenharmony_ci      }
2616570af302Sopenharmony_ci  return REG_OK;
2617570af302Sopenharmony_ci}
2618570af302Sopenharmony_ci
2619570af302Sopenharmony_ci/* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
2620570af302Sopenharmony_ci   labelled with one character range (there are no transitions on empty
2621570af302Sopenharmony_ci   strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2622570af302Sopenharmony_ci   the regexp. */
2623570af302Sopenharmony_cistatic reg_errcode_t
2624570af302Sopenharmony_citre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2625570af302Sopenharmony_ci		int *counts, int *offs)
2626570af302Sopenharmony_ci{
2627570af302Sopenharmony_ci  tre_union_t *uni;
2628570af302Sopenharmony_ci  tre_catenation_t *cat;
2629570af302Sopenharmony_ci  tre_iteration_t *iter;
2630570af302Sopenharmony_ci  reg_errcode_t errcode = REG_OK;
2631570af302Sopenharmony_ci
2632570af302Sopenharmony_ci  /* XXX - recurse using a stack!. */
2633570af302Sopenharmony_ci  switch (node->type)
2634570af302Sopenharmony_ci    {
2635570af302Sopenharmony_ci    case LITERAL:
2636570af302Sopenharmony_ci      break;
2637570af302Sopenharmony_ci    case UNION:
2638570af302Sopenharmony_ci      uni = (tre_union_t *)node->obj;
2639570af302Sopenharmony_ci      errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2640570af302Sopenharmony_ci      if (errcode != REG_OK)
2641570af302Sopenharmony_ci	return errcode;
2642570af302Sopenharmony_ci      errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2643570af302Sopenharmony_ci      break;
2644570af302Sopenharmony_ci
2645570af302Sopenharmony_ci    case CATENATION:
2646570af302Sopenharmony_ci      cat = (tre_catenation_t *)node->obj;
2647570af302Sopenharmony_ci      /* Add a transition from each position in cat->left->lastpos
2648570af302Sopenharmony_ci	 to each position in cat->right->firstpos. */
2649570af302Sopenharmony_ci      errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2650570af302Sopenharmony_ci			       transitions, counts, offs);
2651570af302Sopenharmony_ci      if (errcode != REG_OK)
2652570af302Sopenharmony_ci	return errcode;
2653570af302Sopenharmony_ci      errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2654570af302Sopenharmony_ci      if (errcode != REG_OK)
2655570af302Sopenharmony_ci	return errcode;
2656570af302Sopenharmony_ci      errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2657570af302Sopenharmony_ci      break;
2658570af302Sopenharmony_ci
2659570af302Sopenharmony_ci    case ITERATION:
2660570af302Sopenharmony_ci      iter = (tre_iteration_t *)node->obj;
2661570af302Sopenharmony_ci      assert(iter->max == -1 || iter->max == 1);
2662570af302Sopenharmony_ci
2663570af302Sopenharmony_ci      if (iter->max == -1)
2664570af302Sopenharmony_ci	{
2665570af302Sopenharmony_ci	  assert(iter->min == 0 || iter->min == 1);
2666570af302Sopenharmony_ci	  /* Add a transition from each last position in the iterated
2667570af302Sopenharmony_ci	     expression to each first position. */
2668570af302Sopenharmony_ci	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2669570af302Sopenharmony_ci				   transitions, counts, offs);
2670570af302Sopenharmony_ci	  if (errcode != REG_OK)
2671570af302Sopenharmony_ci	    return errcode;
2672570af302Sopenharmony_ci	}
2673570af302Sopenharmony_ci      errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2674570af302Sopenharmony_ci      break;
2675570af302Sopenharmony_ci    }
2676570af302Sopenharmony_ci  return errcode;
2677570af302Sopenharmony_ci}
2678570af302Sopenharmony_ci
2679570af302Sopenharmony_ci
2680570af302Sopenharmony_ci#define ERROR_EXIT(err)		  \
2681570af302Sopenharmony_ci  do				  \
2682570af302Sopenharmony_ci    {				  \
2683570af302Sopenharmony_ci      errcode = err;		  \
2684570af302Sopenharmony_ci      if (/*CONSTCOND*/1)	  \
2685570af302Sopenharmony_ci      	goto error_exit;	  \
2686570af302Sopenharmony_ci    }				  \
2687570af302Sopenharmony_ci while (/*CONSTCOND*/0)
2688570af302Sopenharmony_ci
2689570af302Sopenharmony_ci
2690570af302Sopenharmony_ciint
2691570af302Sopenharmony_ciregcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2692570af302Sopenharmony_ci{
2693570af302Sopenharmony_ci  tre_stack_t *stack;
2694570af302Sopenharmony_ci  tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2695570af302Sopenharmony_ci  tre_pos_and_tags_t *p;
2696570af302Sopenharmony_ci  int *counts = NULL, *offs = NULL;
2697570af302Sopenharmony_ci  int i, add = 0;
2698570af302Sopenharmony_ci  tre_tnfa_transition_t *transitions, *initial;
2699570af302Sopenharmony_ci  tre_tnfa_t *tnfa = NULL;
2700570af302Sopenharmony_ci  tre_submatch_data_t *submatch_data;
2701570af302Sopenharmony_ci  tre_tag_direction_t *tag_directions = NULL;
2702570af302Sopenharmony_ci  reg_errcode_t errcode;
2703570af302Sopenharmony_ci  tre_mem_t mem;
2704570af302Sopenharmony_ci
2705570af302Sopenharmony_ci  /* Parse context. */
2706570af302Sopenharmony_ci  tre_parse_ctx_t parse_ctx;
2707570af302Sopenharmony_ci
2708570af302Sopenharmony_ci  /* Allocate a stack used throughout the compilation process for various
2709570af302Sopenharmony_ci     purposes. */
2710570af302Sopenharmony_ci  stack = tre_stack_new(512, 1024000, 128);
2711570af302Sopenharmony_ci  if (!stack)
2712570af302Sopenharmony_ci    return REG_ESPACE;
2713570af302Sopenharmony_ci  /* Allocate a fast memory allocator. */
2714570af302Sopenharmony_ci  mem = tre_mem_new();
2715570af302Sopenharmony_ci  if (!mem)
2716570af302Sopenharmony_ci    {
2717570af302Sopenharmony_ci      tre_stack_destroy(stack);
2718570af302Sopenharmony_ci      return REG_ESPACE;
2719570af302Sopenharmony_ci    }
2720570af302Sopenharmony_ci
2721570af302Sopenharmony_ci  /* Parse the regexp. */
2722570af302Sopenharmony_ci  memset(&parse_ctx, 0, sizeof(parse_ctx));
2723570af302Sopenharmony_ci  parse_ctx.mem = mem;
2724570af302Sopenharmony_ci  parse_ctx.stack = stack;
2725570af302Sopenharmony_ci  parse_ctx.start = regex;
2726570af302Sopenharmony_ci  parse_ctx.cflags = cflags;
2727570af302Sopenharmony_ci  parse_ctx.max_backref = -1;
2728570af302Sopenharmony_ci  errcode = tre_parse(&parse_ctx);
2729570af302Sopenharmony_ci  if (errcode != REG_OK)
2730570af302Sopenharmony_ci    ERROR_EXIT(errcode);
2731570af302Sopenharmony_ci  preg->re_nsub = parse_ctx.submatch_id - 1;
2732570af302Sopenharmony_ci  tree = parse_ctx.n;
2733570af302Sopenharmony_ci
2734570af302Sopenharmony_ci#ifdef TRE_DEBUG
2735570af302Sopenharmony_ci  tre_ast_print(tree);
2736570af302Sopenharmony_ci#endif /* TRE_DEBUG */
2737570af302Sopenharmony_ci
2738570af302Sopenharmony_ci  /* Referring to nonexistent subexpressions is illegal. */
2739570af302Sopenharmony_ci  if (parse_ctx.max_backref > (int)preg->re_nsub)
2740570af302Sopenharmony_ci    ERROR_EXIT(REG_ESUBREG);
2741570af302Sopenharmony_ci
2742570af302Sopenharmony_ci  /* Allocate the TNFA struct. */
2743570af302Sopenharmony_ci  tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2744570af302Sopenharmony_ci  if (tnfa == NULL)
2745570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2746570af302Sopenharmony_ci  tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2747570af302Sopenharmony_ci  tnfa->have_approx = 0;
2748570af302Sopenharmony_ci  tnfa->num_submatches = parse_ctx.submatch_id;
2749570af302Sopenharmony_ci
2750570af302Sopenharmony_ci  /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
2751570af302Sopenharmony_ci     regexp does not have back references, this can be skipped. */
2752570af302Sopenharmony_ci  if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2753570af302Sopenharmony_ci    {
2754570af302Sopenharmony_ci
2755570af302Sopenharmony_ci      /* Figure out how many tags we will need. */
2756570af302Sopenharmony_ci      errcode = tre_add_tags(NULL, stack, tree, tnfa);
2757570af302Sopenharmony_ci      if (errcode != REG_OK)
2758570af302Sopenharmony_ci	ERROR_EXIT(errcode);
2759570af302Sopenharmony_ci
2760570af302Sopenharmony_ci      if (tnfa->num_tags > 0)
2761570af302Sopenharmony_ci	{
2762570af302Sopenharmony_ci	  tag_directions = xmalloc(sizeof(*tag_directions)
2763570af302Sopenharmony_ci				   * (tnfa->num_tags + 1));
2764570af302Sopenharmony_ci	  if (tag_directions == NULL)
2765570af302Sopenharmony_ci	    ERROR_EXIT(REG_ESPACE);
2766570af302Sopenharmony_ci	  tnfa->tag_directions = tag_directions;
2767570af302Sopenharmony_ci	  memset(tag_directions, -1,
2768570af302Sopenharmony_ci		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2769570af302Sopenharmony_ci	}
2770570af302Sopenharmony_ci      tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2771570af302Sopenharmony_ci				   sizeof(*tnfa->minimal_tags));
2772570af302Sopenharmony_ci      if (tnfa->minimal_tags == NULL)
2773570af302Sopenharmony_ci	ERROR_EXIT(REG_ESPACE);
2774570af302Sopenharmony_ci
2775570af302Sopenharmony_ci      submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2776570af302Sopenharmony_ci			      sizeof(*submatch_data));
2777570af302Sopenharmony_ci      if (submatch_data == NULL)
2778570af302Sopenharmony_ci	ERROR_EXIT(REG_ESPACE);
2779570af302Sopenharmony_ci      tnfa->submatch_data = submatch_data;
2780570af302Sopenharmony_ci
2781570af302Sopenharmony_ci      errcode = tre_add_tags(mem, stack, tree, tnfa);
2782570af302Sopenharmony_ci      if (errcode != REG_OK)
2783570af302Sopenharmony_ci	ERROR_EXIT(errcode);
2784570af302Sopenharmony_ci
2785570af302Sopenharmony_ci    }
2786570af302Sopenharmony_ci
2787570af302Sopenharmony_ci  /* Expand iteration nodes. */
2788570af302Sopenharmony_ci  errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2789570af302Sopenharmony_ci			   tag_directions);
2790570af302Sopenharmony_ci  if (errcode != REG_OK)
2791570af302Sopenharmony_ci    ERROR_EXIT(errcode);
2792570af302Sopenharmony_ci
2793570af302Sopenharmony_ci  /* Add a dummy node for the final state.
2794570af302Sopenharmony_ci     XXX - For certain patterns this dummy node can be optimized away,
2795570af302Sopenharmony_ci	   for example "a*" or "ab*".	Figure out a simple way to detect
2796570af302Sopenharmony_ci	   this possibility. */
2797570af302Sopenharmony_ci  tmp_ast_l = tree;
2798570af302Sopenharmony_ci  tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2799570af302Sopenharmony_ci  if (tmp_ast_r == NULL)
2800570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2801570af302Sopenharmony_ci
2802570af302Sopenharmony_ci  tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2803570af302Sopenharmony_ci  if (tree == NULL)
2804570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2805570af302Sopenharmony_ci
2806570af302Sopenharmony_ci  errcode = tre_compute_nfl(mem, stack, tree);
2807570af302Sopenharmony_ci  if (errcode != REG_OK)
2808570af302Sopenharmony_ci    ERROR_EXIT(errcode);
2809570af302Sopenharmony_ci
2810570af302Sopenharmony_ci  counts = xmalloc(sizeof(int) * parse_ctx.position);
2811570af302Sopenharmony_ci  if (counts == NULL)
2812570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2813570af302Sopenharmony_ci
2814570af302Sopenharmony_ci  offs = xmalloc(sizeof(int) * parse_ctx.position);
2815570af302Sopenharmony_ci  if (offs == NULL)
2816570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2817570af302Sopenharmony_ci
2818570af302Sopenharmony_ci  for (i = 0; i < parse_ctx.position; i++)
2819570af302Sopenharmony_ci    counts[i] = 0;
2820570af302Sopenharmony_ci  tre_ast_to_tnfa(tree, NULL, counts, NULL);
2821570af302Sopenharmony_ci
2822570af302Sopenharmony_ci  add = 0;
2823570af302Sopenharmony_ci  for (i = 0; i < parse_ctx.position; i++)
2824570af302Sopenharmony_ci    {
2825570af302Sopenharmony_ci      offs[i] = add;
2826570af302Sopenharmony_ci      add += counts[i] + 1;
2827570af302Sopenharmony_ci      counts[i] = 0;
2828570af302Sopenharmony_ci    }
2829570af302Sopenharmony_ci  transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2830570af302Sopenharmony_ci  if (transitions == NULL)
2831570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2832570af302Sopenharmony_ci  tnfa->transitions = transitions;
2833570af302Sopenharmony_ci  tnfa->num_transitions = add;
2834570af302Sopenharmony_ci
2835570af302Sopenharmony_ci  errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2836570af302Sopenharmony_ci  if (errcode != REG_OK)
2837570af302Sopenharmony_ci    ERROR_EXIT(errcode);
2838570af302Sopenharmony_ci
2839570af302Sopenharmony_ci  tnfa->firstpos_chars = NULL;
2840570af302Sopenharmony_ci
2841570af302Sopenharmony_ci  p = tree->firstpos;
2842570af302Sopenharmony_ci  i = 0;
2843570af302Sopenharmony_ci  while (p->position >= 0)
2844570af302Sopenharmony_ci    {
2845570af302Sopenharmony_ci      i++;
2846570af302Sopenharmony_ci      p++;
2847570af302Sopenharmony_ci    }
2848570af302Sopenharmony_ci
2849570af302Sopenharmony_ci  initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2850570af302Sopenharmony_ci  if (initial == NULL)
2851570af302Sopenharmony_ci    ERROR_EXIT(REG_ESPACE);
2852570af302Sopenharmony_ci  tnfa->initial = initial;
2853570af302Sopenharmony_ci
2854570af302Sopenharmony_ci  i = 0;
2855570af302Sopenharmony_ci  for (p = tree->firstpos; p->position >= 0; p++)
2856570af302Sopenharmony_ci    {
2857570af302Sopenharmony_ci      initial[i].state = transitions + offs[p->position];
2858570af302Sopenharmony_ci      initial[i].state_id = p->position;
2859570af302Sopenharmony_ci      initial[i].tags = NULL;
2860570af302Sopenharmony_ci      /* Copy the arrays p->tags, and p->params, they are allocated
2861570af302Sopenharmony_ci	 from a tre_mem object. */
2862570af302Sopenharmony_ci      if (p->tags)
2863570af302Sopenharmony_ci	{
2864570af302Sopenharmony_ci	  int j;
2865570af302Sopenharmony_ci	  for (j = 0; p->tags[j] >= 0; j++);
2866570af302Sopenharmony_ci	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2867570af302Sopenharmony_ci	  if (!initial[i].tags)
2868570af302Sopenharmony_ci	    ERROR_EXIT(REG_ESPACE);
2869570af302Sopenharmony_ci	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2870570af302Sopenharmony_ci	}
2871570af302Sopenharmony_ci      initial[i].assertions = p->assertions;
2872570af302Sopenharmony_ci      i++;
2873570af302Sopenharmony_ci    }
2874570af302Sopenharmony_ci  initial[i].state = NULL;
2875570af302Sopenharmony_ci
2876570af302Sopenharmony_ci  tnfa->num_transitions = add;
2877570af302Sopenharmony_ci  tnfa->final = transitions + offs[tree->lastpos[0].position];
2878570af302Sopenharmony_ci  tnfa->num_states = parse_ctx.position;
2879570af302Sopenharmony_ci  tnfa->cflags = cflags;
2880570af302Sopenharmony_ci
2881570af302Sopenharmony_ci  tre_mem_destroy(mem);
2882570af302Sopenharmony_ci  tre_stack_destroy(stack);
2883570af302Sopenharmony_ci  xfree(counts);
2884570af302Sopenharmony_ci  xfree(offs);
2885570af302Sopenharmony_ci
2886570af302Sopenharmony_ci  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2887570af302Sopenharmony_ci  return REG_OK;
2888570af302Sopenharmony_ci
2889570af302Sopenharmony_ci error_exit:
2890570af302Sopenharmony_ci  /* Free everything that was allocated and return the error code. */
2891570af302Sopenharmony_ci  tre_mem_destroy(mem);
2892570af302Sopenharmony_ci  if (stack != NULL)
2893570af302Sopenharmony_ci    tre_stack_destroy(stack);
2894570af302Sopenharmony_ci  if (counts != NULL)
2895570af302Sopenharmony_ci    xfree(counts);
2896570af302Sopenharmony_ci  if (offs != NULL)
2897570af302Sopenharmony_ci    xfree(offs);
2898570af302Sopenharmony_ci  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2899570af302Sopenharmony_ci  regfree(preg);
2900570af302Sopenharmony_ci  return errcode;
2901570af302Sopenharmony_ci}
2902570af302Sopenharmony_ci
2903570af302Sopenharmony_ci
2904570af302Sopenharmony_ci
2905570af302Sopenharmony_ci
2906570af302Sopenharmony_civoid
2907570af302Sopenharmony_ciregfree(regex_t *preg)
2908570af302Sopenharmony_ci{
2909570af302Sopenharmony_ci  tre_tnfa_t *tnfa;
2910570af302Sopenharmony_ci  unsigned int i;
2911570af302Sopenharmony_ci  tre_tnfa_transition_t *trans;
2912570af302Sopenharmony_ci
2913570af302Sopenharmony_ci  tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2914570af302Sopenharmony_ci  if (!tnfa)
2915570af302Sopenharmony_ci    return;
2916570af302Sopenharmony_ci
2917570af302Sopenharmony_ci  for (i = 0; i < tnfa->num_transitions; i++)
2918570af302Sopenharmony_ci    if (tnfa->transitions[i].state)
2919570af302Sopenharmony_ci      {
2920570af302Sopenharmony_ci	if (tnfa->transitions[i].tags)
2921570af302Sopenharmony_ci	  xfree(tnfa->transitions[i].tags);
2922570af302Sopenharmony_ci	if (tnfa->transitions[i].neg_classes)
2923570af302Sopenharmony_ci	  xfree(tnfa->transitions[i].neg_classes);
2924570af302Sopenharmony_ci      }
2925570af302Sopenharmony_ci  if (tnfa->transitions)
2926570af302Sopenharmony_ci    xfree(tnfa->transitions);
2927570af302Sopenharmony_ci
2928570af302Sopenharmony_ci  if (tnfa->initial)
2929570af302Sopenharmony_ci    {
2930570af302Sopenharmony_ci      for (trans = tnfa->initial; trans->state; trans++)
2931570af302Sopenharmony_ci	{
2932570af302Sopenharmony_ci	  if (trans->tags)
2933570af302Sopenharmony_ci	    xfree(trans->tags);
2934570af302Sopenharmony_ci	}
2935570af302Sopenharmony_ci      xfree(tnfa->initial);
2936570af302Sopenharmony_ci    }
2937570af302Sopenharmony_ci
2938570af302Sopenharmony_ci  if (tnfa->submatch_data)
2939570af302Sopenharmony_ci    {
2940570af302Sopenharmony_ci      for (i = 0; i < tnfa->num_submatches; i++)
2941570af302Sopenharmony_ci	if (tnfa->submatch_data[i].parents)
2942570af302Sopenharmony_ci	  xfree(tnfa->submatch_data[i].parents);
2943570af302Sopenharmony_ci      xfree(tnfa->submatch_data);
2944570af302Sopenharmony_ci    }
2945570af302Sopenharmony_ci
2946570af302Sopenharmony_ci  if (tnfa->tag_directions)
2947570af302Sopenharmony_ci    xfree(tnfa->tag_directions);
2948570af302Sopenharmony_ci  if (tnfa->firstpos_chars)
2949570af302Sopenharmony_ci    xfree(tnfa->firstpos_chars);
2950570af302Sopenharmony_ci  if (tnfa->minimal_tags)
2951570af302Sopenharmony_ci    xfree(tnfa->minimal_tags);
2952570af302Sopenharmony_ci  xfree(tnfa);
2953570af302Sopenharmony_ci}
2954