1570af302Sopenharmony_ci/*
2570af302Sopenharmony_ci  regexec.c - TRE POSIX compatible matching functions (and more).
3570af302Sopenharmony_ci
4570af302Sopenharmony_ci  Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5570af302Sopenharmony_ci  All rights reserved.
6570af302Sopenharmony_ci
7570af302Sopenharmony_ci  Redistribution and use in source and binary forms, with or without
8570af302Sopenharmony_ci  modification, are permitted provided that the following conditions
9570af302Sopenharmony_ci  are met:
10570af302Sopenharmony_ci
11570af302Sopenharmony_ci    1. Redistributions of source code must retain the above copyright
12570af302Sopenharmony_ci       notice, this list of conditions and the following disclaimer.
13570af302Sopenharmony_ci
14570af302Sopenharmony_ci    2. Redistributions in binary form must reproduce the above copyright
15570af302Sopenharmony_ci       notice, this list of conditions and the following disclaimer in the
16570af302Sopenharmony_ci       documentation and/or other materials provided with the distribution.
17570af302Sopenharmony_ci
18570af302Sopenharmony_ci  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19570af302Sopenharmony_ci  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20570af302Sopenharmony_ci  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21570af302Sopenharmony_ci  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22570af302Sopenharmony_ci  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23570af302Sopenharmony_ci  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24570af302Sopenharmony_ci  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25570af302Sopenharmony_ci  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26570af302Sopenharmony_ci  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27570af302Sopenharmony_ci  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28570af302Sopenharmony_ci  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29570af302Sopenharmony_ci
30570af302Sopenharmony_ci*/
31570af302Sopenharmony_ci
32570af302Sopenharmony_ci#include <stdlib.h>
33570af302Sopenharmony_ci#include <string.h>
34570af302Sopenharmony_ci#include <wchar.h>
35570af302Sopenharmony_ci#include <wctype.h>
36570af302Sopenharmony_ci#include <limits.h>
37570af302Sopenharmony_ci#include <stdint.h>
38570af302Sopenharmony_ci
39570af302Sopenharmony_ci#include <regex.h>
40570af302Sopenharmony_ci
41570af302Sopenharmony_ci#include "tre.h"
42570af302Sopenharmony_ci
43570af302Sopenharmony_ci#include <assert.h>
44570af302Sopenharmony_ci
45570af302Sopenharmony_cistatic void
46570af302Sopenharmony_citre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
47570af302Sopenharmony_ci		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
48570af302Sopenharmony_ci
49570af302Sopenharmony_ci/***********************************************************************
50570af302Sopenharmony_ci from tre-match-utils.h
51570af302Sopenharmony_ci***********************************************************************/
52570af302Sopenharmony_ci
53570af302Sopenharmony_ci#define GET_NEXT_WCHAR() do {                                                 \
54570af302Sopenharmony_ci    prev_c = next_c; pos += pos_add_next;                                     \
55570af302Sopenharmony_ci    if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) {        \
56570af302Sopenharmony_ci        if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; }         \
57570af302Sopenharmony_ci        else pos_add_next++;                                                  \
58570af302Sopenharmony_ci    }                                                                         \
59570af302Sopenharmony_ci    str_byte += pos_add_next;                                                 \
60570af302Sopenharmony_ci  } while (0)
61570af302Sopenharmony_ci
62570af302Sopenharmony_ci#define IS_WORD_CHAR(c)	 ((c) == L'_' || tre_isalnum(c))
63570af302Sopenharmony_ci
64570af302Sopenharmony_ci#define CHECK_ASSERTIONS(assertions)					      \
65570af302Sopenharmony_ci  (((assertions & ASSERT_AT_BOL)					      \
66570af302Sopenharmony_ci    && (pos > 0 || reg_notbol)						      \
67570af302Sopenharmony_ci    && (prev_c != L'\n' || !reg_newline))				      \
68570af302Sopenharmony_ci   || ((assertions & ASSERT_AT_EOL)					      \
69570af302Sopenharmony_ci       && (next_c != L'\0' || reg_noteol)				      \
70570af302Sopenharmony_ci       && (next_c != L'\n' || !reg_newline))				      \
71570af302Sopenharmony_ci   || ((assertions & ASSERT_AT_BOW)					      \
72570af302Sopenharmony_ci       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))	              \
73570af302Sopenharmony_ci   || ((assertions & ASSERT_AT_EOW)					      \
74570af302Sopenharmony_ci       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
75570af302Sopenharmony_ci   || ((assertions & ASSERT_AT_WB)					      \
76570af302Sopenharmony_ci       && (pos != 0 && next_c != L'\0'					      \
77570af302Sopenharmony_ci	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
78570af302Sopenharmony_ci   || ((assertions & ASSERT_AT_WB_NEG)					      \
79570af302Sopenharmony_ci       && (pos == 0 || next_c == L'\0'					      \
80570af302Sopenharmony_ci	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
81570af302Sopenharmony_ci
82570af302Sopenharmony_ci#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
83570af302Sopenharmony_ci  (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
84570af302Sopenharmony_ci       && !(tnfa->cflags & REG_ICASE)                                         \
85570af302Sopenharmony_ci       && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
86570af302Sopenharmony_ci    || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
87570af302Sopenharmony_ci        && (tnfa->cflags & REG_ICASE)                                         \
88570af302Sopenharmony_ci        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
89570af302Sopenharmony_ci	&& !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
90570af302Sopenharmony_ci    || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
91570af302Sopenharmony_ci        && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
92570af302Sopenharmony_ci                                      tnfa->cflags & REG_ICASE)))
93570af302Sopenharmony_ci
94570af302Sopenharmony_ci
95570af302Sopenharmony_ci
96570af302Sopenharmony_ci
97570af302Sopenharmony_ci/* Returns 1 if `t1' wins `t2', 0 otherwise. */
98570af302Sopenharmony_cistatic int
99570af302Sopenharmony_citre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
100570af302Sopenharmony_ci	      regoff_t *t1, regoff_t *t2)
101570af302Sopenharmony_ci{
102570af302Sopenharmony_ci  int i;
103570af302Sopenharmony_ci  for (i = 0; i < num_tags; i++)
104570af302Sopenharmony_ci    {
105570af302Sopenharmony_ci      if (tag_directions[i] == TRE_TAG_MINIMIZE)
106570af302Sopenharmony_ci	{
107570af302Sopenharmony_ci	  if (t1[i] < t2[i])
108570af302Sopenharmony_ci	    return 1;
109570af302Sopenharmony_ci	  if (t1[i] > t2[i])
110570af302Sopenharmony_ci	    return 0;
111570af302Sopenharmony_ci	}
112570af302Sopenharmony_ci      else
113570af302Sopenharmony_ci	{
114570af302Sopenharmony_ci	  if (t1[i] > t2[i])
115570af302Sopenharmony_ci	    return 1;
116570af302Sopenharmony_ci	  if (t1[i] < t2[i])
117570af302Sopenharmony_ci	    return 0;
118570af302Sopenharmony_ci	}
119570af302Sopenharmony_ci    }
120570af302Sopenharmony_ci  /*  assert(0);*/
121570af302Sopenharmony_ci  return 0;
122570af302Sopenharmony_ci}
123570af302Sopenharmony_ci
124570af302Sopenharmony_cistatic int
125570af302Sopenharmony_citre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
126570af302Sopenharmony_ci{
127570af302Sopenharmony_ci  while (*classes != (tre_ctype_t)0)
128570af302Sopenharmony_ci    if ((!icase && tre_isctype(wc, *classes))
129570af302Sopenharmony_ci	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
130570af302Sopenharmony_ci		      || tre_isctype(tre_tolower(wc), *classes))))
131570af302Sopenharmony_ci      return 1; /* Match. */
132570af302Sopenharmony_ci    else
133570af302Sopenharmony_ci      classes++;
134570af302Sopenharmony_ci  return 0; /* No match. */
135570af302Sopenharmony_ci}
136570af302Sopenharmony_ci
137570af302Sopenharmony_ci
138570af302Sopenharmony_ci/***********************************************************************
139570af302Sopenharmony_ci from tre-match-parallel.c
140570af302Sopenharmony_ci***********************************************************************/
141570af302Sopenharmony_ci
142570af302Sopenharmony_ci/*
143570af302Sopenharmony_ci  This algorithm searches for matches basically by reading characters
144570af302Sopenharmony_ci  in the searched string one by one, starting at the beginning.	 All
145570af302Sopenharmony_ci  matching paths in the TNFA are traversed in parallel.	 When two or
146570af302Sopenharmony_ci  more paths reach the same state, exactly one is chosen according to
147570af302Sopenharmony_ci  tag ordering rules; if returning submatches is not required it does
148570af302Sopenharmony_ci  not matter which path is chosen.
149570af302Sopenharmony_ci
150570af302Sopenharmony_ci  The worst case time required for finding the leftmost and longest
151570af302Sopenharmony_ci  match, or determining that there is no match, is always linearly
152570af302Sopenharmony_ci  dependent on the length of the text being searched.
153570af302Sopenharmony_ci
154570af302Sopenharmony_ci  This algorithm cannot handle TNFAs with back referencing nodes.
155570af302Sopenharmony_ci  See `tre-match-backtrack.c'.
156570af302Sopenharmony_ci*/
157570af302Sopenharmony_ci
158570af302Sopenharmony_citypedef struct {
159570af302Sopenharmony_ci  tre_tnfa_transition_t *state;
160570af302Sopenharmony_ci  regoff_t *tags;
161570af302Sopenharmony_ci} tre_tnfa_reach_t;
162570af302Sopenharmony_ci
163570af302Sopenharmony_citypedef struct {
164570af302Sopenharmony_ci  regoff_t pos;
165570af302Sopenharmony_ci  regoff_t **tags;
166570af302Sopenharmony_ci} tre_reach_pos_t;
167570af302Sopenharmony_ci
168570af302Sopenharmony_ci
169570af302Sopenharmony_cistatic reg_errcode_t
170570af302Sopenharmony_citre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
171570af302Sopenharmony_ci		      regoff_t *match_tags, int eflags,
172570af302Sopenharmony_ci		      regoff_t *match_end_ofs)
173570af302Sopenharmony_ci{
174570af302Sopenharmony_ci  /* State variables required by GET_NEXT_WCHAR. */
175570af302Sopenharmony_ci  tre_char_t prev_c = 0, next_c = 0;
176570af302Sopenharmony_ci  const char *str_byte = string;
177570af302Sopenharmony_ci  regoff_t pos = -1;
178570af302Sopenharmony_ci  regoff_t pos_add_next = 1;
179570af302Sopenharmony_ci#ifdef TRE_MBSTATE
180570af302Sopenharmony_ci  mbstate_t mbstate;
181570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
182570af302Sopenharmony_ci  int reg_notbol = eflags & REG_NOTBOL;
183570af302Sopenharmony_ci  int reg_noteol = eflags & REG_NOTEOL;
184570af302Sopenharmony_ci  int reg_newline = tnfa->cflags & REG_NEWLINE;
185570af302Sopenharmony_ci  reg_errcode_t ret;
186570af302Sopenharmony_ci
187570af302Sopenharmony_ci  char *buf;
188570af302Sopenharmony_ci  tre_tnfa_transition_t *trans_i;
189570af302Sopenharmony_ci  tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
190570af302Sopenharmony_ci  tre_reach_pos_t *reach_pos;
191570af302Sopenharmony_ci  int *tag_i;
192570af302Sopenharmony_ci  int num_tags, i;
193570af302Sopenharmony_ci
194570af302Sopenharmony_ci  regoff_t match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
195570af302Sopenharmony_ci  int new_match = 0;
196570af302Sopenharmony_ci  regoff_t *tmp_tags = NULL;
197570af302Sopenharmony_ci  regoff_t *tmp_iptr;
198570af302Sopenharmony_ci
199570af302Sopenharmony_ci#ifdef TRE_MBSTATE
200570af302Sopenharmony_ci  memset(&mbstate, '\0', sizeof(mbstate));
201570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
202570af302Sopenharmony_ci
203570af302Sopenharmony_ci  if (!match_tags)
204570af302Sopenharmony_ci    num_tags = 0;
205570af302Sopenharmony_ci  else
206570af302Sopenharmony_ci    num_tags = tnfa->num_tags;
207570af302Sopenharmony_ci
208570af302Sopenharmony_ci  /* Allocate memory for temporary data required for matching.	This needs to
209570af302Sopenharmony_ci     be done for every matching operation to be thread safe.  This allocates
210570af302Sopenharmony_ci     everything in a single large block with calloc(). */
211570af302Sopenharmony_ci  {
212570af302Sopenharmony_ci    size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
213570af302Sopenharmony_ci    char *tmp_buf;
214570af302Sopenharmony_ci
215570af302Sopenharmony_ci    /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
216570af302Sopenharmony_ci     * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
217570af302Sopenharmony_ci    if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
218570af302Sopenharmony_ci      return REG_ESPACE;
219570af302Sopenharmony_ci
220570af302Sopenharmony_ci    /* Likewise check rbytes. */
221570af302Sopenharmony_ci    if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
222570af302Sopenharmony_ci      return REG_ESPACE;
223570af302Sopenharmony_ci
224570af302Sopenharmony_ci    /* Likewise check pbytes. */
225570af302Sopenharmony_ci    if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
226570af302Sopenharmony_ci      return REG_ESPACE;
227570af302Sopenharmony_ci
228570af302Sopenharmony_ci    /* Compute the length of the block we need. */
229570af302Sopenharmony_ci    tbytes = sizeof(*tmp_tags) * num_tags;
230570af302Sopenharmony_ci    rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
231570af302Sopenharmony_ci    pbytes = sizeof(*reach_pos) * tnfa->num_states;
232570af302Sopenharmony_ci    xbytes = sizeof(regoff_t) * num_tags;
233570af302Sopenharmony_ci    total_bytes =
234570af302Sopenharmony_ci      (sizeof(long) - 1) * 4 /* for alignment paddings */
235570af302Sopenharmony_ci      + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
236570af302Sopenharmony_ci
237570af302Sopenharmony_ci    /* Allocate the memory. */
238570af302Sopenharmony_ci    buf = calloc(total_bytes, 1);
239570af302Sopenharmony_ci    if (buf == NULL)
240570af302Sopenharmony_ci      return REG_ESPACE;
241570af302Sopenharmony_ci
242570af302Sopenharmony_ci    /* Get the various pointers within tmp_buf (properly aligned). */
243570af302Sopenharmony_ci    tmp_tags = (void *)buf;
244570af302Sopenharmony_ci    tmp_buf = buf + tbytes;
245570af302Sopenharmony_ci    tmp_buf += ALIGN(tmp_buf, long);
246570af302Sopenharmony_ci    reach_next = (void *)tmp_buf;
247570af302Sopenharmony_ci    tmp_buf += rbytes;
248570af302Sopenharmony_ci    tmp_buf += ALIGN(tmp_buf, long);
249570af302Sopenharmony_ci    reach = (void *)tmp_buf;
250570af302Sopenharmony_ci    tmp_buf += rbytes;
251570af302Sopenharmony_ci    tmp_buf += ALIGN(tmp_buf, long);
252570af302Sopenharmony_ci    reach_pos = (void *)tmp_buf;
253570af302Sopenharmony_ci    tmp_buf += pbytes;
254570af302Sopenharmony_ci    tmp_buf += ALIGN(tmp_buf, long);
255570af302Sopenharmony_ci    for (i = 0; i < tnfa->num_states; i++)
256570af302Sopenharmony_ci      {
257570af302Sopenharmony_ci	reach[i].tags = (void *)tmp_buf;
258570af302Sopenharmony_ci	tmp_buf += xbytes;
259570af302Sopenharmony_ci	reach_next[i].tags = (void *)tmp_buf;
260570af302Sopenharmony_ci	tmp_buf += xbytes;
261570af302Sopenharmony_ci      }
262570af302Sopenharmony_ci  }
263570af302Sopenharmony_ci
264570af302Sopenharmony_ci  for (i = 0; i < tnfa->num_states; i++)
265570af302Sopenharmony_ci    reach_pos[i].pos = -1;
266570af302Sopenharmony_ci
267570af302Sopenharmony_ci  GET_NEXT_WCHAR();
268570af302Sopenharmony_ci  pos = 0;
269570af302Sopenharmony_ci
270570af302Sopenharmony_ci  reach_next_i = reach_next;
271570af302Sopenharmony_ci  while (1)
272570af302Sopenharmony_ci    {
273570af302Sopenharmony_ci      /* If no match found yet, add the initial states to `reach_next'. */
274570af302Sopenharmony_ci      if (match_eo < 0)
275570af302Sopenharmony_ci	{
276570af302Sopenharmony_ci	  trans_i = tnfa->initial;
277570af302Sopenharmony_ci	  while (trans_i->state != NULL)
278570af302Sopenharmony_ci	    {
279570af302Sopenharmony_ci	      if (reach_pos[trans_i->state_id].pos < pos)
280570af302Sopenharmony_ci		{
281570af302Sopenharmony_ci		  if (trans_i->assertions
282570af302Sopenharmony_ci		      && CHECK_ASSERTIONS(trans_i->assertions))
283570af302Sopenharmony_ci		    {
284570af302Sopenharmony_ci		      trans_i++;
285570af302Sopenharmony_ci		      continue;
286570af302Sopenharmony_ci		    }
287570af302Sopenharmony_ci
288570af302Sopenharmony_ci		  reach_next_i->state = trans_i->state;
289570af302Sopenharmony_ci		  for (i = 0; i < num_tags; i++)
290570af302Sopenharmony_ci		    reach_next_i->tags[i] = -1;
291570af302Sopenharmony_ci		  tag_i = trans_i->tags;
292570af302Sopenharmony_ci		  if (tag_i)
293570af302Sopenharmony_ci		    while (*tag_i >= 0)
294570af302Sopenharmony_ci		      {
295570af302Sopenharmony_ci			if (*tag_i < num_tags)
296570af302Sopenharmony_ci			  reach_next_i->tags[*tag_i] = pos;
297570af302Sopenharmony_ci			tag_i++;
298570af302Sopenharmony_ci		      }
299570af302Sopenharmony_ci		  if (reach_next_i->state == tnfa->final)
300570af302Sopenharmony_ci		    {
301570af302Sopenharmony_ci		      match_eo = pos;
302570af302Sopenharmony_ci		      new_match = 1;
303570af302Sopenharmony_ci		      for (i = 0; i < num_tags; i++)
304570af302Sopenharmony_ci			match_tags[i] = reach_next_i->tags[i];
305570af302Sopenharmony_ci		    }
306570af302Sopenharmony_ci		  reach_pos[trans_i->state_id].pos = pos;
307570af302Sopenharmony_ci		  reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
308570af302Sopenharmony_ci		  reach_next_i++;
309570af302Sopenharmony_ci		}
310570af302Sopenharmony_ci	      trans_i++;
311570af302Sopenharmony_ci	    }
312570af302Sopenharmony_ci	  reach_next_i->state = NULL;
313570af302Sopenharmony_ci	}
314570af302Sopenharmony_ci      else
315570af302Sopenharmony_ci	{
316570af302Sopenharmony_ci	  if (num_tags == 0 || reach_next_i == reach_next)
317570af302Sopenharmony_ci	    /* We have found a match. */
318570af302Sopenharmony_ci	    break;
319570af302Sopenharmony_ci	}
320570af302Sopenharmony_ci
321570af302Sopenharmony_ci      /* Check for end of string. */
322570af302Sopenharmony_ci      if (!next_c) break;
323570af302Sopenharmony_ci
324570af302Sopenharmony_ci      GET_NEXT_WCHAR();
325570af302Sopenharmony_ci
326570af302Sopenharmony_ci      /* Swap `reach' and `reach_next'. */
327570af302Sopenharmony_ci      reach_i = reach;
328570af302Sopenharmony_ci      reach = reach_next;
329570af302Sopenharmony_ci      reach_next = reach_i;
330570af302Sopenharmony_ci
331570af302Sopenharmony_ci      /* For each state in `reach', weed out states that don't fulfill the
332570af302Sopenharmony_ci	 minimal matching conditions. */
333570af302Sopenharmony_ci      if (tnfa->num_minimals && new_match)
334570af302Sopenharmony_ci	{
335570af302Sopenharmony_ci	  new_match = 0;
336570af302Sopenharmony_ci	  reach_next_i = reach_next;
337570af302Sopenharmony_ci	  for (reach_i = reach; reach_i->state; reach_i++)
338570af302Sopenharmony_ci	    {
339570af302Sopenharmony_ci	      int skip = 0;
340570af302Sopenharmony_ci	      for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
341570af302Sopenharmony_ci		{
342570af302Sopenharmony_ci		  int end = tnfa->minimal_tags[i];
343570af302Sopenharmony_ci		  int start = tnfa->minimal_tags[i + 1];
344570af302Sopenharmony_ci		  if (end >= num_tags)
345570af302Sopenharmony_ci		    {
346570af302Sopenharmony_ci		      skip = 1;
347570af302Sopenharmony_ci		      break;
348570af302Sopenharmony_ci		    }
349570af302Sopenharmony_ci		  else if (reach_i->tags[start] == match_tags[start]
350570af302Sopenharmony_ci			   && reach_i->tags[end] < match_tags[end])
351570af302Sopenharmony_ci		    {
352570af302Sopenharmony_ci		      skip = 1;
353570af302Sopenharmony_ci		      break;
354570af302Sopenharmony_ci		    }
355570af302Sopenharmony_ci		}
356570af302Sopenharmony_ci	      if (!skip)
357570af302Sopenharmony_ci		{
358570af302Sopenharmony_ci		  reach_next_i->state = reach_i->state;
359570af302Sopenharmony_ci		  tmp_iptr = reach_next_i->tags;
360570af302Sopenharmony_ci		  reach_next_i->tags = reach_i->tags;
361570af302Sopenharmony_ci		  reach_i->tags = tmp_iptr;
362570af302Sopenharmony_ci		  reach_next_i++;
363570af302Sopenharmony_ci		}
364570af302Sopenharmony_ci	    }
365570af302Sopenharmony_ci	  reach_next_i->state = NULL;
366570af302Sopenharmony_ci
367570af302Sopenharmony_ci	  /* Swap `reach' and `reach_next'. */
368570af302Sopenharmony_ci	  reach_i = reach;
369570af302Sopenharmony_ci	  reach = reach_next;
370570af302Sopenharmony_ci	  reach_next = reach_i;
371570af302Sopenharmony_ci	}
372570af302Sopenharmony_ci
373570af302Sopenharmony_ci      /* For each state in `reach' see if there is a transition leaving with
374570af302Sopenharmony_ci	 the current input symbol to a state not yet in `reach_next', and
375570af302Sopenharmony_ci	 add the destination states to `reach_next'. */
376570af302Sopenharmony_ci      reach_next_i = reach_next;
377570af302Sopenharmony_ci      for (reach_i = reach; reach_i->state; reach_i++)
378570af302Sopenharmony_ci	{
379570af302Sopenharmony_ci	  for (trans_i = reach_i->state; trans_i->state; trans_i++)
380570af302Sopenharmony_ci	    {
381570af302Sopenharmony_ci	      /* Does this transition match the input symbol? */
382570af302Sopenharmony_ci	      if (trans_i->code_min <= (tre_cint_t)prev_c &&
383570af302Sopenharmony_ci		  trans_i->code_max >= (tre_cint_t)prev_c)
384570af302Sopenharmony_ci		{
385570af302Sopenharmony_ci		  if (trans_i->assertions
386570af302Sopenharmony_ci		      && (CHECK_ASSERTIONS(trans_i->assertions)
387570af302Sopenharmony_ci			  || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
388570af302Sopenharmony_ci		    {
389570af302Sopenharmony_ci		      continue;
390570af302Sopenharmony_ci		    }
391570af302Sopenharmony_ci
392570af302Sopenharmony_ci		  /* Compute the tags after this transition. */
393570af302Sopenharmony_ci		  for (i = 0; i < num_tags; i++)
394570af302Sopenharmony_ci		    tmp_tags[i] = reach_i->tags[i];
395570af302Sopenharmony_ci		  tag_i = trans_i->tags;
396570af302Sopenharmony_ci		  if (tag_i != NULL)
397570af302Sopenharmony_ci		    while (*tag_i >= 0)
398570af302Sopenharmony_ci		      {
399570af302Sopenharmony_ci			if (*tag_i < num_tags)
400570af302Sopenharmony_ci			  tmp_tags[*tag_i] = pos;
401570af302Sopenharmony_ci			tag_i++;
402570af302Sopenharmony_ci		      }
403570af302Sopenharmony_ci
404570af302Sopenharmony_ci		  if (reach_pos[trans_i->state_id].pos < pos)
405570af302Sopenharmony_ci		    {
406570af302Sopenharmony_ci		      /* Found an unvisited node. */
407570af302Sopenharmony_ci		      reach_next_i->state = trans_i->state;
408570af302Sopenharmony_ci		      tmp_iptr = reach_next_i->tags;
409570af302Sopenharmony_ci		      reach_next_i->tags = tmp_tags;
410570af302Sopenharmony_ci		      tmp_tags = tmp_iptr;
411570af302Sopenharmony_ci		      reach_pos[trans_i->state_id].pos = pos;
412570af302Sopenharmony_ci		      reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
413570af302Sopenharmony_ci
414570af302Sopenharmony_ci		      if (reach_next_i->state == tnfa->final
415570af302Sopenharmony_ci			  && (match_eo == -1
416570af302Sopenharmony_ci			      || (num_tags > 0
417570af302Sopenharmony_ci				  && reach_next_i->tags[0] <= match_tags[0])))
418570af302Sopenharmony_ci			{
419570af302Sopenharmony_ci			  match_eo = pos;
420570af302Sopenharmony_ci			  new_match = 1;
421570af302Sopenharmony_ci			  for (i = 0; i < num_tags; i++)
422570af302Sopenharmony_ci			    match_tags[i] = reach_next_i->tags[i];
423570af302Sopenharmony_ci			}
424570af302Sopenharmony_ci		      reach_next_i++;
425570af302Sopenharmony_ci
426570af302Sopenharmony_ci		    }
427570af302Sopenharmony_ci		  else
428570af302Sopenharmony_ci		    {
429570af302Sopenharmony_ci		      assert(reach_pos[trans_i->state_id].pos == pos);
430570af302Sopenharmony_ci		      /* Another path has also reached this state.  We choose
431570af302Sopenharmony_ci			 the winner by examining the tag values for both
432570af302Sopenharmony_ci			 paths. */
433570af302Sopenharmony_ci		      if (tre_tag_order(num_tags, tnfa->tag_directions,
434570af302Sopenharmony_ci					tmp_tags,
435570af302Sopenharmony_ci					*reach_pos[trans_i->state_id].tags))
436570af302Sopenharmony_ci			{
437570af302Sopenharmony_ci			  /* The new path wins. */
438570af302Sopenharmony_ci			  tmp_iptr = *reach_pos[trans_i->state_id].tags;
439570af302Sopenharmony_ci			  *reach_pos[trans_i->state_id].tags = tmp_tags;
440570af302Sopenharmony_ci			  if (trans_i->state == tnfa->final)
441570af302Sopenharmony_ci			    {
442570af302Sopenharmony_ci			      match_eo = pos;
443570af302Sopenharmony_ci			      new_match = 1;
444570af302Sopenharmony_ci			      for (i = 0; i < num_tags; i++)
445570af302Sopenharmony_ci				match_tags[i] = tmp_tags[i];
446570af302Sopenharmony_ci			    }
447570af302Sopenharmony_ci			  tmp_tags = tmp_iptr;
448570af302Sopenharmony_ci			}
449570af302Sopenharmony_ci		    }
450570af302Sopenharmony_ci		}
451570af302Sopenharmony_ci	    }
452570af302Sopenharmony_ci	}
453570af302Sopenharmony_ci      reach_next_i->state = NULL;
454570af302Sopenharmony_ci    }
455570af302Sopenharmony_ci
456570af302Sopenharmony_ci  *match_end_ofs = match_eo;
457570af302Sopenharmony_ci  ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
458570af302Sopenharmony_cierror_exit:
459570af302Sopenharmony_ci  xfree(buf);
460570af302Sopenharmony_ci  return ret;
461570af302Sopenharmony_ci}
462570af302Sopenharmony_ci
463570af302Sopenharmony_ci
464570af302Sopenharmony_ci
465570af302Sopenharmony_ci/***********************************************************************
466570af302Sopenharmony_ci from tre-match-backtrack.c
467570af302Sopenharmony_ci***********************************************************************/
468570af302Sopenharmony_ci
469570af302Sopenharmony_ci/*
470570af302Sopenharmony_ci  This matcher is for regexps that use back referencing.  Regexp matching
471570af302Sopenharmony_ci  with back referencing is an NP-complete problem on the number of back
472570af302Sopenharmony_ci  references.  The easiest way to match them is to use a backtracking
473570af302Sopenharmony_ci  routine which basically goes through all possible paths in the TNFA
474570af302Sopenharmony_ci  and chooses the one which results in the best (leftmost and longest)
475570af302Sopenharmony_ci  match.  This can be spectacularly expensive and may run out of stack
476570af302Sopenharmony_ci  space, but there really is no better known generic algorithm.	 Quoting
477570af302Sopenharmony_ci  Henry Spencer from comp.compilers:
478570af302Sopenharmony_ci  <URL: http://compilers.iecc.com/comparch/article/93-03-102>
479570af302Sopenharmony_ci
480570af302Sopenharmony_ci    POSIX.2 REs require longest match, which is really exciting to
481570af302Sopenharmony_ci    implement since the obsolete ("basic") variant also includes
482570af302Sopenharmony_ci    \<digit>.  I haven't found a better way of tackling this than doing
483570af302Sopenharmony_ci    a preliminary match using a DFA (or simulation) on a modified RE
484570af302Sopenharmony_ci    that just replicates subREs for \<digit>, and then doing a
485570af302Sopenharmony_ci    backtracking match to determine whether the subRE matches were
486570af302Sopenharmony_ci    right.  This can be rather slow, but I console myself with the
487570af302Sopenharmony_ci    thought that people who use \<digit> deserve very slow execution.
488570af302Sopenharmony_ci    (Pun unintentional but very appropriate.)
489570af302Sopenharmony_ci
490570af302Sopenharmony_ci*/
491570af302Sopenharmony_ci
492570af302Sopenharmony_citypedef struct {
493570af302Sopenharmony_ci  regoff_t pos;
494570af302Sopenharmony_ci  const char *str_byte;
495570af302Sopenharmony_ci  tre_tnfa_transition_t *state;
496570af302Sopenharmony_ci  int state_id;
497570af302Sopenharmony_ci  int next_c;
498570af302Sopenharmony_ci  regoff_t *tags;
499570af302Sopenharmony_ci#ifdef TRE_MBSTATE
500570af302Sopenharmony_ci  mbstate_t mbstate;
501570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
502570af302Sopenharmony_ci} tre_backtrack_item_t;
503570af302Sopenharmony_ci
504570af302Sopenharmony_citypedef struct tre_backtrack_struct {
505570af302Sopenharmony_ci  tre_backtrack_item_t item;
506570af302Sopenharmony_ci  struct tre_backtrack_struct *prev;
507570af302Sopenharmony_ci  struct tre_backtrack_struct *next;
508570af302Sopenharmony_ci} *tre_backtrack_t;
509570af302Sopenharmony_ci
510570af302Sopenharmony_ci#ifdef TRE_MBSTATE
511570af302Sopenharmony_ci#define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
512570af302Sopenharmony_ci#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
513570af302Sopenharmony_ci#else /* !TRE_MBSTATE */
514570af302Sopenharmony_ci#define BT_STACK_MBSTATE_IN
515570af302Sopenharmony_ci#define BT_STACK_MBSTATE_OUT
516570af302Sopenharmony_ci#endif /* !TRE_MBSTATE */
517570af302Sopenharmony_ci
518570af302Sopenharmony_ci#define tre_bt_mem_new		  tre_mem_new
519570af302Sopenharmony_ci#define tre_bt_mem_alloc	  tre_mem_alloc
520570af302Sopenharmony_ci#define tre_bt_mem_destroy	  tre_mem_destroy
521570af302Sopenharmony_ci
522570af302Sopenharmony_ci
523570af302Sopenharmony_ci#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
524570af302Sopenharmony_ci  do									      \
525570af302Sopenharmony_ci    {									      \
526570af302Sopenharmony_ci      int i;								      \
527570af302Sopenharmony_ci      if (!stack->next)							      \
528570af302Sopenharmony_ci	{								      \
529570af302Sopenharmony_ci	  tre_backtrack_t s;						      \
530570af302Sopenharmony_ci	  s = tre_bt_mem_alloc(mem, sizeof(*s));			      \
531570af302Sopenharmony_ci	  if (!s)							      \
532570af302Sopenharmony_ci	    {								      \
533570af302Sopenharmony_ci	      tre_bt_mem_destroy(mem);					      \
534570af302Sopenharmony_ci	      if (tags)							      \
535570af302Sopenharmony_ci		xfree(tags);						      \
536570af302Sopenharmony_ci	      if (pmatch)						      \
537570af302Sopenharmony_ci		xfree(pmatch);						      \
538570af302Sopenharmony_ci	      if (states_seen)						      \
539570af302Sopenharmony_ci		xfree(states_seen);					      \
540570af302Sopenharmony_ci	      return REG_ESPACE;					      \
541570af302Sopenharmony_ci	    }								      \
542570af302Sopenharmony_ci	  s->prev = stack;						      \
543570af302Sopenharmony_ci	  s->next = NULL;						      \
544570af302Sopenharmony_ci	  s->item.tags = tre_bt_mem_alloc(mem,				      \
545570af302Sopenharmony_ci					  sizeof(*tags) * tnfa->num_tags);    \
546570af302Sopenharmony_ci	  if (!s->item.tags)						      \
547570af302Sopenharmony_ci	    {								      \
548570af302Sopenharmony_ci	      tre_bt_mem_destroy(mem);					      \
549570af302Sopenharmony_ci	      if (tags)							      \
550570af302Sopenharmony_ci		xfree(tags);						      \
551570af302Sopenharmony_ci	      if (pmatch)						      \
552570af302Sopenharmony_ci		xfree(pmatch);						      \
553570af302Sopenharmony_ci	      if (states_seen)						      \
554570af302Sopenharmony_ci		xfree(states_seen);					      \
555570af302Sopenharmony_ci	      return REG_ESPACE;					      \
556570af302Sopenharmony_ci	    }								      \
557570af302Sopenharmony_ci	  stack->next = s;						      \
558570af302Sopenharmony_ci	  stack = s;							      \
559570af302Sopenharmony_ci	}								      \
560570af302Sopenharmony_ci      else								      \
561570af302Sopenharmony_ci	stack = stack->next;						      \
562570af302Sopenharmony_ci      stack->item.pos = (_pos);						      \
563570af302Sopenharmony_ci      stack->item.str_byte = (_str_byte);				      \
564570af302Sopenharmony_ci      stack->item.state = (_state);					      \
565570af302Sopenharmony_ci      stack->item.state_id = (_state_id);				      \
566570af302Sopenharmony_ci      stack->item.next_c = (_next_c);					      \
567570af302Sopenharmony_ci      for (i = 0; i < tnfa->num_tags; i++)				      \
568570af302Sopenharmony_ci	stack->item.tags[i] = (_tags)[i];				      \
569570af302Sopenharmony_ci      BT_STACK_MBSTATE_IN;						      \
570570af302Sopenharmony_ci    }									      \
571570af302Sopenharmony_ci  while (0)
572570af302Sopenharmony_ci
573570af302Sopenharmony_ci#define BT_STACK_POP()							      \
574570af302Sopenharmony_ci  do									      \
575570af302Sopenharmony_ci    {									      \
576570af302Sopenharmony_ci      int i;								      \
577570af302Sopenharmony_ci      assert(stack->prev);						      \
578570af302Sopenharmony_ci      pos = stack->item.pos;						      \
579570af302Sopenharmony_ci      str_byte = stack->item.str_byte;					      \
580570af302Sopenharmony_ci      state = stack->item.state;					      \
581570af302Sopenharmony_ci      next_c = stack->item.next_c;					      \
582570af302Sopenharmony_ci      for (i = 0; i < tnfa->num_tags; i++)				      \
583570af302Sopenharmony_ci	tags[i] = stack->item.tags[i];					      \
584570af302Sopenharmony_ci      BT_STACK_MBSTATE_OUT;						      \
585570af302Sopenharmony_ci      stack = stack->prev;						      \
586570af302Sopenharmony_ci    }									      \
587570af302Sopenharmony_ci  while (0)
588570af302Sopenharmony_ci
589570af302Sopenharmony_ci#undef MIN
590570af302Sopenharmony_ci#define MIN(a, b) ((a) <= (b) ? (a) : (b))
591570af302Sopenharmony_ci
592570af302Sopenharmony_cistatic reg_errcode_t
593570af302Sopenharmony_citre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
594570af302Sopenharmony_ci		       regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
595570af302Sopenharmony_ci{
596570af302Sopenharmony_ci  /* State variables required by GET_NEXT_WCHAR. */
597570af302Sopenharmony_ci  tre_char_t prev_c = 0, next_c = 0;
598570af302Sopenharmony_ci  const char *str_byte = string;
599570af302Sopenharmony_ci  regoff_t pos = 0;
600570af302Sopenharmony_ci  regoff_t pos_add_next = 1;
601570af302Sopenharmony_ci#ifdef TRE_MBSTATE
602570af302Sopenharmony_ci  mbstate_t mbstate;
603570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
604570af302Sopenharmony_ci  int reg_notbol = eflags & REG_NOTBOL;
605570af302Sopenharmony_ci  int reg_noteol = eflags & REG_NOTEOL;
606570af302Sopenharmony_ci  int reg_newline = tnfa->cflags & REG_NEWLINE;
607570af302Sopenharmony_ci
608570af302Sopenharmony_ci  /* These are used to remember the necessary values of the above
609570af302Sopenharmony_ci     variables to return to the position where the current search
610570af302Sopenharmony_ci     started from. */
611570af302Sopenharmony_ci  int next_c_start;
612570af302Sopenharmony_ci  const char *str_byte_start;
613570af302Sopenharmony_ci  regoff_t pos_start = -1;
614570af302Sopenharmony_ci#ifdef TRE_MBSTATE
615570af302Sopenharmony_ci  mbstate_t mbstate_start;
616570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
617570af302Sopenharmony_ci
618570af302Sopenharmony_ci  /* End offset of best match so far, or -1 if no match found yet. */
619570af302Sopenharmony_ci  regoff_t match_eo = -1;
620570af302Sopenharmony_ci  /* Tag arrays. */
621570af302Sopenharmony_ci  int *next_tags;
622570af302Sopenharmony_ci  regoff_t *tags = NULL;
623570af302Sopenharmony_ci  /* Current TNFA state. */
624570af302Sopenharmony_ci  tre_tnfa_transition_t *state;
625570af302Sopenharmony_ci  int *states_seen = NULL;
626570af302Sopenharmony_ci
627570af302Sopenharmony_ci  /* Memory allocator to for allocating the backtracking stack. */
628570af302Sopenharmony_ci  tre_mem_t mem = tre_bt_mem_new();
629570af302Sopenharmony_ci
630570af302Sopenharmony_ci  /* The backtracking stack. */
631570af302Sopenharmony_ci  tre_backtrack_t stack;
632570af302Sopenharmony_ci
633570af302Sopenharmony_ci  tre_tnfa_transition_t *trans_i;
634570af302Sopenharmony_ci  regmatch_t *pmatch = NULL;
635570af302Sopenharmony_ci  int ret;
636570af302Sopenharmony_ci
637570af302Sopenharmony_ci#ifdef TRE_MBSTATE
638570af302Sopenharmony_ci  memset(&mbstate, '\0', sizeof(mbstate));
639570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
640570af302Sopenharmony_ci
641570af302Sopenharmony_ci  if (!mem)
642570af302Sopenharmony_ci    return REG_ESPACE;
643570af302Sopenharmony_ci  stack = tre_bt_mem_alloc(mem, sizeof(*stack));
644570af302Sopenharmony_ci  if (!stack)
645570af302Sopenharmony_ci    {
646570af302Sopenharmony_ci      ret = REG_ESPACE;
647570af302Sopenharmony_ci      goto error_exit;
648570af302Sopenharmony_ci    }
649570af302Sopenharmony_ci  stack->prev = NULL;
650570af302Sopenharmony_ci  stack->next = NULL;
651570af302Sopenharmony_ci
652570af302Sopenharmony_ci  if (tnfa->num_tags)
653570af302Sopenharmony_ci    {
654570af302Sopenharmony_ci      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
655570af302Sopenharmony_ci      if (!tags)
656570af302Sopenharmony_ci	{
657570af302Sopenharmony_ci	  ret = REG_ESPACE;
658570af302Sopenharmony_ci	  goto error_exit;
659570af302Sopenharmony_ci	}
660570af302Sopenharmony_ci    }
661570af302Sopenharmony_ci  if (tnfa->num_submatches)
662570af302Sopenharmony_ci    {
663570af302Sopenharmony_ci      pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
664570af302Sopenharmony_ci      if (!pmatch)
665570af302Sopenharmony_ci	{
666570af302Sopenharmony_ci	  ret = REG_ESPACE;
667570af302Sopenharmony_ci	  goto error_exit;
668570af302Sopenharmony_ci	}
669570af302Sopenharmony_ci    }
670570af302Sopenharmony_ci  if (tnfa->num_states)
671570af302Sopenharmony_ci    {
672570af302Sopenharmony_ci      states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
673570af302Sopenharmony_ci      if (!states_seen)
674570af302Sopenharmony_ci	{
675570af302Sopenharmony_ci	  ret = REG_ESPACE;
676570af302Sopenharmony_ci	  goto error_exit;
677570af302Sopenharmony_ci	}
678570af302Sopenharmony_ci    }
679570af302Sopenharmony_ci
680570af302Sopenharmony_ci retry:
681570af302Sopenharmony_ci  {
682570af302Sopenharmony_ci    int i;
683570af302Sopenharmony_ci    for (i = 0; i < tnfa->num_tags; i++)
684570af302Sopenharmony_ci      {
685570af302Sopenharmony_ci	tags[i] = -1;
686570af302Sopenharmony_ci	if (match_tags)
687570af302Sopenharmony_ci	  match_tags[i] = -1;
688570af302Sopenharmony_ci      }
689570af302Sopenharmony_ci    for (i = 0; i < tnfa->num_states; i++)
690570af302Sopenharmony_ci      states_seen[i] = 0;
691570af302Sopenharmony_ci  }
692570af302Sopenharmony_ci
693570af302Sopenharmony_ci  state = NULL;
694570af302Sopenharmony_ci  pos = pos_start;
695570af302Sopenharmony_ci  GET_NEXT_WCHAR();
696570af302Sopenharmony_ci  pos_start = pos;
697570af302Sopenharmony_ci  next_c_start = next_c;
698570af302Sopenharmony_ci  str_byte_start = str_byte;
699570af302Sopenharmony_ci#ifdef TRE_MBSTATE
700570af302Sopenharmony_ci  mbstate_start = mbstate;
701570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
702570af302Sopenharmony_ci
703570af302Sopenharmony_ci  /* Handle initial states. */
704570af302Sopenharmony_ci  next_tags = NULL;
705570af302Sopenharmony_ci  for (trans_i = tnfa->initial; trans_i->state; trans_i++)
706570af302Sopenharmony_ci    {
707570af302Sopenharmony_ci      if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
708570af302Sopenharmony_ci	{
709570af302Sopenharmony_ci	  continue;
710570af302Sopenharmony_ci	}
711570af302Sopenharmony_ci      if (state == NULL)
712570af302Sopenharmony_ci	{
713570af302Sopenharmony_ci	  /* Start from this state. */
714570af302Sopenharmony_ci	  state = trans_i->state;
715570af302Sopenharmony_ci	  next_tags = trans_i->tags;
716570af302Sopenharmony_ci	}
717570af302Sopenharmony_ci      else
718570af302Sopenharmony_ci	{
719570af302Sopenharmony_ci	  /* Backtrack to this state. */
720570af302Sopenharmony_ci	  BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
721570af302Sopenharmony_ci			trans_i->state_id, next_c, tags, mbstate);
722570af302Sopenharmony_ci	  {
723570af302Sopenharmony_ci	    int *tmp = trans_i->tags;
724570af302Sopenharmony_ci	    if (tmp)
725570af302Sopenharmony_ci	      while (*tmp >= 0)
726570af302Sopenharmony_ci		stack->item.tags[*tmp++] = pos;
727570af302Sopenharmony_ci	  }
728570af302Sopenharmony_ci	}
729570af302Sopenharmony_ci    }
730570af302Sopenharmony_ci
731570af302Sopenharmony_ci  if (next_tags)
732570af302Sopenharmony_ci    for (; *next_tags >= 0; next_tags++)
733570af302Sopenharmony_ci      tags[*next_tags] = pos;
734570af302Sopenharmony_ci
735570af302Sopenharmony_ci
736570af302Sopenharmony_ci  if (state == NULL)
737570af302Sopenharmony_ci    goto backtrack;
738570af302Sopenharmony_ci
739570af302Sopenharmony_ci  while (1)
740570af302Sopenharmony_ci    {
741570af302Sopenharmony_ci      tre_tnfa_transition_t *next_state;
742570af302Sopenharmony_ci      int empty_br_match;
743570af302Sopenharmony_ci
744570af302Sopenharmony_ci      if (state == tnfa->final)
745570af302Sopenharmony_ci	{
746570af302Sopenharmony_ci	  if (match_eo < pos
747570af302Sopenharmony_ci	      || (match_eo == pos
748570af302Sopenharmony_ci		  && match_tags
749570af302Sopenharmony_ci		  && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
750570af302Sopenharmony_ci				   tags, match_tags)))
751570af302Sopenharmony_ci	    {
752570af302Sopenharmony_ci	      int i;
753570af302Sopenharmony_ci	      /* This match wins the previous match. */
754570af302Sopenharmony_ci	      match_eo = pos;
755570af302Sopenharmony_ci	      if (match_tags)
756570af302Sopenharmony_ci		for (i = 0; i < tnfa->num_tags; i++)
757570af302Sopenharmony_ci		  match_tags[i] = tags[i];
758570af302Sopenharmony_ci	    }
759570af302Sopenharmony_ci	  /* Our TNFAs never have transitions leaving from the final state,
760570af302Sopenharmony_ci	     so we jump right to backtracking. */
761570af302Sopenharmony_ci	  goto backtrack;
762570af302Sopenharmony_ci	}
763570af302Sopenharmony_ci
764570af302Sopenharmony_ci      /* Go to the next character in the input string. */
765570af302Sopenharmony_ci      empty_br_match = 0;
766570af302Sopenharmony_ci      trans_i = state;
767570af302Sopenharmony_ci      if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
768570af302Sopenharmony_ci	{
769570af302Sopenharmony_ci	  /* This is a back reference state.  All transitions leaving from
770570af302Sopenharmony_ci	     this state have the same back reference "assertion".  Instead
771570af302Sopenharmony_ci	     of reading the next character, we match the back reference. */
772570af302Sopenharmony_ci	  regoff_t so, eo;
773570af302Sopenharmony_ci	  int bt = trans_i->u.backref;
774570af302Sopenharmony_ci	  regoff_t bt_len;
775570af302Sopenharmony_ci	  int result;
776570af302Sopenharmony_ci
777570af302Sopenharmony_ci	  /* Get the substring we need to match against.  Remember to
778570af302Sopenharmony_ci	     turn off REG_NOSUB temporarily. */
779570af302Sopenharmony_ci	  tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
780570af302Sopenharmony_ci			  tnfa, tags, pos);
781570af302Sopenharmony_ci	  so = pmatch[bt].rm_so;
782570af302Sopenharmony_ci	  eo = pmatch[bt].rm_eo;
783570af302Sopenharmony_ci	  bt_len = eo - so;
784570af302Sopenharmony_ci
785570af302Sopenharmony_ci	  result = strncmp((const char*)string + so, str_byte - 1,
786570af302Sopenharmony_ci				 (size_t)bt_len);
787570af302Sopenharmony_ci
788570af302Sopenharmony_ci	  if (result == 0)
789570af302Sopenharmony_ci	    {
790570af302Sopenharmony_ci	      /* Back reference matched.  Check for infinite loop. */
791570af302Sopenharmony_ci	      if (bt_len == 0)
792570af302Sopenharmony_ci		empty_br_match = 1;
793570af302Sopenharmony_ci	      if (empty_br_match && states_seen[trans_i->state_id])
794570af302Sopenharmony_ci		{
795570af302Sopenharmony_ci		  goto backtrack;
796570af302Sopenharmony_ci		}
797570af302Sopenharmony_ci
798570af302Sopenharmony_ci	      states_seen[trans_i->state_id] = empty_br_match;
799570af302Sopenharmony_ci
800570af302Sopenharmony_ci	      /* Advance in input string and resync `prev_c', `next_c'
801570af302Sopenharmony_ci		 and pos. */
802570af302Sopenharmony_ci	      str_byte += bt_len - 1;
803570af302Sopenharmony_ci	      pos += bt_len - 1;
804570af302Sopenharmony_ci	      GET_NEXT_WCHAR();
805570af302Sopenharmony_ci	    }
806570af302Sopenharmony_ci	  else
807570af302Sopenharmony_ci	    {
808570af302Sopenharmony_ci	      goto backtrack;
809570af302Sopenharmony_ci	    }
810570af302Sopenharmony_ci	}
811570af302Sopenharmony_ci      else
812570af302Sopenharmony_ci	{
813570af302Sopenharmony_ci	  /* Check for end of string. */
814570af302Sopenharmony_ci	  if (next_c == L'\0')
815570af302Sopenharmony_ci		goto backtrack;
816570af302Sopenharmony_ci
817570af302Sopenharmony_ci	  /* Read the next character. */
818570af302Sopenharmony_ci	  GET_NEXT_WCHAR();
819570af302Sopenharmony_ci	}
820570af302Sopenharmony_ci
821570af302Sopenharmony_ci      next_state = NULL;
822570af302Sopenharmony_ci      for (trans_i = state; trans_i->state; trans_i++)
823570af302Sopenharmony_ci	{
824570af302Sopenharmony_ci	  if (trans_i->code_min <= (tre_cint_t)prev_c
825570af302Sopenharmony_ci	      && trans_i->code_max >= (tre_cint_t)prev_c)
826570af302Sopenharmony_ci	    {
827570af302Sopenharmony_ci	      if (trans_i->assertions
828570af302Sopenharmony_ci		  && (CHECK_ASSERTIONS(trans_i->assertions)
829570af302Sopenharmony_ci		      || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
830570af302Sopenharmony_ci		{
831570af302Sopenharmony_ci		  continue;
832570af302Sopenharmony_ci		}
833570af302Sopenharmony_ci
834570af302Sopenharmony_ci	      if (next_state == NULL)
835570af302Sopenharmony_ci		{
836570af302Sopenharmony_ci		  /* First matching transition. */
837570af302Sopenharmony_ci		  next_state = trans_i->state;
838570af302Sopenharmony_ci		  next_tags = trans_i->tags;
839570af302Sopenharmony_ci		}
840570af302Sopenharmony_ci	      else
841570af302Sopenharmony_ci		{
842570af302Sopenharmony_ci		  /* Second matching transition.  We may need to backtrack here
843570af302Sopenharmony_ci		     to take this transition instead of the first one, so we
844570af302Sopenharmony_ci		     push this transition in the backtracking stack so we can
845570af302Sopenharmony_ci		     jump back here if needed. */
846570af302Sopenharmony_ci		  BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
847570af302Sopenharmony_ci				trans_i->state_id, next_c, tags, mbstate);
848570af302Sopenharmony_ci		  {
849570af302Sopenharmony_ci		    int *tmp;
850570af302Sopenharmony_ci		    for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
851570af302Sopenharmony_ci		      stack->item.tags[*tmp] = pos;
852570af302Sopenharmony_ci		  }
853570af302Sopenharmony_ci#if 0 /* XXX - it's important not to look at all transitions here to keep
854570af302Sopenharmony_ci	 the stack small! */
855570af302Sopenharmony_ci		  break;
856570af302Sopenharmony_ci#endif
857570af302Sopenharmony_ci		}
858570af302Sopenharmony_ci	    }
859570af302Sopenharmony_ci	}
860570af302Sopenharmony_ci
861570af302Sopenharmony_ci      if (next_state != NULL)
862570af302Sopenharmony_ci	{
863570af302Sopenharmony_ci	  /* Matching transitions were found.  Take the first one. */
864570af302Sopenharmony_ci	  state = next_state;
865570af302Sopenharmony_ci
866570af302Sopenharmony_ci	  /* Update the tag values. */
867570af302Sopenharmony_ci	  if (next_tags)
868570af302Sopenharmony_ci	    while (*next_tags >= 0)
869570af302Sopenharmony_ci	      tags[*next_tags++] = pos;
870570af302Sopenharmony_ci	}
871570af302Sopenharmony_ci      else
872570af302Sopenharmony_ci	{
873570af302Sopenharmony_ci	backtrack:
874570af302Sopenharmony_ci	  /* A matching transition was not found.  Try to backtrack. */
875570af302Sopenharmony_ci	  if (stack->prev)
876570af302Sopenharmony_ci	    {
877570af302Sopenharmony_ci	      if (stack->item.state->assertions & ASSERT_BACKREF)
878570af302Sopenharmony_ci		{
879570af302Sopenharmony_ci		  states_seen[stack->item.state_id] = 0;
880570af302Sopenharmony_ci		}
881570af302Sopenharmony_ci
882570af302Sopenharmony_ci	      BT_STACK_POP();
883570af302Sopenharmony_ci	    }
884570af302Sopenharmony_ci	  else if (match_eo < 0)
885570af302Sopenharmony_ci	    {
886570af302Sopenharmony_ci	      /* Try starting from a later position in the input string. */
887570af302Sopenharmony_ci	      /* Check for end of string. */
888570af302Sopenharmony_ci	      if (next_c == L'\0')
889570af302Sopenharmony_ci		    {
890570af302Sopenharmony_ci		      break;
891570af302Sopenharmony_ci		    }
892570af302Sopenharmony_ci	      next_c = next_c_start;
893570af302Sopenharmony_ci#ifdef TRE_MBSTATE
894570af302Sopenharmony_ci	      mbstate = mbstate_start;
895570af302Sopenharmony_ci#endif /* TRE_MBSTATE */
896570af302Sopenharmony_ci	      str_byte = str_byte_start;
897570af302Sopenharmony_ci	      goto retry;
898570af302Sopenharmony_ci	    }
899570af302Sopenharmony_ci	  else
900570af302Sopenharmony_ci	    {
901570af302Sopenharmony_ci	      break;
902570af302Sopenharmony_ci	    }
903570af302Sopenharmony_ci	}
904570af302Sopenharmony_ci    }
905570af302Sopenharmony_ci
906570af302Sopenharmony_ci  ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
907570af302Sopenharmony_ci  *match_end_ofs = match_eo;
908570af302Sopenharmony_ci
909570af302Sopenharmony_ci error_exit:
910570af302Sopenharmony_ci  tre_bt_mem_destroy(mem);
911570af302Sopenharmony_ci#ifndef TRE_USE_ALLOCA
912570af302Sopenharmony_ci  if (tags)
913570af302Sopenharmony_ci    xfree(tags);
914570af302Sopenharmony_ci  if (pmatch)
915570af302Sopenharmony_ci    xfree(pmatch);
916570af302Sopenharmony_ci  if (states_seen)
917570af302Sopenharmony_ci    xfree(states_seen);
918570af302Sopenharmony_ci#endif /* !TRE_USE_ALLOCA */
919570af302Sopenharmony_ci
920570af302Sopenharmony_ci  return ret;
921570af302Sopenharmony_ci}
922570af302Sopenharmony_ci
923570af302Sopenharmony_ci/***********************************************************************
924570af302Sopenharmony_ci from regexec.c
925570af302Sopenharmony_ci***********************************************************************/
926570af302Sopenharmony_ci
927570af302Sopenharmony_ci/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
928570af302Sopenharmony_ci   endpoint values. */
929570af302Sopenharmony_cistatic void
930570af302Sopenharmony_citre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
931570af302Sopenharmony_ci		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
932570af302Sopenharmony_ci{
933570af302Sopenharmony_ci  tre_submatch_data_t *submatch_data;
934570af302Sopenharmony_ci  unsigned int i, j;
935570af302Sopenharmony_ci  int *parents;
936570af302Sopenharmony_ci
937570af302Sopenharmony_ci  i = 0;
938570af302Sopenharmony_ci  if (match_eo >= 0 && !(cflags & REG_NOSUB))
939570af302Sopenharmony_ci    {
940570af302Sopenharmony_ci      /* Construct submatch offsets from the tags. */
941570af302Sopenharmony_ci      submatch_data = tnfa->submatch_data;
942570af302Sopenharmony_ci      while (i < tnfa->num_submatches && i < nmatch)
943570af302Sopenharmony_ci	{
944570af302Sopenharmony_ci	  if (submatch_data[i].so_tag == tnfa->end_tag)
945570af302Sopenharmony_ci	    pmatch[i].rm_so = match_eo;
946570af302Sopenharmony_ci	  else
947570af302Sopenharmony_ci	    pmatch[i].rm_so = tags[submatch_data[i].so_tag];
948570af302Sopenharmony_ci
949570af302Sopenharmony_ci	  if (submatch_data[i].eo_tag == tnfa->end_tag)
950570af302Sopenharmony_ci	    pmatch[i].rm_eo = match_eo;
951570af302Sopenharmony_ci	  else
952570af302Sopenharmony_ci	    pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
953570af302Sopenharmony_ci
954570af302Sopenharmony_ci	  /* If either of the endpoints were not used, this submatch
955570af302Sopenharmony_ci	     was not part of the match. */
956570af302Sopenharmony_ci	  if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
957570af302Sopenharmony_ci	    pmatch[i].rm_so = pmatch[i].rm_eo = -1;
958570af302Sopenharmony_ci
959570af302Sopenharmony_ci	  i++;
960570af302Sopenharmony_ci	}
961570af302Sopenharmony_ci      /* Reset all submatches that are not within all of their parent
962570af302Sopenharmony_ci	 submatches. */
963570af302Sopenharmony_ci      i = 0;
964570af302Sopenharmony_ci      while (i < tnfa->num_submatches && i < nmatch)
965570af302Sopenharmony_ci	{
966570af302Sopenharmony_ci	  if (pmatch[i].rm_eo == -1)
967570af302Sopenharmony_ci	    assert(pmatch[i].rm_so == -1);
968570af302Sopenharmony_ci	  assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
969570af302Sopenharmony_ci
970570af302Sopenharmony_ci	  parents = submatch_data[i].parents;
971570af302Sopenharmony_ci	  if (parents != NULL)
972570af302Sopenharmony_ci	    for (j = 0; parents[j] >= 0; j++)
973570af302Sopenharmony_ci	      {
974570af302Sopenharmony_ci		if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
975570af302Sopenharmony_ci		    || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
976570af302Sopenharmony_ci		  pmatch[i].rm_so = pmatch[i].rm_eo = -1;
977570af302Sopenharmony_ci	      }
978570af302Sopenharmony_ci	  i++;
979570af302Sopenharmony_ci	}
980570af302Sopenharmony_ci    }
981570af302Sopenharmony_ci
982570af302Sopenharmony_ci  while (i < nmatch)
983570af302Sopenharmony_ci    {
984570af302Sopenharmony_ci      pmatch[i].rm_so = -1;
985570af302Sopenharmony_ci      pmatch[i].rm_eo = -1;
986570af302Sopenharmony_ci      i++;
987570af302Sopenharmony_ci    }
988570af302Sopenharmony_ci}
989570af302Sopenharmony_ci
990570af302Sopenharmony_ci
991570af302Sopenharmony_ci/*
992570af302Sopenharmony_ci  Wrapper functions for POSIX compatible regexp matching.
993570af302Sopenharmony_ci*/
994570af302Sopenharmony_ci
995570af302Sopenharmony_ciint
996570af302Sopenharmony_ciregexec(const regex_t *restrict preg, const char *restrict string,
997570af302Sopenharmony_ci	  size_t nmatch, regmatch_t pmatch[restrict], int eflags)
998570af302Sopenharmony_ci{
999570af302Sopenharmony_ci  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
1000570af302Sopenharmony_ci  reg_errcode_t status;
1001570af302Sopenharmony_ci  regoff_t *tags = NULL, eo;
1002570af302Sopenharmony_ci  if (tnfa->cflags & REG_NOSUB) nmatch = 0;
1003570af302Sopenharmony_ci  if (tnfa->num_tags > 0 && nmatch > 0)
1004570af302Sopenharmony_ci    {
1005570af302Sopenharmony_ci      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
1006570af302Sopenharmony_ci      if (tags == NULL)
1007570af302Sopenharmony_ci	return REG_ESPACE;
1008570af302Sopenharmony_ci    }
1009570af302Sopenharmony_ci
1010570af302Sopenharmony_ci  /* Dispatch to the appropriate matcher. */
1011570af302Sopenharmony_ci  if (tnfa->have_backrefs)
1012570af302Sopenharmony_ci    {
1013570af302Sopenharmony_ci      /* The regex has back references, use the backtracking matcher. */
1014570af302Sopenharmony_ci      status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
1015570af302Sopenharmony_ci    }
1016570af302Sopenharmony_ci  else
1017570af302Sopenharmony_ci    {
1018570af302Sopenharmony_ci      /* Exact matching, no back references, use the parallel matcher. */
1019570af302Sopenharmony_ci      status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1020570af302Sopenharmony_ci    }
1021570af302Sopenharmony_ci
1022570af302Sopenharmony_ci  if (status == REG_OK)
1023570af302Sopenharmony_ci    /* A match was found, so fill the submatch registers. */
1024570af302Sopenharmony_ci    tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1025570af302Sopenharmony_ci  if (tags)
1026570af302Sopenharmony_ci    xfree(tags);
1027570af302Sopenharmony_ci  return status;
1028570af302Sopenharmony_ci}
1029