1e18e3516Sopenharmony_ci/*************************************************
2e18e3516Sopenharmony_ci*      Perl-Compatible Regular Expressions       *
3e18e3516Sopenharmony_ci*************************************************/
4e18e3516Sopenharmony_ci
5e18e3516Sopenharmony_ci/* PCRE is a library of functions to support regular expressions whose syntax
6e18e3516Sopenharmony_ciand semantics are as close as possible to those of the Perl 5 language.
7e18e3516Sopenharmony_ci
8e18e3516Sopenharmony_ci                       Written by Philip Hazel
9e18e3516Sopenharmony_ci     Original API code Copyright (c) 1997-2012 University of Cambridge
10e18e3516Sopenharmony_ci          New API code Copyright (c) 2016-2022 University of Cambridge
11e18e3516Sopenharmony_ci
12e18e3516Sopenharmony_ci-----------------------------------------------------------------------------
13e18e3516Sopenharmony_ciRedistribution and use in source and binary forms, with or without
14e18e3516Sopenharmony_cimodification, are permitted provided that the following conditions are met:
15e18e3516Sopenharmony_ci
16e18e3516Sopenharmony_ci    * Redistributions of source code must retain the above copyright notice,
17e18e3516Sopenharmony_ci      this list of conditions and the following disclaimer.
18e18e3516Sopenharmony_ci
19e18e3516Sopenharmony_ci    * Redistributions in binary form must reproduce the above copyright
20e18e3516Sopenharmony_ci      notice, this list of conditions and the following disclaimer in the
21e18e3516Sopenharmony_ci      documentation and/or other materials provided with the distribution.
22e18e3516Sopenharmony_ci
23e18e3516Sopenharmony_ci    * Neither the name of the University of Cambridge nor the names of its
24e18e3516Sopenharmony_ci      contributors may be used to endorse or promote products derived from
25e18e3516Sopenharmony_ci      this software without specific prior written permission.
26e18e3516Sopenharmony_ci
27e18e3516Sopenharmony_ciTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28e18e3516Sopenharmony_ciAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29e18e3516Sopenharmony_ciIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30e18e3516Sopenharmony_ciARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31e18e3516Sopenharmony_ciLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32e18e3516Sopenharmony_ciCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33e18e3516Sopenharmony_ciSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34e18e3516Sopenharmony_ciINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35e18e3516Sopenharmony_ciCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36e18e3516Sopenharmony_ciARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37e18e3516Sopenharmony_ciPOSSIBILITY OF SUCH DAMAGE.
38e18e3516Sopenharmony_ci-----------------------------------------------------------------------------
39e18e3516Sopenharmony_ci*/
40e18e3516Sopenharmony_ci
41e18e3516Sopenharmony_ci
42e18e3516Sopenharmony_ci#ifdef HAVE_CONFIG_H
43e18e3516Sopenharmony_ci#include "config.h"
44e18e3516Sopenharmony_ci#endif
45e18e3516Sopenharmony_ci
46e18e3516Sopenharmony_ci#include "pcre2_internal.h"
47e18e3516Sopenharmony_ci
48e18e3516Sopenharmony_ci#define PTR_STACK_SIZE 20
49e18e3516Sopenharmony_ci
50e18e3516Sopenharmony_ci#define SUBSTITUTE_OPTIONS \
51e18e3516Sopenharmony_ci  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
52e18e3516Sopenharmony_ci   PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
53e18e3516Sopenharmony_ci   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
54e18e3516Sopenharmony_ci   PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
55e18e3516Sopenharmony_ci
56e18e3516Sopenharmony_ci
57e18e3516Sopenharmony_ci
58e18e3516Sopenharmony_ci/*************************************************
59e18e3516Sopenharmony_ci*           Find end of substitute text          *
60e18e3516Sopenharmony_ci*************************************************/
61e18e3516Sopenharmony_ci
62e18e3516Sopenharmony_ci/* In extended mode, we recognize ${name:+set text:unset text} and similar
63e18e3516Sopenharmony_ciconstructions. This requires the identification of unescaped : and }
64e18e3516Sopenharmony_cicharacters. This function scans for such. It must deal with nested ${
65e18e3516Sopenharmony_ciconstructions. The pointer to the text is updated, either to the required end
66e18e3516Sopenharmony_cicharacter, or to where an error was detected.
67e18e3516Sopenharmony_ci
68e18e3516Sopenharmony_ciArguments:
69e18e3516Sopenharmony_ci  code      points to the compiled expression (for options)
70e18e3516Sopenharmony_ci  ptrptr    points to the pointer to the start of the text (updated)
71e18e3516Sopenharmony_ci  ptrend    end of the whole string
72e18e3516Sopenharmony_ci  last      TRUE if the last expected string (only } recognized)
73e18e3516Sopenharmony_ci
74e18e3516Sopenharmony_ciReturns:    0 on success
75e18e3516Sopenharmony_ci            negative error code on failure
76e18e3516Sopenharmony_ci*/
77e18e3516Sopenharmony_ci
78e18e3516Sopenharmony_cistatic int
79e18e3516Sopenharmony_cifind_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
80e18e3516Sopenharmony_ci  BOOL last)
81e18e3516Sopenharmony_ci{
82e18e3516Sopenharmony_ciint rc = 0;
83e18e3516Sopenharmony_ciuint32_t nestlevel = 0;
84e18e3516Sopenharmony_ciBOOL literal = FALSE;
85e18e3516Sopenharmony_ciPCRE2_SPTR ptr = *ptrptr;
86e18e3516Sopenharmony_ci
87e18e3516Sopenharmony_cifor (; ptr < ptrend; ptr++)
88e18e3516Sopenharmony_ci  {
89e18e3516Sopenharmony_ci  if (literal)
90e18e3516Sopenharmony_ci    {
91e18e3516Sopenharmony_ci    if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
92e18e3516Sopenharmony_ci      {
93e18e3516Sopenharmony_ci      literal = FALSE;
94e18e3516Sopenharmony_ci      ptr += 1;
95e18e3516Sopenharmony_ci      }
96e18e3516Sopenharmony_ci    }
97e18e3516Sopenharmony_ci
98e18e3516Sopenharmony_ci  else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99e18e3516Sopenharmony_ci    {
100e18e3516Sopenharmony_ci    if (nestlevel == 0) goto EXIT;
101e18e3516Sopenharmony_ci    nestlevel--;
102e18e3516Sopenharmony_ci    }
103e18e3516Sopenharmony_ci
104e18e3516Sopenharmony_ci  else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
105e18e3516Sopenharmony_ci
106e18e3516Sopenharmony_ci  else if (*ptr == CHAR_DOLLAR_SIGN)
107e18e3516Sopenharmony_ci    {
108e18e3516Sopenharmony_ci    if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
109e18e3516Sopenharmony_ci      {
110e18e3516Sopenharmony_ci      nestlevel++;
111e18e3516Sopenharmony_ci      ptr += 1;
112e18e3516Sopenharmony_ci      }
113e18e3516Sopenharmony_ci    }
114e18e3516Sopenharmony_ci
115e18e3516Sopenharmony_ci  else if (*ptr == CHAR_BACKSLASH)
116e18e3516Sopenharmony_ci    {
117e18e3516Sopenharmony_ci    int erc;
118e18e3516Sopenharmony_ci    int errorcode;
119e18e3516Sopenharmony_ci    uint32_t ch;
120e18e3516Sopenharmony_ci
121e18e3516Sopenharmony_ci    if (ptr < ptrend - 1) switch (ptr[1])
122e18e3516Sopenharmony_ci      {
123e18e3516Sopenharmony_ci      case CHAR_L:
124e18e3516Sopenharmony_ci      case CHAR_l:
125e18e3516Sopenharmony_ci      case CHAR_U:
126e18e3516Sopenharmony_ci      case CHAR_u:
127e18e3516Sopenharmony_ci      ptr += 1;
128e18e3516Sopenharmony_ci      continue;
129e18e3516Sopenharmony_ci      }
130e18e3516Sopenharmony_ci
131e18e3516Sopenharmony_ci    ptr += 1;  /* Must point after \ */
132e18e3516Sopenharmony_ci    erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133e18e3516Sopenharmony_ci      code->overall_options, code->extra_options, FALSE, NULL);
134e18e3516Sopenharmony_ci    ptr -= 1;  /* Back to last code unit of escape */
135e18e3516Sopenharmony_ci    if (errorcode != 0)
136e18e3516Sopenharmony_ci      {
137e18e3516Sopenharmony_ci      rc = errorcode;
138e18e3516Sopenharmony_ci      goto EXIT;
139e18e3516Sopenharmony_ci      }
140e18e3516Sopenharmony_ci
141e18e3516Sopenharmony_ci    switch(erc)
142e18e3516Sopenharmony_ci      {
143e18e3516Sopenharmony_ci      case 0:      /* Data character */
144e18e3516Sopenharmony_ci      case ESC_E:  /* Isolated \E is ignored */
145e18e3516Sopenharmony_ci      break;
146e18e3516Sopenharmony_ci
147e18e3516Sopenharmony_ci      case ESC_Q:
148e18e3516Sopenharmony_ci      literal = TRUE;
149e18e3516Sopenharmony_ci      break;
150e18e3516Sopenharmony_ci
151e18e3516Sopenharmony_ci      default:
152e18e3516Sopenharmony_ci      rc = PCRE2_ERROR_BADREPESCAPE;
153e18e3516Sopenharmony_ci      goto EXIT;
154e18e3516Sopenharmony_ci      }
155e18e3516Sopenharmony_ci    }
156e18e3516Sopenharmony_ci  }
157e18e3516Sopenharmony_ci
158e18e3516Sopenharmony_circ = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
159e18e3516Sopenharmony_ci
160e18e3516Sopenharmony_ciEXIT:
161e18e3516Sopenharmony_ci*ptrptr = ptr;
162e18e3516Sopenharmony_cireturn rc;
163e18e3516Sopenharmony_ci}
164e18e3516Sopenharmony_ci
165e18e3516Sopenharmony_ci
166e18e3516Sopenharmony_ci
167e18e3516Sopenharmony_ci/*************************************************
168e18e3516Sopenharmony_ci*              Match and substitute              *
169e18e3516Sopenharmony_ci*************************************************/
170e18e3516Sopenharmony_ci
171e18e3516Sopenharmony_ci/* This function applies a compiled re to a subject string and creates a new
172e18e3516Sopenharmony_cistring with substitutions. The first 7 arguments are the same as for
173e18e3516Sopenharmony_cipcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
174e18e3516Sopenharmony_ci
175e18e3516Sopenharmony_ciArguments:
176e18e3516Sopenharmony_ci  code            points to the compiled expression
177e18e3516Sopenharmony_ci  subject         points to the subject string
178e18e3516Sopenharmony_ci  length          length of subject string (may contain binary zeros)
179e18e3516Sopenharmony_ci  start_offset    where to start in the subject string
180e18e3516Sopenharmony_ci  options         option bits
181e18e3516Sopenharmony_ci  match_data      points to a match_data block, or is NULL
  mcontext        points to a PCRE2 match context, or is NULL
183e18e3516Sopenharmony_ci  replacement     points to the replacement string
184e18e3516Sopenharmony_ci  rlength         length of replacement string
185e18e3516Sopenharmony_ci  buffer          where to put the substituted string
186e18e3516Sopenharmony_ci  blength         points to length of buffer; updated to length of string
187e18e3516Sopenharmony_ci
188e18e3516Sopenharmony_ciReturns:          >= 0 number of substitutions made
189e18e3516Sopenharmony_ci                  < 0 an error code
190e18e3516Sopenharmony_ci                  PCRE2_ERROR_BADREPLACEMENT means invalid use of $
191e18e3516Sopenharmony_ci*/
192e18e3516Sopenharmony_ci
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (jump to the NOROOM label), or, if
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set in suboptions, keep on, accumulating in
extra_needed the number of additional code units that would have been needed.
Once overflowed is set, nothing more is copied.

The macro is not self-contained: it uses the variables buffer, buff_offset,
lengthleft, overflowed, extra_needed and suboptions, and the label NOROOM, all
of which must exist at the point of use. The arguments are parenthesized in
the expansion so that an expression can safely be passed, but "length" is
evaluated more than once, so it must not have side effects. The expansion is a
brace block, not a single statement, so do not use it as the unbraced body of
an "if" that has an "else". */

#define CHECKMEMCPY(from,length) \
  { \
  if (!overflowed && lengthleft < (length)) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = (length) - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += (length); \
    }  \
  else \
    {  \
    memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
    buff_offset += (length); \
    lengthleft -= (length); \
    } \
  }
216e18e3516Sopenharmony_ci
217e18e3516Sopenharmony_ci/* Here's the function */
218e18e3516Sopenharmony_ci
219e18e3516Sopenharmony_ciPCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
220e18e3516Sopenharmony_cipcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221e18e3516Sopenharmony_ci  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222e18e3516Sopenharmony_ci  pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223e18e3516Sopenharmony_ci  PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
224e18e3516Sopenharmony_ci{
225e18e3516Sopenharmony_ciint rc;
226e18e3516Sopenharmony_ciint subs;
227e18e3516Sopenharmony_ciint forcecase = 0;
228e18e3516Sopenharmony_ciint forcecasereset = 0;
229e18e3516Sopenharmony_ciuint32_t ovector_count;
230e18e3516Sopenharmony_ciuint32_t goptions = 0;
231e18e3516Sopenharmony_ciuint32_t suboptions;
232e18e3516Sopenharmony_cipcre2_match_data *internal_match_data = NULL;
233e18e3516Sopenharmony_ciBOOL escaped_literal = FALSE;
234e18e3516Sopenharmony_ciBOOL overflowed = FALSE;
235e18e3516Sopenharmony_ciBOOL use_existing_match;
236e18e3516Sopenharmony_ciBOOL replacement_only;
237e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
238e18e3516Sopenharmony_ciBOOL utf = (code->overall_options & PCRE2_UTF) != 0;
239e18e3516Sopenharmony_ciBOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
240e18e3516Sopenharmony_ci#endif
241e18e3516Sopenharmony_ciPCRE2_UCHAR temp[6];
242e18e3516Sopenharmony_ciPCRE2_SPTR ptr;
243e18e3516Sopenharmony_ciPCRE2_SPTR repend;
244e18e3516Sopenharmony_ciPCRE2_SIZE extra_needed = 0;
245e18e3516Sopenharmony_ciPCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246e18e3516Sopenharmony_ciPCRE2_SIZE *ovector;
247e18e3516Sopenharmony_ciPCRE2_SIZE ovecsave[3];
248e18e3516Sopenharmony_cipcre2_substitute_callout_block scb;
249e18e3516Sopenharmony_ci
250e18e3516Sopenharmony_ci/* General initialization */
251e18e3516Sopenharmony_ci
252e18e3516Sopenharmony_cibuff_offset = 0;
253e18e3516Sopenharmony_cilengthleft = buff_length = *blength;
254e18e3516Sopenharmony_ci*blength = PCRE2_UNSET;
255e18e3516Sopenharmony_ciovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
256e18e3516Sopenharmony_ci
257e18e3516Sopenharmony_ci/* Partial matching is not valid. This must come after setting *blength to
258e18e3516Sopenharmony_ciPCRE2_UNSET, so as not to imply an offset in the replacement. */
259e18e3516Sopenharmony_ci
260e18e3516Sopenharmony_ciif ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
261e18e3516Sopenharmony_ci  return PCRE2_ERROR_BADOPTION;
262e18e3516Sopenharmony_ci
263e18e3516Sopenharmony_ci/* Validate length and find the end of the replacement. A NULL replacement of
264e18e3516Sopenharmony_cizero length is interpreted as an empty string. */
265e18e3516Sopenharmony_ci
266e18e3516Sopenharmony_ciif (replacement == NULL)
267e18e3516Sopenharmony_ci  {
268e18e3516Sopenharmony_ci  if (rlength != 0) return PCRE2_ERROR_NULL;
269e18e3516Sopenharmony_ci  replacement = (PCRE2_SPTR)"";
270e18e3516Sopenharmony_ci  }
271e18e3516Sopenharmony_ci
272e18e3516Sopenharmony_ciif (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
273e18e3516Sopenharmony_cirepend = replacement + rlength;
274e18e3516Sopenharmony_ci
275e18e3516Sopenharmony_ci/* Check for using a match that has already happened. Note that the subject
276e18e3516Sopenharmony_cipointer in the match data may be NULL after a no-match. */
277e18e3516Sopenharmony_ci
278e18e3516Sopenharmony_ciuse_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
279e18e3516Sopenharmony_cireplacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
280e18e3516Sopenharmony_ci
281e18e3516Sopenharmony_ci/* If starting from an existing match, there must be an externally provided
282e18e3516Sopenharmony_cimatch data block. We create an internal match_data block in two cases: (a) an
283e18e3516Sopenharmony_ciexternal one is not supplied (and we are not starting from an existing match);
284e18e3516Sopenharmony_ci(b) an existing match is to be used for the first substitution. In the latter
285e18e3516Sopenharmony_cicase, we copy the existing match into the internal block, except for any cached
286e18e3516Sopenharmony_ciheap frame size and pointer. This ensures that no changes are made to the
287e18e3516Sopenharmony_ciexternal match data block. */
288e18e3516Sopenharmony_ci
289e18e3516Sopenharmony_ciif (match_data == NULL)
290e18e3516Sopenharmony_ci  {
291e18e3516Sopenharmony_ci  pcre2_general_context *gcontext;
292e18e3516Sopenharmony_ci  if (use_existing_match) return PCRE2_ERROR_NULL;
293e18e3516Sopenharmony_ci  gcontext = (mcontext == NULL)?
294e18e3516Sopenharmony_ci    (pcre2_general_context *)code :
295e18e3516Sopenharmony_ci    (pcre2_general_context *)mcontext;
296e18e3516Sopenharmony_ci  match_data = internal_match_data =
297e18e3516Sopenharmony_ci    pcre2_match_data_create_from_pattern(code, gcontext);
298e18e3516Sopenharmony_ci  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
299e18e3516Sopenharmony_ci  }
300e18e3516Sopenharmony_ci
301e18e3516Sopenharmony_cielse if (use_existing_match)
302e18e3516Sopenharmony_ci  {
303e18e3516Sopenharmony_ci  pcre2_general_context *gcontext = (mcontext == NULL)?
304e18e3516Sopenharmony_ci    (pcre2_general_context *)code :
305e18e3516Sopenharmony_ci    (pcre2_general_context *)mcontext;
306e18e3516Sopenharmony_ci  int pairs = (code->top_bracket + 1 < match_data->oveccount)?
307e18e3516Sopenharmony_ci    code->top_bracket + 1 : match_data->oveccount;
308e18e3516Sopenharmony_ci  internal_match_data = pcre2_match_data_create(match_data->oveccount,
309e18e3516Sopenharmony_ci    gcontext);
310e18e3516Sopenharmony_ci  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
311e18e3516Sopenharmony_ci  memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
312e18e3516Sopenharmony_ci    + 2*pairs*sizeof(PCRE2_SIZE));
313e18e3516Sopenharmony_ci  internal_match_data->heapframes = NULL;
314e18e3516Sopenharmony_ci  internal_match_data->heapframes_size = 0;
315e18e3516Sopenharmony_ci  match_data = internal_match_data;
316e18e3516Sopenharmony_ci  }
317e18e3516Sopenharmony_ci
318e18e3516Sopenharmony_ci/* Remember ovector details */
319e18e3516Sopenharmony_ci
320e18e3516Sopenharmony_ciovector = pcre2_get_ovector_pointer(match_data);
321e18e3516Sopenharmony_ciovector_count = pcre2_get_ovector_count(match_data);
322e18e3516Sopenharmony_ci
323e18e3516Sopenharmony_ci/* Fixed things in the callout block */
324e18e3516Sopenharmony_ci
325e18e3516Sopenharmony_ciscb.version = 0;
326e18e3516Sopenharmony_ciscb.input = subject;
327e18e3516Sopenharmony_ciscb.output = (PCRE2_SPTR)buffer;
328e18e3516Sopenharmony_ciscb.ovector = ovector;
329e18e3516Sopenharmony_ci
330e18e3516Sopenharmony_ci/* A NULL subject of zero length is treated as an empty string. */
331e18e3516Sopenharmony_ci
332e18e3516Sopenharmony_ciif (subject == NULL)
333e18e3516Sopenharmony_ci  {
334e18e3516Sopenharmony_ci  if (length != 0) return PCRE2_ERROR_NULL;
335e18e3516Sopenharmony_ci  subject = (PCRE2_SPTR)"";
336e18e3516Sopenharmony_ci  }
337e18e3516Sopenharmony_ci
338e18e3516Sopenharmony_ci/* Find length of zero-terminated subject */
339e18e3516Sopenharmony_ci
340e18e3516Sopenharmony_ciif (length == PCRE2_ZERO_TERMINATED)
341e18e3516Sopenharmony_ci  length = subject? PRIV(strlen)(subject) : 0;
342e18e3516Sopenharmony_ci
343e18e3516Sopenharmony_ci/* Check UTF replacement string if necessary. */
344e18e3516Sopenharmony_ci
345e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
346e18e3516Sopenharmony_ciif (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
347e18e3516Sopenharmony_ci  {
348e18e3516Sopenharmony_ci  rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
349e18e3516Sopenharmony_ci  if (rc != 0)
350e18e3516Sopenharmony_ci    {
351e18e3516Sopenharmony_ci    match_data->leftchar = 0;
352e18e3516Sopenharmony_ci    goto EXIT;
353e18e3516Sopenharmony_ci    }
354e18e3516Sopenharmony_ci  }
355e18e3516Sopenharmony_ci#endif  /* SUPPORT_UNICODE */
356e18e3516Sopenharmony_ci
357e18e3516Sopenharmony_ci/* Save the substitute options and remove them from the match options. */
358e18e3516Sopenharmony_ci
359e18e3516Sopenharmony_cisuboptions = options & SUBSTITUTE_OPTIONS;
360e18e3516Sopenharmony_cioptions &= ~SUBSTITUTE_OPTIONS;
361e18e3516Sopenharmony_ci
362e18e3516Sopenharmony_ci/* Error if the start match offset is greater than the length of the subject. */
363e18e3516Sopenharmony_ci
364e18e3516Sopenharmony_ciif (start_offset > length)
365e18e3516Sopenharmony_ci  {
366e18e3516Sopenharmony_ci  match_data->leftchar = 0;
367e18e3516Sopenharmony_ci  rc = PCRE2_ERROR_BADOFFSET;
368e18e3516Sopenharmony_ci  goto EXIT;
369e18e3516Sopenharmony_ci  }
370e18e3516Sopenharmony_ci
371e18e3516Sopenharmony_ci/* Copy up to the start offset, unless only the replacement is required. */
372e18e3516Sopenharmony_ci
373e18e3516Sopenharmony_ciif (!replacement_only) CHECKMEMCPY(subject, start_offset);
374e18e3516Sopenharmony_ci
375e18e3516Sopenharmony_ci/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
376e18e3516Sopenharmony_cimatch is taken from the match_data that was passed in. */
377e18e3516Sopenharmony_ci
378e18e3516Sopenharmony_cisubs = 0;
379e18e3516Sopenharmony_cido
380e18e3516Sopenharmony_ci  {
381e18e3516Sopenharmony_ci  PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
382e18e3516Sopenharmony_ci  uint32_t ptrstackptr = 0;
383e18e3516Sopenharmony_ci
384e18e3516Sopenharmony_ci  if (use_existing_match)
385e18e3516Sopenharmony_ci    {
386e18e3516Sopenharmony_ci    rc = match_data->rc;
387e18e3516Sopenharmony_ci    use_existing_match = FALSE;
388e18e3516Sopenharmony_ci    }
389e18e3516Sopenharmony_ci  else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
390e18e3516Sopenharmony_ci    match_data, mcontext);
391e18e3516Sopenharmony_ci
392e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
393e18e3516Sopenharmony_ci  if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
394e18e3516Sopenharmony_ci#endif
395e18e3516Sopenharmony_ci
396e18e3516Sopenharmony_ci  /* Any error other than no match returns the error code. No match when not
397e18e3516Sopenharmony_ci  doing the special after-empty-match global rematch, or when at the end of the
398e18e3516Sopenharmony_ci  subject, breaks the global loop. Otherwise, advance the starting point by one
399e18e3516Sopenharmony_ci  character, copying it to the output, and try again. */
400e18e3516Sopenharmony_ci
401e18e3516Sopenharmony_ci  if (rc < 0)
402e18e3516Sopenharmony_ci    {
403e18e3516Sopenharmony_ci    PCRE2_SIZE save_start;
404e18e3516Sopenharmony_ci
405e18e3516Sopenharmony_ci    if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
406e18e3516Sopenharmony_ci    if (goptions == 0 || start_offset >= length) break;
407e18e3516Sopenharmony_ci
408e18e3516Sopenharmony_ci    /* Advance by one code point. Then, if CRLF is a valid newline sequence and
409e18e3516Sopenharmony_ci    we have advanced into the middle of it, advance one more code point. In
410e18e3516Sopenharmony_ci    other words, do not start in the middle of CRLF, even if CR and LF on their
411e18e3516Sopenharmony_ci    own are valid newlines. */
412e18e3516Sopenharmony_ci
413e18e3516Sopenharmony_ci    save_start = start_offset++;
414e18e3516Sopenharmony_ci    if (subject[start_offset-1] == CHAR_CR &&
415e18e3516Sopenharmony_ci        code->newline_convention != PCRE2_NEWLINE_CR &&
416e18e3516Sopenharmony_ci        code->newline_convention != PCRE2_NEWLINE_LF &&
417e18e3516Sopenharmony_ci        start_offset < length &&
418e18e3516Sopenharmony_ci        subject[start_offset] == CHAR_LF)
419e18e3516Sopenharmony_ci      start_offset++;
420e18e3516Sopenharmony_ci
421e18e3516Sopenharmony_ci    /* Otherwise, in UTF mode, advance past any secondary code points. */
422e18e3516Sopenharmony_ci
423e18e3516Sopenharmony_ci    else if ((code->overall_options & PCRE2_UTF) != 0)
424e18e3516Sopenharmony_ci      {
425e18e3516Sopenharmony_ci#if PCRE2_CODE_UNIT_WIDTH == 8
426e18e3516Sopenharmony_ci      while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
427e18e3516Sopenharmony_ci        start_offset++;
428e18e3516Sopenharmony_ci#elif PCRE2_CODE_UNIT_WIDTH == 16
429e18e3516Sopenharmony_ci      while (start_offset < length &&
430e18e3516Sopenharmony_ci            (subject[start_offset] & 0xfc00) == 0xdc00)
431e18e3516Sopenharmony_ci        start_offset++;
432e18e3516Sopenharmony_ci#endif
433e18e3516Sopenharmony_ci      }
434e18e3516Sopenharmony_ci
435e18e3516Sopenharmony_ci    /* Copy what we have advanced past (unless not required), reset the special
436e18e3516Sopenharmony_ci    global options, and continue to the next match. */
437e18e3516Sopenharmony_ci
438e18e3516Sopenharmony_ci    fraglength = start_offset - save_start;
439e18e3516Sopenharmony_ci    if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
440e18e3516Sopenharmony_ci    goptions = 0;
441e18e3516Sopenharmony_ci    continue;
442e18e3516Sopenharmony_ci    }
443e18e3516Sopenharmony_ci
444e18e3516Sopenharmony_ci  /* Handle a successful match. Matches that use \K to end before they start
445e18e3516Sopenharmony_ci  or start before the current point in the subject are not supported. */
446e18e3516Sopenharmony_ci
447e18e3516Sopenharmony_ci  if (ovector[1] < ovector[0] || ovector[0] < start_offset)
448e18e3516Sopenharmony_ci    {
449e18e3516Sopenharmony_ci    rc = PCRE2_ERROR_BADSUBSPATTERN;
450e18e3516Sopenharmony_ci    goto EXIT;
451e18e3516Sopenharmony_ci    }
452e18e3516Sopenharmony_ci
453e18e3516Sopenharmony_ci  /* Check for the same match as previous. This is legitimate after matching an
454e18e3516Sopenharmony_ci  empty string that starts after the initial match offset. We have tried again
455e18e3516Sopenharmony_ci  at the match point in case the pattern is one like /(?<=\G.)/ which can never
456e18e3516Sopenharmony_ci  match at its starting point, so running the match achieves the bumpalong. If
457e18e3516Sopenharmony_ci  we do get the same (null) match at the original match point, it isn't such a
458e18e3516Sopenharmony_ci  pattern, so we now do the empty string magic. In all other cases, a repeat
459e18e3516Sopenharmony_ci  match should never occur. */
460e18e3516Sopenharmony_ci
461e18e3516Sopenharmony_ci  if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
462e18e3516Sopenharmony_ci    {
463e18e3516Sopenharmony_ci    if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
464e18e3516Sopenharmony_ci      {
465e18e3516Sopenharmony_ci      goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
466e18e3516Sopenharmony_ci      ovecsave[2] = start_offset;
467e18e3516Sopenharmony_ci      continue;    /* Back to the top of the loop */
468e18e3516Sopenharmony_ci      }
469e18e3516Sopenharmony_ci    rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
470e18e3516Sopenharmony_ci    goto EXIT;
471e18e3516Sopenharmony_ci    }
472e18e3516Sopenharmony_ci
473e18e3516Sopenharmony_ci  /* Count substitutions with a paranoid check for integer overflow; surely no
474e18e3516Sopenharmony_ci  real call to this function would ever hit this! */
475e18e3516Sopenharmony_ci
476e18e3516Sopenharmony_ci  if (subs == INT_MAX)
477e18e3516Sopenharmony_ci    {
478e18e3516Sopenharmony_ci    rc = PCRE2_ERROR_TOOMANYREPLACE;
479e18e3516Sopenharmony_ci    goto EXIT;
480e18e3516Sopenharmony_ci    }
481e18e3516Sopenharmony_ci  subs++;
482e18e3516Sopenharmony_ci
483e18e3516Sopenharmony_ci  /* Copy the text leading up to the match (unless not required), and remember
484e18e3516Sopenharmony_ci  where the insert begins and how many ovector pairs are set. */
485e18e3516Sopenharmony_ci
486e18e3516Sopenharmony_ci  if (rc == 0) rc = ovector_count;
487e18e3516Sopenharmony_ci  fraglength = ovector[0] - start_offset;
488e18e3516Sopenharmony_ci  if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
489e18e3516Sopenharmony_ci  scb.output_offsets[0] = buff_offset;
490e18e3516Sopenharmony_ci  scb.oveccount = rc;
491e18e3516Sopenharmony_ci
492e18e3516Sopenharmony_ci  /* Process the replacement string. If the entire replacement is literal, just
493e18e3516Sopenharmony_ci  copy it with length check. */
494e18e3516Sopenharmony_ci
495e18e3516Sopenharmony_ci  ptr = replacement;
496e18e3516Sopenharmony_ci  if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
497e18e3516Sopenharmony_ci    {
498e18e3516Sopenharmony_ci    CHECKMEMCPY(ptr, rlength);
499e18e3516Sopenharmony_ci    }
500e18e3516Sopenharmony_ci
501e18e3516Sopenharmony_ci  /* Within a non-literal replacement, which must be scanned character by
502e18e3516Sopenharmony_ci  character, local literal mode can be set by \Q, but only in extended mode
503e18e3516Sopenharmony_ci  when backslashes are being interpreted. In extended mode we must handle
504e18e3516Sopenharmony_ci  nested substrings that are to be reprocessed. */
505e18e3516Sopenharmony_ci
506e18e3516Sopenharmony_ci  else for (;;)
507e18e3516Sopenharmony_ci    {
508e18e3516Sopenharmony_ci    uint32_t ch;
509e18e3516Sopenharmony_ci    unsigned int chlen;
510e18e3516Sopenharmony_ci
511e18e3516Sopenharmony_ci    /* If at the end of a nested substring, pop the stack. */
512e18e3516Sopenharmony_ci
513e18e3516Sopenharmony_ci    if (ptr >= repend)
514e18e3516Sopenharmony_ci      {
515e18e3516Sopenharmony_ci      if (ptrstackptr == 0) break;       /* End of replacement string */
516e18e3516Sopenharmony_ci      repend = ptrstack[--ptrstackptr];
517e18e3516Sopenharmony_ci      ptr = ptrstack[--ptrstackptr];
518e18e3516Sopenharmony_ci      continue;
519e18e3516Sopenharmony_ci      }
520e18e3516Sopenharmony_ci
521e18e3516Sopenharmony_ci    /* Handle the next character */
522e18e3516Sopenharmony_ci
523e18e3516Sopenharmony_ci    if (escaped_literal)
524e18e3516Sopenharmony_ci      {
525e18e3516Sopenharmony_ci      if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
526e18e3516Sopenharmony_ci        {
527e18e3516Sopenharmony_ci        escaped_literal = FALSE;
528e18e3516Sopenharmony_ci        ptr += 2;
529e18e3516Sopenharmony_ci        continue;
530e18e3516Sopenharmony_ci        }
531e18e3516Sopenharmony_ci      goto LOADLITERAL;
532e18e3516Sopenharmony_ci      }
533e18e3516Sopenharmony_ci
534e18e3516Sopenharmony_ci    /* Not in literal mode. */
535e18e3516Sopenharmony_ci
536e18e3516Sopenharmony_ci    if (*ptr == CHAR_DOLLAR_SIGN)
537e18e3516Sopenharmony_ci      {
538e18e3516Sopenharmony_ci      int group, n;
539e18e3516Sopenharmony_ci      uint32_t special = 0;
540e18e3516Sopenharmony_ci      BOOL inparens;
541e18e3516Sopenharmony_ci      BOOL star;
542e18e3516Sopenharmony_ci      PCRE2_SIZE sublength;
543e18e3516Sopenharmony_ci      PCRE2_SPTR text1_start = NULL;
544e18e3516Sopenharmony_ci      PCRE2_SPTR text1_end = NULL;
545e18e3516Sopenharmony_ci      PCRE2_SPTR text2_start = NULL;
546e18e3516Sopenharmony_ci      PCRE2_SPTR text2_end = NULL;
547e18e3516Sopenharmony_ci      PCRE2_UCHAR next;
548e18e3516Sopenharmony_ci      PCRE2_UCHAR name[33];
549e18e3516Sopenharmony_ci
550e18e3516Sopenharmony_ci      if (++ptr >= repend) goto BAD;
551e18e3516Sopenharmony_ci      if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
552e18e3516Sopenharmony_ci
553e18e3516Sopenharmony_ci      group = -1;
554e18e3516Sopenharmony_ci      n = 0;
555e18e3516Sopenharmony_ci      inparens = FALSE;
556e18e3516Sopenharmony_ci      star = FALSE;
557e18e3516Sopenharmony_ci
558e18e3516Sopenharmony_ci      if (next == CHAR_LEFT_CURLY_BRACKET)
559e18e3516Sopenharmony_ci        {
560e18e3516Sopenharmony_ci        if (++ptr >= repend) goto BAD;
561e18e3516Sopenharmony_ci        next = *ptr;
562e18e3516Sopenharmony_ci        inparens = TRUE;
563e18e3516Sopenharmony_ci        }
564e18e3516Sopenharmony_ci
565e18e3516Sopenharmony_ci      if (next == CHAR_ASTERISK)
566e18e3516Sopenharmony_ci        {
567e18e3516Sopenharmony_ci        if (++ptr >= repend) goto BAD;
568e18e3516Sopenharmony_ci        next = *ptr;
569e18e3516Sopenharmony_ci        star = TRUE;
570e18e3516Sopenharmony_ci        }
571e18e3516Sopenharmony_ci
572e18e3516Sopenharmony_ci      if (!star && next >= CHAR_0 && next <= CHAR_9)
573e18e3516Sopenharmony_ci        {
574e18e3516Sopenharmony_ci        group = next - CHAR_0;
575e18e3516Sopenharmony_ci        while (++ptr < repend)
576e18e3516Sopenharmony_ci          {
577e18e3516Sopenharmony_ci          next = *ptr;
578e18e3516Sopenharmony_ci          if (next < CHAR_0 || next > CHAR_9) break;
579e18e3516Sopenharmony_ci          group = group * 10 + next - CHAR_0;
580e18e3516Sopenharmony_ci
581e18e3516Sopenharmony_ci          /* A check for a number greater than the highest captured group
582e18e3516Sopenharmony_ci          is sufficient here; no need for a separate overflow check. If unknown
583e18e3516Sopenharmony_ci          groups are to be treated as unset, just skip over any remaining
584e18e3516Sopenharmony_ci          digits and carry on. */
585e18e3516Sopenharmony_ci
586e18e3516Sopenharmony_ci          if (group > code->top_bracket)
587e18e3516Sopenharmony_ci            {
588e18e3516Sopenharmony_ci            if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
589e18e3516Sopenharmony_ci              {
590e18e3516Sopenharmony_ci              while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
591e18e3516Sopenharmony_ci              break;
592e18e3516Sopenharmony_ci              }
593e18e3516Sopenharmony_ci            else
594e18e3516Sopenharmony_ci              {
595e18e3516Sopenharmony_ci              rc = PCRE2_ERROR_NOSUBSTRING;
596e18e3516Sopenharmony_ci              goto PTREXIT;
597e18e3516Sopenharmony_ci              }
598e18e3516Sopenharmony_ci            }
599e18e3516Sopenharmony_ci          }
600e18e3516Sopenharmony_ci        }
601e18e3516Sopenharmony_ci      else
602e18e3516Sopenharmony_ci        {
603e18e3516Sopenharmony_ci        const uint8_t *ctypes = code->tables + ctypes_offset;
604e18e3516Sopenharmony_ci        while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
605e18e3516Sopenharmony_ci          {
606e18e3516Sopenharmony_ci          name[n++] = next;
607e18e3516Sopenharmony_ci          if (n > 32) goto BAD;
608e18e3516Sopenharmony_ci          if (++ptr >= repend) break;
609e18e3516Sopenharmony_ci          next = *ptr;
610e18e3516Sopenharmony_ci          }
611e18e3516Sopenharmony_ci        if (n == 0) goto BAD;
612e18e3516Sopenharmony_ci        name[n] = 0;
613e18e3516Sopenharmony_ci        }
614e18e3516Sopenharmony_ci
615e18e3516Sopenharmony_ci      /* In extended mode we recognize ${name:+set text:unset text} and
616e18e3516Sopenharmony_ci      ${name:-default text}. */
617e18e3516Sopenharmony_ci
618e18e3516Sopenharmony_ci      if (inparens)
619e18e3516Sopenharmony_ci        {
620e18e3516Sopenharmony_ci        if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
621e18e3516Sopenharmony_ci             !star && ptr < repend - 2 && next == CHAR_COLON)
622e18e3516Sopenharmony_ci          {
623e18e3516Sopenharmony_ci          special = *(++ptr);
624e18e3516Sopenharmony_ci          if (special != CHAR_PLUS && special != CHAR_MINUS)
625e18e3516Sopenharmony_ci            {
626e18e3516Sopenharmony_ci            rc = PCRE2_ERROR_BADSUBSTITUTION;
627e18e3516Sopenharmony_ci            goto PTREXIT;
628e18e3516Sopenharmony_ci            }
629e18e3516Sopenharmony_ci
630e18e3516Sopenharmony_ci          text1_start = ++ptr;
631e18e3516Sopenharmony_ci          rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
632e18e3516Sopenharmony_ci          if (rc != 0) goto PTREXIT;
633e18e3516Sopenharmony_ci          text1_end = ptr;
634e18e3516Sopenharmony_ci
635e18e3516Sopenharmony_ci          if (special == CHAR_PLUS && *ptr == CHAR_COLON)
636e18e3516Sopenharmony_ci            {
637e18e3516Sopenharmony_ci            text2_start = ++ptr;
638e18e3516Sopenharmony_ci            rc = find_text_end(code, &ptr, repend, TRUE);
639e18e3516Sopenharmony_ci            if (rc != 0) goto PTREXIT;
640e18e3516Sopenharmony_ci            text2_end = ptr;
641e18e3516Sopenharmony_ci            }
642e18e3516Sopenharmony_ci          }
643e18e3516Sopenharmony_ci
644e18e3516Sopenharmony_ci        else
645e18e3516Sopenharmony_ci          {
646e18e3516Sopenharmony_ci          if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
647e18e3516Sopenharmony_ci            {
648e18e3516Sopenharmony_ci            rc = PCRE2_ERROR_REPMISSINGBRACE;
649e18e3516Sopenharmony_ci            goto PTREXIT;
650e18e3516Sopenharmony_ci            }
651e18e3516Sopenharmony_ci          }
652e18e3516Sopenharmony_ci
653e18e3516Sopenharmony_ci        ptr++;
654e18e3516Sopenharmony_ci        }
655e18e3516Sopenharmony_ci
656e18e3516Sopenharmony_ci      /* Have found a syntactically correct group number or name, or *name.
657e18e3516Sopenharmony_ci      Only *MARK is currently recognized. */
658e18e3516Sopenharmony_ci
659e18e3516Sopenharmony_ci      if (star)
660e18e3516Sopenharmony_ci        {
661e18e3516Sopenharmony_ci        if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
662e18e3516Sopenharmony_ci          {
663e18e3516Sopenharmony_ci          PCRE2_SPTR mark = pcre2_get_mark(match_data);
664e18e3516Sopenharmony_ci          if (mark != NULL)
665e18e3516Sopenharmony_ci            {
666e18e3516Sopenharmony_ci            PCRE2_SPTR mark_start = mark;
667e18e3516Sopenharmony_ci            while (*mark != 0) mark++;
668e18e3516Sopenharmony_ci            fraglength = mark - mark_start;
669e18e3516Sopenharmony_ci            CHECKMEMCPY(mark_start, fraglength);
670e18e3516Sopenharmony_ci            }
671e18e3516Sopenharmony_ci          }
672e18e3516Sopenharmony_ci        else goto BAD;
673e18e3516Sopenharmony_ci        }
674e18e3516Sopenharmony_ci
675e18e3516Sopenharmony_ci      /* Substitute the contents of a group. We don't use substring_copy
676e18e3516Sopenharmony_ci      functions any more, in order to support case forcing. */
677e18e3516Sopenharmony_ci
678e18e3516Sopenharmony_ci      else
679e18e3516Sopenharmony_ci        {
680e18e3516Sopenharmony_ci        PCRE2_SPTR subptr, subptrend;
681e18e3516Sopenharmony_ci
682e18e3516Sopenharmony_ci        /* Find a number for a named group. In case there are duplicate names,
683e18e3516Sopenharmony_ci        search for the first one that is set. If the name is not found when
684e18e3516Sopenharmony_ci        PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
685e18e3516Sopenharmony_ci        non-existent group. */
686e18e3516Sopenharmony_ci
687e18e3516Sopenharmony_ci        if (group < 0)
688e18e3516Sopenharmony_ci          {
689e18e3516Sopenharmony_ci          PCRE2_SPTR first, last, entry;
690e18e3516Sopenharmony_ci          rc = pcre2_substring_nametable_scan(code, name, &first, &last);
691e18e3516Sopenharmony_ci          if (rc == PCRE2_ERROR_NOSUBSTRING &&
692e18e3516Sopenharmony_ci              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
693e18e3516Sopenharmony_ci            {
694e18e3516Sopenharmony_ci            group = code->top_bracket + 1;
695e18e3516Sopenharmony_ci            }
696e18e3516Sopenharmony_ci          else
697e18e3516Sopenharmony_ci            {
698e18e3516Sopenharmony_ci            if (rc < 0) goto PTREXIT;
699e18e3516Sopenharmony_ci            for (entry = first; entry <= last; entry += rc)
700e18e3516Sopenharmony_ci              {
701e18e3516Sopenharmony_ci              uint32_t ng = GET2(entry, 0);
702e18e3516Sopenharmony_ci              if (ng < ovector_count)
703e18e3516Sopenharmony_ci                {
704e18e3516Sopenharmony_ci                if (group < 0) group = ng;          /* First in ovector */
705e18e3516Sopenharmony_ci                if (ovector[ng*2] != PCRE2_UNSET)
706e18e3516Sopenharmony_ci                  {
707e18e3516Sopenharmony_ci                  group = ng;                       /* First that is set */
708e18e3516Sopenharmony_ci                  break;
709e18e3516Sopenharmony_ci                  }
710e18e3516Sopenharmony_ci                }
711e18e3516Sopenharmony_ci              }
712e18e3516Sopenharmony_ci
713e18e3516Sopenharmony_ci            /* If group is still negative, it means we did not find a group
714e18e3516Sopenharmony_ci            that is in the ovector. Just set the first group. */
715e18e3516Sopenharmony_ci
716e18e3516Sopenharmony_ci            if (group < 0) group = GET2(first, 0);
717e18e3516Sopenharmony_ci            }
718e18e3516Sopenharmony_ci          }
719e18e3516Sopenharmony_ci
720e18e3516Sopenharmony_ci        /* We now have a group that is identified by number. Find the length of
721e18e3516Sopenharmony_ci        the captured string. If a group in a non-special substitution is unset
722e18e3516Sopenharmony_ci        when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
723e18e3516Sopenharmony_ci
724e18e3516Sopenharmony_ci        rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
725e18e3516Sopenharmony_ci        if (rc < 0)
726e18e3516Sopenharmony_ci          {
727e18e3516Sopenharmony_ci          if (rc == PCRE2_ERROR_NOSUBSTRING &&
728e18e3516Sopenharmony_ci              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
729e18e3516Sopenharmony_ci            {
730e18e3516Sopenharmony_ci            rc = PCRE2_ERROR_UNSET;
731e18e3516Sopenharmony_ci            }
732e18e3516Sopenharmony_ci          if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
733e18e3516Sopenharmony_ci          if (special == 0)                           /* Plain substitution */
734e18e3516Sopenharmony_ci            {
735e18e3516Sopenharmony_ci            if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
736e18e3516Sopenharmony_ci            goto PTREXIT;                             /* Else error */
737e18e3516Sopenharmony_ci            }
738e18e3516Sopenharmony_ci          }
739e18e3516Sopenharmony_ci
740e18e3516Sopenharmony_ci        /* If special is '+' we have a 'set' and possibly an 'unset' text,
741e18e3516Sopenharmony_ci        both of which are reprocessed when used. If special is '-' we have a
742e18e3516Sopenharmony_ci        default text for when the group is unset; it must be reprocessed. */
743e18e3516Sopenharmony_ci
744e18e3516Sopenharmony_ci        if (special != 0)
745e18e3516Sopenharmony_ci          {
746e18e3516Sopenharmony_ci          if (special == CHAR_MINUS)
747e18e3516Sopenharmony_ci            {
748e18e3516Sopenharmony_ci            if (rc == 0) goto LITERAL_SUBSTITUTE;
749e18e3516Sopenharmony_ci            text2_start = text1_start;
750e18e3516Sopenharmony_ci            text2_end = text1_end;
751e18e3516Sopenharmony_ci            }
752e18e3516Sopenharmony_ci
753e18e3516Sopenharmony_ci          if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
754e18e3516Sopenharmony_ci          ptrstack[ptrstackptr++] = ptr;
755e18e3516Sopenharmony_ci          ptrstack[ptrstackptr++] = repend;
756e18e3516Sopenharmony_ci
757e18e3516Sopenharmony_ci          if (rc == 0)
758e18e3516Sopenharmony_ci            {
759e18e3516Sopenharmony_ci            ptr = text1_start;
760e18e3516Sopenharmony_ci            repend = text1_end;
761e18e3516Sopenharmony_ci            }
762e18e3516Sopenharmony_ci          else
763e18e3516Sopenharmony_ci            {
764e18e3516Sopenharmony_ci            ptr = text2_start;
765e18e3516Sopenharmony_ci            repend = text2_end;
766e18e3516Sopenharmony_ci            }
767e18e3516Sopenharmony_ci          continue;
768e18e3516Sopenharmony_ci          }
769e18e3516Sopenharmony_ci
770e18e3516Sopenharmony_ci        /* Otherwise we have a literal substitution of a group's contents. */
771e18e3516Sopenharmony_ci
772e18e3516Sopenharmony_ci        LITERAL_SUBSTITUTE:
773e18e3516Sopenharmony_ci        subptr = subject + ovector[group*2];
774e18e3516Sopenharmony_ci        subptrend = subject + ovector[group*2 + 1];
775e18e3516Sopenharmony_ci
776e18e3516Sopenharmony_ci        /* Substitute a literal string, possibly forcing alphabetic case. */
777e18e3516Sopenharmony_ci
778e18e3516Sopenharmony_ci        while (subptr < subptrend)
779e18e3516Sopenharmony_ci          {
780e18e3516Sopenharmony_ci          GETCHARINCTEST(ch, subptr);
781e18e3516Sopenharmony_ci          if (forcecase != 0)
782e18e3516Sopenharmony_ci            {
783e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
784e18e3516Sopenharmony_ci            if (utf || ucp)
785e18e3516Sopenharmony_ci              {
786e18e3516Sopenharmony_ci              uint32_t type = UCD_CHARTYPE(ch);
787e18e3516Sopenharmony_ci              if (PRIV(ucp_gentype)[type] == ucp_L &&
788e18e3516Sopenharmony_ci                  type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
789e18e3516Sopenharmony_ci                ch = UCD_OTHERCASE(ch);
790e18e3516Sopenharmony_ci              }
791e18e3516Sopenharmony_ci            else
792e18e3516Sopenharmony_ci#endif
793e18e3516Sopenharmony_ci              {
794e18e3516Sopenharmony_ci              if (((code->tables + cbits_offset +
795e18e3516Sopenharmony_ci                  ((forcecase > 0)? cbit_upper:cbit_lower)
796e18e3516Sopenharmony_ci                  )[ch/8] & (1u << (ch%8))) == 0)
797e18e3516Sopenharmony_ci                ch = (code->tables + fcc_offset)[ch];
798e18e3516Sopenharmony_ci              }
799e18e3516Sopenharmony_ci            forcecase = forcecasereset;
800e18e3516Sopenharmony_ci            }
801e18e3516Sopenharmony_ci
802e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
803e18e3516Sopenharmony_ci          if (utf) chlen = PRIV(ord2utf)(ch, temp); else
804e18e3516Sopenharmony_ci#endif
805e18e3516Sopenharmony_ci            {
806e18e3516Sopenharmony_ci            temp[0] = ch;
807e18e3516Sopenharmony_ci            chlen = 1;
808e18e3516Sopenharmony_ci            }
809e18e3516Sopenharmony_ci          CHECKMEMCPY(temp, chlen);
810e18e3516Sopenharmony_ci          }
811e18e3516Sopenharmony_ci        }
812e18e3516Sopenharmony_ci      }
813e18e3516Sopenharmony_ci
814e18e3516Sopenharmony_ci    /* Handle an escape sequence in extended mode. We can use check_escape()
815e18e3516Sopenharmony_ci    to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
816e18e3516Sopenharmony_ci    the case-forcing escapes are not supported in pcre2_compile() so must be
817e18e3516Sopenharmony_ci    recognized here. */
818e18e3516Sopenharmony_ci
819e18e3516Sopenharmony_ci    else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
820e18e3516Sopenharmony_ci              *ptr == CHAR_BACKSLASH)
821e18e3516Sopenharmony_ci      {
822e18e3516Sopenharmony_ci      int errorcode;
823e18e3516Sopenharmony_ci
824e18e3516Sopenharmony_ci      if (ptr < repend - 1) switch (ptr[1])
825e18e3516Sopenharmony_ci        {
826e18e3516Sopenharmony_ci        case CHAR_L:
827e18e3516Sopenharmony_ci        forcecase = forcecasereset = -1;
828e18e3516Sopenharmony_ci        ptr += 2;
829e18e3516Sopenharmony_ci        continue;
830e18e3516Sopenharmony_ci
831e18e3516Sopenharmony_ci        case CHAR_l:
832e18e3516Sopenharmony_ci        forcecase = -1;
833e18e3516Sopenharmony_ci        forcecasereset = 0;
834e18e3516Sopenharmony_ci        ptr += 2;
835e18e3516Sopenharmony_ci        continue;
836e18e3516Sopenharmony_ci
837e18e3516Sopenharmony_ci        case CHAR_U:
838e18e3516Sopenharmony_ci        forcecase = forcecasereset = 1;
839e18e3516Sopenharmony_ci        ptr += 2;
840e18e3516Sopenharmony_ci        continue;
841e18e3516Sopenharmony_ci
842e18e3516Sopenharmony_ci        case CHAR_u:
843e18e3516Sopenharmony_ci        forcecase = 1;
844e18e3516Sopenharmony_ci        forcecasereset = 0;
845e18e3516Sopenharmony_ci        ptr += 2;
846e18e3516Sopenharmony_ci        continue;
847e18e3516Sopenharmony_ci
848e18e3516Sopenharmony_ci        default:
849e18e3516Sopenharmony_ci        break;
850e18e3516Sopenharmony_ci        }
851e18e3516Sopenharmony_ci
852e18e3516Sopenharmony_ci      ptr++;  /* Point after \ */
853e18e3516Sopenharmony_ci      rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
854e18e3516Sopenharmony_ci        code->overall_options, code->extra_options, FALSE, NULL);
855e18e3516Sopenharmony_ci      if (errorcode != 0) goto BADESCAPE;
856e18e3516Sopenharmony_ci
857e18e3516Sopenharmony_ci      switch(rc)
858e18e3516Sopenharmony_ci        {
859e18e3516Sopenharmony_ci        case ESC_E:
860e18e3516Sopenharmony_ci        forcecase = forcecasereset = 0;
861e18e3516Sopenharmony_ci        continue;
862e18e3516Sopenharmony_ci
863e18e3516Sopenharmony_ci        case ESC_Q:
864e18e3516Sopenharmony_ci        escaped_literal = TRUE;
865e18e3516Sopenharmony_ci        continue;
866e18e3516Sopenharmony_ci
867e18e3516Sopenharmony_ci        case 0:      /* Data character */
868e18e3516Sopenharmony_ci        goto LITERAL;
869e18e3516Sopenharmony_ci
870e18e3516Sopenharmony_ci        default:
871e18e3516Sopenharmony_ci        goto BADESCAPE;
872e18e3516Sopenharmony_ci        }
873e18e3516Sopenharmony_ci      }
874e18e3516Sopenharmony_ci
875e18e3516Sopenharmony_ci    /* Handle a literal code unit */
876e18e3516Sopenharmony_ci
877e18e3516Sopenharmony_ci    else
878e18e3516Sopenharmony_ci      {
879e18e3516Sopenharmony_ci      LOADLITERAL:
880e18e3516Sopenharmony_ci      GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
881e18e3516Sopenharmony_ci
882e18e3516Sopenharmony_ci      LITERAL:
883e18e3516Sopenharmony_ci      if (forcecase != 0)
884e18e3516Sopenharmony_ci        {
885e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
886e18e3516Sopenharmony_ci        if (utf || ucp)
887e18e3516Sopenharmony_ci          {
888e18e3516Sopenharmony_ci          uint32_t type = UCD_CHARTYPE(ch);
889e18e3516Sopenharmony_ci          if (PRIV(ucp_gentype)[type] == ucp_L &&
890e18e3516Sopenharmony_ci              type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
891e18e3516Sopenharmony_ci            ch = UCD_OTHERCASE(ch);
892e18e3516Sopenharmony_ci          }
893e18e3516Sopenharmony_ci        else
894e18e3516Sopenharmony_ci#endif
895e18e3516Sopenharmony_ci          {
896e18e3516Sopenharmony_ci          if (((code->tables + cbits_offset +
897e18e3516Sopenharmony_ci              ((forcecase > 0)? cbit_upper:cbit_lower)
898e18e3516Sopenharmony_ci              )[ch/8] & (1u << (ch%8))) == 0)
899e18e3516Sopenharmony_ci            ch = (code->tables + fcc_offset)[ch];
900e18e3516Sopenharmony_ci          }
901e18e3516Sopenharmony_ci        forcecase = forcecasereset;
902e18e3516Sopenharmony_ci        }
903e18e3516Sopenharmony_ci
904e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE
905e18e3516Sopenharmony_ci      if (utf) chlen = PRIV(ord2utf)(ch, temp); else
906e18e3516Sopenharmony_ci#endif
907e18e3516Sopenharmony_ci        {
908e18e3516Sopenharmony_ci        temp[0] = ch;
909e18e3516Sopenharmony_ci        chlen = 1;
910e18e3516Sopenharmony_ci        }
911e18e3516Sopenharmony_ci      CHECKMEMCPY(temp, chlen);
912e18e3516Sopenharmony_ci      } /* End handling a literal code unit */
913e18e3516Sopenharmony_ci    }   /* End of loop for scanning the replacement. */
914e18e3516Sopenharmony_ci
915e18e3516Sopenharmony_ci  /* The replacement has been copied to the output, or its size has been
916e18e3516Sopenharmony_ci  remembered. Do the callout if there is one and we have done an actual
917e18e3516Sopenharmony_ci  replacement. */
918e18e3516Sopenharmony_ci
919e18e3516Sopenharmony_ci  if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
920e18e3516Sopenharmony_ci    {
921e18e3516Sopenharmony_ci    scb.subscount = subs;
922e18e3516Sopenharmony_ci    scb.output_offsets[1] = buff_offset;
923e18e3516Sopenharmony_ci    rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
924e18e3516Sopenharmony_ci
925e18e3516Sopenharmony_ci    /* A non-zero return means cancel this substitution. Instead, copy the
926e18e3516Sopenharmony_ci    matched string fragment. */
927e18e3516Sopenharmony_ci
928e18e3516Sopenharmony_ci    if (rc != 0)
929e18e3516Sopenharmony_ci      {
930e18e3516Sopenharmony_ci      PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
931e18e3516Sopenharmony_ci      PCRE2_SIZE oldlength = ovector[1] - ovector[0];
932e18e3516Sopenharmony_ci
933e18e3516Sopenharmony_ci      buff_offset -= newlength;
934e18e3516Sopenharmony_ci      lengthleft += newlength;
935e18e3516Sopenharmony_ci      if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
936e18e3516Sopenharmony_ci
937e18e3516Sopenharmony_ci      /* A negative return means do not do any more. */
938e18e3516Sopenharmony_ci
939e18e3516Sopenharmony_ci      if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
940e18e3516Sopenharmony_ci      }
941e18e3516Sopenharmony_ci    }
942e18e3516Sopenharmony_ci
943e18e3516Sopenharmony_ci  /* Save the details of this match. See above for how this data is used. If we
944e18e3516Sopenharmony_ci  matched an empty string, do the magic for global matches. Update the start
945e18e3516Sopenharmony_ci  offset to point to the rest of the subject string. If we re-used an existing
946e18e3516Sopenharmony_ci  match for the first match, switch to the internal match data block. */
947e18e3516Sopenharmony_ci
948e18e3516Sopenharmony_ci  ovecsave[0] = ovector[0];
949e18e3516Sopenharmony_ci  ovecsave[1] = ovector[1];
950e18e3516Sopenharmony_ci  ovecsave[2] = start_offset;
951e18e3516Sopenharmony_ci
952e18e3516Sopenharmony_ci  goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
953e18e3516Sopenharmony_ci    PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
954e18e3516Sopenharmony_ci  start_offset = ovector[1];
955e18e3516Sopenharmony_ci  } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
956e18e3516Sopenharmony_ci
957e18e3516Sopenharmony_ci/* Copy the rest of the subject unless not required, and terminate the output
958e18e3516Sopenharmony_ciwith a binary zero. */
959e18e3516Sopenharmony_ci
960e18e3516Sopenharmony_ciif (!replacement_only)
961e18e3516Sopenharmony_ci  {
962e18e3516Sopenharmony_ci  fraglength = length - start_offset;
963e18e3516Sopenharmony_ci  CHECKMEMCPY(subject + start_offset, fraglength);
964e18e3516Sopenharmony_ci  }
965e18e3516Sopenharmony_ci
966e18e3516Sopenharmony_citemp[0] = 0;
967e18e3516Sopenharmony_ciCHECKMEMCPY(temp, 1);
968e18e3516Sopenharmony_ci
969e18e3516Sopenharmony_ci/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
970e18e3516Sopenharmony_ciand matching has carried on after a full buffer, in order to compute the length
971e18e3516Sopenharmony_cineeded. Otherwise, an overflow generates an immediate error return. */
972e18e3516Sopenharmony_ci
973e18e3516Sopenharmony_ciif (overflowed)
974e18e3516Sopenharmony_ci  {
975e18e3516Sopenharmony_ci  rc = PCRE2_ERROR_NOMEMORY;
976e18e3516Sopenharmony_ci  *blength = buff_length + extra_needed;
977e18e3516Sopenharmony_ci  }
978e18e3516Sopenharmony_ci
979e18e3516Sopenharmony_ci/* After a successful execution, return the number of substitutions and set the
980e18e3516Sopenharmony_cilength of buffer used, excluding the trailing zero. */
981e18e3516Sopenharmony_ci
982e18e3516Sopenharmony_cielse
983e18e3516Sopenharmony_ci  {
984e18e3516Sopenharmony_ci  rc = subs;
985e18e3516Sopenharmony_ci  *blength = buff_offset - 1;
986e18e3516Sopenharmony_ci  }
987e18e3516Sopenharmony_ci
988e18e3516Sopenharmony_ciEXIT:
989e18e3516Sopenharmony_ciif (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
990e18e3516Sopenharmony_ci  else match_data->rc = rc;
991e18e3516Sopenharmony_cireturn rc;
992e18e3516Sopenharmony_ci
993e18e3516Sopenharmony_ciNOROOM:
994e18e3516Sopenharmony_circ = PCRE2_ERROR_NOMEMORY;
995e18e3516Sopenharmony_cigoto EXIT;
996e18e3516Sopenharmony_ci
997e18e3516Sopenharmony_ciBAD:
998e18e3516Sopenharmony_circ = PCRE2_ERROR_BADREPLACEMENT;
999e18e3516Sopenharmony_cigoto PTREXIT;
1000e18e3516Sopenharmony_ci
1001e18e3516Sopenharmony_ciBADESCAPE:
1002e18e3516Sopenharmony_circ = PCRE2_ERROR_BADREPESCAPE;
1003e18e3516Sopenharmony_ci
1004e18e3516Sopenharmony_ciPTREXIT:
1005e18e3516Sopenharmony_ci*blength = (PCRE2_SIZE)(ptr - replacement);
1006e18e3516Sopenharmony_cigoto EXIT;
1007e18e3516Sopenharmony_ci}
1008e18e3516Sopenharmony_ci
1009e18e3516Sopenharmony_ci/* End of pcre2_substitute.c */
1010