1e18e3516Sopenharmony_ci/************************************************* 2e18e3516Sopenharmony_ci* Perl-Compatible Regular Expressions * 3e18e3516Sopenharmony_ci*************************************************/ 4e18e3516Sopenharmony_ci 5e18e3516Sopenharmony_ci/* PCRE is a library of functions to support regular expressions whose syntax 6e18e3516Sopenharmony_ciand semantics are as close as possible to those of the Perl 5 language. 7e18e3516Sopenharmony_ci 8e18e3516Sopenharmony_ci Written by Philip Hazel 9e18e3516Sopenharmony_ci Original API code Copyright (c) 1997-2012 University of Cambridge 10e18e3516Sopenharmony_ci New API code Copyright (c) 2016-2022 University of Cambridge 11e18e3516Sopenharmony_ci 12e18e3516Sopenharmony_ci----------------------------------------------------------------------------- 13e18e3516Sopenharmony_ciRedistribution and use in source and binary forms, with or without 14e18e3516Sopenharmony_cimodification, are permitted provided that the following conditions are met: 15e18e3516Sopenharmony_ci 16e18e3516Sopenharmony_ci * Redistributions of source code must retain the above copyright notice, 17e18e3516Sopenharmony_ci this list of conditions and the following disclaimer. 18e18e3516Sopenharmony_ci 19e18e3516Sopenharmony_ci * Redistributions in binary form must reproduce the above copyright 20e18e3516Sopenharmony_ci notice, this list of conditions and the following disclaimer in the 21e18e3516Sopenharmony_ci documentation and/or other materials provided with the distribution. 22e18e3516Sopenharmony_ci 23e18e3516Sopenharmony_ci * Neither the name of the University of Cambridge nor the names of its 24e18e3516Sopenharmony_ci contributors may be used to endorse or promote products derived from 25e18e3516Sopenharmony_ci this software without specific prior written permission. 
26e18e3516Sopenharmony_ci 27e18e3516Sopenharmony_ciTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28e18e3516Sopenharmony_ciAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29e18e3516Sopenharmony_ciIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30e18e3516Sopenharmony_ciARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31e18e3516Sopenharmony_ciLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32e18e3516Sopenharmony_ciCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33e18e3516Sopenharmony_ciSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34e18e3516Sopenharmony_ciINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35e18e3516Sopenharmony_ciCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36e18e3516Sopenharmony_ciARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37e18e3516Sopenharmony_ciPOSSIBILITY OF SUCH DAMAGE. 
38e18e3516Sopenharmony_ci----------------------------------------------------------------------------- 39e18e3516Sopenharmony_ci*/ 40e18e3516Sopenharmony_ci 41e18e3516Sopenharmony_ci 42e18e3516Sopenharmony_ci#ifdef HAVE_CONFIG_H 43e18e3516Sopenharmony_ci#include "config.h" 44e18e3516Sopenharmony_ci#endif 45e18e3516Sopenharmony_ci 46e18e3516Sopenharmony_ci#include "pcre2_internal.h" 47e18e3516Sopenharmony_ci 48e18e3516Sopenharmony_ci#define PTR_STACK_SIZE 20 49e18e3516Sopenharmony_ci 50e18e3516Sopenharmony_ci#define SUBSTITUTE_OPTIONS \ 51e18e3516Sopenharmony_ci (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \ 52e18e3516Sopenharmony_ci PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \ 53e18e3516Sopenharmony_ci PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \ 54e18e3516Sopenharmony_ci PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY) 55e18e3516Sopenharmony_ci 56e18e3516Sopenharmony_ci 57e18e3516Sopenharmony_ci 58e18e3516Sopenharmony_ci/************************************************* 59e18e3516Sopenharmony_ci* Find end of substitute text * 60e18e3516Sopenharmony_ci*************************************************/ 61e18e3516Sopenharmony_ci 62e18e3516Sopenharmony_ci/* In extended mode, we recognize ${name:+set text:unset text} and similar 63e18e3516Sopenharmony_ciconstructions. This requires the identification of unescaped : and } 64e18e3516Sopenharmony_cicharacters. This function scans for such. It must deal with nested ${ 65e18e3516Sopenharmony_ciconstructions. The pointer to the text is updated, either to the required end 66e18e3516Sopenharmony_cicharacter, or to where an error was detected. 
67e18e3516Sopenharmony_ci 68e18e3516Sopenharmony_ciArguments: 69e18e3516Sopenharmony_ci code points to the compiled expression (for options) 70e18e3516Sopenharmony_ci ptrptr points to the pointer to the start of the text (updated) 71e18e3516Sopenharmony_ci ptrend end of the whole string 72e18e3516Sopenharmony_ci last TRUE if the last expected string (only } recognized) 73e18e3516Sopenharmony_ci 74e18e3516Sopenharmony_ciReturns: 0 on success 75e18e3516Sopenharmony_ci negative error code on failure 76e18e3516Sopenharmony_ci*/ 77e18e3516Sopenharmony_ci 78e18e3516Sopenharmony_cistatic int 79e18e3516Sopenharmony_cifind_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, 80e18e3516Sopenharmony_ci BOOL last) 81e18e3516Sopenharmony_ci{ 82e18e3516Sopenharmony_ciint rc = 0; 83e18e3516Sopenharmony_ciuint32_t nestlevel = 0; 84e18e3516Sopenharmony_ciBOOL literal = FALSE; 85e18e3516Sopenharmony_ciPCRE2_SPTR ptr = *ptrptr; 86e18e3516Sopenharmony_ci 87e18e3516Sopenharmony_cifor (; ptr < ptrend; ptr++) 88e18e3516Sopenharmony_ci { 89e18e3516Sopenharmony_ci if (literal) 90e18e3516Sopenharmony_ci { 91e18e3516Sopenharmony_ci if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E) 92e18e3516Sopenharmony_ci { 93e18e3516Sopenharmony_ci literal = FALSE; 94e18e3516Sopenharmony_ci ptr += 1; 95e18e3516Sopenharmony_ci } 96e18e3516Sopenharmony_ci } 97e18e3516Sopenharmony_ci 98e18e3516Sopenharmony_ci else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 99e18e3516Sopenharmony_ci { 100e18e3516Sopenharmony_ci if (nestlevel == 0) goto EXIT; 101e18e3516Sopenharmony_ci nestlevel--; 102e18e3516Sopenharmony_ci } 103e18e3516Sopenharmony_ci 104e18e3516Sopenharmony_ci else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT; 105e18e3516Sopenharmony_ci 106e18e3516Sopenharmony_ci else if (*ptr == CHAR_DOLLAR_SIGN) 107e18e3516Sopenharmony_ci { 108e18e3516Sopenharmony_ci if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET) 109e18e3516Sopenharmony_ci { 
110e18e3516Sopenharmony_ci nestlevel++; 111e18e3516Sopenharmony_ci ptr += 1; 112e18e3516Sopenharmony_ci } 113e18e3516Sopenharmony_ci } 114e18e3516Sopenharmony_ci 115e18e3516Sopenharmony_ci else if (*ptr == CHAR_BACKSLASH) 116e18e3516Sopenharmony_ci { 117e18e3516Sopenharmony_ci int erc; 118e18e3516Sopenharmony_ci int errorcode; 119e18e3516Sopenharmony_ci uint32_t ch; 120e18e3516Sopenharmony_ci 121e18e3516Sopenharmony_ci if (ptr < ptrend - 1) switch (ptr[1]) 122e18e3516Sopenharmony_ci { 123e18e3516Sopenharmony_ci case CHAR_L: 124e18e3516Sopenharmony_ci case CHAR_l: 125e18e3516Sopenharmony_ci case CHAR_U: 126e18e3516Sopenharmony_ci case CHAR_u: 127e18e3516Sopenharmony_ci ptr += 1; 128e18e3516Sopenharmony_ci continue; 129e18e3516Sopenharmony_ci } 130e18e3516Sopenharmony_ci 131e18e3516Sopenharmony_ci ptr += 1; /* Must point after \ */ 132e18e3516Sopenharmony_ci erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, 133e18e3516Sopenharmony_ci code->overall_options, code->extra_options, FALSE, NULL); 134e18e3516Sopenharmony_ci ptr -= 1; /* Back to last code unit of escape */ 135e18e3516Sopenharmony_ci if (errorcode != 0) 136e18e3516Sopenharmony_ci { 137e18e3516Sopenharmony_ci rc = errorcode; 138e18e3516Sopenharmony_ci goto EXIT; 139e18e3516Sopenharmony_ci } 140e18e3516Sopenharmony_ci 141e18e3516Sopenharmony_ci switch(erc) 142e18e3516Sopenharmony_ci { 143e18e3516Sopenharmony_ci case 0: /* Data character */ 144e18e3516Sopenharmony_ci case ESC_E: /* Isolated \E is ignored */ 145e18e3516Sopenharmony_ci break; 146e18e3516Sopenharmony_ci 147e18e3516Sopenharmony_ci case ESC_Q: 148e18e3516Sopenharmony_ci literal = TRUE; 149e18e3516Sopenharmony_ci break; 150e18e3516Sopenharmony_ci 151e18e3516Sopenharmony_ci default: 152e18e3516Sopenharmony_ci rc = PCRE2_ERROR_BADREPESCAPE; 153e18e3516Sopenharmony_ci goto EXIT; 154e18e3516Sopenharmony_ci } 155e18e3516Sopenharmony_ci } 156e18e3516Sopenharmony_ci } 157e18e3516Sopenharmony_ci 158e18e3516Sopenharmony_circ = 
PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */ 159e18e3516Sopenharmony_ci 160e18e3516Sopenharmony_ciEXIT: 161e18e3516Sopenharmony_ci*ptrptr = ptr; 162e18e3516Sopenharmony_cireturn rc; 163e18e3516Sopenharmony_ci} 164e18e3516Sopenharmony_ci 165e18e3516Sopenharmony_ci 166e18e3516Sopenharmony_ci 167e18e3516Sopenharmony_ci/************************************************* 168e18e3516Sopenharmony_ci* Match and substitute * 169e18e3516Sopenharmony_ci*************************************************/ 170e18e3516Sopenharmony_ci 171e18e3516Sopenharmony_ci/* This function applies a compiled re to a subject string and creates a new 172e18e3516Sopenharmony_cistring with substitutions. The first 7 arguments are the same as for 173e18e3516Sopenharmony_cipcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED. 174e18e3516Sopenharmony_ci 175e18e3516Sopenharmony_ciArguments: 176e18e3516Sopenharmony_ci code points to the compiled expression 177e18e3516Sopenharmony_ci subject points to the subject string 178e18e3516Sopenharmony_ci length length of subject string (may contain binary zeros) 179e18e3516Sopenharmony_ci start_offset where to start in the subject string 180e18e3516Sopenharmony_ci options option bits 181e18e3516Sopenharmony_ci match_data points to a match_data block, or is NULL 182e18e3516Sopenharmony_ci context points a PCRE2 context 183e18e3516Sopenharmony_ci replacement points to the replacement string 184e18e3516Sopenharmony_ci rlength length of replacement string 185e18e3516Sopenharmony_ci buffer where to put the substituted string 186e18e3516Sopenharmony_ci blength points to length of buffer; updated to length of string 187e18e3516Sopenharmony_ci 188e18e3516Sopenharmony_ciReturns: >= 0 number of substitutions made 189e18e3516Sopenharmony_ci < 0 an error code 190e18e3516Sopenharmony_ci PCRE2_ERROR_BADREPLACEMENT means invalid use of $ 191e18e3516Sopenharmony_ci*/ 192e18e3516Sopenharmony_ci 193e18e3516Sopenharmony_ci/* This macro checks for space in the 
buffer before copying into it. On
overflow, either give an error immediately, or keep on, accumulating the
length. */

/* CHECKMEMCPY(from,length) is not self-contained: it expands in place and uses
the enclosing function's variables overflowed, lengthleft, suboptions,
extra_needed, buffer and buff_offset, and it jumps to the label NOROOM when
there is no room and PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is not set.

  from      start of the data to copy
  length    amount to copy; it is passed through CU2BYTES() to obtain the
            memcpy() size

Once overflowed is set nothing more is copied; only extra_needed grows. Both
arguments are parenthesized in the expansion so that expressions can be passed
safely, but "length" is still evaluated more than once, so it must not have
side effects. The expansion is a brace block, not do { } while (0), so existing
uses keep their exact meaning. */

#define CHECKMEMCPY(from,length) \
  { \
  if (!overflowed && lengthleft < (length)) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = (length) - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += (length); \
    } \
  else \
    {  \
    memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
    buff_offset += (length); \
    lengthleft -= (length); \
    } \
  }

/* Here's the function */

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
  PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
{
int rc;
int subs;
int forcecase = 0;
int forcecasereset = 0;
uint32_t ovector_count;
uint32_t goptions = 0;
uint32_t suboptions;
232e18e3516Sopenharmony_cipcre2_match_data *internal_match_data = NULL; 233e18e3516Sopenharmony_ciBOOL escaped_literal = FALSE; 234e18e3516Sopenharmony_ciBOOL overflowed = FALSE; 235e18e3516Sopenharmony_ciBOOL use_existing_match; 236e18e3516Sopenharmony_ciBOOL replacement_only; 237e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 238e18e3516Sopenharmony_ciBOOL utf = (code->overall_options & PCRE2_UTF) != 0; 239e18e3516Sopenharmony_ciBOOL ucp = (code->overall_options & PCRE2_UCP) != 0; 240e18e3516Sopenharmony_ci#endif 241e18e3516Sopenharmony_ciPCRE2_UCHAR temp[6]; 242e18e3516Sopenharmony_ciPCRE2_SPTR ptr; 243e18e3516Sopenharmony_ciPCRE2_SPTR repend; 244e18e3516Sopenharmony_ciPCRE2_SIZE extra_needed = 0; 245e18e3516Sopenharmony_ciPCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; 246e18e3516Sopenharmony_ciPCRE2_SIZE *ovector; 247e18e3516Sopenharmony_ciPCRE2_SIZE ovecsave[3]; 248e18e3516Sopenharmony_cipcre2_substitute_callout_block scb; 249e18e3516Sopenharmony_ci 250e18e3516Sopenharmony_ci/* General initialization */ 251e18e3516Sopenharmony_ci 252e18e3516Sopenharmony_cibuff_offset = 0; 253e18e3516Sopenharmony_cilengthleft = buff_length = *blength; 254e18e3516Sopenharmony_ci*blength = PCRE2_UNSET; 255e18e3516Sopenharmony_ciovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; 256e18e3516Sopenharmony_ci 257e18e3516Sopenharmony_ci/* Partial matching is not valid. This must come after setting *blength to 258e18e3516Sopenharmony_ciPCRE2_UNSET, so as not to imply an offset in the replacement. */ 259e18e3516Sopenharmony_ci 260e18e3516Sopenharmony_ciif ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) 261e18e3516Sopenharmony_ci return PCRE2_ERROR_BADOPTION; 262e18e3516Sopenharmony_ci 263e18e3516Sopenharmony_ci/* Validate length and find the end of the replacement. A NULL replacement of 264e18e3516Sopenharmony_cizero length is interpreted as an empty string. 
*/ 265e18e3516Sopenharmony_ci 266e18e3516Sopenharmony_ciif (replacement == NULL) 267e18e3516Sopenharmony_ci { 268e18e3516Sopenharmony_ci if (rlength != 0) return PCRE2_ERROR_NULL; 269e18e3516Sopenharmony_ci replacement = (PCRE2_SPTR)""; 270e18e3516Sopenharmony_ci } 271e18e3516Sopenharmony_ci 272e18e3516Sopenharmony_ciif (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); 273e18e3516Sopenharmony_cirepend = replacement + rlength; 274e18e3516Sopenharmony_ci 275e18e3516Sopenharmony_ci/* Check for using a match that has already happened. Note that the subject 276e18e3516Sopenharmony_cipointer in the match data may be NULL after a no-match. */ 277e18e3516Sopenharmony_ci 278e18e3516Sopenharmony_ciuse_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0); 279e18e3516Sopenharmony_cireplacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0); 280e18e3516Sopenharmony_ci 281e18e3516Sopenharmony_ci/* If starting from an existing match, there must be an externally provided 282e18e3516Sopenharmony_cimatch data block. We create an internal match_data block in two cases: (a) an 283e18e3516Sopenharmony_ciexternal one is not supplied (and we are not starting from an existing match); 284e18e3516Sopenharmony_ci(b) an existing match is to be used for the first substitution. In the latter 285e18e3516Sopenharmony_cicase, we copy the existing match into the internal block, except for any cached 286e18e3516Sopenharmony_ciheap frame size and pointer. This ensures that no changes are made to the 287e18e3516Sopenharmony_ciexternal match data block. */ 288e18e3516Sopenharmony_ci 289e18e3516Sopenharmony_ciif (match_data == NULL) 290e18e3516Sopenharmony_ci { 291e18e3516Sopenharmony_ci pcre2_general_context *gcontext; 292e18e3516Sopenharmony_ci if (use_existing_match) return PCRE2_ERROR_NULL; 293e18e3516Sopenharmony_ci gcontext = (mcontext == NULL)? 
294e18e3516Sopenharmony_ci (pcre2_general_context *)code : 295e18e3516Sopenharmony_ci (pcre2_general_context *)mcontext; 296e18e3516Sopenharmony_ci match_data = internal_match_data = 297e18e3516Sopenharmony_ci pcre2_match_data_create_from_pattern(code, gcontext); 298e18e3516Sopenharmony_ci if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; 299e18e3516Sopenharmony_ci } 300e18e3516Sopenharmony_ci 301e18e3516Sopenharmony_cielse if (use_existing_match) 302e18e3516Sopenharmony_ci { 303e18e3516Sopenharmony_ci pcre2_general_context *gcontext = (mcontext == NULL)? 304e18e3516Sopenharmony_ci (pcre2_general_context *)code : 305e18e3516Sopenharmony_ci (pcre2_general_context *)mcontext; 306e18e3516Sopenharmony_ci int pairs = (code->top_bracket + 1 < match_data->oveccount)? 307e18e3516Sopenharmony_ci code->top_bracket + 1 : match_data->oveccount; 308e18e3516Sopenharmony_ci internal_match_data = pcre2_match_data_create(match_data->oveccount, 309e18e3516Sopenharmony_ci gcontext); 310e18e3516Sopenharmony_ci if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; 311e18e3516Sopenharmony_ci memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector) 312e18e3516Sopenharmony_ci + 2*pairs*sizeof(PCRE2_SIZE)); 313e18e3516Sopenharmony_ci internal_match_data->heapframes = NULL; 314e18e3516Sopenharmony_ci internal_match_data->heapframes_size = 0; 315e18e3516Sopenharmony_ci match_data = internal_match_data; 316e18e3516Sopenharmony_ci } 317e18e3516Sopenharmony_ci 318e18e3516Sopenharmony_ci/* Remember ovector details */ 319e18e3516Sopenharmony_ci 320e18e3516Sopenharmony_ciovector = pcre2_get_ovector_pointer(match_data); 321e18e3516Sopenharmony_ciovector_count = pcre2_get_ovector_count(match_data); 322e18e3516Sopenharmony_ci 323e18e3516Sopenharmony_ci/* Fixed things in the callout block */ 324e18e3516Sopenharmony_ci 325e18e3516Sopenharmony_ciscb.version = 0; 326e18e3516Sopenharmony_ciscb.input = subject; 327e18e3516Sopenharmony_ciscb.output = 
(PCRE2_SPTR)buffer; 328e18e3516Sopenharmony_ciscb.ovector = ovector; 329e18e3516Sopenharmony_ci 330e18e3516Sopenharmony_ci/* A NULL subject of zero length is treated as an empty string. */ 331e18e3516Sopenharmony_ci 332e18e3516Sopenharmony_ciif (subject == NULL) 333e18e3516Sopenharmony_ci { 334e18e3516Sopenharmony_ci if (length != 0) return PCRE2_ERROR_NULL; 335e18e3516Sopenharmony_ci subject = (PCRE2_SPTR)""; 336e18e3516Sopenharmony_ci } 337e18e3516Sopenharmony_ci 338e18e3516Sopenharmony_ci/* Find length of zero-terminated subject */ 339e18e3516Sopenharmony_ci 340e18e3516Sopenharmony_ciif (length == PCRE2_ZERO_TERMINATED) 341e18e3516Sopenharmony_ci length = subject? PRIV(strlen)(subject) : 0; 342e18e3516Sopenharmony_ci 343e18e3516Sopenharmony_ci/* Check UTF replacement string if necessary. */ 344e18e3516Sopenharmony_ci 345e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 346e18e3516Sopenharmony_ciif (utf && (options & PCRE2_NO_UTF_CHECK) == 0) 347e18e3516Sopenharmony_ci { 348e18e3516Sopenharmony_ci rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar)); 349e18e3516Sopenharmony_ci if (rc != 0) 350e18e3516Sopenharmony_ci { 351e18e3516Sopenharmony_ci match_data->leftchar = 0; 352e18e3516Sopenharmony_ci goto EXIT; 353e18e3516Sopenharmony_ci } 354e18e3516Sopenharmony_ci } 355e18e3516Sopenharmony_ci#endif /* SUPPORT_UNICODE */ 356e18e3516Sopenharmony_ci 357e18e3516Sopenharmony_ci/* Save the substitute options and remove them from the match options. */ 358e18e3516Sopenharmony_ci 359e18e3516Sopenharmony_cisuboptions = options & SUBSTITUTE_OPTIONS; 360e18e3516Sopenharmony_cioptions &= ~SUBSTITUTE_OPTIONS; 361e18e3516Sopenharmony_ci 362e18e3516Sopenharmony_ci/* Error if the start match offset is greater than the length of the subject. 
*/ 363e18e3516Sopenharmony_ci 364e18e3516Sopenharmony_ciif (start_offset > length) 365e18e3516Sopenharmony_ci { 366e18e3516Sopenharmony_ci match_data->leftchar = 0; 367e18e3516Sopenharmony_ci rc = PCRE2_ERROR_BADOFFSET; 368e18e3516Sopenharmony_ci goto EXIT; 369e18e3516Sopenharmony_ci } 370e18e3516Sopenharmony_ci 371e18e3516Sopenharmony_ci/* Copy up to the start offset, unless only the replacement is required. */ 372e18e3516Sopenharmony_ci 373e18e3516Sopenharmony_ciif (!replacement_only) CHECKMEMCPY(subject, start_offset); 374e18e3516Sopenharmony_ci 375e18e3516Sopenharmony_ci/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first 376e18e3516Sopenharmony_cimatch is taken from the match_data that was passed in. */ 377e18e3516Sopenharmony_ci 378e18e3516Sopenharmony_cisubs = 0; 379e18e3516Sopenharmony_cido 380e18e3516Sopenharmony_ci { 381e18e3516Sopenharmony_ci PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; 382e18e3516Sopenharmony_ci uint32_t ptrstackptr = 0; 383e18e3516Sopenharmony_ci 384e18e3516Sopenharmony_ci if (use_existing_match) 385e18e3516Sopenharmony_ci { 386e18e3516Sopenharmony_ci rc = match_data->rc; 387e18e3516Sopenharmony_ci use_existing_match = FALSE; 388e18e3516Sopenharmony_ci } 389e18e3516Sopenharmony_ci else rc = pcre2_match(code, subject, length, start_offset, options|goptions, 390e18e3516Sopenharmony_ci match_data, mcontext); 391e18e3516Sopenharmony_ci 392e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 393e18e3516Sopenharmony_ci if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ 394e18e3516Sopenharmony_ci#endif 395e18e3516Sopenharmony_ci 396e18e3516Sopenharmony_ci /* Any error other than no match returns the error code. No match when not 397e18e3516Sopenharmony_ci doing the special after-empty-match global rematch, or when at the end of the 398e18e3516Sopenharmony_ci subject, breaks the global loop. Otherwise, advance the starting point by one 399e18e3516Sopenharmony_ci character, copying it to the output, and try again. 
*/ 400e18e3516Sopenharmony_ci 401e18e3516Sopenharmony_ci if (rc < 0) 402e18e3516Sopenharmony_ci { 403e18e3516Sopenharmony_ci PCRE2_SIZE save_start; 404e18e3516Sopenharmony_ci 405e18e3516Sopenharmony_ci if (rc != PCRE2_ERROR_NOMATCH) goto EXIT; 406e18e3516Sopenharmony_ci if (goptions == 0 || start_offset >= length) break; 407e18e3516Sopenharmony_ci 408e18e3516Sopenharmony_ci /* Advance by one code point. Then, if CRLF is a valid newline sequence and 409e18e3516Sopenharmony_ci we have advanced into the middle of it, advance one more code point. In 410e18e3516Sopenharmony_ci other words, do not start in the middle of CRLF, even if CR and LF on their 411e18e3516Sopenharmony_ci own are valid newlines. */ 412e18e3516Sopenharmony_ci 413e18e3516Sopenharmony_ci save_start = start_offset++; 414e18e3516Sopenharmony_ci if (subject[start_offset-1] == CHAR_CR && 415e18e3516Sopenharmony_ci code->newline_convention != PCRE2_NEWLINE_CR && 416e18e3516Sopenharmony_ci code->newline_convention != PCRE2_NEWLINE_LF && 417e18e3516Sopenharmony_ci start_offset < length && 418e18e3516Sopenharmony_ci subject[start_offset] == CHAR_LF) 419e18e3516Sopenharmony_ci start_offset++; 420e18e3516Sopenharmony_ci 421e18e3516Sopenharmony_ci /* Otherwise, in UTF mode, advance past any secondary code points. 
*/ 422e18e3516Sopenharmony_ci 423e18e3516Sopenharmony_ci else if ((code->overall_options & PCRE2_UTF) != 0) 424e18e3516Sopenharmony_ci { 425e18e3516Sopenharmony_ci#if PCRE2_CODE_UNIT_WIDTH == 8 426e18e3516Sopenharmony_ci while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80) 427e18e3516Sopenharmony_ci start_offset++; 428e18e3516Sopenharmony_ci#elif PCRE2_CODE_UNIT_WIDTH == 16 429e18e3516Sopenharmony_ci while (start_offset < length && 430e18e3516Sopenharmony_ci (subject[start_offset] & 0xfc00) == 0xdc00) 431e18e3516Sopenharmony_ci start_offset++; 432e18e3516Sopenharmony_ci#endif 433e18e3516Sopenharmony_ci } 434e18e3516Sopenharmony_ci 435e18e3516Sopenharmony_ci /* Copy what we have advanced past (unless not required), reset the special 436e18e3516Sopenharmony_ci global options, and continue to the next match. */ 437e18e3516Sopenharmony_ci 438e18e3516Sopenharmony_ci fraglength = start_offset - save_start; 439e18e3516Sopenharmony_ci if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength); 440e18e3516Sopenharmony_ci goptions = 0; 441e18e3516Sopenharmony_ci continue; 442e18e3516Sopenharmony_ci } 443e18e3516Sopenharmony_ci 444e18e3516Sopenharmony_ci /* Handle a successful match. Matches that use \K to end before they start 445e18e3516Sopenharmony_ci or start before the current point in the subject are not supported. */ 446e18e3516Sopenharmony_ci 447e18e3516Sopenharmony_ci if (ovector[1] < ovector[0] || ovector[0] < start_offset) 448e18e3516Sopenharmony_ci { 449e18e3516Sopenharmony_ci rc = PCRE2_ERROR_BADSUBSPATTERN; 450e18e3516Sopenharmony_ci goto EXIT; 451e18e3516Sopenharmony_ci } 452e18e3516Sopenharmony_ci 453e18e3516Sopenharmony_ci /* Check for the same match as previous. This is legitimate after matching an 454e18e3516Sopenharmony_ci empty string that starts after the initial match offset. 
We have tried again 455e18e3516Sopenharmony_ci at the match point in case the pattern is one like /(?<=\G.)/ which can never 456e18e3516Sopenharmony_ci match at its starting point, so running the match achieves the bumpalong. If 457e18e3516Sopenharmony_ci we do get the same (null) match at the original match point, it isn't such a 458e18e3516Sopenharmony_ci pattern, so we now do the empty string magic. In all other cases, a repeat 459e18e3516Sopenharmony_ci match should never occur. */ 460e18e3516Sopenharmony_ci 461e18e3516Sopenharmony_ci if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) 462e18e3516Sopenharmony_ci { 463e18e3516Sopenharmony_ci if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) 464e18e3516Sopenharmony_ci { 465e18e3516Sopenharmony_ci goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; 466e18e3516Sopenharmony_ci ovecsave[2] = start_offset; 467e18e3516Sopenharmony_ci continue; /* Back to the top of the loop */ 468e18e3516Sopenharmony_ci } 469e18e3516Sopenharmony_ci rc = PCRE2_ERROR_INTERNAL_DUPMATCH; 470e18e3516Sopenharmony_ci goto EXIT; 471e18e3516Sopenharmony_ci } 472e18e3516Sopenharmony_ci 473e18e3516Sopenharmony_ci /* Count substitutions with a paranoid check for integer overflow; surely no 474e18e3516Sopenharmony_ci real call to this function would ever hit this! */ 475e18e3516Sopenharmony_ci 476e18e3516Sopenharmony_ci if (subs == INT_MAX) 477e18e3516Sopenharmony_ci { 478e18e3516Sopenharmony_ci rc = PCRE2_ERROR_TOOMANYREPLACE; 479e18e3516Sopenharmony_ci goto EXIT; 480e18e3516Sopenharmony_ci } 481e18e3516Sopenharmony_ci subs++; 482e18e3516Sopenharmony_ci 483e18e3516Sopenharmony_ci /* Copy the text leading up to the match (unless not required), and remember 484e18e3516Sopenharmony_ci where the insert begins and how many ovector pairs are set. 
*/ 485e18e3516Sopenharmony_ci 486e18e3516Sopenharmony_ci if (rc == 0) rc = ovector_count; 487e18e3516Sopenharmony_ci fraglength = ovector[0] - start_offset; 488e18e3516Sopenharmony_ci if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength); 489e18e3516Sopenharmony_ci scb.output_offsets[0] = buff_offset; 490e18e3516Sopenharmony_ci scb.oveccount = rc; 491e18e3516Sopenharmony_ci 492e18e3516Sopenharmony_ci /* Process the replacement string. If the entire replacement is literal, just 493e18e3516Sopenharmony_ci copy it with length check. */ 494e18e3516Sopenharmony_ci 495e18e3516Sopenharmony_ci ptr = replacement; 496e18e3516Sopenharmony_ci if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0) 497e18e3516Sopenharmony_ci { 498e18e3516Sopenharmony_ci CHECKMEMCPY(ptr, rlength); 499e18e3516Sopenharmony_ci } 500e18e3516Sopenharmony_ci 501e18e3516Sopenharmony_ci /* Within a non-literal replacement, which must be scanned character by 502e18e3516Sopenharmony_ci character, local literal mode can be set by \Q, but only in extended mode 503e18e3516Sopenharmony_ci when backslashes are being interpreted. In extended mode we must handle 504e18e3516Sopenharmony_ci nested substrings that are to be reprocessed. */ 505e18e3516Sopenharmony_ci 506e18e3516Sopenharmony_ci else for (;;) 507e18e3516Sopenharmony_ci { 508e18e3516Sopenharmony_ci uint32_t ch; 509e18e3516Sopenharmony_ci unsigned int chlen; 510e18e3516Sopenharmony_ci 511e18e3516Sopenharmony_ci /* If at the end of a nested substring, pop the stack. 
*/ 512e18e3516Sopenharmony_ci 513e18e3516Sopenharmony_ci if (ptr >= repend) 514e18e3516Sopenharmony_ci { 515e18e3516Sopenharmony_ci if (ptrstackptr == 0) break; /* End of replacement string */ 516e18e3516Sopenharmony_ci repend = ptrstack[--ptrstackptr]; 517e18e3516Sopenharmony_ci ptr = ptrstack[--ptrstackptr]; 518e18e3516Sopenharmony_ci continue; 519e18e3516Sopenharmony_ci } 520e18e3516Sopenharmony_ci 521e18e3516Sopenharmony_ci /* Handle the next character */ 522e18e3516Sopenharmony_ci 523e18e3516Sopenharmony_ci if (escaped_literal) 524e18e3516Sopenharmony_ci { 525e18e3516Sopenharmony_ci if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E) 526e18e3516Sopenharmony_ci { 527e18e3516Sopenharmony_ci escaped_literal = FALSE; 528e18e3516Sopenharmony_ci ptr += 2; 529e18e3516Sopenharmony_ci continue; 530e18e3516Sopenharmony_ci } 531e18e3516Sopenharmony_ci goto LOADLITERAL; 532e18e3516Sopenharmony_ci } 533e18e3516Sopenharmony_ci 534e18e3516Sopenharmony_ci /* Not in literal mode. */ 535e18e3516Sopenharmony_ci 536e18e3516Sopenharmony_ci if (*ptr == CHAR_DOLLAR_SIGN) 537e18e3516Sopenharmony_ci { 538e18e3516Sopenharmony_ci int group, n; 539e18e3516Sopenharmony_ci uint32_t special = 0; 540e18e3516Sopenharmony_ci BOOL inparens; 541e18e3516Sopenharmony_ci BOOL star; 542e18e3516Sopenharmony_ci PCRE2_SIZE sublength; 543e18e3516Sopenharmony_ci PCRE2_SPTR text1_start = NULL; 544e18e3516Sopenharmony_ci PCRE2_SPTR text1_end = NULL; 545e18e3516Sopenharmony_ci PCRE2_SPTR text2_start = NULL; 546e18e3516Sopenharmony_ci PCRE2_SPTR text2_end = NULL; 547e18e3516Sopenharmony_ci PCRE2_UCHAR next; 548e18e3516Sopenharmony_ci PCRE2_UCHAR name[33]; 549e18e3516Sopenharmony_ci 550e18e3516Sopenharmony_ci if (++ptr >= repend) goto BAD; 551e18e3516Sopenharmony_ci if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; 552e18e3516Sopenharmony_ci 553e18e3516Sopenharmony_ci group = -1; 554e18e3516Sopenharmony_ci n = 0; 555e18e3516Sopenharmony_ci inparens = FALSE; 556e18e3516Sopenharmony_ci 
star = FALSE; 557e18e3516Sopenharmony_ci 558e18e3516Sopenharmony_ci if (next == CHAR_LEFT_CURLY_BRACKET) 559e18e3516Sopenharmony_ci { 560e18e3516Sopenharmony_ci if (++ptr >= repend) goto BAD; 561e18e3516Sopenharmony_ci next = *ptr; 562e18e3516Sopenharmony_ci inparens = TRUE; 563e18e3516Sopenharmony_ci } 564e18e3516Sopenharmony_ci 565e18e3516Sopenharmony_ci if (next == CHAR_ASTERISK) 566e18e3516Sopenharmony_ci { 567e18e3516Sopenharmony_ci if (++ptr >= repend) goto BAD; 568e18e3516Sopenharmony_ci next = *ptr; 569e18e3516Sopenharmony_ci star = TRUE; 570e18e3516Sopenharmony_ci } 571e18e3516Sopenharmony_ci 572e18e3516Sopenharmony_ci if (!star && next >= CHAR_0 && next <= CHAR_9) 573e18e3516Sopenharmony_ci { 574e18e3516Sopenharmony_ci group = next - CHAR_0; 575e18e3516Sopenharmony_ci while (++ptr < repend) 576e18e3516Sopenharmony_ci { 577e18e3516Sopenharmony_ci next = *ptr; 578e18e3516Sopenharmony_ci if (next < CHAR_0 || next > CHAR_9) break; 579e18e3516Sopenharmony_ci group = group * 10 + next - CHAR_0; 580e18e3516Sopenharmony_ci 581e18e3516Sopenharmony_ci /* A check for a number greater than the hightest captured group 582e18e3516Sopenharmony_ci is sufficient here; no need for a separate overflow check. If unknown 583e18e3516Sopenharmony_ci groups are to be treated as unset, just skip over any remaining 584e18e3516Sopenharmony_ci digits and carry on. 
*/ 585e18e3516Sopenharmony_ci 586e18e3516Sopenharmony_ci if (group > code->top_bracket) 587e18e3516Sopenharmony_ci { 588e18e3516Sopenharmony_ci if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 589e18e3516Sopenharmony_ci { 590e18e3516Sopenharmony_ci while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9); 591e18e3516Sopenharmony_ci break; 592e18e3516Sopenharmony_ci } 593e18e3516Sopenharmony_ci else 594e18e3516Sopenharmony_ci { 595e18e3516Sopenharmony_ci rc = PCRE2_ERROR_NOSUBSTRING; 596e18e3516Sopenharmony_ci goto PTREXIT; 597e18e3516Sopenharmony_ci } 598e18e3516Sopenharmony_ci } 599e18e3516Sopenharmony_ci } 600e18e3516Sopenharmony_ci } 601e18e3516Sopenharmony_ci else 602e18e3516Sopenharmony_ci { 603e18e3516Sopenharmony_ci const uint8_t *ctypes = code->tables + ctypes_offset; 604e18e3516Sopenharmony_ci while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) 605e18e3516Sopenharmony_ci { 606e18e3516Sopenharmony_ci name[n++] = next; 607e18e3516Sopenharmony_ci if (n > 32) goto BAD; 608e18e3516Sopenharmony_ci if (++ptr >= repend) break; 609e18e3516Sopenharmony_ci next = *ptr; 610e18e3516Sopenharmony_ci } 611e18e3516Sopenharmony_ci if (n == 0) goto BAD; 612e18e3516Sopenharmony_ci name[n] = 0; 613e18e3516Sopenharmony_ci } 614e18e3516Sopenharmony_ci 615e18e3516Sopenharmony_ci /* In extended mode we recognize ${name:+set text:unset text} and 616e18e3516Sopenharmony_ci ${name:-default text}. 
*/ 617e18e3516Sopenharmony_ci 618e18e3516Sopenharmony_ci if (inparens) 619e18e3516Sopenharmony_ci { 620e18e3516Sopenharmony_ci if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 621e18e3516Sopenharmony_ci !star && ptr < repend - 2 && next == CHAR_COLON) 622e18e3516Sopenharmony_ci { 623e18e3516Sopenharmony_ci special = *(++ptr); 624e18e3516Sopenharmony_ci if (special != CHAR_PLUS && special != CHAR_MINUS) 625e18e3516Sopenharmony_ci { 626e18e3516Sopenharmony_ci rc = PCRE2_ERROR_BADSUBSTITUTION; 627e18e3516Sopenharmony_ci goto PTREXIT; 628e18e3516Sopenharmony_ci } 629e18e3516Sopenharmony_ci 630e18e3516Sopenharmony_ci text1_start = ++ptr; 631e18e3516Sopenharmony_ci rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS); 632e18e3516Sopenharmony_ci if (rc != 0) goto PTREXIT; 633e18e3516Sopenharmony_ci text1_end = ptr; 634e18e3516Sopenharmony_ci 635e18e3516Sopenharmony_ci if (special == CHAR_PLUS && *ptr == CHAR_COLON) 636e18e3516Sopenharmony_ci { 637e18e3516Sopenharmony_ci text2_start = ++ptr; 638e18e3516Sopenharmony_ci rc = find_text_end(code, &ptr, repend, TRUE); 639e18e3516Sopenharmony_ci if (rc != 0) goto PTREXIT; 640e18e3516Sopenharmony_ci text2_end = ptr; 641e18e3516Sopenharmony_ci } 642e18e3516Sopenharmony_ci } 643e18e3516Sopenharmony_ci 644e18e3516Sopenharmony_ci else 645e18e3516Sopenharmony_ci { 646e18e3516Sopenharmony_ci if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET) 647e18e3516Sopenharmony_ci { 648e18e3516Sopenharmony_ci rc = PCRE2_ERROR_REPMISSINGBRACE; 649e18e3516Sopenharmony_ci goto PTREXIT; 650e18e3516Sopenharmony_ci } 651e18e3516Sopenharmony_ci } 652e18e3516Sopenharmony_ci 653e18e3516Sopenharmony_ci ptr++; 654e18e3516Sopenharmony_ci } 655e18e3516Sopenharmony_ci 656e18e3516Sopenharmony_ci /* Have found a syntactically correct group number or name, or *name. 657e18e3516Sopenharmony_ci Only *MARK is currently recognized. 
*/ 658e18e3516Sopenharmony_ci 659e18e3516Sopenharmony_ci if (star) 660e18e3516Sopenharmony_ci { 661e18e3516Sopenharmony_ci if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) 662e18e3516Sopenharmony_ci { 663e18e3516Sopenharmony_ci PCRE2_SPTR mark = pcre2_get_mark(match_data); 664e18e3516Sopenharmony_ci if (mark != NULL) 665e18e3516Sopenharmony_ci { 666e18e3516Sopenharmony_ci PCRE2_SPTR mark_start = mark; 667e18e3516Sopenharmony_ci while (*mark != 0) mark++; 668e18e3516Sopenharmony_ci fraglength = mark - mark_start; 669e18e3516Sopenharmony_ci CHECKMEMCPY(mark_start, fraglength); 670e18e3516Sopenharmony_ci } 671e18e3516Sopenharmony_ci } 672e18e3516Sopenharmony_ci else goto BAD; 673e18e3516Sopenharmony_ci } 674e18e3516Sopenharmony_ci 675e18e3516Sopenharmony_ci /* Substitute the contents of a group. We don't use substring_copy 676e18e3516Sopenharmony_ci functions any more, in order to support case forcing. */ 677e18e3516Sopenharmony_ci 678e18e3516Sopenharmony_ci else 679e18e3516Sopenharmony_ci { 680e18e3516Sopenharmony_ci PCRE2_SPTR subptr, subptrend; 681e18e3516Sopenharmony_ci 682e18e3516Sopenharmony_ci /* Find a number for a named group. In case there are duplicate names, 683e18e3516Sopenharmony_ci search for the first one that is set. If the name is not found when 684e18e3516Sopenharmony_ci PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a 685e18e3516Sopenharmony_ci non-existent group. 
*/ 686e18e3516Sopenharmony_ci 687e18e3516Sopenharmony_ci if (group < 0) 688e18e3516Sopenharmony_ci { 689e18e3516Sopenharmony_ci PCRE2_SPTR first, last, entry; 690e18e3516Sopenharmony_ci rc = pcre2_substring_nametable_scan(code, name, &first, &last); 691e18e3516Sopenharmony_ci if (rc == PCRE2_ERROR_NOSUBSTRING && 692e18e3516Sopenharmony_ci (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 693e18e3516Sopenharmony_ci { 694e18e3516Sopenharmony_ci group = code->top_bracket + 1; 695e18e3516Sopenharmony_ci } 696e18e3516Sopenharmony_ci else 697e18e3516Sopenharmony_ci { 698e18e3516Sopenharmony_ci if (rc < 0) goto PTREXIT; 699e18e3516Sopenharmony_ci for (entry = first; entry <= last; entry += rc) 700e18e3516Sopenharmony_ci { 701e18e3516Sopenharmony_ci uint32_t ng = GET2(entry, 0); 702e18e3516Sopenharmony_ci if (ng < ovector_count) 703e18e3516Sopenharmony_ci { 704e18e3516Sopenharmony_ci if (group < 0) group = ng; /* First in ovector */ 705e18e3516Sopenharmony_ci if (ovector[ng*2] != PCRE2_UNSET) 706e18e3516Sopenharmony_ci { 707e18e3516Sopenharmony_ci group = ng; /* First that is set */ 708e18e3516Sopenharmony_ci break; 709e18e3516Sopenharmony_ci } 710e18e3516Sopenharmony_ci } 711e18e3516Sopenharmony_ci } 712e18e3516Sopenharmony_ci 713e18e3516Sopenharmony_ci /* If group is still negative, it means we did not find a group 714e18e3516Sopenharmony_ci that is in the ovector. Just set the first group. */ 715e18e3516Sopenharmony_ci 716e18e3516Sopenharmony_ci if (group < 0) group = GET2(first, 0); 717e18e3516Sopenharmony_ci } 718e18e3516Sopenharmony_ci } 719e18e3516Sopenharmony_ci 720e18e3516Sopenharmony_ci /* We now have a group that is identified by number. Find the length of 721e18e3516Sopenharmony_ci the captured string. If a group in a non-special substitution is unset 722e18e3516Sopenharmony_ci when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. 
*/ 723e18e3516Sopenharmony_ci 724e18e3516Sopenharmony_ci rc = pcre2_substring_length_bynumber(match_data, group, &sublength); 725e18e3516Sopenharmony_ci if (rc < 0) 726e18e3516Sopenharmony_ci { 727e18e3516Sopenharmony_ci if (rc == PCRE2_ERROR_NOSUBSTRING && 728e18e3516Sopenharmony_ci (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 729e18e3516Sopenharmony_ci { 730e18e3516Sopenharmony_ci rc = PCRE2_ERROR_UNSET; 731e18e3516Sopenharmony_ci } 732e18e3516Sopenharmony_ci if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */ 733e18e3516Sopenharmony_ci if (special == 0) /* Plain substitution */ 734e18e3516Sopenharmony_ci { 735e18e3516Sopenharmony_ci if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue; 736e18e3516Sopenharmony_ci goto PTREXIT; /* Else error */ 737e18e3516Sopenharmony_ci } 738e18e3516Sopenharmony_ci } 739e18e3516Sopenharmony_ci 740e18e3516Sopenharmony_ci /* If special is '+' we have a 'set' and possibly an 'unset' text, 741e18e3516Sopenharmony_ci both of which are reprocessed when used. If special is '-' we have a 742e18e3516Sopenharmony_ci default text for when the group is unset; it must be reprocessed. 
*/ 743e18e3516Sopenharmony_ci 744e18e3516Sopenharmony_ci if (special != 0) 745e18e3516Sopenharmony_ci { 746e18e3516Sopenharmony_ci if (special == CHAR_MINUS) 747e18e3516Sopenharmony_ci { 748e18e3516Sopenharmony_ci if (rc == 0) goto LITERAL_SUBSTITUTE; 749e18e3516Sopenharmony_ci text2_start = text1_start; 750e18e3516Sopenharmony_ci text2_end = text1_end; 751e18e3516Sopenharmony_ci } 752e18e3516Sopenharmony_ci 753e18e3516Sopenharmony_ci if (ptrstackptr >= PTR_STACK_SIZE) goto BAD; 754e18e3516Sopenharmony_ci ptrstack[ptrstackptr++] = ptr; 755e18e3516Sopenharmony_ci ptrstack[ptrstackptr++] = repend; 756e18e3516Sopenharmony_ci 757e18e3516Sopenharmony_ci if (rc == 0) 758e18e3516Sopenharmony_ci { 759e18e3516Sopenharmony_ci ptr = text1_start; 760e18e3516Sopenharmony_ci repend = text1_end; 761e18e3516Sopenharmony_ci } 762e18e3516Sopenharmony_ci else 763e18e3516Sopenharmony_ci { 764e18e3516Sopenharmony_ci ptr = text2_start; 765e18e3516Sopenharmony_ci repend = text2_end; 766e18e3516Sopenharmony_ci } 767e18e3516Sopenharmony_ci continue; 768e18e3516Sopenharmony_ci } 769e18e3516Sopenharmony_ci 770e18e3516Sopenharmony_ci /* Otherwise we have a literal substitution of a group's contents. */ 771e18e3516Sopenharmony_ci 772e18e3516Sopenharmony_ci LITERAL_SUBSTITUTE: 773e18e3516Sopenharmony_ci subptr = subject + ovector[group*2]; 774e18e3516Sopenharmony_ci subptrend = subject + ovector[group*2 + 1]; 775e18e3516Sopenharmony_ci 776e18e3516Sopenharmony_ci /* Substitute a literal string, possibly forcing alphabetic case. 
*/ 777e18e3516Sopenharmony_ci 778e18e3516Sopenharmony_ci while (subptr < subptrend) 779e18e3516Sopenharmony_ci { 780e18e3516Sopenharmony_ci GETCHARINCTEST(ch, subptr); 781e18e3516Sopenharmony_ci if (forcecase != 0) 782e18e3516Sopenharmony_ci { 783e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 784e18e3516Sopenharmony_ci if (utf || ucp) 785e18e3516Sopenharmony_ci { 786e18e3516Sopenharmony_ci uint32_t type = UCD_CHARTYPE(ch); 787e18e3516Sopenharmony_ci if (PRIV(ucp_gentype)[type] == ucp_L && 788e18e3516Sopenharmony_ci type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 789e18e3516Sopenharmony_ci ch = UCD_OTHERCASE(ch); 790e18e3516Sopenharmony_ci } 791e18e3516Sopenharmony_ci else 792e18e3516Sopenharmony_ci#endif 793e18e3516Sopenharmony_ci { 794e18e3516Sopenharmony_ci if (((code->tables + cbits_offset + 795e18e3516Sopenharmony_ci ((forcecase > 0)? cbit_upper:cbit_lower) 796e18e3516Sopenharmony_ci )[ch/8] & (1u << (ch%8))) == 0) 797e18e3516Sopenharmony_ci ch = (code->tables + fcc_offset)[ch]; 798e18e3516Sopenharmony_ci } 799e18e3516Sopenharmony_ci forcecase = forcecasereset; 800e18e3516Sopenharmony_ci } 801e18e3516Sopenharmony_ci 802e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 803e18e3516Sopenharmony_ci if (utf) chlen = PRIV(ord2utf)(ch, temp); else 804e18e3516Sopenharmony_ci#endif 805e18e3516Sopenharmony_ci { 806e18e3516Sopenharmony_ci temp[0] = ch; 807e18e3516Sopenharmony_ci chlen = 1; 808e18e3516Sopenharmony_ci } 809e18e3516Sopenharmony_ci CHECKMEMCPY(temp, chlen); 810e18e3516Sopenharmony_ci } 811e18e3516Sopenharmony_ci } 812e18e3516Sopenharmony_ci } 813e18e3516Sopenharmony_ci 814e18e3516Sopenharmony_ci /* Handle an escape sequence in extended mode. We can use check_escape() 815e18e3516Sopenharmony_ci to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but 816e18e3516Sopenharmony_ci the case-forcing escapes are not supported in pcre2_compile() so must be 817e18e3516Sopenharmony_ci recognized here. 
*/ 818e18e3516Sopenharmony_ci 819e18e3516Sopenharmony_ci else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 820e18e3516Sopenharmony_ci *ptr == CHAR_BACKSLASH) 821e18e3516Sopenharmony_ci { 822e18e3516Sopenharmony_ci int errorcode; 823e18e3516Sopenharmony_ci 824e18e3516Sopenharmony_ci if (ptr < repend - 1) switch (ptr[1]) 825e18e3516Sopenharmony_ci { 826e18e3516Sopenharmony_ci case CHAR_L: 827e18e3516Sopenharmony_ci forcecase = forcecasereset = -1; 828e18e3516Sopenharmony_ci ptr += 2; 829e18e3516Sopenharmony_ci continue; 830e18e3516Sopenharmony_ci 831e18e3516Sopenharmony_ci case CHAR_l: 832e18e3516Sopenharmony_ci forcecase = -1; 833e18e3516Sopenharmony_ci forcecasereset = 0; 834e18e3516Sopenharmony_ci ptr += 2; 835e18e3516Sopenharmony_ci continue; 836e18e3516Sopenharmony_ci 837e18e3516Sopenharmony_ci case CHAR_U: 838e18e3516Sopenharmony_ci forcecase = forcecasereset = 1; 839e18e3516Sopenharmony_ci ptr += 2; 840e18e3516Sopenharmony_ci continue; 841e18e3516Sopenharmony_ci 842e18e3516Sopenharmony_ci case CHAR_u: 843e18e3516Sopenharmony_ci forcecase = 1; 844e18e3516Sopenharmony_ci forcecasereset = 0; 845e18e3516Sopenharmony_ci ptr += 2; 846e18e3516Sopenharmony_ci continue; 847e18e3516Sopenharmony_ci 848e18e3516Sopenharmony_ci default: 849e18e3516Sopenharmony_ci break; 850e18e3516Sopenharmony_ci } 851e18e3516Sopenharmony_ci 852e18e3516Sopenharmony_ci ptr++; /* Point after \ */ 853e18e3516Sopenharmony_ci rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, 854e18e3516Sopenharmony_ci code->overall_options, code->extra_options, FALSE, NULL); 855e18e3516Sopenharmony_ci if (errorcode != 0) goto BADESCAPE; 856e18e3516Sopenharmony_ci 857e18e3516Sopenharmony_ci switch(rc) 858e18e3516Sopenharmony_ci { 859e18e3516Sopenharmony_ci case ESC_E: 860e18e3516Sopenharmony_ci forcecase = forcecasereset = 0; 861e18e3516Sopenharmony_ci continue; 862e18e3516Sopenharmony_ci 863e18e3516Sopenharmony_ci case ESC_Q: 864e18e3516Sopenharmony_ci escaped_literal = TRUE; 
865e18e3516Sopenharmony_ci continue; 866e18e3516Sopenharmony_ci 867e18e3516Sopenharmony_ci case 0: /* Data character */ 868e18e3516Sopenharmony_ci goto LITERAL; 869e18e3516Sopenharmony_ci 870e18e3516Sopenharmony_ci default: 871e18e3516Sopenharmony_ci goto BADESCAPE; 872e18e3516Sopenharmony_ci } 873e18e3516Sopenharmony_ci } 874e18e3516Sopenharmony_ci 875e18e3516Sopenharmony_ci /* Handle a literal code unit */ 876e18e3516Sopenharmony_ci 877e18e3516Sopenharmony_ci else 878e18e3516Sopenharmony_ci { 879e18e3516Sopenharmony_ci LOADLITERAL: 880e18e3516Sopenharmony_ci GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */ 881e18e3516Sopenharmony_ci 882e18e3516Sopenharmony_ci LITERAL: 883e18e3516Sopenharmony_ci if (forcecase != 0) 884e18e3516Sopenharmony_ci { 885e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 886e18e3516Sopenharmony_ci if (utf || ucp) 887e18e3516Sopenharmony_ci { 888e18e3516Sopenharmony_ci uint32_t type = UCD_CHARTYPE(ch); 889e18e3516Sopenharmony_ci if (PRIV(ucp_gentype)[type] == ucp_L && 890e18e3516Sopenharmony_ci type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 891e18e3516Sopenharmony_ci ch = UCD_OTHERCASE(ch); 892e18e3516Sopenharmony_ci } 893e18e3516Sopenharmony_ci else 894e18e3516Sopenharmony_ci#endif 895e18e3516Sopenharmony_ci { 896e18e3516Sopenharmony_ci if (((code->tables + cbits_offset + 897e18e3516Sopenharmony_ci ((forcecase > 0)? 
cbit_upper:cbit_lower) 898e18e3516Sopenharmony_ci )[ch/8] & (1u << (ch%8))) == 0) 899e18e3516Sopenharmony_ci ch = (code->tables + fcc_offset)[ch]; 900e18e3516Sopenharmony_ci } 901e18e3516Sopenharmony_ci forcecase = forcecasereset; 902e18e3516Sopenharmony_ci } 903e18e3516Sopenharmony_ci 904e18e3516Sopenharmony_ci#ifdef SUPPORT_UNICODE 905e18e3516Sopenharmony_ci if (utf) chlen = PRIV(ord2utf)(ch, temp); else 906e18e3516Sopenharmony_ci#endif 907e18e3516Sopenharmony_ci { 908e18e3516Sopenharmony_ci temp[0] = ch; 909e18e3516Sopenharmony_ci chlen = 1; 910e18e3516Sopenharmony_ci } 911e18e3516Sopenharmony_ci CHECKMEMCPY(temp, chlen); 912e18e3516Sopenharmony_ci } /* End handling a literal code unit */ 913e18e3516Sopenharmony_ci } /* End of loop for scanning the replacement. */ 914e18e3516Sopenharmony_ci 915e18e3516Sopenharmony_ci /* The replacement has been copied to the output, or its size has been 916e18e3516Sopenharmony_ci remembered. Do the callout if there is one and we have done an actual 917e18e3516Sopenharmony_ci replacement. */ 918e18e3516Sopenharmony_ci 919e18e3516Sopenharmony_ci if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL) 920e18e3516Sopenharmony_ci { 921e18e3516Sopenharmony_ci scb.subscount = subs; 922e18e3516Sopenharmony_ci scb.output_offsets[1] = buff_offset; 923e18e3516Sopenharmony_ci rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); 924e18e3516Sopenharmony_ci 925e18e3516Sopenharmony_ci /* A non-zero return means cancel this substitution. Instead, copy the 926e18e3516Sopenharmony_ci matched string fragment. 
*/ 927e18e3516Sopenharmony_ci 928e18e3516Sopenharmony_ci if (rc != 0) 929e18e3516Sopenharmony_ci { 930e18e3516Sopenharmony_ci PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; 931e18e3516Sopenharmony_ci PCRE2_SIZE oldlength = ovector[1] - ovector[0]; 932e18e3516Sopenharmony_ci 933e18e3516Sopenharmony_ci buff_offset -= newlength; 934e18e3516Sopenharmony_ci lengthleft += newlength; 935e18e3516Sopenharmony_ci if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); 936e18e3516Sopenharmony_ci 937e18e3516Sopenharmony_ci /* A negative return means do not do any more. */ 938e18e3516Sopenharmony_ci 939e18e3516Sopenharmony_ci if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); 940e18e3516Sopenharmony_ci } 941e18e3516Sopenharmony_ci } 942e18e3516Sopenharmony_ci 943e18e3516Sopenharmony_ci /* Save the details of this match. See above for how this data is used. If we 944e18e3516Sopenharmony_ci matched an empty string, do the magic for global matches. Update the start 945e18e3516Sopenharmony_ci offset to point to the rest of the subject string. If we re-used an existing 946e18e3516Sopenharmony_ci match for the first match, switch to the internal match data block. */ 947e18e3516Sopenharmony_ci 948e18e3516Sopenharmony_ci ovecsave[0] = ovector[0]; 949e18e3516Sopenharmony_ci ovecsave[1] = ovector[1]; 950e18e3516Sopenharmony_ci ovecsave[2] = start_offset; 951e18e3516Sopenharmony_ci 952e18e3516Sopenharmony_ci goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : 953e18e3516Sopenharmony_ci PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; 954e18e3516Sopenharmony_ci start_offset = ovector[1]; 955e18e3516Sopenharmony_ci } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ 956e18e3516Sopenharmony_ci 957e18e3516Sopenharmony_ci/* Copy the rest of the subject unless not required, and terminate the output 958e18e3516Sopenharmony_ciwith a binary zero. 
*/ 959e18e3516Sopenharmony_ci 960e18e3516Sopenharmony_ciif (!replacement_only) 961e18e3516Sopenharmony_ci { 962e18e3516Sopenharmony_ci fraglength = length - start_offset; 963e18e3516Sopenharmony_ci CHECKMEMCPY(subject + start_offset, fraglength); 964e18e3516Sopenharmony_ci } 965e18e3516Sopenharmony_ci 966e18e3516Sopenharmony_citemp[0] = 0; 967e18e3516Sopenharmony_ciCHECKMEMCPY(temp, 1); 968e18e3516Sopenharmony_ci 969e18e3516Sopenharmony_ci/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, 970e18e3516Sopenharmony_ciand matching has carried on after a full buffer, in order to compute the length 971e18e3516Sopenharmony_cineeded. Otherwise, an overflow generates an immediate error return. */ 972e18e3516Sopenharmony_ci 973e18e3516Sopenharmony_ciif (overflowed) 974e18e3516Sopenharmony_ci { 975e18e3516Sopenharmony_ci rc = PCRE2_ERROR_NOMEMORY; 976e18e3516Sopenharmony_ci *blength = buff_length + extra_needed; 977e18e3516Sopenharmony_ci } 978e18e3516Sopenharmony_ci 979e18e3516Sopenharmony_ci/* After a successful execution, return the number of substitutions and set the 980e18e3516Sopenharmony_cilength of buffer used, excluding the trailing zero. 
*/ 981e18e3516Sopenharmony_ci 982e18e3516Sopenharmony_cielse 983e18e3516Sopenharmony_ci { 984e18e3516Sopenharmony_ci rc = subs; 985e18e3516Sopenharmony_ci *blength = buff_offset - 1; 986e18e3516Sopenharmony_ci } 987e18e3516Sopenharmony_ci 988e18e3516Sopenharmony_ciEXIT: 989e18e3516Sopenharmony_ciif (internal_match_data != NULL) pcre2_match_data_free(internal_match_data); 990e18e3516Sopenharmony_ci else match_data->rc = rc; 991e18e3516Sopenharmony_cireturn rc; 992e18e3516Sopenharmony_ci 993e18e3516Sopenharmony_ciNOROOM: 994e18e3516Sopenharmony_circ = PCRE2_ERROR_NOMEMORY; 995e18e3516Sopenharmony_cigoto EXIT; 996e18e3516Sopenharmony_ci 997e18e3516Sopenharmony_ciBAD: 998e18e3516Sopenharmony_circ = PCRE2_ERROR_BADREPLACEMENT; 999e18e3516Sopenharmony_cigoto PTREXIT; 1000e18e3516Sopenharmony_ci 1001e18e3516Sopenharmony_ciBADESCAPE: 1002e18e3516Sopenharmony_circ = PCRE2_ERROR_BADREPESCAPE; 1003e18e3516Sopenharmony_ci 1004e18e3516Sopenharmony_ciPTREXIT: 1005e18e3516Sopenharmony_ci*blength = (PCRE2_SIZE)(ptr - replacement); 1006e18e3516Sopenharmony_cigoto EXIT; 1007e18e3516Sopenharmony_ci} 1008e18e3516Sopenharmony_ci 1009e18e3516Sopenharmony_ci/* End of pcre2_substitute.c */ 1010