1425bb815Sopenharmony_ci/* Copyright JS Foundation and other contributors, http://js.foundation 2425bb815Sopenharmony_ci * 3425bb815Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 4425bb815Sopenharmony_ci * you may not use this file except in compliance with the License. 5425bb815Sopenharmony_ci * You may obtain a copy of the License at 6425bb815Sopenharmony_ci * 7425bb815Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 8425bb815Sopenharmony_ci * 9425bb815Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 10425bb815Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS 11425bb815Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12425bb815Sopenharmony_ci * See the License for the specific language governing permissions and 13425bb815Sopenharmony_ci * limitations under the License. 14425bb815Sopenharmony_ci */ 15425bb815Sopenharmony_ci 16425bb815Sopenharmony_ci#include "ecma-exceptions.h" 17425bb815Sopenharmony_ci#include "ecma-globals.h" 18425bb815Sopenharmony_ci#include "ecma-try-catch-macro.h" 19425bb815Sopenharmony_ci#include "jcontext.h" 20425bb815Sopenharmony_ci#include "jrt-libc-includes.h" 21425bb815Sopenharmony_ci#include "lit-char-helpers.h" 22425bb815Sopenharmony_ci#include "re-compiler.h" 23425bb815Sopenharmony_ci#include "re-parser.h" 24425bb815Sopenharmony_ci 25425bb815Sopenharmony_ci#if ENABLED (JERRY_BUILTIN_REGEXP) 26425bb815Sopenharmony_ci 27425bb815Sopenharmony_ci/** \addtogroup parser Parser 28425bb815Sopenharmony_ci * @{ 29425bb815Sopenharmony_ci * 30425bb815Sopenharmony_ci * \addtogroup regexparser Regular expression 31425bb815Sopenharmony_ci * @{ 32425bb815Sopenharmony_ci * 33425bb815Sopenharmony_ci * \addtogroup regexparser_parser Parser 34425bb815Sopenharmony_ci * @{ 35425bb815Sopenharmony_ci */ 36425bb815Sopenharmony_ci 37425bb815Sopenharmony_ci/** 38425bb815Sopenharmony_ci * Get the start opcode for the current group. 39425bb815Sopenharmony_ci * 40425bb815Sopenharmony_ci * @return RegExp opcode 41425bb815Sopenharmony_ci */ 42425bb815Sopenharmony_cistatic re_opcode_t 43425bb815Sopenharmony_cire_get_group_start_opcode (bool is_capturing) /**< is capturing group */ 44425bb815Sopenharmony_ci{ 45425bb815Sopenharmony_ci return (is_capturing) ? RE_OP_CAPTURING_GROUP_START : RE_OP_NON_CAPTURING_GROUP_START; 46425bb815Sopenharmony_ci} /* re_get_group_start_opcode*/ 47425bb815Sopenharmony_ci 48425bb815Sopenharmony_ci/** 49425bb815Sopenharmony_ci * Get the end opcode for the current group. 50425bb815Sopenharmony_ci * 51425bb815Sopenharmony_ci * @return RegExp opcode 52425bb815Sopenharmony_ci */ 53425bb815Sopenharmony_cistatic re_opcode_t 54425bb815Sopenharmony_cire_get_group_end_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 55425bb815Sopenharmony_ci bool is_capturing) /**< is capturing group */ 56425bb815Sopenharmony_ci{ 57425bb815Sopenharmony_ci if (is_capturing) 58425bb815Sopenharmony_ci { 59425bb815Sopenharmony_ci if (re_ctx_p->token.greedy) 60425bb815Sopenharmony_ci { 61425bb815Sopenharmony_ci return RE_OP_GREEDY_CAPTURING_GROUP_END; 62425bb815Sopenharmony_ci } 63425bb815Sopenharmony_ci 64425bb815Sopenharmony_ci return RE_OP_LAZY_CAPTURING_GROUP_END; 65425bb815Sopenharmony_ci } 66425bb815Sopenharmony_ci 67425bb815Sopenharmony_ci if (re_ctx_p->token.greedy) 68425bb815Sopenharmony_ci { 69425bb815Sopenharmony_ci return RE_OP_GREEDY_NON_CAPTURING_GROUP_END; 70425bb815Sopenharmony_ci } 71425bb815Sopenharmony_ci 72425bb815Sopenharmony_ci return RE_OP_LAZY_NON_CAPTURING_GROUP_END; 73425bb815Sopenharmony_ci} /* re_get_group_end_opcode */ 74425bb815Sopenharmony_ci 75425bb815Sopenharmony_ci/** 76425bb815Sopenharmony_ci * Enclose the given bytecode to a group. 77425bb815Sopenharmony_ci */ 78425bb815Sopenharmony_cistatic void 79425bb815Sopenharmony_cire_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 80425bb815Sopenharmony_ci uint32_t group_start_offset, /**< offset of group start */ 81425bb815Sopenharmony_ci uint32_t idx, /**< index of group */ 82425bb815Sopenharmony_ci uint32_t capture_start, /**< index of first nested capture */ 83425bb815Sopenharmony_ci bool is_capturing) /**< is capturing group */ 84425bb815Sopenharmony_ci{ 85425bb815Sopenharmony_ci uint32_t qmin = re_ctx_p->token.qmin; 86425bb815Sopenharmony_ci uint32_t qmax = re_ctx_p->token.qmax; 87425bb815Sopenharmony_ci 88425bb815Sopenharmony_ci if (JERRY_UNLIKELY (!is_capturing && re_bytecode_size (re_ctx_p) == group_start_offset)) 89425bb815Sopenharmony_ci { 90425bb815Sopenharmony_ci return; 91425bb815Sopenharmony_ci } 92425bb815Sopenharmony_ci 93425bb815Sopenharmony_ci if (qmin == 0) 94425bb815Sopenharmony_ci { 95425bb815Sopenharmony_ci re_insert_value (re_ctx_p, 96425bb815Sopenharmony_ci group_start_offset, 97425bb815Sopenharmony_ci re_bytecode_size (re_ctx_p) - group_start_offset); 98425bb815Sopenharmony_ci } 99425bb815Sopenharmony_ci 100425bb815Sopenharmony_ci re_insert_value (re_ctx_p, group_start_offset, qmin); 101425bb815Sopenharmony_ci re_insert_value (re_ctx_p, group_start_offset, re_ctx_p->captures_count - capture_start); 102425bb815Sopenharmony_ci 103425bb815Sopenharmony_ci if (!is_capturing) 104425bb815Sopenharmony_ci { 105425bb815Sopenharmony_ci re_insert_value (re_ctx_p, group_start_offset, capture_start); 106425bb815Sopenharmony_ci } 107425bb815Sopenharmony_ci else 108425bb815Sopenharmony_ci { 109425bb815Sopenharmony_ci JERRY_ASSERT (idx == capture_start); 110425bb815Sopenharmony_ci } 111425bb815Sopenharmony_ci 112425bb815Sopenharmony_ci re_insert_value (re_ctx_p, group_start_offset, idx); 113425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, group_start_offset, re_get_group_start_opcode (is_capturing)); 114425bb815Sopenharmony_ci 115425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, re_get_group_end_opcode (re_ctx_p, is_capturing)); 116425bb815Sopenharmony_ci re_append_value (re_ctx_p, idx); 117425bb815Sopenharmony_ci re_append_value (re_ctx_p, qmin); 118425bb815Sopenharmony_ci re_append_value (re_ctx_p, qmax + RE_QMAX_OFFSET); 119425bb815Sopenharmony_ci} /* re_insert_into_group */ 120425bb815Sopenharmony_ci 121425bb815Sopenharmony_ci/** 122425bb815Sopenharmony_ci * Insert simple atom iterator. 123425bb815Sopenharmony_ci */ 124425bb815Sopenharmony_cistatic void 125425bb815Sopenharmony_cire_insert_atom_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 126425bb815Sopenharmony_ci uint32_t start_offset) /**< atom start offset */ 127425bb815Sopenharmony_ci{ 128425bb815Sopenharmony_ci const uint32_t qmin = re_ctx_p->token.qmin; 129425bb815Sopenharmony_ci const uint32_t qmax = re_ctx_p->token.qmax; 130425bb815Sopenharmony_ci 131425bb815Sopenharmony_ci if (qmin == 1 && qmax == 1) 132425bb815Sopenharmony_ci { 133425bb815Sopenharmony_ci return; 134425bb815Sopenharmony_ci } 135425bb815Sopenharmony_ci 136425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ITERATOR_END); 137425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset); 138425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, qmax + RE_QMAX_OFFSET); 139425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, qmin); 140425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, start_offset, re_ctx_p->token.greedy ? RE_OP_GREEDY_ITERATOR : RE_OP_LAZY_ITERATOR); 141425bb815Sopenharmony_ci} /* re_insert_atom_iterator */ 142425bb815Sopenharmony_ci 143425bb815Sopenharmony_ci/** 144425bb815Sopenharmony_ci * Insert a lookahead assertion. 145425bb815Sopenharmony_ci */ 146425bb815Sopenharmony_cistatic void 147425bb815Sopenharmony_cire_insert_assertion_lookahead (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 148425bb815Sopenharmony_ci uint32_t start_offset, /**< atom start offset */ 149425bb815Sopenharmony_ci uint32_t capture_start, /**< index of first nested capture */ 150425bb815Sopenharmony_ci bool negative) /** lookahead type */ 151425bb815Sopenharmony_ci{ 152425bb815Sopenharmony_ci const uint32_t qmin = re_ctx_p->token.qmin; 153425bb815Sopenharmony_ci 154425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ASSERT_END); 155425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset); 156425bb815Sopenharmony_ci 157425bb815Sopenharmony_ci /* We need to clear nested capturing group results when a negative assertion or the tail after a positive assertion 158425bb815Sopenharmony_ci * does not match, so we store the begin and end index of nested capturing groups. */ 159425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, re_ctx_p->captures_count - capture_start); 160425bb815Sopenharmony_ci re_insert_value (re_ctx_p, start_offset, capture_start); 161425bb815Sopenharmony_ci 162425bb815Sopenharmony_ci /* Lookaheads always result in zero length matches, which means iterations will always stop on the first match. 163425bb815Sopenharmony_ci * This allows us to not have to deal with iterations beyond one. Either qmin == 0 which will implicitly match, 164425bb815Sopenharmony_ci * or qmin > 0, in which case the first iteration will decide whether the assertion matches depending on whether 165425bb815Sopenharmony_ci * the iteration matched or not. This also allows us to ignore qmax entirely. */ 166425bb815Sopenharmony_ci re_insert_byte (re_ctx_p, start_offset, (uint8_t) JERRY_MIN (qmin, 1)); 167425bb815Sopenharmony_ci 168425bb815Sopenharmony_ci const re_opcode_t opcode = (negative) ? RE_OP_ASSERT_LOOKAHEAD_NEG : RE_OP_ASSERT_LOOKAHEAD_POS; 169425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, start_offset, opcode); 170425bb815Sopenharmony_ci} /* re_insert_assertion_lookahead */ 171425bb815Sopenharmony_ci 172425bb815Sopenharmony_ci/** 173425bb815Sopenharmony_ci * Consume non greedy (question mark) character if present. 174425bb815Sopenharmony_ci */ 175425bb815Sopenharmony_cistatic void 176425bb815Sopenharmony_cire_parse_lazy_char (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */ 177425bb815Sopenharmony_ci{ 178425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 179425bb815Sopenharmony_ci && *re_ctx_p->input_curr_p == LIT_CHAR_QUESTION) 180425bb815Sopenharmony_ci { 181425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 182425bb815Sopenharmony_ci re_ctx_p->token.greedy = false; 183425bb815Sopenharmony_ci return; 184425bb815Sopenharmony_ci } 185425bb815Sopenharmony_ci 186425bb815Sopenharmony_ci re_ctx_p->token.greedy = true; 187425bb815Sopenharmony_ci} /* re_parse_lazy_char */ 188425bb815Sopenharmony_ci 189425bb815Sopenharmony_ci/** 190425bb815Sopenharmony_ci * Parse a max 3 digit long octal number from the input string, with a decimal value less than 256. 191425bb815Sopenharmony_ci * 192425bb815Sopenharmony_ci * @return value of the octal number 193425bb815Sopenharmony_ci */ 194425bb815Sopenharmony_cistatic uint32_t 195425bb815Sopenharmony_cire_parse_octal (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */ 196425bb815Sopenharmony_ci{ 197425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 198425bb815Sopenharmony_ci JERRY_ASSERT (lit_char_is_octal_digit (*re_ctx_p->input_curr_p)); 199425bb815Sopenharmony_ci 200425bb815Sopenharmony_ci uint32_t value = (uint32_t) (*re_ctx_p->input_curr_p++) - LIT_CHAR_0; 201425bb815Sopenharmony_ci 202425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 203425bb815Sopenharmony_ci && lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 204425bb815Sopenharmony_ci { 205425bb815Sopenharmony_ci value = value * 8 + (*re_ctx_p->input_curr_p++) - LIT_CHAR_0; 206425bb815Sopenharmony_ci } 207425bb815Sopenharmony_ci 208425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 209425bb815Sopenharmony_ci && lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 210425bb815Sopenharmony_ci { 211425bb815Sopenharmony_ci const uint32_t new_value = value * 8 + (*re_ctx_p->input_curr_p) - LIT_CHAR_0; 212425bb815Sopenharmony_ci 213425bb815Sopenharmony_ci if (new_value <= RE_MAX_OCTAL_VALUE) 214425bb815Sopenharmony_ci { 215425bb815Sopenharmony_ci value = new_value; 216425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 217425bb815Sopenharmony_ci } 218425bb815Sopenharmony_ci } 219425bb815Sopenharmony_ci 220425bb815Sopenharmony_ci return value; 221425bb815Sopenharmony_ci} /* re_parse_octal */ 222425bb815Sopenharmony_ci 223425bb815Sopenharmony_ci/** 224425bb815Sopenharmony_ci * Check that the currently parsed quantifier is valid. 225425bb815Sopenharmony_ci * 226425bb815Sopenharmony_ci * @return ECMA_VALUE_ERROR, if quantifier is invalid 227425bb815Sopenharmony_ci * ECMA_VALUE_EMPTY, otherwise 228425bb815Sopenharmony_ci */ 229425bb815Sopenharmony_cistatic ecma_value_t 230425bb815Sopenharmony_cire_check_quantifier (re_compiler_ctx_t *re_ctx_p) 231425bb815Sopenharmony_ci{ 232425bb815Sopenharmony_ci if (re_ctx_p->token.qmin > re_ctx_p->token.qmax) 233425bb815Sopenharmony_ci { 234425bb815Sopenharmony_ci /* ECMA-262 v5.1 15.10.2.5 */ 235425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("quantifier error: min > max.")); 236425bb815Sopenharmony_ci } 237425bb815Sopenharmony_ci 238425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 239425bb815Sopenharmony_ci} /* re_check_quantifier */ 240425bb815Sopenharmony_ci 241425bb815Sopenharmony_ci/** 242425bb815Sopenharmony_ci * Parse RegExp quantifier. 243425bb815Sopenharmony_ci * 244425bb815Sopenharmony_ci * @return ECMA_VALUE_TRUE - if parsed successfully 245425bb815Sopenharmony_ci * ECMA_VALUE_FALSE - otherwise 246425bb815Sopenharmony_ci */ 247425bb815Sopenharmony_cistatic ecma_value_t 248425bb815Sopenharmony_cire_parse_quantifier (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 249425bb815Sopenharmony_ci{ 250425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 251425bb815Sopenharmony_ci { 252425bb815Sopenharmony_ci switch (*re_ctx_p->input_curr_p) 253425bb815Sopenharmony_ci { 254425bb815Sopenharmony_ci case LIT_CHAR_QUESTION: 255425bb815Sopenharmony_ci { 256425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 257425bb815Sopenharmony_ci re_ctx_p->token.qmin = 0; 258425bb815Sopenharmony_ci re_ctx_p->token.qmax = 1; 259425bb815Sopenharmony_ci 260425bb815Sopenharmony_ci re_parse_lazy_char (re_ctx_p); 261425bb815Sopenharmony_ci return ECMA_VALUE_TRUE; 262425bb815Sopenharmony_ci } 263425bb815Sopenharmony_ci case LIT_CHAR_ASTERISK: 264425bb815Sopenharmony_ci { 265425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 266425bb815Sopenharmony_ci re_ctx_p->token.qmin = 0; 267425bb815Sopenharmony_ci re_ctx_p->token.qmax = RE_INFINITY; 268425bb815Sopenharmony_ci 269425bb815Sopenharmony_ci re_parse_lazy_char (re_ctx_p); 270425bb815Sopenharmony_ci return ECMA_VALUE_TRUE; 271425bb815Sopenharmony_ci } 272425bb815Sopenharmony_ci case LIT_CHAR_PLUS: 273425bb815Sopenharmony_ci { 274425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 275425bb815Sopenharmony_ci re_ctx_p->token.qmin = 1; 276425bb815Sopenharmony_ci re_ctx_p->token.qmax = RE_INFINITY; 277425bb815Sopenharmony_ci 278425bb815Sopenharmony_ci re_parse_lazy_char (re_ctx_p); 279425bb815Sopenharmony_ci return ECMA_VALUE_TRUE; 280425bb815Sopenharmony_ci } 281425bb815Sopenharmony_ci case LIT_CHAR_LEFT_BRACE: 282425bb815Sopenharmony_ci { 283425bb815Sopenharmony_ci const lit_utf8_byte_t *current_p = re_ctx_p->input_curr_p + 1; 284425bb815Sopenharmony_ci uint32_t qmin = 0; 285425bb815Sopenharmony_ci uint32_t qmax = RE_INFINITY; 286425bb815Sopenharmony_ci 287425bb815Sopenharmony_ci if (current_p >= re_ctx_p->input_end_p) 288425bb815Sopenharmony_ci { 289425bb815Sopenharmony_ci break; 290425bb815Sopenharmony_ci } 291425bb815Sopenharmony_ci 292425bb815Sopenharmony_ci if (!lit_char_is_decimal_digit (*current_p)) 293425bb815Sopenharmony_ci { 294425bb815Sopenharmony_ci break; 295425bb815Sopenharmony_ci } 296425bb815Sopenharmony_ci 297425bb815Sopenharmony_ci qmin = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p); 298425bb815Sopenharmony_ci 299425bb815Sopenharmony_ci if (current_p >= re_ctx_p->input_end_p) 300425bb815Sopenharmony_ci { 301425bb815Sopenharmony_ci break; 302425bb815Sopenharmony_ci } 303425bb815Sopenharmony_ci 304425bb815Sopenharmony_ci lit_utf8_byte_t ch = *current_p++; 305425bb815Sopenharmony_ci if (ch == LIT_CHAR_RIGHT_BRACE) 306425bb815Sopenharmony_ci { 307425bb815Sopenharmony_ci qmax = qmin; 308425bb815Sopenharmony_ci } 309425bb815Sopenharmony_ci else if (ch == LIT_CHAR_COMMA) 310425bb815Sopenharmony_ci { 311425bb815Sopenharmony_ci if (current_p >= re_ctx_p->input_end_p) 312425bb815Sopenharmony_ci { 313425bb815Sopenharmony_ci break; 314425bb815Sopenharmony_ci } 315425bb815Sopenharmony_ci 316425bb815Sopenharmony_ci if (lit_char_is_decimal_digit (*current_p)) 317425bb815Sopenharmony_ci { 318425bb815Sopenharmony_ci qmax = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p); 319425bb815Sopenharmony_ci } 320425bb815Sopenharmony_ci 321425bb815Sopenharmony_ci if (current_p >= re_ctx_p->input_end_p || *current_p++ != LIT_CHAR_RIGHT_BRACE) 322425bb815Sopenharmony_ci { 323425bb815Sopenharmony_ci break; 324425bb815Sopenharmony_ci } 325425bb815Sopenharmony_ci } 326425bb815Sopenharmony_ci else 327425bb815Sopenharmony_ci { 328425bb815Sopenharmony_ci break; 329425bb815Sopenharmony_ci } 330425bb815Sopenharmony_ci 331425bb815Sopenharmony_ci re_ctx_p->token.qmin = qmin; 332425bb815Sopenharmony_ci re_ctx_p->token.qmax = qmax; 333425bb815Sopenharmony_ci re_ctx_p->input_curr_p = current_p; 334425bb815Sopenharmony_ci re_parse_lazy_char (re_ctx_p); 335425bb815Sopenharmony_ci return ECMA_VALUE_TRUE; 336425bb815Sopenharmony_ci } 337425bb815Sopenharmony_ci default: 338425bb815Sopenharmony_ci { 339425bb815Sopenharmony_ci break; 340425bb815Sopenharmony_ci } 341425bb815Sopenharmony_ci } 342425bb815Sopenharmony_ci } 343425bb815Sopenharmony_ci 344425bb815Sopenharmony_ci re_ctx_p->token.qmin = 1; 345425bb815Sopenharmony_ci re_ctx_p->token.qmax = 1; 346425bb815Sopenharmony_ci re_ctx_p->token.greedy = true; 347425bb815Sopenharmony_ci 348425bb815Sopenharmony_ci return ECMA_VALUE_FALSE; 349425bb815Sopenharmony_ci} /* re_parse_quantifier */ 350425bb815Sopenharmony_ci 351425bb815Sopenharmony_ci/** 352425bb815Sopenharmony_ci * Count the number of groups in the current pattern. 353425bb815Sopenharmony_ci */ 354425bb815Sopenharmony_cistatic void 355425bb815Sopenharmony_cire_count_groups (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 356425bb815Sopenharmony_ci{ 357425bb815Sopenharmony_ci bool is_char_class = 0; 358425bb815Sopenharmony_ci re_ctx_p->groups_count = 0; 359425bb815Sopenharmony_ci const lit_utf8_byte_t *curr_p = re_ctx_p->input_start_p; 360425bb815Sopenharmony_ci 361425bb815Sopenharmony_ci while (curr_p < re_ctx_p->input_end_p) 362425bb815Sopenharmony_ci { 363425bb815Sopenharmony_ci switch (*curr_p++) 364425bb815Sopenharmony_ci { 365425bb815Sopenharmony_ci case LIT_CHAR_BACKSLASH: 366425bb815Sopenharmony_ci { 367425bb815Sopenharmony_ci if (curr_p < re_ctx_p->input_end_p) 368425bb815Sopenharmony_ci { 369425bb815Sopenharmony_ci lit_utf8_incr (&curr_p); 370425bb815Sopenharmony_ci } 371425bb815Sopenharmony_ci break; 372425bb815Sopenharmony_ci } 373425bb815Sopenharmony_ci case LIT_CHAR_LEFT_SQUARE: 374425bb815Sopenharmony_ci { 375425bb815Sopenharmony_ci is_char_class = true; 376425bb815Sopenharmony_ci break; 377425bb815Sopenharmony_ci } 378425bb815Sopenharmony_ci case LIT_CHAR_RIGHT_SQUARE: 379425bb815Sopenharmony_ci { 380425bb815Sopenharmony_ci is_char_class = false; 381425bb815Sopenharmony_ci break; 382425bb815Sopenharmony_ci } 383425bb815Sopenharmony_ci case LIT_CHAR_LEFT_PAREN: 384425bb815Sopenharmony_ci { 385425bb815Sopenharmony_ci if (curr_p < re_ctx_p->input_end_p 386425bb815Sopenharmony_ci && *curr_p != LIT_CHAR_QUESTION 387425bb815Sopenharmony_ci && !is_char_class) 388425bb815Sopenharmony_ci { 389425bb815Sopenharmony_ci re_ctx_p->groups_count++; 390425bb815Sopenharmony_ci } 391425bb815Sopenharmony_ci break; 392425bb815Sopenharmony_ci } 393425bb815Sopenharmony_ci } 394425bb815Sopenharmony_ci } 395425bb815Sopenharmony_ci} /* re_count_groups */ 396425bb815Sopenharmony_ci 397425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 398425bb815Sopenharmony_ci/** 399425bb815Sopenharmony_ci * Check if a code point is a Syntax character 400425bb815Sopenharmony_ci * 401425bb815Sopenharmony_ci * @return true, if syntax character 402425bb815Sopenharmony_ci * false, otherwise 403425bb815Sopenharmony_ci */ 404425bb815Sopenharmony_cistatic bool 405425bb815Sopenharmony_cire_is_syntax_char (lit_code_point_t cp) /**< code point */ 406425bb815Sopenharmony_ci{ 407425bb815Sopenharmony_ci return (cp == LIT_CHAR_CIRCUMFLEX 408425bb815Sopenharmony_ci || cp == LIT_CHAR_DOLLAR_SIGN 409425bb815Sopenharmony_ci || cp == LIT_CHAR_BACKSLASH 410425bb815Sopenharmony_ci || cp == LIT_CHAR_DOT 411425bb815Sopenharmony_ci || cp == LIT_CHAR_ASTERISK 412425bb815Sopenharmony_ci || cp == LIT_CHAR_PLUS 413425bb815Sopenharmony_ci || cp == LIT_CHAR_QUESTION 414425bb815Sopenharmony_ci || cp == LIT_CHAR_LEFT_PAREN 415425bb815Sopenharmony_ci || cp == LIT_CHAR_RIGHT_PAREN 416425bb815Sopenharmony_ci || cp == LIT_CHAR_LEFT_SQUARE 417425bb815Sopenharmony_ci || cp == LIT_CHAR_RIGHT_SQUARE 418425bb815Sopenharmony_ci || cp == LIT_CHAR_LEFT_BRACE 419425bb815Sopenharmony_ci || cp == LIT_CHAR_RIGHT_BRACE 420425bb815Sopenharmony_ci || cp == LIT_CHAR_VLINE); 421425bb815Sopenharmony_ci} /* re_is_syntax_char */ 422425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 423425bb815Sopenharmony_ci 424425bb815Sopenharmony_ci/** 425425bb815Sopenharmony_ci * Parse a Character Escape or a Character Class Escape. 426425bb815Sopenharmony_ci * 427425bb815Sopenharmony_ci * @return ECMA_VALUE_EMPTY, if parsed successfully 428425bb815Sopenharmony_ci * ECMA_VALUE_ERROR, otherwise 429425bb815Sopenharmony_ci */ 430425bb815Sopenharmony_cistatic ecma_value_t 431425bb815Sopenharmony_cire_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 432425bb815Sopenharmony_ci{ 433425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 434425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CHAR; 435425bb815Sopenharmony_ci 436425bb815Sopenharmony_ci if (lit_char_is_decimal_digit (*re_ctx_p->input_curr_p)) 437425bb815Sopenharmony_ci { 438425bb815Sopenharmony_ci /* NULL code point escape, only valid if there are no following digits. */ 439425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_0 440425bb815Sopenharmony_ci && (re_ctx_p->input_curr_p + 1 >= re_ctx_p->input_end_p 441425bb815Sopenharmony_ci || !lit_char_is_decimal_digit (re_ctx_p->input_curr_p[1]))) 442425bb815Sopenharmony_ci { 443425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 444425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_UNICODE_CODE_POINT_NULL; 445425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 446425bb815Sopenharmony_ci } 447425bb815Sopenharmony_ci 448425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 449425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 450425bb815Sopenharmony_ci { 451425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape sequence")); 452425bb815Sopenharmony_ci } 453425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 454425bb815Sopenharmony_ci 455425bb815Sopenharmony_ci /* Legacy octal escape sequence */ 456425bb815Sopenharmony_ci if (lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 457425bb815Sopenharmony_ci { 458425bb815Sopenharmony_ci re_ctx_p->token.value = re_parse_octal (re_ctx_p); 459425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 460425bb815Sopenharmony_ci } 461425bb815Sopenharmony_ci 462425bb815Sopenharmony_ci /* Identity escape */ 463425bb815Sopenharmony_ci re_ctx_p->token.value = *re_ctx_p->input_curr_p++; 464425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 465425bb815Sopenharmony_ci } 466425bb815Sopenharmony_ci 467425bb815Sopenharmony_ci lit_code_point_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 468425bb815Sopenharmony_ci switch (ch) 469425bb815Sopenharmony_ci { 470425bb815Sopenharmony_ci /* Character Class escapes */ 471425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_D: 472425bb815Sopenharmony_ci { 473425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 474425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_DIGIT; 475425bb815Sopenharmony_ci break; 476425bb815Sopenharmony_ci } 477425bb815Sopenharmony_ci case LIT_CHAR_UPPERCASE_D: 478425bb815Sopenharmony_ci { 479425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 480425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_NOT_DIGIT; 481425bb815Sopenharmony_ci break; 482425bb815Sopenharmony_ci } 483425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_S: 484425bb815Sopenharmony_ci { 485425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 486425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_WHITESPACE; 487425bb815Sopenharmony_ci break; 488425bb815Sopenharmony_ci } 489425bb815Sopenharmony_ci case LIT_CHAR_UPPERCASE_S: 490425bb815Sopenharmony_ci { 491425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 492425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_NOT_WHITESPACE; 493425bb815Sopenharmony_ci break; 494425bb815Sopenharmony_ci } 495425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_W: 496425bb815Sopenharmony_ci { 497425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 498425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_WORD_CHAR; 499425bb815Sopenharmony_ci break; 500425bb815Sopenharmony_ci } 501425bb815Sopenharmony_ci case LIT_CHAR_UPPERCASE_W: 502425bb815Sopenharmony_ci { 503425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 504425bb815Sopenharmony_ci re_ctx_p->token.value = RE_ESCAPE_NOT_WORD_CHAR; 505425bb815Sopenharmony_ci break; 506425bb815Sopenharmony_ci } 507425bb815Sopenharmony_ci /* Control escapes */ 508425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_F: 509425bb815Sopenharmony_ci { 510425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_FF; 511425bb815Sopenharmony_ci break; 512425bb815Sopenharmony_ci } 513425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_N: 514425bb815Sopenharmony_ci { 515425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_LF; 516425bb815Sopenharmony_ci break; 517425bb815Sopenharmony_ci } 518425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_R: 519425bb815Sopenharmony_ci { 520425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_CR; 521425bb815Sopenharmony_ci break; 522425bb815Sopenharmony_ci } 523425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_T: 524425bb815Sopenharmony_ci { 525425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_TAB; 526425bb815Sopenharmony_ci break; 527425bb815Sopenharmony_ci } 528425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_V: 529425bb815Sopenharmony_ci { 530425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_VTAB; 531425bb815Sopenharmony_ci break; 532425bb815Sopenharmony_ci } 533425bb815Sopenharmony_ci /* Control letter */ 534425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_C: 535425bb815Sopenharmony_ci { 536425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 537425bb815Sopenharmony_ci { 538425bb815Sopenharmony_ci ch = *re_ctx_p->input_curr_p; 539425bb815Sopenharmony_ci 540425bb815Sopenharmony_ci if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END) 541425bb815Sopenharmony_ci || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)) 542425bb815Sopenharmony_ci { 543425bb815Sopenharmony_ci re_ctx_p->token.value = (ch % 32); 544425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 545425bb815Sopenharmony_ci 546425bb815Sopenharmony_ci break; 547425bb815Sopenharmony_ci } 548425bb815Sopenharmony_ci } 549425bb815Sopenharmony_ci 550425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 551425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 552425bb815Sopenharmony_ci { 553425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid control escape sequence")); 554425bb815Sopenharmony_ci } 555425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 556425bb815Sopenharmony_ci 557425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_BACKSLASH; 558425bb815Sopenharmony_ci re_ctx_p->input_curr_p--; 559425bb815Sopenharmony_ci 560425bb815Sopenharmony_ci break; 561425bb815Sopenharmony_ci } 562425bb815Sopenharmony_ci /* Hex escape */ 563425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_X: 564425bb815Sopenharmony_ci { 565425bb815Sopenharmony_ci uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 2); 566425bb815Sopenharmony_ci if (hex_value != UINT32_MAX) 567425bb815Sopenharmony_ci { 568425bb815Sopenharmony_ci re_ctx_p->token.value = hex_value; 569425bb815Sopenharmony_ci re_ctx_p->input_curr_p += 2; 570425bb815Sopenharmony_ci break; 571425bb815Sopenharmony_ci } 572425bb815Sopenharmony_ci 573425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 574425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 575425bb815Sopenharmony_ci { 576425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid hex escape sequence")); 577425bb815Sopenharmony_ci } 578425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 579425bb815Sopenharmony_ci 580425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_LOWERCASE_X; 581425bb815Sopenharmony_ci break; 582425bb815Sopenharmony_ci } 583425bb815Sopenharmony_ci /* Unicode escape */ 584425bb815Sopenharmony_ci case LIT_CHAR_LOWERCASE_U: 585425bb815Sopenharmony_ci { 586425bb815Sopenharmony_ci uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 4); 587425bb815Sopenharmony_ci if (hex_value != UINT32_MAX) 588425bb815Sopenharmony_ci { 589425bb815Sopenharmony_ci re_ctx_p->token.value = hex_value; 590425bb815Sopenharmony_ci re_ctx_p->input_curr_p += 4; 591425bb815Sopenharmony_ci 592425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 593425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE 594425bb815Sopenharmony_ci && lit_is_code_point_utf16_high_surrogate (re_ctx_p->token.value) 595425bb815Sopenharmony_ci && re_ctx_p->input_curr_p + 6 <= re_ctx_p->input_end_p 596425bb815Sopenharmony_ci && re_ctx_p->input_curr_p[0] == '\\' 597425bb815Sopenharmony_ci && re_ctx_p->input_curr_p[1] == 'u') 598425bb815Sopenharmony_ci { 599425bb815Sopenharmony_ci hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p + 2, re_ctx_p->input_end_p, 4); 600425bb815Sopenharmony_ci if (lit_is_code_point_utf16_low_surrogate (hex_value)) 601425bb815Sopenharmony_ci { 602425bb815Sopenharmony_ci re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) re_ctx_p->token.value, 603425bb815Sopenharmony_ci (ecma_char_t) hex_value); 604425bb815Sopenharmony_ci re_ctx_p->input_curr_p += 6; 605425bb815Sopenharmony_ci } 606425bb815Sopenharmony_ci } 607425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 608425bb815Sopenharmony_ci 609425bb815Sopenharmony_ci break; 610425bb815Sopenharmony_ci } 611425bb815Sopenharmony_ci 612425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 613425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 614425bb815Sopenharmony_ci { 615425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p 616425bb815Sopenharmony_ci && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE 617425bb815Sopenharmony_ci && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1])) 618425bb815Sopenharmony_ci { 619425bb815Sopenharmony_ci lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]); 620425bb815Sopenharmony_ci re_ctx_p->input_curr_p += 2; 621425bb815Sopenharmony_ci 622425bb815Sopenharmony_ci while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p)) 623425bb815Sopenharmony_ci { 624425bb815Sopenharmony_ci cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++); 625425bb815Sopenharmony_ci 626425bb815Sopenharmony_ci if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX)) 627425bb815Sopenharmony_ci { 628425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence")); 629425bb815Sopenharmony_ci } 630425bb815Sopenharmony_ci } 631425bb815Sopenharmony_ci 632425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE) 633425bb815Sopenharmony_ci { 634425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 635425bb815Sopenharmony_ci re_ctx_p->token.value = cp; 636425bb815Sopenharmony_ci break; 637425bb815Sopenharmony_ci } 638425bb815Sopenharmony_ci } 639425bb815Sopenharmony_ci 640425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence")); 641425bb815Sopenharmony_ci } 642425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 643425bb815Sopenharmony_ci 644425bb815Sopenharmony_ci re_ctx_p->token.value = LIT_CHAR_LOWERCASE_U; 645425bb815Sopenharmony_ci break; 646425bb815Sopenharmony_ci } 647425bb815Sopenharmony_ci /* Identity escape */ 648425bb815Sopenharmony_ci default: 649425bb815Sopenharmony_ci { 650425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 651425bb815Sopenharmony_ci /* Must be '/', or one of SyntaxCharacter */ 652425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE 653425bb815Sopenharmony_ci && ch != LIT_CHAR_SLASH 654425bb815Sopenharmony_ci && !re_is_syntax_char (ch)) 655425bb815Sopenharmony_ci { 656425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 657425bb815Sopenharmony_ci } 658425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 659425bb815Sopenharmony_ci re_ctx_p->token.value = ch; 660425bb815Sopenharmony_ci } 661425bb815Sopenharmony_ci } 662425bb815Sopenharmony_ci 663425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 664425bb815Sopenharmony_ci} /* re_parse_char_escape */ 665425bb815Sopenharmony_ci 666425bb815Sopenharmony_ci/** 667425bb815Sopenharmony_ci * Read the input pattern and parse the next token for the RegExp compiler 668425bb815Sopenharmony_ci * 669425bb815Sopenharmony_ci * @return empty ecma value - if parsed successfully 670425bb815Sopenharmony_ci * error ecma value - otherwise 671425bb815Sopenharmony_ci * 672425bb815Sopenharmony_ci * Returned value must be freed with ecma_free_value 673425bb815Sopenharmony_ci */ 674425bb815Sopenharmony_cistatic ecma_value_t 675425bb815Sopenharmony_cire_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 676425bb815Sopenharmony_ci{ 677425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 678425bb815Sopenharmony_ci { 679425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_EOF; 680425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 681425bb815Sopenharmony_ci } 682425bb815Sopenharmony_ci 683425bb815Sopenharmony_ci ecma_char_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 684425bb815Sopenharmony_ci 685425bb815Sopenharmony_ci switch (ch) 686425bb815Sopenharmony_ci { 687425bb815Sopenharmony_ci case LIT_CHAR_CIRCUMFLEX: 688425bb815Sopenharmony_ci { 689425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_START; 690425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 691425bb815Sopenharmony_ci } 692425bb815Sopenharmony_ci case LIT_CHAR_DOLLAR_SIGN: 693425bb815Sopenharmony_ci { 694425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_END; 695425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 696425bb815Sopenharmony_ci } 697425bb815Sopenharmony_ci case LIT_CHAR_VLINE: 698425bb815Sopenharmony_ci { 699425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ALTERNATIVE; 700425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 701425bb815Sopenharmony_ci } 702425bb815Sopenharmony_ci case LIT_CHAR_DOT: 703425bb815Sopenharmony_ci { 704425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_PERIOD; 705425bb815Sopenharmony_ci /* Check quantifier */ 706425bb815Sopenharmony_ci break; 707425bb815Sopenharmony_ci } 708425bb815Sopenharmony_ci case LIT_CHAR_BACKSLASH: 709425bb815Sopenharmony_ci { 710425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 711425bb815Sopenharmony_ci { 712425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 713425bb815Sopenharmony_ci } 714425bb815Sopenharmony_ci 715425bb815Sopenharmony_ci /* DecimalEscape, Backreferences cannot start with a zero digit. */ 716425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p > LIT_CHAR_0 && *re_ctx_p->input_curr_p <= LIT_CHAR_9) 717425bb815Sopenharmony_ci { 718425bb815Sopenharmony_ci const lit_utf8_byte_t *digits_p = re_ctx_p->input_curr_p; 719425bb815Sopenharmony_ci const uint32_t value = lit_parse_decimal (&digits_p, re_ctx_p->input_end_p); 720425bb815Sopenharmony_ci 721425bb815Sopenharmony_ci if (re_ctx_p->groups_count < 0) 722425bb815Sopenharmony_ci { 723425bb815Sopenharmony_ci re_count_groups (re_ctx_p); 724425bb815Sopenharmony_ci } 725425bb815Sopenharmony_ci 726425bb815Sopenharmony_ci if (value <= (uint32_t) re_ctx_p->groups_count) 727425bb815Sopenharmony_ci { 728425bb815Sopenharmony_ci /* Valid backreference */ 729425bb815Sopenharmony_ci re_ctx_p->input_curr_p = digits_p; 730425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_BACKREFERENCE; 731425bb815Sopenharmony_ci re_ctx_p->token.value = value; 732425bb815Sopenharmony_ci 733425bb815Sopenharmony_ci /* Check quantifier */ 734425bb815Sopenharmony_ci break; 735425bb815Sopenharmony_ci } 736425bb815Sopenharmony_ci } 737425bb815Sopenharmony_ci 738425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B) 739425bb815Sopenharmony_ci { 740425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 741425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_WORD_BOUNDARY; 742425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 743425bb815Sopenharmony_ci } 744425bb815Sopenharmony_ci else if (*re_ctx_p->input_curr_p == LIT_CHAR_UPPERCASE_B) 745425bb815Sopenharmony_ci { 746425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 747425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY; 748425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 749425bb815Sopenharmony_ci } 750425bb815Sopenharmony_ci 751425bb815Sopenharmony_ci const ecma_value_t parse_result = re_parse_char_escape (re_ctx_p); 752425bb815Sopenharmony_ci 753425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (parse_result)) 754425bb815Sopenharmony_ci { 755425bb815Sopenharmony_ci return parse_result; 756425bb815Sopenharmony_ci } 757425bb815Sopenharmony_ci 758425bb815Sopenharmony_ci /* Check quantifier */ 759425bb815Sopenharmony_ci break; 760425bb815Sopenharmony_ci } 761425bb815Sopenharmony_ci case LIT_CHAR_LEFT_PAREN: 762425bb815Sopenharmony_ci { 763425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 764425bb815Sopenharmony_ci { 765425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated group")); 766425bb815Sopenharmony_ci } 767425bb815Sopenharmony_ci 768425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_QUESTION) 769425bb815Sopenharmony_ci { 770425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 771425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 772425bb815Sopenharmony_ci { 773425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group")); 774425bb815Sopenharmony_ci } 775425bb815Sopenharmony_ci 776425bb815Sopenharmony_ci ch = *re_ctx_p->input_curr_p++; 777425bb815Sopenharmony_ci 778425bb815Sopenharmony_ci if (ch == LIT_CHAR_EQUALS) 779425bb815Sopenharmony_ci { 780425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD; 781425bb815Sopenharmony_ci re_ctx_p->token.value = false; 782425bb815Sopenharmony_ci } 783425bb815Sopenharmony_ci else if (ch == LIT_CHAR_EXCLAMATION) 784425bb815Sopenharmony_ci { 785425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD; 786425bb815Sopenharmony_ci re_ctx_p->token.value = true; 787425bb815Sopenharmony_ci } 788425bb815Sopenharmony_ci else if (ch == LIT_CHAR_COLON) 789425bb815Sopenharmony_ci { 790425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_START_NON_CAPTURE_GROUP; 791425bb815Sopenharmony_ci } 792425bb815Sopenharmony_ci else 793425bb815Sopenharmony_ci { 794425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group")); 795425bb815Sopenharmony_ci } 796425bb815Sopenharmony_ci } 797425bb815Sopenharmony_ci else 798425bb815Sopenharmony_ci { 799425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_START_CAPTURE_GROUP; 800425bb815Sopenharmony_ci } 801425bb815Sopenharmony_ci 802425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 803425bb815Sopenharmony_ci } 804425bb815Sopenharmony_ci case LIT_CHAR_RIGHT_PAREN: 805425bb815Sopenharmony_ci { 806425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_END_GROUP; 807425bb815Sopenharmony_ci 808425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 809425bb815Sopenharmony_ci } 810425bb815Sopenharmony_ci case LIT_CHAR_LEFT_SQUARE: 811425bb815Sopenharmony_ci { 812425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CHAR_CLASS; 813425bb815Sopenharmony_ci 814425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 815425bb815Sopenharmony_ci { 816425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class.")); 817425bb815Sopenharmony_ci } 818425bb815Sopenharmony_ci 819425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 820425bb815Sopenharmony_ci } 821425bb815Sopenharmony_ci case LIT_CHAR_QUESTION: 822425bb815Sopenharmony_ci case LIT_CHAR_ASTERISK: 823425bb815Sopenharmony_ci case LIT_CHAR_PLUS: 824425bb815Sopenharmony_ci { 825425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid quantifier.")); 826425bb815Sopenharmony_ci } 827425bb815Sopenharmony_ci case LIT_CHAR_LEFT_BRACE: 828425bb815Sopenharmony_ci { 829425bb815Sopenharmony_ci re_ctx_p->input_curr_p--; 830425bb815Sopenharmony_ci if (ecma_is_value_true (re_parse_quantifier (re_ctx_p))) 831425bb815Sopenharmony_ci { 832425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Nothing to repeat.")); 833425bb815Sopenharmony_ci } 834425bb815Sopenharmony_ci 835425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 836425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 837425bb815Sopenharmony_ci { 838425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket.")); 839425bb815Sopenharmony_ci } 840425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 841425bb815Sopenharmony_ci 842425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 843425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CHAR; 844425bb815Sopenharmony_ci re_ctx_p->token.value = ch; 845425bb815Sopenharmony_ci 846425bb815Sopenharmony_ci /* Check quantifier */ 847425bb815Sopenharmony_ci break; 848425bb815Sopenharmony_ci } 849425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 850425bb815Sopenharmony_ci case LIT_CHAR_RIGHT_SQUARE: 851425bb815Sopenharmony_ci case LIT_CHAR_RIGHT_BRACE: 852425bb815Sopenharmony_ci { 853425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 854425bb815Sopenharmony_ci { 855425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket.")); 856425bb815Sopenharmony_ci } 857425bb815Sopenharmony_ci 858425bb815Sopenharmony_ci /* FALLTHRU */ 859425bb815Sopenharmony_ci } 860425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 861425bb815Sopenharmony_ci default: 862425bb815Sopenharmony_ci { 863425bb815Sopenharmony_ci re_ctx_p->token.type = RE_TOK_CHAR; 864425bb815Sopenharmony_ci re_ctx_p->token.value = ch; 865425bb815Sopenharmony_ci 866425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 867425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE 868425bb815Sopenharmony_ci && lit_is_code_point_utf16_high_surrogate (ch) 869425bb815Sopenharmony_ci && re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 870425bb815Sopenharmony_ci { 871425bb815Sopenharmony_ci const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p); 872425bb815Sopenharmony_ci if (lit_is_code_point_utf16_low_surrogate (next)) 873425bb815Sopenharmony_ci { 874425bb815Sopenharmony_ci re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point (ch, next); 875425bb815Sopenharmony_ci re_ctx_p->input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT; 876425bb815Sopenharmony_ci } 877425bb815Sopenharmony_ci } 878425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 879425bb815Sopenharmony_ci 880425bb815Sopenharmony_ci /* Check quantifier */ 881425bb815Sopenharmony_ci break; 882425bb815Sopenharmony_ci } 883425bb815Sopenharmony_ci } 884425bb815Sopenharmony_ci 885425bb815Sopenharmony_ci re_parse_quantifier (re_ctx_p); 886425bb815Sopenharmony_ci return re_check_quantifier (re_ctx_p); 887425bb815Sopenharmony_ci} /* re_parse_next_token */ 888425bb815Sopenharmony_ci 889425bb815Sopenharmony_ci/** 890425bb815Sopenharmony_ci * Append a character class range to the bytecode. 891425bb815Sopenharmony_ci */ 892425bb815Sopenharmony_cistatic void 893425bb815Sopenharmony_cire_class_add_range (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 894425bb815Sopenharmony_ci lit_code_point_t start, /**< range begin */ 895425bb815Sopenharmony_ci lit_code_point_t end) /**< range end */ 896425bb815Sopenharmony_ci{ 897425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 898425bb815Sopenharmony_ci { 899425bb815Sopenharmony_ci start = ecma_regexp_canonicalize_char (start, re_ctx_p->flags & RE_FLAG_UNICODE); 900425bb815Sopenharmony_ci end = ecma_regexp_canonicalize_char (end, re_ctx_p->flags & RE_FLAG_UNICODE); 901425bb815Sopenharmony_ci } 902425bb815Sopenharmony_ci 903425bb815Sopenharmony_ci re_append_char (re_ctx_p, start); 904425bb815Sopenharmony_ci re_append_char (re_ctx_p, end); 905425bb815Sopenharmony_ci} /* re_class_add_range */ 906425bb815Sopenharmony_ci 907425bb815Sopenharmony_ci/** 908425bb815Sopenharmony_ci * Add a single character to the character class 909425bb815Sopenharmony_ci */ 910425bb815Sopenharmony_cistatic void 911425bb815Sopenharmony_cire_class_add_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 912425bb815Sopenharmony_ci uint32_t class_offset, /**< character class bytecode offset*/ 913425bb815Sopenharmony_ci lit_code_point_t cp) /**< code point */ 914425bb815Sopenharmony_ci{ 915425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 916425bb815Sopenharmony_ci { 917425bb815Sopenharmony_ci cp = ecma_regexp_canonicalize_char (cp, re_ctx_p->flags & RE_FLAG_UNICODE); 918425bb815Sopenharmony_ci } 919425bb815Sopenharmony_ci 920425bb815Sopenharmony_ci re_insert_char (re_ctx_p, class_offset, cp); 921425bb815Sopenharmony_ci} /* re_class_add_char */ 922425bb815Sopenharmony_ci 923425bb815Sopenharmony_ci/** 924425bb815Sopenharmony_ci * Invalid character code point 925425bb815Sopenharmony_ci */ 926425bb815Sopenharmony_ci#define RE_INVALID_CP 0xFFFFFFFF 927425bb815Sopenharmony_ci 928425bb815Sopenharmony_ci/** 929425bb815Sopenharmony_ci * Read the input pattern and parse the range of character class 930425bb815Sopenharmony_ci * 931425bb815Sopenharmony_ci * @return empty ecma value - if parsed successfully 932425bb815Sopenharmony_ci * error ecma value - otherwise 933425bb815Sopenharmony_ci * 934425bb815Sopenharmony_ci * Returned value must be freed with ecma_free_value 935425bb815Sopenharmony_ci */ 936425bb815Sopenharmony_cistatic ecma_value_t 937425bb815Sopenharmony_cire_parse_char_class (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 938425bb815Sopenharmony_ci{ 939425bb815Sopenharmony_ci static const uint8_t escape_flags[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20}; 940425bb815Sopenharmony_ci const uint32_t class_offset = re_bytecode_size (re_ctx_p); 941425bb815Sopenharmony_ci 942425bb815Sopenharmony_ci uint8_t found_escape_flags = 0; 943425bb815Sopenharmony_ci uint8_t out_class_flags = 0; 944425bb815Sopenharmony_ci 945425bb815Sopenharmony_ci uint32_t range_count = 0; 946425bb815Sopenharmony_ci uint32_t char_count = 0; 947425bb815Sopenharmony_ci bool is_range = false; 948425bb815Sopenharmony_ci 949425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 950425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_CIRCUMFLEX) 951425bb815Sopenharmony_ci { 952425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 953425bb815Sopenharmony_ci out_class_flags |= RE_CLASS_INVERT; 954425bb815Sopenharmony_ci } 955425bb815Sopenharmony_ci 956425bb815Sopenharmony_ci lit_code_point_t start = RE_INVALID_CP; 957425bb815Sopenharmony_ci 958425bb815Sopenharmony_ci while (true) 959425bb815Sopenharmony_ci { 960425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 961425bb815Sopenharmony_ci { 962425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class.")); 963425bb815Sopenharmony_ci } 964425bb815Sopenharmony_ci 965425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_SQUARE) 966425bb815Sopenharmony_ci { 967425bb815Sopenharmony_ci if (is_range) 968425bb815Sopenharmony_ci { 969425bb815Sopenharmony_ci if (start != RE_INVALID_CP) 970425bb815Sopenharmony_ci { 971425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, start); 972425bb815Sopenharmony_ci char_count++; 973425bb815Sopenharmony_ci } 974425bb815Sopenharmony_ci 975425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS); 976425bb815Sopenharmony_ci char_count++; 977425bb815Sopenharmony_ci } 978425bb815Sopenharmony_ci 979425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 980425bb815Sopenharmony_ci break; 981425bb815Sopenharmony_ci } 982425bb815Sopenharmony_ci 983425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 984425bb815Sopenharmony_ci lit_code_point_t current; 985425bb815Sopenharmony_ci 986425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_BACKSLASH) 987425bb815Sopenharmony_ci { 988425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 989425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 990425bb815Sopenharmony_ci { 991425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 992425bb815Sopenharmony_ci } 993425bb815Sopenharmony_ci 994425bb815Sopenharmony_ci if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B) 995425bb815Sopenharmony_ci { 996425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 997425bb815Sopenharmony_ci current = LIT_CHAR_BS; 998425bb815Sopenharmony_ci } 999425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 1000425bb815Sopenharmony_ci else if (*re_ctx_p->input_curr_p == LIT_CHAR_MINUS) 1001425bb815Sopenharmony_ci { 1002425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 1003425bb815Sopenharmony_ci current = LIT_CHAR_MINUS; 1004425bb815Sopenharmony_ci } 1005425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 1006425bb815Sopenharmony_ci else if ((re_ctx_p->flags & RE_FLAG_UNICODE) == 0 1007425bb815Sopenharmony_ci && *re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_C 1008425bb815Sopenharmony_ci && re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p 1009425bb815Sopenharmony_ci && (lit_char_is_decimal_digit (*(re_ctx_p->input_curr_p + 1)) 1010425bb815Sopenharmony_ci || *(re_ctx_p->input_curr_p + 1) == LIT_CHAR_UNDERSCORE)) 1011425bb815Sopenharmony_ci { 1012425bb815Sopenharmony_ci current = ((uint8_t) *(re_ctx_p->input_curr_p + 1) % 32); 1013425bb815Sopenharmony_ci re_ctx_p->input_curr_p += 2; 1014425bb815Sopenharmony_ci } 1015425bb815Sopenharmony_ci else 1016425bb815Sopenharmony_ci { 1017425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (re_parse_char_escape (re_ctx_p))) 1018425bb815Sopenharmony_ci { 1019425bb815Sopenharmony_ci return ECMA_VALUE_ERROR; 1020425bb815Sopenharmony_ci } 1021425bb815Sopenharmony_ci 1022425bb815Sopenharmony_ci if (re_ctx_p->token.type == RE_TOK_CLASS_ESCAPE) 1023425bb815Sopenharmony_ci { 1024425bb815Sopenharmony_ci const uint8_t escape = (uint8_t) re_ctx_p->token.value; 1025425bb815Sopenharmony_ci found_escape_flags |= escape_flags[escape]; 1026425bb815Sopenharmony_ci current = RE_INVALID_CP; 1027425bb815Sopenharmony_ci } 1028425bb815Sopenharmony_ci else 1029425bb815Sopenharmony_ci { 1030425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR); 1031425bb815Sopenharmony_ci current = re_ctx_p->token.value; 1032425bb815Sopenharmony_ci } 1033425bb815Sopenharmony_ci } 1034425bb815Sopenharmony_ci } 1035425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 1036425bb815Sopenharmony_ci else if (re_ctx_p->flags & RE_FLAG_UNICODE) 1037425bb815Sopenharmony_ci { 1038425bb815Sopenharmony_ci current = ecma_regexp_unicode_advance (&re_ctx_p->input_curr_p, re_ctx_p->input_end_p); 1039425bb815Sopenharmony_ci } 1040425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 1041425bb815Sopenharmony_ci else 1042425bb815Sopenharmony_ci { 1043425bb815Sopenharmony_ci current = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 1044425bb815Sopenharmony_ci } 1045425bb815Sopenharmony_ci 1046425bb815Sopenharmony_ci if (is_range) 1047425bb815Sopenharmony_ci { 1048425bb815Sopenharmony_ci is_range = false; 1049425bb815Sopenharmony_ci 1050425bb815Sopenharmony_ci if (start != RE_INVALID_CP && current != RE_INVALID_CP) 1051425bb815Sopenharmony_ci { 1052425bb815Sopenharmony_ci if (start > current) 1053425bb815Sopenharmony_ci { 1054425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Range out of order in character class")); 1055425bb815Sopenharmony_ci } 1056425bb815Sopenharmony_ci 1057425bb815Sopenharmony_ci re_class_add_range (re_ctx_p, start, current); 1058425bb815Sopenharmony_ci range_count++; 1059425bb815Sopenharmony_ci continue; 1060425bb815Sopenharmony_ci } 1061425bb815Sopenharmony_ci 1062425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 1063425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 1064425bb815Sopenharmony_ci { 1065425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid character class")); 1066425bb815Sopenharmony_ci } 1067425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 1068425bb815Sopenharmony_ci 1069425bb815Sopenharmony_ci if (start != RE_INVALID_CP) 1070425bb815Sopenharmony_ci { 1071425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, start); 1072425bb815Sopenharmony_ci char_count++; 1073425bb815Sopenharmony_ci } 1074425bb815Sopenharmony_ci else if (current != RE_INVALID_CP) 1075425bb815Sopenharmony_ci { 1076425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, current); 1077425bb815Sopenharmony_ci char_count++; 1078425bb815Sopenharmony_ci } 1079425bb815Sopenharmony_ci 1080425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS); 1081425bb815Sopenharmony_ci char_count++; 1082425bb815Sopenharmony_ci continue; 1083425bb815Sopenharmony_ci } 1084425bb815Sopenharmony_ci 1085425bb815Sopenharmony_ci if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 1086425bb815Sopenharmony_ci && *re_ctx_p->input_curr_p == LIT_CHAR_MINUS) 1087425bb815Sopenharmony_ci { 1088425bb815Sopenharmony_ci re_ctx_p->input_curr_p++; 1089425bb815Sopenharmony_ci start = current; 1090425bb815Sopenharmony_ci is_range = true; 1091425bb815Sopenharmony_ci continue; 1092425bb815Sopenharmony_ci } 1093425bb815Sopenharmony_ci 1094425bb815Sopenharmony_ci if (current != RE_INVALID_CP) 1095425bb815Sopenharmony_ci { 1096425bb815Sopenharmony_ci re_class_add_char (re_ctx_p, class_offset, current); 1097425bb815Sopenharmony_ci char_count++; 1098425bb815Sopenharmony_ci } 1099425bb815Sopenharmony_ci } 1100425bb815Sopenharmony_ci 1101425bb815Sopenharmony_ci uint8_t escape_count = 0; 1102425bb815Sopenharmony_ci for (ecma_class_escape_t escape = RE_ESCAPE__START; escape < RE_ESCAPE__COUNT; ++escape) 1103425bb815Sopenharmony_ci { 1104425bb815Sopenharmony_ci if (found_escape_flags & escape_flags[escape]) 1105425bb815Sopenharmony_ci { 1106425bb815Sopenharmony_ci re_insert_byte (re_ctx_p, class_offset, (uint8_t) escape); 1107425bb815Sopenharmony_ci escape_count++; 1108425bb815Sopenharmony_ci } 1109425bb815Sopenharmony_ci } 1110425bb815Sopenharmony_ci 1111425bb815Sopenharmony_ci if (range_count > 0) 1112425bb815Sopenharmony_ci { 1113425bb815Sopenharmony_ci re_insert_value (re_ctx_p, class_offset, range_count); 1114425bb815Sopenharmony_ci out_class_flags |= RE_CLASS_HAS_RANGES; 1115425bb815Sopenharmony_ci } 1116425bb815Sopenharmony_ci 1117425bb815Sopenharmony_ci if (char_count > 0) 1118425bb815Sopenharmony_ci { 1119425bb815Sopenharmony_ci re_insert_value (re_ctx_p, class_offset, char_count); 1120425bb815Sopenharmony_ci out_class_flags |= RE_CLASS_HAS_CHARS; 1121425bb815Sopenharmony_ci } 1122425bb815Sopenharmony_ci 1123425bb815Sopenharmony_ci JERRY_ASSERT (escape_count <= RE_CLASS_ESCAPE_COUNT_MASK); 1124425bb815Sopenharmony_ci out_class_flags |= escape_count; 1125425bb815Sopenharmony_ci 1126425bb815Sopenharmony_ci re_insert_byte (re_ctx_p, class_offset, out_class_flags); 1127425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, class_offset, RE_OP_CHAR_CLASS); 1128425bb815Sopenharmony_ci 1129425bb815Sopenharmony_ci re_parse_quantifier (re_ctx_p); 1130425bb815Sopenharmony_ci return re_check_quantifier (re_ctx_p); 1131425bb815Sopenharmony_ci} /* re_parse_char_class */ 1132425bb815Sopenharmony_ci 1133425bb815Sopenharmony_ci/** 1134425bb815Sopenharmony_ci * Parse alternatives 1135425bb815Sopenharmony_ci * 1136425bb815Sopenharmony_ci * @return empty ecma value - if alternative was successfully parsed 1137425bb815Sopenharmony_ci * error ecma value - otherwise 1138425bb815Sopenharmony_ci * 1139425bb815Sopenharmony_ci * Returned value must be freed with ecma_free_value 1140425bb815Sopenharmony_ci */ 1141425bb815Sopenharmony_ciecma_value_t 1142425bb815Sopenharmony_cire_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 1143425bb815Sopenharmony_ci bool expect_eof) /**< expect end of file */ 1144425bb815Sopenharmony_ci{ 1145425bb815Sopenharmony_ci ECMA_CHECK_STACK_USAGE (); 1146425bb815Sopenharmony_ci uint32_t alternative_offset = re_bytecode_size (re_ctx_p); 1147425bb815Sopenharmony_ci bool first_alternative = true; 1148425bb815Sopenharmony_ci 1149425bb815Sopenharmony_ci while (true) 1150425bb815Sopenharmony_ci { 1151425bb815Sopenharmony_ci ecma_value_t next_token_result = re_parse_next_token (re_ctx_p); 1152425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (next_token_result)) 1153425bb815Sopenharmony_ci { 1154425bb815Sopenharmony_ci return next_token_result; 1155425bb815Sopenharmony_ci } 1156425bb815Sopenharmony_ci 1157425bb815Sopenharmony_ci JERRY_ASSERT (ecma_is_value_empty (next_token_result)); 1158425bb815Sopenharmony_ci 1159425bb815Sopenharmony_ci uint32_t atom_offset = re_bytecode_size (re_ctx_p); 1160425bb815Sopenharmony_ci 1161425bb815Sopenharmony_ci switch (re_ctx_p->token.type) 1162425bb815Sopenharmony_ci { 1163425bb815Sopenharmony_ci case RE_TOK_START_CAPTURE_GROUP: 1164425bb815Sopenharmony_ci { 1165425bb815Sopenharmony_ci const uint32_t idx = re_ctx_p->captures_count++; 1166425bb815Sopenharmony_ci const uint32_t capture_start = idx; 1167425bb815Sopenharmony_ci 1168425bb815Sopenharmony_ci ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1169425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (result)) 1170425bb815Sopenharmony_ci { 1171425bb815Sopenharmony_ci return result; 1172425bb815Sopenharmony_ci } 1173425bb815Sopenharmony_ci 1174425bb815Sopenharmony_ci re_parse_quantifier (re_ctx_p); 1175425bb815Sopenharmony_ci 1176425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1177425bb815Sopenharmony_ci { 1178425bb815Sopenharmony_ci return ECMA_VALUE_ERROR; 1179425bb815Sopenharmony_ci } 1180425bb815Sopenharmony_ci 1181425bb815Sopenharmony_ci re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, true); 1182425bb815Sopenharmony_ci break; 1183425bb815Sopenharmony_ci } 1184425bb815Sopenharmony_ci case RE_TOK_START_NON_CAPTURE_GROUP: 1185425bb815Sopenharmony_ci { 1186425bb815Sopenharmony_ci const uint32_t idx = re_ctx_p->non_captures_count++; 1187425bb815Sopenharmony_ci const uint32_t capture_start = re_ctx_p->captures_count; 1188425bb815Sopenharmony_ci 1189425bb815Sopenharmony_ci ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1190425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (result)) 1191425bb815Sopenharmony_ci { 1192425bb815Sopenharmony_ci return result; 1193425bb815Sopenharmony_ci } 1194425bb815Sopenharmony_ci 1195425bb815Sopenharmony_ci re_parse_quantifier (re_ctx_p); 1196425bb815Sopenharmony_ci 1197425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1198425bb815Sopenharmony_ci { 1199425bb815Sopenharmony_ci return ECMA_VALUE_ERROR; 1200425bb815Sopenharmony_ci } 1201425bb815Sopenharmony_ci 1202425bb815Sopenharmony_ci re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, false); 1203425bb815Sopenharmony_ci break; 1204425bb815Sopenharmony_ci } 1205425bb815Sopenharmony_ci case RE_TOK_PERIOD: 1206425bb815Sopenharmony_ci { 1207425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 1208425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, (re_ctx_p->flags & RE_FLAG_UNICODE) ? RE_OP_UNICODE_PERIOD : RE_OP_PERIOD); 1209425bb815Sopenharmony_ci#else /* !ENABLED (JERRY_ES2015) */ 1210425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_PERIOD); 1211425bb815Sopenharmony_ci#endif /* !ENABLED (JERRY_ES2015) */ 1212425bb815Sopenharmony_ci 1213425bb815Sopenharmony_ci re_insert_atom_iterator (re_ctx_p, atom_offset); 1214425bb815Sopenharmony_ci break; 1215425bb815Sopenharmony_ci } 1216425bb815Sopenharmony_ci case RE_TOK_ALTERNATIVE: 1217425bb815Sopenharmony_ci { 1218425bb815Sopenharmony_ci re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1219425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, alternative_offset, first_alternative ? RE_OP_ALTERNATIVE_START 1220425bb815Sopenharmony_ci : RE_OP_ALTERNATIVE_NEXT); 1221425bb815Sopenharmony_ci 1222425bb815Sopenharmony_ci alternative_offset = re_bytecode_size (re_ctx_p); 1223425bb815Sopenharmony_ci first_alternative = false; 1224425bb815Sopenharmony_ci break; 1225425bb815Sopenharmony_ci } 1226425bb815Sopenharmony_ci case RE_TOK_ASSERT_START: 1227425bb815Sopenharmony_ci { 1228425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_START); 1229425bb815Sopenharmony_ci break; 1230425bb815Sopenharmony_ci } 1231425bb815Sopenharmony_ci case RE_TOK_ASSERT_END: 1232425bb815Sopenharmony_ci { 1233425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_END); 1234425bb815Sopenharmony_ci break; 1235425bb815Sopenharmony_ci } 1236425bb815Sopenharmony_ci case RE_TOK_ASSERT_WORD_BOUNDARY: 1237425bb815Sopenharmony_ci { 1238425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY); 1239425bb815Sopenharmony_ci break; 1240425bb815Sopenharmony_ci } 1241425bb815Sopenharmony_ci case RE_TOK_ASSERT_NOT_WORD_BOUNDARY: 1242425bb815Sopenharmony_ci { 1243425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY); 1244425bb815Sopenharmony_ci break; 1245425bb815Sopenharmony_ci } 1246425bb815Sopenharmony_ci case RE_TOK_ASSERT_LOOKAHEAD: 1247425bb815Sopenharmony_ci { 1248425bb815Sopenharmony_ci const uint32_t start_capture_count = re_ctx_p->captures_count; 1249425bb815Sopenharmony_ci const bool is_negative = !!re_ctx_p->token.value; 1250425bb815Sopenharmony_ci 1251425bb815Sopenharmony_ci ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1252425bb815Sopenharmony_ci 1253425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (result)) 1254425bb815Sopenharmony_ci { 1255425bb815Sopenharmony_ci return result; 1256425bb815Sopenharmony_ci } 1257425bb815Sopenharmony_ci 1258425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 1259425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_UNICODE) 1260425bb815Sopenharmony_ci { 1261425bb815Sopenharmony_ci re_ctx_p->token.qmin = 1; 1262425bb815Sopenharmony_ci re_ctx_p->token.qmax = 1; 1263425bb815Sopenharmony_ci re_ctx_p->token.greedy = true; 1264425bb815Sopenharmony_ci } 1265425bb815Sopenharmony_ci else 1266425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 1267425bb815Sopenharmony_ci { 1268425bb815Sopenharmony_ci re_parse_quantifier (re_ctx_p); 1269425bb815Sopenharmony_ci 1270425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1271425bb815Sopenharmony_ci { 1272425bb815Sopenharmony_ci return ECMA_VALUE_ERROR; 1273425bb815Sopenharmony_ci } 1274425bb815Sopenharmony_ci } 1275425bb815Sopenharmony_ci 1276425bb815Sopenharmony_ci re_insert_assertion_lookahead (re_ctx_p, atom_offset, start_capture_count, is_negative); 1277425bb815Sopenharmony_ci break; 1278425bb815Sopenharmony_ci } 1279425bb815Sopenharmony_ci case RE_TOK_BACKREFERENCE: 1280425bb815Sopenharmony_ci { 1281425bb815Sopenharmony_ci const uint32_t backref_idx = re_ctx_p->token.value; 1282425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_BACKREFERENCE); 1283425bb815Sopenharmony_ci re_append_value (re_ctx_p, backref_idx); 1284425bb815Sopenharmony_ci 1285425bb815Sopenharmony_ci if (re_ctx_p->token.qmin != 1 || re_ctx_p->token.qmax != 1) 1286425bb815Sopenharmony_ci { 1287425bb815Sopenharmony_ci const uint32_t group_idx = re_ctx_p->non_captures_count++; 1288425bb815Sopenharmony_ci re_insert_into_group (re_ctx_p, atom_offset, group_idx, re_ctx_p->captures_count, false); 1289425bb815Sopenharmony_ci } 1290425bb815Sopenharmony_ci 1291425bb815Sopenharmony_ci break; 1292425bb815Sopenharmony_ci } 1293425bb815Sopenharmony_ci case RE_TOK_CLASS_ESCAPE: 1294425bb815Sopenharmony_ci { 1295425bb815Sopenharmony_ci const ecma_class_escape_t escape = (ecma_class_escape_t) re_ctx_p->token.value; 1296425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_CLASS_ESCAPE); 1297425bb815Sopenharmony_ci re_append_byte (re_ctx_p, (uint8_t) escape); 1298425bb815Sopenharmony_ci 1299425bb815Sopenharmony_ci re_insert_atom_iterator (re_ctx_p, atom_offset); 1300425bb815Sopenharmony_ci break; 1301425bb815Sopenharmony_ci } 1302425bb815Sopenharmony_ci case RE_TOK_CHAR_CLASS: 1303425bb815Sopenharmony_ci { 1304425bb815Sopenharmony_ci ecma_value_t result = re_parse_char_class (re_ctx_p); 1305425bb815Sopenharmony_ci 1306425bb815Sopenharmony_ci if (ECMA_IS_VALUE_ERROR (result)) 1307425bb815Sopenharmony_ci { 1308425bb815Sopenharmony_ci return result; 1309425bb815Sopenharmony_ci } 1310425bb815Sopenharmony_ci 1311425bb815Sopenharmony_ci re_insert_atom_iterator (re_ctx_p, atom_offset); 1312425bb815Sopenharmony_ci break; 1313425bb815Sopenharmony_ci } 1314425bb815Sopenharmony_ci case RE_TOK_END_GROUP: 1315425bb815Sopenharmony_ci { 1316425bb815Sopenharmony_ci if (expect_eof) 1317425bb815Sopenharmony_ci { 1318425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unmatched ')'")); 1319425bb815Sopenharmony_ci } 1320425bb815Sopenharmony_ci 1321425bb815Sopenharmony_ci if (!first_alternative) 1322425bb815Sopenharmony_ci { 1323425bb815Sopenharmony_ci re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1324425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT); 1325425bb815Sopenharmony_ci } 1326425bb815Sopenharmony_ci 1327425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 1328425bb815Sopenharmony_ci } 1329425bb815Sopenharmony_ci case RE_TOK_EOF: 1330425bb815Sopenharmony_ci { 1331425bb815Sopenharmony_ci if (!expect_eof) 1332425bb815Sopenharmony_ci { 1333425bb815Sopenharmony_ci return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern.")); 1334425bb815Sopenharmony_ci } 1335425bb815Sopenharmony_ci 1336425bb815Sopenharmony_ci if (!first_alternative) 1337425bb815Sopenharmony_ci { 1338425bb815Sopenharmony_ci re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1339425bb815Sopenharmony_ci re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT); 1340425bb815Sopenharmony_ci } 1341425bb815Sopenharmony_ci 1342425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_EOF); 1343425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 1344425bb815Sopenharmony_ci } 1345425bb815Sopenharmony_ci default: 1346425bb815Sopenharmony_ci { 1347425bb815Sopenharmony_ci JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR); 1348425bb815Sopenharmony_ci 1349425bb815Sopenharmony_ci lit_code_point_t ch = re_ctx_p->token.value; 1350425bb815Sopenharmony_ci 1351425bb815Sopenharmony_ci if (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) == 0) 1352425bb815Sopenharmony_ci { 1353425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_BYTE); 1354425bb815Sopenharmony_ci re_append_byte (re_ctx_p, (uint8_t) ch); 1355425bb815Sopenharmony_ci 1356425bb815Sopenharmony_ci re_insert_atom_iterator (re_ctx_p, atom_offset); 1357425bb815Sopenharmony_ci break; 1358425bb815Sopenharmony_ci } 1359425bb815Sopenharmony_ci 1360425bb815Sopenharmony_ci if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 1361425bb815Sopenharmony_ci { 1362425bb815Sopenharmony_ci ch = ecma_regexp_canonicalize_char (ch, re_ctx_p->flags & RE_FLAG_UNICODE); 1363425bb815Sopenharmony_ci } 1364425bb815Sopenharmony_ci 1365425bb815Sopenharmony_ci re_append_opcode (re_ctx_p, RE_OP_CHAR); 1366425bb815Sopenharmony_ci re_append_char (re_ctx_p, ch); 1367425bb815Sopenharmony_ci 1368425bb815Sopenharmony_ci re_insert_atom_iterator (re_ctx_p, atom_offset); 1369425bb815Sopenharmony_ci break; 1370425bb815Sopenharmony_ci } 1371425bb815Sopenharmony_ci } 1372425bb815Sopenharmony_ci } 1373425bb815Sopenharmony_ci 1374425bb815Sopenharmony_ci return ECMA_VALUE_EMPTY; 1375425bb815Sopenharmony_ci} /* re_parse_alternative */ 1376425bb815Sopenharmony_ci 1377425bb815Sopenharmony_ci/** 1378425bb815Sopenharmony_ci * @} 1379425bb815Sopenharmony_ci * @} 1380425bb815Sopenharmony_ci * @} 1381425bb815Sopenharmony_ci */ 1382425bb815Sopenharmony_ci 1383425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 1384