1/* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecma-exceptions.h" 17#include "ecma-globals.h" 18#include "ecma-try-catch-macro.h" 19#include "jcontext.h" 20#include "jrt-libc-includes.h" 21#include "lit-char-helpers.h" 22#include "re-compiler.h" 23#include "re-parser.h" 24 25#if ENABLED (JERRY_BUILTIN_REGEXP) 26 27/** \addtogroup parser Parser 28 * @{ 29 * 30 * \addtogroup regexparser Regular expression 31 * @{ 32 * 33 * \addtogroup regexparser_parser Parser 34 * @{ 35 */ 36 37/** 38 * Get the start opcode for the current group. 39 * 40 * @return RegExp opcode 41 */ 42static re_opcode_t 43re_get_group_start_opcode (bool is_capturing) /**< is capturing group */ 44{ 45 return (is_capturing) ? RE_OP_CAPTURING_GROUP_START : RE_OP_NON_CAPTURING_GROUP_START; 46} /* re_get_group_start_opcode*/ 47 48/** 49 * Get the end opcode for the current group. 50 * 51 * @return RegExp opcode 52 */ 53static re_opcode_t 54re_get_group_end_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 55 bool is_capturing) /**< is capturing group */ 56{ 57 if (is_capturing) 58 { 59 if (re_ctx_p->token.greedy) 60 { 61 return RE_OP_GREEDY_CAPTURING_GROUP_END; 62 } 63 64 return RE_OP_LAZY_CAPTURING_GROUP_END; 65 } 66 67 if (re_ctx_p->token.greedy) 68 { 69 return RE_OP_GREEDY_NON_CAPTURING_GROUP_END; 70 } 71 72 return RE_OP_LAZY_NON_CAPTURING_GROUP_END; 73} /* re_get_group_end_opcode */ 74 75/** 76 * Enclose the given bytecode to a group. 77 */ 78static void 79re_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 80 uint32_t group_start_offset, /**< offset of group start */ 81 uint32_t idx, /**< index of group */ 82 uint32_t capture_start, /**< index of first nested capture */ 83 bool is_capturing) /**< is capturing group */ 84{ 85 uint32_t qmin = re_ctx_p->token.qmin; 86 uint32_t qmax = re_ctx_p->token.qmax; 87 88 if (JERRY_UNLIKELY (!is_capturing && re_bytecode_size (re_ctx_p) == group_start_offset)) 89 { 90 return; 91 } 92 93 if (qmin == 0) 94 { 95 re_insert_value (re_ctx_p, 96 group_start_offset, 97 re_bytecode_size (re_ctx_p) - group_start_offset); 98 } 99 100 re_insert_value (re_ctx_p, group_start_offset, qmin); 101 re_insert_value (re_ctx_p, group_start_offset, re_ctx_p->captures_count - capture_start); 102 103 if (!is_capturing) 104 { 105 re_insert_value (re_ctx_p, group_start_offset, capture_start); 106 } 107 else 108 { 109 JERRY_ASSERT (idx == capture_start); 110 } 111 112 re_insert_value (re_ctx_p, group_start_offset, idx); 113 re_insert_opcode (re_ctx_p, group_start_offset, re_get_group_start_opcode (is_capturing)); 114 115 re_append_opcode (re_ctx_p, re_get_group_end_opcode (re_ctx_p, is_capturing)); 116 re_append_value (re_ctx_p, idx); 117 re_append_value (re_ctx_p, qmin); 118 re_append_value (re_ctx_p, qmax + RE_QMAX_OFFSET); 119} /* re_insert_into_group */ 120 121/** 122 * Insert simple atom iterator. 123 */ 124static void 125re_insert_atom_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 126 uint32_t start_offset) /**< atom start offset */ 127{ 128 const uint32_t qmin = re_ctx_p->token.qmin; 129 const uint32_t qmax = re_ctx_p->token.qmax; 130 131 if (qmin == 1 && qmax == 1) 132 { 133 return; 134 } 135 136 re_append_opcode (re_ctx_p, RE_OP_ITERATOR_END); 137 re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset); 138 re_insert_value (re_ctx_p, start_offset, qmax + RE_QMAX_OFFSET); 139 re_insert_value (re_ctx_p, start_offset, qmin); 140 re_insert_opcode (re_ctx_p, start_offset, re_ctx_p->token.greedy ? RE_OP_GREEDY_ITERATOR : RE_OP_LAZY_ITERATOR); 141} /* re_insert_atom_iterator */ 142 143/** 144 * Insert a lookahead assertion. 145 */ 146static void 147re_insert_assertion_lookahead (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 148 uint32_t start_offset, /**< atom start offset */ 149 uint32_t capture_start, /**< index of first nested capture */ 150 bool negative) /** lookahead type */ 151{ 152 const uint32_t qmin = re_ctx_p->token.qmin; 153 154 re_append_opcode (re_ctx_p, RE_OP_ASSERT_END); 155 re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset); 156 157 /* We need to clear nested capturing group results when a negative assertion or the tail after a positive assertion 158 * does not match, so we store the begin and end index of nested capturing groups. */ 159 re_insert_value (re_ctx_p, start_offset, re_ctx_p->captures_count - capture_start); 160 re_insert_value (re_ctx_p, start_offset, capture_start); 161 162 /* Lookaheads always result in zero length matches, which means iterations will always stop on the first match. 163 * This allows us to not have to deal with iterations beyond one. Either qmin == 0 which will implicitly match, 164 * or qmin > 0, in which case the first iteration will decide whether the assertion matches depending on whether 165 * the iteration matched or not. This also allows us to ignore qmax entirely. */ 166 re_insert_byte (re_ctx_p, start_offset, (uint8_t) JERRY_MIN (qmin, 1)); 167 168 const re_opcode_t opcode = (negative) ? RE_OP_ASSERT_LOOKAHEAD_NEG : RE_OP_ASSERT_LOOKAHEAD_POS; 169 re_insert_opcode (re_ctx_p, start_offset, opcode); 170} /* re_insert_assertion_lookahead */ 171 172/** 173 * Consume non greedy (question mark) character if present. 174 */ 175static void 176re_parse_lazy_char (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */ 177{ 178 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 179 && *re_ctx_p->input_curr_p == LIT_CHAR_QUESTION) 180 { 181 re_ctx_p->input_curr_p++; 182 re_ctx_p->token.greedy = false; 183 return; 184 } 185 186 re_ctx_p->token.greedy = true; 187} /* re_parse_lazy_char */ 188 189/** 190 * Parse a max 3 digit long octal number from the input string, with a decimal value less than 256. 191 * 192 * @return value of the octal number 193 */ 194static uint32_t 195re_parse_octal (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */ 196{ 197 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 198 JERRY_ASSERT (lit_char_is_octal_digit (*re_ctx_p->input_curr_p)); 199 200 uint32_t value = (uint32_t) (*re_ctx_p->input_curr_p++) - LIT_CHAR_0; 201 202 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 203 && lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 204 { 205 value = value * 8 + (*re_ctx_p->input_curr_p++) - LIT_CHAR_0; 206 } 207 208 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 209 && lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 210 { 211 const uint32_t new_value = value * 8 + (*re_ctx_p->input_curr_p) - LIT_CHAR_0; 212 213 if (new_value <= RE_MAX_OCTAL_VALUE) 214 { 215 value = new_value; 216 re_ctx_p->input_curr_p++; 217 } 218 } 219 220 return value; 221} /* re_parse_octal */ 222 223/** 224 * Check that the currently parsed quantifier is valid. 225 * 226 * @return ECMA_VALUE_ERROR, if quantifier is invalid 227 * ECMA_VALUE_EMPTY, otherwise 228 */ 229static ecma_value_t 230re_check_quantifier (re_compiler_ctx_t *re_ctx_p) 231{ 232 if (re_ctx_p->token.qmin > re_ctx_p->token.qmax) 233 { 234 /* ECMA-262 v5.1 15.10.2.5 */ 235 return ecma_raise_syntax_error (ECMA_ERR_MSG ("quantifier error: min > max.")); 236 } 237 238 return ECMA_VALUE_EMPTY; 239} /* re_check_quantifier */ 240 241/** 242 * Parse RegExp quantifier. 243 * 244 * @return ECMA_VALUE_TRUE - if parsed successfully 245 * ECMA_VALUE_FALSE - otherwise 246 */ 247static ecma_value_t 248re_parse_quantifier (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 249{ 250 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 251 { 252 switch (*re_ctx_p->input_curr_p) 253 { 254 case LIT_CHAR_QUESTION: 255 { 256 re_ctx_p->input_curr_p++; 257 re_ctx_p->token.qmin = 0; 258 re_ctx_p->token.qmax = 1; 259 260 re_parse_lazy_char (re_ctx_p); 261 return ECMA_VALUE_TRUE; 262 } 263 case LIT_CHAR_ASTERISK: 264 { 265 re_ctx_p->input_curr_p++; 266 re_ctx_p->token.qmin = 0; 267 re_ctx_p->token.qmax = RE_INFINITY; 268 269 re_parse_lazy_char (re_ctx_p); 270 return ECMA_VALUE_TRUE; 271 } 272 case LIT_CHAR_PLUS: 273 { 274 re_ctx_p->input_curr_p++; 275 re_ctx_p->token.qmin = 1; 276 re_ctx_p->token.qmax = RE_INFINITY; 277 278 re_parse_lazy_char (re_ctx_p); 279 return ECMA_VALUE_TRUE; 280 } 281 case LIT_CHAR_LEFT_BRACE: 282 { 283 const lit_utf8_byte_t *current_p = re_ctx_p->input_curr_p + 1; 284 uint32_t qmin = 0; 285 uint32_t qmax = RE_INFINITY; 286 287 if (current_p >= re_ctx_p->input_end_p) 288 { 289 break; 290 } 291 292 if (!lit_char_is_decimal_digit (*current_p)) 293 { 294 break; 295 } 296 297 qmin = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p); 298 299 if (current_p >= re_ctx_p->input_end_p) 300 { 301 break; 302 } 303 304 lit_utf8_byte_t ch = *current_p++; 305 if (ch == LIT_CHAR_RIGHT_BRACE) 306 { 307 qmax = qmin; 308 } 309 else if (ch == LIT_CHAR_COMMA) 310 { 311 if (current_p >= re_ctx_p->input_end_p) 312 { 313 break; 314 } 315 316 if (lit_char_is_decimal_digit (*current_p)) 317 { 318 qmax = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p); 319 } 320 321 if (current_p >= re_ctx_p->input_end_p || *current_p++ != LIT_CHAR_RIGHT_BRACE) 322 { 323 break; 324 } 325 } 326 else 327 { 328 break; 329 } 330 331 re_ctx_p->token.qmin = qmin; 332 re_ctx_p->token.qmax = qmax; 333 re_ctx_p->input_curr_p = current_p; 334 re_parse_lazy_char (re_ctx_p); 335 return ECMA_VALUE_TRUE; 336 } 337 default: 338 { 339 break; 340 } 341 } 342 } 343 344 re_ctx_p->token.qmin = 1; 345 re_ctx_p->token.qmax = 1; 346 re_ctx_p->token.greedy = true; 347 348 return ECMA_VALUE_FALSE; 349} /* re_parse_quantifier */ 350 351/** 352 * Count the number of groups in the current pattern. 353 */ 354static void 355re_count_groups (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 356{ 357 bool is_char_class = 0; 358 re_ctx_p->groups_count = 0; 359 const lit_utf8_byte_t *curr_p = re_ctx_p->input_start_p; 360 361 while (curr_p < re_ctx_p->input_end_p) 362 { 363 switch (*curr_p++) 364 { 365 case LIT_CHAR_BACKSLASH: 366 { 367 if (curr_p < re_ctx_p->input_end_p) 368 { 369 lit_utf8_incr (&curr_p); 370 } 371 break; 372 } 373 case LIT_CHAR_LEFT_SQUARE: 374 { 375 is_char_class = true; 376 break; 377 } 378 case LIT_CHAR_RIGHT_SQUARE: 379 { 380 is_char_class = false; 381 break; 382 } 383 case LIT_CHAR_LEFT_PAREN: 384 { 385 if (curr_p < re_ctx_p->input_end_p 386 && *curr_p != LIT_CHAR_QUESTION 387 && !is_char_class) 388 { 389 re_ctx_p->groups_count++; 390 } 391 break; 392 } 393 } 394 } 395} /* re_count_groups */ 396 397#if ENABLED (JERRY_ES2015) 398/** 399 * Check if a code point is a Syntax character 400 * 401 * @return true, if syntax character 402 * false, otherwise 403 */ 404static bool 405re_is_syntax_char (lit_code_point_t cp) /**< code point */ 406{ 407 return (cp == LIT_CHAR_CIRCUMFLEX 408 || cp == LIT_CHAR_DOLLAR_SIGN 409 || cp == LIT_CHAR_BACKSLASH 410 || cp == LIT_CHAR_DOT 411 || cp == LIT_CHAR_ASTERISK 412 || cp == LIT_CHAR_PLUS 413 || cp == LIT_CHAR_QUESTION 414 || cp == LIT_CHAR_LEFT_PAREN 415 || cp == LIT_CHAR_RIGHT_PAREN 416 || cp == LIT_CHAR_LEFT_SQUARE 417 || cp == LIT_CHAR_RIGHT_SQUARE 418 || cp == LIT_CHAR_LEFT_BRACE 419 || cp == LIT_CHAR_RIGHT_BRACE 420 || cp == LIT_CHAR_VLINE); 421} /* re_is_syntax_char */ 422#endif /* ENABLED (JERRY_ES2015) */ 423 424/** 425 * Parse a Character Escape or a Character Class Escape. 426 * 427 * @return ECMA_VALUE_EMPTY, if parsed successfully 428 * ECMA_VALUE_ERROR, otherwise 429 */ 430static ecma_value_t 431re_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 432{ 433 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 434 re_ctx_p->token.type = RE_TOK_CHAR; 435 436 if (lit_char_is_decimal_digit (*re_ctx_p->input_curr_p)) 437 { 438 /* NULL code point escape, only valid if there are no following digits. */ 439 if (*re_ctx_p->input_curr_p == LIT_CHAR_0 440 && (re_ctx_p->input_curr_p + 1 >= re_ctx_p->input_end_p 441 || !lit_char_is_decimal_digit (re_ctx_p->input_curr_p[1]))) 442 { 443 re_ctx_p->input_curr_p++; 444 re_ctx_p->token.value = LIT_UNICODE_CODE_POINT_NULL; 445 return ECMA_VALUE_EMPTY; 446 } 447 448#if ENABLED (JERRY_ES2015) 449 if (re_ctx_p->flags & RE_FLAG_UNICODE) 450 { 451 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape sequence")); 452 } 453#endif /* ENABLED (JERRY_ES2015) */ 454 455 /* Legacy octal escape sequence */ 456 if (lit_char_is_octal_digit (*re_ctx_p->input_curr_p)) 457 { 458 re_ctx_p->token.value = re_parse_octal (re_ctx_p); 459 return ECMA_VALUE_EMPTY; 460 } 461 462 /* Identity escape */ 463 re_ctx_p->token.value = *re_ctx_p->input_curr_p++; 464 return ECMA_VALUE_EMPTY; 465 } 466 467 lit_code_point_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 468 switch (ch) 469 { 470 /* Character Class escapes */ 471 case LIT_CHAR_LOWERCASE_D: 472 { 473 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 474 re_ctx_p->token.value = RE_ESCAPE_DIGIT; 475 break; 476 } 477 case LIT_CHAR_UPPERCASE_D: 478 { 479 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 480 re_ctx_p->token.value = RE_ESCAPE_NOT_DIGIT; 481 break; 482 } 483 case LIT_CHAR_LOWERCASE_S: 484 { 485 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 486 re_ctx_p->token.value = RE_ESCAPE_WHITESPACE; 487 break; 488 } 489 case LIT_CHAR_UPPERCASE_S: 490 { 491 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 492 re_ctx_p->token.value = RE_ESCAPE_NOT_WHITESPACE; 493 break; 494 } 495 case LIT_CHAR_LOWERCASE_W: 496 { 497 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 498 re_ctx_p->token.value = RE_ESCAPE_WORD_CHAR; 499 break; 500 } 501 case LIT_CHAR_UPPERCASE_W: 502 { 503 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE; 504 re_ctx_p->token.value = RE_ESCAPE_NOT_WORD_CHAR; 505 break; 506 } 507 /* Control escapes */ 508 case LIT_CHAR_LOWERCASE_F: 509 { 510 re_ctx_p->token.value = LIT_CHAR_FF; 511 break; 512 } 513 case LIT_CHAR_LOWERCASE_N: 514 { 515 re_ctx_p->token.value = LIT_CHAR_LF; 516 break; 517 } 518 case LIT_CHAR_LOWERCASE_R: 519 { 520 re_ctx_p->token.value = LIT_CHAR_CR; 521 break; 522 } 523 case LIT_CHAR_LOWERCASE_T: 524 { 525 re_ctx_p->token.value = LIT_CHAR_TAB; 526 break; 527 } 528 case LIT_CHAR_LOWERCASE_V: 529 { 530 re_ctx_p->token.value = LIT_CHAR_VTAB; 531 break; 532 } 533 /* Control letter */ 534 case LIT_CHAR_LOWERCASE_C: 535 { 536 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 537 { 538 ch = *re_ctx_p->input_curr_p; 539 540 if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END) 541 || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)) 542 { 543 re_ctx_p->token.value = (ch % 32); 544 re_ctx_p->input_curr_p++; 545 546 break; 547 } 548 } 549 550#if ENABLED (JERRY_ES2015) 551 if (re_ctx_p->flags & RE_FLAG_UNICODE) 552 { 553 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid control escape sequence")); 554 } 555#endif /* ENABLED (JERRY_ES2015) */ 556 557 re_ctx_p->token.value = LIT_CHAR_BACKSLASH; 558 re_ctx_p->input_curr_p--; 559 560 break; 561 } 562 /* Hex escape */ 563 case LIT_CHAR_LOWERCASE_X: 564 { 565 uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 2); 566 if (hex_value != UINT32_MAX) 567 { 568 re_ctx_p->token.value = hex_value; 569 re_ctx_p->input_curr_p += 2; 570 break; 571 } 572 573#if ENABLED (JERRY_ES2015) 574 if (re_ctx_p->flags & RE_FLAG_UNICODE) 575 { 576 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid hex escape sequence")); 577 } 578#endif /* ENABLED (JERRY_ES2015) */ 579 580 re_ctx_p->token.value = LIT_CHAR_LOWERCASE_X; 581 break; 582 } 583 /* Unicode escape */ 584 case LIT_CHAR_LOWERCASE_U: 585 { 586 uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 4); 587 if (hex_value != UINT32_MAX) 588 { 589 re_ctx_p->token.value = hex_value; 590 re_ctx_p->input_curr_p += 4; 591 592#if ENABLED (JERRY_ES2015) 593 if (re_ctx_p->flags & RE_FLAG_UNICODE 594 && lit_is_code_point_utf16_high_surrogate (re_ctx_p->token.value) 595 && re_ctx_p->input_curr_p + 6 <= re_ctx_p->input_end_p 596 && re_ctx_p->input_curr_p[0] == '\\' 597 && re_ctx_p->input_curr_p[1] == 'u') 598 { 599 hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p + 2, re_ctx_p->input_end_p, 4); 600 if (lit_is_code_point_utf16_low_surrogate (hex_value)) 601 { 602 re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) re_ctx_p->token.value, 603 (ecma_char_t) hex_value); 604 re_ctx_p->input_curr_p += 6; 605 } 606 } 607#endif /* ENABLED (JERRY_ES2015) */ 608 609 break; 610 } 611 612#if ENABLED (JERRY_ES2015) 613 if (re_ctx_p->flags & RE_FLAG_UNICODE) 614 { 615 if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p 616 && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE 617 && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1])) 618 { 619 lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]); 620 re_ctx_p->input_curr_p += 2; 621 622 while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p)) 623 { 624 cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++); 625 626 if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX)) 627 { 628 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence")); 629 } 630 } 631 632 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE) 633 { 634 re_ctx_p->input_curr_p++; 635 re_ctx_p->token.value = cp; 636 break; 637 } 638 } 639 640 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence")); 641 } 642#endif /* ENABLED (JERRY_ES2015) */ 643 644 re_ctx_p->token.value = LIT_CHAR_LOWERCASE_U; 645 break; 646 } 647 /* Identity escape */ 648 default: 649 { 650#if ENABLED (JERRY_ES2015) 651 /* Must be '/', or one of SyntaxCharacter */ 652 if (re_ctx_p->flags & RE_FLAG_UNICODE 653 && ch != LIT_CHAR_SLASH 654 && !re_is_syntax_char (ch)) 655 { 656 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 657 } 658#endif /* ENABLED (JERRY_ES2015) */ 659 re_ctx_p->token.value = ch; 660 } 661 } 662 663 return ECMA_VALUE_EMPTY; 664} /* re_parse_char_escape */ 665 666/** 667 * Read the input pattern and parse the next token for the RegExp compiler 668 * 669 * @return empty ecma value - if parsed successfully 670 * error ecma value - otherwise 671 * 672 * Returned value must be freed with ecma_free_value 673 */ 674static ecma_value_t 675re_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 676{ 677 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 678 { 679 re_ctx_p->token.type = RE_TOK_EOF; 680 return ECMA_VALUE_EMPTY; 681 } 682 683 ecma_char_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 684 685 switch (ch) 686 { 687 case LIT_CHAR_CIRCUMFLEX: 688 { 689 re_ctx_p->token.type = RE_TOK_ASSERT_START; 690 return ECMA_VALUE_EMPTY; 691 } 692 case LIT_CHAR_DOLLAR_SIGN: 693 { 694 re_ctx_p->token.type = RE_TOK_ASSERT_END; 695 return ECMA_VALUE_EMPTY; 696 } 697 case LIT_CHAR_VLINE: 698 { 699 re_ctx_p->token.type = RE_TOK_ALTERNATIVE; 700 return ECMA_VALUE_EMPTY; 701 } 702 case LIT_CHAR_DOT: 703 { 704 re_ctx_p->token.type = RE_TOK_PERIOD; 705 /* Check quantifier */ 706 break; 707 } 708 case LIT_CHAR_BACKSLASH: 709 { 710 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 711 { 712 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 713 } 714 715 /* DecimalEscape, Backreferences cannot start with a zero digit. */ 716 if (*re_ctx_p->input_curr_p > LIT_CHAR_0 && *re_ctx_p->input_curr_p <= LIT_CHAR_9) 717 { 718 const lit_utf8_byte_t *digits_p = re_ctx_p->input_curr_p; 719 const uint32_t value = lit_parse_decimal (&digits_p, re_ctx_p->input_end_p); 720 721 if (re_ctx_p->groups_count < 0) 722 { 723 re_count_groups (re_ctx_p); 724 } 725 726 if (value <= (uint32_t) re_ctx_p->groups_count) 727 { 728 /* Valid backreference */ 729 re_ctx_p->input_curr_p = digits_p; 730 re_ctx_p->token.type = RE_TOK_BACKREFERENCE; 731 re_ctx_p->token.value = value; 732 733 /* Check quantifier */ 734 break; 735 } 736 } 737 738 if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B) 739 { 740 re_ctx_p->input_curr_p++; 741 re_ctx_p->token.type = RE_TOK_ASSERT_WORD_BOUNDARY; 742 return ECMA_VALUE_EMPTY; 743 } 744 else if (*re_ctx_p->input_curr_p == LIT_CHAR_UPPERCASE_B) 745 { 746 re_ctx_p->input_curr_p++; 747 re_ctx_p->token.type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY; 748 return ECMA_VALUE_EMPTY; 749 } 750 751 const ecma_value_t parse_result = re_parse_char_escape (re_ctx_p); 752 753 if (ECMA_IS_VALUE_ERROR (parse_result)) 754 { 755 return parse_result; 756 } 757 758 /* Check quantifier */ 759 break; 760 } 761 case LIT_CHAR_LEFT_PAREN: 762 { 763 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 764 { 765 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated group")); 766 } 767 768 if (*re_ctx_p->input_curr_p == LIT_CHAR_QUESTION) 769 { 770 re_ctx_p->input_curr_p++; 771 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 772 { 773 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group")); 774 } 775 776 ch = *re_ctx_p->input_curr_p++; 777 778 if (ch == LIT_CHAR_EQUALS) 779 { 780 re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD; 781 re_ctx_p->token.value = false; 782 } 783 else if (ch == LIT_CHAR_EXCLAMATION) 784 { 785 re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD; 786 re_ctx_p->token.value = true; 787 } 788 else if (ch == LIT_CHAR_COLON) 789 { 790 re_ctx_p->token.type = RE_TOK_START_NON_CAPTURE_GROUP; 791 } 792 else 793 { 794 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group")); 795 } 796 } 797 else 798 { 799 re_ctx_p->token.type = RE_TOK_START_CAPTURE_GROUP; 800 } 801 802 return ECMA_VALUE_EMPTY; 803 } 804 case LIT_CHAR_RIGHT_PAREN: 805 { 806 re_ctx_p->token.type = RE_TOK_END_GROUP; 807 808 return ECMA_VALUE_EMPTY; 809 } 810 case LIT_CHAR_LEFT_SQUARE: 811 { 812 re_ctx_p->token.type = RE_TOK_CHAR_CLASS; 813 814 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 815 { 816 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class.")); 817 } 818 819 return ECMA_VALUE_EMPTY; 820 } 821 case LIT_CHAR_QUESTION: 822 case LIT_CHAR_ASTERISK: 823 case LIT_CHAR_PLUS: 824 { 825 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid quantifier.")); 826 } 827 case LIT_CHAR_LEFT_BRACE: 828 { 829 re_ctx_p->input_curr_p--; 830 if (ecma_is_value_true (re_parse_quantifier (re_ctx_p))) 831 { 832 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Nothing to repeat.")); 833 } 834 835#if ENABLED (JERRY_ES2015) 836 if (re_ctx_p->flags & RE_FLAG_UNICODE) 837 { 838 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket.")); 839 } 840#endif /* ENABLED (JERRY_ES2015) */ 841 842 re_ctx_p->input_curr_p++; 843 re_ctx_p->token.type = RE_TOK_CHAR; 844 re_ctx_p->token.value = ch; 845 846 /* Check quantifier */ 847 break; 848 } 849#if ENABLED (JERRY_ES2015) 850 case LIT_CHAR_RIGHT_SQUARE: 851 case LIT_CHAR_RIGHT_BRACE: 852 { 853 if (re_ctx_p->flags & RE_FLAG_UNICODE) 854 { 855 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket.")); 856 } 857 858 /* FALLTHRU */ 859 } 860#endif /* ENABLED (JERRY_ES2015) */ 861 default: 862 { 863 re_ctx_p->token.type = RE_TOK_CHAR; 864 re_ctx_p->token.value = ch; 865 866#if ENABLED (JERRY_ES2015) 867 if (re_ctx_p->flags & RE_FLAG_UNICODE 868 && lit_is_code_point_utf16_high_surrogate (ch) 869 && re_ctx_p->input_curr_p < re_ctx_p->input_end_p) 870 { 871 const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p); 872 if (lit_is_code_point_utf16_low_surrogate (next)) 873 { 874 re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point (ch, next); 875 re_ctx_p->input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT; 876 } 877 } 878#endif /* ENABLED (JERRY_ES2015) */ 879 880 /* Check quantifier */ 881 break; 882 } 883 } 884 885 re_parse_quantifier (re_ctx_p); 886 return re_check_quantifier (re_ctx_p); 887} /* re_parse_next_token */ 888 889/** 890 * Append a character class range to the bytecode. 891 */ 892static void 893re_class_add_range (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 894 lit_code_point_t start, /**< range begin */ 895 lit_code_point_t end) /**< range end */ 896{ 897 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 898 { 899 start = ecma_regexp_canonicalize_char (start, re_ctx_p->flags & RE_FLAG_UNICODE); 900 end = ecma_regexp_canonicalize_char (end, re_ctx_p->flags & RE_FLAG_UNICODE); 901 } 902 903 re_append_char (re_ctx_p, start); 904 re_append_char (re_ctx_p, end); 905} /* re_class_add_range */ 906 907/** 908 * Add a single character to the character class 909 */ 910static void 911re_class_add_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 912 uint32_t class_offset, /**< character class bytecode offset*/ 913 lit_code_point_t cp) /**< code point */ 914{ 915 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 916 { 917 cp = ecma_regexp_canonicalize_char (cp, re_ctx_p->flags & RE_FLAG_UNICODE); 918 } 919 920 re_insert_char (re_ctx_p, class_offset, cp); 921} /* re_class_add_char */ 922 923/** 924 * Invalid character code point 925 */ 926#define RE_INVALID_CP 0xFFFFFFFF 927 928/** 929 * Read the input pattern and parse the range of character class 930 * 931 * @return empty ecma value - if parsed successfully 932 * error ecma value - otherwise 933 * 934 * Returned value must be freed with ecma_free_value 935 */ 936static ecma_value_t 937re_parse_char_class (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */ 938{ 939 static const uint8_t escape_flags[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20}; 940 const uint32_t class_offset = re_bytecode_size (re_ctx_p); 941 942 uint8_t found_escape_flags = 0; 943 uint8_t out_class_flags = 0; 944 945 uint32_t range_count = 0; 946 uint32_t char_count = 0; 947 bool is_range = false; 948 949 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 950 if (*re_ctx_p->input_curr_p == LIT_CHAR_CIRCUMFLEX) 951 { 952 re_ctx_p->input_curr_p++; 953 out_class_flags |= RE_CLASS_INVERT; 954 } 955 956 lit_code_point_t start = RE_INVALID_CP; 957 958 while (true) 959 { 960 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 961 { 962 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class.")); 963 } 964 965 if (*re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_SQUARE) 966 { 967 if (is_range) 968 { 969 if (start != RE_INVALID_CP) 970 { 971 re_class_add_char (re_ctx_p, class_offset, start); 972 char_count++; 973 } 974 975 re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS); 976 char_count++; 977 } 978 979 re_ctx_p->input_curr_p++; 980 break; 981 } 982 983 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p); 984 lit_code_point_t current; 985 986 if (*re_ctx_p->input_curr_p == LIT_CHAR_BACKSLASH) 987 { 988 re_ctx_p->input_curr_p++; 989 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p) 990 { 991 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape")); 992 } 993 994 if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B) 995 { 996 re_ctx_p->input_curr_p++; 997 current = LIT_CHAR_BS; 998 } 999#if ENABLED (JERRY_ES2015) 1000 else if (*re_ctx_p->input_curr_p == LIT_CHAR_MINUS) 1001 { 1002 re_ctx_p->input_curr_p++; 1003 current = LIT_CHAR_MINUS; 1004 } 1005#endif /* ENABLED (JERRY_ES2015) */ 1006 else if ((re_ctx_p->flags & RE_FLAG_UNICODE) == 0 1007 && *re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_C 1008 && re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p 1009 && (lit_char_is_decimal_digit (*(re_ctx_p->input_curr_p + 1)) 1010 || *(re_ctx_p->input_curr_p + 1) == LIT_CHAR_UNDERSCORE)) 1011 { 1012 current = ((uint8_t) *(re_ctx_p->input_curr_p + 1) % 32); 1013 re_ctx_p->input_curr_p += 2; 1014 } 1015 else 1016 { 1017 if (ECMA_IS_VALUE_ERROR (re_parse_char_escape (re_ctx_p))) 1018 { 1019 return ECMA_VALUE_ERROR; 1020 } 1021 1022 if (re_ctx_p->token.type == RE_TOK_CLASS_ESCAPE) 1023 { 1024 const uint8_t escape = (uint8_t) re_ctx_p->token.value; 1025 found_escape_flags |= escape_flags[escape]; 1026 current = RE_INVALID_CP; 1027 } 1028 else 1029 { 1030 JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR); 1031 current = re_ctx_p->token.value; 1032 } 1033 } 1034 } 1035#if ENABLED (JERRY_ES2015) 1036 else if (re_ctx_p->flags & RE_FLAG_UNICODE) 1037 { 1038 current = ecma_regexp_unicode_advance (&re_ctx_p->input_curr_p, re_ctx_p->input_end_p); 1039 } 1040#endif /* ENABLED (JERRY_ES2015) */ 1041 else 1042 { 1043 current = lit_cesu8_read_next (&re_ctx_p->input_curr_p); 1044 } 1045 1046 if (is_range) 1047 { 1048 is_range = false; 1049 1050 if (start != RE_INVALID_CP && current != RE_INVALID_CP) 1051 { 1052 if (start > current) 1053 { 1054 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Range out of order in character class")); 1055 } 1056 1057 re_class_add_range (re_ctx_p, start, current); 1058 range_count++; 1059 continue; 1060 } 1061 1062#if ENABLED (JERRY_ES2015) 1063 if (re_ctx_p->flags & RE_FLAG_UNICODE) 1064 { 1065 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid character class")); 1066 } 1067#endif /* ENABLED (JERRY_ES2015) */ 1068 1069 if (start != RE_INVALID_CP) 1070 { 1071 re_class_add_char (re_ctx_p, class_offset, start); 1072 char_count++; 1073 } 1074 else if (current != RE_INVALID_CP) 1075 { 1076 re_class_add_char (re_ctx_p, class_offset, current); 1077 char_count++; 1078 } 1079 1080 re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS); 1081 char_count++; 1082 continue; 1083 } 1084 1085 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p 1086 && *re_ctx_p->input_curr_p == LIT_CHAR_MINUS) 1087 { 1088 re_ctx_p->input_curr_p++; 1089 start = current; 1090 is_range = true; 1091 continue; 1092 } 1093 1094 if (current != RE_INVALID_CP) 1095 { 1096 re_class_add_char (re_ctx_p, class_offset, current); 1097 char_count++; 1098 } 1099 } 1100 1101 uint8_t escape_count = 0; 1102 for (ecma_class_escape_t escape = RE_ESCAPE__START; escape < RE_ESCAPE__COUNT; ++escape) 1103 { 1104 if (found_escape_flags & escape_flags[escape]) 1105 { 1106 re_insert_byte (re_ctx_p, class_offset, (uint8_t) escape); 1107 escape_count++; 1108 } 1109 } 1110 1111 if (range_count > 0) 1112 { 1113 re_insert_value (re_ctx_p, class_offset, range_count); 1114 out_class_flags |= RE_CLASS_HAS_RANGES; 1115 } 1116 1117 if (char_count > 0) 1118 { 1119 re_insert_value (re_ctx_p, class_offset, char_count); 1120 out_class_flags |= RE_CLASS_HAS_CHARS; 1121 } 1122 1123 JERRY_ASSERT (escape_count <= RE_CLASS_ESCAPE_COUNT_MASK); 1124 out_class_flags |= escape_count; 1125 1126 re_insert_byte (re_ctx_p, class_offset, out_class_flags); 1127 re_insert_opcode (re_ctx_p, class_offset, RE_OP_CHAR_CLASS); 1128 1129 re_parse_quantifier (re_ctx_p); 1130 return re_check_quantifier (re_ctx_p); 1131} /* re_parse_char_class */ 1132 1133/** 1134 * Parse alternatives 1135 * 1136 * @return empty ecma value - if alternative was successfully parsed 1137 * error ecma value - otherwise 1138 * 1139 * Returned value must be freed with ecma_free_value 1140 */ 1141ecma_value_t 1142re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ 1143 bool expect_eof) /**< expect end of file */ 1144{ 1145 ECMA_CHECK_STACK_USAGE (); 1146 uint32_t alternative_offset = re_bytecode_size (re_ctx_p); 1147 bool first_alternative = true; 1148 1149 while (true) 1150 { 1151 ecma_value_t next_token_result = re_parse_next_token (re_ctx_p); 1152 if (ECMA_IS_VALUE_ERROR (next_token_result)) 1153 { 1154 return next_token_result; 1155 } 1156 1157 JERRY_ASSERT (ecma_is_value_empty (next_token_result)); 1158 1159 uint32_t atom_offset = re_bytecode_size (re_ctx_p); 1160 1161 switch (re_ctx_p->token.type) 1162 { 1163 case RE_TOK_START_CAPTURE_GROUP: 1164 { 1165 const uint32_t idx = re_ctx_p->captures_count++; 1166 const uint32_t capture_start = idx; 1167 1168 ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1169 if (ECMA_IS_VALUE_ERROR (result)) 1170 { 1171 return result; 1172 } 1173 1174 re_parse_quantifier (re_ctx_p); 1175 1176 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1177 { 1178 return ECMA_VALUE_ERROR; 1179 } 1180 1181 re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, true); 1182 break; 1183 } 1184 case RE_TOK_START_NON_CAPTURE_GROUP: 1185 { 1186 const uint32_t idx = re_ctx_p->non_captures_count++; 1187 const uint32_t capture_start = re_ctx_p->captures_count; 1188 1189 ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1190 if (ECMA_IS_VALUE_ERROR (result)) 1191 { 1192 return result; 1193 } 1194 1195 re_parse_quantifier (re_ctx_p); 1196 1197 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1198 { 1199 return ECMA_VALUE_ERROR; 1200 } 1201 1202 re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, false); 1203 break; 1204 } 1205 case RE_TOK_PERIOD: 1206 { 1207#if ENABLED (JERRY_ES2015) 1208 re_append_opcode (re_ctx_p, (re_ctx_p->flags & RE_FLAG_UNICODE) ? RE_OP_UNICODE_PERIOD : RE_OP_PERIOD); 1209#else /* !ENABLED (JERRY_ES2015) */ 1210 re_append_opcode (re_ctx_p, RE_OP_PERIOD); 1211#endif /* !ENABLED (JERRY_ES2015) */ 1212 1213 re_insert_atom_iterator (re_ctx_p, atom_offset); 1214 break; 1215 } 1216 case RE_TOK_ALTERNATIVE: 1217 { 1218 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1219 re_insert_opcode (re_ctx_p, alternative_offset, first_alternative ? RE_OP_ALTERNATIVE_START 1220 : RE_OP_ALTERNATIVE_NEXT); 1221 1222 alternative_offset = re_bytecode_size (re_ctx_p); 1223 first_alternative = false; 1224 break; 1225 } 1226 case RE_TOK_ASSERT_START: 1227 { 1228 re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_START); 1229 break; 1230 } 1231 case RE_TOK_ASSERT_END: 1232 { 1233 re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_END); 1234 break; 1235 } 1236 case RE_TOK_ASSERT_WORD_BOUNDARY: 1237 { 1238 re_append_opcode (re_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY); 1239 break; 1240 } 1241 case RE_TOK_ASSERT_NOT_WORD_BOUNDARY: 1242 { 1243 re_append_opcode (re_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY); 1244 break; 1245 } 1246 case RE_TOK_ASSERT_LOOKAHEAD: 1247 { 1248 const uint32_t start_capture_count = re_ctx_p->captures_count; 1249 const bool is_negative = !!re_ctx_p->token.value; 1250 1251 ecma_value_t result = re_parse_alternative (re_ctx_p, false); 1252 1253 if (ECMA_IS_VALUE_ERROR (result)) 1254 { 1255 return result; 1256 } 1257 1258#if ENABLED (JERRY_ES2015) 1259 if (re_ctx_p->flags & RE_FLAG_UNICODE) 1260 { 1261 re_ctx_p->token.qmin = 1; 1262 re_ctx_p->token.qmax = 1; 1263 re_ctx_p->token.greedy = true; 1264 } 1265 else 1266#endif /* ENABLED (JERRY_ES2015) */ 1267 { 1268 re_parse_quantifier (re_ctx_p); 1269 1270 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p))) 1271 { 1272 return ECMA_VALUE_ERROR; 1273 } 1274 } 1275 1276 re_insert_assertion_lookahead (re_ctx_p, atom_offset, start_capture_count, is_negative); 1277 break; 1278 } 1279 case RE_TOK_BACKREFERENCE: 1280 { 1281 const uint32_t backref_idx = re_ctx_p->token.value; 1282 re_append_opcode (re_ctx_p, RE_OP_BACKREFERENCE); 1283 re_append_value (re_ctx_p, backref_idx); 1284 1285 if (re_ctx_p->token.qmin != 1 || re_ctx_p->token.qmax != 1) 1286 { 1287 const uint32_t group_idx = re_ctx_p->non_captures_count++; 1288 re_insert_into_group (re_ctx_p, atom_offset, group_idx, re_ctx_p->captures_count, false); 1289 } 1290 1291 break; 1292 } 1293 case RE_TOK_CLASS_ESCAPE: 1294 { 1295 const ecma_class_escape_t escape = (ecma_class_escape_t) re_ctx_p->token.value; 1296 re_append_opcode (re_ctx_p, RE_OP_CLASS_ESCAPE); 1297 re_append_byte (re_ctx_p, (uint8_t) escape); 1298 1299 re_insert_atom_iterator (re_ctx_p, atom_offset); 1300 break; 1301 } 1302 case RE_TOK_CHAR_CLASS: 1303 { 1304 ecma_value_t result = re_parse_char_class (re_ctx_p); 1305 1306 if (ECMA_IS_VALUE_ERROR (result)) 1307 { 1308 return result; 1309 } 1310 1311 re_insert_atom_iterator (re_ctx_p, atom_offset); 1312 break; 1313 } 1314 case RE_TOK_END_GROUP: 1315 { 1316 if (expect_eof) 1317 { 1318 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unmatched ')'")); 1319 } 1320 1321 if (!first_alternative) 1322 { 1323 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1324 re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT); 1325 } 1326 1327 return ECMA_VALUE_EMPTY; 1328 } 1329 case RE_TOK_EOF: 1330 { 1331 if (!expect_eof) 1332 { 1333 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern.")); 1334 } 1335 1336 if (!first_alternative) 1337 { 1338 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset); 1339 re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT); 1340 } 1341 1342 re_append_opcode (re_ctx_p, RE_OP_EOF); 1343 return ECMA_VALUE_EMPTY; 1344 } 1345 default: 1346 { 1347 JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR); 1348 1349 lit_code_point_t ch = re_ctx_p->token.value; 1350 1351 if (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) == 0) 1352 { 1353 re_append_opcode (re_ctx_p, RE_OP_BYTE); 1354 re_append_byte (re_ctx_p, (uint8_t) ch); 1355 1356 re_insert_atom_iterator (re_ctx_p, atom_offset); 1357 break; 1358 } 1359 1360 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) 1361 { 1362 ch = ecma_regexp_canonicalize_char (ch, re_ctx_p->flags & RE_FLAG_UNICODE); 1363 } 1364 1365 re_append_opcode (re_ctx_p, RE_OP_CHAR); 1366 re_append_char (re_ctx_p, ch); 1367 1368 re_insert_atom_iterator (re_ctx_p, atom_offset); 1369 break; 1370 } 1371 } 1372 } 1373 1374 return ECMA_VALUE_EMPTY; 1375} /* re_parse_alternative */ 1376 1377/** 1378 * @} 1379 * @} 1380 * @} 1381 */ 1382 1383#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 1384