1/* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "ecma-exceptions.h"
17#include "ecma-globals.h"
18#include "ecma-try-catch-macro.h"
19#include "jcontext.h"
20#include "jrt-libc-includes.h"
21#include "lit-char-helpers.h"
22#include "re-compiler.h"
23#include "re-parser.h"
24
25#if ENABLED (JERRY_BUILTIN_REGEXP)
26
27/** \addtogroup parser Parser
28 * @{
29 *
30 * \addtogroup regexparser Regular expression
31 * @{
32 *
33 * \addtogroup regexparser_parser Parser
34 * @{
35 */
36
37/**
38 * Get the start opcode for the current group.
39 *
40 * @return RegExp opcode
41 */
42static re_opcode_t
43re_get_group_start_opcode (bool is_capturing) /**< is capturing group */
44{
45  return (is_capturing) ? RE_OP_CAPTURING_GROUP_START : RE_OP_NON_CAPTURING_GROUP_START;
46} /* re_get_group_start_opcode*/
47
48/**
49 * Get the end opcode for the current group.
50 *
51 * @return RegExp opcode
52 */
53static re_opcode_t
54re_get_group_end_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
55                         bool is_capturing) /**< is capturing group */
56{
57  if (is_capturing)
58  {
59    if (re_ctx_p->token.greedy)
60    {
61      return RE_OP_GREEDY_CAPTURING_GROUP_END;
62    }
63
64    return RE_OP_LAZY_CAPTURING_GROUP_END;
65  }
66
67  if (re_ctx_p->token.greedy)
68  {
69    return RE_OP_GREEDY_NON_CAPTURING_GROUP_END;
70  }
71
72  return RE_OP_LAZY_NON_CAPTURING_GROUP_END;
73} /* re_get_group_end_opcode */
74
75/**
76 * Enclose the given bytecode to a group.
77 */
78static void
79re_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
80                      uint32_t group_start_offset, /**< offset of group start */
81                      uint32_t idx, /**< index of group */
82                      uint32_t capture_start, /**< index of first nested capture */
83                      bool is_capturing) /**< is capturing group */
84{
85  uint32_t qmin = re_ctx_p->token.qmin;
86  uint32_t qmax = re_ctx_p->token.qmax;
87
88  if (JERRY_UNLIKELY (!is_capturing && re_bytecode_size (re_ctx_p) == group_start_offset))
89  {
90    return;
91  }
92
93  if (qmin == 0)
94  {
95    re_insert_value (re_ctx_p,
96                     group_start_offset,
97                     re_bytecode_size (re_ctx_p) - group_start_offset);
98  }
99
100  re_insert_value (re_ctx_p, group_start_offset, qmin);
101  re_insert_value (re_ctx_p, group_start_offset, re_ctx_p->captures_count - capture_start);
102
103  if (!is_capturing)
104  {
105    re_insert_value (re_ctx_p, group_start_offset, capture_start);
106  }
107  else
108  {
109    JERRY_ASSERT (idx == capture_start);
110  }
111
112  re_insert_value (re_ctx_p, group_start_offset, idx);
113  re_insert_opcode (re_ctx_p, group_start_offset, re_get_group_start_opcode (is_capturing));
114
115  re_append_opcode (re_ctx_p, re_get_group_end_opcode (re_ctx_p, is_capturing));
116  re_append_value (re_ctx_p, idx);
117  re_append_value (re_ctx_p, qmin);
118  re_append_value (re_ctx_p, qmax + RE_QMAX_OFFSET);
119} /* re_insert_into_group */
120
121/**
122 * Insert simple atom iterator.
123 */
124static void
125re_insert_atom_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
126                         uint32_t start_offset) /**< atom start offset */
127{
128  const uint32_t qmin = re_ctx_p->token.qmin;
129  const uint32_t qmax = re_ctx_p->token.qmax;
130
131  if (qmin == 1 && qmax == 1)
132  {
133    return;
134  }
135
136  re_append_opcode (re_ctx_p, RE_OP_ITERATOR_END);
137  re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset);
138  re_insert_value (re_ctx_p, start_offset, qmax + RE_QMAX_OFFSET);
139  re_insert_value (re_ctx_p, start_offset, qmin);
140  re_insert_opcode (re_ctx_p, start_offset, re_ctx_p->token.greedy ? RE_OP_GREEDY_ITERATOR : RE_OP_LAZY_ITERATOR);
141} /* re_insert_atom_iterator */
142
143/**
144 * Insert a lookahead assertion.
145 */
146static void
147re_insert_assertion_lookahead (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
148                               uint32_t start_offset, /**< atom start offset */
149                               uint32_t capture_start, /**< index of first nested capture */
150                               bool negative) /** lookahead type */
151{
152  const uint32_t qmin = re_ctx_p->token.qmin;
153
154  re_append_opcode (re_ctx_p, RE_OP_ASSERT_END);
155  re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset);
156
157  /* We need to clear nested capturing group results when a negative assertion or the tail after a positive assertion
158   * does not match, so we store the begin and end index of nested capturing groups. */
159  re_insert_value (re_ctx_p, start_offset, re_ctx_p->captures_count - capture_start);
160  re_insert_value (re_ctx_p, start_offset, capture_start);
161
162  /* Lookaheads always result in zero length matches, which means iterations will always stop on the first match.
163   * This allows us to not have to deal with iterations beyond one. Either qmin == 0 which will implicitly match,
164   * or qmin > 0, in which case the first iteration will decide whether the assertion matches depending on whether
165   * the iteration matched or not. This also allows us to ignore qmax entirely. */
166  re_insert_byte (re_ctx_p, start_offset, (uint8_t) JERRY_MIN (qmin, 1));
167
168  const re_opcode_t opcode = (negative) ? RE_OP_ASSERT_LOOKAHEAD_NEG : RE_OP_ASSERT_LOOKAHEAD_POS;
169  re_insert_opcode (re_ctx_p, start_offset, opcode);
170} /* re_insert_assertion_lookahead */
171
172/**
173 * Consume non greedy (question mark) character if present.
174 */
175static void
176re_parse_lazy_char (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */
177{
178  if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
179      && *re_ctx_p->input_curr_p == LIT_CHAR_QUESTION)
180  {
181    re_ctx_p->input_curr_p++;
182    re_ctx_p->token.greedy = false;
183    return;
184  }
185
186  re_ctx_p->token.greedy = true;
187} /* re_parse_lazy_char */
188
189/**
190 * Parse a max 3 digit long octal number from the input string, with a decimal value less than 256.
191 *
192 * @return value of the octal number
193 */
194static uint32_t
195re_parse_octal (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */
196{
197  JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
198  JERRY_ASSERT (lit_char_is_octal_digit (*re_ctx_p->input_curr_p));
199
200  uint32_t value = (uint32_t) (*re_ctx_p->input_curr_p++) - LIT_CHAR_0;
201
202  if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
203      && lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
204  {
205    value = value * 8 + (*re_ctx_p->input_curr_p++) - LIT_CHAR_0;
206  }
207
208  if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
209      && lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
210  {
211    const uint32_t new_value = value * 8 + (*re_ctx_p->input_curr_p) - LIT_CHAR_0;
212
213    if (new_value <= RE_MAX_OCTAL_VALUE)
214    {
215      value = new_value;
216      re_ctx_p->input_curr_p++;
217    }
218  }
219
220  return value;
221} /* re_parse_octal */
222
223/**
224 * Check that the currently parsed quantifier is valid.
225 *
226 * @return ECMA_VALUE_ERROR, if quantifier is invalid
227 *         ECMA_VALUE_EMPTY, otherwise
228 */
229static ecma_value_t
230re_check_quantifier (re_compiler_ctx_t *re_ctx_p)
231{
232  if (re_ctx_p->token.qmin > re_ctx_p->token.qmax)
233  {
234    /* ECMA-262 v5.1 15.10.2.5 */
235    return ecma_raise_syntax_error (ECMA_ERR_MSG ("quantifier error: min > max."));
236  }
237
238  return ECMA_VALUE_EMPTY;
239} /* re_check_quantifier */
240
241/**
242 * Parse RegExp quantifier.
243 *
244 * @return ECMA_VALUE_TRUE - if parsed successfully
245 *         ECMA_VALUE_FALSE - otherwise
246 */
247static ecma_value_t
248re_parse_quantifier (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
249{
250  if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
251  {
252    switch (*re_ctx_p->input_curr_p)
253    {
254      case LIT_CHAR_QUESTION:
255      {
256        re_ctx_p->input_curr_p++;
257        re_ctx_p->token.qmin = 0;
258        re_ctx_p->token.qmax = 1;
259
260        re_parse_lazy_char (re_ctx_p);
261        return ECMA_VALUE_TRUE;
262      }
263      case LIT_CHAR_ASTERISK:
264      {
265        re_ctx_p->input_curr_p++;
266        re_ctx_p->token.qmin = 0;
267        re_ctx_p->token.qmax = RE_INFINITY;
268
269        re_parse_lazy_char (re_ctx_p);
270        return ECMA_VALUE_TRUE;
271      }
272      case LIT_CHAR_PLUS:
273      {
274        re_ctx_p->input_curr_p++;
275        re_ctx_p->token.qmin = 1;
276        re_ctx_p->token.qmax = RE_INFINITY;
277
278        re_parse_lazy_char (re_ctx_p);
279        return ECMA_VALUE_TRUE;
280      }
281      case LIT_CHAR_LEFT_BRACE:
282      {
283        const lit_utf8_byte_t *current_p = re_ctx_p->input_curr_p + 1;
284        uint32_t qmin = 0;
285        uint32_t qmax = RE_INFINITY;
286
287        if (current_p >= re_ctx_p->input_end_p)
288        {
289          break;
290        }
291
292        if (!lit_char_is_decimal_digit (*current_p))
293        {
294          break;
295        }
296
297        qmin = lit_parse_decimal (&current_p, re_ctx_p->input_end_p);
298
299        if (current_p >= re_ctx_p->input_end_p)
300        {
301          break;
302        }
303
304        lit_utf8_byte_t ch = *current_p++;
305        if (ch == LIT_CHAR_RIGHT_BRACE)
306        {
307          qmax = qmin;
308        }
309        else if (ch == LIT_CHAR_COMMA)
310        {
311          if (current_p >= re_ctx_p->input_end_p)
312          {
313            break;
314          }
315
316          if (lit_char_is_decimal_digit (*current_p))
317          {
318            qmax = lit_parse_decimal (&current_p, re_ctx_p->input_end_p);
319          }
320
321          if (current_p >= re_ctx_p->input_end_p || *current_p++ != LIT_CHAR_RIGHT_BRACE)
322          {
323            break;
324          }
325        }
326        else
327        {
328          break;
329        }
330
331        re_ctx_p->token.qmin = qmin;
332        re_ctx_p->token.qmax = qmax;
333        re_ctx_p->input_curr_p = current_p;
334        re_parse_lazy_char (re_ctx_p);
335        return ECMA_VALUE_TRUE;
336      }
337      default:
338      {
339        break;
340      }
341    }
342  }
343
344  re_ctx_p->token.qmin = 1;
345  re_ctx_p->token.qmax = 1;
346  re_ctx_p->token.greedy = true;
347
348  return ECMA_VALUE_FALSE;
349} /* re_parse_quantifier */
350
351/**
352 * Count the number of groups in the current pattern.
353 */
354static void
355re_count_groups (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
356{
357  bool is_char_class = 0;
358  re_ctx_p->groups_count = 0;
359  const lit_utf8_byte_t *curr_p = re_ctx_p->input_start_p;
360
361  while (curr_p < re_ctx_p->input_end_p)
362  {
363    switch (*curr_p++)
364    {
365      case LIT_CHAR_BACKSLASH:
366      {
367        if (curr_p < re_ctx_p->input_end_p)
368        {
369          lit_utf8_incr (&curr_p);
370        }
371        break;
372      }
373      case LIT_CHAR_LEFT_SQUARE:
374      {
375        is_char_class = true;
376        break;
377      }
378      case LIT_CHAR_RIGHT_SQUARE:
379      {
380        is_char_class = false;
381        break;
382      }
383      case LIT_CHAR_LEFT_PAREN:
384      {
385        if (curr_p < re_ctx_p->input_end_p
386            && *curr_p != LIT_CHAR_QUESTION
387            && !is_char_class)
388        {
389          re_ctx_p->groups_count++;
390        }
391        break;
392      }
393    }
394  }
395} /* re_count_groups */
396
397#if ENABLED (JERRY_ES2015)
398/**
399 * Check if a code point is a Syntax character
400 *
401 * @return true, if syntax character
402 *         false, otherwise
403 */
404static bool
405re_is_syntax_char (lit_code_point_t cp) /**< code point */
406{
407  return (cp == LIT_CHAR_CIRCUMFLEX
408          || cp == LIT_CHAR_DOLLAR_SIGN
409          || cp == LIT_CHAR_BACKSLASH
410          || cp == LIT_CHAR_DOT
411          || cp == LIT_CHAR_ASTERISK
412          || cp == LIT_CHAR_PLUS
413          || cp == LIT_CHAR_QUESTION
414          || cp == LIT_CHAR_LEFT_PAREN
415          || cp == LIT_CHAR_RIGHT_PAREN
416          || cp == LIT_CHAR_LEFT_SQUARE
417          || cp == LIT_CHAR_RIGHT_SQUARE
418          || cp == LIT_CHAR_LEFT_BRACE
419          || cp == LIT_CHAR_RIGHT_BRACE
420          || cp == LIT_CHAR_VLINE);
421} /* re_is_syntax_char */
422#endif /* ENABLED (JERRY_ES2015) */
423
424/**
425 * Parse a Character Escape or a Character Class Escape.
426 *
427 * @return ECMA_VALUE_EMPTY, if parsed successfully
428 *         ECMA_VALUE_ERROR, otherwise
429 */
430static ecma_value_t
431re_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
432{
433  JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
434  re_ctx_p->token.type = RE_TOK_CHAR;
435
436  if (lit_char_is_decimal_digit (*re_ctx_p->input_curr_p))
437  {
438    /* NULL code point escape, only valid if there are no following digits. */
439    if (*re_ctx_p->input_curr_p == LIT_CHAR_0
440        && (re_ctx_p->input_curr_p + 1 >= re_ctx_p->input_end_p
441            || !lit_char_is_decimal_digit (re_ctx_p->input_curr_p[1])))
442    {
443      re_ctx_p->input_curr_p++;
444      re_ctx_p->token.value = LIT_UNICODE_CODE_POINT_NULL;
445      return ECMA_VALUE_EMPTY;
446    }
447
448#if ENABLED (JERRY_ES2015)
449    if (re_ctx_p->flags & RE_FLAG_UNICODE)
450    {
451      return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape sequence"));
452    }
453#endif /* ENABLED (JERRY_ES2015) */
454
455    /* Legacy octal escape sequence */
456    if (lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
457    {
458      re_ctx_p->token.value = re_parse_octal (re_ctx_p);
459      return ECMA_VALUE_EMPTY;
460    }
461
462    /* Identity escape */
463    re_ctx_p->token.value = *re_ctx_p->input_curr_p++;
464    return ECMA_VALUE_EMPTY;
465  }
466
467  lit_code_point_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
468  switch (ch)
469  {
470    /* Character Class escapes */
471    case LIT_CHAR_LOWERCASE_D:
472    {
473      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
474      re_ctx_p->token.value = RE_ESCAPE_DIGIT;
475      break;
476    }
477    case LIT_CHAR_UPPERCASE_D:
478    {
479      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
480      re_ctx_p->token.value = RE_ESCAPE_NOT_DIGIT;
481      break;
482    }
483    case LIT_CHAR_LOWERCASE_S:
484    {
485      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
486      re_ctx_p->token.value = RE_ESCAPE_WHITESPACE;
487      break;
488    }
489    case LIT_CHAR_UPPERCASE_S:
490    {
491      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
492      re_ctx_p->token.value = RE_ESCAPE_NOT_WHITESPACE;
493      break;
494    }
495    case LIT_CHAR_LOWERCASE_W:
496    {
497      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
498      re_ctx_p->token.value = RE_ESCAPE_WORD_CHAR;
499      break;
500    }
501    case LIT_CHAR_UPPERCASE_W:
502    {
503      re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
504      re_ctx_p->token.value = RE_ESCAPE_NOT_WORD_CHAR;
505      break;
506    }
507    /* Control escapes */
508    case LIT_CHAR_LOWERCASE_F:
509    {
510      re_ctx_p->token.value = LIT_CHAR_FF;
511      break;
512    }
513    case LIT_CHAR_LOWERCASE_N:
514    {
515      re_ctx_p->token.value = LIT_CHAR_LF;
516      break;
517    }
518    case LIT_CHAR_LOWERCASE_R:
519    {
520      re_ctx_p->token.value = LIT_CHAR_CR;
521      break;
522    }
523    case LIT_CHAR_LOWERCASE_T:
524    {
525      re_ctx_p->token.value = LIT_CHAR_TAB;
526      break;
527    }
528    case LIT_CHAR_LOWERCASE_V:
529    {
530      re_ctx_p->token.value = LIT_CHAR_VTAB;
531      break;
532    }
533    /* Control letter */
534    case LIT_CHAR_LOWERCASE_C:
535    {
536      if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
537      {
538        ch = *re_ctx_p->input_curr_p;
539
540        if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
541            || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
542        {
543          re_ctx_p->token.value = (ch % 32);
544          re_ctx_p->input_curr_p++;
545
546          break;
547        }
548      }
549
550#if ENABLED (JERRY_ES2015)
551      if (re_ctx_p->flags & RE_FLAG_UNICODE)
552      {
553        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid control escape sequence"));
554      }
555#endif /* ENABLED (JERRY_ES2015) */
556
557      re_ctx_p->token.value = LIT_CHAR_BACKSLASH;
558      re_ctx_p->input_curr_p--;
559
560      break;
561    }
562    /* Hex escape */
563    case LIT_CHAR_LOWERCASE_X:
564    {
565      uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 2);
566      if (hex_value != UINT32_MAX)
567      {
568        re_ctx_p->token.value = hex_value;
569        re_ctx_p->input_curr_p += 2;
570        break;
571      }
572
573#if ENABLED (JERRY_ES2015)
574      if (re_ctx_p->flags & RE_FLAG_UNICODE)
575      {
576        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid hex escape sequence"));
577      }
578#endif /* ENABLED (JERRY_ES2015) */
579
580      re_ctx_p->token.value = LIT_CHAR_LOWERCASE_X;
581      break;
582    }
583    /* Unicode escape */
584    case LIT_CHAR_LOWERCASE_U:
585    {
586      uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 4);
587      if (hex_value != UINT32_MAX)
588      {
589        re_ctx_p->token.value = hex_value;
590        re_ctx_p->input_curr_p += 4;
591
592#if ENABLED (JERRY_ES2015)
593        if (re_ctx_p->flags & RE_FLAG_UNICODE
594            && lit_is_code_point_utf16_high_surrogate (re_ctx_p->token.value)
595            && re_ctx_p->input_curr_p + 6 <= re_ctx_p->input_end_p
596            && re_ctx_p->input_curr_p[0] == '\\'
597            && re_ctx_p->input_curr_p[1] == 'u')
598        {
599          hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p + 2, re_ctx_p->input_end_p, 4);
600          if (lit_is_code_point_utf16_low_surrogate (hex_value))
601          {
602            re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) re_ctx_p->token.value,
603                                                                              (ecma_char_t) hex_value);
604            re_ctx_p->input_curr_p += 6;
605          }
606        }
607#endif /* ENABLED (JERRY_ES2015) */
608
609        break;
610      }
611
612#if ENABLED (JERRY_ES2015)
613      if (re_ctx_p->flags & RE_FLAG_UNICODE)
614      {
615        if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
616            && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE
617            && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1]))
618        {
619          lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]);
620          re_ctx_p->input_curr_p += 2;
621
622          while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
623          {
624            cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
625
626            if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
627            {
628              return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
629            }
630          }
631
632          if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
633          {
634            re_ctx_p->input_curr_p++;
635            re_ctx_p->token.value = cp;
636            break;
637          }
638        }
639
640        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
641      }
642#endif /* ENABLED (JERRY_ES2015) */
643
644      re_ctx_p->token.value = LIT_CHAR_LOWERCASE_U;
645      break;
646    }
647    /* Identity escape */
648    default:
649    {
650#if ENABLED (JERRY_ES2015)
651      /* Must be '/', or one of SyntaxCharacter */
652      if (re_ctx_p->flags & RE_FLAG_UNICODE
653          && ch != LIT_CHAR_SLASH
654          && !re_is_syntax_char (ch))
655      {
656        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
657      }
658#endif /* ENABLED (JERRY_ES2015) */
659      re_ctx_p->token.value = ch;
660    }
661  }
662
663  return ECMA_VALUE_EMPTY;
664} /* re_parse_char_escape */
665
666/**
667 * Read the input pattern and parse the next token for the RegExp compiler
668 *
669 * @return empty ecma value - if parsed successfully
670 *         error ecma value - otherwise
671 *
672 *         Returned value must be freed with ecma_free_value
673 */
674static ecma_value_t
675re_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
676{
677  if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
678  {
679    re_ctx_p->token.type = RE_TOK_EOF;
680    return ECMA_VALUE_EMPTY;
681  }
682
683  ecma_char_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
684
685  switch (ch)
686  {
687    case LIT_CHAR_CIRCUMFLEX:
688    {
689      re_ctx_p->token.type = RE_TOK_ASSERT_START;
690      return ECMA_VALUE_EMPTY;
691    }
692    case LIT_CHAR_DOLLAR_SIGN:
693    {
694      re_ctx_p->token.type = RE_TOK_ASSERT_END;
695      return ECMA_VALUE_EMPTY;
696    }
697    case LIT_CHAR_VLINE:
698    {
699      re_ctx_p->token.type = RE_TOK_ALTERNATIVE;
700      return ECMA_VALUE_EMPTY;
701    }
702    case LIT_CHAR_DOT:
703    {
704      re_ctx_p->token.type = RE_TOK_PERIOD;
705      /* Check quantifier */
706      break;
707    }
708    case LIT_CHAR_BACKSLASH:
709    {
710      if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
711      {
712        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
713      }
714
715      /* DecimalEscape, Backreferences cannot start with a zero digit. */
716      if (*re_ctx_p->input_curr_p > LIT_CHAR_0 && *re_ctx_p->input_curr_p <= LIT_CHAR_9)
717      {
718        const lit_utf8_byte_t *digits_p = re_ctx_p->input_curr_p;
719        const uint32_t value = lit_parse_decimal (&digits_p, re_ctx_p->input_end_p);
720
721        if (re_ctx_p->groups_count < 0)
722        {
723          re_count_groups (re_ctx_p);
724        }
725
726        if (value <= (uint32_t) re_ctx_p->groups_count)
727        {
728          /* Valid backreference */
729          re_ctx_p->input_curr_p = digits_p;
730          re_ctx_p->token.type = RE_TOK_BACKREFERENCE;
731          re_ctx_p->token.value = value;
732
733          /* Check quantifier */
734          break;
735        }
736      }
737
738      if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B)
739      {
740        re_ctx_p->input_curr_p++;
741        re_ctx_p->token.type = RE_TOK_ASSERT_WORD_BOUNDARY;
742        return ECMA_VALUE_EMPTY;
743      }
744      else if (*re_ctx_p->input_curr_p == LIT_CHAR_UPPERCASE_B)
745      {
746        re_ctx_p->input_curr_p++;
747        re_ctx_p->token.type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
748        return ECMA_VALUE_EMPTY;
749      }
750
751      const ecma_value_t parse_result = re_parse_char_escape (re_ctx_p);
752
753      if (ECMA_IS_VALUE_ERROR (parse_result))
754      {
755        return parse_result;
756      }
757
758      /* Check quantifier */
759      break;
760    }
761    case LIT_CHAR_LEFT_PAREN:
762    {
763      if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
764      {
765        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated group"));
766      }
767
768      if (*re_ctx_p->input_curr_p == LIT_CHAR_QUESTION)
769      {
770        re_ctx_p->input_curr_p++;
771        if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
772        {
773          return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group"));
774        }
775
776        ch = *re_ctx_p->input_curr_p++;
777
778        if (ch == LIT_CHAR_EQUALS)
779        {
780          re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD;
781          re_ctx_p->token.value = false;
782        }
783        else if (ch == LIT_CHAR_EXCLAMATION)
784        {
785          re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD;
786          re_ctx_p->token.value = true;
787        }
788        else if (ch == LIT_CHAR_COLON)
789        {
790          re_ctx_p->token.type = RE_TOK_START_NON_CAPTURE_GROUP;
791        }
792        else
793        {
794          return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group"));
795        }
796      }
797      else
798      {
799        re_ctx_p->token.type = RE_TOK_START_CAPTURE_GROUP;
800      }
801
802      return ECMA_VALUE_EMPTY;
803    }
804    case LIT_CHAR_RIGHT_PAREN:
805    {
806      re_ctx_p->token.type = RE_TOK_END_GROUP;
807
808      return ECMA_VALUE_EMPTY;
809    }
810    case LIT_CHAR_LEFT_SQUARE:
811    {
812      re_ctx_p->token.type = RE_TOK_CHAR_CLASS;
813
814      if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
815      {
816        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class."));
817      }
818
819      return ECMA_VALUE_EMPTY;
820    }
821    case LIT_CHAR_QUESTION:
822    case LIT_CHAR_ASTERISK:
823    case LIT_CHAR_PLUS:
824    {
825      return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid quantifier."));
826    }
827    case LIT_CHAR_LEFT_BRACE:
828    {
829      re_ctx_p->input_curr_p--;
830      if (ecma_is_value_true (re_parse_quantifier (re_ctx_p)))
831      {
832        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Nothing to repeat."));
833      }
834
835#if ENABLED (JERRY_ES2015)
836      if (re_ctx_p->flags & RE_FLAG_UNICODE)
837      {
838        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket."));
839      }
840#endif /* ENABLED (JERRY_ES2015) */
841
842      re_ctx_p->input_curr_p++;
843      re_ctx_p->token.type = RE_TOK_CHAR;
844      re_ctx_p->token.value = ch;
845
846      /* Check quantifier */
847      break;
848    }
849#if ENABLED (JERRY_ES2015)
850    case LIT_CHAR_RIGHT_SQUARE:
851    case LIT_CHAR_RIGHT_BRACE:
852    {
853      if (re_ctx_p->flags & RE_FLAG_UNICODE)
854      {
855        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket."));
856      }
857
858      /* FALLTHRU */
859    }
860#endif /* ENABLED (JERRY_ES2015) */
861    default:
862    {
863      re_ctx_p->token.type = RE_TOK_CHAR;
864      re_ctx_p->token.value = ch;
865
866#if ENABLED (JERRY_ES2015)
867      if (re_ctx_p->flags & RE_FLAG_UNICODE
868          && lit_is_code_point_utf16_high_surrogate (ch)
869          && re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
870      {
871        const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p);
872        if (lit_is_code_point_utf16_low_surrogate (next))
873        {
874          re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point (ch, next);
875          re_ctx_p->input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
876        }
877      }
878#endif /* ENABLED (JERRY_ES2015) */
879
880      /* Check quantifier */
881      break;
882    }
883  }
884
885  re_parse_quantifier (re_ctx_p);
886  return re_check_quantifier (re_ctx_p);
887} /* re_parse_next_token */
888
889/**
890 * Append a character class range to the bytecode.
891 */
892static void
893re_class_add_range (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
894                    lit_code_point_t start, /**< range begin */
895                    lit_code_point_t end) /**< range end */
896{
897  if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
898  {
899    start = ecma_regexp_canonicalize_char (start, re_ctx_p->flags & RE_FLAG_UNICODE);
900    end = ecma_regexp_canonicalize_char (end, re_ctx_p->flags & RE_FLAG_UNICODE);
901  }
902
903  re_append_char (re_ctx_p, start);
904  re_append_char (re_ctx_p, end);
905} /* re_class_add_range */
906
907/**
908 * Add a single character to the character class
909 */
910static void
911re_class_add_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
912                    uint32_t class_offset, /**< character class bytecode offset*/
913                    lit_code_point_t cp) /**< code point */
914{
915  if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
916  {
917    cp = ecma_regexp_canonicalize_char (cp, re_ctx_p->flags & RE_FLAG_UNICODE);
918  }
919
920  re_insert_char (re_ctx_p, class_offset, cp);
921} /* re_class_add_char */
922
/**
 * Sentinel value used in place of a code point when no valid code point is available
 */
#define RE_INVALID_CP 0xFFFFFFFFu
927
928/**
929 * Read the input pattern and parse the range of character class
930 *
931 * @return empty ecma value - if parsed successfully
932 *         error ecma value - otherwise
933 *
934 *         Returned value must be freed with ecma_free_value
935 */
936static ecma_value_t
937re_parse_char_class (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
938{
939  static const uint8_t escape_flags[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20};
940  const uint32_t class_offset = re_bytecode_size (re_ctx_p);
941
942  uint8_t found_escape_flags = 0;
943  uint8_t out_class_flags = 0;
944
945  uint32_t range_count = 0;
946  uint32_t char_count = 0;
947  bool is_range = false;
948
949  JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
950  if (*re_ctx_p->input_curr_p == LIT_CHAR_CIRCUMFLEX)
951  {
952    re_ctx_p->input_curr_p++;
953    out_class_flags |= RE_CLASS_INVERT;
954  }
955
956  lit_code_point_t start = RE_INVALID_CP;
957
958  while (true)
959  {
960    if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
961    {
962      return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class."));
963    }
964
965    if (*re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_SQUARE)
966    {
967      if (is_range)
968      {
969        if (start != RE_INVALID_CP)
970        {
971          re_class_add_char (re_ctx_p, class_offset, start);
972          char_count++;
973        }
974
975        re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS);
976        char_count++;
977      }
978
979      re_ctx_p->input_curr_p++;
980      break;
981    }
982
983    JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
984    lit_code_point_t current;
985
986    if (*re_ctx_p->input_curr_p == LIT_CHAR_BACKSLASH)
987    {
988      re_ctx_p->input_curr_p++;
989      if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
990      {
991        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
992      }
993
994      if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B)
995      {
996        re_ctx_p->input_curr_p++;
997        current = LIT_CHAR_BS;
998      }
999#if ENABLED (JERRY_ES2015)
1000      else if (*re_ctx_p->input_curr_p == LIT_CHAR_MINUS)
1001      {
1002        re_ctx_p->input_curr_p++;
1003        current = LIT_CHAR_MINUS;
1004      }
1005#endif /* ENABLED (JERRY_ES2015) */
1006      else if ((re_ctx_p->flags & RE_FLAG_UNICODE) == 0
1007               && *re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_C
1008               && re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
1009               && (lit_char_is_decimal_digit (*(re_ctx_p->input_curr_p + 1))
1010                   || *(re_ctx_p->input_curr_p + 1) == LIT_CHAR_UNDERSCORE))
1011      {
1012        current = ((uint8_t) *(re_ctx_p->input_curr_p + 1) % 32);
1013        re_ctx_p->input_curr_p += 2;
1014      }
1015      else
1016      {
1017        if (ECMA_IS_VALUE_ERROR (re_parse_char_escape (re_ctx_p)))
1018        {
1019          return ECMA_VALUE_ERROR;
1020        }
1021
1022        if (re_ctx_p->token.type == RE_TOK_CLASS_ESCAPE)
1023        {
1024          const uint8_t escape = (uint8_t) re_ctx_p->token.value;
1025          found_escape_flags |= escape_flags[escape];
1026          current = RE_INVALID_CP;
1027        }
1028        else
1029        {
1030          JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR);
1031          current = re_ctx_p->token.value;
1032        }
1033      }
1034    }
1035#if ENABLED (JERRY_ES2015)
1036    else if (re_ctx_p->flags & RE_FLAG_UNICODE)
1037    {
1038      current = ecma_regexp_unicode_advance (&re_ctx_p->input_curr_p, re_ctx_p->input_end_p);
1039    }
1040#endif /* ENABLED (JERRY_ES2015) */
1041    else
1042    {
1043      current = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
1044    }
1045
1046    if (is_range)
1047    {
1048      is_range = false;
1049
1050      if (start != RE_INVALID_CP && current != RE_INVALID_CP)
1051      {
1052        if (start > current)
1053        {
1054          return ecma_raise_syntax_error (ECMA_ERR_MSG ("Range out of order in character class"));
1055        }
1056
1057        re_class_add_range (re_ctx_p, start, current);
1058        range_count++;
1059        continue;
1060      }
1061
1062#if ENABLED (JERRY_ES2015)
1063      if (re_ctx_p->flags & RE_FLAG_UNICODE)
1064      {
1065        return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid character class"));
1066      }
1067#endif /* ENABLED (JERRY_ES2015) */
1068
1069      if (start != RE_INVALID_CP)
1070      {
1071        re_class_add_char (re_ctx_p, class_offset, start);
1072        char_count++;
1073      }
1074      else if (current != RE_INVALID_CP)
1075      {
1076        re_class_add_char (re_ctx_p, class_offset, current);
1077        char_count++;
1078      }
1079
1080      re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS);
1081      char_count++;
1082      continue;
1083    }
1084
1085    if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
1086        && *re_ctx_p->input_curr_p == LIT_CHAR_MINUS)
1087    {
1088      re_ctx_p->input_curr_p++;
1089      start = current;
1090      is_range = true;
1091      continue;
1092    }
1093
1094    if (current != RE_INVALID_CP)
1095    {
1096      re_class_add_char (re_ctx_p, class_offset, current);
1097      char_count++;
1098    }
1099  }
1100
1101  uint8_t escape_count = 0;
1102  for (ecma_class_escape_t escape = RE_ESCAPE__START; escape < RE_ESCAPE__COUNT; ++escape)
1103  {
1104    if (found_escape_flags & escape_flags[escape])
1105    {
1106      re_insert_byte (re_ctx_p, class_offset, (uint8_t) escape);
1107      escape_count++;
1108    }
1109  }
1110
1111  if (range_count > 0)
1112  {
1113    re_insert_value (re_ctx_p, class_offset, range_count);
1114    out_class_flags |= RE_CLASS_HAS_RANGES;
1115  }
1116
1117  if (char_count > 0)
1118  {
1119    re_insert_value (re_ctx_p, class_offset, char_count);
1120    out_class_flags |= RE_CLASS_HAS_CHARS;
1121  }
1122
1123  JERRY_ASSERT (escape_count <= RE_CLASS_ESCAPE_COUNT_MASK);
1124  out_class_flags |= escape_count;
1125
1126  re_insert_byte (re_ctx_p, class_offset, out_class_flags);
1127  re_insert_opcode (re_ctx_p, class_offset, RE_OP_CHAR_CLASS);
1128
1129  re_parse_quantifier (re_ctx_p);
1130  return re_check_quantifier (re_ctx_p);
1131} /* re_parse_char_class */
1132
1133/**
1134 * Parse alternatives
1135 *
1136 * @return empty ecma value - if alternative was successfully parsed
1137 *         error ecma value - otherwise
1138 *
1139 *         Returned value must be freed with ecma_free_value
1140 */
1141ecma_value_t
1142re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
1143                      bool expect_eof) /**< expect end of file */
1144{
1145  ECMA_CHECK_STACK_USAGE ();
1146  uint32_t alternative_offset = re_bytecode_size (re_ctx_p);
1147  bool first_alternative = true;
1148
1149  while (true)
1150  {
1151    ecma_value_t next_token_result = re_parse_next_token (re_ctx_p);
1152    if (ECMA_IS_VALUE_ERROR (next_token_result))
1153    {
1154      return next_token_result;
1155    }
1156
1157    JERRY_ASSERT (ecma_is_value_empty (next_token_result));
1158
1159    uint32_t atom_offset = re_bytecode_size (re_ctx_p);
1160
1161    switch (re_ctx_p->token.type)
1162    {
1163      case RE_TOK_START_CAPTURE_GROUP:
1164      {
1165        const uint32_t idx = re_ctx_p->captures_count++;
1166        const uint32_t capture_start = idx;
1167
1168        ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1169        if (ECMA_IS_VALUE_ERROR (result))
1170        {
1171          return result;
1172        }
1173
1174        re_parse_quantifier (re_ctx_p);
1175
1176        if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1177        {
1178          return ECMA_VALUE_ERROR;
1179        }
1180
1181        re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, true);
1182        break;
1183      }
1184      case RE_TOK_START_NON_CAPTURE_GROUP:
1185      {
1186        const uint32_t idx = re_ctx_p->non_captures_count++;
1187        const uint32_t capture_start = re_ctx_p->captures_count;
1188
1189        ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1190        if (ECMA_IS_VALUE_ERROR (result))
1191        {
1192          return result;
1193        }
1194
1195        re_parse_quantifier (re_ctx_p);
1196
1197        if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1198        {
1199          return ECMA_VALUE_ERROR;
1200        }
1201
1202        re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, false);
1203        break;
1204      }
1205      case RE_TOK_PERIOD:
1206      {
1207#if ENABLED (JERRY_ES2015)
1208        re_append_opcode (re_ctx_p, (re_ctx_p->flags & RE_FLAG_UNICODE) ? RE_OP_UNICODE_PERIOD : RE_OP_PERIOD);
1209#else /* !ENABLED (JERRY_ES2015) */
1210        re_append_opcode (re_ctx_p, RE_OP_PERIOD);
1211#endif /* !ENABLED (JERRY_ES2015) */
1212
1213        re_insert_atom_iterator (re_ctx_p, atom_offset);
1214        break;
1215      }
1216      case RE_TOK_ALTERNATIVE:
1217      {
1218        re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1219        re_insert_opcode (re_ctx_p, alternative_offset, first_alternative ? RE_OP_ALTERNATIVE_START
1220                                                                          : RE_OP_ALTERNATIVE_NEXT);
1221
1222        alternative_offset = re_bytecode_size (re_ctx_p);
1223        first_alternative = false;
1224        break;
1225      }
1226      case RE_TOK_ASSERT_START:
1227      {
1228        re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_START);
1229        break;
1230      }
1231      case RE_TOK_ASSERT_END:
1232      {
1233        re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_END);
1234        break;
1235      }
1236      case RE_TOK_ASSERT_WORD_BOUNDARY:
1237      {
1238        re_append_opcode (re_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
1239        break;
1240      }
1241      case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
1242      {
1243        re_append_opcode (re_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
1244        break;
1245      }
1246      case RE_TOK_ASSERT_LOOKAHEAD:
1247      {
1248        const uint32_t start_capture_count = re_ctx_p->captures_count;
1249        const bool is_negative = !!re_ctx_p->token.value;
1250
1251        ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1252
1253        if (ECMA_IS_VALUE_ERROR (result))
1254        {
1255          return result;
1256        }
1257
1258#if ENABLED (JERRY_ES2015)
1259        if (re_ctx_p->flags & RE_FLAG_UNICODE)
1260        {
1261          re_ctx_p->token.qmin = 1;
1262          re_ctx_p->token.qmax = 1;
1263          re_ctx_p->token.greedy = true;
1264        }
1265        else
1266#endif /* ENABLED (JERRY_ES2015) */
1267        {
1268          re_parse_quantifier (re_ctx_p);
1269
1270          if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1271          {
1272            return ECMA_VALUE_ERROR;
1273          }
1274        }
1275
1276        re_insert_assertion_lookahead (re_ctx_p, atom_offset, start_capture_count, is_negative);
1277        break;
1278      }
1279      case RE_TOK_BACKREFERENCE:
1280      {
1281        const uint32_t backref_idx = re_ctx_p->token.value;
1282        re_append_opcode (re_ctx_p, RE_OP_BACKREFERENCE);
1283        re_append_value (re_ctx_p, backref_idx);
1284
1285        if (re_ctx_p->token.qmin != 1 || re_ctx_p->token.qmax != 1)
1286        {
1287          const uint32_t group_idx = re_ctx_p->non_captures_count++;
1288          re_insert_into_group (re_ctx_p, atom_offset, group_idx, re_ctx_p->captures_count, false);
1289        }
1290
1291        break;
1292      }
1293      case RE_TOK_CLASS_ESCAPE:
1294      {
1295        const ecma_class_escape_t escape = (ecma_class_escape_t) re_ctx_p->token.value;
1296        re_append_opcode (re_ctx_p, RE_OP_CLASS_ESCAPE);
1297        re_append_byte (re_ctx_p, (uint8_t) escape);
1298
1299        re_insert_atom_iterator (re_ctx_p, atom_offset);
1300        break;
1301      }
1302      case RE_TOK_CHAR_CLASS:
1303      {
1304        ecma_value_t result = re_parse_char_class (re_ctx_p);
1305
1306        if (ECMA_IS_VALUE_ERROR (result))
1307        {
1308          return result;
1309        }
1310
1311        re_insert_atom_iterator (re_ctx_p, atom_offset);
1312        break;
1313      }
1314      case RE_TOK_END_GROUP:
1315      {
1316        if (expect_eof)
1317        {
1318          return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unmatched ')'"));
1319        }
1320
1321        if (!first_alternative)
1322        {
1323          re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1324          re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT);
1325        }
1326
1327        return ECMA_VALUE_EMPTY;
1328      }
1329      case RE_TOK_EOF:
1330      {
1331        if (!expect_eof)
1332        {
1333          return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern."));
1334        }
1335
1336        if (!first_alternative)
1337        {
1338          re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1339          re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT);
1340        }
1341
1342        re_append_opcode (re_ctx_p, RE_OP_EOF);
1343        return ECMA_VALUE_EMPTY;
1344      }
1345      default:
1346      {
1347        JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR);
1348
1349        lit_code_point_t ch = re_ctx_p->token.value;
1350
1351        if (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) == 0)
1352        {
1353          re_append_opcode (re_ctx_p, RE_OP_BYTE);
1354          re_append_byte (re_ctx_p, (uint8_t) ch);
1355
1356          re_insert_atom_iterator (re_ctx_p, atom_offset);
1357          break;
1358        }
1359
1360        if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
1361        {
1362          ch = ecma_regexp_canonicalize_char (ch, re_ctx_p->flags & RE_FLAG_UNICODE);
1363        }
1364
1365        re_append_opcode (re_ctx_p, RE_OP_CHAR);
1366        re_append_char (re_ctx_p, ch);
1367
1368        re_insert_atom_iterator (re_ctx_p, atom_offset);
1369        break;
1370      }
1371    }
1372  }
1373
1374  return ECMA_VALUE_EMPTY;
1375} /* re_parse_alternative */
1376
1377/**
1378 * @}
1379 * @}
1380 * @}
1381 */
1382
1383#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
1384