1/* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef ECMA_REGEXP_OBJECT_H
17#define ECMA_REGEXP_OBJECT_H
18
19#if ENABLED (JERRY_BUILTIN_REGEXP)
20
21#include "ecma-globals.h"
22#include "re-compiler.h"
23
24/** \addtogroup ecma ECMA
25 * @{
26 *
27 * \addtogroup ecmaregexpobject ECMA RegExp object related routines
28 * @{
29 */
30
/**
 * RegExp flags
 *
 * Every non-empty flag is a distinct single bit; bit 0 is not used by any flag.
 *
 * Note:
 *      This enum has to be kept in sync with jerry_regexp_flags_t.
 */
typedef enum
{
  RE_FLAG_EMPTY = 0x00u,       /**< no RegExp flag is set */
  RE_FLAG_GLOBAL = 0x02u,      /**< global flag: ECMA-262 v5, 15.10.7.2 */
  RE_FLAG_IGNORE_CASE = 0x04u, /**< ignoreCase flag: ECMA-262 v5, 15.10.7.3 */
  RE_FLAG_MULTILINE = 0x08u,   /**< multiline flag: ECMA-262 v5, 15.10.7.4 */
  RE_FLAG_STICKY = 0x10u,      /**< sticky flag: ECMA-262 v6, 21.2.5.12 */
  RE_FLAG_UNICODE = 0x20u      /**< unicode flag: ECMA-262 v6, 21.2.5.15 */
} ecma_regexp_flags_t;
45
/**
 * Class escapes
 *
 * The enumerators are numbered consecutively from RE_ESCAPE__START (zero),
 * so RE_ESCAPE__COUNT equals the number of class escapes listed here.
 */
typedef enum
{
  RE_ESCAPE__START = 0,               /**< first class escape value */
  RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit class */
  RE_ESCAPE_NOT_DIGIT,                /**< negated digit class */
  RE_ESCAPE_WORD_CHAR,                /**< word character class */
  RE_ESCAPE_NOT_WORD_CHAR,            /**< negated word character class */
  RE_ESCAPE_WHITESPACE,               /**< whitespace class */
  RE_ESCAPE_NOT_WHITESPACE,           /**< negated whitespace class */
  RE_ESCAPE__COUNT,                   /**< number of class escapes */
} ecma_class_escape_t;
60
/**
 * Width, in bits, of the class escape count stored in the character class flags.
 */
#define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u)

/**
 * Bit mask selecting the class escape count (the lowest RE_CLASS_ESCAPE_COUNT_MASK_SIZE bits)
 * from the character class flags.
 */
#define RE_CLASS_ESCAPE_COUNT_MASK ((1u << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u)
70
/**
 * Character class flags.
 *
 * These flags live in the upper bits of the class flags byte. The 3 least significant bits of the same byte
 * are not flags: they hold the number of class escapes present in the character class.
 */
typedef enum
{
  RE_CLASS_HAS_CHARS = 0x20,  /**< the class contains individual characters */
  RE_CLASS_HAS_RANGES = 0x40, /**< the class contains character ranges */
  RE_CLASS_INVERT = 0x80,     /**< the class is inverted */
} ecma_char_class_flags_t;
81
82/**
83 * Structure for matching capturing groups and storing their result
84 */
85typedef struct
86{
87  const lit_utf8_byte_t *begin_p; /**< capture start pointer */
88  const lit_utf8_byte_t *end_p;   /**< capture end pointer */
89  const uint8_t *bc_p;            /**< group bytecode pointer */
90  uint32_t iterator;              /**< iteration counter */
91  uint32_t subcapture_count;      /**< number of nested capturing groups */
92} ecma_regexp_capture_t;
93
94/**
95 * Structure for matching non-capturing groups
96 */
97typedef struct
98{
99  const lit_utf8_byte_t *begin_p; /**< substring start pointer */
100  const uint8_t *bc_p;            /**< group bytecode pointer */
101  uint32_t iterator;              /**< iteration counter */
102  uint32_t subcapture_start;      /**< first nested capturing group index */
103  uint32_t subcapture_count;      /**< number of nested capturing groups */
104} ecma_regexp_non_capture_t;
105
/**
 * Tell whether the capture pointed to by 'c' is defined.
 *
 * A capture counts as defined when its begin_p member is not NULL.
 */
#define ECMA_RE_IS_CAPTURE_DEFINED(c) (NULL != (c)->begin_p)
110
111ecma_value_t
112ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p);
113
#if (JERRY_STACK_LIMIT != 0)
/**
 * Value used as result when stack limit is reached
 */
#define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX)

/**
 * Checks if the stack limit has been reached during regexp matching
 *
 * The argument is compared against ECMA_RE_OUT_OF_STACK. It is parenthesized in the expansion so that
 * an argument containing lower precedence operators (e.g. a conditional expression) is compared as a whole.
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY ((p) == ECMA_RE_OUT_OF_STACK))
#else /* JERRY_STACK_LIMIT == 0 */
/**
 * Stack limit checking is disabled: the check is always false and the argument is not evaluated
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
#endif /* JERRY_STACK_LIMIT != 0 */
127
/**
 * Offset applied to qmax when it is encoded into the bytecode.
 *
 * It is common for qmax to be Infinity, which is represented as UINT32_MAX. Applying the offset makes that
 * value wrap around, so it can be stored in a single byte as zero.
 */
#define RE_QMAX_OFFSET (1)
135
136/**
137 * RegExp executor context
138 */
139typedef struct
140{
141  const lit_utf8_byte_t *input_start_p;        /**< start of input string */
142  const lit_utf8_byte_t *input_end_p;          /**< end of input string */
143  uint32_t captures_count;                     /**< number of capture groups */
144  uint32_t non_captures_count;                 /**< number of non-capture groups */
145  ecma_regexp_capture_t *captures_p;           /**< capturing groups */
146  ecma_regexp_non_capture_t *non_captures_p;   /**< non-capturing groups */
147  uint16_t flags;                              /**< RegExp flags */
148  uint8_t char_size;                           /**< size of encoded characters */
149} ecma_regexp_ctx_t;
150
151#if ENABLED (JERRY_ES2015)
152lit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p);
153#endif /* ENABLED (JERRY_ES2015) */
154
155ecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p);
156ecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p,
157                                      ecma_string_t *input_string_p);
158ecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg);
159lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode);
160ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
161void ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p,
162                                              ecma_string_t *source_p,
163                                              uint16_t flags);
164ecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg);
165ecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg);
166ecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg);
167ecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg);
168
169ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p);
170
171ecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p);
172ecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p,
173                                                 ecma_value_t pattern_value,
174                                                 ecma_value_t flags_value);
175ecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p,
176                                               ecma_value_t pattern_value,
177                                               uint16_t flags);
178/**
179 * @}
180 * @}
181 */
182
183#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
184#endif /* !ECMA_REGEXP_OBJECT_H */
185