/* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15425bb815Sopenharmony_ci
16425bb815Sopenharmony_ci#ifndef ECMA_REGEXP_OBJECT_H
17425bb815Sopenharmony_ci#define ECMA_REGEXP_OBJECT_H
18425bb815Sopenharmony_ci
19425bb815Sopenharmony_ci#if ENABLED (JERRY_BUILTIN_REGEXP)
20425bb815Sopenharmony_ci
21425bb815Sopenharmony_ci#include "ecma-globals.h"
22425bb815Sopenharmony_ci#include "re-compiler.h"
23425bb815Sopenharmony_ci
/** \addtogroup ecma ECMA
 * @{
 *
 * \addtogroup ecmaregexpobject ECMA RegExp object related routines
 * @{
 */
30425bb815Sopenharmony_ci
/**
 * RegExp flags
 *
 * Note:
 *      This enum has to be kept in sync with jerry_regexp_flags_t.
 *      The lowest bit used by the enumerators below is bit 1; none of them
 *      occupies bit 0, so the values must not be renumbered.
 */
typedef enum
{
  RE_FLAG_EMPTY = 0u,              /**< no RegExp flags are set */
  RE_FLAG_GLOBAL = (1u << 1),      /**< ECMA-262 v5, 15.10.7.2 */
  RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
  RE_FLAG_MULTILINE = (1u << 3),   /**< ECMA-262 v5, 15.10.7.4 */
  RE_FLAG_STICKY = (1u << 4),      /**< ECMA-262 v6, 21.2.5.12 */
  RE_FLAG_UNICODE = (1u << 5)      /**< ECMA-262 v6, 21.2.5.15 */
} ecma_regexp_flags_t;
45425bb815Sopenharmony_ci
/**
 * Class escapes
 *
 * The enumerators are consecutive, starting at RE_ESCAPE__START, so
 * RE_ESCAPE__COUNT equals the number of distinct class escapes.
 */
typedef enum
{
  RE_ESCAPE__START,                   /**< first class escape value */
  RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit */
  RE_ESCAPE_NOT_DIGIT,                /**< not digit */
  RE_ESCAPE_WORD_CHAR,                /**< word char */
  RE_ESCAPE_NOT_WORD_CHAR,            /**< not word char */
  RE_ESCAPE_WHITESPACE,               /**< whitespace */
  RE_ESCAPE_NOT_WHITESPACE,           /**< not whitespace */
  RE_ESCAPE__COUNT,                   /**< number of class escapes */
} ecma_class_escape_t;
60425bb815Sopenharmony_ci
/**
 * Character class flags escape count mask size (number of low bits used for the escape count).
 */
#define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u)

/**
 * Character class flags escape count mask.
 *
 * Selects the RE_CLASS_ESCAPE_COUNT_MASK_SIZE least significant bits. The shift is done on an unsigned
 * operand so the whole expression stays in unsigned arithmetic.
 */
#define RE_CLASS_ESCAPE_COUNT_MASK ((1u << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u)
70425bb815Sopenharmony_ci
/**
 * Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits
 * hold a value that contains the number of class escapes present in the character class.
 *
 * None of the enumerators overlaps RE_CLASS_ESCAPE_COUNT_MASK, so a flag and the escape count can share one byte.
 */
typedef enum
{
  RE_CLASS_HAS_CHARS = (1 << 5),    /**< contains individual characters */
  RE_CLASS_HAS_RANGES = (1 << 6),   /**< contains character ranges */
  RE_CLASS_INVERT = (1 << 7),       /**< inverted */
} ecma_char_class_flags_t;
81425bb815Sopenharmony_ci
82425bb815Sopenharmony_ci/**
83425bb815Sopenharmony_ci * Structure for matching capturing groups and storing their result
84425bb815Sopenharmony_ci */
85425bb815Sopenharmony_citypedef struct
86425bb815Sopenharmony_ci{
87425bb815Sopenharmony_ci  const lit_utf8_byte_t *begin_p; /**< capture start pointer */
88425bb815Sopenharmony_ci  const lit_utf8_byte_t *end_p;   /**< capture end pointer */
89425bb815Sopenharmony_ci  const uint8_t *bc_p;            /**< group bytecode pointer */
90425bb815Sopenharmony_ci  uint32_t iterator;              /**< iteration counter */
91425bb815Sopenharmony_ci  uint32_t subcapture_count;      /**< number of nested capturing groups */
92425bb815Sopenharmony_ci} ecma_regexp_capture_t;
93425bb815Sopenharmony_ci
94425bb815Sopenharmony_ci/**
95425bb815Sopenharmony_ci * Structure for matching non-capturing groups
96425bb815Sopenharmony_ci */
97425bb815Sopenharmony_citypedef struct
98425bb815Sopenharmony_ci{
99425bb815Sopenharmony_ci  const lit_utf8_byte_t *begin_p; /**< substring start pointer */
100425bb815Sopenharmony_ci  const uint8_t *bc_p;            /**< group bytecode pointer */
101425bb815Sopenharmony_ci  uint32_t iterator;              /**< iteration counter */
102425bb815Sopenharmony_ci  uint32_t subcapture_start;      /**< first nested capturing group index */
103425bb815Sopenharmony_ci  uint32_t subcapture_count;      /**< number of nested capturing groups */
104425bb815Sopenharmony_ci} ecma_regexp_non_capture_t;
105425bb815Sopenharmony_ci
/**
 * Check if an ecma_regexp_capture_t contains a defined capture
 *
 * @param c pointer to the capture; it is dereferenced, so it must not be NULL
 *
 * Evaluates to non-zero when the capture's begin_p is not NULL.
 */
#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL)
110425bb815Sopenharmony_ci
111425bb815Sopenharmony_ciecma_value_t
112425bb815Sopenharmony_ciecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p);
113425bb815Sopenharmony_ci
#if (JERRY_STACK_LIMIT != 0)
/**
 * Value used as result when stack limit is reached
 *
 * It is a sentinel pointer value (all address bits set) and must only be compared, never dereferenced.
 */
#define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX)

/**
 * Checks if the stack limit has been reached during regexp matching
 *
 * The argument is parenthesized so that expressions with lower-precedence operators
 * (e.g. a conditional expression) compare as a whole against the sentinel.
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY ((p) == ECMA_RE_OUT_OF_STACK))
#else /* JERRY_STACK_LIMIT == 0 */
/**
 * Without a stack limit the check is constant false; the argument is not evaluated.
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
#endif /* JERRY_STACK_LIMIT != 0 */
127425bb815Sopenharmony_ci
/**
 * Offset applied to qmax when encoded into the bytecode.
 *
 * It's common for qmax to be Infinity, which is represented as UINT32_MAX. By applying the offset
 * (with 32-bit unsigned wrap-around) we are able to store it in a single byte as zero.
 */
#define RE_QMAX_OFFSET 1
135425bb815Sopenharmony_ci
136425bb815Sopenharmony_ci/**
137425bb815Sopenharmony_ci * RegExp executor context
138425bb815Sopenharmony_ci */
139425bb815Sopenharmony_citypedef struct
140425bb815Sopenharmony_ci{
141425bb815Sopenharmony_ci  const lit_utf8_byte_t *input_start_p;        /**< start of input string */
142425bb815Sopenharmony_ci  const lit_utf8_byte_t *input_end_p;          /**< end of input string */
143425bb815Sopenharmony_ci  uint32_t captures_count;                     /**< number of capture groups */
144425bb815Sopenharmony_ci  uint32_t non_captures_count;                 /**< number of non-capture groups */
145425bb815Sopenharmony_ci  ecma_regexp_capture_t *captures_p;           /**< capturing groups */
146425bb815Sopenharmony_ci  ecma_regexp_non_capture_t *non_captures_p;   /**< non-capturing groups */
147425bb815Sopenharmony_ci  uint16_t flags;                              /**< RegExp flags */
148425bb815Sopenharmony_ci  uint8_t char_size;                           /**< size of encoded characters */
149425bb815Sopenharmony_ci} ecma_regexp_ctx_t;
150425bb815Sopenharmony_ci
151425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015)
152425bb815Sopenharmony_cilit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p);
153425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */
154425bb815Sopenharmony_ci
155425bb815Sopenharmony_ciecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p);
156425bb815Sopenharmony_ciecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p,
157425bb815Sopenharmony_ci                                      ecma_string_t *input_string_p);
158425bb815Sopenharmony_ciecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg);
159425bb815Sopenharmony_cilit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode);
160425bb815Sopenharmony_ciecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
161425bb815Sopenharmony_civoid ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p,
162425bb815Sopenharmony_ci                                              ecma_string_t *source_p,
163425bb815Sopenharmony_ci                                              uint16_t flags);
164425bb815Sopenharmony_ciecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg);
165425bb815Sopenharmony_ciecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg);
166425bb815Sopenharmony_ciecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg);
167425bb815Sopenharmony_ciecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg);
168425bb815Sopenharmony_ci
169425bb815Sopenharmony_ciecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p);
170425bb815Sopenharmony_ci
171425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p);
172425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p,
173425bb815Sopenharmony_ci                                                 ecma_value_t pattern_value,
174425bb815Sopenharmony_ci                                                 ecma_value_t flags_value);
175425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p,
176425bb815Sopenharmony_ci                                               ecma_value_t pattern_value,
177425bb815Sopenharmony_ci                                               uint16_t flags);
/**
 * @}
 * @}
 */
182425bb815Sopenharmony_ci
183425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
184425bb815Sopenharmony_ci#endif /* !ECMA_REGEXP_OBJECT_H */
185