1425bb815Sopenharmony_ci/* Copyright JS Foundation and other contributors, http://js.foundation 2425bb815Sopenharmony_ci * 3425bb815Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 4425bb815Sopenharmony_ci * you may not use this file except in compliance with the License. 5425bb815Sopenharmony_ci * You may obtain a copy of the License at 6425bb815Sopenharmony_ci * 7425bb815Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 8425bb815Sopenharmony_ci * 9425bb815Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 10425bb815Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS 11425bb815Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12425bb815Sopenharmony_ci * See the License for the specific language governing permissions and 13425bb815Sopenharmony_ci * limitations under the License. 14425bb815Sopenharmony_ci */ 15425bb815Sopenharmony_ci 16425bb815Sopenharmony_ci#ifndef ECMA_REGEXP_OBJECT_H 17425bb815Sopenharmony_ci#define ECMA_REGEXP_OBJECT_H 18425bb815Sopenharmony_ci 19425bb815Sopenharmony_ci#if ENABLED (JERRY_BUILTIN_REGEXP) 20425bb815Sopenharmony_ci 21425bb815Sopenharmony_ci#include "ecma-globals.h" 22425bb815Sopenharmony_ci#include "re-compiler.h" 23425bb815Sopenharmony_ci 24425bb815Sopenharmony_ci/** \addtogroup ecma ECMA 25425bb815Sopenharmony_ci * @{ 26425bb815Sopenharmony_ci * 27425bb815Sopenharmony_ci * \addtogroup ecmaregexpobject ECMA RegExp object related routines 28425bb815Sopenharmony_ci * @{ 29425bb815Sopenharmony_ci */ 30425bb815Sopenharmony_ci 31425bb815Sopenharmony_ci/** 32425bb815Sopenharmony_ci * RegExp flags 33425bb815Sopenharmony_ci * Note: 34425bb815Sopenharmony_ci * This enum has to be kept in sync with jerry_regexp_flags_t. 35425bb815Sopenharmony_ci */ 36425bb815Sopenharmony_citypedef enum 37425bb815Sopenharmony_ci{ 38425bb815Sopenharmony_ci RE_FLAG_EMPTY = 0u, /* Empty RegExp flags */ 39425bb815Sopenharmony_ci RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */ 40425bb815Sopenharmony_ci RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */ 41425bb815Sopenharmony_ci RE_FLAG_MULTILINE = (1u << 3), /**< ECMA-262 v5, 15.10.7.4 */ 42425bb815Sopenharmony_ci RE_FLAG_STICKY = (1u << 4), /**< ECMA-262 v6, 21.2.5.12 */ 43425bb815Sopenharmony_ci RE_FLAG_UNICODE = (1u << 5) /**< ECMA-262 v6, 21.2.5.15 */ 44425bb815Sopenharmony_ci} ecma_regexp_flags_t; 45425bb815Sopenharmony_ci 46425bb815Sopenharmony_ci/** 47425bb815Sopenharmony_ci * Class escapes 48425bb815Sopenharmony_ci */ 49425bb815Sopenharmony_citypedef enum 50425bb815Sopenharmony_ci{ 51425bb815Sopenharmony_ci RE_ESCAPE__START, /**< escapes start */ 52425bb815Sopenharmony_ci RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit */ 53425bb815Sopenharmony_ci RE_ESCAPE_NOT_DIGIT, /**< not digit */ 54425bb815Sopenharmony_ci RE_ESCAPE_WORD_CHAR, /**< word char */ 55425bb815Sopenharmony_ci RE_ESCAPE_NOT_WORD_CHAR, /**< not word char */ 56425bb815Sopenharmony_ci RE_ESCAPE_WHITESPACE, /**< whitespace */ 57425bb815Sopenharmony_ci RE_ESCAPE_NOT_WHITESPACE, /**< not whitespace */ 58425bb815Sopenharmony_ci RE_ESCAPE__COUNT, /**< escape count */ 59425bb815Sopenharmony_ci} ecma_class_escape_t; 60425bb815Sopenharmony_ci 61425bb815Sopenharmony_ci/** 62425bb815Sopenharmony_ci * Character class flags escape count mask size. 63425bb815Sopenharmony_ci */ 64425bb815Sopenharmony_ci#define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u) 65425bb815Sopenharmony_ci 66425bb815Sopenharmony_ci/** 67425bb815Sopenharmony_ci * Character class flags escape count mask. 68425bb815Sopenharmony_ci */ 69425bb815Sopenharmony_ci#define RE_CLASS_ESCAPE_COUNT_MASK ((1 << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u) 70425bb815Sopenharmony_ci 71425bb815Sopenharmony_ci/** 72425bb815Sopenharmony_ci * Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits 73425bb815Sopenharmony_ci * hold a value that contains the number of class escapes present in the character class. 74425bb815Sopenharmony_ci */ 75425bb815Sopenharmony_citypedef enum 76425bb815Sopenharmony_ci{ 77425bb815Sopenharmony_ci RE_CLASS_HAS_CHARS = (1 << 5), /**< contains individual characters */ 78425bb815Sopenharmony_ci RE_CLASS_HAS_RANGES = (1 << 6), /**< contains character ranges */ 79425bb815Sopenharmony_ci RE_CLASS_INVERT = (1 << 7), /**< inverted */ 80425bb815Sopenharmony_ci} ecma_char_class_flags_t; 81425bb815Sopenharmony_ci 82425bb815Sopenharmony_ci/** 83425bb815Sopenharmony_ci * Structure for matching capturing groups and storing their result 84425bb815Sopenharmony_ci */ 85425bb815Sopenharmony_citypedef struct 86425bb815Sopenharmony_ci{ 87425bb815Sopenharmony_ci const lit_utf8_byte_t *begin_p; /**< capture start pointer */ 88425bb815Sopenharmony_ci const lit_utf8_byte_t *end_p; /**< capture end pointer */ 89425bb815Sopenharmony_ci const uint8_t *bc_p; /**< group bytecode pointer */ 90425bb815Sopenharmony_ci uint32_t iterator; /**< iteration counter */ 91425bb815Sopenharmony_ci uint32_t subcapture_count; /**< number of nested capturing groups */ 92425bb815Sopenharmony_ci} ecma_regexp_capture_t; 93425bb815Sopenharmony_ci 94425bb815Sopenharmony_ci/** 95425bb815Sopenharmony_ci * Structure for matching non-capturing groups 96425bb815Sopenharmony_ci */ 97425bb815Sopenharmony_citypedef struct 98425bb815Sopenharmony_ci{ 99425bb815Sopenharmony_ci const lit_utf8_byte_t *begin_p; /**< substring start pointer */ 100425bb815Sopenharmony_ci const uint8_t *bc_p; /**< group bytecode pointer */ 101425bb815Sopenharmony_ci uint32_t iterator; /**< iteration counter */ 102425bb815Sopenharmony_ci uint32_t subcapture_start; /**< first nested capturing group index */ 103425bb815Sopenharmony_ci uint32_t subcapture_count; /**< number of nested capturing groups */ 104425bb815Sopenharmony_ci} ecma_regexp_non_capture_t; 105425bb815Sopenharmony_ci 106425bb815Sopenharmony_ci/** 107425bb815Sopenharmony_ci * Check if an ecma_regexp_capture_t contains a defined capture 108425bb815Sopenharmony_ci */ 109425bb815Sopenharmony_ci#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL) 110425bb815Sopenharmony_ci 111425bb815Sopenharmony_ciecma_value_t 112425bb815Sopenharmony_ciecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p); 113425bb815Sopenharmony_ci 114425bb815Sopenharmony_ci#if (JERRY_STACK_LIMIT != 0) 115425bb815Sopenharmony_ci/** 116425bb815Sopenharmony_ci * Value used ase result when stack limit is reached 117425bb815Sopenharmony_ci */ 118425bb815Sopenharmony_ci#define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX) 119425bb815Sopenharmony_ci 120425bb815Sopenharmony_ci/** 121425bb815Sopenharmony_ci * Checks if the stack limit has been reached during regexp matching 122425bb815Sopenharmony_ci */ 123425bb815Sopenharmony_ci#define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY (p == ECMA_RE_OUT_OF_STACK)) 124425bb815Sopenharmony_ci#else /* JERRY_STACK_LIMIT == 0 */ 125425bb815Sopenharmony_ci#define ECMA_RE_STACK_LIMIT_REACHED(p) (false) 126425bb815Sopenharmony_ci#endif /* JERRY_STACK_LIMIT != 0 */ 127425bb815Sopenharmony_ci 128425bb815Sopenharmony_ci/** 129425bb815Sopenharmony_ci * Offset applied to qmax when encoded into the bytecode. 130425bb815Sopenharmony_ci * 131425bb815Sopenharmony_ci * It's common for qmax to be Infinity, which is represented a UINT32_MAX. By applying the offset we are able to store 132425bb815Sopenharmony_ci * it in a single byte az zero. 133425bb815Sopenharmony_ci */ 134425bb815Sopenharmony_ci#define RE_QMAX_OFFSET 1 135425bb815Sopenharmony_ci 136425bb815Sopenharmony_ci/** 137425bb815Sopenharmony_ci * RegExp executor context 138425bb815Sopenharmony_ci */ 139425bb815Sopenharmony_citypedef struct 140425bb815Sopenharmony_ci{ 141425bb815Sopenharmony_ci const lit_utf8_byte_t *input_start_p; /**< start of input string */ 142425bb815Sopenharmony_ci const lit_utf8_byte_t *input_end_p; /**< end of input string */ 143425bb815Sopenharmony_ci uint32_t captures_count; /**< number of capture groups */ 144425bb815Sopenharmony_ci uint32_t non_captures_count; /**< number of non-capture groups */ 145425bb815Sopenharmony_ci ecma_regexp_capture_t *captures_p; /**< capturing groups */ 146425bb815Sopenharmony_ci ecma_regexp_non_capture_t *non_captures_p; /**< non-capturing groups */ 147425bb815Sopenharmony_ci uint16_t flags; /**< RegExp flags */ 148425bb815Sopenharmony_ci uint8_t char_size; /**< size of encoded characters */ 149425bb815Sopenharmony_ci} ecma_regexp_ctx_t; 150425bb815Sopenharmony_ci 151425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 152425bb815Sopenharmony_cilit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p); 153425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 154425bb815Sopenharmony_ci 155425bb815Sopenharmony_ciecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p); 156425bb815Sopenharmony_ciecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p, 157425bb815Sopenharmony_ci ecma_string_t *input_string_p); 158425bb815Sopenharmony_ciecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg); 159425bb815Sopenharmony_cilit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode); 160425bb815Sopenharmony_ciecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p); 161425bb815Sopenharmony_civoid ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p, 162425bb815Sopenharmony_ci ecma_string_t *source_p, 163425bb815Sopenharmony_ci uint16_t flags); 164425bb815Sopenharmony_ciecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg); 165425bb815Sopenharmony_ciecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg); 166425bb815Sopenharmony_ciecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg); 167425bb815Sopenharmony_ciecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg); 168425bb815Sopenharmony_ci 169425bb815Sopenharmony_ciecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p); 170425bb815Sopenharmony_ci 171425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p); 172425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p, 173425bb815Sopenharmony_ci ecma_value_t pattern_value, 174425bb815Sopenharmony_ci ecma_value_t flags_value); 175425bb815Sopenharmony_ciecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p, 176425bb815Sopenharmony_ci ecma_value_t pattern_value, 177425bb815Sopenharmony_ci uint16_t flags); 178425bb815Sopenharmony_ci/** 179425bb815Sopenharmony_ci * @} 180425bb815Sopenharmony_ci * @} 181425bb815Sopenharmony_ci */ 182425bb815Sopenharmony_ci 183425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 184425bb815Sopenharmony_ci#endif /* !ECMA_REGEXP_OBJECT_H */ 185