1/* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecma-exceptions.h" 17#include "ecma-helpers.h" 18#include "ecma-regexp-object.h" 19#include "ecma-try-catch-macro.h" 20#include "lit-char-helpers.h" 21#include "jcontext.h" 22#include "jrt-libc-includes.h" 23#include "jmem.h" 24#include "re-bytecode.h" 25#include "re-compiler.h" 26#include "re-compiler-context.h" 27#include "re-parser.h" 28 29#if ENABLED (JERRY_BUILTIN_REGEXP) 30 31/** \addtogroup parser Parser 32 * @{ 33 * 34 * \addtogroup regexparser Regular expression 35 * @{ 36 * 37 * \addtogroup regexparser_compiler Compiler 38 * @{ 39 */ 40 41/** 42 * Search for the given pattern in the RegExp cache. 43 * 44 * @return pointer to bytecode if found 45 * NULL - otherwise 46 */ 47static re_compiled_code_t * 48re_cache_lookup (ecma_string_t *pattern_str_p, /**< pattern string */ 49 uint16_t flags) /**< flags */ 50{ 51 re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache); 52 53 for (uint8_t idx = 0u; idx < RE_CACHE_SIZE; idx++) 54 { 55 re_compiled_code_t *cached_bytecode_p = cache_p[idx]; 56 57 if (cached_bytecode_p == NULL) 58 { 59 break; 60 } 61 62 ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source); 63 64 if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags 65 && ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p)) 66 { 67 return cached_bytecode_p; 68 } 69 } 70 71 return NULL; 72} /* re_cache_lookup */ 73 74/** 75 * Run garbage collection in RegExp cache. 76 */ 77void 78re_cache_gc (void) 79{ 80 re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache); 81 82 for (uint32_t i = 0u; i < RE_CACHE_SIZE; i++) 83 { 84 const re_compiled_code_t *cached_bytecode_p = cache_p[i]; 85 86 if (cached_bytecode_p == NULL) 87 { 88 break; 89 } 90 91 ecma_bytecode_deref ((ecma_compiled_code_t *) cached_bytecode_p); 92 cache_p[i] = NULL; 93 } 94 95 JERRY_CONTEXT (re_cache_idx) = 0; 96} /* re_cache_gc */ 97 98/** 99 * Compilation of RegExp bytecode 100 * 101 * @return pointer to bytecode if compilation was successful 102 * NULL - otherwise 103 */ 104re_compiled_code_t * 105re_compile_bytecode (ecma_string_t *pattern_str_p, /**< pattern */ 106 uint16_t flags) /**< flags */ 107{ 108 re_compiled_code_t *cached_bytecode_p = re_cache_lookup (pattern_str_p, flags); 109 110 if (cached_bytecode_p != NULL) 111 { 112 ecma_bytecode_ref ((ecma_compiled_code_t *) cached_bytecode_p); 113 return cached_bytecode_p; 114 } 115 116 re_compiler_ctx_t re_ctx; 117 re_ctx.flags = flags; 118 re_ctx.captures_count = 1; 119 re_ctx.non_captures_count = 0; 120 121 re_initialize_regexp_bytecode (&re_ctx); 122 123 ECMA_STRING_TO_UTF8_STRING (pattern_str_p, pattern_start_p, pattern_start_size); 124 125 re_ctx.input_start_p = pattern_start_p; 126 re_ctx.input_curr_p = (lit_utf8_byte_t *) pattern_start_p; 127 re_ctx.input_end_p = pattern_start_p + pattern_start_size; 128 re_ctx.groups_count = -1; 129 130 /* Parse RegExp pattern */ 131 ecma_value_t result = re_parse_alternative (&re_ctx, true); 132 133 ECMA_FINALIZE_UTF8_STRING (pattern_start_p, pattern_start_size); 134 135 if (ECMA_IS_VALUE_ERROR (result)) 136 { 137 /* Compilation failed, free bytecode. */ 138 jmem_heap_free_block (re_ctx.bytecode_start_p, re_ctx.bytecode_size); 139 return NULL; 140 } 141 142 /* Align bytecode size to JMEM_ALIGNMENT so that it can be stored in the bytecode header. */ 143 const uint32_t final_size = JERRY_ALIGNUP (re_ctx.bytecode_size, JMEM_ALIGNMENT); 144 re_compiled_code_t *re_compiled_code_p = (re_compiled_code_t *) jmem_heap_realloc_block (re_ctx.bytecode_start_p, 145 re_ctx.bytecode_size, 146 final_size); 147 148 /* Bytecoded will be inserted into the cache and returned to the caller, so refcount is implicitly set to 2. */ 149 re_compiled_code_p->header.refs = 2; 150 re_compiled_code_p->header.size = (uint16_t) (final_size >> JMEM_ALIGNMENT_LOG); 151 re_compiled_code_p->header.status_flags = re_ctx.flags; 152 153 ecma_ref_ecma_string (pattern_str_p); 154 re_compiled_code_p->source = ecma_make_string_value (pattern_str_p); 155 re_compiled_code_p->captures_count = re_ctx.captures_count; 156 re_compiled_code_p->non_captures_count = re_ctx.non_captures_count; 157 158#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) 159 if (JERRY_CONTEXT (jerry_init_flags) & ECMA_INIT_SHOW_REGEXP_OPCODES) 160 { 161 re_dump_bytecode (&re_ctx); 162 } 163#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */ 164 165 uint8_t cache_idx = JERRY_CONTEXT (re_cache_idx); 166 167 if (JERRY_CONTEXT (re_cache)[cache_idx] != NULL) 168 { 169 ecma_bytecode_deref ((ecma_compiled_code_t *) JERRY_CONTEXT (re_cache)[cache_idx]); 170 } 171 172 JERRY_CONTEXT (re_cache)[cache_idx] = re_compiled_code_p; 173 JERRY_CONTEXT (re_cache_idx) = (uint8_t) (cache_idx + 1) % RE_CACHE_SIZE; 174 175 return re_compiled_code_p; 176} /* re_compile_bytecode */ 177 178/** 179 * @} 180 * @} 181 * @} 182 */ 183 184#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 185