1// Copyright 2020 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/regexp/experimental/experimental.h" 6 7#include "src/common/assert-scope.h" 8#include "src/objects/js-regexp-inl.h" 9#include "src/regexp/experimental/experimental-compiler.h" 10#include "src/regexp/experimental/experimental-interpreter.h" 11#include "src/regexp/regexp-parser.h" 12#include "src/utils/ostreams.h" 13 14namespace v8 { 15namespace internal { 16 17bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags, 18 int capture_count) { 19 DCHECK(FLAG_enable_experimental_regexp_engine || 20 FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); 21 return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count); 22} 23 24void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re, 25 Handle<String> source, RegExpFlags flags, 26 int capture_count) { 27 DCHECK(FLAG_enable_experimental_regexp_engine); 28 if (FLAG_trace_experimental_regexp_engine) { 29 StdoutStream{} << "Initializing experimental regexp " << *source 30 << std::endl; 31 } 32 33 isolate->factory()->SetRegExpExperimentalData( 34 re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count); 35} 36 37bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) { 38 DCHECK(FLAG_enable_experimental_regexp_engine); 39 DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL); 40#ifdef VERIFY_HEAP 41 re->JSRegExpVerify(isolate); 42#endif 43 44 static constexpr bool kIsLatin1 = true; 45 return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue); 46} 47 48template <class T> 49Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) { 50 STATIC_ASSERT(std::is_trivial<T>::value); 51 52 int byte_length = sizeof(T) * data.length(); 53 Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length); 54 DisallowGarbageCollection no_gc; 55 MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length); 56 return byte_array; 57} 58 59namespace { 60 61struct CompilationResult { 62 Handle<ByteArray> bytecode; 63 Handle<FixedArray> capture_name_map; 64}; 65 66// Compiles source pattern, but doesn't change the regexp object. 67base::Optional<CompilationResult> CompileImpl(Isolate* isolate, 68 Handle<JSRegExp> regexp) { 69 Zone zone(isolate->allocator(), ZONE_NAME); 70 71 Handle<String> source(regexp->source(), isolate); 72 73 // Parse and compile the regexp source. 74 RegExpCompileData parse_result; 75 DCHECK(!isolate->has_pending_exception()); 76 77 bool parse_success = RegExpParser::ParseRegExpFromHeapString( 78 isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()), 79 &parse_result); 80 if (!parse_success) { 81 // The pattern was already parsed successfully during initialization, so 82 // the only way parsing can fail now is because of stack overflow. 83 DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow); 84 USE(RegExp::ThrowRegExpException(isolate, regexp, source, 85 parse_result.error)); 86 return base::nullopt; 87 } 88 89 ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile( 90 parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone); 91 92 CompilationResult result; 93 result.bytecode = VectorToByteArray(isolate, bytecode.ToVector()); 94 result.capture_name_map = 95 RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures); 96 return result; 97} 98 99} // namespace 100 101bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { 102 DCHECK(FLAG_enable_experimental_regexp_engine); 103 DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL); 104#ifdef VERIFY_HEAP 105 re->JSRegExpVerify(isolate); 106#endif 107 108 Handle<String> source(re->source(), isolate); 109 if (FLAG_trace_experimental_regexp_engine) { 110 StdoutStream{} << "Compiling experimental regexp " << *source << std::endl; 111 } 112 113 base::Optional<CompilationResult> compilation_result = 114 CompileImpl(isolate, re); 115 if (!compilation_result.has_value()) { 116 DCHECK(isolate->has_pending_exception()); 117 return false; 118 } 119 120 re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode); 121 re->set_capture_name_map(compilation_result->capture_name_map); 122 123 return true; 124} 125 126base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) { 127 RegExpInstruction* inst_begin = 128 reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress()); 129 int inst_num = raw_bytes.length() / sizeof(RegExpInstruction); 130 DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length()); 131 return base::Vector<RegExpInstruction>(inst_begin, inst_num); 132} 133 134namespace { 135 136int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin, 137 ByteArray bytecode, String subject, int capture_count, 138 int32_t* output_registers, int32_t output_register_count, 139 int32_t subject_index) { 140 DisallowGarbageCollection no_gc; 141 // TODO(cbruni): remove once gcmole is fixed. 142 DisableGCMole no_gc_mole; 143 144 int register_count_per_match = 145 JSRegExp::RegistersForCaptureCount(capture_count); 146 147 int32_t result; 148 do { 149 DCHECK(subject.IsFlat()); 150 Zone zone(isolate->allocator(), ZONE_NAME); 151 result = ExperimentalRegExpInterpreter::FindMatches( 152 isolate, call_origin, bytecode, register_count_per_match, subject, 153 subject_index, output_registers, output_register_count, &zone); 154 } while (result == RegExp::kInternalRegExpRetry && 155 call_origin == RegExp::kFromRuntime); 156 return result; 157} 158 159} // namespace 160 161// Returns the number of matches. 162int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, 163 RegExp::CallOrigin call_origin, 164 JSRegExp regexp, String subject, 165 int32_t* output_registers, 166 int32_t output_register_count, 167 int32_t subject_index) { 168 DCHECK(FLAG_enable_experimental_regexp_engine); 169 DisallowGarbageCollection no_gc; 170 171 if (FLAG_trace_experimental_regexp_engine) { 172 StdoutStream{} << "Executing experimental regexp " << regexp.source() 173 << std::endl; 174 } 175 176 static constexpr bool kIsLatin1 = true; 177 ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1)); 178 179 return ExecRawImpl(isolate, call_origin, bytecode, subject, 180 regexp.capture_count(), output_registers, 181 output_register_count, subject_index); 182} 183 184int32_t ExperimentalRegExp::MatchForCallFromJs( 185 Address subject, int32_t start_position, Address input_start, 186 Address input_end, int* output_registers, int32_t output_register_count, 187 RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) { 188 DCHECK(FLAG_enable_experimental_regexp_engine); 189 DCHECK_NOT_NULL(isolate); 190 DCHECK_NOT_NULL(output_registers); 191 DCHECK(call_origin == RegExp::CallOrigin::kFromJs); 192 193 DisallowGarbageCollection no_gc; 194 DisallowJavascriptExecution no_js(isolate); 195 DisallowHandleAllocation no_handles; 196 DisallowHandleDereference no_deref; 197 198 String subject_string = String::cast(Object(subject)); 199 200 JSRegExp regexp_obj = JSRegExp::cast(Object(regexp)); 201 202 return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string, 203 output_registers, output_register_count, start_position); 204} 205 206MaybeHandle<Object> ExperimentalRegExp::Exec( 207 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 208 int subject_index, Handle<RegExpMatchInfo> last_match_info, 209 RegExp::ExecQuirks exec_quirks) { 210 DCHECK(FLAG_enable_experimental_regexp_engine); 211 DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL); 212#ifdef VERIFY_HEAP 213 regexp->JSRegExpVerify(isolate); 214#endif 215 216 if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) { 217 DCHECK(isolate->has_pending_exception()); 218 return MaybeHandle<Object>(); 219 } 220 221 DCHECK(IsCompiled(regexp, isolate)); 222 223 subject = String::Flatten(isolate, subject); 224 225 int capture_count = regexp->capture_count(); 226 int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); 227 228 int32_t* output_registers; 229 std::unique_ptr<int32_t[]> output_registers_release; 230 if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { 231 output_registers = isolate->jsregexp_static_offsets_vector(); 232 } else { 233 output_registers = NewArray<int32_t>(output_register_count); 234 output_registers_release.reset(output_registers); 235 } 236 237 int num_matches = 238 ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject, 239 output_registers, output_register_count, subject_index); 240 241 if (num_matches > 0) { 242 DCHECK_EQ(num_matches, 1); 243 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) { 244 if (output_registers[0] >= subject->length()) { 245 return isolate->factory()->null_value(); 246 } 247 } 248 return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 249 capture_count, output_registers); 250 } else if (num_matches == 0) { 251 return isolate->factory()->null_value(); 252 } else { 253 DCHECK_LT(num_matches, 0); 254 DCHECK(isolate->has_pending_exception()); 255 return MaybeHandle<Object>(); 256 } 257} 258 259int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate, 260 Handle<JSRegExp> regexp, 261 Handle<String> subject, 262 int32_t* output_registers, 263 int32_t output_register_count, 264 int32_t subject_index) { 265 DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); 266 267 if (FLAG_trace_experimental_regexp_engine) { 268 StdoutStream{} << "Experimental execution (oneshot) of regexp " 269 << regexp->source() << std::endl; 270 } 271 272 base::Optional<CompilationResult> compilation_result = 273 CompileImpl(isolate, regexp); 274 if (!compilation_result.has_value()) return RegExp::kInternalRegExpException; 275 276 DisallowGarbageCollection no_gc; 277 return ExecRawImpl(isolate, RegExp::kFromRuntime, 278 *compilation_result->bytecode, *subject, 279 regexp->capture_count(), output_registers, 280 output_register_count, subject_index); 281} 282 283MaybeHandle<Object> ExperimentalRegExp::OneshotExec( 284 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 285 int subject_index, Handle<RegExpMatchInfo> last_match_info, 286 RegExp::ExecQuirks exec_quirks) { 287 DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); 288 DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED); 289 290 int capture_count = regexp->capture_count(); 291 int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); 292 293 int32_t* output_registers; 294 std::unique_ptr<int32_t[]> output_registers_release; 295 if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { 296 output_registers = isolate->jsregexp_static_offsets_vector(); 297 } else { 298 output_registers = NewArray<int32_t>(output_register_count); 299 output_registers_release.reset(output_registers); 300 } 301 302 int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers, 303 output_register_count, subject_index); 304 305 if (num_matches > 0) { 306 DCHECK_EQ(num_matches, 1); 307 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) { 308 if (output_registers[0] >= subject->length()) { 309 return isolate->factory()->null_value(); 310 } 311 } 312 return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 313 capture_count, output_registers); 314 } else if (num_matches == 0) { 315 return isolate->factory()->null_value(); 316 } else { 317 DCHECK_LT(num_matches, 0); 318 DCHECK(isolate->has_pending_exception()); 319 return MaybeHandle<Object>(); 320 } 321} 322 323} // namespace internal 324} // namespace v8 325