1 // Copyright 2020 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/regexp/experimental/experimental.h"
6
7 #include "src/common/assert-scope.h"
8 #include "src/objects/js-regexp-inl.h"
9 #include "src/regexp/experimental/experimental-compiler.h"
10 #include "src/regexp/experimental/experimental-interpreter.h"
11 #include "src/regexp/regexp-parser.h"
12 #include "src/utils/ostreams.h"
13
14 namespace v8 {
15 namespace internal {
16
CanBeHandled(RegExpTree* tree, RegExpFlags flags, int capture_count)17 bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags,
18 int capture_count) {
19 DCHECK(FLAG_enable_experimental_regexp_engine ||
20 FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
21 return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
22 }
23
Initialize(Isolate* isolate, Handle<JSRegExp> re, Handle<String> source, RegExpFlags flags, int capture_count)24 void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
25 Handle<String> source, RegExpFlags flags,
26 int capture_count) {
27 DCHECK(FLAG_enable_experimental_regexp_engine);
28 if (FLAG_trace_experimental_regexp_engine) {
29 StdoutStream{} << "Initializing experimental regexp " << *source
30 << std::endl;
31 }
32
33 isolate->factory()->SetRegExpExperimentalData(
34 re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count);
35 }
36
IsCompiled(Handle<JSRegExp> re, Isolate* isolate)37 bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
38 DCHECK(FLAG_enable_experimental_regexp_engine);
39 DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
40 #ifdef VERIFY_HEAP
41 re->JSRegExpVerify(isolate);
42 #endif
43
44 static constexpr bool kIsLatin1 = true;
45 return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue);
46 }
47
48 template <class T>
VectorToByteArray(Isolate* isolate, base::Vector<T> data)49 Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) {
50 STATIC_ASSERT(std::is_trivial<T>::value);
51
52 int byte_length = sizeof(T) * data.length();
53 Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length);
54 DisallowGarbageCollection no_gc;
55 MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length);
56 return byte_array;
57 }
58
59 namespace {
60
61 struct CompilationResult {
62 Handle<ByteArray> bytecode;
63 Handle<FixedArray> capture_name_map;
64 };
65
66 // Compiles source pattern, but doesn't change the regexp object.
CompileImpl(Isolate* isolate, Handle<JSRegExp> regexp)67 base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
68 Handle<JSRegExp> regexp) {
69 Zone zone(isolate->allocator(), ZONE_NAME);
70
71 Handle<String> source(regexp->source(), isolate);
72
73 // Parse and compile the regexp source.
74 RegExpCompileData parse_result;
75 DCHECK(!isolate->has_pending_exception());
76
77 bool parse_success = RegExpParser::ParseRegExpFromHeapString(
78 isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()),
79 &parse_result);
80 if (!parse_success) {
81 // The pattern was already parsed successfully during initialization, so
82 // the only way parsing can fail now is because of stack overflow.
83 DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
84 USE(RegExp::ThrowRegExpException(isolate, regexp, source,
85 parse_result.error));
86 return base::nullopt;
87 }
88
89 ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
90 parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone);
91
92 CompilationResult result;
93 result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
94 result.capture_name_map =
95 RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures);
96 return result;
97 }
98
99 } // namespace
100
Compile(Isolate* isolate, Handle<JSRegExp> re)101 bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
102 DCHECK(FLAG_enable_experimental_regexp_engine);
103 DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
104 #ifdef VERIFY_HEAP
105 re->JSRegExpVerify(isolate);
106 #endif
107
108 Handle<String> source(re->source(), isolate);
109 if (FLAG_trace_experimental_regexp_engine) {
110 StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
111 }
112
113 base::Optional<CompilationResult> compilation_result =
114 CompileImpl(isolate, re);
115 if (!compilation_result.has_value()) {
116 DCHECK(isolate->has_pending_exception());
117 return false;
118 }
119
120 re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode);
121 re->set_capture_name_map(compilation_result->capture_name_map);
122
123 return true;
124 }
125
AsInstructionSequence(ByteArray raw_bytes)126 base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
127 RegExpInstruction* inst_begin =
128 reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
129 int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
130 DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
131 return base::Vector<RegExpInstruction>(inst_begin, inst_num);
132 }
133
134 namespace {
135
ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray bytecode, String subject, int capture_count, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)136 int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
137 ByteArray bytecode, String subject, int capture_count,
138 int32_t* output_registers, int32_t output_register_count,
139 int32_t subject_index) {
140 DisallowGarbageCollection no_gc;
141 // TODO(cbruni): remove once gcmole is fixed.
142 DisableGCMole no_gc_mole;
143
144 int register_count_per_match =
145 JSRegExp::RegistersForCaptureCount(capture_count);
146
147 int32_t result;
148 do {
149 DCHECK(subject.IsFlat());
150 Zone zone(isolate->allocator(), ZONE_NAME);
151 result = ExperimentalRegExpInterpreter::FindMatches(
152 isolate, call_origin, bytecode, register_count_per_match, subject,
153 subject_index, output_registers, output_register_count, &zone);
154 } while (result == RegExp::kInternalRegExpRetry &&
155 call_origin == RegExp::kFromRuntime);
156 return result;
157 }
158
159 } // namespace
160
161 // Returns the number of matches.
ExecRaw(Isolate* isolate, RegExp::CallOrigin call_origin, JSRegExp regexp, String subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)162 int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
163 RegExp::CallOrigin call_origin,
164 JSRegExp regexp, String subject,
165 int32_t* output_registers,
166 int32_t output_register_count,
167 int32_t subject_index) {
168 DCHECK(FLAG_enable_experimental_regexp_engine);
169 DisallowGarbageCollection no_gc;
170
171 if (FLAG_trace_experimental_regexp_engine) {
172 StdoutStream{} << "Executing experimental regexp " << regexp.source()
173 << std::endl;
174 }
175
176 static constexpr bool kIsLatin1 = true;
177 ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1));
178
179 return ExecRawImpl(isolate, call_origin, bytecode, subject,
180 regexp.capture_count(), output_registers,
181 output_register_count, subject_index);
182 }
183
MatchForCallFromJs( Address subject, int32_t start_position, Address input_start, Address input_end, int* output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp)184 int32_t ExperimentalRegExp::MatchForCallFromJs(
185 Address subject, int32_t start_position, Address input_start,
186 Address input_end, int* output_registers, int32_t output_register_count,
187 RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
188 DCHECK(FLAG_enable_experimental_regexp_engine);
189 DCHECK_NOT_NULL(isolate);
190 DCHECK_NOT_NULL(output_registers);
191 DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
192
193 DisallowGarbageCollection no_gc;
194 DisallowJavascriptExecution no_js(isolate);
195 DisallowHandleAllocation no_handles;
196 DisallowHandleDereference no_deref;
197
198 String subject_string = String::cast(Object(subject));
199
200 JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
201
202 return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string,
203 output_registers, output_register_count, start_position);
204 }
205
Exec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)206 MaybeHandle<Object> ExperimentalRegExp::Exec(
207 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
208 int subject_index, Handle<RegExpMatchInfo> last_match_info,
209 RegExp::ExecQuirks exec_quirks) {
210 DCHECK(FLAG_enable_experimental_regexp_engine);
211 DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL);
212 #ifdef VERIFY_HEAP
213 regexp->JSRegExpVerify(isolate);
214 #endif
215
216 if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) {
217 DCHECK(isolate->has_pending_exception());
218 return MaybeHandle<Object>();
219 }
220
221 DCHECK(IsCompiled(regexp, isolate));
222
223 subject = String::Flatten(isolate, subject);
224
225 int capture_count = regexp->capture_count();
226 int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
227
228 int32_t* output_registers;
229 std::unique_ptr<int32_t[]> output_registers_release;
230 if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
231 output_registers = isolate->jsregexp_static_offsets_vector();
232 } else {
233 output_registers = NewArray<int32_t>(output_register_count);
234 output_registers_release.reset(output_registers);
235 }
236
237 int num_matches =
238 ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
239 output_registers, output_register_count, subject_index);
240
241 if (num_matches > 0) {
242 DCHECK_EQ(num_matches, 1);
243 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
244 if (output_registers[0] >= subject->length()) {
245 return isolate->factory()->null_value();
246 }
247 }
248 return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
249 capture_count, output_registers);
250 } else if (num_matches == 0) {
251 return isolate->factory()->null_value();
252 } else {
253 DCHECK_LT(num_matches, 0);
254 DCHECK(isolate->has_pending_exception());
255 return MaybeHandle<Object>();
256 }
257 }
258
OneshotExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)259 int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate,
260 Handle<JSRegExp> regexp,
261 Handle<String> subject,
262 int32_t* output_registers,
263 int32_t output_register_count,
264 int32_t subject_index) {
265 DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
266
267 if (FLAG_trace_experimental_regexp_engine) {
268 StdoutStream{} << "Experimental execution (oneshot) of regexp "
269 << regexp->source() << std::endl;
270 }
271
272 base::Optional<CompilationResult> compilation_result =
273 CompileImpl(isolate, regexp);
274 if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;
275
276 DisallowGarbageCollection no_gc;
277 return ExecRawImpl(isolate, RegExp::kFromRuntime,
278 *compilation_result->bytecode, *subject,
279 regexp->capture_count(), output_registers,
280 output_register_count, subject_index);
281 }
282
OneshotExec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)283 MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
284 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
285 int subject_index, Handle<RegExpMatchInfo> last_match_info,
286 RegExp::ExecQuirks exec_quirks) {
287 DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
288 DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED);
289
290 int capture_count = regexp->capture_count();
291 int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
292
293 int32_t* output_registers;
294 std::unique_ptr<int32_t[]> output_registers_release;
295 if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
296 output_registers = isolate->jsregexp_static_offsets_vector();
297 } else {
298 output_registers = NewArray<int32_t>(output_register_count);
299 output_registers_release.reset(output_registers);
300 }
301
302 int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers,
303 output_register_count, subject_index);
304
305 if (num_matches > 0) {
306 DCHECK_EQ(num_matches, 1);
307 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
308 if (output_registers[0] >= subject->length()) {
309 return isolate->factory()->null_value();
310 }
311 }
312 return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
313 capture_count, output_registers);
314 } else if (num_matches == 0) {
315 return isolate->factory()->null_value();
316 } else {
317 DCHECK_LT(num_matches, 0);
318 DCHECK(isolate->has_pending_exception());
319 return MaybeHandle<Object>();
320 }
321 }
322
323 } // namespace internal
324 } // namespace v8
325