1 // Copyright 2020 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/regexp/experimental/experimental.h"
6 
7 #include "src/common/assert-scope.h"
8 #include "src/objects/js-regexp-inl.h"
9 #include "src/regexp/experimental/experimental-compiler.h"
10 #include "src/regexp/experimental/experimental-interpreter.h"
11 #include "src/regexp/regexp-parser.h"
12 #include "src/utils/ostreams.h"
13 
14 namespace v8 {
15 namespace internal {
16 
CanBeHandled(RegExpTree* tree, RegExpFlags flags, int capture_count)17 bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags,
18                                       int capture_count) {
19   DCHECK(FLAG_enable_experimental_regexp_engine ||
20          FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
21   return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
22 }
23 
Initialize(Isolate* isolate, Handle<JSRegExp> re, Handle<String> source, RegExpFlags flags, int capture_count)24 void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
25                                     Handle<String> source, RegExpFlags flags,
26                                     int capture_count) {
27   DCHECK(FLAG_enable_experimental_regexp_engine);
28   if (FLAG_trace_experimental_regexp_engine) {
29     StdoutStream{} << "Initializing experimental regexp " << *source
30                    << std::endl;
31   }
32 
33   isolate->factory()->SetRegExpExperimentalData(
34       re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count);
35 }
36 
IsCompiled(Handle<JSRegExp> re, Isolate* isolate)37 bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
38   DCHECK(FLAG_enable_experimental_regexp_engine);
39   DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
40 #ifdef VERIFY_HEAP
41   re->JSRegExpVerify(isolate);
42 #endif
43 
44   static constexpr bool kIsLatin1 = true;
45   return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue);
46 }
47 
48 template <class T>
VectorToByteArray(Isolate* isolate, base::Vector<T> data)49 Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) {
50   STATIC_ASSERT(std::is_trivial<T>::value);
51 
52   int byte_length = sizeof(T) * data.length();
53   Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length);
54   DisallowGarbageCollection no_gc;
55   MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length);
56   return byte_array;
57 }
58 
59 namespace {
60 
61 struct CompilationResult {
62   Handle<ByteArray> bytecode;
63   Handle<FixedArray> capture_name_map;
64 };
65 
66 // Compiles source pattern, but doesn't change the regexp object.
CompileImpl(Isolate* isolate, Handle<JSRegExp> regexp)67 base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
68                                               Handle<JSRegExp> regexp) {
69   Zone zone(isolate->allocator(), ZONE_NAME);
70 
71   Handle<String> source(regexp->source(), isolate);
72 
73   // Parse and compile the regexp source.
74   RegExpCompileData parse_result;
75   DCHECK(!isolate->has_pending_exception());
76 
77   bool parse_success = RegExpParser::ParseRegExpFromHeapString(
78       isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()),
79       &parse_result);
80   if (!parse_success) {
81     // The pattern was already parsed successfully during initialization, so
82     // the only way parsing can fail now is because of stack overflow.
83     DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
84     USE(RegExp::ThrowRegExpException(isolate, regexp, source,
85                                      parse_result.error));
86     return base::nullopt;
87   }
88 
89   ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
90       parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone);
91 
92   CompilationResult result;
93   result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
94   result.capture_name_map =
95       RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures);
96   return result;
97 }
98 
99 }  // namespace
100 
Compile(Isolate* isolate, Handle<JSRegExp> re)101 bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
102   DCHECK(FLAG_enable_experimental_regexp_engine);
103   DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
104 #ifdef VERIFY_HEAP
105   re->JSRegExpVerify(isolate);
106 #endif
107 
108   Handle<String> source(re->source(), isolate);
109   if (FLAG_trace_experimental_regexp_engine) {
110     StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
111   }
112 
113   base::Optional<CompilationResult> compilation_result =
114       CompileImpl(isolate, re);
115   if (!compilation_result.has_value()) {
116     DCHECK(isolate->has_pending_exception());
117     return false;
118   }
119 
120   re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode);
121   re->set_capture_name_map(compilation_result->capture_name_map);
122 
123   return true;
124 }
125 
AsInstructionSequence(ByteArray raw_bytes)126 base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
127   RegExpInstruction* inst_begin =
128       reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
129   int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
130   DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
131   return base::Vector<RegExpInstruction>(inst_begin, inst_num);
132 }
133 
134 namespace {
135 
ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray bytecode, String subject, int capture_count, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)136 int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
137                     ByteArray bytecode, String subject, int capture_count,
138                     int32_t* output_registers, int32_t output_register_count,
139                     int32_t subject_index) {
140   DisallowGarbageCollection no_gc;
141   // TODO(cbruni): remove once gcmole is fixed.
142   DisableGCMole no_gc_mole;
143 
144   int register_count_per_match =
145       JSRegExp::RegistersForCaptureCount(capture_count);
146 
147   int32_t result;
148   do {
149     DCHECK(subject.IsFlat());
150     Zone zone(isolate->allocator(), ZONE_NAME);
151     result = ExperimentalRegExpInterpreter::FindMatches(
152         isolate, call_origin, bytecode, register_count_per_match, subject,
153         subject_index, output_registers, output_register_count, &zone);
154   } while (result == RegExp::kInternalRegExpRetry &&
155            call_origin == RegExp::kFromRuntime);
156   return result;
157 }
158 
159 }  // namespace
160 
161 // Returns the number of matches.
ExecRaw(Isolate* isolate, RegExp::CallOrigin call_origin, JSRegExp regexp, String subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)162 int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
163                                     RegExp::CallOrigin call_origin,
164                                     JSRegExp regexp, String subject,
165                                     int32_t* output_registers,
166                                     int32_t output_register_count,
167                                     int32_t subject_index) {
168   DCHECK(FLAG_enable_experimental_regexp_engine);
169   DisallowGarbageCollection no_gc;
170 
171   if (FLAG_trace_experimental_regexp_engine) {
172     StdoutStream{} << "Executing experimental regexp " << regexp.source()
173                    << std::endl;
174   }
175 
176   static constexpr bool kIsLatin1 = true;
177   ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1));
178 
179   return ExecRawImpl(isolate, call_origin, bytecode, subject,
180                      regexp.capture_count(), output_registers,
181                      output_register_count, subject_index);
182 }
183 
MatchForCallFromJs( Address subject, int32_t start_position, Address input_start, Address input_end, int* output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp)184 int32_t ExperimentalRegExp::MatchForCallFromJs(
185     Address subject, int32_t start_position, Address input_start,
186     Address input_end, int* output_registers, int32_t output_register_count,
187     RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
188   DCHECK(FLAG_enable_experimental_regexp_engine);
189   DCHECK_NOT_NULL(isolate);
190   DCHECK_NOT_NULL(output_registers);
191   DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
192 
193   DisallowGarbageCollection no_gc;
194   DisallowJavascriptExecution no_js(isolate);
195   DisallowHandleAllocation no_handles;
196   DisallowHandleDereference no_deref;
197 
198   String subject_string = String::cast(Object(subject));
199 
200   JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
201 
202   return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string,
203                  output_registers, output_register_count, start_position);
204 }
205 
Exec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)206 MaybeHandle<Object> ExperimentalRegExp::Exec(
207     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
208     int subject_index, Handle<RegExpMatchInfo> last_match_info,
209     RegExp::ExecQuirks exec_quirks) {
210   DCHECK(FLAG_enable_experimental_regexp_engine);
211   DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL);
212 #ifdef VERIFY_HEAP
213   regexp->JSRegExpVerify(isolate);
214 #endif
215 
216   if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) {
217     DCHECK(isolate->has_pending_exception());
218     return MaybeHandle<Object>();
219   }
220 
221   DCHECK(IsCompiled(regexp, isolate));
222 
223   subject = String::Flatten(isolate, subject);
224 
225   int capture_count = regexp->capture_count();
226   int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
227 
228   int32_t* output_registers;
229   std::unique_ptr<int32_t[]> output_registers_release;
230   if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
231     output_registers = isolate->jsregexp_static_offsets_vector();
232   } else {
233     output_registers = NewArray<int32_t>(output_register_count);
234     output_registers_release.reset(output_registers);
235   }
236 
237   int num_matches =
238       ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
239               output_registers, output_register_count, subject_index);
240 
241   if (num_matches > 0) {
242     DCHECK_EQ(num_matches, 1);
243     if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
244       if (output_registers[0] >= subject->length()) {
245         return isolate->factory()->null_value();
246       }
247     }
248     return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
249                                     capture_count, output_registers);
250   } else if (num_matches == 0) {
251     return isolate->factory()->null_value();
252   } else {
253     DCHECK_LT(num_matches, 0);
254     DCHECK(isolate->has_pending_exception());
255     return MaybeHandle<Object>();
256   }
257 }
258 
OneshotExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index)259 int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate,
260                                            Handle<JSRegExp> regexp,
261                                            Handle<String> subject,
262                                            int32_t* output_registers,
263                                            int32_t output_register_count,
264                                            int32_t subject_index) {
265   DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
266 
267   if (FLAG_trace_experimental_regexp_engine) {
268     StdoutStream{} << "Experimental execution (oneshot) of regexp "
269                    << regexp->source() << std::endl;
270   }
271 
272   base::Optional<CompilationResult> compilation_result =
273       CompileImpl(isolate, regexp);
274   if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;
275 
276   DisallowGarbageCollection no_gc;
277   return ExecRawImpl(isolate, RegExp::kFromRuntime,
278                      *compilation_result->bytecode, *subject,
279                      regexp->capture_count(), output_registers,
280                      output_register_count, subject_index);
281 }
282 
OneshotExec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)283 MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
284     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
285     int subject_index, Handle<RegExpMatchInfo> last_match_info,
286     RegExp::ExecQuirks exec_quirks) {
287   DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
288   DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED);
289 
290   int capture_count = regexp->capture_count();
291   int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
292 
293   int32_t* output_registers;
294   std::unique_ptr<int32_t[]> output_registers_release;
295   if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
296     output_registers = isolate->jsregexp_static_offsets_vector();
297   } else {
298     output_registers = NewArray<int32_t>(output_register_count);
299     output_registers_release.reset(output_registers);
300   }
301 
302   int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers,
303                                    output_register_count, subject_index);
304 
305   if (num_matches > 0) {
306     DCHECK_EQ(num_matches, 1);
307     if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
308       if (output_registers[0] >= subject->length()) {
309         return isolate->factory()->null_value();
310       }
311     }
312     return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
313                                     capture_count, output_registers);
314   } else if (num_matches == 0) {
315     return isolate->factory()->null_value();
316   } else {
317     DCHECK_LT(num_matches, 0);
318     DCHECK(isolate->has_pending_exception());
319     return MaybeHandle<Object>();
320   }
321 }
322 
323 }  // namespace internal
324 }  // namespace v8
325