1// Copyright 2020 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/regexp/experimental/experimental.h"
6
7#include "src/common/assert-scope.h"
8#include "src/objects/js-regexp-inl.h"
9#include "src/regexp/experimental/experimental-compiler.h"
10#include "src/regexp/experimental/experimental-interpreter.h"
11#include "src/regexp/regexp-parser.h"
12#include "src/utils/ostreams.h"
13
14namespace v8 {
15namespace internal {
16
17bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags,
18                                      int capture_count) {
19  DCHECK(FLAG_enable_experimental_regexp_engine ||
20         FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
21  return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
22}
23
24void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
25                                    Handle<String> source, RegExpFlags flags,
26                                    int capture_count) {
27  DCHECK(FLAG_enable_experimental_regexp_engine);
28  if (FLAG_trace_experimental_regexp_engine) {
29    StdoutStream{} << "Initializing experimental regexp " << *source
30                   << std::endl;
31  }
32
33  isolate->factory()->SetRegExpExperimentalData(
34      re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count);
35}
36
37bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
38  DCHECK(FLAG_enable_experimental_regexp_engine);
39  DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
40#ifdef VERIFY_HEAP
41  re->JSRegExpVerify(isolate);
42#endif
43
44  static constexpr bool kIsLatin1 = true;
45  return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue);
46}
47
48template <class T>
49Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) {
50  STATIC_ASSERT(std::is_trivial<T>::value);
51
52  int byte_length = sizeof(T) * data.length();
53  Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length);
54  DisallowGarbageCollection no_gc;
55  MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length);
56  return byte_array;
57}
58
59namespace {
60
61struct CompilationResult {
62  Handle<ByteArray> bytecode;
63  Handle<FixedArray> capture_name_map;
64};
65
66// Compiles source pattern, but doesn't change the regexp object.
67base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
68                                              Handle<JSRegExp> regexp) {
69  Zone zone(isolate->allocator(), ZONE_NAME);
70
71  Handle<String> source(regexp->source(), isolate);
72
73  // Parse and compile the regexp source.
74  RegExpCompileData parse_result;
75  DCHECK(!isolate->has_pending_exception());
76
77  bool parse_success = RegExpParser::ParseRegExpFromHeapString(
78      isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()),
79      &parse_result);
80  if (!parse_success) {
81    // The pattern was already parsed successfully during initialization, so
82    // the only way parsing can fail now is because of stack overflow.
83    DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
84    USE(RegExp::ThrowRegExpException(isolate, regexp, source,
85                                     parse_result.error));
86    return base::nullopt;
87  }
88
89  ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
90      parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone);
91
92  CompilationResult result;
93  result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
94  result.capture_name_map =
95      RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures);
96  return result;
97}
98
99}  // namespace
100
101bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
102  DCHECK(FLAG_enable_experimental_regexp_engine);
103  DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
104#ifdef VERIFY_HEAP
105  re->JSRegExpVerify(isolate);
106#endif
107
108  Handle<String> source(re->source(), isolate);
109  if (FLAG_trace_experimental_regexp_engine) {
110    StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
111  }
112
113  base::Optional<CompilationResult> compilation_result =
114      CompileImpl(isolate, re);
115  if (!compilation_result.has_value()) {
116    DCHECK(isolate->has_pending_exception());
117    return false;
118  }
119
120  re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode);
121  re->set_capture_name_map(compilation_result->capture_name_map);
122
123  return true;
124}
125
126base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
127  RegExpInstruction* inst_begin =
128      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
129  int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
130  DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
131  return base::Vector<RegExpInstruction>(inst_begin, inst_num);
132}
133
134namespace {
135
136int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
137                    ByteArray bytecode, String subject, int capture_count,
138                    int32_t* output_registers, int32_t output_register_count,
139                    int32_t subject_index) {
140  DisallowGarbageCollection no_gc;
141  // TODO(cbruni): remove once gcmole is fixed.
142  DisableGCMole no_gc_mole;
143
144  int register_count_per_match =
145      JSRegExp::RegistersForCaptureCount(capture_count);
146
147  int32_t result;
148  do {
149    DCHECK(subject.IsFlat());
150    Zone zone(isolate->allocator(), ZONE_NAME);
151    result = ExperimentalRegExpInterpreter::FindMatches(
152        isolate, call_origin, bytecode, register_count_per_match, subject,
153        subject_index, output_registers, output_register_count, &zone);
154  } while (result == RegExp::kInternalRegExpRetry &&
155           call_origin == RegExp::kFromRuntime);
156  return result;
157}
158
159}  // namespace
160
161// Returns the number of matches.
162int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
163                                    RegExp::CallOrigin call_origin,
164                                    JSRegExp regexp, String subject,
165                                    int32_t* output_registers,
166                                    int32_t output_register_count,
167                                    int32_t subject_index) {
168  DCHECK(FLAG_enable_experimental_regexp_engine);
169  DisallowGarbageCollection no_gc;
170
171  if (FLAG_trace_experimental_regexp_engine) {
172    StdoutStream{} << "Executing experimental regexp " << regexp.source()
173                   << std::endl;
174  }
175
176  static constexpr bool kIsLatin1 = true;
177  ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1));
178
179  return ExecRawImpl(isolate, call_origin, bytecode, subject,
180                     regexp.capture_count(), output_registers,
181                     output_register_count, subject_index);
182}
183
184int32_t ExperimentalRegExp::MatchForCallFromJs(
185    Address subject, int32_t start_position, Address input_start,
186    Address input_end, int* output_registers, int32_t output_register_count,
187    RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
188  DCHECK(FLAG_enable_experimental_regexp_engine);
189  DCHECK_NOT_NULL(isolate);
190  DCHECK_NOT_NULL(output_registers);
191  DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
192
193  DisallowGarbageCollection no_gc;
194  DisallowJavascriptExecution no_js(isolate);
195  DisallowHandleAllocation no_handles;
196  DisallowHandleDereference no_deref;
197
198  String subject_string = String::cast(Object(subject));
199
200  JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
201
202  return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string,
203                 output_registers, output_register_count, start_position);
204}
205
206MaybeHandle<Object> ExperimentalRegExp::Exec(
207    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
208    int subject_index, Handle<RegExpMatchInfo> last_match_info,
209    RegExp::ExecQuirks exec_quirks) {
210  DCHECK(FLAG_enable_experimental_regexp_engine);
211  DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL);
212#ifdef VERIFY_HEAP
213  regexp->JSRegExpVerify(isolate);
214#endif
215
216  if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) {
217    DCHECK(isolate->has_pending_exception());
218    return MaybeHandle<Object>();
219  }
220
221  DCHECK(IsCompiled(regexp, isolate));
222
223  subject = String::Flatten(isolate, subject);
224
225  int capture_count = regexp->capture_count();
226  int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
227
228  int32_t* output_registers;
229  std::unique_ptr<int32_t[]> output_registers_release;
230  if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
231    output_registers = isolate->jsregexp_static_offsets_vector();
232  } else {
233    output_registers = NewArray<int32_t>(output_register_count);
234    output_registers_release.reset(output_registers);
235  }
236
237  int num_matches =
238      ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
239              output_registers, output_register_count, subject_index);
240
241  if (num_matches > 0) {
242    DCHECK_EQ(num_matches, 1);
243    if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
244      if (output_registers[0] >= subject->length()) {
245        return isolate->factory()->null_value();
246      }
247    }
248    return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
249                                    capture_count, output_registers);
250  } else if (num_matches == 0) {
251    return isolate->factory()->null_value();
252  } else {
253    DCHECK_LT(num_matches, 0);
254    DCHECK(isolate->has_pending_exception());
255    return MaybeHandle<Object>();
256  }
257}
258
259int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate,
260                                           Handle<JSRegExp> regexp,
261                                           Handle<String> subject,
262                                           int32_t* output_registers,
263                                           int32_t output_register_count,
264                                           int32_t subject_index) {
265  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
266
267  if (FLAG_trace_experimental_regexp_engine) {
268    StdoutStream{} << "Experimental execution (oneshot) of regexp "
269                   << regexp->source() << std::endl;
270  }
271
272  base::Optional<CompilationResult> compilation_result =
273      CompileImpl(isolate, regexp);
274  if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;
275
276  DisallowGarbageCollection no_gc;
277  return ExecRawImpl(isolate, RegExp::kFromRuntime,
278                     *compilation_result->bytecode, *subject,
279                     regexp->capture_count(), output_registers,
280                     output_register_count, subject_index);
281}
282
283MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
284    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
285    int subject_index, Handle<RegExpMatchInfo> last_match_info,
286    RegExp::ExecQuirks exec_quirks) {
287  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
288  DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED);
289
290  int capture_count = regexp->capture_count();
291  int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
292
293  int32_t* output_registers;
294  std::unique_ptr<int32_t[]> output_registers_release;
295  if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
296    output_registers = isolate->jsregexp_static_offsets_vector();
297  } else {
298    output_registers = NewArray<int32_t>(output_register_count);
299    output_registers_release.reset(output_registers);
300  }
301
302  int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers,
303                                   output_register_count, subject_index);
304
305  if (num_matches > 0) {
306    DCHECK_EQ(num_matches, 1);
307    if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
308      if (output_registers[0] >= subject->length()) {
309        return isolate->factory()->null_value();
310      }
311    }
312    return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
313                                    capture_count, output_registers);
314  } else if (num_matches == 0) {
315    return isolate->factory()->null_value();
316  } else {
317    DCHECK_LT(num_matches, 0);
318    DCHECK(isolate->has_pending_exception());
319    return MaybeHandle<Object>();
320  }
321}
322
323}  // namespace internal
324}  // namespace v8
325