1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // A simple interpreter for the Irregexp byte code.
6 
7 #include "src/regexp/regexp-interpreter.h"
8 
9 #include "src/base/small-vector.h"
10 #include "src/base/strings.h"
11 #include "src/execution/isolate.h"
12 #include "src/logging/counters.h"
13 #include "src/objects/js-regexp-inl.h"
14 #include "src/objects/string-inl.h"
15 #include "src/regexp/regexp-bytecodes.h"
16 #include "src/regexp/regexp-macro-assembler.h"
17 #include "src/regexp/regexp-stack.h"  // For kMaximumStackSize.
18 #include "src/regexp/regexp.h"
19 #include "src/strings/unicode.h"
20 #include "src/utils/memcopy.h"
21 #include "src/utils/utils.h"
22 
23 #ifdef V8_INTL_SUPPORT
24 #include "unicode/uchar.h"
25 #endif  // V8_INTL_SUPPORT
26 
27 // Use token threaded dispatch iff the compiler supports computed gotos and the
28 // build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
29 #if V8_HAS_COMPUTED_GOTO && \
30     defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
31 #define V8_USE_COMPUTED_GOTO 1
32 #endif  // V8_HAS_COMPUTED_GOTO
33 
34 namespace v8 {
35 namespace internal {
36 
37 namespace {
38 
BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, base::Vector<const base::uc16> subject, bool unicode)39 bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
40                           base::Vector<const base::uc16> subject,
41                           bool unicode) {
42   Address offset_a =
43       reinterpret_cast<Address>(const_cast<base::uc16*>(&subject.at(from)));
44   Address offset_b =
45       reinterpret_cast<Address>(const_cast<base::uc16*>(&subject.at(current)));
46   size_t length = len * base::kUC16Size;
47 
48   bool result = unicode
49                     ? RegExpMacroAssembler::CaseInsensitiveCompareUnicode(
50                           offset_a, offset_b, length, isolate)
51                     : RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
52                           offset_a, offset_b, length, isolate);
53   return result == 1;
54 }
55 
BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, base::Vector<const uint8_t> subject, bool unicode)56 bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
57                           base::Vector<const uint8_t> subject, bool unicode) {
58   // For Latin1 characters the unicode flag makes no difference.
59   for (int i = 0; i < len; i++) {
60     unsigned int old_char = subject[from++];
61     unsigned int new_char = subject[current++];
62     if (old_char == new_char) continue;
63     // Convert both characters to lower case.
64     old_char |= 0x20;
65     new_char |= 0x20;
66     if (old_char != new_char) return false;
67     // Not letters in the ASCII range and Latin-1 range.
68     if (!(old_char - 'a' <= 'z' - 'a') &&
69         !(old_char - 224 <= 254 - 224 && old_char != 247)) {
70       return false;
71     }
72   }
73   return true;
74 }
75 
#ifdef DEBUG
// Debug-only tracing hook invoked at the start of every bytecode handler.
//
// When FLAG_trace_regexp_bytecodes is set, prints the offset of |pc| relative
// to |code_base|, the backtrack stack depth, the current subject position and
// the current character, followed by a disassembly of the instruction at
// |pc|. |bytecode_length| and |bytecode_name| are accepted but not used here.
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
                           int stack_depth, int current_position,
                           uint32_t current_char, int bytecode_length,
                           const char* bytecode_name) {
  if (!FLAG_trace_regexp_bytecodes) return;

  // std::isprint has undefined behavior for arguments that are neither EOF
  // nor representable as unsigned char. current_char is a 32-bit value that
  // can hold several packed characters, so range-check it first.
  const bool printable =
      current_char <= 0xFF && std::isprint(static_cast<int>(current_char));
  const char* format =
      printable
          ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
          : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
  // %02x consumes an unsigned int while the pointer difference is a
  // ptrdiff_t, and %c consumes an int; convert explicitly so the variadic
  // arguments match their conversion specifiers.
  PrintF(format, static_cast<unsigned int>(pc - code_base), stack_depth,
         current_position, current_char,
         printable ? static_cast<int>(current_char) : '.');

  RegExpBytecodeDisassembleSingle(code_base, pc);
}
#endif  // DEBUG
94 
Load32Aligned(const byte* pc)95 int32_t Load32Aligned(const byte* pc) {
96   DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
97   return *reinterpret_cast<const int32_t*>(pc);
98 }
99 
100 // TODO(jgruber): Rename to Load16AlignedUnsigned.
Load16Aligned(const byte* pc)101 uint32_t Load16Aligned(const byte* pc) {
102   DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
103   return *reinterpret_cast<const uint16_t*>(pc);
104 }
105 
Load16AlignedSigned(const byte* pc)106 int32_t Load16AlignedSigned(const byte* pc) {
107   DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
108   return *reinterpret_cast<const int16_t*>(pc);
109 }
110 
111 // Helpers to access the packed argument. Takes the 32 bits containing the
112 // current bytecode, where the 8 LSB contain the bytecode and the rest contains
113 // a packed 24-bit argument.
114 // TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
115 // police restrictions during bytecode generation.
LoadPacked24Signed(int32_t bytecode_and_packed_arg)116 int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {
117   return bytecode_and_packed_arg >> BYTECODE_SHIFT;
118 }
LoadPacked24Unsigned(int32_t bytecode_and_packed_arg)119 uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {
120   return static_cast<uint32_t>(bytecode_and_packed_arg) >> BYTECODE_SHIFT;
121 }
122 
123 // A simple abstraction over the backtracking stack used by the interpreter.
124 //
125 // Despite the name 'backtracking' stack, it's actually used as a generic stack
126 // that stores both program counters (= offsets into the bytecode) and generic
127 // integer values.
128 class BacktrackStack {
129  public:
130   BacktrackStack() = default;
131   BacktrackStack(const BacktrackStack&) = delete;
132   BacktrackStack& operator=(const BacktrackStack&) = delete;
133 
push(int v)134   V8_WARN_UNUSED_RESULT bool push(int v) {
135     data_.emplace_back(v);
136     return (static_cast<int>(data_.size()) <= kMaxSize);
137   }
peek() const138   int peek() const {
139     DCHECK(!data_.empty());
140     return data_.back();
141   }
pop()142   int pop() {
143     int v = peek();
144     data_.pop_back();
145     return v;
146   }
147 
148   // The 'sp' is the index of the first empty element in the stack.
sp() const149   int sp() const { return static_cast<int>(data_.size()); }
set_sp(int new_sp)150   void set_sp(int new_sp) {
151     DCHECK_LE(new_sp, sp());
152     data_.resize_no_init(new_sp);
153   }
154 
155  private:
156   // Semi-arbitrary. Should be large enough for common cases to remain in the
157   // static stack-allocated backing store, but small enough not to waste space.
158   static constexpr int kStaticCapacity = 64;
159 
160   using ValueT = int;
161   base::SmallVector<ValueT, kStaticCapacity> data_;
162 
163   static constexpr int kMaxSize =
164       RegExpStack::kMaximumStackSize / sizeof(ValueT);
165 };
166 
167 // Registers used during interpreter execution. These consist of output
168 // registers in indices [0, output_register_count[ which will contain matcher
169 // results as a {start,end} index tuple for each capture (where the whole match
170 // counts as implicit capture 0); and internal registers in indices
171 // [output_register_count, total_register_count[.
172 class InterpreterRegisters {
173  public:
174   using RegisterT = int;
175 
InterpreterRegisters(int total_register_count, RegisterT* output_registers, int output_register_count)176   InterpreterRegisters(int total_register_count, RegisterT* output_registers,
177                        int output_register_count)
178       : registers_(total_register_count),
179         output_registers_(output_registers),
180         output_register_count_(output_register_count) {
181     // TODO(jgruber): Use int32_t consistently for registers. Currently, CSA
182     // uses int32_t while runtime uses int.
183     STATIC_ASSERT(sizeof(int) == sizeof(int32_t));
184     DCHECK_GE(output_register_count, 2);  // At least 2 for the match itself.
185     DCHECK_GE(total_register_count, output_register_count);
186     DCHECK_LE(total_register_count, RegExpMacroAssembler::kMaxRegisterCount);
187     DCHECK_NOT_NULL(output_registers);
188 
189     // Initialize the output register region to -1 signifying 'no match'.
190     std::memset(registers_.data(), -1,
191                 output_register_count * sizeof(RegisterT));
192   }
193 
operator [](size_t index) const194   const RegisterT& operator[](size_t index) const { return registers_[index]; }
operator [](size_t index)195   RegisterT& operator[](size_t index) { return registers_[index]; }
196 
CopyToOutputRegisters()197   void CopyToOutputRegisters() {
198     MemCopy(output_registers_, registers_.data(),
199             output_register_count_ * sizeof(RegisterT));
200   }
201 
202  private:
203   static constexpr int kStaticCapacity = 64;  // Arbitrary.
204   base::SmallVector<RegisterT, kStaticCapacity> registers_;
205   RegisterT* const output_registers_;
206   const int output_register_count_;
207 };
208 
ThrowStackOverflow(Isolate* isolate, RegExp::CallOrigin call_origin)209 IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
210                                                RegExp::CallOrigin call_origin) {
211   CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
212   // We abort interpreter execution after the stack overflow is thrown, and thus
213   // allow allocation here despite the outer DisallowGarbageCollectionScope.
214   AllowGarbageCollection yes_gc;
215   isolate->StackOverflow();
216   return IrregexpInterpreter::EXCEPTION;
217 }
218 
219 // Only throws if called from the runtime, otherwise just returns the EXCEPTION
220 // status code.
MaybeThrowStackOverflow( Isolate* isolate, RegExp::CallOrigin call_origin)221 IrregexpInterpreter::Result MaybeThrowStackOverflow(
222     Isolate* isolate, RegExp::CallOrigin call_origin) {
223   if (call_origin == RegExp::CallOrigin::kFromRuntime) {
224     return ThrowStackOverflow(isolate, call_origin);
225   } else {
226     return IrregexpInterpreter::EXCEPTION;
227   }
228 }
229 
230 template <typename Char>
UpdateCodeAndSubjectReferences( Isolate* isolate, Handle<ByteArray> code_array, Handle<String> subject_string, ByteArray* code_array_out, const byte** code_base_out, const byte** pc_out, String* subject_string_out, base::Vector<const Char>* subject_string_vector_out)231 void UpdateCodeAndSubjectReferences(
232     Isolate* isolate, Handle<ByteArray> code_array,
233     Handle<String> subject_string, ByteArray* code_array_out,
234     const byte** code_base_out, const byte** pc_out, String* subject_string_out,
235     base::Vector<const Char>* subject_string_vector_out) {
236   DisallowGarbageCollection no_gc;
237 
238   if (*code_base_out != code_array->GetDataStartAddress()) {
239     *code_array_out = *code_array;
240     const intptr_t pc_offset = *pc_out - *code_base_out;
241     DCHECK_GT(pc_offset, 0);
242     *code_base_out = code_array->GetDataStartAddress();
243     *pc_out = *code_base_out + pc_offset;
244   }
245 
246   DCHECK(subject_string->IsFlat());
247   *subject_string_out = *subject_string;
248   *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
249 }
250 
251 // Runs all pending interrupts and updates unhandlified object references if
252 // necessary.
253 template <typename Char>
HandleInterrupts( Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out, String* subject_string_out, const byte** code_base_out, base::Vector<const Char>* subject_string_vector_out, const byte** pc_out)254 IrregexpInterpreter::Result HandleInterrupts(
255     Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
256     String* subject_string_out, const byte** code_base_out,
257     base::Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
258   DisallowGarbageCollection no_gc;
259 
260   StackLimitCheck check(isolate);
261   bool js_has_overflowed = check.JsHasOverflowed();
262 
263   if (call_origin == RegExp::CallOrigin::kFromJs) {
264     // Direct calls from JavaScript can be interrupted in two ways:
265     // 1. A real stack overflow, in which case we let the caller throw the
266     //    exception.
267     // 2. The stack guard was used to interrupt execution for another purpose,
268     //    forcing the call through the runtime system.
269     if (js_has_overflowed) {
270       return IrregexpInterpreter::EXCEPTION;
271     } else if (check.InterruptRequested()) {
272       return IrregexpInterpreter::RETRY;
273     }
274   } else {
275     DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
276     // Prepare for possible GC.
277     HandleScope handles(isolate);
278     Handle<ByteArray> code_handle(*code_array_out, isolate);
279     Handle<String> subject_handle(*subject_string_out, isolate);
280 
281     if (js_has_overflowed) {
282       return ThrowStackOverflow(isolate, call_origin);
283     } else if (check.InterruptRequested()) {
284       const bool was_one_byte =
285           String::IsOneByteRepresentationUnderneath(*subject_string_out);
286       Object result;
287       {
288         AllowGarbageCollection yes_gc;
289         result = isolate->stack_guard()->HandleInterrupts();
290       }
291       if (result.IsException(isolate)) {
292         return IrregexpInterpreter::EXCEPTION;
293       }
294 
295       // If we changed between a LATIN1 and a UC16 string, we need to
296       // restart regexp matching with the appropriate template instantiation of
297       // RawMatch.
298       if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
299           was_one_byte) {
300         return IrregexpInterpreter::RETRY;
301       }
302 
303       UpdateCodeAndSubjectReferences(
304           isolate, code_handle, subject_handle, code_array_out, code_base_out,
305           pc_out, subject_string_out, subject_string_vector_out);
306     }
307   }
308 
309   return IrregexpInterpreter::SUCCESS;
310 }
311 
CheckBitInTable(const uint32_t current_char, const byte* const table)312 bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
313   int mask = RegExpMacroAssembler::kTableMask;
314   int b = table[(current_char & mask) >> kBitsPerByteLog2];
315   int bit = (current_char & (kBitsPerByte - 1));
316   return (b & (1 << bit)) != 0;
317 }
318 
319 // Returns true iff 0 <= index < length.
IndexIsInBounds(int index, int length)320 bool IndexIsInBounds(int index, int length) {
321   DCHECK_GE(length, 0);
322   return static_cast<uintptr_t>(index) < static_cast<uintptr_t>(length);
323 }
324 
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
// next handler we simply jump (goto) directly to its address.
//
// The macros below are not self-contained: they read and write variables that
// must be in scope at the use site (next_pc, next_insn, pc, insn and, with
// computed gotos, next_handler_addr and dispatch_table).
//
// - BC_LABEL(name) introduces the handler for bytecode |name|, either as a
//   goto label or as a case label.
// - DECODE() loads the instruction word at next_pc into next_insn and, with
//   computed gotos, looks up the address of its handler.
// - DISPATCH() commits next_pc/next_insn to pc/insn and transfers control to
//   the next handler. It expands to several statements and is not wrapped in
//   do/while, so it must only be used inside a braced block.
#if V8_USE_COMPUTED_GOTO
#define BC_LABEL(name) BC_##name:
#define DECODE()                                                   \
  do {                                                             \
    next_insn = Load32Aligned(next_pc);                            \
    next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \
  } while (false)
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto* next_handler_addr
// Without computed goto support, we fall back to a simple switch-based
// dispatch (A large switch statement inside a loop with a case for every
// bytecode).
// In this mode DISPATCH() jumps to the switch_dispatch_continuation label,
// which the use site has to provide.
#else  // V8_USE_COMPUTED_GOTO
#define BC_LABEL(name) case BC_##name:
#define DECODE() next_insn = Load32Aligned(next_pc)
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto switch_dispatch_continuation
#endif  // V8_USE_COMPUTED_GOTO
351 
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
// We want those two macros as far apart as possible, because the goto in
// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
// don't hit the cache and have to fetch the next handler address from physical
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
// potentially be executed unconditionally, reducing memory stall.
//
// ADVANCE(name) points next_pc at the instruction following the current
// bytecode |name|; SET_PC_FROM_OFFSET(offset) points it at |offset| bytes from
// code_base. Both then DECODE() the instruction found there. Each expands to
// two statements, so they must only be used inside a braced block.
#define ADVANCE(name)                             \
  next_pc = pc + RegExpBytecodeLength(BC_##name); \
  DECODE()
// |offset| is parenthesized so that an argument containing a lower-precedence
// operator cannot regroup with the addition.
#define SET_PC_FROM_OFFSET(offset)  \
  next_pc = code_base + (offset);   \
  DECODE()
365 
// Current position mutations.
//
// SET_CURRENT_POSITION(value) assigns |value| to the `current` variable of
// the use site and then DCHECKs the new position against the range given by 0
// and subject.length() (NOTE(review): base::IsInRange is presumably inclusive
// at both ends, i.e. a position one past the last character is accepted;
// confirm against its definition).
// ADVANCE_CURRENT_POSITION(by) moves `current` by the signed amount |by|,
// going through the same check.
#define SET_CURRENT_POSITION(value)                        \
  do {                                                     \
    current = (value);                                     \
    DCHECK(base::IsInRange(current, 0, subject.length())); \
  } while (false)
#define ADVANCE_CURRENT_POSITION(by) SET_CURRENT_POSITION(current + (by))
373 
// BYTECODE(name) opens the handler for bytecode |name|. In DEBUG builds the
// label is followed by a call to MaybeTraceInterpreter, which reads code_base,
// pc, backtrack_stack, current and current_char from the use site. In all
// other builds it is just the label.
#ifdef DEBUG
#define BYTECODE(name)                                                \
  BC_LABEL(name)                                                      \
  MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
                        current_char, RegExpBytecodeLength(BC_##name), #name);
#else
#define BYTECODE(name) BC_LABEL(name)
#endif  // DEBUG
382 
383 template <typename Char>
RawMatch( Isolate* isolate, ByteArray code_array, String subject_string, base::Vector<const Char> subject, int* output_registers, int output_register_count, int total_register_count, int current, uint32_t current_char, RegExp::CallOrigin call_origin, const uint32_t backtrack_limit)384 IrregexpInterpreter::Result RawMatch(
385     Isolate* isolate, ByteArray code_array, String subject_string,
386     base::Vector<const Char> subject, int* output_registers,
387     int output_register_count, int total_register_count, int current,
388     uint32_t current_char, RegExp::CallOrigin call_origin,
389     const uint32_t backtrack_limit) {
390   DisallowGarbageCollection no_gc;
391 
392 #if V8_USE_COMPUTED_GOTO
393 
394 // We have to make sure that no OOB access to the dispatch table is possible and
395 // all values are valid label addresses.
396 // Otherwise jumps to arbitrary addresses could potentially happen.
397 // This is ensured as follows:
398 // Every index to the dispatch table gets masked using BYTECODE_MASK in
399 // DECODE(). This way we can only get values between 0 (only the least
400 // significant byte of an integer is used) and kRegExpPaddedBytecodeCount - 1
401 // (BYTECODE_MASK is defined to be exactly this value).
402 // All entries from kRegExpBytecodeCount to kRegExpPaddedBytecodeCount have to
403 // be filled with BREAKs (invalid operation).
404 
405 // Fill dispatch table from last defined bytecode up to the next power of two
406 // with BREAK (invalid operation).
407 // TODO(pthier): Find a way to fill up automatically (at compile time)
408 // 59 real bytecodes -> 5 fillers
409 #define BYTECODE_FILLER_ITERATOR(V) \
410   V(BREAK) /* 1 */                  \
411   V(BREAK) /* 2 */                  \
412   V(BREAK) /* 3 */                  \
413   V(BREAK) /* 4 */                  \
414   V(BREAK) /* 5 */
415 
416 #define COUNT(...) +1
417   static constexpr int kRegExpBytecodeFillerCount =
418       BYTECODE_FILLER_ITERATOR(COUNT);
419 #undef COUNT
420 
421   // Make sure kRegExpPaddedBytecodeCount is actually the closest possible power
422   // of two.
423   DCHECK_EQ(kRegExpPaddedBytecodeCount,
424             base::bits::RoundUpToPowerOfTwo32(kRegExpBytecodeCount));
425 
426   // Make sure every bytecode we get by using BYTECODE_MASK is well defined.
427   STATIC_ASSERT(kRegExpBytecodeCount <= kRegExpPaddedBytecodeCount);
428   STATIC_ASSERT(kRegExpBytecodeCount + kRegExpBytecodeFillerCount ==
429                 kRegExpPaddedBytecodeCount);
430 
431 #define DECLARE_DISPATCH_TABLE_ENTRY(name, ...) &&BC_##name,
432   static const void* const dispatch_table[kRegExpPaddedBytecodeCount] = {
433       BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)
434           BYTECODE_FILLER_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
435 #undef DECLARE_DISPATCH_TABLE_ENTRY
436 #undef BYTECODE_FILLER_ITERATOR
437 
438 #endif  // V8_USE_COMPUTED_GOTO
439 
440   const byte* pc = code_array.GetDataStartAddress();
441   const byte* code_base = pc;
442 
443   InterpreterRegisters registers(total_register_count, output_registers,
444                                  output_register_count);
445   BacktrackStack backtrack_stack;
446 
447   uint32_t backtrack_count = 0;
448 
449 #ifdef DEBUG
450   if (FLAG_trace_regexp_bytecodes) {
451     PrintF("\n\nStart bytecode interpreter\n\n");
452   }
453 #endif
454 
455   while (true) {
456     const byte* next_pc = pc;
457     int32_t insn;
458     int32_t next_insn;
459 #if V8_USE_COMPUTED_GOTO
460     const void* next_handler_addr;
461     DECODE();
462     DISPATCH();
463 #else
464     insn = Load32Aligned(pc);
465     switch (insn & BYTECODE_MASK) {
466 #endif  // V8_USE_COMPUTED_GOTO
467     BYTECODE(BREAK) { UNREACHABLE(); }
468     BYTECODE(PUSH_CP) {
469       ADVANCE(PUSH_CP);
470       if (!backtrack_stack.push(current)) {
471         return MaybeThrowStackOverflow(isolate, call_origin);
472       }
473       DISPATCH();
474     }
475     BYTECODE(PUSH_BT) {
476       ADVANCE(PUSH_BT);
477       if (!backtrack_stack.push(Load32Aligned(pc + 4))) {
478         return MaybeThrowStackOverflow(isolate, call_origin);
479       }
480       DISPATCH();
481     }
482     BYTECODE(PUSH_REGISTER) {
483       ADVANCE(PUSH_REGISTER);
484       if (!backtrack_stack.push(registers[LoadPacked24Unsigned(insn)])) {
485         return MaybeThrowStackOverflow(isolate, call_origin);
486       }
487       DISPATCH();
488     }
489     BYTECODE(SET_REGISTER) {
490       ADVANCE(SET_REGISTER);
491       registers[LoadPacked24Unsigned(insn)] = Load32Aligned(pc + 4);
492       DISPATCH();
493     }
494     BYTECODE(ADVANCE_REGISTER) {
495       ADVANCE(ADVANCE_REGISTER);
496       registers[LoadPacked24Unsigned(insn)] += Load32Aligned(pc + 4);
497       DISPATCH();
498     }
499     BYTECODE(SET_REGISTER_TO_CP) {
500       ADVANCE(SET_REGISTER_TO_CP);
501       registers[LoadPacked24Unsigned(insn)] = current + Load32Aligned(pc + 4);
502       DISPATCH();
503     }
504     BYTECODE(SET_CP_TO_REGISTER) {
505       ADVANCE(SET_CP_TO_REGISTER);
506       SET_CURRENT_POSITION(registers[LoadPacked24Unsigned(insn)]);
507       DISPATCH();
508     }
509     BYTECODE(SET_REGISTER_TO_SP) {
510       ADVANCE(SET_REGISTER_TO_SP);
511       registers[LoadPacked24Unsigned(insn)] = backtrack_stack.sp();
512       DISPATCH();
513     }
514     BYTECODE(SET_SP_TO_REGISTER) {
515       ADVANCE(SET_SP_TO_REGISTER);
516       backtrack_stack.set_sp(registers[LoadPacked24Unsigned(insn)]);
517       DISPATCH();
518     }
519     BYTECODE(POP_CP) {
520       ADVANCE(POP_CP);
521       SET_CURRENT_POSITION(backtrack_stack.pop());
522       DISPATCH();
523     }
524     BYTECODE(POP_BT) {
525       STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0);
526       if (++backtrack_count == backtrack_limit) {
527         int return_code = LoadPacked24Signed(insn);
528         return static_cast<IrregexpInterpreter::Result>(return_code);
529       }
530 
531       IrregexpInterpreter::Result return_code =
532           HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
533                            &code_base, &subject, &pc);
534       if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
535 
536       SET_PC_FROM_OFFSET(backtrack_stack.pop());
537       DISPATCH();
538     }
539     BYTECODE(POP_REGISTER) {
540       ADVANCE(POP_REGISTER);
541       registers[LoadPacked24Unsigned(insn)] = backtrack_stack.pop();
542       DISPATCH();
543     }
544     BYTECODE(FAIL) {
545       isolate->counters()->regexp_backtracks()->AddSample(
546           static_cast<int>(backtrack_count));
547       return IrregexpInterpreter::FAILURE;
548     }
549     BYTECODE(SUCCEED) {
550       isolate->counters()->regexp_backtracks()->AddSample(
551           static_cast<int>(backtrack_count));
552       registers.CopyToOutputRegisters();
553       return IrregexpInterpreter::SUCCESS;
554     }
555     BYTECODE(ADVANCE_CP) {
556       ADVANCE(ADVANCE_CP);
557       ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
558       DISPATCH();
559     }
560     BYTECODE(GOTO) {
561       SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
562       DISPATCH();
563     }
564     BYTECODE(ADVANCE_CP_AND_GOTO) {
565       SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
566       ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
567       DISPATCH();
568     }
569     BYTECODE(CHECK_GREEDY) {
570       if (current == backtrack_stack.peek()) {
571         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
572         backtrack_stack.pop();
573       } else {
574         ADVANCE(CHECK_GREEDY);
575       }
576       DISPATCH();
577     }
578     BYTECODE(LOAD_CURRENT_CHAR) {
579       int pos = current + LoadPacked24Signed(insn);
580       if (pos >= subject.length() || pos < 0) {
581         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
582       } else {
583         ADVANCE(LOAD_CURRENT_CHAR);
584         current_char = subject[pos];
585       }
586       DISPATCH();
587     }
588     BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
589       ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
590       int pos = current + LoadPacked24Signed(insn);
591       current_char = subject[pos];
592       DISPATCH();
593     }
594     BYTECODE(LOAD_2_CURRENT_CHARS) {
595       int pos = current + LoadPacked24Signed(insn);
596       if (pos + 2 > subject.length() || pos < 0) {
597         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
598       } else {
599         ADVANCE(LOAD_2_CURRENT_CHARS);
600         Char next = subject[pos + 1];
601         current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
602       }
603       DISPATCH();
604     }
605     BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
606       ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
607       int pos = current + LoadPacked24Signed(insn);
608       Char next = subject[pos + 1];
609       current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
610       DISPATCH();
611     }
612     BYTECODE(LOAD_4_CURRENT_CHARS) {
613       DCHECK_EQ(1, sizeof(Char));
614       int pos = current + LoadPacked24Signed(insn);
615       if (pos + 4 > subject.length() || pos < 0) {
616         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
617       } else {
618         ADVANCE(LOAD_4_CURRENT_CHARS);
619         Char next1 = subject[pos + 1];
620         Char next2 = subject[pos + 2];
621         Char next3 = subject[pos + 3];
622         current_char =
623             (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
624       }
625       DISPATCH();
626     }
627     BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
628       ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
629       DCHECK_EQ(1, sizeof(Char));
630       int pos = current + LoadPacked24Signed(insn);
631       Char next1 = subject[pos + 1];
632       Char next2 = subject[pos + 2];
633       Char next3 = subject[pos + 3];
634       current_char =
635           (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
636       DISPATCH();
637     }
638     BYTECODE(CHECK_4_CHARS) {
639       uint32_t c = Load32Aligned(pc + 4);
640       if (c == current_char) {
641         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
642       } else {
643         ADVANCE(CHECK_4_CHARS);
644       }
645       DISPATCH();
646     }
647     BYTECODE(CHECK_CHAR) {
648       uint32_t c = LoadPacked24Unsigned(insn);
649       if (c == current_char) {
650         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
651       } else {
652         ADVANCE(CHECK_CHAR);
653       }
654       DISPATCH();
655     }
656     BYTECODE(CHECK_NOT_4_CHARS) {
657       uint32_t c = Load32Aligned(pc + 4);
658       if (c != current_char) {
659         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
660       } else {
661         ADVANCE(CHECK_NOT_4_CHARS);
662       }
663       DISPATCH();
664     }
665     BYTECODE(CHECK_NOT_CHAR) {
666       uint32_t c = LoadPacked24Unsigned(insn);
667       if (c != current_char) {
668         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
669       } else {
670         ADVANCE(CHECK_NOT_CHAR);
671       }
672       DISPATCH();
673     }
674     BYTECODE(AND_CHECK_4_CHARS) {
675       uint32_t c = Load32Aligned(pc + 4);
676       if (c == (current_char & Load32Aligned(pc + 8))) {
677         SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
678       } else {
679         ADVANCE(AND_CHECK_4_CHARS);
680       }
681       DISPATCH();
682     }
683     BYTECODE(AND_CHECK_CHAR) {
684       uint32_t c = LoadPacked24Unsigned(insn);
685       if (c == (current_char & Load32Aligned(pc + 4))) {
686         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
687       } else {
688         ADVANCE(AND_CHECK_CHAR);
689       }
690       DISPATCH();
691     }
692     BYTECODE(AND_CHECK_NOT_4_CHARS) {
693       uint32_t c = Load32Aligned(pc + 4);
694       if (c != (current_char & Load32Aligned(pc + 8))) {
695         SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
696       } else {
697         ADVANCE(AND_CHECK_NOT_4_CHARS);
698       }
699       DISPATCH();
700     }
701     BYTECODE(AND_CHECK_NOT_CHAR) {
702       uint32_t c = LoadPacked24Unsigned(insn);
703       if (c != (current_char & Load32Aligned(pc + 4))) {
704         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
705       } else {
706         ADVANCE(AND_CHECK_NOT_CHAR);
707       }
708       DISPATCH();
709     }
710     BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
711       uint32_t c = LoadPacked24Unsigned(insn);
712       uint32_t minus = Load16Aligned(pc + 4);
713       uint32_t mask = Load16Aligned(pc + 6);
714       if (c != ((current_char - minus) & mask)) {
715         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
716       } else {
717         ADVANCE(MINUS_AND_CHECK_NOT_CHAR);
718       }
719       DISPATCH();
720     }
721     BYTECODE(CHECK_CHAR_IN_RANGE) {
722       uint32_t from = Load16Aligned(pc + 4);
723       uint32_t to = Load16Aligned(pc + 6);
724       if (from <= current_char && current_char <= to) {
725         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
726       } else {
727         ADVANCE(CHECK_CHAR_IN_RANGE);
728       }
729       DISPATCH();
730     }
731     BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
732       uint32_t from = Load16Aligned(pc + 4);
733       uint32_t to = Load16Aligned(pc + 6);
734       if (from > current_char || current_char > to) {
735         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
736       } else {
737         ADVANCE(CHECK_CHAR_NOT_IN_RANGE);
738       }
739       DISPATCH();
740     }
741     BYTECODE(CHECK_BIT_IN_TABLE) {
742       if (CheckBitInTable(current_char, pc + 8)) {
743         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
744       } else {
745         ADVANCE(CHECK_BIT_IN_TABLE);
746       }
747       DISPATCH();
748     }
749     BYTECODE(CHECK_LT) {
750       uint32_t limit = LoadPacked24Unsigned(insn);
751       if (current_char < limit) {
752         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
753       } else {
754         ADVANCE(CHECK_LT);
755       }
756       DISPATCH();
757     }
758     BYTECODE(CHECK_GT) {
759       uint32_t limit = LoadPacked24Unsigned(insn);
760       if (current_char > limit) {
761         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
762       } else {
763         ADVANCE(CHECK_GT);
764       }
765       DISPATCH();
766     }
767     BYTECODE(CHECK_REGISTER_LT) {
768       if (registers[LoadPacked24Unsigned(insn)] < Load32Aligned(pc + 4)) {
769         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
770       } else {
771         ADVANCE(CHECK_REGISTER_LT);
772       }
773       DISPATCH();
774     }
775     BYTECODE(CHECK_REGISTER_GE) {
776       if (registers[LoadPacked24Unsigned(insn)] >= Load32Aligned(pc + 4)) {
777         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
778       } else {
779         ADVANCE(CHECK_REGISTER_GE);
780       }
781       DISPATCH();
782     }
783     BYTECODE(CHECK_REGISTER_EQ_POS) {
784       if (registers[LoadPacked24Unsigned(insn)] == current) {
785         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
786       } else {
787         ADVANCE(CHECK_REGISTER_EQ_POS);
788       }
789       DISPATCH();
790     }
791     BYTECODE(CHECK_NOT_REGS_EQUAL) {
792       if (registers[LoadPacked24Unsigned(insn)] ==
793           registers[Load32Aligned(pc + 4)]) {
794         ADVANCE(CHECK_NOT_REGS_EQUAL);
795       } else {
796         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
797       }
798       DISPATCH();
799     }
800     BYTECODE(CHECK_NOT_BACK_REF) {
801       int from = registers[LoadPacked24Unsigned(insn)];
802       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
803       if (from >= 0 && len > 0) {
804         if (current + len > subject.length() ||
805             !CompareCharsEqual(&subject[from], &subject[current], len)) {
806           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
807           DISPATCH();
808         }
809         ADVANCE_CURRENT_POSITION(len);
810       }
811       ADVANCE(CHECK_NOT_BACK_REF);
812       DISPATCH();
813     }
814     BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
815       int from = registers[LoadPacked24Unsigned(insn)];
816       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
817       if (from >= 0 && len > 0) {
818         if (current - len < 0 ||
819             !CompareCharsEqual(&subject[from], &subject[current - len], len)) {
820           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
821           DISPATCH();
822         }
823         SET_CURRENT_POSITION(current - len);
824       }
825       ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
826       DISPATCH();
827     }
828     BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
829       int from = registers[LoadPacked24Unsigned(insn)];
830       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
831       if (from >= 0 && len > 0) {
832         if (current + len > subject.length() ||
833             !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
834           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
835           DISPATCH();
836         }
837         ADVANCE_CURRENT_POSITION(len);
838       }
839       ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
840       DISPATCH();
841     }
842     BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
843       int from = registers[LoadPacked24Unsigned(insn)];
844       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
845       if (from >= 0 && len > 0) {
846         if (current + len > subject.length() ||
847             !BackRefMatchesNoCase(isolate, from, current, len, subject,
848                                   false)) {
849           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
850           DISPATCH();
851         }
852         ADVANCE_CURRENT_POSITION(len);
853       }
854       ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
855       DISPATCH();
856     }
857     BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
858       int from = registers[LoadPacked24Unsigned(insn)];
859       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
860       if (from >= 0 && len > 0) {
861         if (current - len < 0 ||
862             !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
863                                   true)) {
864           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
865           DISPATCH();
866         }
867         SET_CURRENT_POSITION(current - len);
868       }
869       ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
870       DISPATCH();
871     }
872     BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
873       int from = registers[LoadPacked24Unsigned(insn)];
874       int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
875       if (from >= 0 && len > 0) {
876         if (current - len < 0 ||
877             !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
878                                   false)) {
879           SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
880           DISPATCH();
881         }
882         SET_CURRENT_POSITION(current - len);
883       }
884       ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
885       DISPATCH();
886     }
887     BYTECODE(CHECK_AT_START) {
888       if (current + LoadPacked24Signed(insn) == 0) {
889         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
890       } else {
891         ADVANCE(CHECK_AT_START);
892       }
893       DISPATCH();
894     }
895     BYTECODE(CHECK_NOT_AT_START) {
896       if (current + LoadPacked24Signed(insn) == 0) {
897         ADVANCE(CHECK_NOT_AT_START);
898       } else {
899         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
900       }
901       DISPATCH();
902     }
903     BYTECODE(SET_CURRENT_POSITION_FROM_END) {
904       ADVANCE(SET_CURRENT_POSITION_FROM_END);
905       int by = LoadPacked24Unsigned(insn);
906       if (subject.length() - current > by) {
907         SET_CURRENT_POSITION(subject.length() - by);
908         current_char = subject[current - 1];
909       }
910       DISPATCH();
911     }
912     BYTECODE(CHECK_CURRENT_POSITION) {
913       int pos = current + LoadPacked24Signed(insn);
914       if (pos > subject.length() || pos < 0) {
915         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
916       } else {
917         ADVANCE(CHECK_CURRENT_POSITION);
918       }
919       DISPATCH();
920     }
921     BYTECODE(SKIP_UNTIL_CHAR) {
922       int32_t load_offset = LoadPacked24Signed(insn);
923       int32_t advance = Load16AlignedSigned(pc + 4);
924       uint32_t c = Load16Aligned(pc + 6);
925       while (IndexIsInBounds(current + load_offset, subject.length())) {
926         current_char = subject[current + load_offset];
927         if (c == current_char) {
928           SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
929           DISPATCH();
930         }
931         ADVANCE_CURRENT_POSITION(advance);
932       }
933       SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
934       DISPATCH();
935     }
936     BYTECODE(SKIP_UNTIL_CHAR_AND) {
937       int32_t load_offset = LoadPacked24Signed(insn);
938       int32_t advance = Load16AlignedSigned(pc + 4);
939       uint16_t c = Load16Aligned(pc + 6);
940       uint32_t mask = Load32Aligned(pc + 8);
941       int32_t maximum_offset = Load32Aligned(pc + 12);
942       while (static_cast<uintptr_t>(current + maximum_offset) <=
943              static_cast<uintptr_t>(subject.length())) {
944         current_char = subject[current + load_offset];
945         if (c == (current_char & mask)) {
946           SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
947           DISPATCH();
948         }
949         ADVANCE_CURRENT_POSITION(advance);
950       }
951       SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
952       DISPATCH();
953     }
954     BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
955       int32_t load_offset = LoadPacked24Signed(insn);
956       int32_t advance = Load16AlignedSigned(pc + 4);
957       uint16_t c = Load16Aligned(pc + 6);
958       int32_t maximum_offset = Load32Aligned(pc + 8);
959       while (static_cast<uintptr_t>(current + maximum_offset) <=
960              static_cast<uintptr_t>(subject.length())) {
961         current_char = subject[current + load_offset];
962         if (c == current_char) {
963           SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
964           DISPATCH();
965         }
966         ADVANCE_CURRENT_POSITION(advance);
967       }
968       SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
969       DISPATCH();
970     }
971     BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
972       int32_t load_offset = LoadPacked24Signed(insn);
973       int32_t advance = Load16AlignedSigned(pc + 4);
974       const byte* table = pc + 8;
975       while (IndexIsInBounds(current + load_offset, subject.length())) {
976         current_char = subject[current + load_offset];
977         if (CheckBitInTable(current_char, table)) {
978           SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
979           DISPATCH();
980         }
981         ADVANCE_CURRENT_POSITION(advance);
982       }
983       SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
984       DISPATCH();
985     }
986     BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
987       int32_t load_offset = LoadPacked24Signed(insn);
988       int32_t advance = Load16AlignedSigned(pc + 4);
989       uint16_t limit = Load16Aligned(pc + 6);
990       const byte* table = pc + 8;
991       while (IndexIsInBounds(current + load_offset, subject.length())) {
992         current_char = subject[current + load_offset];
993         if (current_char > limit) {
994           SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
995           DISPATCH();
996         }
997         if (!CheckBitInTable(current_char, table)) {
998           SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
999           DISPATCH();
1000         }
1001         ADVANCE_CURRENT_POSITION(advance);
1002       }
1003       SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
1004       DISPATCH();
1005     }
1006     BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
1007       int32_t load_offset = LoadPacked24Signed(insn);
1008       int32_t advance = Load32Aligned(pc + 4);
1009       uint16_t c = Load16Aligned(pc + 8);
1010       uint16_t c2 = Load16Aligned(pc + 10);
1011       while (IndexIsInBounds(current + load_offset, subject.length())) {
1012         current_char = subject[current + load_offset];
1013         // The two if-statements below are split up intentionally, as combining
1014         // them seems to result in register allocation behaving quite
1015         // differently and slowing down the resulting code.
1016         if (c == current_char) {
1017           SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
1018           DISPATCH();
1019         }
1020         if (c2 == current_char) {
1021           SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
1022           DISPATCH();
1023         }
1024         ADVANCE_CURRENT_POSITION(advance);
1025       }
1026       SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
1027       DISPATCH();
1028     }
1029 #if V8_USE_COMPUTED_GOTO
1030 // Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
1031 // V8_USE_COMPUTED_GOTO here.
1032 #else
1033       default:
1034         UNREACHABLE();
1035     }
1036   // Label we jump to in DISPATCH(). There must be no instructions between the
1037   // end of the switch, this label and the end of the loop.
1038   switch_dispatch_continuation : {}
1039 #endif  // V8_USE_COMPUTED_GOTO
1040   }
1041 }
1042 
1043 #undef BYTECODE
1044 #undef ADVANCE_CURRENT_POSITION
1045 #undef SET_CURRENT_POSITION
1046 #undef DISPATCH
1047 #undef DECODE
1048 #undef SET_PC_FROM_OFFSET
1049 #undef ADVANCE
1050 #undef BC_LABEL
1051 #undef V8_USE_COMPUTED_GOTO
1052 
1053 }  // namespace
1054 
1055 // static
Match( Isolate* isolate, JSRegExp regexp, String subject_string, int* output_registers, int output_register_count, int start_position, RegExp::CallOrigin call_origin)1056 IrregexpInterpreter::Result IrregexpInterpreter::Match(
1057     Isolate* isolate, JSRegExp regexp, String subject_string,
1058     int* output_registers, int output_register_count, int start_position,
1059     RegExp::CallOrigin call_origin) {
1060   if (FLAG_regexp_tier_up) regexp.TierUpTick();
1061 
1062   bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
1063   ByteArray code_array = ByteArray::cast(regexp.bytecode(is_one_byte));
1064   int total_register_count = regexp.max_register_count();
1065 
1066   return MatchInternal(isolate, code_array, subject_string, output_registers,
1067                        output_register_count, total_register_count,
1068                        start_position, call_origin, regexp.backtrack_limit());
1069 }
1070 
MatchInternal( Isolate* isolate, ByteArray code_array, String subject_string, int* output_registers, int output_register_count, int total_register_count, int start_position, RegExp::CallOrigin call_origin, uint32_t backtrack_limit)1071 IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
1072     Isolate* isolate, ByteArray code_array, String subject_string,
1073     int* output_registers, int output_register_count, int total_register_count,
1074     int start_position, RegExp::CallOrigin call_origin,
1075     uint32_t backtrack_limit) {
1076   DCHECK(subject_string.IsFlat());
1077 
1078   // TODO(chromium:1262676): Remove this CHECK once fixed.
1079   CHECK(code_array.IsByteArray());
1080 
1081   // Note: Heap allocation *is* allowed in two situations if calling from
1082   // Runtime:
1083   // 1. When creating & throwing a stack overflow exception. The interpreter
1084   //    aborts afterwards, and thus possible-moved objects are never used.
1085   // 2. When handling interrupts. We manually relocate unhandlified references
1086   //    after interrupts have run.
1087   DisallowGarbageCollection no_gc;
1088 
1089   base::uc16 previous_char = '\n';
1090   String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
1091   // Because interrupts can result in GC and string content relocation, the
1092   // checksum verification in FlatContent may fail even though this code is
1093   // safe. See (2) above.
1094   subject_content.UnsafeDisableChecksumVerification();
1095   if (subject_content.IsOneByte()) {
1096     base::Vector<const uint8_t> subject_vector =
1097         subject_content.ToOneByteVector();
1098     if (start_position != 0) previous_char = subject_vector[start_position - 1];
1099     return RawMatch(isolate, code_array, subject_string, subject_vector,
1100                     output_registers, output_register_count,
1101                     total_register_count, start_position, previous_char,
1102                     call_origin, backtrack_limit);
1103   } else {
1104     DCHECK(subject_content.IsTwoByte());
1105     base::Vector<const base::uc16> subject_vector =
1106         subject_content.ToUC16Vector();
1107     if (start_position != 0) previous_char = subject_vector[start_position - 1];
1108     return RawMatch(isolate, code_array, subject_string, subject_vector,
1109                     output_registers, output_register_count,
1110                     total_register_count, start_position, previous_char,
1111                     call_origin, backtrack_limit);
1112   }
1113 }
1114 
1115 #ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1116 
1117 // This method is called through an external reference from RegExpExecInternal
1118 // builtin.
MatchForCallFromJs( Address subject, int32_t start_position, Address, Address, int* output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp)1119 IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
1120     Address subject, int32_t start_position, Address, Address,
1121     int* output_registers, int32_t output_register_count,
1122     RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
1123   DCHECK_NOT_NULL(isolate);
1124   DCHECK_NOT_NULL(output_registers);
1125   DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
1126 
1127   DisallowGarbageCollection no_gc;
1128   DisallowJavascriptExecution no_js(isolate);
1129   DisallowHandleAllocation no_handles;
1130   DisallowHandleDereference no_deref;
1131 
1132   String subject_string = String::cast(Object(subject));
1133   JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
1134 
1135   if (regexp_obj.MarkedForTierUp()) {
1136     // Returning RETRY will re-enter through runtime, where actual recompilation
1137     // for tier-up takes place.
1138     return IrregexpInterpreter::RETRY;
1139   }
1140 
1141   return Match(isolate, regexp_obj, subject_string, output_registers,
1142                output_register_count, start_position, call_origin);
1143 }
1144 
1145 #endif  // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1146 
MatchForCallFromRuntime( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string, int* output_registers, int output_register_count, int start_position)1147 IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
1148     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
1149     int* output_registers, int output_register_count, int start_position) {
1150   return Match(isolate, *regexp, *subject_string, output_registers,
1151                output_register_count, start_position,
1152                RegExp::CallOrigin::kFromRuntime);
1153 }
1154 
1155 }  // namespace internal
1156 }  // namespace v8
1157