1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // A simple interpreter for the Irregexp byte code.
6
7 #include "src/regexp/regexp-interpreter.h"
8
9 #include "src/base/small-vector.h"
10 #include "src/base/strings.h"
11 #include "src/execution/isolate.h"
12 #include "src/logging/counters.h"
13 #include "src/objects/js-regexp-inl.h"
14 #include "src/objects/string-inl.h"
15 #include "src/regexp/regexp-bytecodes.h"
16 #include "src/regexp/regexp-macro-assembler.h"
17 #include "src/regexp/regexp-stack.h" // For kMaximumStackSize.
18 #include "src/regexp/regexp.h"
19 #include "src/strings/unicode.h"
20 #include "src/utils/memcopy.h"
21 #include "src/utils/utils.h"
22
23 #ifdef V8_INTL_SUPPORT
24 #include "unicode/uchar.h"
25 #endif // V8_INTL_SUPPORT
26
27 // Use token threaded dispatch iff the compiler supports computed gotos and the
28 // build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
29 #if V8_HAS_COMPUTED_GOTO && \
30 defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
31 #define V8_USE_COMPUTED_GOTO 1
32 #endif // V8_HAS_COMPUTED_GOTO
33
34 namespace v8 {
35 namespace internal {
36
37 namespace {
38
BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, base::Vector<const base::uc16> subject, bool unicode)39 bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
40 base::Vector<const base::uc16> subject,
41 bool unicode) {
42 Address offset_a =
43 reinterpret_cast<Address>(const_cast<base::uc16*>(&subject.at(from)));
44 Address offset_b =
45 reinterpret_cast<Address>(const_cast<base::uc16*>(&subject.at(current)));
46 size_t length = len * base::kUC16Size;
47
48 bool result = unicode
49 ? RegExpMacroAssembler::CaseInsensitiveCompareUnicode(
50 offset_a, offset_b, length, isolate)
51 : RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
52 offset_a, offset_b, length, isolate);
53 return result == 1;
54 }
55
BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, base::Vector<const uint8_t> subject, bool unicode)56 bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
57 base::Vector<const uint8_t> subject, bool unicode) {
58 // For Latin1 characters the unicode flag makes no difference.
59 for (int i = 0; i < len; i++) {
60 unsigned int old_char = subject[from++];
61 unsigned int new_char = subject[current++];
62 if (old_char == new_char) continue;
63 // Convert both characters to lower case.
64 old_char |= 0x20;
65 new_char |= 0x20;
66 if (old_char != new_char) return false;
67 // Not letters in the ASCII range and Latin-1 range.
68 if (!(old_char - 'a' <= 'z' - 'a') &&
69 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
70 return false;
71 }
72 }
73 return true;
74 }
75
76 #ifdef DEBUG
MaybeTraceInterpreter(const byte* code_base, const byte* pc, int stack_depth, int current_position, uint32_t current_char, int bytecode_length, const char* bytecode_name)77 void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
78 int stack_depth, int current_position,
79 uint32_t current_char, int bytecode_length,
80 const char* bytecode_name) {
81 if (FLAG_trace_regexp_bytecodes) {
82 const bool printable = std::isprint(current_char);
83 const char* format =
84 printable
85 ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
86 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
87 PrintF(format, pc - code_base, stack_depth, current_position, current_char,
88 printable ? current_char : '.');
89
90 RegExpBytecodeDisassembleSingle(code_base, pc);
91 }
92 }
93 #endif // DEBUG
94
Load32Aligned(const byte* pc)95 int32_t Load32Aligned(const byte* pc) {
96 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
97 return *reinterpret_cast<const int32_t*>(pc);
98 }
99
100 // TODO(jgruber): Rename to Load16AlignedUnsigned.
Load16Aligned(const byte* pc)101 uint32_t Load16Aligned(const byte* pc) {
102 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
103 return *reinterpret_cast<const uint16_t*>(pc);
104 }
105
Load16AlignedSigned(const byte* pc)106 int32_t Load16AlignedSigned(const byte* pc) {
107 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
108 return *reinterpret_cast<const int16_t*>(pc);
109 }
110
111 // Helpers to access the packed argument. Takes the 32 bits containing the
112 // current bytecode, where the 8 LSB contain the bytecode and the rest contains
113 // a packed 24-bit argument.
114 // TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
115 // police restrictions during bytecode generation.
LoadPacked24Signed(int32_t bytecode_and_packed_arg)116 int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {
117 return bytecode_and_packed_arg >> BYTECODE_SHIFT;
118 }
LoadPacked24Unsigned(int32_t bytecode_and_packed_arg)119 uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {
120 return static_cast<uint32_t>(bytecode_and_packed_arg) >> BYTECODE_SHIFT;
121 }
122
123 // A simple abstraction over the backtracking stack used by the interpreter.
124 //
125 // Despite the name 'backtracking' stack, it's actually used as a generic stack
126 // that stores both program counters (= offsets into the bytecode) and generic
127 // integer values.
128 class BacktrackStack {
129 public:
130 BacktrackStack() = default;
131 BacktrackStack(const BacktrackStack&) = delete;
132 BacktrackStack& operator=(const BacktrackStack&) = delete;
133
push(int v)134 V8_WARN_UNUSED_RESULT bool push(int v) {
135 data_.emplace_back(v);
136 return (static_cast<int>(data_.size()) <= kMaxSize);
137 }
peek() const138 int peek() const {
139 DCHECK(!data_.empty());
140 return data_.back();
141 }
pop()142 int pop() {
143 int v = peek();
144 data_.pop_back();
145 return v;
146 }
147
148 // The 'sp' is the index of the first empty element in the stack.
sp() const149 int sp() const { return static_cast<int>(data_.size()); }
set_sp(int new_sp)150 void set_sp(int new_sp) {
151 DCHECK_LE(new_sp, sp());
152 data_.resize_no_init(new_sp);
153 }
154
155 private:
156 // Semi-arbitrary. Should be large enough for common cases to remain in the
157 // static stack-allocated backing store, but small enough not to waste space.
158 static constexpr int kStaticCapacity = 64;
159
160 using ValueT = int;
161 base::SmallVector<ValueT, kStaticCapacity> data_;
162
163 static constexpr int kMaxSize =
164 RegExpStack::kMaximumStackSize / sizeof(ValueT);
165 };
166
167 // Registers used during interpreter execution. These consist of output
168 // registers in indices [0, output_register_count[ which will contain matcher
169 // results as a {start,end} index tuple for each capture (where the whole match
170 // counts as implicit capture 0); and internal registers in indices
171 // [output_register_count, total_register_count[.
172 class InterpreterRegisters {
173 public:
174 using RegisterT = int;
175
InterpreterRegisters(int total_register_count, RegisterT* output_registers, int output_register_count)176 InterpreterRegisters(int total_register_count, RegisterT* output_registers,
177 int output_register_count)
178 : registers_(total_register_count),
179 output_registers_(output_registers),
180 output_register_count_(output_register_count) {
181 // TODO(jgruber): Use int32_t consistently for registers. Currently, CSA
182 // uses int32_t while runtime uses int.
183 STATIC_ASSERT(sizeof(int) == sizeof(int32_t));
184 DCHECK_GE(output_register_count, 2); // At least 2 for the match itself.
185 DCHECK_GE(total_register_count, output_register_count);
186 DCHECK_LE(total_register_count, RegExpMacroAssembler::kMaxRegisterCount);
187 DCHECK_NOT_NULL(output_registers);
188
189 // Initialize the output register region to -1 signifying 'no match'.
190 std::memset(registers_.data(), -1,
191 output_register_count * sizeof(RegisterT));
192 }
193
operator [](size_t index) const194 const RegisterT& operator[](size_t index) const { return registers_[index]; }
operator [](size_t index)195 RegisterT& operator[](size_t index) { return registers_[index]; }
196
CopyToOutputRegisters()197 void CopyToOutputRegisters() {
198 MemCopy(output_registers_, registers_.data(),
199 output_register_count_ * sizeof(RegisterT));
200 }
201
202 private:
203 static constexpr int kStaticCapacity = 64; // Arbitrary.
204 base::SmallVector<RegisterT, kStaticCapacity> registers_;
205 RegisterT* const output_registers_;
206 const int output_register_count_;
207 };
208
ThrowStackOverflow(Isolate* isolate, RegExp::CallOrigin call_origin)209 IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
210 RegExp::CallOrigin call_origin) {
211 CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
212 // We abort interpreter execution after the stack overflow is thrown, and thus
213 // allow allocation here despite the outer DisallowGarbageCollectionScope.
214 AllowGarbageCollection yes_gc;
215 isolate->StackOverflow();
216 return IrregexpInterpreter::EXCEPTION;
217 }
218
219 // Only throws if called from the runtime, otherwise just returns the EXCEPTION
220 // status code.
MaybeThrowStackOverflow( Isolate* isolate, RegExp::CallOrigin call_origin)221 IrregexpInterpreter::Result MaybeThrowStackOverflow(
222 Isolate* isolate, RegExp::CallOrigin call_origin) {
223 if (call_origin == RegExp::CallOrigin::kFromRuntime) {
224 return ThrowStackOverflow(isolate, call_origin);
225 } else {
226 return IrregexpInterpreter::EXCEPTION;
227 }
228 }
229
230 template <typename Char>
UpdateCodeAndSubjectReferences( Isolate* isolate, Handle<ByteArray> code_array, Handle<String> subject_string, ByteArray* code_array_out, const byte** code_base_out, const byte** pc_out, String* subject_string_out, base::Vector<const Char>* subject_string_vector_out)231 void UpdateCodeAndSubjectReferences(
232 Isolate* isolate, Handle<ByteArray> code_array,
233 Handle<String> subject_string, ByteArray* code_array_out,
234 const byte** code_base_out, const byte** pc_out, String* subject_string_out,
235 base::Vector<const Char>* subject_string_vector_out) {
236 DisallowGarbageCollection no_gc;
237
238 if (*code_base_out != code_array->GetDataStartAddress()) {
239 *code_array_out = *code_array;
240 const intptr_t pc_offset = *pc_out - *code_base_out;
241 DCHECK_GT(pc_offset, 0);
242 *code_base_out = code_array->GetDataStartAddress();
243 *pc_out = *code_base_out + pc_offset;
244 }
245
246 DCHECK(subject_string->IsFlat());
247 *subject_string_out = *subject_string;
248 *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
249 }
250
251 // Runs all pending interrupts and updates unhandlified object references if
252 // necessary.
253 template <typename Char>
HandleInterrupts( Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out, String* subject_string_out, const byte** code_base_out, base::Vector<const Char>* subject_string_vector_out, const byte** pc_out)254 IrregexpInterpreter::Result HandleInterrupts(
255 Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
256 String* subject_string_out, const byte** code_base_out,
257 base::Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
258 DisallowGarbageCollection no_gc;
259
260 StackLimitCheck check(isolate);
261 bool js_has_overflowed = check.JsHasOverflowed();
262
263 if (call_origin == RegExp::CallOrigin::kFromJs) {
264 // Direct calls from JavaScript can be interrupted in two ways:
265 // 1. A real stack overflow, in which case we let the caller throw the
266 // exception.
267 // 2. The stack guard was used to interrupt execution for another purpose,
268 // forcing the call through the runtime system.
269 if (js_has_overflowed) {
270 return IrregexpInterpreter::EXCEPTION;
271 } else if (check.InterruptRequested()) {
272 return IrregexpInterpreter::RETRY;
273 }
274 } else {
275 DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
276 // Prepare for possible GC.
277 HandleScope handles(isolate);
278 Handle<ByteArray> code_handle(*code_array_out, isolate);
279 Handle<String> subject_handle(*subject_string_out, isolate);
280
281 if (js_has_overflowed) {
282 return ThrowStackOverflow(isolate, call_origin);
283 } else if (check.InterruptRequested()) {
284 const bool was_one_byte =
285 String::IsOneByteRepresentationUnderneath(*subject_string_out);
286 Object result;
287 {
288 AllowGarbageCollection yes_gc;
289 result = isolate->stack_guard()->HandleInterrupts();
290 }
291 if (result.IsException(isolate)) {
292 return IrregexpInterpreter::EXCEPTION;
293 }
294
295 // If we changed between a LATIN1 and a UC16 string, we need to
296 // restart regexp matching with the appropriate template instantiation of
297 // RawMatch.
298 if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
299 was_one_byte) {
300 return IrregexpInterpreter::RETRY;
301 }
302
303 UpdateCodeAndSubjectReferences(
304 isolate, code_handle, subject_handle, code_array_out, code_base_out,
305 pc_out, subject_string_out, subject_string_vector_out);
306 }
307 }
308
309 return IrregexpInterpreter::SUCCESS;
310 }
311
CheckBitInTable(const uint32_t current_char, const byte* const table)312 bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
313 int mask = RegExpMacroAssembler::kTableMask;
314 int b = table[(current_char & mask) >> kBitsPerByteLog2];
315 int bit = (current_char & (kBitsPerByte - 1));
316 return (b & (1 << bit)) != 0;
317 }
318
319 // Returns true iff 0 <= index < length.
IndexIsInBounds(int index, int length)320 bool IndexIsInBounds(int index, int length) {
321 DCHECK_GE(length, 0);
322 return static_cast<uintptr_t>(index) < static_cast<uintptr_t>(length);
323 }
324
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
// next handler we simply jump (goto) directly to its address.
//
// The macros below refer by name to variables of the scope they are expanded
// in: pc, next_pc, insn, next_insn and, with computed gotos, next_handler_addr
// and dispatch_table.
#if V8_USE_COMPUTED_GOTO
// Emits the goto label of the handler for bytecode {name}.
#define BC_LABEL(name) BC_##name:
// Loads the instruction word at next_pc and looks up the address of its
// handler in the dispatch table.
#define DECODE()                                                   \
  do {                                                             \
    next_insn = Load32Aligned(next_pc);                            \
    next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \
  } while (false)
// Makes the previously decoded instruction the current one and jumps to its
// handler. NOTE: expands to several statements, so it must not be used as the
// body of an unbraced if/else/loop.
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto* next_handler_addr
// Without computed goto support, we fall back to a simple switch-based
// dispatch (A large switch statement inside a loop with a case for every
// bytecode).
#else  // V8_USE_COMPUTED_GOTO
// Emits the case label of the handler for bytecode {name}.
#define BC_LABEL(name) case BC_##name:
// Loads the instruction word at next_pc.
#define DECODE() next_insn = Load32Aligned(next_pc)
// Makes the previously decoded instruction the current one and jumps to the
// switch_dispatch_continuation label. NOTE: expands to several statements, so
// it must not be used as the body of an unbraced if/else/loop.
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto switch_dispatch_continuation
#endif  // V8_USE_COMPUTED_GOTO
351
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
// We want those two macros as far apart as possible, because the goto in
// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
// don't hit the cache and have to fetch the next handler address from physical
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
// potentially be executed unconditionally, reducing memory stall.
//
// ADVANCE(name) sets next_pc to the instruction following the current
// bytecode {name} and decodes it. SET_PC_FROM_OFFSET(offset) sets next_pc to
// {offset} bytes past code_base and decodes the instruction found there.
//
// Both macros are wrapped in do { ... } while (false) so that each expands to
// exactly one statement (and is therefore safe as the body of an unbraced
// if/else), and {offset} is parenthesized so that arbitrary expressions, e.g.
// a conditional expression, can be passed.
#define ADVANCE(name)                               \
  do {                                              \
    next_pc = pc + RegExpBytecodeLength(BC_##name); \
    DECODE();                                       \
  } while (false)
#define SET_PC_FROM_OFFSET(offset)  \
  do {                              \
    next_pc = code_base + (offset); \
    DECODE();                       \
  } while (false)
365
// Current position mutations.
//
// SET_CURRENT_POSITION assigns {value} to the variable `current` of the
// enclosing scope and then DCHECKs the new position against
// base::IsInRange(current, 0, subject.length()). ADVANCE_CURRENT_POSITION
// moves `current` by the delta {by} and goes through the same check.
#define SET_CURRENT_POSITION(value)                        \
  do {                                                     \
    current = (value);                                     \
    DCHECK(base::IsInRange(current, 0, subject.length())); \
  } while (false)
#define ADVANCE_CURRENT_POSITION(by) SET_CURRENT_POSITION(current + (by))
373
// BYTECODE(name) introduces the handler for bytecode {name} by emitting its
// dispatch label via BC_LABEL. In DEBUG builds the label is followed by a call
// to MaybeTraceInterpreter, which receives the interpreter state (code_base,
// pc, backtrack stack depth, current position and character) together with
// the length and the stringified name of the bytecode.
#ifdef DEBUG
#define BYTECODE(name)                                                \
  BC_LABEL(name)                                                      \
  MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
                        current_char, RegExpBytecodeLength(BC_##name), #name);
#else
#define BYTECODE(name) BC_LABEL(name)
#endif  // DEBUG
382
383 template <typename Char>
RawMatch( Isolate* isolate, ByteArray code_array, String subject_string, base::Vector<const Char> subject, int* output_registers, int output_register_count, int total_register_count, int current, uint32_t current_char, RegExp::CallOrigin call_origin, const uint32_t backtrack_limit)384 IrregexpInterpreter::Result RawMatch(
385 Isolate* isolate, ByteArray code_array, String subject_string,
386 base::Vector<const Char> subject, int* output_registers,
387 int output_register_count, int total_register_count, int current,
388 uint32_t current_char, RegExp::CallOrigin call_origin,
389 const uint32_t backtrack_limit) {
390 DisallowGarbageCollection no_gc;
391
392 #if V8_USE_COMPUTED_GOTO
393
394 // We have to make sure that no OOB access to the dispatch table is possible and
395 // all values are valid label addresses.
396 // Otherwise jumps to arbitrary addresses could potentially happen.
397 // This is ensured as follows:
398 // Every index to the dispatch table gets masked using BYTECODE_MASK in
399 // DECODE(). This way we can only get values between 0 (only the least
400 // significant byte of an integer is used) and kRegExpPaddedBytecodeCount - 1
401 // (BYTECODE_MASK is defined to be exactly this value).
402 // All entries from kRegExpBytecodeCount to kRegExpPaddedBytecodeCount have to
403 // be filled with BREAKs (invalid operation).
404
405 // Fill dispatch table from last defined bytecode up to the next power of two
406 // with BREAK (invalid operation).
407 // TODO(pthier): Find a way to fill up automatically (at compile time)
408 // 59 real bytecodes -> 5 fillers
409 #define BYTECODE_FILLER_ITERATOR(V) \
410 V(BREAK) /* 1 */ \
411 V(BREAK) /* 2 */ \
412 V(BREAK) /* 3 */ \
413 V(BREAK) /* 4 */ \
414 V(BREAK) /* 5 */
415
416 #define COUNT(...) +1
417 static constexpr int kRegExpBytecodeFillerCount =
418 BYTECODE_FILLER_ITERATOR(COUNT);
419 #undef COUNT
420
421 // Make sure kRegExpPaddedBytecodeCount is actually the closest possible power
422 // of two.
423 DCHECK_EQ(kRegExpPaddedBytecodeCount,
424 base::bits::RoundUpToPowerOfTwo32(kRegExpBytecodeCount));
425
426 // Make sure every bytecode we get by using BYTECODE_MASK is well defined.
427 STATIC_ASSERT(kRegExpBytecodeCount <= kRegExpPaddedBytecodeCount);
428 STATIC_ASSERT(kRegExpBytecodeCount + kRegExpBytecodeFillerCount ==
429 kRegExpPaddedBytecodeCount);
430
431 #define DECLARE_DISPATCH_TABLE_ENTRY(name, ...) &&BC_##name,
432 static const void* const dispatch_table[kRegExpPaddedBytecodeCount] = {
433 BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)
434 BYTECODE_FILLER_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
435 #undef DECLARE_DISPATCH_TABLE_ENTRY
436 #undef BYTECODE_FILLER_ITERATOR
437
438 #endif // V8_USE_COMPUTED_GOTO
439
440 const byte* pc = code_array.GetDataStartAddress();
441 const byte* code_base = pc;
442
443 InterpreterRegisters registers(total_register_count, output_registers,
444 output_register_count);
445 BacktrackStack backtrack_stack;
446
447 uint32_t backtrack_count = 0;
448
449 #ifdef DEBUG
450 if (FLAG_trace_regexp_bytecodes) {
451 PrintF("\n\nStart bytecode interpreter\n\n");
452 }
453 #endif
454
455 while (true) {
456 const byte* next_pc = pc;
457 int32_t insn;
458 int32_t next_insn;
459 #if V8_USE_COMPUTED_GOTO
460 const void* next_handler_addr;
461 DECODE();
462 DISPATCH();
463 #else
464 insn = Load32Aligned(pc);
465 switch (insn & BYTECODE_MASK) {
466 #endif // V8_USE_COMPUTED_GOTO
467 BYTECODE(BREAK) { UNREACHABLE(); }
468 BYTECODE(PUSH_CP) {
469 ADVANCE(PUSH_CP);
470 if (!backtrack_stack.push(current)) {
471 return MaybeThrowStackOverflow(isolate, call_origin);
472 }
473 DISPATCH();
474 }
475 BYTECODE(PUSH_BT) {
476 ADVANCE(PUSH_BT);
477 if (!backtrack_stack.push(Load32Aligned(pc + 4))) {
478 return MaybeThrowStackOverflow(isolate, call_origin);
479 }
480 DISPATCH();
481 }
482 BYTECODE(PUSH_REGISTER) {
483 ADVANCE(PUSH_REGISTER);
484 if (!backtrack_stack.push(registers[LoadPacked24Unsigned(insn)])) {
485 return MaybeThrowStackOverflow(isolate, call_origin);
486 }
487 DISPATCH();
488 }
489 BYTECODE(SET_REGISTER) {
490 ADVANCE(SET_REGISTER);
491 registers[LoadPacked24Unsigned(insn)] = Load32Aligned(pc + 4);
492 DISPATCH();
493 }
494 BYTECODE(ADVANCE_REGISTER) {
495 ADVANCE(ADVANCE_REGISTER);
496 registers[LoadPacked24Unsigned(insn)] += Load32Aligned(pc + 4);
497 DISPATCH();
498 }
499 BYTECODE(SET_REGISTER_TO_CP) {
500 ADVANCE(SET_REGISTER_TO_CP);
501 registers[LoadPacked24Unsigned(insn)] = current + Load32Aligned(pc + 4);
502 DISPATCH();
503 }
504 BYTECODE(SET_CP_TO_REGISTER) {
505 ADVANCE(SET_CP_TO_REGISTER);
506 SET_CURRENT_POSITION(registers[LoadPacked24Unsigned(insn)]);
507 DISPATCH();
508 }
509 BYTECODE(SET_REGISTER_TO_SP) {
510 ADVANCE(SET_REGISTER_TO_SP);
511 registers[LoadPacked24Unsigned(insn)] = backtrack_stack.sp();
512 DISPATCH();
513 }
514 BYTECODE(SET_SP_TO_REGISTER) {
515 ADVANCE(SET_SP_TO_REGISTER);
516 backtrack_stack.set_sp(registers[LoadPacked24Unsigned(insn)]);
517 DISPATCH();
518 }
519 BYTECODE(POP_CP) {
520 ADVANCE(POP_CP);
521 SET_CURRENT_POSITION(backtrack_stack.pop());
522 DISPATCH();
523 }
524 BYTECODE(POP_BT) {
525 STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0);
526 if (++backtrack_count == backtrack_limit) {
527 int return_code = LoadPacked24Signed(insn);
528 return static_cast<IrregexpInterpreter::Result>(return_code);
529 }
530
531 IrregexpInterpreter::Result return_code =
532 HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
533 &code_base, &subject, &pc);
534 if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
535
536 SET_PC_FROM_OFFSET(backtrack_stack.pop());
537 DISPATCH();
538 }
539 BYTECODE(POP_REGISTER) {
540 ADVANCE(POP_REGISTER);
541 registers[LoadPacked24Unsigned(insn)] = backtrack_stack.pop();
542 DISPATCH();
543 }
544 BYTECODE(FAIL) {
545 isolate->counters()->regexp_backtracks()->AddSample(
546 static_cast<int>(backtrack_count));
547 return IrregexpInterpreter::FAILURE;
548 }
549 BYTECODE(SUCCEED) {
550 isolate->counters()->regexp_backtracks()->AddSample(
551 static_cast<int>(backtrack_count));
552 registers.CopyToOutputRegisters();
553 return IrregexpInterpreter::SUCCESS;
554 }
555 BYTECODE(ADVANCE_CP) {
556 ADVANCE(ADVANCE_CP);
557 ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
558 DISPATCH();
559 }
560 BYTECODE(GOTO) {
561 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
562 DISPATCH();
563 }
564 BYTECODE(ADVANCE_CP_AND_GOTO) {
565 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
566 ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
567 DISPATCH();
568 }
569 BYTECODE(CHECK_GREEDY) {
570 if (current == backtrack_stack.peek()) {
571 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
572 backtrack_stack.pop();
573 } else {
574 ADVANCE(CHECK_GREEDY);
575 }
576 DISPATCH();
577 }
578 BYTECODE(LOAD_CURRENT_CHAR) {
579 int pos = current + LoadPacked24Signed(insn);
580 if (pos >= subject.length() || pos < 0) {
581 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
582 } else {
583 ADVANCE(LOAD_CURRENT_CHAR);
584 current_char = subject[pos];
585 }
586 DISPATCH();
587 }
588 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
589 ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
590 int pos = current + LoadPacked24Signed(insn);
591 current_char = subject[pos];
592 DISPATCH();
593 }
594 BYTECODE(LOAD_2_CURRENT_CHARS) {
595 int pos = current + LoadPacked24Signed(insn);
596 if (pos + 2 > subject.length() || pos < 0) {
597 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
598 } else {
599 ADVANCE(LOAD_2_CURRENT_CHARS);
600 Char next = subject[pos + 1];
601 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
602 }
603 DISPATCH();
604 }
605 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
606 ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
607 int pos = current + LoadPacked24Signed(insn);
608 Char next = subject[pos + 1];
609 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
610 DISPATCH();
611 }
612 BYTECODE(LOAD_4_CURRENT_CHARS) {
613 DCHECK_EQ(1, sizeof(Char));
614 int pos = current + LoadPacked24Signed(insn);
615 if (pos + 4 > subject.length() || pos < 0) {
616 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
617 } else {
618 ADVANCE(LOAD_4_CURRENT_CHARS);
619 Char next1 = subject[pos + 1];
620 Char next2 = subject[pos + 2];
621 Char next3 = subject[pos + 3];
622 current_char =
623 (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
624 }
625 DISPATCH();
626 }
627 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
628 ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
629 DCHECK_EQ(1, sizeof(Char));
630 int pos = current + LoadPacked24Signed(insn);
631 Char next1 = subject[pos + 1];
632 Char next2 = subject[pos + 2];
633 Char next3 = subject[pos + 3];
634 current_char =
635 (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
636 DISPATCH();
637 }
638 BYTECODE(CHECK_4_CHARS) {
639 uint32_t c = Load32Aligned(pc + 4);
640 if (c == current_char) {
641 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
642 } else {
643 ADVANCE(CHECK_4_CHARS);
644 }
645 DISPATCH();
646 }
647 BYTECODE(CHECK_CHAR) {
648 uint32_t c = LoadPacked24Unsigned(insn);
649 if (c == current_char) {
650 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
651 } else {
652 ADVANCE(CHECK_CHAR);
653 }
654 DISPATCH();
655 }
656 BYTECODE(CHECK_NOT_4_CHARS) {
657 uint32_t c = Load32Aligned(pc + 4);
658 if (c != current_char) {
659 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
660 } else {
661 ADVANCE(CHECK_NOT_4_CHARS);
662 }
663 DISPATCH();
664 }
665 BYTECODE(CHECK_NOT_CHAR) {
666 uint32_t c = LoadPacked24Unsigned(insn);
667 if (c != current_char) {
668 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
669 } else {
670 ADVANCE(CHECK_NOT_CHAR);
671 }
672 DISPATCH();
673 }
674 BYTECODE(AND_CHECK_4_CHARS) {
675 uint32_t c = Load32Aligned(pc + 4);
676 if (c == (current_char & Load32Aligned(pc + 8))) {
677 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
678 } else {
679 ADVANCE(AND_CHECK_4_CHARS);
680 }
681 DISPATCH();
682 }
683 BYTECODE(AND_CHECK_CHAR) {
684 uint32_t c = LoadPacked24Unsigned(insn);
685 if (c == (current_char & Load32Aligned(pc + 4))) {
686 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
687 } else {
688 ADVANCE(AND_CHECK_CHAR);
689 }
690 DISPATCH();
691 }
692 BYTECODE(AND_CHECK_NOT_4_CHARS) {
693 uint32_t c = Load32Aligned(pc + 4);
694 if (c != (current_char & Load32Aligned(pc + 8))) {
695 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
696 } else {
697 ADVANCE(AND_CHECK_NOT_4_CHARS);
698 }
699 DISPATCH();
700 }
701 BYTECODE(AND_CHECK_NOT_CHAR) {
702 uint32_t c = LoadPacked24Unsigned(insn);
703 if (c != (current_char & Load32Aligned(pc + 4))) {
704 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
705 } else {
706 ADVANCE(AND_CHECK_NOT_CHAR);
707 }
708 DISPATCH();
709 }
710 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
711 uint32_t c = LoadPacked24Unsigned(insn);
712 uint32_t minus = Load16Aligned(pc + 4);
713 uint32_t mask = Load16Aligned(pc + 6);
714 if (c != ((current_char - minus) & mask)) {
715 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
716 } else {
717 ADVANCE(MINUS_AND_CHECK_NOT_CHAR);
718 }
719 DISPATCH();
720 }
721 BYTECODE(CHECK_CHAR_IN_RANGE) {
722 uint32_t from = Load16Aligned(pc + 4);
723 uint32_t to = Load16Aligned(pc + 6);
724 if (from <= current_char && current_char <= to) {
725 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
726 } else {
727 ADVANCE(CHECK_CHAR_IN_RANGE);
728 }
729 DISPATCH();
730 }
731 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
732 uint32_t from = Load16Aligned(pc + 4);
733 uint32_t to = Load16Aligned(pc + 6);
734 if (from > current_char || current_char > to) {
735 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
736 } else {
737 ADVANCE(CHECK_CHAR_NOT_IN_RANGE);
738 }
739 DISPATCH();
740 }
741 BYTECODE(CHECK_BIT_IN_TABLE) {
742 if (CheckBitInTable(current_char, pc + 8)) {
743 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
744 } else {
745 ADVANCE(CHECK_BIT_IN_TABLE);
746 }
747 DISPATCH();
748 }
749 BYTECODE(CHECK_LT) {
750 uint32_t limit = LoadPacked24Unsigned(insn);
751 if (current_char < limit) {
752 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
753 } else {
754 ADVANCE(CHECK_LT);
755 }
756 DISPATCH();
757 }
758 BYTECODE(CHECK_GT) {
759 uint32_t limit = LoadPacked24Unsigned(insn);
760 if (current_char > limit) {
761 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
762 } else {
763 ADVANCE(CHECK_GT);
764 }
765 DISPATCH();
766 }
767 BYTECODE(CHECK_REGISTER_LT) {
768 if (registers[LoadPacked24Unsigned(insn)] < Load32Aligned(pc + 4)) {
769 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
770 } else {
771 ADVANCE(CHECK_REGISTER_LT);
772 }
773 DISPATCH();
774 }
775 BYTECODE(CHECK_REGISTER_GE) {
776 if (registers[LoadPacked24Unsigned(insn)] >= Load32Aligned(pc + 4)) {
777 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
778 } else {
779 ADVANCE(CHECK_REGISTER_GE);
780 }
781 DISPATCH();
782 }
783 BYTECODE(CHECK_REGISTER_EQ_POS) {
784 if (registers[LoadPacked24Unsigned(insn)] == current) {
785 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
786 } else {
787 ADVANCE(CHECK_REGISTER_EQ_POS);
788 }
789 DISPATCH();
790 }
791 BYTECODE(CHECK_NOT_REGS_EQUAL) {
792 if (registers[LoadPacked24Unsigned(insn)] ==
793 registers[Load32Aligned(pc + 4)]) {
794 ADVANCE(CHECK_NOT_REGS_EQUAL);
795 } else {
796 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
797 }
798 DISPATCH();
799 }
800 BYTECODE(CHECK_NOT_BACK_REF) {
801 int from = registers[LoadPacked24Unsigned(insn)];
802 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
803 if (from >= 0 && len > 0) {
804 if (current + len > subject.length() ||
805 !CompareCharsEqual(&subject[from], &subject[current], len)) {
806 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
807 DISPATCH();
808 }
809 ADVANCE_CURRENT_POSITION(len);
810 }
811 ADVANCE(CHECK_NOT_BACK_REF);
812 DISPATCH();
813 }
814 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
815 int from = registers[LoadPacked24Unsigned(insn)];
816 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
817 if (from >= 0 && len > 0) {
818 if (current - len < 0 ||
819 !CompareCharsEqual(&subject[from], &subject[current - len], len)) {
820 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
821 DISPATCH();
822 }
823 SET_CURRENT_POSITION(current - len);
824 }
825 ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
826 DISPATCH();
827 }
828 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
829 int from = registers[LoadPacked24Unsigned(insn)];
830 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
831 if (from >= 0 && len > 0) {
832 if (current + len > subject.length() ||
833 !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
834 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
835 DISPATCH();
836 }
837 ADVANCE_CURRENT_POSITION(len);
838 }
839 ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
840 DISPATCH();
841 }
842 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
843 int from = registers[LoadPacked24Unsigned(insn)];
844 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
845 if (from >= 0 && len > 0) {
846 if (current + len > subject.length() ||
847 !BackRefMatchesNoCase(isolate, from, current, len, subject,
848 false)) {
849 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
850 DISPATCH();
851 }
852 ADVANCE_CURRENT_POSITION(len);
853 }
854 ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
855 DISPATCH();
856 }
857 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
858 int from = registers[LoadPacked24Unsigned(insn)];
859 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
860 if (from >= 0 && len > 0) {
861 if (current - len < 0 ||
862 !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
863 true)) {
864 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
865 DISPATCH();
866 }
867 SET_CURRENT_POSITION(current - len);
868 }
869 ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
870 DISPATCH();
871 }
872 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
873 int from = registers[LoadPacked24Unsigned(insn)];
874 int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
875 if (from >= 0 && len > 0) {
876 if (current - len < 0 ||
877 !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
878 false)) {
879 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
880 DISPATCH();
881 }
882 SET_CURRENT_POSITION(current - len);
883 }
884 ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
885 DISPATCH();
886 }
887 BYTECODE(CHECK_AT_START) {
888 if (current + LoadPacked24Signed(insn) == 0) {
889 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
890 } else {
891 ADVANCE(CHECK_AT_START);
892 }
893 DISPATCH();
894 }
895 BYTECODE(CHECK_NOT_AT_START) {
896 if (current + LoadPacked24Signed(insn) == 0) {
897 ADVANCE(CHECK_NOT_AT_START);
898 } else {
899 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
900 }
901 DISPATCH();
902 }
903 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
904 ADVANCE(SET_CURRENT_POSITION_FROM_END);
905 int by = LoadPacked24Unsigned(insn);
906 if (subject.length() - current > by) {
907 SET_CURRENT_POSITION(subject.length() - by);
908 current_char = subject[current - 1];
909 }
910 DISPATCH();
911 }
912 BYTECODE(CHECK_CURRENT_POSITION) {
913 int pos = current + LoadPacked24Signed(insn);
914 if (pos > subject.length() || pos < 0) {
915 SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
916 } else {
917 ADVANCE(CHECK_CURRENT_POSITION);
918 }
919 DISPATCH();
920 }
921 BYTECODE(SKIP_UNTIL_CHAR) {
922 int32_t load_offset = LoadPacked24Signed(insn);
923 int32_t advance = Load16AlignedSigned(pc + 4);
924 uint32_t c = Load16Aligned(pc + 6);
925 while (IndexIsInBounds(current + load_offset, subject.length())) {
926 current_char = subject[current + load_offset];
927 if (c == current_char) {
928 SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
929 DISPATCH();
930 }
931 ADVANCE_CURRENT_POSITION(advance);
932 }
933 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
934 DISPATCH();
935 }
936 BYTECODE(SKIP_UNTIL_CHAR_AND) {
937 int32_t load_offset = LoadPacked24Signed(insn);
938 int32_t advance = Load16AlignedSigned(pc + 4);
939 uint16_t c = Load16Aligned(pc + 6);
940 uint32_t mask = Load32Aligned(pc + 8);
941 int32_t maximum_offset = Load32Aligned(pc + 12);
942 while (static_cast<uintptr_t>(current + maximum_offset) <=
943 static_cast<uintptr_t>(subject.length())) {
944 current_char = subject[current + load_offset];
945 if (c == (current_char & mask)) {
946 SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
947 DISPATCH();
948 }
949 ADVANCE_CURRENT_POSITION(advance);
950 }
951 SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
952 DISPATCH();
953 }
954 BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
955 int32_t load_offset = LoadPacked24Signed(insn);
956 int32_t advance = Load16AlignedSigned(pc + 4);
957 uint16_t c = Load16Aligned(pc + 6);
958 int32_t maximum_offset = Load32Aligned(pc + 8);
959 while (static_cast<uintptr_t>(current + maximum_offset) <=
960 static_cast<uintptr_t>(subject.length())) {
961 current_char = subject[current + load_offset];
962 if (c == current_char) {
963 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
964 DISPATCH();
965 }
966 ADVANCE_CURRENT_POSITION(advance);
967 }
968 SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
969 DISPATCH();
970 }
971 BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
972 int32_t load_offset = LoadPacked24Signed(insn);
973 int32_t advance = Load16AlignedSigned(pc + 4);
974 const byte* table = pc + 8;
975 while (IndexIsInBounds(current + load_offset, subject.length())) {
976 current_char = subject[current + load_offset];
977 if (CheckBitInTable(current_char, table)) {
978 SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
979 DISPATCH();
980 }
981 ADVANCE_CURRENT_POSITION(advance);
982 }
983 SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
984 DISPATCH();
985 }
986 BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
987 int32_t load_offset = LoadPacked24Signed(insn);
988 int32_t advance = Load16AlignedSigned(pc + 4);
989 uint16_t limit = Load16Aligned(pc + 6);
990 const byte* table = pc + 8;
991 while (IndexIsInBounds(current + load_offset, subject.length())) {
992 current_char = subject[current + load_offset];
993 if (current_char > limit) {
994 SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
995 DISPATCH();
996 }
997 if (!CheckBitInTable(current_char, table)) {
998 SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
999 DISPATCH();
1000 }
1001 ADVANCE_CURRENT_POSITION(advance);
1002 }
1003 SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
1004 DISPATCH();
1005 }
1006 BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
1007 int32_t load_offset = LoadPacked24Signed(insn);
1008 int32_t advance = Load32Aligned(pc + 4);
1009 uint16_t c = Load16Aligned(pc + 8);
1010 uint16_t c2 = Load16Aligned(pc + 10);
1011 while (IndexIsInBounds(current + load_offset, subject.length())) {
1012 current_char = subject[current + load_offset];
1013 // The two if-statements below are split up intentionally, as combining
1014 // them seems to result in register allocation behaving quite
1015 // differently and slowing down the resulting code.
1016 if (c == current_char) {
1017 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
1018 DISPATCH();
1019 }
1020 if (c2 == current_char) {
1021 SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
1022 DISPATCH();
1023 }
1024 ADVANCE_CURRENT_POSITION(advance);
1025 }
1026 SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
1027 DISPATCH();
1028 }
1029 #if V8_USE_COMPUTED_GOTO
1030 // Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
1031 // V8_USE_COMPUTED_GOTO here.
1032 #else
1033 default:
1034 UNREACHABLE();
1035 }
1036 // Label we jump to in DISPATCH(). There must be no instructions between the
1037 // end of the switch, this label and the end of the loop.
1038 switch_dispatch_continuation : {}
1039 #endif // V8_USE_COMPUTED_GOTO
1040 }
1041 }
1042
1043 #undef BYTECODE
1044 #undef ADVANCE_CURRENT_POSITION
1045 #undef SET_CURRENT_POSITION
1046 #undef DISPATCH
1047 #undef DECODE
1048 #undef SET_PC_FROM_OFFSET
1049 #undef ADVANCE
1050 #undef BC_LABEL
1051 #undef V8_USE_COMPUTED_GOTO
1052
1053 } // namespace
1054
1055 // static
Match( Isolate* isolate, JSRegExp regexp, String subject_string, int* output_registers, int output_register_count, int start_position, RegExp::CallOrigin call_origin)1056 IrregexpInterpreter::Result IrregexpInterpreter::Match(
1057 Isolate* isolate, JSRegExp regexp, String subject_string,
1058 int* output_registers, int output_register_count, int start_position,
1059 RegExp::CallOrigin call_origin) {
1060 if (FLAG_regexp_tier_up) regexp.TierUpTick();
1061
1062 bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
1063 ByteArray code_array = ByteArray::cast(regexp.bytecode(is_one_byte));
1064 int total_register_count = regexp.max_register_count();
1065
1066 return MatchInternal(isolate, code_array, subject_string, output_registers,
1067 output_register_count, total_register_count,
1068 start_position, call_origin, regexp.backtrack_limit());
1069 }
1070
MatchInternal( Isolate* isolate, ByteArray code_array, String subject_string, int* output_registers, int output_register_count, int total_register_count, int start_position, RegExp::CallOrigin call_origin, uint32_t backtrack_limit)1071 IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
1072 Isolate* isolate, ByteArray code_array, String subject_string,
1073 int* output_registers, int output_register_count, int total_register_count,
1074 int start_position, RegExp::CallOrigin call_origin,
1075 uint32_t backtrack_limit) {
1076 DCHECK(subject_string.IsFlat());
1077
1078 // TODO(chromium:1262676): Remove this CHECK once fixed.
1079 CHECK(code_array.IsByteArray());
1080
1081 // Note: Heap allocation *is* allowed in two situations if calling from
1082 // Runtime:
1083 // 1. When creating & throwing a stack overflow exception. The interpreter
1084 // aborts afterwards, and thus possible-moved objects are never used.
1085 // 2. When handling interrupts. We manually relocate unhandlified references
1086 // after interrupts have run.
1087 DisallowGarbageCollection no_gc;
1088
1089 base::uc16 previous_char = '\n';
1090 String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
1091 // Because interrupts can result in GC and string content relocation, the
1092 // checksum verification in FlatContent may fail even though this code is
1093 // safe. See (2) above.
1094 subject_content.UnsafeDisableChecksumVerification();
1095 if (subject_content.IsOneByte()) {
1096 base::Vector<const uint8_t> subject_vector =
1097 subject_content.ToOneByteVector();
1098 if (start_position != 0) previous_char = subject_vector[start_position - 1];
1099 return RawMatch(isolate, code_array, subject_string, subject_vector,
1100 output_registers, output_register_count,
1101 total_register_count, start_position, previous_char,
1102 call_origin, backtrack_limit);
1103 } else {
1104 DCHECK(subject_content.IsTwoByte());
1105 base::Vector<const base::uc16> subject_vector =
1106 subject_content.ToUC16Vector();
1107 if (start_position != 0) previous_char = subject_vector[start_position - 1];
1108 return RawMatch(isolate, code_array, subject_string, subject_vector,
1109 output_registers, output_register_count,
1110 total_register_count, start_position, previous_char,
1111 call_origin, backtrack_limit);
1112 }
1113 }
1114
1115 #ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1116
1117 // This method is called through an external reference from RegExpExecInternal
1118 // builtin.
MatchForCallFromJs( Address subject, int32_t start_position, Address, Address, int* output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp)1119 IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
1120 Address subject, int32_t start_position, Address, Address,
1121 int* output_registers, int32_t output_register_count,
1122 RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
1123 DCHECK_NOT_NULL(isolate);
1124 DCHECK_NOT_NULL(output_registers);
1125 DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
1126
1127 DisallowGarbageCollection no_gc;
1128 DisallowJavascriptExecution no_js(isolate);
1129 DisallowHandleAllocation no_handles;
1130 DisallowHandleDereference no_deref;
1131
1132 String subject_string = String::cast(Object(subject));
1133 JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
1134
1135 if (regexp_obj.MarkedForTierUp()) {
1136 // Returning RETRY will re-enter through runtime, where actual recompilation
1137 // for tier-up takes place.
1138 return IrregexpInterpreter::RETRY;
1139 }
1140
1141 return Match(isolate, regexp_obj, subject_string, output_registers,
1142 output_register_count, start_position, call_origin);
1143 }
1144
1145 #endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1146
MatchForCallFromRuntime( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string, int* output_registers, int output_register_count, int start_position)1147 IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
1148 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
1149 int* output_registers, int output_register_count, int start_position) {
1150 return Match(isolate, *regexp, *subject_string, output_registers,
1151 output_register_count, start_position,
1152 RegExp::CallOrigin::kFromRuntime);
1153 }
1154
1155 } // namespace internal
1156 } // namespace v8
1157