1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/builtins/builtins-regexp-gen.h"
6
7#include "src/builtins/builtins-constructor-gen.h"
8#include "src/builtins/builtins-utils-gen.h"
9#include "src/builtins/builtins.h"
10#include "src/builtins/growable-fixed-array-gen.h"
11#include "src/codegen/code-factory.h"
12#include "src/codegen/code-stub-assembler.h"
13#include "src/codegen/macro-assembler.h"
14#include "src/common/globals.h"
15#include "src/execution/protectors.h"
16#include "src/heap/factory-inl.h"
17#include "src/logging/counters.h"
18#include "src/objects/js-regexp-string-iterator.h"
19#include "src/objects/js-regexp.h"
20#include "src/objects/regexp-match-info.h"
21#include "src/regexp/regexp-flags.h"
22
23namespace v8 {
24namespace internal {
25
26// Tail calls the regular expression interpreter.
27// static
28void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
29  ExternalReference interpreter_code_entry =
30      ExternalReference::re_match_for_call_from_js();
31  masm->Jump(interpreter_code_entry);
32}
33
34// Tail calls the experimental regular expression engine.
35// static
36void Builtins::Generate_RegExpExperimentalTrampoline(MacroAssembler* masm) {
37  ExternalReference interpreter_code_entry =
38      ExternalReference::re_experimental_match_for_call_from_js();
39  masm->Jump(interpreter_code_entry);
40}
41
42TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); }
43
44TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() {
45  return IntPtrConstant(0);
46}
47
48// If code is a builtin, return the address to the (possibly embedded) builtin
49// code entry, otherwise return the entry of the code object itself.
50TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<CodeT> code) {
51  if (V8_EXTERNAL_CODE_SPACE_BOOL) {
52    // When external code space is enabled we can load the entry point directly
53    // from the CodeT object.
54    return GetCodeEntry(code);
55  }
56
57  TVARIABLE(RawPtrT, var_result);
58
59  Label if_code_is_off_heap(this), out(this);
60  TNode<Int32T> builtin_index =
61      LoadObjectField<Int32T>(code, Code::kBuiltinIndexOffset);
62  {
63    GotoIfNot(
64        Word32Equal(builtin_index,
65                    Int32Constant(static_cast<int>(Builtin::kNoBuiltinId))),
66        &if_code_is_off_heap);
67    var_result = ReinterpretCast<RawPtrT>(
68        IntPtrAdd(BitcastTaggedToWord(code),
69                  IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
70    Goto(&out);
71  }
72
73  BIND(&if_code_is_off_heap);
74  {
75    TNode<IntPtrT> builtin_entry_offset_from_isolate_root =
76        IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()),
77                  ChangeInt32ToIntPtr(Word32Shl(
78                      builtin_index, Int32Constant(kSystemPointerSizeLog2))));
79
80    var_result = ReinterpretCast<RawPtrT>(
81        Load(MachineType::Pointer(),
82             ExternalConstant(ExternalReference::isolate_root(isolate())),
83             builtin_entry_offset_from_isolate_root));
84    Goto(&out);
85  }
86
87  BIND(&out);
88  return var_result.value();
89}
90
91// -----------------------------------------------------------------------------
92// ES6 section 21.2 RegExp Objects
93
94TNode<JSRegExpResult> RegExpBuiltinsAssembler::AllocateRegExpResult(
95    TNode<Context> context, TNode<Smi> length, TNode<Smi> index,
96    TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index,
97    TNode<BoolT> has_indices, TNode<FixedArray>* elements_out) {
98  CSA_DCHECK(this, SmiLessThanOrEqual(
99                       length, SmiConstant(JSArray::kMaxFastArrayLength)));
100  CSA_DCHECK(this, SmiGreaterThan(length, SmiConstant(0)));
101
102  // Allocate.
103
104  Label result_has_indices(this), allocated(this);
105  const ElementsKind elements_kind = PACKED_ELEMENTS;
106  base::Optional<TNode<AllocationSite>> no_gc_site = base::nullopt;
107  TNode<IntPtrT> length_intptr = SmiUntag(length);
108  // Note: The returned `var_elements` may be in young large object space, but
109  // `var_array` is guaranteed to be in new space so we could skip write
110  // barriers below.
111  TVARIABLE(JSArray, var_array);
112  TVARIABLE(FixedArrayBase, var_elements);
113
114  GotoIf(has_indices, &result_has_indices);
115  {
116    TNode<Map> map = CAST(LoadContextElement(LoadNativeContext(context),
117                                             Context::REGEXP_RESULT_MAP_INDEX));
118    std::tie(var_array, var_elements) =
119        AllocateUninitializedJSArrayWithElements(
120            elements_kind, map, length, no_gc_site, length_intptr,
121            AllocationFlag::kAllowLargeObjectAllocation, JSRegExpResult::kSize);
122    Goto(&allocated);
123  }
124
125  BIND(&result_has_indices);
126  {
127    TNode<Map> map =
128        CAST(LoadContextElement(LoadNativeContext(context),
129                                Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX));
130    std::tie(var_array, var_elements) =
131        AllocateUninitializedJSArrayWithElements(
132            elements_kind, map, length, no_gc_site, length_intptr,
133            AllocationFlag::kAllowLargeObjectAllocation,
134            JSRegExpResultWithIndices::kSize);
135    Goto(&allocated);
136  }
137
138  BIND(&allocated);
139
140  // Finish result initialization.
141
142  TNode<JSRegExpResult> result =
143      UncheckedCast<JSRegExpResult>(var_array.value());
144
145  // Load undefined value once here to avoid multiple LoadRoots.
146  TNode<Oddball> undefined_value = UncheckedCast<Oddball>(
147      CodeAssembler::LoadRoot(RootIndex::kUndefinedValue));
148
149  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index);
150  // TODO(jgruber,turbofan): Could skip barrier but the MemoryOptimizer
151  // complains.
152  StoreObjectField(result, JSRegExpResult::kInputOffset, input);
153  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset,
154                                 undefined_value);
155  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kNamesOffset,
156                                 undefined_value);
157
158  StoreObjectField(result, JSRegExpResult::kRegexpInputOffset, input);
159
160  // If non-smi last_index then store an SmiZero instead.
161  {
162    TNode<Smi> last_index_smi = Select<Smi>(
163        TaggedIsSmi(last_index), [=] { return CAST(last_index); },
164        [=] { return SmiZero(); });
165    StoreObjectField(result, JSRegExpResult::kRegexpLastIndexOffset,
166                     last_index_smi);
167  }
168
169  Label finish_initialization(this);
170  GotoIfNot(has_indices, &finish_initialization);
171  {
172    static_assert(
173        std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
174        "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
175    StoreObjectFieldNoWriteBarrier(
176        result, JSRegExpResultWithIndices::kIndicesOffset, undefined_value);
177    Goto(&finish_initialization);
178  }
179
180  BIND(&finish_initialization);
181
182  // Finish elements initialization.
183
184  FillFixedArrayWithValue(elements_kind, var_elements.value(), IntPtrZero(),
185                          length_intptr, RootIndex::kUndefinedValue);
186
187  if (elements_out) *elements_out = CAST(var_elements.value());
188  return result;
189}
190
191TNode<Object> RegExpBuiltinsAssembler::FastLoadLastIndexBeforeSmiCheck(
192    TNode<JSRegExp> regexp) {
193  // Load the in-object field.
194  static const int field_offset =
195      JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
196  return LoadObjectField(regexp, field_offset);
197}
198
199TNode<Object> RegExpBuiltinsAssembler::SlowLoadLastIndex(TNode<Context> context,
200                                                         TNode<Object> regexp) {
201  return GetProperty(context, regexp, isolate()->factory()->lastIndex_string());
202}
203
204// The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified
205// JSRegExp instance.
206void RegExpBuiltinsAssembler::FastStoreLastIndex(TNode<JSRegExp> regexp,
207                                                 TNode<Smi> value) {
208  // Store the in-object field.
209  static const int field_offset =
210      JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
211  StoreObjectField(regexp, field_offset, value);
212}
213
214void RegExpBuiltinsAssembler::SlowStoreLastIndex(TNode<Context> context,
215                                                 TNode<Object> regexp,
216                                                 TNode<Object> value) {
217  TNode<String> name = HeapConstant(isolate()->factory()->lastIndex_string());
218  SetPropertyStrict(context, regexp, name, value);
219}
220
221TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
222    TNode<Context> context, TNode<JSRegExp> regexp,
223    TNode<RegExpMatchInfo> match_info, TNode<String> string,
224    TNode<Number> last_index) {
225  Label named_captures(this), maybe_build_indices(this), out(this);
226
227  TNode<IntPtrT> num_indices = SmiUntag(CAST(UnsafeLoadFixedArrayElement(
228      match_info, RegExpMatchInfo::kNumberOfCapturesIndex)));
229  TNode<Smi> num_results = SmiTag(WordShr(num_indices, 1));
230  TNode<Smi> start = CAST(UnsafeLoadFixedArrayElement(
231      match_info, RegExpMatchInfo::kFirstCaptureIndex));
232  TNode<Smi> end = CAST(UnsafeLoadFixedArrayElement(
233      match_info, RegExpMatchInfo::kFirstCaptureIndex + 1));
234
235  // Calculate the substring of the first match before creating the result array
236  // to avoid an unnecessary write barrier storing the first result.
237
238  TNode<String> first =
239      CAST(CallBuiltin(Builtin::kSubString, context, string, start, end));
240
241  // Load flags and check if the result object needs to have indices.
242  const TNode<Smi> flags =
243      CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
244  const TNode<BoolT> has_indices = IsSetSmi(flags, JSRegExp::kHasIndices);
245  TNode<FixedArray> result_elements;
246  TNode<JSRegExpResult> result =
247      AllocateRegExpResult(context, num_results, start, string, regexp,
248                           last_index, has_indices, &result_elements);
249
250  UnsafeStoreFixedArrayElement(result_elements, 0, first);
251
252  // If no captures exist we can skip named capture handling as well.
253  GotoIf(SmiEqual(num_results, SmiConstant(1)), &maybe_build_indices);
254
255  // Store all remaining captures.
256  TNode<IntPtrT> limit = IntPtrAdd(
257      IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices);
258
259  TVARIABLE(IntPtrT, var_from_cursor,
260            IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2));
261  TVARIABLE(IntPtrT, var_to_cursor, IntPtrConstant(1));
262
263  Label loop(this, {&var_from_cursor, &var_to_cursor});
264
265  Goto(&loop);
266  BIND(&loop);
267  {
268    TNode<IntPtrT> from_cursor = var_from_cursor.value();
269    TNode<IntPtrT> to_cursor = var_to_cursor.value();
270    TNode<Smi> start_cursor =
271        CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor));
272
273    Label next_iter(this);
274    GotoIf(SmiEqual(start_cursor, SmiConstant(-1)), &next_iter);
275
276    TNode<IntPtrT> from_cursor_plus1 =
277        IntPtrAdd(from_cursor, IntPtrConstant(1));
278    TNode<Smi> end_cursor =
279        CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor_plus1));
280
281    TNode<String> capture = CAST(CallBuiltin(Builtin::kSubString, context,
282                                             string, start_cursor, end_cursor));
283    UnsafeStoreFixedArrayElement(result_elements, to_cursor, capture);
284    Goto(&next_iter);
285
286    BIND(&next_iter);
287    var_from_cursor = IntPtrAdd(from_cursor, IntPtrConstant(2));
288    var_to_cursor = IntPtrAdd(to_cursor, IntPtrConstant(1));
289    Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
290           &named_captures);
291  }
292
293  BIND(&named_captures);
294  {
295    CSA_DCHECK(this, SmiGreaterThan(num_results, SmiConstant(1)));
296
297    // Preparations for named capture properties. Exit early if the result does
298    // not have any named captures to minimize performance impact.
299
300    TNode<FixedArray> data =
301        CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
302
303    // We reach this point only if captures exist, implying that the assigned
304    // regexp engine must be able to handle captures.
305    CSA_DCHECK(
306        this,
307        Word32Or(
308            SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
309                     SmiConstant(JSRegExp::IRREGEXP)),
310            SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
311                     SmiConstant(JSRegExp::EXPERIMENTAL))));
312
313    // The names fixed array associates names at even indices with a capture
314    // index at odd indices.
315    TNode<Object> maybe_names =
316        LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
317    GotoIf(TaggedEqual(maybe_names, SmiZero()), &maybe_build_indices);
318
319    // One or more named captures exist, add a property for each one.
320
321    TNode<FixedArray> names = CAST(maybe_names);
322    TNode<IntPtrT> names_length = LoadAndUntagFixedArrayBaseLength(names);
323    CSA_DCHECK(this, IntPtrGreaterThan(names_length, IntPtrZero()));
324
325    // Stash names in case we need them to build the indices array later.
326    StoreObjectField(result, JSRegExpResult::kNamesOffset, names);
327
328    // Allocate a new object to store the named capture properties.
329    // TODO(jgruber): Could be optimized by adding the object map to the heap
330    // root list.
331
332    TNode<IntPtrT> num_properties = WordSar(names_length, 1);
333    TNode<NativeContext> native_context = LoadNativeContext(context);
334    TNode<Map> map = LoadSlowObjectWithNullPrototypeMap(native_context);
335    TNode<HeapObject> properties;
336    if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) {
337      properties = AllocateSwissNameDictionary(num_properties);
338    } else {
339      properties = AllocateNameDictionary(
340          num_properties, AllocationFlag::kAllowLargeObjectAllocation);
341    }
342
343    TNode<JSObject> group_object = AllocateJSObjectFromMap(map, properties);
344    StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object);
345
346    TVARIABLE(IntPtrT, var_i, IntPtrZero());
347
348    Label inner_loop(this, &var_i);
349
350    Goto(&inner_loop);
351    BIND(&inner_loop);
352    {
353      TNode<IntPtrT> i = var_i.value();
354      TNode<IntPtrT> i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
355      TNode<IntPtrT> i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
356
357      TNode<String> name = CAST(LoadFixedArrayElement(names, i));
358      TNode<Smi> index = CAST(LoadFixedArrayElement(names, i_plus_1));
359      TNode<HeapObject> capture =
360          CAST(LoadFixedArrayElement(result_elements, SmiUntag(index)));
361
362      // TODO(v8:8213): For maintainability, we should call a CSA/Torque
363      // implementation of CreateDataProperty instead.
364
365      // At this point the spec says to call CreateDataProperty. However, we can
366      // skip most of the steps and go straight to adding a dictionary entry
367      // because we know a bunch of useful facts:
368      // - All keys are non-numeric internalized strings
369      // - No keys repeat
370      // - Receiver has no prototype
371      // - Receiver isn't used as a prototype
372      // - Receiver isn't any special object like a Promise intrinsic object
373      // - Receiver is extensible
374      // - Receiver has no interceptors
375      Label add_dictionary_property_slow(this, Label::kDeferred);
376      Add<PropertyDictionary>(CAST(properties), name, capture,
377                              &add_dictionary_property_slow);
378
379      var_i = i_plus_2;
380      Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length),
381             &maybe_build_indices, &inner_loop);
382
383      BIND(&add_dictionary_property_slow);
384      // If the dictionary needs resizing, the above Add call will jump here
385      // before making any changes. This shouldn't happen because we allocated
386      // the dictionary with enough space above.
387      Unreachable();
388    }
389  }
390
391  // Build indices if needed (i.e. if the /d flag is present) after named
392  // capture groups are processed.
393  BIND(&maybe_build_indices);
394  GotoIfNot(has_indices, &out);
395  {
396    const TNode<Object> maybe_names =
397        LoadObjectField(result, JSRegExpResultWithIndices::kNamesOffset);
398    const TNode<JSRegExpResultIndices> indices =
399        UncheckedCast<JSRegExpResultIndices>(
400            CallRuntime(Runtime::kRegExpBuildIndices, context, regexp,
401                        match_info, maybe_names));
402    StoreObjectField(result, JSRegExpResultWithIndices::kIndicesOffset,
403                     indices);
404    Goto(&out);
405  }
406
407  BIND(&out);
408  return result;
409}
410
411void RegExpBuiltinsAssembler::GetStringPointers(
412    TNode<RawPtrT> string_data, TNode<IntPtrT> offset,
413    TNode<IntPtrT> last_index, TNode<IntPtrT> string_length,
414    String::Encoding encoding, TVariable<RawPtrT>* var_string_start,
415    TVariable<RawPtrT>* var_string_end) {
416  DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
417  DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());
418
419  const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
420                                ? UINT8_ELEMENTS
421                                : UINT16_ELEMENTS;
422
423  TNode<IntPtrT> from_offset =
424      ElementOffsetFromIndex(IntPtrAdd(offset, last_index), kind);
425  *var_string_start =
426      ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, from_offset));
427
428  TNode<IntPtrT> to_offset =
429      ElementOffsetFromIndex(IntPtrAdd(offset, string_length), kind);
430  *var_string_end = ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, to_offset));
431}
432
433TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
434    TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
435    TNode<Number> last_index, TNode<RegExpMatchInfo> match_info,
436    RegExp::ExecQuirks exec_quirks) {
437  ToDirectStringAssembler to_direct(state(), string);
438
439  TVARIABLE(HeapObject, var_result);
440  Label out(this), atom(this), runtime(this, Label::kDeferred),
441      retry_experimental(this, Label::kDeferred);
442
443  // External constants.
444  TNode<ExternalReference> isolate_address =
445      ExternalConstant(ExternalReference::isolate_address(isolate()));
446  TNode<ExternalReference> static_offsets_vector_address = ExternalConstant(
447      ExternalReference::address_of_static_offsets_vector(isolate()));
448
449  // At this point, last_index is definitely a canonicalized non-negative
450  // number, which implies that any non-Smi last_index is greater than
451  // the maximal string length. If lastIndex > string.length then the matcher
452  // must fail.
453
454  Label if_failure(this);
455
456  CSA_DCHECK(this, IsNumberNormalized(last_index));
457  CSA_DCHECK(this, IsNumberPositive(last_index));
458  GotoIf(TaggedIsNotSmi(last_index), &if_failure);
459
460  TNode<IntPtrT> int_string_length = LoadStringLengthAsWord(string);
461  TNode<IntPtrT> int_last_index = SmiUntag(CAST(last_index));
462
463  GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure);
464
465  // Since the RegExp has been compiled, data contains a fixed array.
466  TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
467  {
468    // Dispatch on the type of the RegExp.
469    {
470      Label next(this), unreachable(this, Label::kDeferred);
471      TNode<Int32T> tag = LoadAndUntagToWord32FixedArrayElement(
472          data, IntPtrConstant(JSRegExp::kTagIndex));
473
474      int32_t values[] = {
475          JSRegExp::IRREGEXP,
476          JSRegExp::ATOM,
477          JSRegExp::EXPERIMENTAL,
478      };
479      Label* labels[] = {&next, &atom, &next};
480
481      STATIC_ASSERT(arraysize(values) == arraysize(labels));
482      Switch(tag, &unreachable, values, labels, arraysize(values));
483
484      BIND(&unreachable);
485      Unreachable();
486
487      BIND(&next);
488    }
489
490    // Check (number_of_captures + 1) * 2 <= offsets vector size
491    // Or              number_of_captures <= offsets vector size / 2 - 1
492    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
493        data, JSRegExp::kIrregexpCaptureCountIndex));
494
495    const int kOffsetsSize = Isolate::kJSRegexpStaticOffsetsVectorSize;
496    STATIC_ASSERT(kOffsetsSize >= 2);
497    GotoIf(SmiAbove(capture_count, SmiConstant(kOffsetsSize / 2 - 1)),
498           &runtime);
499  }
500
501  // Unpack the string if possible.
502
503  to_direct.TryToDirect(&runtime);
504
505  // Load the irregexp code or bytecode object and offsets into the subject
506  // string. Both depend on whether the string is one- or two-byte.
507
508  TVARIABLE(RawPtrT, var_string_start);
509  TVARIABLE(RawPtrT, var_string_end);
510  TVARIABLE(Object, var_code);
511  TVARIABLE(Object, var_bytecode);
512
513  {
514    TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime);
515
516    Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
517    Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
518           &if_isonebyte, &if_istwobyte);
519
520    BIND(&if_isonebyte);
521    {
522      GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
523                        int_string_length, String::ONE_BYTE_ENCODING,
524                        &var_string_start, &var_string_end);
525      var_code =
526          UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex);
527      var_bytecode = UnsafeLoadFixedArrayElement(
528          data, JSRegExp::kIrregexpLatin1BytecodeIndex);
529      Goto(&next);
530    }
531
532    BIND(&if_istwobyte);
533    {
534      GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
535                        int_string_length, String::TWO_BYTE_ENCODING,
536                        &var_string_start, &var_string_end);
537      var_code =
538          UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex);
539      var_bytecode = UnsafeLoadFixedArrayElement(
540          data, JSRegExp::kIrregexpUC16BytecodeIndex);
541      Goto(&next);
542    }
543
544    BIND(&next);
545  }
546
547  // Check that the irregexp code has been generated for the actual string
548  // encoding. If it has, the field contains a code object; and otherwise it
549  // contains the uninitialized sentinel as a smi.
550#ifdef DEBUG
551  {
552    Label next(this);
553    GotoIfNot(TaggedIsSmi(var_code.value()), &next);
554    CSA_DCHECK(this, SmiEqual(CAST(var_code.value()),
555                              SmiConstant(JSRegExp::kUninitializedValue)));
556    Goto(&next);
557    BIND(&next);
558  }
559#endif
560
561  GotoIf(TaggedIsSmi(var_code.value()), &runtime);
562  TNode<CodeT> code = CAST(var_code.value());
563
564  Label if_success(this), if_exception(this, Label::kDeferred);
565  {
566    IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
567
568    // Set up args for the final call into generated Irregexp code.
569
570    MachineType type_int32 = MachineType::Int32();
571    MachineType type_tagged = MachineType::AnyTagged();
572    MachineType type_ptr = MachineType::Pointer();
573
574    // Result: A NativeRegExpMacroAssembler::Result return code.
575    MachineType retval_type = type_int32;
576
577    // Argument 0: Original subject string.
578    MachineType arg0_type = type_tagged;
579    TNode<String> arg0 = string;
580
581    // Argument 1: Previous index.
582    MachineType arg1_type = type_int32;
583    TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index);
584
585    // Argument 2: Start of string data. This argument is ignored in the
586    // interpreter.
587    MachineType arg2_type = type_ptr;
588    TNode<RawPtrT> arg2 = var_string_start.value();
589
590    // Argument 3: End of string data. This argument is ignored in the
591    // interpreter.
592    MachineType arg3_type = type_ptr;
593    TNode<RawPtrT> arg3 = var_string_end.value();
594
595    // Argument 4: static offsets vector buffer.
596    MachineType arg4_type = type_ptr;
597    TNode<ExternalReference> arg4 = static_offsets_vector_address;
598
599    // Argument 5: Number of capture registers.
600    // Setting this to the number of registers required to store all captures
601    // forces global regexps to behave as non-global.
602    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
603        data, JSRegExp::kIrregexpCaptureCountIndex));
604    // capture_count is the number of captures without the match itself.
605    // Required registers = (capture_count + 1) * 2.
606    STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) * 2));
607    TNode<Smi> register_count =
608        SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
609
610    MachineType arg5_type = type_int32;
611    TNode<Int32T> arg5 = SmiToInt32(register_count);
612
613    // Argument 6: Indicate that this is a direct call from JavaScript.
614    MachineType arg6_type = type_int32;
615    TNode<Int32T> arg6 = Int32Constant(RegExp::CallOrigin::kFromJs);
616
617    // Argument 7: Pass current isolate address.
618    MachineType arg7_type = type_ptr;
619    TNode<ExternalReference> arg7 = isolate_address;
620
621    // Argument 8: Regular expression object. This argument is ignored in native
622    // irregexp code.
623    MachineType arg8_type = type_tagged;
624    TNode<JSRegExp> arg8 = regexp;
625
626    TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code);
627
628    // AIX uses function descriptors on CFunction calls. code_entry in this case
629    // may also point to a Regex interpreter entry trampoline which does not
630    // have a function descriptor. This method is ineffective on other platforms
631    // and is equivalent to CallCFunction.
632    TNode<Int32T> result =
633        UncheckedCast<Int32T>(CallCFunctionWithoutFunctionDescriptor(
634            code_entry, retval_type, std::make_pair(arg0_type, arg0),
635            std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
636            std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
637            std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6),
638            std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8)));
639
640    // Check the result.
641    // We expect exactly one result since we force the called regexp to behave
642    // as non-global.
643    TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
644    GotoIf(
645        IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
646        &if_success);
647    GotoIf(
648        IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
649        &if_failure);
650    GotoIf(IntPtrEqual(int_result,
651                       IntPtrConstant(RegExp::kInternalRegExpException)),
652           &if_exception);
653    GotoIf(IntPtrEqual(
654               int_result,
655               IntPtrConstant(RegExp::kInternalRegExpFallbackToExperimental)),
656           &retry_experimental);
657
658    CSA_DCHECK(this, IntPtrEqual(int_result,
659                                 IntPtrConstant(RegExp::kInternalRegExpRetry)));
660    Goto(&runtime);
661  }
662
663  BIND(&if_success);
664  {
665    if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
666      static constexpr int kMatchStartOffset = 0;
667      TNode<IntPtrT> value = ChangeInt32ToIntPtr(UncheckedCast<Int32T>(
668          Load(MachineType::Int32(), static_offsets_vector_address,
669               IntPtrConstant(kMatchStartOffset))));
670      GotoIf(UintPtrGreaterThanOrEqual(value, int_string_length), &if_failure);
671    }
672
673    // Check that the last match info has space for the capture registers and
674    // the additional information. Ensure no overflow in add.
675    STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);
676    TNode<Smi> available_slots =
677        SmiSub(LoadFixedArrayBaseLength(match_info),
678               SmiConstant(RegExpMatchInfo::kLastMatchOverhead));
679    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
680        data, JSRegExp::kIrregexpCaptureCountIndex));
681    // Calculate number of register_count = (capture_count + 1) * 2.
682    TNode<Smi> register_count =
683        SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
684    GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);
685
686    // Fill match_info.
687    UnsafeStoreFixedArrayElement(
688        match_info, RegExpMatchInfo::kNumberOfCapturesIndex, register_count);
689    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
690                                 string);
691    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
692                                 string);
693
694    // Fill match and capture offsets in match_info.
695    {
696      TNode<IntPtrT> limit_offset =
697          ElementOffsetFromIndex(register_count, INT32_ELEMENTS, 0);
698
699      TNode<IntPtrT> to_offset = ElementOffsetFromIndex(
700          IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS,
701          RegExpMatchInfo::kHeaderSize - kHeapObjectTag);
702      TVARIABLE(IntPtrT, var_to_offset, to_offset);
703
704      VariableList vars({&var_to_offset}, zone());
705      BuildFastLoop<IntPtrT>(
706          vars, IntPtrZero(), limit_offset,
707          [&](TNode<IntPtrT> offset) {
708            TNode<Int32T> value = UncheckedCast<Int32T>(Load(
709                MachineType::Int32(), static_offsets_vector_address, offset));
710            TNode<Smi> smi_value = SmiFromInt32(value);
711            StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,
712                                var_to_offset.value(), smi_value);
713            Increment(&var_to_offset, kTaggedSize);
714          },
715          kInt32Size, IndexAdvanceMode::kPost);
716    }
717
718    var_result = match_info;
719    Goto(&out);
720  }
721
722  BIND(&if_failure);
723  {
724    var_result = NullConstant();
725    Goto(&out);
726  }
727
728  BIND(&if_exception);
729  {
730// A stack overflow was detected in RegExp code.
731#ifdef DEBUG
732    TNode<ExternalReference> pending_exception_address =
733        ExternalConstant(ExternalReference::Create(
734            IsolateAddressId::kPendingExceptionAddress, isolate()));
735    CSA_DCHECK(this, IsTheHole(Load<Object>(pending_exception_address)));
736#endif  // DEBUG
737    CallRuntime(Runtime::kThrowStackOverflow, context);
738    Unreachable();
739  }
740
741  BIND(&retry_experimental);
742  {
743    auto target_fn =
744        exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
745            ? Runtime::kRegExpExperimentalOneshotExecTreatMatchAtEndAsFailure
746            : Runtime::kRegExpExperimentalOneshotExec;
747    var_result = CAST(CallRuntime(target_fn, context, regexp, string,
748                                  last_index, match_info));
749    Goto(&out);
750  }
751
752  BIND(&runtime);
753  {
754    auto target_fn =
755        exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
756            ? Runtime::kRegExpExecTreatMatchAtEndAsFailure
757            : Runtime::kRegExpExec;
758    var_result = CAST(CallRuntime(target_fn, context, regexp, string,
759                                  last_index, match_info));
760    Goto(&out);
761  }
762
763  BIND(&atom);
764  {
765    // TODO(jgruber): A call with 4 args stresses register allocation, this
766    // should probably just be inlined.
767    var_result = CAST(CallBuiltin(Builtin::kRegExpExecAtom, context, regexp,
768                                  string, last_index, match_info));
769    Goto(&out);
770  }
771
772  BIND(&out);
773  return var_result.value();
774}
775
776TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
777    TNode<Context> context, TNode<Object> object, TNode<Map> map) {
778  Label out(this);
779  TVARIABLE(BoolT, var_result);
780
781#ifdef V8_ENABLE_FORCE_SLOW_PATH
782  var_result = Int32FalseConstant();
783  GotoIfForceSlowPath(&out);
784#endif
785
786  const TNode<NativeContext> native_context = LoadNativeContext(context);
787  const TNode<HeapObject> regexp_fun =
788      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
789  const TNode<Object> initial_map =
790      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
791  const TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
792
793  var_result = has_initialmap;
794  GotoIfNot(has_initialmap, &out);
795
796  // The smi check is required to omit ToLength(lastIndex) calls with possible
797  // user-code execution on the fast path.
798  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
799  var_result = TaggedIsPositiveSmi(last_index);
800  Goto(&out);
801
802  BIND(&out);
803  return var_result.value();
804}
805
806TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
807    TNode<Context> context, TNode<Object> object) {
808  CSA_DCHECK(this, TaggedIsNotSmi(object));
809  return IsFastRegExpNoPrototype(context, object, LoadMap(CAST(object)));
810}
811
812void RegExpBuiltinsAssembler::BranchIfFastRegExp(
813    TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
814    PrototypeCheckAssembler::Flags prototype_check_flags,
815    base::Optional<DescriptorIndexNameValue> additional_property_to_check,
816    Label* if_isunmodified, Label* if_ismodified) {
817  CSA_DCHECK(this, TaggedEqual(LoadMap(object), map));
818
819  GotoIfForceSlowPath(if_ismodified);
820
821  // This should only be needed for String.p.(split||matchAll), but we are
822  // conservative here.
823  GotoIf(IsRegExpSpeciesProtectorCellInvalid(), if_ismodified);
824
825  TNode<NativeContext> native_context = LoadNativeContext(context);
826  TNode<JSFunction> regexp_fun =
827      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
828  TNode<Map> initial_map = CAST(
829      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset));
830  TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
831
832  GotoIfNot(has_initialmap, if_ismodified);
833
834  // The smi check is required to omit ToLength(lastIndex) calls with possible
835  // user-code execution on the fast path.
836  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
837  GotoIfNot(TaggedIsPositiveSmi(last_index), if_ismodified);
838
839  // Verify the prototype.
840
841  TNode<Map> initial_proto_initial_map = CAST(
842      LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX));
843
844  DescriptorIndexNameValue properties_to_check[2];
845  int property_count = 0;
846  properties_to_check[property_count++] = DescriptorIndexNameValue{
847      JSRegExp::kExecFunctionDescriptorIndex, RootIndex::kexec_string,
848      Context::REGEXP_EXEC_FUNCTION_INDEX};
849  if (additional_property_to_check) {
850    properties_to_check[property_count++] = *additional_property_to_check;
851  }
852
853  PrototypeCheckAssembler prototype_check_assembler(
854      state(), prototype_check_flags, native_context, initial_proto_initial_map,
855      base::Vector<DescriptorIndexNameValue>(properties_to_check,
856                                             property_count));
857
858  TNode<HeapObject> prototype = LoadMapPrototype(map);
859  prototype_check_assembler.CheckAndBranch(prototype, if_isunmodified,
860                                           if_ismodified);
861}
862void RegExpBuiltinsAssembler::BranchIfFastRegExpForSearch(
863    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
864    Label* if_ismodified) {
865  BranchIfFastRegExp(
866      context, object, LoadMap(object),
867      PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
868      DescriptorIndexNameValue{JSRegExp::kSymbolSearchFunctionDescriptorIndex,
869                               RootIndex::ksearch_symbol,
870                               Context::REGEXP_SEARCH_FUNCTION_INDEX},
871      if_isunmodified, if_ismodified);
872}
873
874void RegExpBuiltinsAssembler::BranchIfFastRegExpForMatch(
875    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
876    Label* if_ismodified) {
877  BranchIfFastRegExp(
878      context, object, LoadMap(object),
879      PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
880      DescriptorIndexNameValue{JSRegExp::kSymbolMatchFunctionDescriptorIndex,
881                               RootIndex::kmatch_symbol,
882                               Context::REGEXP_MATCH_FUNCTION_INDEX},
883      if_isunmodified, if_ismodified);
884}
885
886void RegExpBuiltinsAssembler::BranchIfFastRegExp_Strict(
887    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
888    Label* if_ismodified) {
889  BranchIfFastRegExp(context, object, LoadMap(object),
890                     PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
891                     base::nullopt, if_isunmodified, if_ismodified);
892}
893
894void RegExpBuiltinsAssembler::BranchIfFastRegExp_Permissive(
895    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
896    Label* if_ismodified) {
897  BranchIfFastRegExp(context, object, LoadMap(object),
898                     PrototypeCheckAssembler::kCheckFull, base::nullopt,
899                     if_isunmodified, if_ismodified);
900}
901
902void RegExpBuiltinsAssembler::BranchIfRegExpResult(const TNode<Context> context,
903                                                   const TNode<Object> object,
904                                                   Label* if_isunmodified,
905                                                   Label* if_ismodified) {
906  // Could be a Smi.
907  const TNode<Map> map = LoadReceiverMap(object);
908
909  const TNode<NativeContext> native_context = LoadNativeContext(context);
910  const TNode<Object> initial_regexp_result_map =
911      LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);
912
913  Label maybe_result_with_indices(this);
914  Branch(TaggedEqual(map, initial_regexp_result_map), if_isunmodified,
915         &maybe_result_with_indices);
916  BIND(&maybe_result_with_indices);
917  {
918    static_assert(
919        std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
920        "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
921    const TNode<Object> initial_regexp_result_with_indices_map =
922        LoadContextElement(native_context,
923                           Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX);
924    Branch(TaggedEqual(map, initial_regexp_result_with_indices_map),
925           if_isunmodified, if_ismodified);
926  }
927}
928
929// Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
930// and {match_info} is updated on success.
931// The slow path is implemented in RegExp::AtomExec.
932TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
933  auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
934  auto subject_string = Parameter<String>(Descriptor::kString);
935  auto last_index = Parameter<Smi>(Descriptor::kLastIndex);
936  auto match_info = Parameter<FixedArray>(Descriptor::kMatchInfo);
937  auto context = Parameter<Context>(Descriptor::kContext);
938
939  CSA_DCHECK(this, TaggedIsPositiveSmi(last_index));
940
941  TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
942  CSA_DCHECK(
943      this,
944      SmiEqual(CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kTagIndex)),
945               SmiConstant(JSRegExp::ATOM)));
946
947  // Callers ensure that last_index is in-bounds.
948  CSA_DCHECK(this,
949             UintPtrLessThanOrEqual(SmiUntag(last_index),
950                                    LoadStringLengthAsWord(subject_string)));
951
952  const TNode<String> needle_string =
953      CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex));
954
955  // ATOM patterns are guaranteed to not be the empty string (these are
956  // intercepted and replaced in JSRegExp::Initialize.
957  //
958  // This is especially relevant for crbug.com/1075514: atom patterns are
959  // non-empty and thus guaranteed not to match at the end of the string.
960  CSA_DCHECK(this, IntPtrGreaterThan(LoadStringLengthAsWord(needle_string),
961                                     IntPtrConstant(0)));
962
963  const TNode<Smi> match_from =
964      CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string,
965                       needle_string, last_index));
966
967  Label if_failure(this), if_success(this);
968  Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);
969
970  BIND(&if_success);
971  {
972    CSA_DCHECK(this, TaggedIsPositiveSmi(match_from));
973    CSA_DCHECK(this, UintPtrLessThan(SmiUntag(match_from),
974                                     LoadStringLengthAsWord(subject_string)));
975
976    const int kNumRegisters = 2;
977    STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);
978
979    const TNode<Smi> match_to =
980        SmiAdd(match_from, LoadStringLengthAsSmi(needle_string));
981
982    UnsafeStoreFixedArrayElement(match_info,
983                                 RegExpMatchInfo::kNumberOfCapturesIndex,
984                                 SmiConstant(kNumRegisters));
985    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
986                                 subject_string);
987    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
988                                 subject_string);
989    UnsafeStoreFixedArrayElement(
990        match_info, RegExpMatchInfo::kFirstCaptureIndex, match_from);
991    UnsafeStoreFixedArrayElement(
992        match_info, RegExpMatchInfo::kFirstCaptureIndex + 1, match_to);
993
994    Return(match_info);
995  }
996
997  BIND(&if_failure);
998  Return(NullConstant());
999}
1000
1001TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) {
1002  auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
1003  auto string = Parameter<String>(Descriptor::kString);
1004  auto last_index = Parameter<Number>(Descriptor::kLastIndex);
1005  auto match_info = Parameter<RegExpMatchInfo>(Descriptor::kMatchInfo);
1006  auto context = Parameter<Context>(Descriptor::kContext);
1007
1008  CSA_DCHECK(this, IsNumberNormalized(last_index));
1009  CSA_DCHECK(this, IsNumberPositive(last_index));
1010
1011  Return(RegExpExecInternal(context, regexp, string, last_index, match_info));
1012}
1013
// Builds the flags string for {regexp} as a freshly allocated one-byte string.
//
// {is_fastpath} is a C++ bool, so the choice between the two strategies below
// is made while the stub is generated:
//  - fast path: {regexp} is cast to JSRegExp and the flag bits are read
//    straight from its flags field;
//  - slow path: every flag is queried through GetProperty and converted with
//    ToBoolean, so the lookups may be observable.
//
// In both cases the work is done in two passes: first the set flags are
// counted (and, on the slow path, collected into {var_flags}), then a string
// of exactly that length is allocated and filled with one character per set
// flag, in REGEXP_FLAG_LIST order.
TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
                                                   TNode<Object> regexp,
                                                   bool is_fastpath) {
  TVARIABLE(String, result);
  // {runtime} and {done} are only bound in the V8_ENABLE_FORCE_SLOW_PATH
  // section at the end of this function.
  Label runtime(this, Label::kDeferred), done(this, &result);
  if (is_fastpath) {
    GotoIfForceSlowPath(&runtime);
  }

  Isolate* isolate = this->isolate();

  const TNode<IntPtrT> int_one = IntPtrConstant(1);
  // Number of set flags, i.e. the length of the resulting string.
  TVARIABLE(Uint32T, var_length, Uint32Constant(0));
  // Bit set of the flags found to be set (JSRegExp::k* bits).
  TVARIABLE(IntPtrT, var_flags);

  // First, count the number of characters we will need and check which flags
  // are set.

  if (is_fastpath) {
    // Refer to JSRegExp's flag property on the fast-path.
    CSA_DCHECK(this, IsJSRegExp(CAST(regexp)));
    const TNode<Smi> flags_smi =
        CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
    var_flags = SmiUntag(flags_smi);

// Expanded once per REGEXP_FLAG_LIST entry: bumps {var_length} if the entry's
// bit is set in {var_flags}. The macro carries its own trailing semicolon
// because REGEXP_FLAG_LIST invokes it without one.
#define CASE_FOR_FLAG(Lower, Camel, ...)                                \
  do {                                                                  \
    Label next(this);                                                   \
    GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
    var_length = Uint32Add(var_length.value(), Uint32Constant(1));      \
    Goto(&next);                                                        \
    BIND(&next);                                                        \
  } while (false);

    REGEXP_FLAG_LIST(CASE_FOR_FLAG)
#undef CASE_FOR_FLAG
  } else {
    DCHECK(!is_fastpath);

    // Fall back to GetProperty stub on the slow-path.
    var_flags = IntPtrZero();

// Loads property NAME from {regexp}; if it is truthy, counts it in
// {var_length} and records FLAG in {var_flags}.
#define CASE_FOR_FLAG(NAME, FLAG)                                          \
  do {                                                                     \
    Label next(this);                                                      \
    const TNode<Object> flag = GetProperty(                                \
        context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
    Label if_isflagset(this);                                              \
    BranchIfToBooleanIsTrue(flag, &if_isflagset, &next);                   \
    BIND(&if_isflagset);                                                   \
    var_length = Uint32Add(var_length.value(), Uint32Constant(1));         \
    var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG)));   \
    Goto(&next);                                                           \
    BIND(&next);                                                           \
  } while (false)

    // The properties are read in exactly this order.
    CASE_FOR_FLAG("global", JSRegExp::kGlobal);
    CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
    CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
    CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
    CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
    CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
    CASE_FOR_FLAG("hasIndices", JSRegExp::kHasIndices);
#undef CASE_FOR_FLAG

// Same as above, but guarded by a byte loaded at run time from
// V8_FLAG_EXTERN_REF: if that byte is zero, the property is not read at all.
#define CASE_FOR_FLAG(NAME, V8_FLAG_EXTERN_REF, FLAG)                      \
  do {                                                                     \
    Label next(this);                                                      \
    TNode<Word32T> flag_value = UncheckedCast<Word32T>(                    \
        Load(MachineType::Uint8(), ExternalConstant(V8_FLAG_EXTERN_REF))); \
    GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)),         \
                       Int32Constant(0)),                                  \
           &next);                                                         \
    const TNode<Object> flag = GetProperty(                                \
        context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
    Label if_isflagset(this);                                              \
    BranchIfToBooleanIsTrue(flag, &if_isflagset, &next);                   \
    BIND(&if_isflagset);                                                   \
    var_length = Uint32Add(var_length.value(), Uint32Constant(1));         \
    var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG)));   \
    Goto(&next);                                                           \
    BIND(&next);                                                           \
  } while (false)

    // "linear" is only consulted when the experimental regexp engine flag is
    // set.
    CASE_FOR_FLAG(
        "linear",
        ExternalReference::address_of_enable_experimental_regexp_engine(),
        JSRegExp::kLinear);
#undef CASE_FOR_FLAG
  }

  // Allocate a string of the required length and fill it with the
  // corresponding char for each set flag.

  {
    const TNode<String> string = AllocateSeqOneByteString(var_length.value());

    // Untagged byte offset of the next character to write, starting at the
    // first character of the sequential string.
    TVARIABLE(IntPtrT, var_offset,
              IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));

// Expanded once per REGEXP_FLAG_LIST entry: if the entry's bit is set in
// {var_flags}, writes Char at {var_offset} and advances the offset by one.
#define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...)              \
  do {                                                                  \
    Label next(this);                                                   \
    GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
    const TNode<Int32T> value = Int32Constant(Char);                    \
    StoreNoWriteBarrier(MachineRepresentation::kWord8, string,          \
                        var_offset.value(), value);                     \
    var_offset = IntPtrAdd(var_offset.value(), int_one);                \
    Goto(&next);                                                        \
    BIND(&next);                                                        \
  } while (false);

    REGEXP_FLAG_LIST(CASE_FOR_FLAG)
#undef CASE_FOR_FLAG

    if (is_fastpath) {
#ifdef V8_ENABLE_FORCE_SLOW_PATH
      // Builds with force-slow-path support need the {runtime} label (the
      // target of GotoIfForceSlowPath above) to be bound; it computes the
      // string via Runtime::kRegExpStringFromFlags instead.
      result = string;
      Goto(&done);

      BIND(&runtime);
      {
        result =
            CAST(CallRuntime(Runtime::kRegExpStringFromFlags, context, regexp));
        Goto(&done);
      }

      BIND(&done);
      return result.value();
#else
      return string;
#endif
    } else {
      return string;
    }
  }
}
1151
1152// ES#sec-regexpinitialize
1153// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
1154TNode<Object> RegExpBuiltinsAssembler::RegExpInitialize(
1155    const TNode<Context> context, const TNode<JSRegExp> regexp,
1156    const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags) {
1157  // Normalize pattern.
1158  const TNode<Object> pattern = Select<Object>(
1159      IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); },
1160      [=] { return ToString_Inline(context, maybe_pattern); });
1161
1162  // Normalize flags.
1163  const TNode<Object> flags = Select<Object>(
1164      IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); },
1165      [=] { return ToString_Inline(context, maybe_flags); });
1166
1167  // Initialize.
1168
1169  return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
1170                     pattern, flags);
1171}
1172
// ES#sec-regexp-pattern-flags
// RegExp ( pattern, flags )
//
// Overall structure:
//  1. When called without a new target, possibly return {pattern} unchanged.
//  2. If {pattern} is regexp-like, derive the effective pattern/flags from it.
//  3. Allocate the result object.
//  4. Hand everything to RegExpInitialize.
TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) {
  auto pattern = Parameter<Object>(Descriptor::kPattern);
  auto flags = Parameter<Object>(Descriptor::kFlags);
  auto new_target = Parameter<Object>(Descriptor::kJSNewTarget);
  auto context = Parameter<Context>(Descriptor::kContext);

  Isolate* isolate = this->isolate();

  // Effective values; they start out as the raw arguments and may be replaced
  // in the steps below.
  TVARIABLE(Object, var_flags, flags);
  TVARIABLE(Object, var_pattern, pattern);
  TVARIABLE(Object, var_new_target, new_target);

  TNode<NativeContext> native_context = LoadNativeContext(context);
  TNode<JSFunction> regexp_function =
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));

  TNode<BoolT> pattern_is_regexp = IsRegExp(context, pattern);

  // Step 1: only relevant when {new_target} is undefined.
  {
    Label next(this);

    GotoIfNot(IsUndefined(new_target), &next);
    // Without a new target, the RegExp function itself takes that role.
    var_new_target = regexp_function;

    // {pattern} is returned as-is only if it is regexp-like, {flags} is
    // undefined, and pattern.constructor is this RegExp function.
    GotoIfNot(pattern_is_regexp, &next);
    GotoIfNot(IsUndefined(flags), &next);

    TNode<Object> value =
        GetProperty(context, pattern, isolate->factory()->constructor_string());

    GotoIfNot(TaggedEqual(value, regexp_function), &next);
    Return(pattern);

    BIND(&next);
  }

  // Step 2: extract source and flags from a regexp-like {pattern}.
  {
    Label next(this), if_patternisfastregexp(this),
        if_patternisslowregexp(this);
    // A Smi can be neither a JSRegExp nor regexp-like; keep the raw arguments.
    GotoIf(TaggedIsSmi(pattern), &next);

    GotoIf(IsJSRegExp(CAST(pattern)), &if_patternisfastregexp);

    Branch(pattern_is_regexp, &if_patternisslowregexp, &next);

    // Actual JSRegExp instance: read the source field directly and, if no
    // flags were passed, build the flags string via the FlagsGetter fast path.
    BIND(&if_patternisfastregexp);
    {
      TNode<Object> source =
          LoadObjectField(CAST(pattern), JSRegExp::kSourceOffset);
      var_pattern = source;

      {
        Label inner_next(this);
        GotoIfNot(IsUndefined(flags), &inner_next);

        var_flags = FlagsGetter(context, pattern, true);
        Goto(&inner_next);

        BIND(&inner_next);
      }

      Goto(&next);
    }

    // Regexp-like object that is not a JSRegExp: go through generic property
    // loads for the source and, if no flags were passed, for the flags.
    BIND(&if_patternisslowregexp);
    {
      var_pattern =
          GetProperty(context, pattern, isolate->factory()->source_string());

      {
        Label inner_next(this);
        GotoIfNot(IsUndefined(flags), &inner_next);

        var_flags =
            GetProperty(context, pattern, isolate->factory()->flags_string());
        Goto(&inner_next);

        BIND(&inner_next);
      }

      Goto(&next);
    }

    BIND(&next);
  }

  // Allocate.

  TVARIABLE(JSRegExp, var_regexp);
  {
    Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred),
        next(this);
    // If the new target is the RegExp function itself, the object can be
    // allocated directly from that function's initial map; any other new
    // target takes the deferred FastNewObject route.
    Branch(TaggedEqual(var_new_target.value(), regexp_function),
           &allocate_jsregexp, &allocate_generic);

    BIND(&allocate_jsregexp);
    {
      const TNode<Map> initial_map = CAST(LoadObjectField(
          regexp_function, JSFunction::kPrototypeOrInitialMapOffset));
      var_regexp = CAST(AllocateJSObjectFromMap(initial_map));
      Goto(&next);
    }

    BIND(&allocate_generic);
    {
      ConstructorBuiltinsAssembler constructor_assembler(this->state());
      var_regexp = CAST(constructor_assembler.FastNewObject(
          context, regexp_function, CAST(var_new_target.value())));
      Goto(&next);
    }

    BIND(&next);
  }

  // Step 4: initialize the new object from the effective pattern and flags.
  const TNode<Object> result = RegExpInitialize(
      context, var_regexp.value(), var_pattern.value(), var_flags.value());
  Return(result);
}
1293
1294// ES#sec-regexp.prototype.compile
1295// RegExp.prototype.compile ( pattern, flags )
1296TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) {
1297  auto maybe_receiver = Parameter<Object>(Descriptor::kReceiver);
1298  auto maybe_pattern = Parameter<Object>(Descriptor::kPattern);
1299  auto maybe_flags = Parameter<Object>(Descriptor::kFlags);
1300  auto context = Parameter<Context>(Descriptor::kContext);
1301
1302  ThrowIfNotInstanceType(context, maybe_receiver, JS_REG_EXP_TYPE,
1303                         "RegExp.prototype.compile");
1304  const TNode<JSRegExp> receiver = CAST(maybe_receiver);
1305
1306  TVARIABLE(Object, var_flags, maybe_flags);
1307  TVARIABLE(Object, var_pattern, maybe_pattern);
1308
1309  // Handle a JSRegExp pattern.
1310  {
1311    Label next(this);
1312
1313    GotoIf(TaggedIsSmi(maybe_pattern), &next);
1314    GotoIfNot(IsJSRegExp(CAST(maybe_pattern)), &next);
1315
1316    // {maybe_flags} must be undefined in this case, otherwise throw.
1317    {
1318      Label maybe_flags_is_undefined(this);
1319      GotoIf(IsUndefined(maybe_flags), &maybe_flags_is_undefined);
1320
1321      ThrowTypeError(context, MessageTemplate::kRegExpFlags);
1322
1323      BIND(&maybe_flags_is_undefined);
1324    }
1325
1326    const TNode<JSRegExp> pattern = CAST(maybe_pattern);
1327    const TNode<String> new_flags = FlagsGetter(context, pattern, true);
1328    const TNode<Object> new_pattern =
1329        LoadObjectField(pattern, JSRegExp::kSourceOffset);
1330
1331    var_flags = new_flags;
1332    var_pattern = new_pattern;
1333
1334    Goto(&next);
1335    BIND(&next);
1336  }
1337
1338  const TNode<Object> result = RegExpInitialize(
1339      context, receiver, var_pattern.value(), var_flags.value());
1340  Return(result);
1341}
1342
1343// Fast-path implementation for flag checks on an unmodified JSRegExp instance.
1344TNode<BoolT> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp,
1345                                                     JSRegExp::Flag flag) {
1346  TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
1347  TNode<Smi> mask = SmiConstant(flag);
1348  return ReinterpretCast<BoolT>(SmiToInt32(
1349      SmiShr(SmiAnd(flags, mask),
1350             base::bits::CountTrailingZeros(static_cast<int>(flag)))));
1351}
1352
1353// Load through the GetProperty stub.
1354TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
1355                                                     TNode<Object> regexp,
1356                                                     JSRegExp::Flag flag) {
1357  Label out(this), if_true(this), if_false(this);
1358  TVARIABLE(BoolT, var_result);
1359
1360  // Only enabled based on a runtime flag.
1361  if (flag == JSRegExp::kLinear) {
1362    TNode<Word32T> flag_value = UncheckedCast<Word32T>(Load(
1363        MachineType::Uint8(),
1364        ExternalConstant(ExternalReference::
1365                             address_of_enable_experimental_regexp_engine())));
1366    GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)),
1367                       Int32Constant(0)),
1368           &if_false);
1369  }
1370
1371  Handle<String> name;
1372  switch (flag) {
1373    case JSRegExp::kNone:
1374      UNREACHABLE();
1375#define V(Lower, Camel, LowerCamel, Char, Bit)          \
1376  case JSRegExp::k##Camel:                              \
1377    name = isolate()->factory()->LowerCamel##_string(); \
1378    break;
1379      REGEXP_FLAG_LIST(V)
1380#undef V
1381  }
1382
1383  TNode<Object> value = GetProperty(context, regexp, name);
1384  BranchIfToBooleanIsTrue(value, &if_true, &if_false);
1385
1386  BIND(&if_true);
1387  var_result = BoolConstant(true);
1388  Goto(&out);
1389
1390  BIND(&if_false);
1391  var_result = BoolConstant(false);
1392  Goto(&out);
1393
1394  BIND(&out);
1395  return var_result.value();
1396}
1397
1398TNode<BoolT> RegExpBuiltinsAssembler::FlagGetter(TNode<Context> context,
1399                                                 TNode<Object> regexp,
1400                                                 JSRegExp::Flag flag,
1401                                                 bool is_fastpath) {
1402  return is_fastpath ? FastFlagGetter(CAST(regexp), flag)
1403                     : SlowFlagGetter(context, regexp, flag);
1404}
1405
1406TNode<Number> RegExpBuiltinsAssembler::AdvanceStringIndex(
1407    TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode,
1408    bool is_fastpath) {
1409  CSA_DCHECK(this, IsNumberNormalized(index));
1410  if (is_fastpath) CSA_DCHECK(this, TaggedIsPositiveSmi(index));
1411
1412  // Default to last_index + 1.
1413  // TODO(pwong): Consider using TrySmiAdd for the fast path to reduce generated
1414  // code.
1415  TNode<Number> index_plus_one = NumberInc(index);
1416  TVARIABLE(Number, var_result, index_plus_one);
1417
1418  // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT
1419  // anyway, consider using UintPtrT index to simplify the code below.
1420
1421  // Advancing the index has some subtle issues involving the distinction
1422  // between Smis and HeapNumbers. There's three cases:
1423  // * {index} is a Smi, {index_plus_one} is a Smi. The standard case.
1424  // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber.
1425  //   In this case we can return the result early, because
1426  //   {index_plus_one} > {string}.length.
1427  // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only
1428  //   occur when {index} is outside the Smi range since we normalize
1429  //   explicitly. Again we can return early.
1430  if (is_fastpath) {
1431    // Must be in Smi range on the fast path. We control the value of {index}
1432    // on all call-sites and can never exceed the length of the string.
1433    STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue);
1434    CSA_DCHECK(this, TaggedIsPositiveSmi(index_plus_one));
1435  }
1436
1437  Label if_isunicode(this), out(this);
1438  GotoIfNot(is_unicode, &out);
1439
1440  // Keep this unconditional (even on the fast path) just to be safe.
1441  Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out);
1442
1443  BIND(&if_isunicode);
1444  {
1445    TNode<UintPtrT> string_length = Unsigned(LoadStringLengthAsWord(string));
1446    TNode<UintPtrT> untagged_plus_one =
1447        Unsigned(SmiUntag(CAST(index_plus_one)));
1448    GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out);
1449
1450    TNode<Int32T> lead =
1451        StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index))));
1452    GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)),
1453                          Int32Constant(0xD800)),
1454              &out);
1455
1456    TNode<Int32T> trail = StringCharCodeAt(string, untagged_plus_one);
1457    GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)),
1458                          Int32Constant(0xDC00)),
1459              &out);
1460
1461    // At a surrogate pair, return index + 2.
1462    TNode<Number> index_plus_two = NumberInc(index_plus_one);
1463    var_result = index_plus_two;
1464
1465    Goto(&out);
1466  }
1467
1468  BIND(&out);
1469  return var_result.value();
1470}
1471
1472// ES#sec-createregexpstringiterator
1473// CreateRegExpStringIterator ( R, S, global, fullUnicode )
1474TNode<Object> RegExpMatchAllAssembler::CreateRegExpStringIterator(
1475    TNode<NativeContext> native_context, TNode<Object> regexp,
1476    TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode) {
1477  TNode<Map> map = CAST(LoadContextElement(
1478      native_context,
1479      Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX));
1480
1481  // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, «
1482  // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]],
1483  // [[Done]] »).
1484  TNode<HeapObject> iterator = Allocate(JSRegExpStringIterator::kHeaderSize);
1485  StoreMapNoWriteBarrier(iterator, map);
1486  StoreObjectFieldRoot(iterator,
1487                       JSRegExpStringIterator::kPropertiesOrHashOffset,
1488                       RootIndex::kEmptyFixedArray);
1489  StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset,
1490                       RootIndex::kEmptyFixedArray);
1491
1492  // 5. Set iterator.[[IteratingRegExp]] to R.
1493  StoreObjectFieldNoWriteBarrier(
1494      iterator, JSRegExpStringIterator::kIteratingRegExpOffset, regexp);
1495
1496  // 6. Set iterator.[[IteratedString]] to S.
1497  StoreObjectFieldNoWriteBarrier(
1498      iterator, JSRegExpStringIterator::kIteratedStringOffset, string);
1499
1500  // 7. Set iterator.[[Global]] to global.
1501  // 8. Set iterator.[[Unicode]] to fullUnicode.
1502  // 9. Set iterator.[[Done]] to false.
1503  TNode<Int32T> global_flag =
1504      Word32Shl(ReinterpretCast<Int32T>(global),
1505                Int32Constant(JSRegExpStringIterator::GlobalBit::kShift));
1506  TNode<Int32T> unicode_flag =
1507      Word32Shl(ReinterpretCast<Int32T>(full_unicode),
1508                Int32Constant(JSRegExpStringIterator::UnicodeBit::kShift));
1509  TNode<Int32T> iterator_flags = Word32Or(global_flag, unicode_flag);
1510  StoreObjectFieldNoWriteBarrier(iterator, JSRegExpStringIterator::kFlagsOffset,
1511                                 SmiFromInt32(iterator_flags));
1512
1513  return iterator;
1514}
1515
1516// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
1517// JSRegExp, {string} is a String, and {limit} is a Smi.
1518TNode<JSArray> RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(
1519    TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
1520    const TNode<Smi> limit) {
1521  CSA_DCHECK(this, IsFastRegExpPermissive(context, regexp));
1522  CSA_DCHECK(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
1523
1524  const TNode<IntPtrT> int_limit = SmiUntag(limit);
1525
1526  const ElementsKind kind = PACKED_ELEMENTS;
1527
1528  const TNode<NativeContext> native_context = LoadNativeContext(context);
1529  TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1530
1531  Label return_empty_array(this, Label::kDeferred);
1532  TVARIABLE(JSArray, var_result);
1533  Label done(this);
1534
1535  // If limit is zero, return an empty array.
1536  {
1537    Label next(this), if_limitiszero(this, Label::kDeferred);
1538    Branch(SmiEqual(limit, SmiZero()), &return_empty_array, &next);
1539    BIND(&next);
1540  }
1541
1542  const TNode<Smi> string_length = LoadStringLengthAsSmi(string);
1543
1544  // If passed the empty {string}, return either an empty array or a singleton
1545  // array depending on whether the {regexp} matches.
1546  {
1547    Label next(this), if_stringisempty(this, Label::kDeferred);
1548    Branch(SmiEqual(string_length, SmiZero()), &if_stringisempty, &next);
1549
1550    BIND(&if_stringisempty);
1551    {
1552      const TNode<Object> last_match_info = LoadContextElement(
1553          native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1554
1555      const TNode<Object> match_indices =
1556          CallBuiltin(Builtin::kRegExpExecInternal, context, regexp, string,
1557                      SmiZero(), last_match_info);
1558
1559      Label return_singleton_array(this);
1560      Branch(IsNull(match_indices), &return_singleton_array,
1561             &return_empty_array);
1562
1563      BIND(&return_singleton_array);
1564      {
1565        TNode<Smi> length = SmiConstant(1);
1566        TNode<IntPtrT> capacity = IntPtrConstant(1);
1567        base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1568        var_result =
1569            AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1570
1571        TNode<FixedArray> fixed_array = CAST(LoadElements(var_result.value()));
1572        UnsafeStoreFixedArrayElement(fixed_array, 0, string);
1573
1574        Goto(&done);
1575      }
1576    }
1577
1578    BIND(&next);
1579  }
1580
1581  // Loop preparations.
1582
1583  GrowableFixedArray array(state());
1584
1585  TVARIABLE(Smi, var_last_matched_until, SmiZero());
1586  TVARIABLE(Smi, var_next_search_from, SmiZero());
1587
1588  Label loop(this, {array.var_array(), array.var_length(), array.var_capacity(),
1589                    &var_last_matched_until, &var_next_search_from}),
1590      push_suffix_and_out(this), out(this);
1591  Goto(&loop);
1592
1593  BIND(&loop);
1594  {
1595    const TNode<Smi> next_search_from = var_next_search_from.value();
1596    const TNode<Smi> last_matched_until = var_last_matched_until.value();
1597
1598    // We're done if we've reached the end of the string.
1599    {
1600      Label next(this);
1601      Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out,
1602             &next);
1603      BIND(&next);
1604    }
1605
1606    // Search for the given {regexp}.
1607
1608    const TNode<Object> last_match_info = LoadContextElement(
1609        native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1610
1611    const TNode<HeapObject> match_indices_ho = RegExpExecInternal(
1612        context, regexp, string, next_search_from, CAST(last_match_info),
1613        RegExp::ExecQuirks::kTreatMatchAtEndAsFailure);
1614
1615    // We're done if no match was found.
1616    {
1617      Label next(this);
1618      Branch(IsNull(match_indices_ho), &push_suffix_and_out, &next);
1619      BIND(&next);
1620    }
1621
1622    TNode<FixedArray> match_indices = CAST(match_indices_ho);
1623    const TNode<Smi> match_from = CAST(UnsafeLoadFixedArrayElement(
1624        match_indices, RegExpMatchInfo::kFirstCaptureIndex));
1625    const TNode<Smi> match_to = CAST(UnsafeLoadFixedArrayElement(
1626        match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1));
1627    CSA_DCHECK(this, SmiNotEqual(match_from, string_length));
1628
1629    // Advance index and continue if the match is empty.
1630    {
1631      Label next(this);
1632
1633      GotoIfNot(SmiEqual(match_to, next_search_from), &next);
1634      GotoIfNot(SmiEqual(match_to, last_matched_until), &next);
1635
1636      const TNode<BoolT> is_unicode =
1637          FastFlagGetter(regexp, JSRegExp::kUnicode);
1638      const TNode<Number> new_next_search_from =
1639          AdvanceStringIndex(string, next_search_from, is_unicode, true);
1640      var_next_search_from = CAST(new_next_search_from);
1641      Goto(&loop);
1642
1643      BIND(&next);
1644    }
1645
1646    // A valid match was found, add the new substring to the array.
1647    {
1648      const TNode<Smi> from = last_matched_until;
1649      const TNode<Smi> to = match_from;
1650      array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1651      GotoIf(WordEqual(array.length(), int_limit), &out);
1652    }
1653
1654    // Add all captures to the array.
1655    {
1656      const TNode<Smi> num_registers = CAST(LoadFixedArrayElement(
1657          match_indices, RegExpMatchInfo::kNumberOfCapturesIndex));
1658      const TNode<IntPtrT> int_num_registers = SmiUntag(num_registers);
1659
1660      TVARIABLE(IntPtrT, var_reg, IntPtrConstant(2));
1661
1662      Label nested_loop(this, {array.var_array(), array.var_length(),
1663                               array.var_capacity(), &var_reg}),
1664          nested_loop_out(this);
1665      Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop,
1666             &nested_loop_out);
1667
1668      BIND(&nested_loop);
1669      {
1670        const TNode<IntPtrT> reg = var_reg.value();
1671        const TNode<Object> from = LoadFixedArrayElement(
1672            match_indices, reg,
1673            RegExpMatchInfo::kFirstCaptureIndex * kTaggedSize);
1674        const TNode<Smi> to = CAST(LoadFixedArrayElement(
1675            match_indices, reg,
1676            (RegExpMatchInfo::kFirstCaptureIndex + 1) * kTaggedSize));
1677
1678        Label select_capture(this), select_undefined(this), store_value(this);
1679        TVARIABLE(Object, var_value);
1680        Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined,
1681               &select_capture);
1682
1683        BIND(&select_capture);
1684        {
1685          var_value =
1686              CallBuiltin(Builtin::kSubString, context, string, from, to);
1687          Goto(&store_value);
1688        }
1689
1690        BIND(&select_undefined);
1691        {
1692          var_value = UndefinedConstant();
1693          Goto(&store_value);
1694        }
1695
1696        BIND(&store_value);
1697        {
1698          array.Push(var_value.value());
1699          GotoIf(WordEqual(array.length(), int_limit), &out);
1700
1701          const TNode<IntPtrT> new_reg = IntPtrAdd(reg, IntPtrConstant(2));
1702          var_reg = new_reg;
1703
1704          Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
1705                 &nested_loop_out);
1706        }
1707      }
1708
1709      BIND(&nested_loop_out);
1710    }
1711
1712    var_last_matched_until = match_to;
1713    var_next_search_from = match_to;
1714    Goto(&loop);
1715  }
1716
1717  BIND(&push_suffix_and_out);
1718  {
1719    const TNode<Smi> from = var_last_matched_until.value();
1720    const TNode<Smi> to = string_length;
1721    array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1722    Goto(&out);
1723  }
1724
1725  BIND(&out);
1726  {
1727    var_result = array.ToJSArray(context);
1728    Goto(&done);
1729  }
1730
1731  BIND(&return_empty_array);
1732  {
1733    TNode<Smi> length = SmiZero();
1734    TNode<IntPtrT> capacity = IntPtrZero();
1735    base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1736    var_result =
1737        AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1738    Goto(&done);
1739  }
1740
1741  BIND(&done);
1742  return var_result.value();
1743}
1744
1745}  // namespace internal
1746}  // namespace v8
1747