// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "src/builtins/builtins-regexp-gen.h"
6 
7 #include "src/builtins/builtins-constructor-gen.h"
8 #include "src/builtins/builtins-utils-gen.h"
9 #include "src/builtins/builtins.h"
10 #include "src/builtins/growable-fixed-array-gen.h"
11 #include "src/codegen/code-factory.h"
12 #include "src/codegen/code-stub-assembler.h"
13 #include "src/codegen/macro-assembler.h"
14 #include "src/common/globals.h"
15 #include "src/execution/protectors.h"
16 #include "src/heap/factory-inl.h"
17 #include "src/logging/counters.h"
18 #include "src/objects/js-regexp-string-iterator.h"
19 #include "src/objects/js-regexp.h"
20 #include "src/objects/regexp-match-info.h"
21 #include "src/regexp/regexp-flags.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 // Tail calls the regular expression interpreter.
27 // static
Generate_RegExpInterpreterTrampoline(MacroAssembler* masm)28 void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
29   ExternalReference interpreter_code_entry =
30       ExternalReference::re_match_for_call_from_js();
31   masm->Jump(interpreter_code_entry);
32 }
33 
34 // Tail calls the experimental regular expression engine.
35 // static
Generate_RegExpExperimentalTrampoline(MacroAssembler* masm)36 void Builtins::Generate_RegExpExperimentalTrampoline(MacroAssembler* masm) {
37   ExternalReference interpreter_code_entry =
38       ExternalReference::re_experimental_match_for_call_from_js();
39   masm->Jump(interpreter_code_entry);
40 }
41 
SmiZero()42 TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); }
43 
IntPtrZero()44 TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() {
45   return IntPtrConstant(0);
46 }
47 
48 // If code is a builtin, return the address to the (possibly embedded) builtin
49 // code entry, otherwise return the entry of the code object itself.
LoadCodeObjectEntry(TNode<CodeT> code)50 TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<CodeT> code) {
51   if (V8_EXTERNAL_CODE_SPACE_BOOL) {
52     // When external code space is enabled we can load the entry point directly
53     // from the CodeT object.
54     return GetCodeEntry(code);
55   }
56 
57   TVARIABLE(RawPtrT, var_result);
58 
59   Label if_code_is_off_heap(this), out(this);
60   TNode<Int32T> builtin_index =
61       LoadObjectField<Int32T>(code, Code::kBuiltinIndexOffset);
62   {
63     GotoIfNot(
64         Word32Equal(builtin_index,
65                     Int32Constant(static_cast<int>(Builtin::kNoBuiltinId))),
66         &if_code_is_off_heap);
67     var_result = ReinterpretCast<RawPtrT>(
68         IntPtrAdd(BitcastTaggedToWord(code),
69                   IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
70     Goto(&out);
71   }
72 
73   BIND(&if_code_is_off_heap);
74   {
75     TNode<IntPtrT> builtin_entry_offset_from_isolate_root =
76         IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()),
77                   ChangeInt32ToIntPtr(Word32Shl(
78                       builtin_index, Int32Constant(kSystemPointerSizeLog2))));
79 
80     var_result = ReinterpretCast<RawPtrT>(
81         Load(MachineType::Pointer(),
82              ExternalConstant(ExternalReference::isolate_root(isolate())),
83              builtin_entry_offset_from_isolate_root));
84     Goto(&out);
85   }
86 
87   BIND(&out);
88   return var_result.value();
89 }
90 
// -----------------------------------------------------------------------------
// ES6 section 21.2 RegExp Objects
93 
AllocateRegExpResult( TNode<Context> context, TNode<Smi> length, TNode<Smi> index, TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index, TNode<BoolT> has_indices, TNode<FixedArray>* elements_out)94 TNode<JSRegExpResult> RegExpBuiltinsAssembler::AllocateRegExpResult(
95     TNode<Context> context, TNode<Smi> length, TNode<Smi> index,
96     TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index,
97     TNode<BoolT> has_indices, TNode<FixedArray>* elements_out) {
98   CSA_DCHECK(this, SmiLessThanOrEqual(
99                        length, SmiConstant(JSArray::kMaxFastArrayLength)));
100   CSA_DCHECK(this, SmiGreaterThan(length, SmiConstant(0)));
101 
102   // Allocate.
103 
104   Label result_has_indices(this), allocated(this);
105   const ElementsKind elements_kind = PACKED_ELEMENTS;
106   base::Optional<TNode<AllocationSite>> no_gc_site = base::nullopt;
107   TNode<IntPtrT> length_intptr = SmiUntag(length);
108   // Note: The returned `var_elements` may be in young large object space, but
109   // `var_array` is guaranteed to be in new space so we could skip write
110   // barriers below.
111   TVARIABLE(JSArray, var_array);
112   TVARIABLE(FixedArrayBase, var_elements);
113 
114   GotoIf(has_indices, &result_has_indices);
115   {
116     TNode<Map> map = CAST(LoadContextElement(LoadNativeContext(context),
117                                              Context::REGEXP_RESULT_MAP_INDEX));
118     std::tie(var_array, var_elements) =
119         AllocateUninitializedJSArrayWithElements(
120             elements_kind, map, length, no_gc_site, length_intptr,
121             AllocationFlag::kAllowLargeObjectAllocation, JSRegExpResult::kSize);
122     Goto(&allocated);
123   }
124 
125   BIND(&result_has_indices);
126   {
127     TNode<Map> map =
128         CAST(LoadContextElement(LoadNativeContext(context),
129                                 Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX));
130     std::tie(var_array, var_elements) =
131         AllocateUninitializedJSArrayWithElements(
132             elements_kind, map, length, no_gc_site, length_intptr,
133             AllocationFlag::kAllowLargeObjectAllocation,
134             JSRegExpResultWithIndices::kSize);
135     Goto(&allocated);
136   }
137 
138   BIND(&allocated);
139 
140   // Finish result initialization.
141 
142   TNode<JSRegExpResult> result =
143       UncheckedCast<JSRegExpResult>(var_array.value());
144 
145   // Load undefined value once here to avoid multiple LoadRoots.
146   TNode<Oddball> undefined_value = UncheckedCast<Oddball>(
147       CodeAssembler::LoadRoot(RootIndex::kUndefinedValue));
148 
149   StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index);
150   // TODO(jgruber,turbofan): Could skip barrier but the MemoryOptimizer
151   // complains.
152   StoreObjectField(result, JSRegExpResult::kInputOffset, input);
153   StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset,
154                                  undefined_value);
155   StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kNamesOffset,
156                                  undefined_value);
157 
158   StoreObjectField(result, JSRegExpResult::kRegexpInputOffset, input);
159 
160   // If non-smi last_index then store an SmiZero instead.
161   {
162     TNode<Smi> last_index_smi = Select<Smi>(
163         TaggedIsSmi(last_index), [=] { return CAST(last_index); },
164         [=] { return SmiZero(); });
165     StoreObjectField(result, JSRegExpResult::kRegexpLastIndexOffset,
166                      last_index_smi);
167   }
168 
169   Label finish_initialization(this);
170   GotoIfNot(has_indices, &finish_initialization);
171   {
172     static_assert(
173         std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
174         "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
175     StoreObjectFieldNoWriteBarrier(
176         result, JSRegExpResultWithIndices::kIndicesOffset, undefined_value);
177     Goto(&finish_initialization);
178   }
179 
180   BIND(&finish_initialization);
181 
182   // Finish elements initialization.
183 
184   FillFixedArrayWithValue(elements_kind, var_elements.value(), IntPtrZero(),
185                           length_intptr, RootIndex::kUndefinedValue);
186 
187   if (elements_out) *elements_out = CAST(var_elements.value());
188   return result;
189 }
190 
FastLoadLastIndexBeforeSmiCheck( TNode<JSRegExp> regexp)191 TNode<Object> RegExpBuiltinsAssembler::FastLoadLastIndexBeforeSmiCheck(
192     TNode<JSRegExp> regexp) {
193   // Load the in-object field.
194   static const int field_offset =
195       JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
196   return LoadObjectField(regexp, field_offset);
197 }
198 
SlowLoadLastIndex(TNode<Context> context, TNode<Object> regexp)199 TNode<Object> RegExpBuiltinsAssembler::SlowLoadLastIndex(TNode<Context> context,
200                                                          TNode<Object> regexp) {
201   return GetProperty(context, regexp, isolate()->factory()->lastIndex_string());
202 }
203 
204 // The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified
205 // JSRegExp instance.
FastStoreLastIndex(TNode<JSRegExp> regexp, TNode<Smi> value)206 void RegExpBuiltinsAssembler::FastStoreLastIndex(TNode<JSRegExp> regexp,
207                                                  TNode<Smi> value) {
208   // Store the in-object field.
209   static const int field_offset =
210       JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
211   StoreObjectField(regexp, field_offset, value);
212 }
213 
SlowStoreLastIndex(TNode<Context> context, TNode<Object> regexp, TNode<Object> value)214 void RegExpBuiltinsAssembler::SlowStoreLastIndex(TNode<Context> context,
215                                                  TNode<Object> regexp,
216                                                  TNode<Object> value) {
217   TNode<String> name = HeapConstant(isolate()->factory()->lastIndex_string());
218   SetPropertyStrict(context, regexp, name, value);
219 }
220 
ConstructNewResultFromMatchInfo( TNode<Context> context, TNode<JSRegExp> regexp, TNode<RegExpMatchInfo> match_info, TNode<String> string, TNode<Number> last_index)221 TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
222     TNode<Context> context, TNode<JSRegExp> regexp,
223     TNode<RegExpMatchInfo> match_info, TNode<String> string,
224     TNode<Number> last_index) {
225   Label named_captures(this), maybe_build_indices(this), out(this);
226 
227   TNode<IntPtrT> num_indices = SmiUntag(CAST(UnsafeLoadFixedArrayElement(
228       match_info, RegExpMatchInfo::kNumberOfCapturesIndex)));
229   TNode<Smi> num_results = SmiTag(WordShr(num_indices, 1));
230   TNode<Smi> start = CAST(UnsafeLoadFixedArrayElement(
231       match_info, RegExpMatchInfo::kFirstCaptureIndex));
232   TNode<Smi> end = CAST(UnsafeLoadFixedArrayElement(
233       match_info, RegExpMatchInfo::kFirstCaptureIndex + 1));
234 
235   // Calculate the substring of the first match before creating the result array
236   // to avoid an unnecessary write barrier storing the first result.
237 
238   TNode<String> first =
239       CAST(CallBuiltin(Builtin::kSubString, context, string, start, end));
240 
241   // Load flags and check if the result object needs to have indices.
242   const TNode<Smi> flags =
243       CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
244   const TNode<BoolT> has_indices = IsSetSmi(flags, JSRegExp::kHasIndices);
245   TNode<FixedArray> result_elements;
246   TNode<JSRegExpResult> result =
247       AllocateRegExpResult(context, num_results, start, string, regexp,
248                            last_index, has_indices, &result_elements);
249 
250   UnsafeStoreFixedArrayElement(result_elements, 0, first);
251 
252   // If no captures exist we can skip named capture handling as well.
253   GotoIf(SmiEqual(num_results, SmiConstant(1)), &maybe_build_indices);
254 
255   // Store all remaining captures.
256   TNode<IntPtrT> limit = IntPtrAdd(
257       IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices);
258 
259   TVARIABLE(IntPtrT, var_from_cursor,
260             IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2));
261   TVARIABLE(IntPtrT, var_to_cursor, IntPtrConstant(1));
262 
263   Label loop(this, {&var_from_cursor, &var_to_cursor});
264 
265   Goto(&loop);
266   BIND(&loop);
267   {
268     TNode<IntPtrT> from_cursor = var_from_cursor.value();
269     TNode<IntPtrT> to_cursor = var_to_cursor.value();
270     TNode<Smi> start_cursor =
271         CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor));
272 
273     Label next_iter(this);
274     GotoIf(SmiEqual(start_cursor, SmiConstant(-1)), &next_iter);
275 
276     TNode<IntPtrT> from_cursor_plus1 =
277         IntPtrAdd(from_cursor, IntPtrConstant(1));
278     TNode<Smi> end_cursor =
279         CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor_plus1));
280 
281     TNode<String> capture = CAST(CallBuiltin(Builtin::kSubString, context,
282                                              string, start_cursor, end_cursor));
283     UnsafeStoreFixedArrayElement(result_elements, to_cursor, capture);
284     Goto(&next_iter);
285 
286     BIND(&next_iter);
287     var_from_cursor = IntPtrAdd(from_cursor, IntPtrConstant(2));
288     var_to_cursor = IntPtrAdd(to_cursor, IntPtrConstant(1));
289     Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
290            &named_captures);
291   }
292 
293   BIND(&named_captures);
294   {
295     CSA_DCHECK(this, SmiGreaterThan(num_results, SmiConstant(1)));
296 
297     // Preparations for named capture properties. Exit early if the result does
298     // not have any named captures to minimize performance impact.
299 
300     TNode<FixedArray> data =
301         CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
302 
303     // We reach this point only if captures exist, implying that the assigned
304     // regexp engine must be able to handle captures.
305     CSA_DCHECK(
306         this,
307         Word32Or(
308             SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
309                      SmiConstant(JSRegExp::IRREGEXP)),
310             SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
311                      SmiConstant(JSRegExp::EXPERIMENTAL))));
312 
313     // The names fixed array associates names at even indices with a capture
314     // index at odd indices.
315     TNode<Object> maybe_names =
316         LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
317     GotoIf(TaggedEqual(maybe_names, SmiZero()), &maybe_build_indices);
318 
319     // One or more named captures exist, add a property for each one.
320 
321     TNode<FixedArray> names = CAST(maybe_names);
322     TNode<IntPtrT> names_length = LoadAndUntagFixedArrayBaseLength(names);
323     CSA_DCHECK(this, IntPtrGreaterThan(names_length, IntPtrZero()));
324 
325     // Stash names in case we need them to build the indices array later.
326     StoreObjectField(result, JSRegExpResult::kNamesOffset, names);
327 
328     // Allocate a new object to store the named capture properties.
329     // TODO(jgruber): Could be optimized by adding the object map to the heap
330     // root list.
331 
332     TNode<IntPtrT> num_properties = WordSar(names_length, 1);
333     TNode<NativeContext> native_context = LoadNativeContext(context);
334     TNode<Map> map = LoadSlowObjectWithNullPrototypeMap(native_context);
335     TNode<HeapObject> properties;
336     if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) {
337       properties = AllocateSwissNameDictionary(num_properties);
338     } else {
339       properties = AllocateNameDictionary(
340           num_properties, AllocationFlag::kAllowLargeObjectAllocation);
341     }
342 
343     TNode<JSObject> group_object = AllocateJSObjectFromMap(map, properties);
344     StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object);
345 
346     TVARIABLE(IntPtrT, var_i, IntPtrZero());
347 
348     Label inner_loop(this, &var_i);
349 
350     Goto(&inner_loop);
351     BIND(&inner_loop);
352     {
353       TNode<IntPtrT> i = var_i.value();
354       TNode<IntPtrT> i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
355       TNode<IntPtrT> i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
356 
357       TNode<String> name = CAST(LoadFixedArrayElement(names, i));
358       TNode<Smi> index = CAST(LoadFixedArrayElement(names, i_plus_1));
359       TNode<HeapObject> capture =
360           CAST(LoadFixedArrayElement(result_elements, SmiUntag(index)));
361 
362       // TODO(v8:8213): For maintainability, we should call a CSA/Torque
363       // implementation of CreateDataProperty instead.
364 
365       // At this point the spec says to call CreateDataProperty. However, we can
366       // skip most of the steps and go straight to adding a dictionary entry
367       // because we know a bunch of useful facts:
368       // - All keys are non-numeric internalized strings
369       // - No keys repeat
370       // - Receiver has no prototype
371       // - Receiver isn't used as a prototype
372       // - Receiver isn't any special object like a Promise intrinsic object
373       // - Receiver is extensible
374       // - Receiver has no interceptors
375       Label add_dictionary_property_slow(this, Label::kDeferred);
376       Add<PropertyDictionary>(CAST(properties), name, capture,
377                               &add_dictionary_property_slow);
378 
379       var_i = i_plus_2;
380       Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length),
381              &maybe_build_indices, &inner_loop);
382 
383       BIND(&add_dictionary_property_slow);
384       // If the dictionary needs resizing, the above Add call will jump here
385       // before making any changes. This shouldn't happen because we allocated
386       // the dictionary with enough space above.
387       Unreachable();
388     }
389   }
390 
391   // Build indices if needed (i.e. if the /d flag is present) after named
392   // capture groups are processed.
393   BIND(&maybe_build_indices);
394   GotoIfNot(has_indices, &out);
395   {
396     const TNode<Object> maybe_names =
397         LoadObjectField(result, JSRegExpResultWithIndices::kNamesOffset);
398     const TNode<JSRegExpResultIndices> indices =
399         UncheckedCast<JSRegExpResultIndices>(
400             CallRuntime(Runtime::kRegExpBuildIndices, context, regexp,
401                         match_info, maybe_names));
402     StoreObjectField(result, JSRegExpResultWithIndices::kIndicesOffset,
403                      indices);
404     Goto(&out);
405   }
406 
407   BIND(&out);
408   return result;
409 }
410 
GetStringPointers( TNode<RawPtrT> string_data, TNode<IntPtrT> offset, TNode<IntPtrT> last_index, TNode<IntPtrT> string_length, String::Encoding encoding, TVariable<RawPtrT>* var_string_start, TVariable<RawPtrT>* var_string_end)411 void RegExpBuiltinsAssembler::GetStringPointers(
412     TNode<RawPtrT> string_data, TNode<IntPtrT> offset,
413     TNode<IntPtrT> last_index, TNode<IntPtrT> string_length,
414     String::Encoding encoding, TVariable<RawPtrT>* var_string_start,
415     TVariable<RawPtrT>* var_string_end) {
416   DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
417   DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());
418 
419   const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
420                                 ? UINT8_ELEMENTS
421                                 : UINT16_ELEMENTS;
422 
423   TNode<IntPtrT> from_offset =
424       ElementOffsetFromIndex(IntPtrAdd(offset, last_index), kind);
425   *var_string_start =
426       ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, from_offset));
427 
428   TNode<IntPtrT> to_offset =
429       ElementOffsetFromIndex(IntPtrAdd(offset, string_length), kind);
430   *var_string_end = ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, to_offset));
431 }
432 
RegExpExecInternal( TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, TNode<Number> last_index, TNode<RegExpMatchInfo> match_info, RegExp::ExecQuirks exec_quirks)433 TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
434     TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
435     TNode<Number> last_index, TNode<RegExpMatchInfo> match_info,
436     RegExp::ExecQuirks exec_quirks) {
437   ToDirectStringAssembler to_direct(state(), string);
438 
439   TVARIABLE(HeapObject, var_result);
440   Label out(this), atom(this), runtime(this, Label::kDeferred),
441       retry_experimental(this, Label::kDeferred);
442 
443   // External constants.
444   TNode<ExternalReference> isolate_address =
445       ExternalConstant(ExternalReference::isolate_address(isolate()));
446   TNode<ExternalReference> static_offsets_vector_address = ExternalConstant(
447       ExternalReference::address_of_static_offsets_vector(isolate()));
448 
449   // At this point, last_index is definitely a canonicalized non-negative
450   // number, which implies that any non-Smi last_index is greater than
451   // the maximal string length. If lastIndex > string.length then the matcher
452   // must fail.
453 
454   Label if_failure(this);
455 
456   CSA_DCHECK(this, IsNumberNormalized(last_index));
457   CSA_DCHECK(this, IsNumberPositive(last_index));
458   GotoIf(TaggedIsNotSmi(last_index), &if_failure);
459 
460   TNode<IntPtrT> int_string_length = LoadStringLengthAsWord(string);
461   TNode<IntPtrT> int_last_index = SmiUntag(CAST(last_index));
462 
463   GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure);
464 
465   // Since the RegExp has been compiled, data contains a fixed array.
466   TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
467   {
468     // Dispatch on the type of the RegExp.
469     {
470       Label next(this), unreachable(this, Label::kDeferred);
471       TNode<Int32T> tag = LoadAndUntagToWord32FixedArrayElement(
472           data, IntPtrConstant(JSRegExp::kTagIndex));
473 
474       int32_t values[] = {
475           JSRegExp::IRREGEXP,
476           JSRegExp::ATOM,
477           JSRegExp::EXPERIMENTAL,
478       };
479       Label* labels[] = {&next, &atom, &next};
480 
481       STATIC_ASSERT(arraysize(values) == arraysize(labels));
482       Switch(tag, &unreachable, values, labels, arraysize(values));
483 
484       BIND(&unreachable);
485       Unreachable();
486 
487       BIND(&next);
488     }
489 
490     // Check (number_of_captures + 1) * 2 <= offsets vector size
491     // Or              number_of_captures <= offsets vector size / 2 - 1
492     TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
493         data, JSRegExp::kIrregexpCaptureCountIndex));
494 
495     const int kOffsetsSize = Isolate::kJSRegexpStaticOffsetsVectorSize;
496     STATIC_ASSERT(kOffsetsSize >= 2);
497     GotoIf(SmiAbove(capture_count, SmiConstant(kOffsetsSize / 2 - 1)),
498            &runtime);
499   }
500 
501   // Unpack the string if possible.
502 
503   to_direct.TryToDirect(&runtime);
504 
505   // Load the irregexp code or bytecode object and offsets into the subject
506   // string. Both depend on whether the string is one- or two-byte.
507 
508   TVARIABLE(RawPtrT, var_string_start);
509   TVARIABLE(RawPtrT, var_string_end);
510   TVARIABLE(Object, var_code);
511   TVARIABLE(Object, var_bytecode);
512 
513   {
514     TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime);
515 
516     Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
517     Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
518            &if_isonebyte, &if_istwobyte);
519 
520     BIND(&if_isonebyte);
521     {
522       GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
523                         int_string_length, String::ONE_BYTE_ENCODING,
524                         &var_string_start, &var_string_end);
525       var_code =
526           UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex);
527       var_bytecode = UnsafeLoadFixedArrayElement(
528           data, JSRegExp::kIrregexpLatin1BytecodeIndex);
529       Goto(&next);
530     }
531 
532     BIND(&if_istwobyte);
533     {
534       GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
535                         int_string_length, String::TWO_BYTE_ENCODING,
536                         &var_string_start, &var_string_end);
537       var_code =
538           UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex);
539       var_bytecode = UnsafeLoadFixedArrayElement(
540           data, JSRegExp::kIrregexpUC16BytecodeIndex);
541       Goto(&next);
542     }
543 
544     BIND(&next);
545   }
546 
547   // Check that the irregexp code has been generated for the actual string
548   // encoding. If it has, the field contains a code object; and otherwise it
549   // contains the uninitialized sentinel as a smi.
550 #ifdef DEBUG
551   {
552     Label next(this);
553     GotoIfNot(TaggedIsSmi(var_code.value()), &next);
554     CSA_DCHECK(this, SmiEqual(CAST(var_code.value()),
555                               SmiConstant(JSRegExp::kUninitializedValue)));
556     Goto(&next);
557     BIND(&next);
558   }
559 #endif
560 
561   GotoIf(TaggedIsSmi(var_code.value()), &runtime);
562   TNode<CodeT> code = CAST(var_code.value());
563 
564   Label if_success(this), if_exception(this, Label::kDeferred);
565   {
566     IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
567 
568     // Set up args for the final call into generated Irregexp code.
569 
570     MachineType type_int32 = MachineType::Int32();
571     MachineType type_tagged = MachineType::AnyTagged();
572     MachineType type_ptr = MachineType::Pointer();
573 
574     // Result: A NativeRegExpMacroAssembler::Result return code.
575     MachineType retval_type = type_int32;
576 
577     // Argument 0: Original subject string.
578     MachineType arg0_type = type_tagged;
579     TNode<String> arg0 = string;
580 
581     // Argument 1: Previous index.
582     MachineType arg1_type = type_int32;
583     TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index);
584 
585     // Argument 2: Start of string data. This argument is ignored in the
586     // interpreter.
587     MachineType arg2_type = type_ptr;
588     TNode<RawPtrT> arg2 = var_string_start.value();
589 
590     // Argument 3: End of string data. This argument is ignored in the
591     // interpreter.
592     MachineType arg3_type = type_ptr;
593     TNode<RawPtrT> arg3 = var_string_end.value();
594 
595     // Argument 4: static offsets vector buffer.
596     MachineType arg4_type = type_ptr;
597     TNode<ExternalReference> arg4 = static_offsets_vector_address;
598 
599     // Argument 5: Number of capture registers.
600     // Setting this to the number of registers required to store all captures
601     // forces global regexps to behave as non-global.
602     TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
603         data, JSRegExp::kIrregexpCaptureCountIndex));
604     // capture_count is the number of captures without the match itself.
605     // Required registers = (capture_count + 1) * 2.
606     STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) * 2));
607     TNode<Smi> register_count =
608         SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
609 
610     MachineType arg5_type = type_int32;
611     TNode<Int32T> arg5 = SmiToInt32(register_count);
612 
613     // Argument 6: Indicate that this is a direct call from JavaScript.
614     MachineType arg6_type = type_int32;
615     TNode<Int32T> arg6 = Int32Constant(RegExp::CallOrigin::kFromJs);
616 
617     // Argument 7: Pass current isolate address.
618     MachineType arg7_type = type_ptr;
619     TNode<ExternalReference> arg7 = isolate_address;
620 
621     // Argument 8: Regular expression object. This argument is ignored in native
622     // irregexp code.
623     MachineType arg8_type = type_tagged;
624     TNode<JSRegExp> arg8 = regexp;
625 
626     TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code);
627 
628     // AIX uses function descriptors on CFunction calls. code_entry in this case
629     // may also point to a Regex interpreter entry trampoline which does not
630     // have a function descriptor. This method is ineffective on other platforms
631     // and is equivalent to CallCFunction.
632     TNode<Int32T> result =
633         UncheckedCast<Int32T>(CallCFunctionWithoutFunctionDescriptor(
634             code_entry, retval_type, std::make_pair(arg0_type, arg0),
635             std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
636             std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
637             std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6),
638             std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8)));
639 
640     // Check the result.
641     // We expect exactly one result since we force the called regexp to behave
642     // as non-global.
643     TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
644     GotoIf(
645         IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
646         &if_success);
647     GotoIf(
648         IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
649         &if_failure);
650     GotoIf(IntPtrEqual(int_result,
651                        IntPtrConstant(RegExp::kInternalRegExpException)),
652            &if_exception);
653     GotoIf(IntPtrEqual(
654                int_result,
655                IntPtrConstant(RegExp::kInternalRegExpFallbackToExperimental)),
656            &retry_experimental);
657 
658     CSA_DCHECK(this, IntPtrEqual(int_result,
659                                  IntPtrConstant(RegExp::kInternalRegExpRetry)));
660     Goto(&runtime);
661   }
662 
663   BIND(&if_success);
664   {
665     if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
666       static constexpr int kMatchStartOffset = 0;
667       TNode<IntPtrT> value = ChangeInt32ToIntPtr(UncheckedCast<Int32T>(
668           Load(MachineType::Int32(), static_offsets_vector_address,
669                IntPtrConstant(kMatchStartOffset))));
670       GotoIf(UintPtrGreaterThanOrEqual(value, int_string_length), &if_failure);
671     }
672 
673     // Check that the last match info has space for the capture registers and
674     // the additional information. Ensure no overflow in add.
675     STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);
676     TNode<Smi> available_slots =
677         SmiSub(LoadFixedArrayBaseLength(match_info),
678                SmiConstant(RegExpMatchInfo::kLastMatchOverhead));
679     TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
680         data, JSRegExp::kIrregexpCaptureCountIndex));
681     // Calculate number of register_count = (capture_count + 1) * 2.
682     TNode<Smi> register_count =
683         SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
684     GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);
685 
686     // Fill match_info.
687     UnsafeStoreFixedArrayElement(
688         match_info, RegExpMatchInfo::kNumberOfCapturesIndex, register_count);
689     UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
690                                  string);
691     UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
692                                  string);
693 
694     // Fill match and capture offsets in match_info.
695     {
696       TNode<IntPtrT> limit_offset =
697           ElementOffsetFromIndex(register_count, INT32_ELEMENTS, 0);
698 
699       TNode<IntPtrT> to_offset = ElementOffsetFromIndex(
700           IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS,
701           RegExpMatchInfo::kHeaderSize - kHeapObjectTag);
702       TVARIABLE(IntPtrT, var_to_offset, to_offset);
703 
704       VariableList vars({&var_to_offset}, zone());
705       BuildFastLoop<IntPtrT>(
706           vars, IntPtrZero(), limit_offset,
707           [&](TNode<IntPtrT> offset) {
708             TNode<Int32T> value = UncheckedCast<Int32T>(Load(
709                 MachineType::Int32(), static_offsets_vector_address, offset));
710             TNode<Smi> smi_value = SmiFromInt32(value);
711             StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,
712                                 var_to_offset.value(), smi_value);
713             Increment(&var_to_offset, kTaggedSize);
714           },
715           kInt32Size, IndexAdvanceMode::kPost);
716     }
717 
718     var_result = match_info;
719     Goto(&out);
720   }
721 
722   BIND(&if_failure);
723   {
724     var_result = NullConstant();
725     Goto(&out);
726   }
727 
728   BIND(&if_exception);
729   {
730 // A stack overflow was detected in RegExp code.
731 #ifdef DEBUG
732     TNode<ExternalReference> pending_exception_address =
733         ExternalConstant(ExternalReference::Create(
734             IsolateAddressId::kPendingExceptionAddress, isolate()));
735     CSA_DCHECK(this, IsTheHole(Load<Object>(pending_exception_address)));
736 #endif  // DEBUG
737     CallRuntime(Runtime::kThrowStackOverflow, context);
738     Unreachable();
739   }
740 
741   BIND(&retry_experimental);
742   {
743     auto target_fn =
744         exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
745             ? Runtime::kRegExpExperimentalOneshotExecTreatMatchAtEndAsFailure
746             : Runtime::kRegExpExperimentalOneshotExec;
747     var_result = CAST(CallRuntime(target_fn, context, regexp, string,
748                                   last_index, match_info));
749     Goto(&out);
750   }
751 
752   BIND(&runtime);
753   {
754     auto target_fn =
755         exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
756             ? Runtime::kRegExpExecTreatMatchAtEndAsFailure
757             : Runtime::kRegExpExec;
758     var_result = CAST(CallRuntime(target_fn, context, regexp, string,
759                                   last_index, match_info));
760     Goto(&out);
761   }
762 
763   BIND(&atom);
764   {
765     // TODO(jgruber): A call with 4 args stresses register allocation, this
766     // should probably just be inlined.
767     var_result = CAST(CallBuiltin(Builtin::kRegExpExecAtom, context, regexp,
768                                   string, last_index, match_info));
769     Goto(&out);
770   }
771 
772   BIND(&out);
773   return var_result.value();
774 }
775 
776 TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
777     TNode<Context> context, TNode<Object> object, TNode<Map> map) {
778   Label out(this);
779   TVARIABLE(BoolT, var_result);
780 
781 #ifdef V8_ENABLE_FORCE_SLOW_PATH
782   var_result = Int32FalseConstant();
783   GotoIfForceSlowPath(&out);
784 #endif
785 
786   const TNode<NativeContext> native_context = LoadNativeContext(context);
787   const TNode<HeapObject> regexp_fun =
788       CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
789   const TNode<Object> initial_map =
790       LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
791   const TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
792 
793   var_result = has_initialmap;
794   GotoIfNot(has_initialmap, &out);
795 
796   // The smi check is required to omit ToLength(lastIndex) calls with possible
797   // user-code execution on the fast path.
798   TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
799   var_result = TaggedIsPositiveSmi(last_index);
800   Goto(&out);
801 
802   BIND(&out);
803   return var_result.value();
804 }
805 
806 TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
807     TNode<Context> context, TNode<Object> object) {
808   CSA_DCHECK(this, TaggedIsNotSmi(object));
809   return IsFastRegExpNoPrototype(context, object, LoadMap(CAST(object)));
810 }
811 
812 void RegExpBuiltinsAssembler::BranchIfFastRegExp(
813     TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
814     PrototypeCheckAssembler::Flags prototype_check_flags,
815     base::Optional<DescriptorIndexNameValue> additional_property_to_check,
816     Label* if_isunmodified, Label* if_ismodified) {
817   CSA_DCHECK(this, TaggedEqual(LoadMap(object), map));
818 
819   GotoIfForceSlowPath(if_ismodified);
820 
821   // This should only be needed for String.p.(split||matchAll), but we are
822   // conservative here.
823   GotoIf(IsRegExpSpeciesProtectorCellInvalid(), if_ismodified);
824 
825   TNode<NativeContext> native_context = LoadNativeContext(context);
826   TNode<JSFunction> regexp_fun =
827       CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
828   TNode<Map> initial_map = CAST(
829       LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset));
830   TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
831 
832   GotoIfNot(has_initialmap, if_ismodified);
833 
834   // The smi check is required to omit ToLength(lastIndex) calls with possible
835   // user-code execution on the fast path.
836   TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
837   GotoIfNot(TaggedIsPositiveSmi(last_index), if_ismodified);
838 
839   // Verify the prototype.
840 
841   TNode<Map> initial_proto_initial_map = CAST(
842       LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX));
843 
844   DescriptorIndexNameValue properties_to_check[2];
845   int property_count = 0;
846   properties_to_check[property_count++] = DescriptorIndexNameValue{
847       JSRegExp::kExecFunctionDescriptorIndex, RootIndex::kexec_string,
848       Context::REGEXP_EXEC_FUNCTION_INDEX};
849   if (additional_property_to_check) {
850     properties_to_check[property_count++] = *additional_property_to_check;
851   }
852 
853   PrototypeCheckAssembler prototype_check_assembler(
854       state(), prototype_check_flags, native_context, initial_proto_initial_map,
855       base::Vector<DescriptorIndexNameValue>(properties_to_check,
856                                              property_count));
857 
858   TNode<HeapObject> prototype = LoadMapPrototype(map);
859   prototype_check_assembler.CheckAndBranch(prototype, if_isunmodified,
860                                            if_ismodified);
861 }
862 void RegExpBuiltinsAssembler::BranchIfFastRegExpForSearch(
863     TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
864     Label* if_ismodified) {
865   BranchIfFastRegExp(
866       context, object, LoadMap(object),
867       PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
868       DescriptorIndexNameValue{JSRegExp::kSymbolSearchFunctionDescriptorIndex,
869                                RootIndex::ksearch_symbol,
870                                Context::REGEXP_SEARCH_FUNCTION_INDEX},
871       if_isunmodified, if_ismodified);
872 }
873 
874 void RegExpBuiltinsAssembler::BranchIfFastRegExpForMatch(
875     TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
876     Label* if_ismodified) {
877   BranchIfFastRegExp(
878       context, object, LoadMap(object),
879       PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
880       DescriptorIndexNameValue{JSRegExp::kSymbolMatchFunctionDescriptorIndex,
881                                RootIndex::kmatch_symbol,
882                                Context::REGEXP_MATCH_FUNCTION_INDEX},
883       if_isunmodified, if_ismodified);
884 }
885 
886 void RegExpBuiltinsAssembler::BranchIfFastRegExp_Strict(
887     TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
888     Label* if_ismodified) {
889   BranchIfFastRegExp(context, object, LoadMap(object),
890                      PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
891                      base::nullopt, if_isunmodified, if_ismodified);
892 }
893 
894 void RegExpBuiltinsAssembler::BranchIfFastRegExp_Permissive(
895     TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
896     Label* if_ismodified) {
897   BranchIfFastRegExp(context, object, LoadMap(object),
898                      PrototypeCheckAssembler::kCheckFull, base::nullopt,
899                      if_isunmodified, if_ismodified);
900 }
901 
902 void RegExpBuiltinsAssembler::BranchIfRegExpResult(const TNode<Context> context,
903                                                    const TNode<Object> object,
904                                                    Label* if_isunmodified,
905                                                    Label* if_ismodified) {
906   // Could be a Smi.
907   const TNode<Map> map = LoadReceiverMap(object);
908 
909   const TNode<NativeContext> native_context = LoadNativeContext(context);
910   const TNode<Object> initial_regexp_result_map =
911       LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);
912 
913   Label maybe_result_with_indices(this);
914   Branch(TaggedEqual(map, initial_regexp_result_map), if_isunmodified,
915          &maybe_result_with_indices);
916   BIND(&maybe_result_with_indices);
917   {
918     static_assert(
919         std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
920         "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
921     const TNode<Object> initial_regexp_result_with_indices_map =
922         LoadContextElement(native_context,
923                            Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX);
924     Branch(TaggedEqual(map, initial_regexp_result_with_indices_map),
925            if_isunmodified, if_ismodified);
926   }
927 }
928 
929 // Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
930 // and {match_info} is updated on success.
931 // The slow path is implemented in RegExp::AtomExec.
932 TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
933   auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
934   auto subject_string = Parameter<String>(Descriptor::kString);
935   auto last_index = Parameter<Smi>(Descriptor::kLastIndex);
936   auto match_info = Parameter<FixedArray>(Descriptor::kMatchInfo);
937   auto context = Parameter<Context>(Descriptor::kContext);
938 
939   CSA_DCHECK(this, TaggedIsPositiveSmi(last_index));
940 
941   TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
942   CSA_DCHECK(
943       this,
944       SmiEqual(CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kTagIndex)),
945                SmiConstant(JSRegExp::ATOM)));
946 
947   // Callers ensure that last_index is in-bounds.
948   CSA_DCHECK(this,
949              UintPtrLessThanOrEqual(SmiUntag(last_index),
950                                     LoadStringLengthAsWord(subject_string)));
951 
952   const TNode<String> needle_string =
953       CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex));
954 
955   // ATOM patterns are guaranteed to not be the empty string (these are
956   // intercepted and replaced in JSRegExp::Initialize.
957   //
958   // This is especially relevant for crbug.com/1075514: atom patterns are
959   // non-empty and thus guaranteed not to match at the end of the string.
960   CSA_DCHECK(this, IntPtrGreaterThan(LoadStringLengthAsWord(needle_string),
961                                      IntPtrConstant(0)));
962 
963   const TNode<Smi> match_from =
964       CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string,
965                        needle_string, last_index));
966 
967   Label if_failure(this), if_success(this);
968   Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);
969 
970   BIND(&if_success);
971   {
972     CSA_DCHECK(this, TaggedIsPositiveSmi(match_from));
973     CSA_DCHECK(this, UintPtrLessThan(SmiUntag(match_from),
974                                      LoadStringLengthAsWord(subject_string)));
975 
976     const int kNumRegisters = 2;
977     STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);
978 
979     const TNode<Smi> match_to =
980         SmiAdd(match_from, LoadStringLengthAsSmi(needle_string));
981 
982     UnsafeStoreFixedArrayElement(match_info,
983                                  RegExpMatchInfo::kNumberOfCapturesIndex,
984                                  SmiConstant(kNumRegisters));
985     UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
986                                  subject_string);
987     UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
988                                  subject_string);
989     UnsafeStoreFixedArrayElement(
990         match_info, RegExpMatchInfo::kFirstCaptureIndex, match_from);
991     UnsafeStoreFixedArrayElement(
992         match_info, RegExpMatchInfo::kFirstCaptureIndex + 1, match_to);
993 
994     Return(match_info);
995   }
996 
997   BIND(&if_failure);
998   Return(NullConstant());
999 }
1000 
// Builtin wrapper around the RegExpExecInternal assembler helper. Expects
// {last_index} to be a normalized, positive Number.
TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) {
  auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
  auto subject = Parameter<String>(Descriptor::kString);
  auto last_index = Parameter<Number>(Descriptor::kLastIndex);
  auto match_info = Parameter<RegExpMatchInfo>(Descriptor::kMatchInfo);
  auto context = Parameter<Context>(Descriptor::kContext);

  CSA_DCHECK(this, IsNumberNormalized(last_index));
  CSA_DCHECK(this, IsNumberPositive(last_index));

  const auto exec_result =
      RegExpExecInternal(context, regexp, subject, last_index, match_info);
  Return(exec_result);
}
1013 
FlagsGetter(TNode<Context> context, TNode<Object> regexp, bool is_fastpath)1014 TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
1015                                                    TNode<Object> regexp,
1016                                                    bool is_fastpath) {
1017   TVARIABLE(String, result);
1018   Label runtime(this, Label::kDeferred), done(this, &result);
1019   if (is_fastpath) {
1020     GotoIfForceSlowPath(&runtime);
1021   }
1022 
1023   Isolate* isolate = this->isolate();
1024 
1025   const TNode<IntPtrT> int_one = IntPtrConstant(1);
1026   TVARIABLE(Uint32T, var_length, Uint32Constant(0));
1027   TVARIABLE(IntPtrT, var_flags);
1028 
1029   // First, count the number of characters we will need and check which flags
1030   // are set.
1031 
1032   if (is_fastpath) {
1033     // Refer to JSRegExp's flag property on the fast-path.
1034     CSA_DCHECK(this, IsJSRegExp(CAST(regexp)));
1035     const TNode<Smi> flags_smi =
1036         CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
1037     var_flags = SmiUntag(flags_smi);
1038 
1039 #define CASE_FOR_FLAG(Lower, Camel, ...)                                \
1040   do {                                                                  \
1041     Label next(this);                                                   \
1042     GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
1043     var_length = Uint32Add(var_length.value(), Uint32Constant(1));      \
1044     Goto(&next);                                                        \
1045     BIND(&next);                                                        \
1046   } while (false);
1047 
1048     REGEXP_FLAG_LIST(CASE_FOR_FLAG)
1049 #undef CASE_FOR_FLAG
1050   } else {
1051     DCHECK(!is_fastpath);
1052 
1053     // Fall back to GetProperty stub on the slow-path.
1054     var_flags = IntPtrZero();
1055 
1056 #define CASE_FOR_FLAG(NAME, FLAG)                                          \
1057   do {                                                                     \
1058     Label next(this);                                                      \
1059     const TNode<Object> flag = GetProperty(                                \
1060         context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
1061     Label if_isflagset(this);                                              \
1062     BranchIfToBooleanIsTrue(flag, &if_isflagset, &next);                   \
1063     BIND(&if_isflagset);                                                   \
1064     var_length = Uint32Add(var_length.value(), Uint32Constant(1));         \
1065     var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG)));   \
1066     Goto(&next);                                                           \
1067     BIND(&next);                                                           \
1068   } while (false)
1069 
1070     CASE_FOR_FLAG("global", JSRegExp::kGlobal);
1071     CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
1072     CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
1073     CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
1074     CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
1075     CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
1076     CASE_FOR_FLAG("hasIndices", JSRegExp::kHasIndices);
1077 #undef CASE_FOR_FLAG
1078 
1079 #define CASE_FOR_FLAG(NAME, V8_FLAG_EXTERN_REF, FLAG)                      \
1080   do {                                                                     \
1081     Label next(this);                                                      \
1082     TNode<Word32T> flag_value = UncheckedCast<Word32T>(                    \
1083         Load(MachineType::Uint8(), ExternalConstant(V8_FLAG_EXTERN_REF))); \
1084     GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)),         \
1085                        Int32Constant(0)),                                  \
1086            &next);                                                         \
1087     const TNode<Object> flag = GetProperty(                                \
1088         context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
1089     Label if_isflagset(this);                                              \
1090     BranchIfToBooleanIsTrue(flag, &if_isflagset, &next);                   \
1091     BIND(&if_isflagset);                                                   \
1092     var_length = Uint32Add(var_length.value(), Uint32Constant(1));         \
1093     var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG)));   \
1094     Goto(&next);                                                           \
1095     BIND(&next);                                                           \
1096   } while (false)
1097 
1098     CASE_FOR_FLAG(
1099         "linear",
1100         ExternalReference::address_of_enable_experimental_regexp_engine(),
1101         JSRegExp::kLinear);
1102 #undef CASE_FOR_FLAG
1103   }
1104 
1105   // Allocate a string of the required length and fill it with the
1106   // corresponding char for each set flag.
1107 
1108   {
1109     const TNode<String> string = AllocateSeqOneByteString(var_length.value());
1110 
1111     TVARIABLE(IntPtrT, var_offset,
1112               IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
1113 
1114 #define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...)              \
1115   do {                                                                  \
1116     Label next(this);                                                   \
1117     GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
1118     const TNode<Int32T> value = Int32Constant(Char);                    \
1119     StoreNoWriteBarrier(MachineRepresentation::kWord8, string,          \
1120                         var_offset.value(), value);                     \
1121     var_offset = IntPtrAdd(var_offset.value(), int_one);                \
1122     Goto(&next);                                                        \
1123     BIND(&next);                                                        \
1124   } while (false);
1125 
1126     REGEXP_FLAG_LIST(CASE_FOR_FLAG)
1127 #undef CASE_FOR_FLAG
1128 
1129     if (is_fastpath) {
1130 #ifdef V8_ENABLE_FORCE_SLOW_PATH
1131       result = string;
1132       Goto(&done);
1133 
1134       BIND(&runtime);
1135       {
1136         result =
1137             CAST(CallRuntime(Runtime::kRegExpStringFromFlags, context, regexp));
1138         Goto(&done);
1139       }
1140 
1141       BIND(&done);
1142       return result.value();
1143 #else
1144       return string;
1145 #endif
1146     } else {
1147       return string;
1148     }
1149   }
1150 }
1151 
1152 // ES#sec-regexpinitialize
1153 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
RegExpInitialize( const TNode<Context> context, const TNode<JSRegExp> regexp, const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags)1154 TNode<Object> RegExpBuiltinsAssembler::RegExpInitialize(
1155     const TNode<Context> context, const TNode<JSRegExp> regexp,
1156     const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags) {
1157   // Normalize pattern.
1158   const TNode<Object> pattern = Select<Object>(
1159       IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); },
1160       [=] { return ToString_Inline(context, maybe_pattern); });
1161 
1162   // Normalize flags.
1163   const TNode<Object> flags = Select<Object>(
1164       IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); },
1165       [=] { return ToString_Inline(context, maybe_flags); });
1166 
1167   // Initialize.
1168 
1169   return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
1170                      pattern, flags);
1171 }
1172 
1173 // ES#sec-regexp-pattern-flags
1174 // RegExp ( pattern, flags )
TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler)1175 TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) {
1176   auto pattern = Parameter<Object>(Descriptor::kPattern);
1177   auto flags = Parameter<Object>(Descriptor::kFlags);
1178   auto new_target = Parameter<Object>(Descriptor::kJSNewTarget);
1179   auto context = Parameter<Context>(Descriptor::kContext);
1180 
1181   Isolate* isolate = this->isolate();
1182 
1183   TVARIABLE(Object, var_flags, flags);
1184   TVARIABLE(Object, var_pattern, pattern);
1185   TVARIABLE(Object, var_new_target, new_target);
1186 
1187   TNode<NativeContext> native_context = LoadNativeContext(context);
1188   TNode<JSFunction> regexp_function =
1189       CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
1190 
1191   TNode<BoolT> pattern_is_regexp = IsRegExp(context, pattern);
1192 
1193   {
1194     Label next(this);
1195 
1196     GotoIfNot(IsUndefined(new_target), &next);
1197     var_new_target = regexp_function;
1198 
1199     GotoIfNot(pattern_is_regexp, &next);
1200     GotoIfNot(IsUndefined(flags), &next);
1201 
1202     TNode<Object> value =
1203         GetProperty(context, pattern, isolate->factory()->constructor_string());
1204 
1205     GotoIfNot(TaggedEqual(value, regexp_function), &next);
1206     Return(pattern);
1207 
1208     BIND(&next);
1209   }
1210 
1211   {
1212     Label next(this), if_patternisfastregexp(this),
1213         if_patternisslowregexp(this);
1214     GotoIf(TaggedIsSmi(pattern), &next);
1215 
1216     GotoIf(IsJSRegExp(CAST(pattern)), &if_patternisfastregexp);
1217 
1218     Branch(pattern_is_regexp, &if_patternisslowregexp, &next);
1219 
1220     BIND(&if_patternisfastregexp);
1221     {
1222       TNode<Object> source =
1223           LoadObjectField(CAST(pattern), JSRegExp::kSourceOffset);
1224       var_pattern = source;
1225 
1226       {
1227         Label inner_next(this);
1228         GotoIfNot(IsUndefined(flags), &inner_next);
1229 
1230         var_flags = FlagsGetter(context, pattern, true);
1231         Goto(&inner_next);
1232 
1233         BIND(&inner_next);
1234       }
1235 
1236       Goto(&next);
1237     }
1238 
1239     BIND(&if_patternisslowregexp);
1240     {
1241       var_pattern =
1242           GetProperty(context, pattern, isolate->factory()->source_string());
1243 
1244       {
1245         Label inner_next(this);
1246         GotoIfNot(IsUndefined(flags), &inner_next);
1247 
1248         var_flags =
1249             GetProperty(context, pattern, isolate->factory()->flags_string());
1250         Goto(&inner_next);
1251 
1252         BIND(&inner_next);
1253       }
1254 
1255       Goto(&next);
1256     }
1257 
1258     BIND(&next);
1259   }
1260 
1261   // Allocate.
1262 
1263   TVARIABLE(JSRegExp, var_regexp);
1264   {
1265     Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred),
1266         next(this);
1267     Branch(TaggedEqual(var_new_target.value(), regexp_function),
1268            &allocate_jsregexp, &allocate_generic);
1269 
1270     BIND(&allocate_jsregexp);
1271     {
1272       const TNode<Map> initial_map = CAST(LoadObjectField(
1273           regexp_function, JSFunction::kPrototypeOrInitialMapOffset));
1274       var_regexp = CAST(AllocateJSObjectFromMap(initial_map));
1275       Goto(&next);
1276     }
1277 
1278     BIND(&allocate_generic);
1279     {
1280       ConstructorBuiltinsAssembler constructor_assembler(this->state());
1281       var_regexp = CAST(constructor_assembler.FastNewObject(
1282           context, regexp_function, CAST(var_new_target.value())));
1283       Goto(&next);
1284     }
1285 
1286     BIND(&next);
1287   }
1288 
1289   const TNode<Object> result = RegExpInitialize(
1290       context, var_regexp.value(), var_pattern.value(), var_flags.value());
1291   Return(result);
1292 }
1293 
1294 // ES#sec-regexp.prototype.compile
1295 // RegExp.prototype.compile ( pattern, flags )
TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler)1296 TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) {
1297   auto maybe_receiver = Parameter<Object>(Descriptor::kReceiver);
1298   auto maybe_pattern = Parameter<Object>(Descriptor::kPattern);
1299   auto maybe_flags = Parameter<Object>(Descriptor::kFlags);
1300   auto context = Parameter<Context>(Descriptor::kContext);
1301 
1302   ThrowIfNotInstanceType(context, maybe_receiver, JS_REG_EXP_TYPE,
1303                          "RegExp.prototype.compile");
1304   const TNode<JSRegExp> receiver = CAST(maybe_receiver);
1305 
1306   TVARIABLE(Object, var_flags, maybe_flags);
1307   TVARIABLE(Object, var_pattern, maybe_pattern);
1308 
1309   // Handle a JSRegExp pattern.
1310   {
1311     Label next(this);
1312 
1313     GotoIf(TaggedIsSmi(maybe_pattern), &next);
1314     GotoIfNot(IsJSRegExp(CAST(maybe_pattern)), &next);
1315 
1316     // {maybe_flags} must be undefined in this case, otherwise throw.
1317     {
1318       Label maybe_flags_is_undefined(this);
1319       GotoIf(IsUndefined(maybe_flags), &maybe_flags_is_undefined);
1320 
1321       ThrowTypeError(context, MessageTemplate::kRegExpFlags);
1322 
1323       BIND(&maybe_flags_is_undefined);
1324     }
1325 
1326     const TNode<JSRegExp> pattern = CAST(maybe_pattern);
1327     const TNode<String> new_flags = FlagsGetter(context, pattern, true);
1328     const TNode<Object> new_pattern =
1329         LoadObjectField(pattern, JSRegExp::kSourceOffset);
1330 
1331     var_flags = new_flags;
1332     var_pattern = new_pattern;
1333 
1334     Goto(&next);
1335     BIND(&next);
1336   }
1337 
1338   const TNode<Object> result = RegExpInitialize(
1339       context, receiver, var_pattern.value(), var_flags.value());
1340   Return(result);
1341 }
1342 
1343 // Fast-path implementation for flag checks on an unmodified JSRegExp instance.
FastFlagGetter(TNode<JSRegExp> regexp, JSRegExp::Flag flag)1344 TNode<BoolT> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp,
1345                                                      JSRegExp::Flag flag) {
1346   TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
1347   TNode<Smi> mask = SmiConstant(flag);
1348   return ReinterpretCast<BoolT>(SmiToInt32(
1349       SmiShr(SmiAnd(flags, mask),
1350              base::bits::CountTrailingZeros(static_cast<int>(flag)))));
1351 }
1352 
1353 // Load through the GetProperty stub.
SlowFlagGetter(TNode<Context> context, TNode<Object> regexp, JSRegExp::Flag flag)1354 TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
1355                                                      TNode<Object> regexp,
1356                                                      JSRegExp::Flag flag) {
1357   Label out(this), if_true(this), if_false(this);
1358   TVARIABLE(BoolT, var_result);
1359 
1360   // Only enabled based on a runtime flag.
1361   if (flag == JSRegExp::kLinear) {
1362     TNode<Word32T> flag_value = UncheckedCast<Word32T>(Load(
1363         MachineType::Uint8(),
1364         ExternalConstant(ExternalReference::
1365                              address_of_enable_experimental_regexp_engine())));
1366     GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)),
1367                        Int32Constant(0)),
1368            &if_false);
1369   }
1370 
1371   Handle<String> name;
1372   switch (flag) {
1373     case JSRegExp::kNone:
1374       UNREACHABLE();
1375 #define V(Lower, Camel, LowerCamel, Char, Bit)          \
1376   case JSRegExp::k##Camel:                              \
1377     name = isolate()->factory()->LowerCamel##_string(); \
1378     break;
1379       REGEXP_FLAG_LIST(V)
1380 #undef V
1381   }
1382 
1383   TNode<Object> value = GetProperty(context, regexp, name);
1384   BranchIfToBooleanIsTrue(value, &if_true, &if_false);
1385 
1386   BIND(&if_true);
1387   var_result = BoolConstant(true);
1388   Goto(&out);
1389 
1390   BIND(&if_false);
1391   var_result = BoolConstant(false);
1392   Goto(&out);
1393 
1394   BIND(&out);
1395   return var_result.value();
1396 }
1397 
FlagGetter(TNode<Context> context, TNode<Object> regexp, JSRegExp::Flag flag, bool is_fastpath)1398 TNode<BoolT> RegExpBuiltinsAssembler::FlagGetter(TNode<Context> context,
1399                                                  TNode<Object> regexp,
1400                                                  JSRegExp::Flag flag,
1401                                                  bool is_fastpath) {
1402   return is_fastpath ? FastFlagGetter(CAST(regexp), flag)
1403                      : SlowFlagGetter(context, regexp, flag);
1404 }
1405 
AdvanceStringIndex( TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode, bool is_fastpath)1406 TNode<Number> RegExpBuiltinsAssembler::AdvanceStringIndex(
1407     TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode,
1408     bool is_fastpath) {
1409   CSA_DCHECK(this, IsNumberNormalized(index));
1410   if (is_fastpath) CSA_DCHECK(this, TaggedIsPositiveSmi(index));
1411 
1412   // Default to last_index + 1.
1413   // TODO(pwong): Consider using TrySmiAdd for the fast path to reduce generated
1414   // code.
1415   TNode<Number> index_plus_one = NumberInc(index);
1416   TVARIABLE(Number, var_result, index_plus_one);
1417 
1418   // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT
1419   // anyway, consider using UintPtrT index to simplify the code below.
1420 
1421   // Advancing the index has some subtle issues involving the distinction
1422   // between Smis and HeapNumbers. There's three cases:
1423   // * {index} is a Smi, {index_plus_one} is a Smi. The standard case.
1424   // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber.
1425   //   In this case we can return the result early, because
1426   //   {index_plus_one} > {string}.length.
1427   // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only
1428   //   occur when {index} is outside the Smi range since we normalize
1429   //   explicitly. Again we can return early.
1430   if (is_fastpath) {
1431     // Must be in Smi range on the fast path. We control the value of {index}
1432     // on all call-sites and can never exceed the length of the string.
1433     STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue);
1434     CSA_DCHECK(this, TaggedIsPositiveSmi(index_plus_one));
1435   }
1436 
1437   Label if_isunicode(this), out(this);
1438   GotoIfNot(is_unicode, &out);
1439 
1440   // Keep this unconditional (even on the fast path) just to be safe.
1441   Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out);
1442 
1443   BIND(&if_isunicode);
1444   {
1445     TNode<UintPtrT> string_length = Unsigned(LoadStringLengthAsWord(string));
1446     TNode<UintPtrT> untagged_plus_one =
1447         Unsigned(SmiUntag(CAST(index_plus_one)));
1448     GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out);
1449 
1450     TNode<Int32T> lead =
1451         StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index))));
1452     GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)),
1453                           Int32Constant(0xD800)),
1454               &out);
1455 
1456     TNode<Int32T> trail = StringCharCodeAt(string, untagged_plus_one);
1457     GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)),
1458                           Int32Constant(0xDC00)),
1459               &out);
1460 
1461     // At a surrogate pair, return index + 2.
1462     TNode<Number> index_plus_two = NumberInc(index_plus_one);
1463     var_result = index_plus_two;
1464 
1465     Goto(&out);
1466   }
1467 
1468   BIND(&out);
1469   return var_result.value();
1470 }
1471 
1472 // ES#sec-createregexpstringiterator
1473 // CreateRegExpStringIterator ( R, S, global, fullUnicode )
CreateRegExpStringIterator( TNode<NativeContext> native_context, TNode<Object> regexp, TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode)1474 TNode<Object> RegExpMatchAllAssembler::CreateRegExpStringIterator(
1475     TNode<NativeContext> native_context, TNode<Object> regexp,
1476     TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode) {
1477   TNode<Map> map = CAST(LoadContextElement(
1478       native_context,
1479       Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX));
1480 
1481   // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, «
1482   // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]],
1483   // [[Done]] »).
1484   TNode<HeapObject> iterator = Allocate(JSRegExpStringIterator::kHeaderSize);
1485   StoreMapNoWriteBarrier(iterator, map);
1486   StoreObjectFieldRoot(iterator,
1487                        JSRegExpStringIterator::kPropertiesOrHashOffset,
1488                        RootIndex::kEmptyFixedArray);
1489   StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset,
1490                        RootIndex::kEmptyFixedArray);
1491 
1492   // 5. Set iterator.[[IteratingRegExp]] to R.
1493   StoreObjectFieldNoWriteBarrier(
1494       iterator, JSRegExpStringIterator::kIteratingRegExpOffset, regexp);
1495 
1496   // 6. Set iterator.[[IteratedString]] to S.
1497   StoreObjectFieldNoWriteBarrier(
1498       iterator, JSRegExpStringIterator::kIteratedStringOffset, string);
1499 
1500   // 7. Set iterator.[[Global]] to global.
1501   // 8. Set iterator.[[Unicode]] to fullUnicode.
1502   // 9. Set iterator.[[Done]] to false.
1503   TNode<Int32T> global_flag =
1504       Word32Shl(ReinterpretCast<Int32T>(global),
1505                 Int32Constant(JSRegExpStringIterator::GlobalBit::kShift));
1506   TNode<Int32T> unicode_flag =
1507       Word32Shl(ReinterpretCast<Int32T>(full_unicode),
1508                 Int32Constant(JSRegExpStringIterator::UnicodeBit::kShift));
1509   TNode<Int32T> iterator_flags = Word32Or(global_flag, unicode_flag);
1510   StoreObjectFieldNoWriteBarrier(iterator, JSRegExpStringIterator::kFlagsOffset,
1511                                  SmiFromInt32(iterator_flags));
1512 
1513   return iterator;
1514 }
1515 
1516 // Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
1517 // JSRegExp, {string} is a String, and {limit} is a Smi.
RegExpPrototypeSplitBody( TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, const TNode<Smi> limit)1518 TNode<JSArray> RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(
1519     TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
1520     const TNode<Smi> limit) {
1521   CSA_DCHECK(this, IsFastRegExpPermissive(context, regexp));
1522   CSA_DCHECK(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
1523 
1524   const TNode<IntPtrT> int_limit = SmiUntag(limit);
1525 
1526   const ElementsKind kind = PACKED_ELEMENTS;
1527 
1528   const TNode<NativeContext> native_context = LoadNativeContext(context);
1529   TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1530 
1531   Label return_empty_array(this, Label::kDeferred);
1532   TVARIABLE(JSArray, var_result);
1533   Label done(this);
1534 
1535   // If limit is zero, return an empty array.
1536   {
1537     Label next(this), if_limitiszero(this, Label::kDeferred);
1538     Branch(SmiEqual(limit, SmiZero()), &return_empty_array, &next);
1539     BIND(&next);
1540   }
1541 
1542   const TNode<Smi> string_length = LoadStringLengthAsSmi(string);
1543 
1544   // If passed the empty {string}, return either an empty array or a singleton
1545   // array depending on whether the {regexp} matches.
1546   {
1547     Label next(this), if_stringisempty(this, Label::kDeferred);
1548     Branch(SmiEqual(string_length, SmiZero()), &if_stringisempty, &next);
1549 
1550     BIND(&if_stringisempty);
1551     {
1552       const TNode<Object> last_match_info = LoadContextElement(
1553           native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1554 
1555       const TNode<Object> match_indices =
1556           CallBuiltin(Builtin::kRegExpExecInternal, context, regexp, string,
1557                       SmiZero(), last_match_info);
1558 
1559       Label return_singleton_array(this);
1560       Branch(IsNull(match_indices), &return_singleton_array,
1561              &return_empty_array);
1562 
1563       BIND(&return_singleton_array);
1564       {
1565         TNode<Smi> length = SmiConstant(1);
1566         TNode<IntPtrT> capacity = IntPtrConstant(1);
1567         base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1568         var_result =
1569             AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1570 
1571         TNode<FixedArray> fixed_array = CAST(LoadElements(var_result.value()));
1572         UnsafeStoreFixedArrayElement(fixed_array, 0, string);
1573 
1574         Goto(&done);
1575       }
1576     }
1577 
1578     BIND(&next);
1579   }
1580 
1581   // Loop preparations.
1582 
1583   GrowableFixedArray array(state());
1584 
1585   TVARIABLE(Smi, var_last_matched_until, SmiZero());
1586   TVARIABLE(Smi, var_next_search_from, SmiZero());
1587 
1588   Label loop(this, {array.var_array(), array.var_length(), array.var_capacity(),
1589                     &var_last_matched_until, &var_next_search_from}),
1590       push_suffix_and_out(this), out(this);
1591   Goto(&loop);
1592 
1593   BIND(&loop);
1594   {
1595     const TNode<Smi> next_search_from = var_next_search_from.value();
1596     const TNode<Smi> last_matched_until = var_last_matched_until.value();
1597 
1598     // We're done if we've reached the end of the string.
1599     {
1600       Label next(this);
1601       Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out,
1602              &next);
1603       BIND(&next);
1604     }
1605 
1606     // Search for the given {regexp}.
1607 
1608     const TNode<Object> last_match_info = LoadContextElement(
1609         native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1610 
1611     const TNode<HeapObject> match_indices_ho = RegExpExecInternal(
1612         context, regexp, string, next_search_from, CAST(last_match_info),
1613         RegExp::ExecQuirks::kTreatMatchAtEndAsFailure);
1614 
1615     // We're done if no match was found.
1616     {
1617       Label next(this);
1618       Branch(IsNull(match_indices_ho), &push_suffix_and_out, &next);
1619       BIND(&next);
1620     }
1621 
1622     TNode<FixedArray> match_indices = CAST(match_indices_ho);
1623     const TNode<Smi> match_from = CAST(UnsafeLoadFixedArrayElement(
1624         match_indices, RegExpMatchInfo::kFirstCaptureIndex));
1625     const TNode<Smi> match_to = CAST(UnsafeLoadFixedArrayElement(
1626         match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1));
1627     CSA_DCHECK(this, SmiNotEqual(match_from, string_length));
1628 
1629     // Advance index and continue if the match is empty.
1630     {
1631       Label next(this);
1632 
1633       GotoIfNot(SmiEqual(match_to, next_search_from), &next);
1634       GotoIfNot(SmiEqual(match_to, last_matched_until), &next);
1635 
1636       const TNode<BoolT> is_unicode =
1637           FastFlagGetter(regexp, JSRegExp::kUnicode);
1638       const TNode<Number> new_next_search_from =
1639           AdvanceStringIndex(string, next_search_from, is_unicode, true);
1640       var_next_search_from = CAST(new_next_search_from);
1641       Goto(&loop);
1642 
1643       BIND(&next);
1644     }
1645 
1646     // A valid match was found, add the new substring to the array.
1647     {
1648       const TNode<Smi> from = last_matched_until;
1649       const TNode<Smi> to = match_from;
1650       array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1651       GotoIf(WordEqual(array.length(), int_limit), &out);
1652     }
1653 
1654     // Add all captures to the array.
1655     {
1656       const TNode<Smi> num_registers = CAST(LoadFixedArrayElement(
1657           match_indices, RegExpMatchInfo::kNumberOfCapturesIndex));
1658       const TNode<IntPtrT> int_num_registers = SmiUntag(num_registers);
1659 
1660       TVARIABLE(IntPtrT, var_reg, IntPtrConstant(2));
1661 
1662       Label nested_loop(this, {array.var_array(), array.var_length(),
1663                                array.var_capacity(), &var_reg}),
1664           nested_loop_out(this);
1665       Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop,
1666              &nested_loop_out);
1667 
1668       BIND(&nested_loop);
1669       {
1670         const TNode<IntPtrT> reg = var_reg.value();
1671         const TNode<Object> from = LoadFixedArrayElement(
1672             match_indices, reg,
1673             RegExpMatchInfo::kFirstCaptureIndex * kTaggedSize);
1674         const TNode<Smi> to = CAST(LoadFixedArrayElement(
1675             match_indices, reg,
1676             (RegExpMatchInfo::kFirstCaptureIndex + 1) * kTaggedSize));
1677 
1678         Label select_capture(this), select_undefined(this), store_value(this);
1679         TVARIABLE(Object, var_value);
1680         Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined,
1681                &select_capture);
1682 
1683         BIND(&select_capture);
1684         {
1685           var_value =
1686               CallBuiltin(Builtin::kSubString, context, string, from, to);
1687           Goto(&store_value);
1688         }
1689 
1690         BIND(&select_undefined);
1691         {
1692           var_value = UndefinedConstant();
1693           Goto(&store_value);
1694         }
1695 
1696         BIND(&store_value);
1697         {
1698           array.Push(var_value.value());
1699           GotoIf(WordEqual(array.length(), int_limit), &out);
1700 
1701           const TNode<IntPtrT> new_reg = IntPtrAdd(reg, IntPtrConstant(2));
1702           var_reg = new_reg;
1703 
1704           Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
1705                  &nested_loop_out);
1706         }
1707       }
1708 
1709       BIND(&nested_loop_out);
1710     }
1711 
1712     var_last_matched_until = match_to;
1713     var_next_search_from = match_to;
1714     Goto(&loop);
1715   }
1716 
1717   BIND(&push_suffix_and_out);
1718   {
1719     const TNode<Smi> from = var_last_matched_until.value();
1720     const TNode<Smi> to = string_length;
1721     array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1722     Goto(&out);
1723   }
1724 
1725   BIND(&out);
1726   {
1727     var_result = array.ToJSArray(context);
1728     Goto(&done);
1729   }
1730 
1731   BIND(&return_empty_array);
1732   {
1733     TNode<Smi> length = SmiZero();
1734     TNode<IntPtrT> capacity = IntPtrZero();
1735     base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1736     var_result =
1737         AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1738     Goto(&done);
1739   }
1740 
1741   BIND(&done);
1742   return var_result.value();
1743 }
1744 
1745 }  // namespace internal
1746 }  // namespace v8
1747