1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-regexp-gen.h"
6
7 #include "src/builtins/builtins-constructor-gen.h"
8 #include "src/builtins/builtins-utils-gen.h"
9 #include "src/builtins/builtins.h"
10 #include "src/builtins/growable-fixed-array-gen.h"
11 #include "src/codegen/code-factory.h"
12 #include "src/codegen/code-stub-assembler.h"
13 #include "src/codegen/macro-assembler.h"
14 #include "src/common/globals.h"
15 #include "src/execution/protectors.h"
16 #include "src/heap/factory-inl.h"
17 #include "src/logging/counters.h"
18 #include "src/objects/js-regexp-string-iterator.h"
19 #include "src/objects/js-regexp.h"
20 #include "src/objects/regexp-match-info.h"
21 #include "src/regexp/regexp-flags.h"
22
23 namespace v8 {
24 namespace internal {
25
26 // Tail calls the regular expression interpreter.
27 // static
Generate_RegExpInterpreterTrampoline(MacroAssembler* masm)28 void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
29 ExternalReference interpreter_code_entry =
30 ExternalReference::re_match_for_call_from_js();
31 masm->Jump(interpreter_code_entry);
32 }
33
34 // Tail calls the experimental regular expression engine.
35 // static
Generate_RegExpExperimentalTrampoline(MacroAssembler* masm)36 void Builtins::Generate_RegExpExperimentalTrampoline(MacroAssembler* masm) {
37 ExternalReference interpreter_code_entry =
38 ExternalReference::re_experimental_match_for_call_from_js();
39 masm->Jump(interpreter_code_entry);
40 }
41
SmiZero()42 TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); }
43
IntPtrZero()44 TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() {
45 return IntPtrConstant(0);
46 }
47
48 // If code is a builtin, return the address to the (possibly embedded) builtin
49 // code entry, otherwise return the entry of the code object itself.
LoadCodeObjectEntry(TNode<CodeT> code)50 TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<CodeT> code) {
51 if (V8_EXTERNAL_CODE_SPACE_BOOL) {
52 // When external code space is enabled we can load the entry point directly
53 // from the CodeT object.
54 return GetCodeEntry(code);
55 }
56
57 TVARIABLE(RawPtrT, var_result);
58
59 Label if_code_is_off_heap(this), out(this);
60 TNode<Int32T> builtin_index =
61 LoadObjectField<Int32T>(code, Code::kBuiltinIndexOffset);
62 {
63 GotoIfNot(
64 Word32Equal(builtin_index,
65 Int32Constant(static_cast<int>(Builtin::kNoBuiltinId))),
66 &if_code_is_off_heap);
67 var_result = ReinterpretCast<RawPtrT>(
68 IntPtrAdd(BitcastTaggedToWord(code),
69 IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
70 Goto(&out);
71 }
72
73 BIND(&if_code_is_off_heap);
74 {
75 TNode<IntPtrT> builtin_entry_offset_from_isolate_root =
76 IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()),
77 ChangeInt32ToIntPtr(Word32Shl(
78 builtin_index, Int32Constant(kSystemPointerSizeLog2))));
79
80 var_result = ReinterpretCast<RawPtrT>(
81 Load(MachineType::Pointer(),
82 ExternalConstant(ExternalReference::isolate_root(isolate())),
83 builtin_entry_offset_from_isolate_root));
84 Goto(&out);
85 }
86
87 BIND(&out);
88 return var_result.value();
89 }
90
91 // -----------------------------------------------------------------------------
92 // ES6 section 21.2 RegExp Objects
93
AllocateRegExpResult( TNode<Context> context, TNode<Smi> length, TNode<Smi> index, TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index, TNode<BoolT> has_indices, TNode<FixedArray>* elements_out)94 TNode<JSRegExpResult> RegExpBuiltinsAssembler::AllocateRegExpResult(
95 TNode<Context> context, TNode<Smi> length, TNode<Smi> index,
96 TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index,
97 TNode<BoolT> has_indices, TNode<FixedArray>* elements_out) {
98 CSA_DCHECK(this, SmiLessThanOrEqual(
99 length, SmiConstant(JSArray::kMaxFastArrayLength)));
100 CSA_DCHECK(this, SmiGreaterThan(length, SmiConstant(0)));
101
102 // Allocate.
103
104 Label result_has_indices(this), allocated(this);
105 const ElementsKind elements_kind = PACKED_ELEMENTS;
106 base::Optional<TNode<AllocationSite>> no_gc_site = base::nullopt;
107 TNode<IntPtrT> length_intptr = SmiUntag(length);
108 // Note: The returned `var_elements` may be in young large object space, but
109 // `var_array` is guaranteed to be in new space so we could skip write
110 // barriers below.
111 TVARIABLE(JSArray, var_array);
112 TVARIABLE(FixedArrayBase, var_elements);
113
114 GotoIf(has_indices, &result_has_indices);
115 {
116 TNode<Map> map = CAST(LoadContextElement(LoadNativeContext(context),
117 Context::REGEXP_RESULT_MAP_INDEX));
118 std::tie(var_array, var_elements) =
119 AllocateUninitializedJSArrayWithElements(
120 elements_kind, map, length, no_gc_site, length_intptr,
121 AllocationFlag::kAllowLargeObjectAllocation, JSRegExpResult::kSize);
122 Goto(&allocated);
123 }
124
125 BIND(&result_has_indices);
126 {
127 TNode<Map> map =
128 CAST(LoadContextElement(LoadNativeContext(context),
129 Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX));
130 std::tie(var_array, var_elements) =
131 AllocateUninitializedJSArrayWithElements(
132 elements_kind, map, length, no_gc_site, length_intptr,
133 AllocationFlag::kAllowLargeObjectAllocation,
134 JSRegExpResultWithIndices::kSize);
135 Goto(&allocated);
136 }
137
138 BIND(&allocated);
139
140 // Finish result initialization.
141
142 TNode<JSRegExpResult> result =
143 UncheckedCast<JSRegExpResult>(var_array.value());
144
145 // Load undefined value once here to avoid multiple LoadRoots.
146 TNode<Oddball> undefined_value = UncheckedCast<Oddball>(
147 CodeAssembler::LoadRoot(RootIndex::kUndefinedValue));
148
149 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index);
150 // TODO(jgruber,turbofan): Could skip barrier but the MemoryOptimizer
151 // complains.
152 StoreObjectField(result, JSRegExpResult::kInputOffset, input);
153 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset,
154 undefined_value);
155 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kNamesOffset,
156 undefined_value);
157
158 StoreObjectField(result, JSRegExpResult::kRegexpInputOffset, input);
159
160 // If non-smi last_index then store an SmiZero instead.
161 {
162 TNode<Smi> last_index_smi = Select<Smi>(
163 TaggedIsSmi(last_index), [=] { return CAST(last_index); },
164 [=] { return SmiZero(); });
165 StoreObjectField(result, JSRegExpResult::kRegexpLastIndexOffset,
166 last_index_smi);
167 }
168
169 Label finish_initialization(this);
170 GotoIfNot(has_indices, &finish_initialization);
171 {
172 static_assert(
173 std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
174 "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
175 StoreObjectFieldNoWriteBarrier(
176 result, JSRegExpResultWithIndices::kIndicesOffset, undefined_value);
177 Goto(&finish_initialization);
178 }
179
180 BIND(&finish_initialization);
181
182 // Finish elements initialization.
183
184 FillFixedArrayWithValue(elements_kind, var_elements.value(), IntPtrZero(),
185 length_intptr, RootIndex::kUndefinedValue);
186
187 if (elements_out) *elements_out = CAST(var_elements.value());
188 return result;
189 }
190
FastLoadLastIndexBeforeSmiCheck( TNode<JSRegExp> regexp)191 TNode<Object> RegExpBuiltinsAssembler::FastLoadLastIndexBeforeSmiCheck(
192 TNode<JSRegExp> regexp) {
193 // Load the in-object field.
194 static const int field_offset =
195 JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
196 return LoadObjectField(regexp, field_offset);
197 }
198
SlowLoadLastIndex(TNode<Context> context, TNode<Object> regexp)199 TNode<Object> RegExpBuiltinsAssembler::SlowLoadLastIndex(TNode<Context> context,
200 TNode<Object> regexp) {
201 return GetProperty(context, regexp, isolate()->factory()->lastIndex_string());
202 }
203
204 // The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified
205 // JSRegExp instance.
FastStoreLastIndex(TNode<JSRegExp> regexp, TNode<Smi> value)206 void RegExpBuiltinsAssembler::FastStoreLastIndex(TNode<JSRegExp> regexp,
207 TNode<Smi> value) {
208 // Store the in-object field.
209 static const int field_offset =
210 JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
211 StoreObjectField(regexp, field_offset, value);
212 }
213
SlowStoreLastIndex(TNode<Context> context, TNode<Object> regexp, TNode<Object> value)214 void RegExpBuiltinsAssembler::SlowStoreLastIndex(TNode<Context> context,
215 TNode<Object> regexp,
216 TNode<Object> value) {
217 TNode<String> name = HeapConstant(isolate()->factory()->lastIndex_string());
218 SetPropertyStrict(context, regexp, name, value);
219 }
220
ConstructNewResultFromMatchInfo( TNode<Context> context, TNode<JSRegExp> regexp, TNode<RegExpMatchInfo> match_info, TNode<String> string, TNode<Number> last_index)221 TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
222 TNode<Context> context, TNode<JSRegExp> regexp,
223 TNode<RegExpMatchInfo> match_info, TNode<String> string,
224 TNode<Number> last_index) {
225 Label named_captures(this), maybe_build_indices(this), out(this);
226
227 TNode<IntPtrT> num_indices = SmiUntag(CAST(UnsafeLoadFixedArrayElement(
228 match_info, RegExpMatchInfo::kNumberOfCapturesIndex)));
229 TNode<Smi> num_results = SmiTag(WordShr(num_indices, 1));
230 TNode<Smi> start = CAST(UnsafeLoadFixedArrayElement(
231 match_info, RegExpMatchInfo::kFirstCaptureIndex));
232 TNode<Smi> end = CAST(UnsafeLoadFixedArrayElement(
233 match_info, RegExpMatchInfo::kFirstCaptureIndex + 1));
234
235 // Calculate the substring of the first match before creating the result array
236 // to avoid an unnecessary write barrier storing the first result.
237
238 TNode<String> first =
239 CAST(CallBuiltin(Builtin::kSubString, context, string, start, end));
240
241 // Load flags and check if the result object needs to have indices.
242 const TNode<Smi> flags =
243 CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
244 const TNode<BoolT> has_indices = IsSetSmi(flags, JSRegExp::kHasIndices);
245 TNode<FixedArray> result_elements;
246 TNode<JSRegExpResult> result =
247 AllocateRegExpResult(context, num_results, start, string, regexp,
248 last_index, has_indices, &result_elements);
249
250 UnsafeStoreFixedArrayElement(result_elements, 0, first);
251
252 // If no captures exist we can skip named capture handling as well.
253 GotoIf(SmiEqual(num_results, SmiConstant(1)), &maybe_build_indices);
254
255 // Store all remaining captures.
256 TNode<IntPtrT> limit = IntPtrAdd(
257 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices);
258
259 TVARIABLE(IntPtrT, var_from_cursor,
260 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2));
261 TVARIABLE(IntPtrT, var_to_cursor, IntPtrConstant(1));
262
263 Label loop(this, {&var_from_cursor, &var_to_cursor});
264
265 Goto(&loop);
266 BIND(&loop);
267 {
268 TNode<IntPtrT> from_cursor = var_from_cursor.value();
269 TNode<IntPtrT> to_cursor = var_to_cursor.value();
270 TNode<Smi> start_cursor =
271 CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor));
272
273 Label next_iter(this);
274 GotoIf(SmiEqual(start_cursor, SmiConstant(-1)), &next_iter);
275
276 TNode<IntPtrT> from_cursor_plus1 =
277 IntPtrAdd(from_cursor, IntPtrConstant(1));
278 TNode<Smi> end_cursor =
279 CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor_plus1));
280
281 TNode<String> capture = CAST(CallBuiltin(Builtin::kSubString, context,
282 string, start_cursor, end_cursor));
283 UnsafeStoreFixedArrayElement(result_elements, to_cursor, capture);
284 Goto(&next_iter);
285
286 BIND(&next_iter);
287 var_from_cursor = IntPtrAdd(from_cursor, IntPtrConstant(2));
288 var_to_cursor = IntPtrAdd(to_cursor, IntPtrConstant(1));
289 Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
290 &named_captures);
291 }
292
293 BIND(&named_captures);
294 {
295 CSA_DCHECK(this, SmiGreaterThan(num_results, SmiConstant(1)));
296
297 // Preparations for named capture properties. Exit early if the result does
298 // not have any named captures to minimize performance impact.
299
300 TNode<FixedArray> data =
301 CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
302
303 // We reach this point only if captures exist, implying that the assigned
304 // regexp engine must be able to handle captures.
305 CSA_DCHECK(
306 this,
307 Word32Or(
308 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
309 SmiConstant(JSRegExp::IRREGEXP)),
310 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
311 SmiConstant(JSRegExp::EXPERIMENTAL))));
312
313 // The names fixed array associates names at even indices with a capture
314 // index at odd indices.
315 TNode<Object> maybe_names =
316 LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
317 GotoIf(TaggedEqual(maybe_names, SmiZero()), &maybe_build_indices);
318
319 // One or more named captures exist, add a property for each one.
320
321 TNode<FixedArray> names = CAST(maybe_names);
322 TNode<IntPtrT> names_length = LoadAndUntagFixedArrayBaseLength(names);
323 CSA_DCHECK(this, IntPtrGreaterThan(names_length, IntPtrZero()));
324
325 // Stash names in case we need them to build the indices array later.
326 StoreObjectField(result, JSRegExpResult::kNamesOffset, names);
327
328 // Allocate a new object to store the named capture properties.
329 // TODO(jgruber): Could be optimized by adding the object map to the heap
330 // root list.
331
332 TNode<IntPtrT> num_properties = WordSar(names_length, 1);
333 TNode<NativeContext> native_context = LoadNativeContext(context);
334 TNode<Map> map = LoadSlowObjectWithNullPrototypeMap(native_context);
335 TNode<HeapObject> properties;
336 if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) {
337 properties = AllocateSwissNameDictionary(num_properties);
338 } else {
339 properties = AllocateNameDictionary(
340 num_properties, AllocationFlag::kAllowLargeObjectAllocation);
341 }
342
343 TNode<JSObject> group_object = AllocateJSObjectFromMap(map, properties);
344 StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object);
345
346 TVARIABLE(IntPtrT, var_i, IntPtrZero());
347
348 Label inner_loop(this, &var_i);
349
350 Goto(&inner_loop);
351 BIND(&inner_loop);
352 {
353 TNode<IntPtrT> i = var_i.value();
354 TNode<IntPtrT> i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
355 TNode<IntPtrT> i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
356
357 TNode<String> name = CAST(LoadFixedArrayElement(names, i));
358 TNode<Smi> index = CAST(LoadFixedArrayElement(names, i_plus_1));
359 TNode<HeapObject> capture =
360 CAST(LoadFixedArrayElement(result_elements, SmiUntag(index)));
361
362 // TODO(v8:8213): For maintainability, we should call a CSA/Torque
363 // implementation of CreateDataProperty instead.
364
365 // At this point the spec says to call CreateDataProperty. However, we can
366 // skip most of the steps and go straight to adding a dictionary entry
367 // because we know a bunch of useful facts:
368 // - All keys are non-numeric internalized strings
369 // - No keys repeat
370 // - Receiver has no prototype
371 // - Receiver isn't used as a prototype
372 // - Receiver isn't any special object like a Promise intrinsic object
373 // - Receiver is extensible
374 // - Receiver has no interceptors
375 Label add_dictionary_property_slow(this, Label::kDeferred);
376 Add<PropertyDictionary>(CAST(properties), name, capture,
377 &add_dictionary_property_slow);
378
379 var_i = i_plus_2;
380 Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length),
381 &maybe_build_indices, &inner_loop);
382
383 BIND(&add_dictionary_property_slow);
384 // If the dictionary needs resizing, the above Add call will jump here
385 // before making any changes. This shouldn't happen because we allocated
386 // the dictionary with enough space above.
387 Unreachable();
388 }
389 }
390
391 // Build indices if needed (i.e. if the /d flag is present) after named
392 // capture groups are processed.
393 BIND(&maybe_build_indices);
394 GotoIfNot(has_indices, &out);
395 {
396 const TNode<Object> maybe_names =
397 LoadObjectField(result, JSRegExpResultWithIndices::kNamesOffset);
398 const TNode<JSRegExpResultIndices> indices =
399 UncheckedCast<JSRegExpResultIndices>(
400 CallRuntime(Runtime::kRegExpBuildIndices, context, regexp,
401 match_info, maybe_names));
402 StoreObjectField(result, JSRegExpResultWithIndices::kIndicesOffset,
403 indices);
404 Goto(&out);
405 }
406
407 BIND(&out);
408 return result;
409 }
410
GetStringPointers( TNode<RawPtrT> string_data, TNode<IntPtrT> offset, TNode<IntPtrT> last_index, TNode<IntPtrT> string_length, String::Encoding encoding, TVariable<RawPtrT>* var_string_start, TVariable<RawPtrT>* var_string_end)411 void RegExpBuiltinsAssembler::GetStringPointers(
412 TNode<RawPtrT> string_data, TNode<IntPtrT> offset,
413 TNode<IntPtrT> last_index, TNode<IntPtrT> string_length,
414 String::Encoding encoding, TVariable<RawPtrT>* var_string_start,
415 TVariable<RawPtrT>* var_string_end) {
416 DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
417 DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());
418
419 const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
420 ? UINT8_ELEMENTS
421 : UINT16_ELEMENTS;
422
423 TNode<IntPtrT> from_offset =
424 ElementOffsetFromIndex(IntPtrAdd(offset, last_index), kind);
425 *var_string_start =
426 ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, from_offset));
427
428 TNode<IntPtrT> to_offset =
429 ElementOffsetFromIndex(IntPtrAdd(offset, string_length), kind);
430 *var_string_end = ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, to_offset));
431 }
432
RegExpExecInternal( TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, TNode<Number> last_index, TNode<RegExpMatchInfo> match_info, RegExp::ExecQuirks exec_quirks)433 TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
434 TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
435 TNode<Number> last_index, TNode<RegExpMatchInfo> match_info,
436 RegExp::ExecQuirks exec_quirks) {
437 ToDirectStringAssembler to_direct(state(), string);
438
439 TVARIABLE(HeapObject, var_result);
440 Label out(this), atom(this), runtime(this, Label::kDeferred),
441 retry_experimental(this, Label::kDeferred);
442
443 // External constants.
444 TNode<ExternalReference> isolate_address =
445 ExternalConstant(ExternalReference::isolate_address(isolate()));
446 TNode<ExternalReference> static_offsets_vector_address = ExternalConstant(
447 ExternalReference::address_of_static_offsets_vector(isolate()));
448
449 // At this point, last_index is definitely a canonicalized non-negative
450 // number, which implies that any non-Smi last_index is greater than
451 // the maximal string length. If lastIndex > string.length then the matcher
452 // must fail.
453
454 Label if_failure(this);
455
456 CSA_DCHECK(this, IsNumberNormalized(last_index));
457 CSA_DCHECK(this, IsNumberPositive(last_index));
458 GotoIf(TaggedIsNotSmi(last_index), &if_failure);
459
460 TNode<IntPtrT> int_string_length = LoadStringLengthAsWord(string);
461 TNode<IntPtrT> int_last_index = SmiUntag(CAST(last_index));
462
463 GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure);
464
465 // Since the RegExp has been compiled, data contains a fixed array.
466 TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
467 {
468 // Dispatch on the type of the RegExp.
469 {
470 Label next(this), unreachable(this, Label::kDeferred);
471 TNode<Int32T> tag = LoadAndUntagToWord32FixedArrayElement(
472 data, IntPtrConstant(JSRegExp::kTagIndex));
473
474 int32_t values[] = {
475 JSRegExp::IRREGEXP,
476 JSRegExp::ATOM,
477 JSRegExp::EXPERIMENTAL,
478 };
479 Label* labels[] = {&next, &atom, &next};
480
481 STATIC_ASSERT(arraysize(values) == arraysize(labels));
482 Switch(tag, &unreachable, values, labels, arraysize(values));
483
484 BIND(&unreachable);
485 Unreachable();
486
487 BIND(&next);
488 }
489
490 // Check (number_of_captures + 1) * 2 <= offsets vector size
491 // Or number_of_captures <= offsets vector size / 2 - 1
492 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
493 data, JSRegExp::kIrregexpCaptureCountIndex));
494
495 const int kOffsetsSize = Isolate::kJSRegexpStaticOffsetsVectorSize;
496 STATIC_ASSERT(kOffsetsSize >= 2);
497 GotoIf(SmiAbove(capture_count, SmiConstant(kOffsetsSize / 2 - 1)),
498 &runtime);
499 }
500
501 // Unpack the string if possible.
502
503 to_direct.TryToDirect(&runtime);
504
505 // Load the irregexp code or bytecode object and offsets into the subject
506 // string. Both depend on whether the string is one- or two-byte.
507
508 TVARIABLE(RawPtrT, var_string_start);
509 TVARIABLE(RawPtrT, var_string_end);
510 TVARIABLE(Object, var_code);
511 TVARIABLE(Object, var_bytecode);
512
513 {
514 TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime);
515
516 Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
517 Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
518 &if_isonebyte, &if_istwobyte);
519
520 BIND(&if_isonebyte);
521 {
522 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
523 int_string_length, String::ONE_BYTE_ENCODING,
524 &var_string_start, &var_string_end);
525 var_code =
526 UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex);
527 var_bytecode = UnsafeLoadFixedArrayElement(
528 data, JSRegExp::kIrregexpLatin1BytecodeIndex);
529 Goto(&next);
530 }
531
532 BIND(&if_istwobyte);
533 {
534 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
535 int_string_length, String::TWO_BYTE_ENCODING,
536 &var_string_start, &var_string_end);
537 var_code =
538 UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex);
539 var_bytecode = UnsafeLoadFixedArrayElement(
540 data, JSRegExp::kIrregexpUC16BytecodeIndex);
541 Goto(&next);
542 }
543
544 BIND(&next);
545 }
546
547 // Check that the irregexp code has been generated for the actual string
548 // encoding. If it has, the field contains a code object; and otherwise it
549 // contains the uninitialized sentinel as a smi.
550 #ifdef DEBUG
551 {
552 Label next(this);
553 GotoIfNot(TaggedIsSmi(var_code.value()), &next);
554 CSA_DCHECK(this, SmiEqual(CAST(var_code.value()),
555 SmiConstant(JSRegExp::kUninitializedValue)));
556 Goto(&next);
557 BIND(&next);
558 }
559 #endif
560
561 GotoIf(TaggedIsSmi(var_code.value()), &runtime);
562 TNode<CodeT> code = CAST(var_code.value());
563
564 Label if_success(this), if_exception(this, Label::kDeferred);
565 {
566 IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
567
568 // Set up args for the final call into generated Irregexp code.
569
570 MachineType type_int32 = MachineType::Int32();
571 MachineType type_tagged = MachineType::AnyTagged();
572 MachineType type_ptr = MachineType::Pointer();
573
574 // Result: A NativeRegExpMacroAssembler::Result return code.
575 MachineType retval_type = type_int32;
576
577 // Argument 0: Original subject string.
578 MachineType arg0_type = type_tagged;
579 TNode<String> arg0 = string;
580
581 // Argument 1: Previous index.
582 MachineType arg1_type = type_int32;
583 TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index);
584
585 // Argument 2: Start of string data. This argument is ignored in the
586 // interpreter.
587 MachineType arg2_type = type_ptr;
588 TNode<RawPtrT> arg2 = var_string_start.value();
589
590 // Argument 3: End of string data. This argument is ignored in the
591 // interpreter.
592 MachineType arg3_type = type_ptr;
593 TNode<RawPtrT> arg3 = var_string_end.value();
594
595 // Argument 4: static offsets vector buffer.
596 MachineType arg4_type = type_ptr;
597 TNode<ExternalReference> arg4 = static_offsets_vector_address;
598
599 // Argument 5: Number of capture registers.
600 // Setting this to the number of registers required to store all captures
601 // forces global regexps to behave as non-global.
602 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
603 data, JSRegExp::kIrregexpCaptureCountIndex));
604 // capture_count is the number of captures without the match itself.
605 // Required registers = (capture_count + 1) * 2.
606 STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) * 2));
607 TNode<Smi> register_count =
608 SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
609
610 MachineType arg5_type = type_int32;
611 TNode<Int32T> arg5 = SmiToInt32(register_count);
612
613 // Argument 6: Indicate that this is a direct call from JavaScript.
614 MachineType arg6_type = type_int32;
615 TNode<Int32T> arg6 = Int32Constant(RegExp::CallOrigin::kFromJs);
616
617 // Argument 7: Pass current isolate address.
618 MachineType arg7_type = type_ptr;
619 TNode<ExternalReference> arg7 = isolate_address;
620
621 // Argument 8: Regular expression object. This argument is ignored in native
622 // irregexp code.
623 MachineType arg8_type = type_tagged;
624 TNode<JSRegExp> arg8 = regexp;
625
626 TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code);
627
628 // AIX uses function descriptors on CFunction calls. code_entry in this case
629 // may also point to a Regex interpreter entry trampoline which does not
630 // have a function descriptor. This method is ineffective on other platforms
631 // and is equivalent to CallCFunction.
632 TNode<Int32T> result =
633 UncheckedCast<Int32T>(CallCFunctionWithoutFunctionDescriptor(
634 code_entry, retval_type, std::make_pair(arg0_type, arg0),
635 std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
636 std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
637 std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6),
638 std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8)));
639
640 // Check the result.
641 // We expect exactly one result since we force the called regexp to behave
642 // as non-global.
643 TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
644 GotoIf(
645 IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
646 &if_success);
647 GotoIf(
648 IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
649 &if_failure);
650 GotoIf(IntPtrEqual(int_result,
651 IntPtrConstant(RegExp::kInternalRegExpException)),
652 &if_exception);
653 GotoIf(IntPtrEqual(
654 int_result,
655 IntPtrConstant(RegExp::kInternalRegExpFallbackToExperimental)),
656 &retry_experimental);
657
658 CSA_DCHECK(this, IntPtrEqual(int_result,
659 IntPtrConstant(RegExp::kInternalRegExpRetry)));
660 Goto(&runtime);
661 }
662
663 BIND(&if_success);
664 {
665 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
666 static constexpr int kMatchStartOffset = 0;
667 TNode<IntPtrT> value = ChangeInt32ToIntPtr(UncheckedCast<Int32T>(
668 Load(MachineType::Int32(), static_offsets_vector_address,
669 IntPtrConstant(kMatchStartOffset))));
670 GotoIf(UintPtrGreaterThanOrEqual(value, int_string_length), &if_failure);
671 }
672
673 // Check that the last match info has space for the capture registers and
674 // the additional information. Ensure no overflow in add.
675 STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);
676 TNode<Smi> available_slots =
677 SmiSub(LoadFixedArrayBaseLength(match_info),
678 SmiConstant(RegExpMatchInfo::kLastMatchOverhead));
679 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
680 data, JSRegExp::kIrregexpCaptureCountIndex));
681 // Calculate number of register_count = (capture_count + 1) * 2.
682 TNode<Smi> register_count =
683 SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
684 GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);
685
686 // Fill match_info.
687 UnsafeStoreFixedArrayElement(
688 match_info, RegExpMatchInfo::kNumberOfCapturesIndex, register_count);
689 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
690 string);
691 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
692 string);
693
694 // Fill match and capture offsets in match_info.
695 {
696 TNode<IntPtrT> limit_offset =
697 ElementOffsetFromIndex(register_count, INT32_ELEMENTS, 0);
698
699 TNode<IntPtrT> to_offset = ElementOffsetFromIndex(
700 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS,
701 RegExpMatchInfo::kHeaderSize - kHeapObjectTag);
702 TVARIABLE(IntPtrT, var_to_offset, to_offset);
703
704 VariableList vars({&var_to_offset}, zone());
705 BuildFastLoop<IntPtrT>(
706 vars, IntPtrZero(), limit_offset,
707 [&](TNode<IntPtrT> offset) {
708 TNode<Int32T> value = UncheckedCast<Int32T>(Load(
709 MachineType::Int32(), static_offsets_vector_address, offset));
710 TNode<Smi> smi_value = SmiFromInt32(value);
711 StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,
712 var_to_offset.value(), smi_value);
713 Increment(&var_to_offset, kTaggedSize);
714 },
715 kInt32Size, IndexAdvanceMode::kPost);
716 }
717
718 var_result = match_info;
719 Goto(&out);
720 }
721
722 BIND(&if_failure);
723 {
724 var_result = NullConstant();
725 Goto(&out);
726 }
727
728 BIND(&if_exception);
729 {
730 // A stack overflow was detected in RegExp code.
731 #ifdef DEBUG
732 TNode<ExternalReference> pending_exception_address =
733 ExternalConstant(ExternalReference::Create(
734 IsolateAddressId::kPendingExceptionAddress, isolate()));
735 CSA_DCHECK(this, IsTheHole(Load<Object>(pending_exception_address)));
736 #endif // DEBUG
737 CallRuntime(Runtime::kThrowStackOverflow, context);
738 Unreachable();
739 }
740
741 BIND(&retry_experimental);
742 {
743 auto target_fn =
744 exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
745 ? Runtime::kRegExpExperimentalOneshotExecTreatMatchAtEndAsFailure
746 : Runtime::kRegExpExperimentalOneshotExec;
747 var_result = CAST(CallRuntime(target_fn, context, regexp, string,
748 last_index, match_info));
749 Goto(&out);
750 }
751
752 BIND(&runtime);
753 {
754 auto target_fn =
755 exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
756 ? Runtime::kRegExpExecTreatMatchAtEndAsFailure
757 : Runtime::kRegExpExec;
758 var_result = CAST(CallRuntime(target_fn, context, regexp, string,
759 last_index, match_info));
760 Goto(&out);
761 }
762
763 BIND(&atom);
764 {
765 // TODO(jgruber): A call with 4 args stresses register allocation, this
766 // should probably just be inlined.
767 var_result = CAST(CallBuiltin(Builtin::kRegExpExecAtom, context, regexp,
768 string, last_index, match_info));
769 Goto(&out);
770 }
771
772 BIND(&out);
773 return var_result.value();
774 }
775
776 TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
777 TNode<Context> context, TNode<Object> object, TNode<Map> map) {
778 Label out(this);
779 TVARIABLE(BoolT, var_result);
780
781 #ifdef V8_ENABLE_FORCE_SLOW_PATH
782 var_result = Int32FalseConstant();
783 GotoIfForceSlowPath(&out);
784 #endif
785
786 const TNode<NativeContext> native_context = LoadNativeContext(context);
787 const TNode<HeapObject> regexp_fun =
788 CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
789 const TNode<Object> initial_map =
790 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
791 const TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
792
793 var_result = has_initialmap;
794 GotoIfNot(has_initialmap, &out);
795
796 // The smi check is required to omit ToLength(lastIndex) calls with possible
797 // user-code execution on the fast path.
798 TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
799 var_result = TaggedIsPositiveSmi(last_index);
800 Goto(&out);
801
802 BIND(&out);
803 return var_result.value();
804 }
805
806 TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
807 TNode<Context> context, TNode<Object> object) {
808 CSA_DCHECK(this, TaggedIsNotSmi(object));
809 return IsFastRegExpNoPrototype(context, object, LoadMap(CAST(object)));
810 }
811
812 void RegExpBuiltinsAssembler::BranchIfFastRegExp(
813 TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
814 PrototypeCheckAssembler::Flags prototype_check_flags,
815 base::Optional<DescriptorIndexNameValue> additional_property_to_check,
816 Label* if_isunmodified, Label* if_ismodified) {
817 CSA_DCHECK(this, TaggedEqual(LoadMap(object), map));
818
819 GotoIfForceSlowPath(if_ismodified);
820
821 // This should only be needed for String.p.(split||matchAll), but we are
822 // conservative here.
823 GotoIf(IsRegExpSpeciesProtectorCellInvalid(), if_ismodified);
824
825 TNode<NativeContext> native_context = LoadNativeContext(context);
826 TNode<JSFunction> regexp_fun =
827 CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
828 TNode<Map> initial_map = CAST(
829 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset));
830 TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
831
832 GotoIfNot(has_initialmap, if_ismodified);
833
834 // The smi check is required to omit ToLength(lastIndex) calls with possible
835 // user-code execution on the fast path.
836 TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
837 GotoIfNot(TaggedIsPositiveSmi(last_index), if_ismodified);
838
839 // Verify the prototype.
840
841 TNode<Map> initial_proto_initial_map = CAST(
842 LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX));
843
844 DescriptorIndexNameValue properties_to_check[2];
845 int property_count = 0;
846 properties_to_check[property_count++] = DescriptorIndexNameValue{
847 JSRegExp::kExecFunctionDescriptorIndex, RootIndex::kexec_string,
848 Context::REGEXP_EXEC_FUNCTION_INDEX};
849 if (additional_property_to_check) {
850 properties_to_check[property_count++] = *additional_property_to_check;
851 }
852
853 PrototypeCheckAssembler prototype_check_assembler(
854 state(), prototype_check_flags, native_context, initial_proto_initial_map,
855 base::Vector<DescriptorIndexNameValue>(properties_to_check,
856 property_count));
857
858 TNode<HeapObject> prototype = LoadMapPrototype(map);
859 prototype_check_assembler.CheckAndBranch(prototype, if_isunmodified,
860 if_ismodified);
861 }
862 void RegExpBuiltinsAssembler::BranchIfFastRegExpForSearch(
863 TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
864 Label* if_ismodified) {
865 BranchIfFastRegExp(
866 context, object, LoadMap(object),
867 PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
868 DescriptorIndexNameValue{JSRegExp::kSymbolSearchFunctionDescriptorIndex,
869 RootIndex::ksearch_symbol,
870 Context::REGEXP_SEARCH_FUNCTION_INDEX},
871 if_isunmodified, if_ismodified);
872 }
873
874 void RegExpBuiltinsAssembler::BranchIfFastRegExpForMatch(
875 TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
876 Label* if_ismodified) {
877 BranchIfFastRegExp(
878 context, object, LoadMap(object),
879 PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
880 DescriptorIndexNameValue{JSRegExp::kSymbolMatchFunctionDescriptorIndex,
881 RootIndex::kmatch_symbol,
882 Context::REGEXP_MATCH_FUNCTION_INDEX},
883 if_isunmodified, if_ismodified);
884 }
885
886 void RegExpBuiltinsAssembler::BranchIfFastRegExp_Strict(
887 TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
888 Label* if_ismodified) {
889 BranchIfFastRegExp(context, object, LoadMap(object),
890 PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
891 base::nullopt, if_isunmodified, if_ismodified);
892 }
893
894 void RegExpBuiltinsAssembler::BranchIfFastRegExp_Permissive(
895 TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
896 Label* if_ismodified) {
897 BranchIfFastRegExp(context, object, LoadMap(object),
898 PrototypeCheckAssembler::kCheckFull, base::nullopt,
899 if_isunmodified, if_ismodified);
900 }
901
902 void RegExpBuiltinsAssembler::BranchIfRegExpResult(const TNode<Context> context,
903 const TNode<Object> object,
904 Label* if_isunmodified,
905 Label* if_ismodified) {
906 // Could be a Smi.
907 const TNode<Map> map = LoadReceiverMap(object);
908
909 const TNode<NativeContext> native_context = LoadNativeContext(context);
910 const TNode<Object> initial_regexp_result_map =
911 LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);
912
913 Label maybe_result_with_indices(this);
914 Branch(TaggedEqual(map, initial_regexp_result_map), if_isunmodified,
915 &maybe_result_with_indices);
916 BIND(&maybe_result_with_indices);
917 {
918 static_assert(
919 std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
920 "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
921 const TNode<Object> initial_regexp_result_with_indices_map =
922 LoadContextElement(native_context,
923 Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX);
924 Branch(TaggedEqual(map, initial_regexp_result_with_indices_map),
925 if_isunmodified, if_ismodified);
926 }
927 }
928
929 // Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
930 // and {match_info} is updated on success.
931 // The slow path is implemented in RegExp::AtomExec.
932 TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
933 auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
934 auto subject_string = Parameter<String>(Descriptor::kString);
935 auto last_index = Parameter<Smi>(Descriptor::kLastIndex);
936 auto match_info = Parameter<FixedArray>(Descriptor::kMatchInfo);
937 auto context = Parameter<Context>(Descriptor::kContext);
938
939 CSA_DCHECK(this, TaggedIsPositiveSmi(last_index));
940
941 TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
942 CSA_DCHECK(
943 this,
944 SmiEqual(CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kTagIndex)),
945 SmiConstant(JSRegExp::ATOM)));
946
947 // Callers ensure that last_index is in-bounds.
948 CSA_DCHECK(this,
949 UintPtrLessThanOrEqual(SmiUntag(last_index),
950 LoadStringLengthAsWord(subject_string)));
951
952 const TNode<String> needle_string =
953 CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex));
954
955 // ATOM patterns are guaranteed to not be the empty string (these are
956 // intercepted and replaced in JSRegExp::Initialize.
957 //
958 // This is especially relevant for crbug.com/1075514: atom patterns are
959 // non-empty and thus guaranteed not to match at the end of the string.
960 CSA_DCHECK(this, IntPtrGreaterThan(LoadStringLengthAsWord(needle_string),
961 IntPtrConstant(0)));
962
963 const TNode<Smi> match_from =
964 CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string,
965 needle_string, last_index));
966
967 Label if_failure(this), if_success(this);
968 Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);
969
970 BIND(&if_success);
971 {
972 CSA_DCHECK(this, TaggedIsPositiveSmi(match_from));
973 CSA_DCHECK(this, UintPtrLessThan(SmiUntag(match_from),
974 LoadStringLengthAsWord(subject_string)));
975
976 const int kNumRegisters = 2;
977 STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);
978
979 const TNode<Smi> match_to =
980 SmiAdd(match_from, LoadStringLengthAsSmi(needle_string));
981
982 UnsafeStoreFixedArrayElement(match_info,
983 RegExpMatchInfo::kNumberOfCapturesIndex,
984 SmiConstant(kNumRegisters));
985 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
986 subject_string);
987 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
988 subject_string);
989 UnsafeStoreFixedArrayElement(
990 match_info, RegExpMatchInfo::kFirstCaptureIndex, match_from);
991 UnsafeStoreFixedArrayElement(
992 match_info, RegExpMatchInfo::kFirstCaptureIndex + 1, match_to);
993
994 Return(match_info);
995 }
996
997 BIND(&if_failure);
998 Return(NullConstant());
999 }
1000
// Builtin wrapper around the RegExpExecInternal assembler helper. The incoming
// last index must be a normalized, positive Number.
TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) {
  auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp);
  auto subject = Parameter<String>(Descriptor::kString);
  auto start_index = Parameter<Number>(Descriptor::kLastIndex);
  auto match_info = Parameter<RegExpMatchInfo>(Descriptor::kMatchInfo);
  auto context = Parameter<Context>(Descriptor::kContext);

  CSA_DCHECK(this, IsNumberNormalized(start_index));
  CSA_DCHECK(this, IsNumberPositive(start_index));

  const auto exec_result =
      RegExpExecInternal(context, regexp, subject, start_index, match_info);
  Return(exec_result);
}
1013
FlagsGetter(TNode<Context> context, TNode<Object> regexp, bool is_fastpath)1014 TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
1015 TNode<Object> regexp,
1016 bool is_fastpath) {
1017 TVARIABLE(String, result);
1018 Label runtime(this, Label::kDeferred), done(this, &result);
1019 if (is_fastpath) {
1020 GotoIfForceSlowPath(&runtime);
1021 }
1022
1023 Isolate* isolate = this->isolate();
1024
1025 const TNode<IntPtrT> int_one = IntPtrConstant(1);
1026 TVARIABLE(Uint32T, var_length, Uint32Constant(0));
1027 TVARIABLE(IntPtrT, var_flags);
1028
1029 // First, count the number of characters we will need and check which flags
1030 // are set.
1031
1032 if (is_fastpath) {
1033 // Refer to JSRegExp's flag property on the fast-path.
1034 CSA_DCHECK(this, IsJSRegExp(CAST(regexp)));
1035 const TNode<Smi> flags_smi =
1036 CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
1037 var_flags = SmiUntag(flags_smi);
1038
1039 #define CASE_FOR_FLAG(Lower, Camel, ...) \
1040 do { \
1041 Label next(this); \
1042 GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
1043 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
1044 Goto(&next); \
1045 BIND(&next); \
1046 } while (false);
1047
1048 REGEXP_FLAG_LIST(CASE_FOR_FLAG)
1049 #undef CASE_FOR_FLAG
1050 } else {
1051 DCHECK(!is_fastpath);
1052
1053 // Fall back to GetProperty stub on the slow-path.
1054 var_flags = IntPtrZero();
1055
1056 #define CASE_FOR_FLAG(NAME, FLAG) \
1057 do { \
1058 Label next(this); \
1059 const TNode<Object> flag = GetProperty( \
1060 context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
1061 Label if_isflagset(this); \
1062 BranchIfToBooleanIsTrue(flag, &if_isflagset, &next); \
1063 BIND(&if_isflagset); \
1064 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
1065 var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG))); \
1066 Goto(&next); \
1067 BIND(&next); \
1068 } while (false)
1069
1070 CASE_FOR_FLAG("global", JSRegExp::kGlobal);
1071 CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
1072 CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
1073 CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
1074 CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
1075 CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
1076 CASE_FOR_FLAG("hasIndices", JSRegExp::kHasIndices);
1077 #undef CASE_FOR_FLAG
1078
1079 #define CASE_FOR_FLAG(NAME, V8_FLAG_EXTERN_REF, FLAG) \
1080 do { \
1081 Label next(this); \
1082 TNode<Word32T> flag_value = UncheckedCast<Word32T>( \
1083 Load(MachineType::Uint8(), ExternalConstant(V8_FLAG_EXTERN_REF))); \
1084 GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)), \
1085 Int32Constant(0)), \
1086 &next); \
1087 const TNode<Object> flag = GetProperty( \
1088 context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
1089 Label if_isflagset(this); \
1090 BranchIfToBooleanIsTrue(flag, &if_isflagset, &next); \
1091 BIND(&if_isflagset); \
1092 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
1093 var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG))); \
1094 Goto(&next); \
1095 BIND(&next); \
1096 } while (false)
1097
1098 CASE_FOR_FLAG(
1099 "linear",
1100 ExternalReference::address_of_enable_experimental_regexp_engine(),
1101 JSRegExp::kLinear);
1102 #undef CASE_FOR_FLAG
1103 }
1104
1105 // Allocate a string of the required length and fill it with the
1106 // corresponding char for each set flag.
1107
1108 {
1109 const TNode<String> string = AllocateSeqOneByteString(var_length.value());
1110
1111 TVARIABLE(IntPtrT, var_offset,
1112 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
1113
1114 #define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...) \
1115 do { \
1116 Label next(this); \
1117 GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
1118 const TNode<Int32T> value = Int32Constant(Char); \
1119 StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \
1120 var_offset.value(), value); \
1121 var_offset = IntPtrAdd(var_offset.value(), int_one); \
1122 Goto(&next); \
1123 BIND(&next); \
1124 } while (false);
1125
1126 REGEXP_FLAG_LIST(CASE_FOR_FLAG)
1127 #undef CASE_FOR_FLAG
1128
1129 if (is_fastpath) {
1130 #ifdef V8_ENABLE_FORCE_SLOW_PATH
1131 result = string;
1132 Goto(&done);
1133
1134 BIND(&runtime);
1135 {
1136 result =
1137 CAST(CallRuntime(Runtime::kRegExpStringFromFlags, context, regexp));
1138 Goto(&done);
1139 }
1140
1141 BIND(&done);
1142 return result.value();
1143 #else
1144 return string;
1145 #endif
1146 } else {
1147 return string;
1148 }
1149 }
1150 }
1151
1152 // ES#sec-regexpinitialize
1153 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
RegExpInitialize( const TNode<Context> context, const TNode<JSRegExp> regexp, const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags)1154 TNode<Object> RegExpBuiltinsAssembler::RegExpInitialize(
1155 const TNode<Context> context, const TNode<JSRegExp> regexp,
1156 const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags) {
1157 // Normalize pattern.
1158 const TNode<Object> pattern = Select<Object>(
1159 IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); },
1160 [=] { return ToString_Inline(context, maybe_pattern); });
1161
1162 // Normalize flags.
1163 const TNode<Object> flags = Select<Object>(
1164 IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); },
1165 [=] { return ToString_Inline(context, maybe_flags); });
1166
1167 // Initialize.
1168
1169 return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
1170 pattern, flags);
1171 }
1172
1173 // ES#sec-regexp-pattern-flags
1174 // RegExp ( pattern, flags )
TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler)1175 TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) {
1176 auto pattern = Parameter<Object>(Descriptor::kPattern);
1177 auto flags = Parameter<Object>(Descriptor::kFlags);
1178 auto new_target = Parameter<Object>(Descriptor::kJSNewTarget);
1179 auto context = Parameter<Context>(Descriptor::kContext);
1180
1181 Isolate* isolate = this->isolate();
1182
1183 TVARIABLE(Object, var_flags, flags);
1184 TVARIABLE(Object, var_pattern, pattern);
1185 TVARIABLE(Object, var_new_target, new_target);
1186
1187 TNode<NativeContext> native_context = LoadNativeContext(context);
1188 TNode<JSFunction> regexp_function =
1189 CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
1190
1191 TNode<BoolT> pattern_is_regexp = IsRegExp(context, pattern);
1192
1193 {
1194 Label next(this);
1195
1196 GotoIfNot(IsUndefined(new_target), &next);
1197 var_new_target = regexp_function;
1198
1199 GotoIfNot(pattern_is_regexp, &next);
1200 GotoIfNot(IsUndefined(flags), &next);
1201
1202 TNode<Object> value =
1203 GetProperty(context, pattern, isolate->factory()->constructor_string());
1204
1205 GotoIfNot(TaggedEqual(value, regexp_function), &next);
1206 Return(pattern);
1207
1208 BIND(&next);
1209 }
1210
1211 {
1212 Label next(this), if_patternisfastregexp(this),
1213 if_patternisslowregexp(this);
1214 GotoIf(TaggedIsSmi(pattern), &next);
1215
1216 GotoIf(IsJSRegExp(CAST(pattern)), &if_patternisfastregexp);
1217
1218 Branch(pattern_is_regexp, &if_patternisslowregexp, &next);
1219
1220 BIND(&if_patternisfastregexp);
1221 {
1222 TNode<Object> source =
1223 LoadObjectField(CAST(pattern), JSRegExp::kSourceOffset);
1224 var_pattern = source;
1225
1226 {
1227 Label inner_next(this);
1228 GotoIfNot(IsUndefined(flags), &inner_next);
1229
1230 var_flags = FlagsGetter(context, pattern, true);
1231 Goto(&inner_next);
1232
1233 BIND(&inner_next);
1234 }
1235
1236 Goto(&next);
1237 }
1238
1239 BIND(&if_patternisslowregexp);
1240 {
1241 var_pattern =
1242 GetProperty(context, pattern, isolate->factory()->source_string());
1243
1244 {
1245 Label inner_next(this);
1246 GotoIfNot(IsUndefined(flags), &inner_next);
1247
1248 var_flags =
1249 GetProperty(context, pattern, isolate->factory()->flags_string());
1250 Goto(&inner_next);
1251
1252 BIND(&inner_next);
1253 }
1254
1255 Goto(&next);
1256 }
1257
1258 BIND(&next);
1259 }
1260
1261 // Allocate.
1262
1263 TVARIABLE(JSRegExp, var_regexp);
1264 {
1265 Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred),
1266 next(this);
1267 Branch(TaggedEqual(var_new_target.value(), regexp_function),
1268 &allocate_jsregexp, &allocate_generic);
1269
1270 BIND(&allocate_jsregexp);
1271 {
1272 const TNode<Map> initial_map = CAST(LoadObjectField(
1273 regexp_function, JSFunction::kPrototypeOrInitialMapOffset));
1274 var_regexp = CAST(AllocateJSObjectFromMap(initial_map));
1275 Goto(&next);
1276 }
1277
1278 BIND(&allocate_generic);
1279 {
1280 ConstructorBuiltinsAssembler constructor_assembler(this->state());
1281 var_regexp = CAST(constructor_assembler.FastNewObject(
1282 context, regexp_function, CAST(var_new_target.value())));
1283 Goto(&next);
1284 }
1285
1286 BIND(&next);
1287 }
1288
1289 const TNode<Object> result = RegExpInitialize(
1290 context, var_regexp.value(), var_pattern.value(), var_flags.value());
1291 Return(result);
1292 }
1293
1294 // ES#sec-regexp.prototype.compile
1295 // RegExp.prototype.compile ( pattern, flags )
TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler)1296 TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) {
1297 auto maybe_receiver = Parameter<Object>(Descriptor::kReceiver);
1298 auto maybe_pattern = Parameter<Object>(Descriptor::kPattern);
1299 auto maybe_flags = Parameter<Object>(Descriptor::kFlags);
1300 auto context = Parameter<Context>(Descriptor::kContext);
1301
1302 ThrowIfNotInstanceType(context, maybe_receiver, JS_REG_EXP_TYPE,
1303 "RegExp.prototype.compile");
1304 const TNode<JSRegExp> receiver = CAST(maybe_receiver);
1305
1306 TVARIABLE(Object, var_flags, maybe_flags);
1307 TVARIABLE(Object, var_pattern, maybe_pattern);
1308
1309 // Handle a JSRegExp pattern.
1310 {
1311 Label next(this);
1312
1313 GotoIf(TaggedIsSmi(maybe_pattern), &next);
1314 GotoIfNot(IsJSRegExp(CAST(maybe_pattern)), &next);
1315
1316 // {maybe_flags} must be undefined in this case, otherwise throw.
1317 {
1318 Label maybe_flags_is_undefined(this);
1319 GotoIf(IsUndefined(maybe_flags), &maybe_flags_is_undefined);
1320
1321 ThrowTypeError(context, MessageTemplate::kRegExpFlags);
1322
1323 BIND(&maybe_flags_is_undefined);
1324 }
1325
1326 const TNode<JSRegExp> pattern = CAST(maybe_pattern);
1327 const TNode<String> new_flags = FlagsGetter(context, pattern, true);
1328 const TNode<Object> new_pattern =
1329 LoadObjectField(pattern, JSRegExp::kSourceOffset);
1330
1331 var_flags = new_flags;
1332 var_pattern = new_pattern;
1333
1334 Goto(&next);
1335 BIND(&next);
1336 }
1337
1338 const TNode<Object> result = RegExpInitialize(
1339 context, receiver, var_pattern.value(), var_flags.value());
1340 Return(result);
1341 }
1342
1343 // Fast-path implementation for flag checks on an unmodified JSRegExp instance.
FastFlagGetter(TNode<JSRegExp> regexp, JSRegExp::Flag flag)1344 TNode<BoolT> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp,
1345 JSRegExp::Flag flag) {
1346 TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
1347 TNode<Smi> mask = SmiConstant(flag);
1348 return ReinterpretCast<BoolT>(SmiToInt32(
1349 SmiShr(SmiAnd(flags, mask),
1350 base::bits::CountTrailingZeros(static_cast<int>(flag)))));
1351 }
1352
1353 // Load through the GetProperty stub.
SlowFlagGetter(TNode<Context> context, TNode<Object> regexp, JSRegExp::Flag flag)1354 TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
1355 TNode<Object> regexp,
1356 JSRegExp::Flag flag) {
1357 Label out(this), if_true(this), if_false(this);
1358 TVARIABLE(BoolT, var_result);
1359
1360 // Only enabled based on a runtime flag.
1361 if (flag == JSRegExp::kLinear) {
1362 TNode<Word32T> flag_value = UncheckedCast<Word32T>(Load(
1363 MachineType::Uint8(),
1364 ExternalConstant(ExternalReference::
1365 address_of_enable_experimental_regexp_engine())));
1366 GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)),
1367 Int32Constant(0)),
1368 &if_false);
1369 }
1370
1371 Handle<String> name;
1372 switch (flag) {
1373 case JSRegExp::kNone:
1374 UNREACHABLE();
1375 #define V(Lower, Camel, LowerCamel, Char, Bit) \
1376 case JSRegExp::k##Camel: \
1377 name = isolate()->factory()->LowerCamel##_string(); \
1378 break;
1379 REGEXP_FLAG_LIST(V)
1380 #undef V
1381 }
1382
1383 TNode<Object> value = GetProperty(context, regexp, name);
1384 BranchIfToBooleanIsTrue(value, &if_true, &if_false);
1385
1386 BIND(&if_true);
1387 var_result = BoolConstant(true);
1388 Goto(&out);
1389
1390 BIND(&if_false);
1391 var_result = BoolConstant(false);
1392 Goto(&out);
1393
1394 BIND(&out);
1395 return var_result.value();
1396 }
1397
FlagGetter(TNode<Context> context, TNode<Object> regexp, JSRegExp::Flag flag, bool is_fastpath)1398 TNode<BoolT> RegExpBuiltinsAssembler::FlagGetter(TNode<Context> context,
1399 TNode<Object> regexp,
1400 JSRegExp::Flag flag,
1401 bool is_fastpath) {
1402 return is_fastpath ? FastFlagGetter(CAST(regexp), flag)
1403 : SlowFlagGetter(context, regexp, flag);
1404 }
1405
AdvanceStringIndex( TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode, bool is_fastpath)1406 TNode<Number> RegExpBuiltinsAssembler::AdvanceStringIndex(
1407 TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode,
1408 bool is_fastpath) {
1409 CSA_DCHECK(this, IsNumberNormalized(index));
1410 if (is_fastpath) CSA_DCHECK(this, TaggedIsPositiveSmi(index));
1411
1412 // Default to last_index + 1.
1413 // TODO(pwong): Consider using TrySmiAdd for the fast path to reduce generated
1414 // code.
1415 TNode<Number> index_plus_one = NumberInc(index);
1416 TVARIABLE(Number, var_result, index_plus_one);
1417
1418 // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT
1419 // anyway, consider using UintPtrT index to simplify the code below.
1420
1421 // Advancing the index has some subtle issues involving the distinction
1422 // between Smis and HeapNumbers. There's three cases:
1423 // * {index} is a Smi, {index_plus_one} is a Smi. The standard case.
1424 // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber.
1425 // In this case we can return the result early, because
1426 // {index_plus_one} > {string}.length.
1427 // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only
1428 // occur when {index} is outside the Smi range since we normalize
1429 // explicitly. Again we can return early.
1430 if (is_fastpath) {
1431 // Must be in Smi range on the fast path. We control the value of {index}
1432 // on all call-sites and can never exceed the length of the string.
1433 STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue);
1434 CSA_DCHECK(this, TaggedIsPositiveSmi(index_plus_one));
1435 }
1436
1437 Label if_isunicode(this), out(this);
1438 GotoIfNot(is_unicode, &out);
1439
1440 // Keep this unconditional (even on the fast path) just to be safe.
1441 Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out);
1442
1443 BIND(&if_isunicode);
1444 {
1445 TNode<UintPtrT> string_length = Unsigned(LoadStringLengthAsWord(string));
1446 TNode<UintPtrT> untagged_plus_one =
1447 Unsigned(SmiUntag(CAST(index_plus_one)));
1448 GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out);
1449
1450 TNode<Int32T> lead =
1451 StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index))));
1452 GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)),
1453 Int32Constant(0xD800)),
1454 &out);
1455
1456 TNode<Int32T> trail = StringCharCodeAt(string, untagged_plus_one);
1457 GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)),
1458 Int32Constant(0xDC00)),
1459 &out);
1460
1461 // At a surrogate pair, return index + 2.
1462 TNode<Number> index_plus_two = NumberInc(index_plus_one);
1463 var_result = index_plus_two;
1464
1465 Goto(&out);
1466 }
1467
1468 BIND(&out);
1469 return var_result.value();
1470 }
1471
1472 // ES#sec-createregexpstringiterator
1473 // CreateRegExpStringIterator ( R, S, global, fullUnicode )
CreateRegExpStringIterator( TNode<NativeContext> native_context, TNode<Object> regexp, TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode)1474 TNode<Object> RegExpMatchAllAssembler::CreateRegExpStringIterator(
1475 TNode<NativeContext> native_context, TNode<Object> regexp,
1476 TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode) {
1477 TNode<Map> map = CAST(LoadContextElement(
1478 native_context,
1479 Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX));
1480
1481 // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, «
1482 // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]],
1483 // [[Done]] »).
1484 TNode<HeapObject> iterator = Allocate(JSRegExpStringIterator::kHeaderSize);
1485 StoreMapNoWriteBarrier(iterator, map);
1486 StoreObjectFieldRoot(iterator,
1487 JSRegExpStringIterator::kPropertiesOrHashOffset,
1488 RootIndex::kEmptyFixedArray);
1489 StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset,
1490 RootIndex::kEmptyFixedArray);
1491
1492 // 5. Set iterator.[[IteratingRegExp]] to R.
1493 StoreObjectFieldNoWriteBarrier(
1494 iterator, JSRegExpStringIterator::kIteratingRegExpOffset, regexp);
1495
1496 // 6. Set iterator.[[IteratedString]] to S.
1497 StoreObjectFieldNoWriteBarrier(
1498 iterator, JSRegExpStringIterator::kIteratedStringOffset, string);
1499
1500 // 7. Set iterator.[[Global]] to global.
1501 // 8. Set iterator.[[Unicode]] to fullUnicode.
1502 // 9. Set iterator.[[Done]] to false.
1503 TNode<Int32T> global_flag =
1504 Word32Shl(ReinterpretCast<Int32T>(global),
1505 Int32Constant(JSRegExpStringIterator::GlobalBit::kShift));
1506 TNode<Int32T> unicode_flag =
1507 Word32Shl(ReinterpretCast<Int32T>(full_unicode),
1508 Int32Constant(JSRegExpStringIterator::UnicodeBit::kShift));
1509 TNode<Int32T> iterator_flags = Word32Or(global_flag, unicode_flag);
1510 StoreObjectFieldNoWriteBarrier(iterator, JSRegExpStringIterator::kFlagsOffset,
1511 SmiFromInt32(iterator_flags));
1512
1513 return iterator;
1514 }
1515
1516 // Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
1517 // JSRegExp, {string} is a String, and {limit} is a Smi.
RegExpPrototypeSplitBody( TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, const TNode<Smi> limit)1518 TNode<JSArray> RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(
1519 TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
1520 const TNode<Smi> limit) {
1521 CSA_DCHECK(this, IsFastRegExpPermissive(context, regexp));
1522 CSA_DCHECK(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
1523
1524 const TNode<IntPtrT> int_limit = SmiUntag(limit);
1525
1526 const ElementsKind kind = PACKED_ELEMENTS;
1527
1528 const TNode<NativeContext> native_context = LoadNativeContext(context);
1529 TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1530
1531 Label return_empty_array(this, Label::kDeferred);
1532 TVARIABLE(JSArray, var_result);
1533 Label done(this);
1534
1535 // If limit is zero, return an empty array.
1536 {
1537 Label next(this), if_limitiszero(this, Label::kDeferred);
1538 Branch(SmiEqual(limit, SmiZero()), &return_empty_array, &next);
1539 BIND(&next);
1540 }
1541
1542 const TNode<Smi> string_length = LoadStringLengthAsSmi(string);
1543
1544 // If passed the empty {string}, return either an empty array or a singleton
1545 // array depending on whether the {regexp} matches.
1546 {
1547 Label next(this), if_stringisempty(this, Label::kDeferred);
1548 Branch(SmiEqual(string_length, SmiZero()), &if_stringisempty, &next);
1549
1550 BIND(&if_stringisempty);
1551 {
1552 const TNode<Object> last_match_info = LoadContextElement(
1553 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1554
1555 const TNode<Object> match_indices =
1556 CallBuiltin(Builtin::kRegExpExecInternal, context, regexp, string,
1557 SmiZero(), last_match_info);
1558
1559 Label return_singleton_array(this);
1560 Branch(IsNull(match_indices), &return_singleton_array,
1561 &return_empty_array);
1562
1563 BIND(&return_singleton_array);
1564 {
1565 TNode<Smi> length = SmiConstant(1);
1566 TNode<IntPtrT> capacity = IntPtrConstant(1);
1567 base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1568 var_result =
1569 AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1570
1571 TNode<FixedArray> fixed_array = CAST(LoadElements(var_result.value()));
1572 UnsafeStoreFixedArrayElement(fixed_array, 0, string);
1573
1574 Goto(&done);
1575 }
1576 }
1577
1578 BIND(&next);
1579 }
1580
1581 // Loop preparations.
1582
1583 GrowableFixedArray array(state());
1584
1585 TVARIABLE(Smi, var_last_matched_until, SmiZero());
1586 TVARIABLE(Smi, var_next_search_from, SmiZero());
1587
1588 Label loop(this, {array.var_array(), array.var_length(), array.var_capacity(),
1589 &var_last_matched_until, &var_next_search_from}),
1590 push_suffix_and_out(this), out(this);
1591 Goto(&loop);
1592
1593 BIND(&loop);
1594 {
1595 const TNode<Smi> next_search_from = var_next_search_from.value();
1596 const TNode<Smi> last_matched_until = var_last_matched_until.value();
1597
1598 // We're done if we've reached the end of the string.
1599 {
1600 Label next(this);
1601 Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out,
1602 &next);
1603 BIND(&next);
1604 }
1605
1606 // Search for the given {regexp}.
1607
1608 const TNode<Object> last_match_info = LoadContextElement(
1609 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
1610
1611 const TNode<HeapObject> match_indices_ho = RegExpExecInternal(
1612 context, regexp, string, next_search_from, CAST(last_match_info),
1613 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure);
1614
1615 // We're done if no match was found.
1616 {
1617 Label next(this);
1618 Branch(IsNull(match_indices_ho), &push_suffix_and_out, &next);
1619 BIND(&next);
1620 }
1621
1622 TNode<FixedArray> match_indices = CAST(match_indices_ho);
1623 const TNode<Smi> match_from = CAST(UnsafeLoadFixedArrayElement(
1624 match_indices, RegExpMatchInfo::kFirstCaptureIndex));
1625 const TNode<Smi> match_to = CAST(UnsafeLoadFixedArrayElement(
1626 match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1));
1627 CSA_DCHECK(this, SmiNotEqual(match_from, string_length));
1628
1629 // Advance index and continue if the match is empty.
1630 {
1631 Label next(this);
1632
1633 GotoIfNot(SmiEqual(match_to, next_search_from), &next);
1634 GotoIfNot(SmiEqual(match_to, last_matched_until), &next);
1635
1636 const TNode<BoolT> is_unicode =
1637 FastFlagGetter(regexp, JSRegExp::kUnicode);
1638 const TNode<Number> new_next_search_from =
1639 AdvanceStringIndex(string, next_search_from, is_unicode, true);
1640 var_next_search_from = CAST(new_next_search_from);
1641 Goto(&loop);
1642
1643 BIND(&next);
1644 }
1645
1646 // A valid match was found, add the new substring to the array.
1647 {
1648 const TNode<Smi> from = last_matched_until;
1649 const TNode<Smi> to = match_from;
1650 array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1651 GotoIf(WordEqual(array.length(), int_limit), &out);
1652 }
1653
1654 // Add all captures to the array.
1655 {
1656 const TNode<Smi> num_registers = CAST(LoadFixedArrayElement(
1657 match_indices, RegExpMatchInfo::kNumberOfCapturesIndex));
1658 const TNode<IntPtrT> int_num_registers = SmiUntag(num_registers);
1659
1660 TVARIABLE(IntPtrT, var_reg, IntPtrConstant(2));
1661
1662 Label nested_loop(this, {array.var_array(), array.var_length(),
1663 array.var_capacity(), &var_reg}),
1664 nested_loop_out(this);
1665 Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop,
1666 &nested_loop_out);
1667
1668 BIND(&nested_loop);
1669 {
1670 const TNode<IntPtrT> reg = var_reg.value();
1671 const TNode<Object> from = LoadFixedArrayElement(
1672 match_indices, reg,
1673 RegExpMatchInfo::kFirstCaptureIndex * kTaggedSize);
1674 const TNode<Smi> to = CAST(LoadFixedArrayElement(
1675 match_indices, reg,
1676 (RegExpMatchInfo::kFirstCaptureIndex + 1) * kTaggedSize));
1677
1678 Label select_capture(this), select_undefined(this), store_value(this);
1679 TVARIABLE(Object, var_value);
1680 Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined,
1681 &select_capture);
1682
1683 BIND(&select_capture);
1684 {
1685 var_value =
1686 CallBuiltin(Builtin::kSubString, context, string, from, to);
1687 Goto(&store_value);
1688 }
1689
1690 BIND(&select_undefined);
1691 {
1692 var_value = UndefinedConstant();
1693 Goto(&store_value);
1694 }
1695
1696 BIND(&store_value);
1697 {
1698 array.Push(var_value.value());
1699 GotoIf(WordEqual(array.length(), int_limit), &out);
1700
1701 const TNode<IntPtrT> new_reg = IntPtrAdd(reg, IntPtrConstant(2));
1702 var_reg = new_reg;
1703
1704 Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
1705 &nested_loop_out);
1706 }
1707 }
1708
1709 BIND(&nested_loop_out);
1710 }
1711
1712 var_last_matched_until = match_to;
1713 var_next_search_from = match_to;
1714 Goto(&loop);
1715 }
1716
1717 BIND(&push_suffix_and_out);
1718 {
1719 const TNode<Smi> from = var_last_matched_until.value();
1720 const TNode<Smi> to = string_length;
1721 array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to));
1722 Goto(&out);
1723 }
1724
1725 BIND(&out);
1726 {
1727 var_result = array.ToJSArray(context);
1728 Goto(&done);
1729 }
1730
1731 BIND(&return_empty_array);
1732 {
1733 TNode<Smi> length = SmiZero();
1734 TNode<IntPtrT> capacity = IntPtrZero();
1735 base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt;
1736 var_result =
1737 AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1738 Goto(&done);
1739 }
1740
1741 BIND(&done);
1742 return var_result.value();
1743 }
1744
1745 } // namespace internal
1746 } // namespace v8
1747