1// Copyright 2017 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/builtins/builtins-regexp-gen.h" 6 7#include "src/builtins/builtins-constructor-gen.h" 8#include "src/builtins/builtins-utils-gen.h" 9#include "src/builtins/builtins.h" 10#include "src/builtins/growable-fixed-array-gen.h" 11#include "src/codegen/code-factory.h" 12#include "src/codegen/code-stub-assembler.h" 13#include "src/codegen/macro-assembler.h" 14#include "src/common/globals.h" 15#include "src/execution/protectors.h" 16#include "src/heap/factory-inl.h" 17#include "src/logging/counters.h" 18#include "src/objects/js-regexp-string-iterator.h" 19#include "src/objects/js-regexp.h" 20#include "src/objects/regexp-match-info.h" 21#include "src/regexp/regexp-flags.h" 22 23namespace v8 { 24namespace internal { 25 26// Tail calls the regular expression interpreter. 27// static 28void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) { 29 ExternalReference interpreter_code_entry = 30 ExternalReference::re_match_for_call_from_js(); 31 masm->Jump(interpreter_code_entry); 32} 33 34// Tail calls the experimental regular expression engine. 35// static 36void Builtins::Generate_RegExpExperimentalTrampoline(MacroAssembler* masm) { 37 ExternalReference interpreter_code_entry = 38 ExternalReference::re_experimental_match_for_call_from_js(); 39 masm->Jump(interpreter_code_entry); 40} 41 42TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); } 43 44TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() { 45 return IntPtrConstant(0); 46} 47 48// If code is a builtin, return the address to the (possibly embedded) builtin 49// code entry, otherwise return the entry of the code object itself. 
50TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<CodeT> code) { 51 if (V8_EXTERNAL_CODE_SPACE_BOOL) { 52 // When external code space is enabled we can load the entry point directly 53 // from the CodeT object. 54 return GetCodeEntry(code); 55 } 56 57 TVARIABLE(RawPtrT, var_result); 58 59 Label if_code_is_off_heap(this), out(this); 60 TNode<Int32T> builtin_index = 61 LoadObjectField<Int32T>(code, Code::kBuiltinIndexOffset); 62 { 63 GotoIfNot( 64 Word32Equal(builtin_index, 65 Int32Constant(static_cast<int>(Builtin::kNoBuiltinId))), 66 &if_code_is_off_heap); 67 var_result = ReinterpretCast<RawPtrT>( 68 IntPtrAdd(BitcastTaggedToWord(code), 69 IntPtrConstant(Code::kHeaderSize - kHeapObjectTag))); 70 Goto(&out); 71 } 72 73 BIND(&if_code_is_off_heap); 74 { 75 TNode<IntPtrT> builtin_entry_offset_from_isolate_root = 76 IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()), 77 ChangeInt32ToIntPtr(Word32Shl( 78 builtin_index, Int32Constant(kSystemPointerSizeLog2)))); 79 80 var_result = ReinterpretCast<RawPtrT>( 81 Load(MachineType::Pointer(), 82 ExternalConstant(ExternalReference::isolate_root(isolate())), 83 builtin_entry_offset_from_isolate_root)); 84 Goto(&out); 85 } 86 87 BIND(&out); 88 return var_result.value(); 89} 90 91// ----------------------------------------------------------------------------- 92// ES6 section 21.2 RegExp Objects 93 94TNode<JSRegExpResult> RegExpBuiltinsAssembler::AllocateRegExpResult( 95 TNode<Context> context, TNode<Smi> length, TNode<Smi> index, 96 TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index, 97 TNode<BoolT> has_indices, TNode<FixedArray>* elements_out) { 98 CSA_DCHECK(this, SmiLessThanOrEqual( 99 length, SmiConstant(JSArray::kMaxFastArrayLength))); 100 CSA_DCHECK(this, SmiGreaterThan(length, SmiConstant(0))); 101 102 // Allocate. 
103 104 Label result_has_indices(this), allocated(this); 105 const ElementsKind elements_kind = PACKED_ELEMENTS; 106 base::Optional<TNode<AllocationSite>> no_gc_site = base::nullopt; 107 TNode<IntPtrT> length_intptr = SmiUntag(length); 108 // Note: The returned `var_elements` may be in young large object space, but 109 // `var_array` is guaranteed to be in new space so we could skip write 110 // barriers below. 111 TVARIABLE(JSArray, var_array); 112 TVARIABLE(FixedArrayBase, var_elements); 113 114 GotoIf(has_indices, &result_has_indices); 115 { 116 TNode<Map> map = CAST(LoadContextElement(LoadNativeContext(context), 117 Context::REGEXP_RESULT_MAP_INDEX)); 118 std::tie(var_array, var_elements) = 119 AllocateUninitializedJSArrayWithElements( 120 elements_kind, map, length, no_gc_site, length_intptr, 121 AllocationFlag::kAllowLargeObjectAllocation, JSRegExpResult::kSize); 122 Goto(&allocated); 123 } 124 125 BIND(&result_has_indices); 126 { 127 TNode<Map> map = 128 CAST(LoadContextElement(LoadNativeContext(context), 129 Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX)); 130 std::tie(var_array, var_elements) = 131 AllocateUninitializedJSArrayWithElements( 132 elements_kind, map, length, no_gc_site, length_intptr, 133 AllocationFlag::kAllowLargeObjectAllocation, 134 JSRegExpResultWithIndices::kSize); 135 Goto(&allocated); 136 } 137 138 BIND(&allocated); 139 140 // Finish result initialization. 141 142 TNode<JSRegExpResult> result = 143 UncheckedCast<JSRegExpResult>(var_array.value()); 144 145 // Load undefined value once here to avoid multiple LoadRoots. 146 TNode<Oddball> undefined_value = UncheckedCast<Oddball>( 147 CodeAssembler::LoadRoot(RootIndex::kUndefinedValue)); 148 149 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index); 150 // TODO(jgruber,turbofan): Could skip barrier but the MemoryOptimizer 151 // complains. 
152 StoreObjectField(result, JSRegExpResult::kInputOffset, input); 153 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset, 154 undefined_value); 155 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kNamesOffset, 156 undefined_value); 157 158 StoreObjectField(result, JSRegExpResult::kRegexpInputOffset, input); 159 160 // If non-smi last_index then store an SmiZero instead. 161 { 162 TNode<Smi> last_index_smi = Select<Smi>( 163 TaggedIsSmi(last_index), [=] { return CAST(last_index); }, 164 [=] { return SmiZero(); }); 165 StoreObjectField(result, JSRegExpResult::kRegexpLastIndexOffset, 166 last_index_smi); 167 } 168 169 Label finish_initialization(this); 170 GotoIfNot(has_indices, &finish_initialization); 171 { 172 static_assert( 173 std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value, 174 "JSRegExpResultWithIndices is a subclass of JSRegExpResult"); 175 StoreObjectFieldNoWriteBarrier( 176 result, JSRegExpResultWithIndices::kIndicesOffset, undefined_value); 177 Goto(&finish_initialization); 178 } 179 180 BIND(&finish_initialization); 181 182 // Finish elements initialization. 183 184 FillFixedArrayWithValue(elements_kind, var_elements.value(), IntPtrZero(), 185 length_intptr, RootIndex::kUndefinedValue); 186 187 if (elements_out) *elements_out = CAST(var_elements.value()); 188 return result; 189} 190 191TNode<Object> RegExpBuiltinsAssembler::FastLoadLastIndexBeforeSmiCheck( 192 TNode<JSRegExp> regexp) { 193 // Load the in-object field. 194 static const int field_offset = 195 JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize; 196 return LoadObjectField(regexp, field_offset); 197} 198 199TNode<Object> RegExpBuiltinsAssembler::SlowLoadLastIndex(TNode<Context> context, 200 TNode<Object> regexp) { 201 return GetProperty(context, regexp, isolate()->factory()->lastIndex_string()); 202} 203 204// The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified 205// JSRegExp instance. 
206void RegExpBuiltinsAssembler::FastStoreLastIndex(TNode<JSRegExp> regexp, 207 TNode<Smi> value) { 208 // Store the in-object field. 209 static const int field_offset = 210 JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize; 211 StoreObjectField(regexp, field_offset, value); 212} 213 214void RegExpBuiltinsAssembler::SlowStoreLastIndex(TNode<Context> context, 215 TNode<Object> regexp, 216 TNode<Object> value) { 217 TNode<String> name = HeapConstant(isolate()->factory()->lastIndex_string()); 218 SetPropertyStrict(context, regexp, name, value); 219} 220 221TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo( 222 TNode<Context> context, TNode<JSRegExp> regexp, 223 TNode<RegExpMatchInfo> match_info, TNode<String> string, 224 TNode<Number> last_index) { 225 Label named_captures(this), maybe_build_indices(this), out(this); 226 227 TNode<IntPtrT> num_indices = SmiUntag(CAST(UnsafeLoadFixedArrayElement( 228 match_info, RegExpMatchInfo::kNumberOfCapturesIndex))); 229 TNode<Smi> num_results = SmiTag(WordShr(num_indices, 1)); 230 TNode<Smi> start = CAST(UnsafeLoadFixedArrayElement( 231 match_info, RegExpMatchInfo::kFirstCaptureIndex)); 232 TNode<Smi> end = CAST(UnsafeLoadFixedArrayElement( 233 match_info, RegExpMatchInfo::kFirstCaptureIndex + 1)); 234 235 // Calculate the substring of the first match before creating the result array 236 // to avoid an unnecessary write barrier storing the first result. 237 238 TNode<String> first = 239 CAST(CallBuiltin(Builtin::kSubString, context, string, start, end)); 240 241 // Load flags and check if the result object needs to have indices. 
242 const TNode<Smi> flags = 243 CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset)); 244 const TNode<BoolT> has_indices = IsSetSmi(flags, JSRegExp::kHasIndices); 245 TNode<FixedArray> result_elements; 246 TNode<JSRegExpResult> result = 247 AllocateRegExpResult(context, num_results, start, string, regexp, 248 last_index, has_indices, &result_elements); 249 250 UnsafeStoreFixedArrayElement(result_elements, 0, first); 251 252 // If no captures exist we can skip named capture handling as well. 253 GotoIf(SmiEqual(num_results, SmiConstant(1)), &maybe_build_indices); 254 255 // Store all remaining captures. 256 TNode<IntPtrT> limit = IntPtrAdd( 257 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices); 258 259 TVARIABLE(IntPtrT, var_from_cursor, 260 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2)); 261 TVARIABLE(IntPtrT, var_to_cursor, IntPtrConstant(1)); 262 263 Label loop(this, {&var_from_cursor, &var_to_cursor}); 264 265 Goto(&loop); 266 BIND(&loop); 267 { 268 TNode<IntPtrT> from_cursor = var_from_cursor.value(); 269 TNode<IntPtrT> to_cursor = var_to_cursor.value(); 270 TNode<Smi> start_cursor = 271 CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor)); 272 273 Label next_iter(this); 274 GotoIf(SmiEqual(start_cursor, SmiConstant(-1)), &next_iter); 275 276 TNode<IntPtrT> from_cursor_plus1 = 277 IntPtrAdd(from_cursor, IntPtrConstant(1)); 278 TNode<Smi> end_cursor = 279 CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor_plus1)); 280 281 TNode<String> capture = CAST(CallBuiltin(Builtin::kSubString, context, 282 string, start_cursor, end_cursor)); 283 UnsafeStoreFixedArrayElement(result_elements, to_cursor, capture); 284 Goto(&next_iter); 285 286 BIND(&next_iter); 287 var_from_cursor = IntPtrAdd(from_cursor, IntPtrConstant(2)); 288 var_to_cursor = IntPtrAdd(to_cursor, IntPtrConstant(1)); 289 Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop, 290 &named_captures); 291 } 292 293 BIND(&named_captures); 294 { 295 CSA_DCHECK(this, 
SmiGreaterThan(num_results, SmiConstant(1))); 296 297 // Preparations for named capture properties. Exit early if the result does 298 // not have any named captures to minimize performance impact. 299 300 TNode<FixedArray> data = 301 CAST(LoadObjectField(regexp, JSRegExp::kDataOffset)); 302 303 // We reach this point only if captures exist, implying that the assigned 304 // regexp engine must be able to handle captures. 305 CSA_DCHECK( 306 this, 307 Word32Or( 308 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)), 309 SmiConstant(JSRegExp::IRREGEXP)), 310 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)), 311 SmiConstant(JSRegExp::EXPERIMENTAL)))); 312 313 // The names fixed array associates names at even indices with a capture 314 // index at odd indices. 315 TNode<Object> maybe_names = 316 LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex); 317 GotoIf(TaggedEqual(maybe_names, SmiZero()), &maybe_build_indices); 318 319 // One or more named captures exist, add a property for each one. 320 321 TNode<FixedArray> names = CAST(maybe_names); 322 TNode<IntPtrT> names_length = LoadAndUntagFixedArrayBaseLength(names); 323 CSA_DCHECK(this, IntPtrGreaterThan(names_length, IntPtrZero())); 324 325 // Stash names in case we need them to build the indices array later. 326 StoreObjectField(result, JSRegExpResult::kNamesOffset, names); 327 328 // Allocate a new object to store the named capture properties. 329 // TODO(jgruber): Could be optimized by adding the object map to the heap 330 // root list. 
331 332 TNode<IntPtrT> num_properties = WordSar(names_length, 1); 333 TNode<NativeContext> native_context = LoadNativeContext(context); 334 TNode<Map> map = LoadSlowObjectWithNullPrototypeMap(native_context); 335 TNode<HeapObject> properties; 336 if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) { 337 properties = AllocateSwissNameDictionary(num_properties); 338 } else { 339 properties = AllocateNameDictionary( 340 num_properties, AllocationFlag::kAllowLargeObjectAllocation); 341 } 342 343 TNode<JSObject> group_object = AllocateJSObjectFromMap(map, properties); 344 StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object); 345 346 TVARIABLE(IntPtrT, var_i, IntPtrZero()); 347 348 Label inner_loop(this, &var_i); 349 350 Goto(&inner_loop); 351 BIND(&inner_loop); 352 { 353 TNode<IntPtrT> i = var_i.value(); 354 TNode<IntPtrT> i_plus_1 = IntPtrAdd(i, IntPtrConstant(1)); 355 TNode<IntPtrT> i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1)); 356 357 TNode<String> name = CAST(LoadFixedArrayElement(names, i)); 358 TNode<Smi> index = CAST(LoadFixedArrayElement(names, i_plus_1)); 359 TNode<HeapObject> capture = 360 CAST(LoadFixedArrayElement(result_elements, SmiUntag(index))); 361 362 // TODO(v8:8213): For maintainability, we should call a CSA/Torque 363 // implementation of CreateDataProperty instead. 364 365 // At this point the spec says to call CreateDataProperty. 
However, we can 366 // skip most of the steps and go straight to adding a dictionary entry 367 // because we know a bunch of useful facts: 368 // - All keys are non-numeric internalized strings 369 // - No keys repeat 370 // - Receiver has no prototype 371 // - Receiver isn't used as a prototype 372 // - Receiver isn't any special object like a Promise intrinsic object 373 // - Receiver is extensible 374 // - Receiver has no interceptors 375 Label add_dictionary_property_slow(this, Label::kDeferred); 376 Add<PropertyDictionary>(CAST(properties), name, capture, 377 &add_dictionary_property_slow); 378 379 var_i = i_plus_2; 380 Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length), 381 &maybe_build_indices, &inner_loop); 382 383 BIND(&add_dictionary_property_slow); 384 // If the dictionary needs resizing, the above Add call will jump here 385 // before making any changes. This shouldn't happen because we allocated 386 // the dictionary with enough space above. 387 Unreachable(); 388 } 389 } 390 391 // Build indices if needed (i.e. if the /d flag is present) after named 392 // capture groups are processed. 
393 BIND(&maybe_build_indices); 394 GotoIfNot(has_indices, &out); 395 { 396 const TNode<Object> maybe_names = 397 LoadObjectField(result, JSRegExpResultWithIndices::kNamesOffset); 398 const TNode<JSRegExpResultIndices> indices = 399 UncheckedCast<JSRegExpResultIndices>( 400 CallRuntime(Runtime::kRegExpBuildIndices, context, regexp, 401 match_info, maybe_names)); 402 StoreObjectField(result, JSRegExpResultWithIndices::kIndicesOffset, 403 indices); 404 Goto(&out); 405 } 406 407 BIND(&out); 408 return result; 409} 410 411void RegExpBuiltinsAssembler::GetStringPointers( 412 TNode<RawPtrT> string_data, TNode<IntPtrT> offset, 413 TNode<IntPtrT> last_index, TNode<IntPtrT> string_length, 414 String::Encoding encoding, TVariable<RawPtrT>* var_string_start, 415 TVariable<RawPtrT>* var_string_end) { 416 DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation()); 417 DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation()); 418 419 const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING) 420 ? UINT8_ELEMENTS 421 : UINT16_ELEMENTS; 422 423 TNode<IntPtrT> from_offset = 424 ElementOffsetFromIndex(IntPtrAdd(offset, last_index), kind); 425 *var_string_start = 426 ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, from_offset)); 427 428 TNode<IntPtrT> to_offset = 429 ElementOffsetFromIndex(IntPtrAdd(offset, string_length), kind); 430 *var_string_end = ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, to_offset)); 431} 432 433TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( 434 TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, 435 TNode<Number> last_index, TNode<RegExpMatchInfo> match_info, 436 RegExp::ExecQuirks exec_quirks) { 437 ToDirectStringAssembler to_direct(state(), string); 438 439 TVARIABLE(HeapObject, var_result); 440 Label out(this), atom(this), runtime(this, Label::kDeferred), 441 retry_experimental(this, Label::kDeferred); 442 443 // External constants. 
444 TNode<ExternalReference> isolate_address = 445 ExternalConstant(ExternalReference::isolate_address(isolate())); 446 TNode<ExternalReference> static_offsets_vector_address = ExternalConstant( 447 ExternalReference::address_of_static_offsets_vector(isolate())); 448 449 // At this point, last_index is definitely a canonicalized non-negative 450 // number, which implies that any non-Smi last_index is greater than 451 // the maximal string length. If lastIndex > string.length then the matcher 452 // must fail. 453 454 Label if_failure(this); 455 456 CSA_DCHECK(this, IsNumberNormalized(last_index)); 457 CSA_DCHECK(this, IsNumberPositive(last_index)); 458 GotoIf(TaggedIsNotSmi(last_index), &if_failure); 459 460 TNode<IntPtrT> int_string_length = LoadStringLengthAsWord(string); 461 TNode<IntPtrT> int_last_index = SmiUntag(CAST(last_index)); 462 463 GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure); 464 465 // Since the RegExp has been compiled, data contains a fixed array. 466 TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset)); 467 { 468 // Dispatch on the type of the RegExp. 
469 { 470 Label next(this), unreachable(this, Label::kDeferred); 471 TNode<Int32T> tag = LoadAndUntagToWord32FixedArrayElement( 472 data, IntPtrConstant(JSRegExp::kTagIndex)); 473 474 int32_t values[] = { 475 JSRegExp::IRREGEXP, 476 JSRegExp::ATOM, 477 JSRegExp::EXPERIMENTAL, 478 }; 479 Label* labels[] = {&next, &atom, &next}; 480 481 STATIC_ASSERT(arraysize(values) == arraysize(labels)); 482 Switch(tag, &unreachable, values, labels, arraysize(values)); 483 484 BIND(&unreachable); 485 Unreachable(); 486 487 BIND(&next); 488 } 489 490 // Check (number_of_captures + 1) * 2 <= offsets vector size 491 // Or number_of_captures <= offsets vector size / 2 - 1 492 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement( 493 data, JSRegExp::kIrregexpCaptureCountIndex)); 494 495 const int kOffsetsSize = Isolate::kJSRegexpStaticOffsetsVectorSize; 496 STATIC_ASSERT(kOffsetsSize >= 2); 497 GotoIf(SmiAbove(capture_count, SmiConstant(kOffsetsSize / 2 - 1)), 498 &runtime); 499 } 500 501 // Unpack the string if possible. 502 503 to_direct.TryToDirect(&runtime); 504 505 // Load the irregexp code or bytecode object and offsets into the subject 506 // string. Both depend on whether the string is one- or two-byte. 
507 508 TVARIABLE(RawPtrT, var_string_start); 509 TVARIABLE(RawPtrT, var_string_end); 510 TVARIABLE(Object, var_code); 511 TVARIABLE(Object, var_bytecode); 512 513 { 514 TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime); 515 516 Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred); 517 Branch(IsOneByteStringInstanceType(to_direct.instance_type()), 518 &if_isonebyte, &if_istwobyte); 519 520 BIND(&if_isonebyte); 521 { 522 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index, 523 int_string_length, String::ONE_BYTE_ENCODING, 524 &var_string_start, &var_string_end); 525 var_code = 526 UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex); 527 var_bytecode = UnsafeLoadFixedArrayElement( 528 data, JSRegExp::kIrregexpLatin1BytecodeIndex); 529 Goto(&next); 530 } 531 532 BIND(&if_istwobyte); 533 { 534 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index, 535 int_string_length, String::TWO_BYTE_ENCODING, 536 &var_string_start, &var_string_end); 537 var_code = 538 UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex); 539 var_bytecode = UnsafeLoadFixedArrayElement( 540 data, JSRegExp::kIrregexpUC16BytecodeIndex); 541 Goto(&next); 542 } 543 544 BIND(&next); 545 } 546 547 // Check that the irregexp code has been generated for the actual string 548 // encoding. If it has, the field contains a code object; and otherwise it 549 // contains the uninitialized sentinel as a smi. 
550#ifdef DEBUG 551 { 552 Label next(this); 553 GotoIfNot(TaggedIsSmi(var_code.value()), &next); 554 CSA_DCHECK(this, SmiEqual(CAST(var_code.value()), 555 SmiConstant(JSRegExp::kUninitializedValue))); 556 Goto(&next); 557 BIND(&next); 558 } 559#endif 560 561 GotoIf(TaggedIsSmi(var_code.value()), &runtime); 562 TNode<CodeT> code = CAST(var_code.value()); 563 564 Label if_success(this), if_exception(this, Label::kDeferred); 565 { 566 IncrementCounter(isolate()->counters()->regexp_entry_native(), 1); 567 568 // Set up args for the final call into generated Irregexp code. 569 570 MachineType type_int32 = MachineType::Int32(); 571 MachineType type_tagged = MachineType::AnyTagged(); 572 MachineType type_ptr = MachineType::Pointer(); 573 574 // Result: A NativeRegExpMacroAssembler::Result return code. 575 MachineType retval_type = type_int32; 576 577 // Argument 0: Original subject string. 578 MachineType arg0_type = type_tagged; 579 TNode<String> arg0 = string; 580 581 // Argument 1: Previous index. 582 MachineType arg1_type = type_int32; 583 TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index); 584 585 // Argument 2: Start of string data. This argument is ignored in the 586 // interpreter. 587 MachineType arg2_type = type_ptr; 588 TNode<RawPtrT> arg2 = var_string_start.value(); 589 590 // Argument 3: End of string data. This argument is ignored in the 591 // interpreter. 592 MachineType arg3_type = type_ptr; 593 TNode<RawPtrT> arg3 = var_string_end.value(); 594 595 // Argument 4: static offsets vector buffer. 596 MachineType arg4_type = type_ptr; 597 TNode<ExternalReference> arg4 = static_offsets_vector_address; 598 599 // Argument 5: Number of capture registers. 600 // Setting this to the number of registers required to store all captures 601 // forces global regexps to behave as non-global. 
602 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement( 603 data, JSRegExp::kIrregexpCaptureCountIndex)); 604 // capture_count is the number of captures without the match itself. 605 // Required registers = (capture_count + 1) * 2. 606 STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) * 2)); 607 TNode<Smi> register_count = 608 SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1); 609 610 MachineType arg5_type = type_int32; 611 TNode<Int32T> arg5 = SmiToInt32(register_count); 612 613 // Argument 6: Indicate that this is a direct call from JavaScript. 614 MachineType arg6_type = type_int32; 615 TNode<Int32T> arg6 = Int32Constant(RegExp::CallOrigin::kFromJs); 616 617 // Argument 7: Pass current isolate address. 618 MachineType arg7_type = type_ptr; 619 TNode<ExternalReference> arg7 = isolate_address; 620 621 // Argument 8: Regular expression object. This argument is ignored in native 622 // irregexp code. 623 MachineType arg8_type = type_tagged; 624 TNode<JSRegExp> arg8 = regexp; 625 626 TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code); 627 628 // AIX uses function descriptors on CFunction calls. code_entry in this case 629 // may also point to a Regex interpreter entry trampoline which does not 630 // have a function descriptor. This method is ineffective on other platforms 631 // and is equivalent to CallCFunction. 632 TNode<Int32T> result = 633 UncheckedCast<Int32T>(CallCFunctionWithoutFunctionDescriptor( 634 code_entry, retval_type, std::make_pair(arg0_type, arg0), 635 std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2), 636 std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4), 637 std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6), 638 std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8))); 639 640 // Check the result. 641 // We expect exactly one result since we force the called regexp to behave 642 // as non-global. 
643 TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result); 644 GotoIf( 645 IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)), 646 &if_success); 647 GotoIf( 648 IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)), 649 &if_failure); 650 GotoIf(IntPtrEqual(int_result, 651 IntPtrConstant(RegExp::kInternalRegExpException)), 652 &if_exception); 653 GotoIf(IntPtrEqual( 654 int_result, 655 IntPtrConstant(RegExp::kInternalRegExpFallbackToExperimental)), 656 &retry_experimental); 657 658 CSA_DCHECK(this, IntPtrEqual(int_result, 659 IntPtrConstant(RegExp::kInternalRegExpRetry))); 660 Goto(&runtime); 661 } 662 663 BIND(&if_success); 664 { 665 if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) { 666 static constexpr int kMatchStartOffset = 0; 667 TNode<IntPtrT> value = ChangeInt32ToIntPtr(UncheckedCast<Int32T>( 668 Load(MachineType::Int32(), static_offsets_vector_address, 669 IntPtrConstant(kMatchStartOffset)))); 670 GotoIf(UintPtrGreaterThanOrEqual(value, int_string_length), &if_failure); 671 } 672 673 // Check that the last match info has space for the capture registers and 674 // the additional information. Ensure no overflow in add. 675 STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset); 676 TNode<Smi> available_slots = 677 SmiSub(LoadFixedArrayBaseLength(match_info), 678 SmiConstant(RegExpMatchInfo::kLastMatchOverhead)); 679 TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement( 680 data, JSRegExp::kIrregexpCaptureCountIndex)); 681 // Calculate number of register_count = (capture_count + 1) * 2. 682 TNode<Smi> register_count = 683 SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1); 684 GotoIf(SmiGreaterThan(register_count, available_slots), &runtime); 685 686 // Fill match_info. 
687 UnsafeStoreFixedArrayElement( 688 match_info, RegExpMatchInfo::kNumberOfCapturesIndex, register_count); 689 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex, 690 string); 691 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex, 692 string); 693 694 // Fill match and capture offsets in match_info. 695 { 696 TNode<IntPtrT> limit_offset = 697 ElementOffsetFromIndex(register_count, INT32_ELEMENTS, 0); 698 699 TNode<IntPtrT> to_offset = ElementOffsetFromIndex( 700 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS, 701 RegExpMatchInfo::kHeaderSize - kHeapObjectTag); 702 TVARIABLE(IntPtrT, var_to_offset, to_offset); 703 704 VariableList vars({&var_to_offset}, zone()); 705 BuildFastLoop<IntPtrT>( 706 vars, IntPtrZero(), limit_offset, 707 [&](TNode<IntPtrT> offset) { 708 TNode<Int32T> value = UncheckedCast<Int32T>(Load( 709 MachineType::Int32(), static_offsets_vector_address, offset)); 710 TNode<Smi> smi_value = SmiFromInt32(value); 711 StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info, 712 var_to_offset.value(), smi_value); 713 Increment(&var_to_offset, kTaggedSize); 714 }, 715 kInt32Size, IndexAdvanceMode::kPost); 716 } 717 718 var_result = match_info; 719 Goto(&out); 720 } 721 722 BIND(&if_failure); 723 { 724 var_result = NullConstant(); 725 Goto(&out); 726 } 727 728 BIND(&if_exception); 729 { 730// A stack overflow was detected in RegExp code. 731#ifdef DEBUG 732 TNode<ExternalReference> pending_exception_address = 733 ExternalConstant(ExternalReference::Create( 734 IsolateAddressId::kPendingExceptionAddress, isolate())); 735 CSA_DCHECK(this, IsTheHole(Load<Object>(pending_exception_address))); 736#endif // DEBUG 737 CallRuntime(Runtime::kThrowStackOverflow, context); 738 Unreachable(); 739 } 740 741 BIND(&retry_experimental); 742 { 743 auto target_fn = 744 exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure 745 ? 
Runtime::kRegExpExperimentalOneshotExecTreatMatchAtEndAsFailure
            : Runtime::kRegExpExperimentalOneshotExec;
    var_result = CAST(CallRuntime(target_fn, context, regexp, string,
                                  last_index, match_info));
    Goto(&out);
  }

  BIND(&runtime);
  {
    auto target_fn =
        exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure
            ? Runtime::kRegExpExecTreatMatchAtEndAsFailure
            : Runtime::kRegExpExec;
    var_result = CAST(CallRuntime(target_fn, context, regexp, string,
                                  last_index, match_info));
    Goto(&out);
  }

  BIND(&atom);
  {
    // TODO(jgruber): A call with 4 args stresses register allocation, this
    // should probably just be inlined.
    var_result = CAST(CallBuiltin(Builtin::kRegExpExecAtom, context, regexp,
                                  string, last_index, match_info));
    Goto(&out);
  }

  BIND(&out);
  return var_result.value();
}

// Returns true iff {map} is identical to the value stored in the
// prototype-or-initial-map slot of the native context's RegExp function AND
// the lastIndex of {object} is a non-negative Smi. No prototype check is
// performed here (hence the name). {map} is expected to be the map of
// {object}; {object} is CAST to the type FastLoadLastIndexBeforeSmiCheck
// expects, so it must be a heap object.
TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
    TNode<Context> context, TNode<Object> object, TNode<Map> map) {
  Label out(this);
  TVARIABLE(BoolT, var_result);

#ifdef V8_ENABLE_FORCE_SLOW_PATH
  // Pre-set the result to false so that a forced slow path reports "not fast".
  var_result = Int32FalseConstant();
  GotoIfForceSlowPath(&out);
#endif

  const TNode<NativeContext> native_context = LoadNativeContext(context);
  const TNode<HeapObject> regexp_fun =
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
  const TNode<Object> initial_map =
      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
  const TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);

  // If the map differs we are done; the result is already false.
  var_result = has_initialmap;
  GotoIfNot(has_initialmap, &out);

  // The smi check is required to omit ToLength(lastIndex) calls with possible
  // user-code execution on the fast path.
  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
  var_result = TaggedIsPositiveSmi(last_index);
  Goto(&out);

  BIND(&out);
  return var_result.value();
}

// Convenience overload that loads the map from {object} itself. {object} must
// not be a Smi (checked in debug builds only).
TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
    TNode<Context> context, TNode<Object> object) {
  CSA_DCHECK(this, TaggedIsNotSmi(object));
  return IsFastRegExpNoPrototype(context, object, LoadMap(CAST(object)));
}

// Jumps to {if_isunmodified} if {object} passes all of the checks below, and
// to {if_ismodified} as soon as one of them fails:
//   1. the force-slow-path switch is not taken,
//   2. the RegExp species protector cell is valid,
//   3. {map} equals the value in the RegExp function's
//      prototype-or-initial-map slot,
//   4. lastIndex of {object} is a non-negative Smi,
//   5. the prototype passes PrototypeCheckAssembler for "exec" and, if given,
//      for {additional_property_to_check}, using {prototype_check_flags}.
// {map} must be the map of {object} (checked in debug builds only).
void RegExpBuiltinsAssembler::BranchIfFastRegExp(
    TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
    PrototypeCheckAssembler::Flags prototype_check_flags,
    base::Optional<DescriptorIndexNameValue> additional_property_to_check,
    Label* if_isunmodified, Label* if_ismodified) {
  CSA_DCHECK(this, TaggedEqual(LoadMap(object), map));

  GotoIfForceSlowPath(if_ismodified);

  // This should only be needed for String.p.(split||matchAll), but we are
  // conservative here.
  GotoIf(IsRegExpSpeciesProtectorCellInvalid(), if_ismodified);

  TNode<NativeContext> native_context = LoadNativeContext(context);
  TNode<JSFunction> regexp_fun =
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
  TNode<Map> initial_map = CAST(
      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset));
  TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);

  GotoIfNot(has_initialmap, if_ismodified);

  // The smi check is required to omit ToLength(lastIndex) calls with possible
  // user-code execution on the fast path.
  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
  GotoIfNot(TaggedIsPositiveSmi(last_index), if_ismodified);

  // Verify the prototype.

  TNode<Map> initial_proto_initial_map = CAST(
      LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX));

  // "exec" is always checked; at most one further property may be added, which
  // is why the array has room for exactly two entries.
  DescriptorIndexNameValue properties_to_check[2];
  int property_count = 0;
  properties_to_check[property_count++] = DescriptorIndexNameValue{
      JSRegExp::kExecFunctionDescriptorIndex, RootIndex::kexec_string,
      Context::REGEXP_EXEC_FUNCTION_INDEX};
  if (additional_property_to_check) {
    properties_to_check[property_count++] = *additional_property_to_check;
  }

  PrototypeCheckAssembler prototype_check_assembler(
      state(), prototype_check_flags, native_context, initial_proto_initial_map,
      base::Vector<DescriptorIndexNameValue>(properties_to_check,
                                             property_count));

  TNode<HeapObject> prototype = LoadMapPrototype(map);
  prototype_check_assembler.CheckAndBranch(prototype, if_isunmodified,
                                           if_ismodified);
}

// BranchIfFastRegExp with the constness-based prototype check, additionally
// verifying the @@search property of the prototype.
void RegExpBuiltinsAssembler::BranchIfFastRegExpForSearch(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(
      context, object, LoadMap(object),
      PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
      DescriptorIndexNameValue{JSRegExp::kSymbolSearchFunctionDescriptorIndex,
                               RootIndex::ksearch_symbol,
                               Context::REGEXP_SEARCH_FUNCTION_INDEX},
      if_isunmodified, if_ismodified);
}

// BranchIfFastRegExp with the constness-based prototype check, additionally
// verifying the @@match property of the prototype.
void RegExpBuiltinsAssembler::BranchIfFastRegExpForMatch(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(
      context, object, LoadMap(object),
      PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
      DescriptorIndexNameValue{JSRegExp::kSymbolMatchFunctionDescriptorIndex,
                               RootIndex::kmatch_symbol,
                               Context::REGEXP_MATCH_FUNCTION_INDEX},
      if_isunmodified, if_ismodified);
}

// BranchIfFastRegExp with the constness-based prototype check and no
// additional property (only "exec" is verified on the prototype).
void RegExpBuiltinsAssembler::BranchIfFastRegExp_Strict(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(context, object, LoadMap(object),
                     PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
                     base::nullopt, if_isunmodified, if_ismodified);
}

// BranchIfFastRegExp with PrototypeCheckAssembler::kCheckFull and no
// additional property (only "exec" is verified on the prototype).
void RegExpBuiltinsAssembler::BranchIfFastRegExp_Permissive(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(context, object, LoadMap(object),
                     PrototypeCheckAssembler::kCheckFull, base::nullopt,
                     if_isunmodified, if_ismodified);
}

// Jumps to {if_isunmodified} if the receiver map of {object} is one of the two
// regexp result maps stored in the native context (plain result, or result
// with indices); otherwise jumps to {if_ismodified}.
void RegExpBuiltinsAssembler::BranchIfRegExpResult(const TNode<Context> context,
                                                   const TNode<Object> object,
                                                   Label* if_isunmodified,
                                                   Label* if_ismodified) {
  // Could be a Smi.
  const TNode<Map> map = LoadReceiverMap(object);

  const TNode<NativeContext> native_context = LoadNativeContext(context);
  const TNode<Object> initial_regexp_result_map =
      LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);

  Label maybe_result_with_indices(this);
  Branch(TaggedEqual(map, initial_regexp_result_map), if_isunmodified,
         &maybe_result_with_indices);
  BIND(&maybe_result_with_indices);
  {
    // A result-with-indices object is also a regexp result, so its map is
    // accepted as well.
    static_assert(
        std::is_base_of<JSRegExpResult, JSRegExpResultWithIndices>::value,
        "JSRegExpResultWithIndices is a subclass of JSRegExpResult");
    const TNode<Object> initial_regexp_result_with_indices_map =
        LoadContextElement(native_context,
                           Context::REGEXP_RESULT_WITH_INDICES_MAP_INDEX);
    Branch(TaggedEqual(map, initial_regexp_result_with_indices_map),
           if_isunmodified, if_ismodified);
  }
}

// Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
// and {match_info} is updated on success.
// The slow path is implemented in RegExp::AtomExec.
932TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) { 933 auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp); 934 auto subject_string = Parameter<String>(Descriptor::kString); 935 auto last_index = Parameter<Smi>(Descriptor::kLastIndex); 936 auto match_info = Parameter<FixedArray>(Descriptor::kMatchInfo); 937 auto context = Parameter<Context>(Descriptor::kContext); 938 939 CSA_DCHECK(this, TaggedIsPositiveSmi(last_index)); 940 941 TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset)); 942 CSA_DCHECK( 943 this, 944 SmiEqual(CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kTagIndex)), 945 SmiConstant(JSRegExp::ATOM))); 946 947 // Callers ensure that last_index is in-bounds. 948 CSA_DCHECK(this, 949 UintPtrLessThanOrEqual(SmiUntag(last_index), 950 LoadStringLengthAsWord(subject_string))); 951 952 const TNode<String> needle_string = 953 CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex)); 954 955 // ATOM patterns are guaranteed to not be the empty string (these are 956 // intercepted and replaced in JSRegExp::Initialize. 957 // 958 // This is especially relevant for crbug.com/1075514: atom patterns are 959 // non-empty and thus guaranteed not to match at the end of the string. 
960 CSA_DCHECK(this, IntPtrGreaterThan(LoadStringLengthAsWord(needle_string), 961 IntPtrConstant(0))); 962 963 const TNode<Smi> match_from = 964 CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string, 965 needle_string, last_index)); 966 967 Label if_failure(this), if_success(this); 968 Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success); 969 970 BIND(&if_success); 971 { 972 CSA_DCHECK(this, TaggedIsPositiveSmi(match_from)); 973 CSA_DCHECK(this, UintPtrLessThan(SmiUntag(match_from), 974 LoadStringLengthAsWord(subject_string))); 975 976 const int kNumRegisters = 2; 977 STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters); 978 979 const TNode<Smi> match_to = 980 SmiAdd(match_from, LoadStringLengthAsSmi(needle_string)); 981 982 UnsafeStoreFixedArrayElement(match_info, 983 RegExpMatchInfo::kNumberOfCapturesIndex, 984 SmiConstant(kNumRegisters)); 985 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex, 986 subject_string); 987 UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex, 988 subject_string); 989 UnsafeStoreFixedArrayElement( 990 match_info, RegExpMatchInfo::kFirstCaptureIndex, match_from); 991 UnsafeStoreFixedArrayElement( 992 match_info, RegExpMatchInfo::kFirstCaptureIndex + 1, match_to); 993 994 Return(match_info); 995 } 996 997 BIND(&if_failure); 998 Return(NullConstant()); 999} 1000 1001TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) { 1002 auto regexp = Parameter<JSRegExp>(Descriptor::kRegExp); 1003 auto string = Parameter<String>(Descriptor::kString); 1004 auto last_index = Parameter<Number>(Descriptor::kLastIndex); 1005 auto match_info = Parameter<RegExpMatchInfo>(Descriptor::kMatchInfo); 1006 auto context = Parameter<Context>(Descriptor::kContext); 1007 1008 CSA_DCHECK(this, IsNumberNormalized(last_index)); 1009 CSA_DCHECK(this, IsNumberPositive(last_index)); 1010 1011 Return(RegExpExecInternal(context, regexp, string, last_index, match_info)); 1012} 
1013 1014TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, 1015 TNode<Object> regexp, 1016 bool is_fastpath) { 1017 TVARIABLE(String, result); 1018 Label runtime(this, Label::kDeferred), done(this, &result); 1019 if (is_fastpath) { 1020 GotoIfForceSlowPath(&runtime); 1021 } 1022 1023 Isolate* isolate = this->isolate(); 1024 1025 const TNode<IntPtrT> int_one = IntPtrConstant(1); 1026 TVARIABLE(Uint32T, var_length, Uint32Constant(0)); 1027 TVARIABLE(IntPtrT, var_flags); 1028 1029 // First, count the number of characters we will need and check which flags 1030 // are set. 1031 1032 if (is_fastpath) { 1033 // Refer to JSRegExp's flag property on the fast-path. 1034 CSA_DCHECK(this, IsJSRegExp(CAST(regexp))); 1035 const TNode<Smi> flags_smi = 1036 CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset)); 1037 var_flags = SmiUntag(flags_smi); 1038 1039#define CASE_FOR_FLAG(Lower, Camel, ...) \ 1040 do { \ 1041 Label next(this); \ 1042 GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \ 1043 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \ 1044 Goto(&next); \ 1045 BIND(&next); \ 1046 } while (false); 1047 1048 REGEXP_FLAG_LIST(CASE_FOR_FLAG) 1049#undef CASE_FOR_FLAG 1050 } else { 1051 DCHECK(!is_fastpath); 1052 1053 // Fall back to GetProperty stub on the slow-path. 
1054 var_flags = IntPtrZero(); 1055 1056#define CASE_FOR_FLAG(NAME, FLAG) \ 1057 do { \ 1058 Label next(this); \ 1059 const TNode<Object> flag = GetProperty( \ 1060 context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \ 1061 Label if_isflagset(this); \ 1062 BranchIfToBooleanIsTrue(flag, &if_isflagset, &next); \ 1063 BIND(&if_isflagset); \ 1064 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \ 1065 var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG))); \ 1066 Goto(&next); \ 1067 BIND(&next); \ 1068 } while (false) 1069 1070 CASE_FOR_FLAG("global", JSRegExp::kGlobal); 1071 CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase); 1072 CASE_FOR_FLAG("multiline", JSRegExp::kMultiline); 1073 CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll); 1074 CASE_FOR_FLAG("unicode", JSRegExp::kUnicode); 1075 CASE_FOR_FLAG("sticky", JSRegExp::kSticky); 1076 CASE_FOR_FLAG("hasIndices", JSRegExp::kHasIndices); 1077#undef CASE_FOR_FLAG 1078 1079#define CASE_FOR_FLAG(NAME, V8_FLAG_EXTERN_REF, FLAG) \ 1080 do { \ 1081 Label next(this); \ 1082 TNode<Word32T> flag_value = UncheckedCast<Word32T>( \ 1083 Load(MachineType::Uint8(), ExternalConstant(V8_FLAG_EXTERN_REF))); \ 1084 GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)), \ 1085 Int32Constant(0)), \ 1086 &next); \ 1087 const TNode<Object> flag = GetProperty( \ 1088 context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \ 1089 Label if_isflagset(this); \ 1090 BranchIfToBooleanIsTrue(flag, &if_isflagset, &next); \ 1091 BIND(&if_isflagset); \ 1092 var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \ 1093 var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG))); \ 1094 Goto(&next); \ 1095 BIND(&next); \ 1096 } while (false) 1097 1098 CASE_FOR_FLAG( 1099 "linear", 1100 ExternalReference::address_of_enable_experimental_regexp_engine(), 1101 JSRegExp::kLinear); 1102#undef CASE_FOR_FLAG 1103 } 1104 1105 // Allocate a string of the required length and fill it with the 
1106 // corresponding char for each set flag. 1107 1108 { 1109 const TNode<String> string = AllocateSeqOneByteString(var_length.value()); 1110 1111 TVARIABLE(IntPtrT, var_offset, 1112 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag)); 1113 1114#define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...) \ 1115 do { \ 1116 Label next(this); \ 1117 GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \ 1118 const TNode<Int32T> value = Int32Constant(Char); \ 1119 StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \ 1120 var_offset.value(), value); \ 1121 var_offset = IntPtrAdd(var_offset.value(), int_one); \ 1122 Goto(&next); \ 1123 BIND(&next); \ 1124 } while (false); 1125 1126 REGEXP_FLAG_LIST(CASE_FOR_FLAG) 1127#undef CASE_FOR_FLAG 1128 1129 if (is_fastpath) { 1130#ifdef V8_ENABLE_FORCE_SLOW_PATH 1131 result = string; 1132 Goto(&done); 1133 1134 BIND(&runtime); 1135 { 1136 result = 1137 CAST(CallRuntime(Runtime::kRegExpStringFromFlags, context, regexp)); 1138 Goto(&done); 1139 } 1140 1141 BIND(&done); 1142 return result.value(); 1143#else 1144 return string; 1145#endif 1146 } else { 1147 return string; 1148 } 1149 } 1150} 1151 1152// ES#sec-regexpinitialize 1153// Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) 1154TNode<Object> RegExpBuiltinsAssembler::RegExpInitialize( 1155 const TNode<Context> context, const TNode<JSRegExp> regexp, 1156 const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags) { 1157 // Normalize pattern. 1158 const TNode<Object> pattern = Select<Object>( 1159 IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); }, 1160 [=] { return ToString_Inline(context, maybe_pattern); }); 1161 1162 // Normalize flags. 1163 const TNode<Object> flags = Select<Object>( 1164 IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); }, 1165 [=] { return ToString_Inline(context, maybe_flags); }); 1166 1167 // Initialize. 
1168 1169 return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp, 1170 pattern, flags); 1171} 1172 1173// ES#sec-regexp-pattern-flags 1174// RegExp ( pattern, flags ) 1175TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) { 1176 auto pattern = Parameter<Object>(Descriptor::kPattern); 1177 auto flags = Parameter<Object>(Descriptor::kFlags); 1178 auto new_target = Parameter<Object>(Descriptor::kJSNewTarget); 1179 auto context = Parameter<Context>(Descriptor::kContext); 1180 1181 Isolate* isolate = this->isolate(); 1182 1183 TVARIABLE(Object, var_flags, flags); 1184 TVARIABLE(Object, var_pattern, pattern); 1185 TVARIABLE(Object, var_new_target, new_target); 1186 1187 TNode<NativeContext> native_context = LoadNativeContext(context); 1188 TNode<JSFunction> regexp_function = 1189 CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX)); 1190 1191 TNode<BoolT> pattern_is_regexp = IsRegExp(context, pattern); 1192 1193 { 1194 Label next(this); 1195 1196 GotoIfNot(IsUndefined(new_target), &next); 1197 var_new_target = regexp_function; 1198 1199 GotoIfNot(pattern_is_regexp, &next); 1200 GotoIfNot(IsUndefined(flags), &next); 1201 1202 TNode<Object> value = 1203 GetProperty(context, pattern, isolate->factory()->constructor_string()); 1204 1205 GotoIfNot(TaggedEqual(value, regexp_function), &next); 1206 Return(pattern); 1207 1208 BIND(&next); 1209 } 1210 1211 { 1212 Label next(this), if_patternisfastregexp(this), 1213 if_patternisslowregexp(this); 1214 GotoIf(TaggedIsSmi(pattern), &next); 1215 1216 GotoIf(IsJSRegExp(CAST(pattern)), &if_patternisfastregexp); 1217 1218 Branch(pattern_is_regexp, &if_patternisslowregexp, &next); 1219 1220 BIND(&if_patternisfastregexp); 1221 { 1222 TNode<Object> source = 1223 LoadObjectField(CAST(pattern), JSRegExp::kSourceOffset); 1224 var_pattern = source; 1225 1226 { 1227 Label inner_next(this); 1228 GotoIfNot(IsUndefined(flags), &inner_next); 1229 1230 var_flags = FlagsGetter(context, pattern, true); 1231 
Goto(&inner_next); 1232 1233 BIND(&inner_next); 1234 } 1235 1236 Goto(&next); 1237 } 1238 1239 BIND(&if_patternisslowregexp); 1240 { 1241 var_pattern = 1242 GetProperty(context, pattern, isolate->factory()->source_string()); 1243 1244 { 1245 Label inner_next(this); 1246 GotoIfNot(IsUndefined(flags), &inner_next); 1247 1248 var_flags = 1249 GetProperty(context, pattern, isolate->factory()->flags_string()); 1250 Goto(&inner_next); 1251 1252 BIND(&inner_next); 1253 } 1254 1255 Goto(&next); 1256 } 1257 1258 BIND(&next); 1259 } 1260 1261 // Allocate. 1262 1263 TVARIABLE(JSRegExp, var_regexp); 1264 { 1265 Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred), 1266 next(this); 1267 Branch(TaggedEqual(var_new_target.value(), regexp_function), 1268 &allocate_jsregexp, &allocate_generic); 1269 1270 BIND(&allocate_jsregexp); 1271 { 1272 const TNode<Map> initial_map = CAST(LoadObjectField( 1273 regexp_function, JSFunction::kPrototypeOrInitialMapOffset)); 1274 var_regexp = CAST(AllocateJSObjectFromMap(initial_map)); 1275 Goto(&next); 1276 } 1277 1278 BIND(&allocate_generic); 1279 { 1280 ConstructorBuiltinsAssembler constructor_assembler(this->state()); 1281 var_regexp = CAST(constructor_assembler.FastNewObject( 1282 context, regexp_function, CAST(var_new_target.value()))); 1283 Goto(&next); 1284 } 1285 1286 BIND(&next); 1287 } 1288 1289 const TNode<Object> result = RegExpInitialize( 1290 context, var_regexp.value(), var_pattern.value(), var_flags.value()); 1291 Return(result); 1292} 1293 1294// ES#sec-regexp.prototype.compile 1295// RegExp.prototype.compile ( pattern, flags ) 1296TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) { 1297 auto maybe_receiver = Parameter<Object>(Descriptor::kReceiver); 1298 auto maybe_pattern = Parameter<Object>(Descriptor::kPattern); 1299 auto maybe_flags = Parameter<Object>(Descriptor::kFlags); 1300 auto context = Parameter<Context>(Descriptor::kContext); 1301 1302 ThrowIfNotInstanceType(context, maybe_receiver, 
JS_REG_EXP_TYPE, 1303 "RegExp.prototype.compile"); 1304 const TNode<JSRegExp> receiver = CAST(maybe_receiver); 1305 1306 TVARIABLE(Object, var_flags, maybe_flags); 1307 TVARIABLE(Object, var_pattern, maybe_pattern); 1308 1309 // Handle a JSRegExp pattern. 1310 { 1311 Label next(this); 1312 1313 GotoIf(TaggedIsSmi(maybe_pattern), &next); 1314 GotoIfNot(IsJSRegExp(CAST(maybe_pattern)), &next); 1315 1316 // {maybe_flags} must be undefined in this case, otherwise throw. 1317 { 1318 Label maybe_flags_is_undefined(this); 1319 GotoIf(IsUndefined(maybe_flags), &maybe_flags_is_undefined); 1320 1321 ThrowTypeError(context, MessageTemplate::kRegExpFlags); 1322 1323 BIND(&maybe_flags_is_undefined); 1324 } 1325 1326 const TNode<JSRegExp> pattern = CAST(maybe_pattern); 1327 const TNode<String> new_flags = FlagsGetter(context, pattern, true); 1328 const TNode<Object> new_pattern = 1329 LoadObjectField(pattern, JSRegExp::kSourceOffset); 1330 1331 var_flags = new_flags; 1332 var_pattern = new_pattern; 1333 1334 Goto(&next); 1335 BIND(&next); 1336 } 1337 1338 const TNode<Object> result = RegExpInitialize( 1339 context, receiver, var_pattern.value(), var_flags.value()); 1340 Return(result); 1341} 1342 1343// Fast-path implementation for flag checks on an unmodified JSRegExp instance. 1344TNode<BoolT> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp, 1345 JSRegExp::Flag flag) { 1346 TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset)); 1347 TNode<Smi> mask = SmiConstant(flag); 1348 return ReinterpretCast<BoolT>(SmiToInt32( 1349 SmiShr(SmiAnd(flags, mask), 1350 base::bits::CountTrailingZeros(static_cast<int>(flag))))); 1351} 1352 1353// Load through the GetProperty stub. 1354TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context, 1355 TNode<Object> regexp, 1356 JSRegExp::Flag flag) { 1357 Label out(this), if_true(this), if_false(this); 1358 TVARIABLE(BoolT, var_result); 1359 1360 // Only enabled based on a runtime flag. 
1361 if (flag == JSRegExp::kLinear) { 1362 TNode<Word32T> flag_value = UncheckedCast<Word32T>(Load( 1363 MachineType::Uint8(), 1364 ExternalConstant(ExternalReference:: 1365 address_of_enable_experimental_regexp_engine()))); 1366 GotoIf(Word32Equal(Word32And(flag_value, Int32Constant(0xFF)), 1367 Int32Constant(0)), 1368 &if_false); 1369 } 1370 1371 Handle<String> name; 1372 switch (flag) { 1373 case JSRegExp::kNone: 1374 UNREACHABLE(); 1375#define V(Lower, Camel, LowerCamel, Char, Bit) \ 1376 case JSRegExp::k##Camel: \ 1377 name = isolate()->factory()->LowerCamel##_string(); \ 1378 break; 1379 REGEXP_FLAG_LIST(V) 1380#undef V 1381 } 1382 1383 TNode<Object> value = GetProperty(context, regexp, name); 1384 BranchIfToBooleanIsTrue(value, &if_true, &if_false); 1385 1386 BIND(&if_true); 1387 var_result = BoolConstant(true); 1388 Goto(&out); 1389 1390 BIND(&if_false); 1391 var_result = BoolConstant(false); 1392 Goto(&out); 1393 1394 BIND(&out); 1395 return var_result.value(); 1396} 1397 1398TNode<BoolT> RegExpBuiltinsAssembler::FlagGetter(TNode<Context> context, 1399 TNode<Object> regexp, 1400 JSRegExp::Flag flag, 1401 bool is_fastpath) { 1402 return is_fastpath ? FastFlagGetter(CAST(regexp), flag) 1403 : SlowFlagGetter(context, regexp, flag); 1404} 1405 1406TNode<Number> RegExpBuiltinsAssembler::AdvanceStringIndex( 1407 TNode<String> string, TNode<Number> index, TNode<BoolT> is_unicode, 1408 bool is_fastpath) { 1409 CSA_DCHECK(this, IsNumberNormalized(index)); 1410 if (is_fastpath) CSA_DCHECK(this, TaggedIsPositiveSmi(index)); 1411 1412 // Default to last_index + 1. 1413 // TODO(pwong): Consider using TrySmiAdd for the fast path to reduce generated 1414 // code. 1415 TNode<Number> index_plus_one = NumberInc(index); 1416 TVARIABLE(Number, var_result, index_plus_one); 1417 1418 // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT 1419 // anyway, consider using UintPtrT index to simplify the code below. 
1420 1421 // Advancing the index has some subtle issues involving the distinction 1422 // between Smis and HeapNumbers. There's three cases: 1423 // * {index} is a Smi, {index_plus_one} is a Smi. The standard case. 1424 // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber. 1425 // In this case we can return the result early, because 1426 // {index_plus_one} > {string}.length. 1427 // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only 1428 // occur when {index} is outside the Smi range since we normalize 1429 // explicitly. Again we can return early. 1430 if (is_fastpath) { 1431 // Must be in Smi range on the fast path. We control the value of {index} 1432 // on all call-sites and can never exceed the length of the string. 1433 STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue); 1434 CSA_DCHECK(this, TaggedIsPositiveSmi(index_plus_one)); 1435 } 1436 1437 Label if_isunicode(this), out(this); 1438 GotoIfNot(is_unicode, &out); 1439 1440 // Keep this unconditional (even on the fast path) just to be safe. 1441 Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out); 1442 1443 BIND(&if_isunicode); 1444 { 1445 TNode<UintPtrT> string_length = Unsigned(LoadStringLengthAsWord(string)); 1446 TNode<UintPtrT> untagged_plus_one = 1447 Unsigned(SmiUntag(CAST(index_plus_one))); 1448 GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out); 1449 1450 TNode<Int32T> lead = 1451 StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index)))); 1452 GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)), 1453 Int32Constant(0xD800)), 1454 &out); 1455 1456 TNode<Int32T> trail = StringCharCodeAt(string, untagged_plus_one); 1457 GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)), 1458 Int32Constant(0xDC00)), 1459 &out); 1460 1461 // At a surrogate pair, return index + 2. 
1462 TNode<Number> index_plus_two = NumberInc(index_plus_one); 1463 var_result = index_plus_two; 1464 1465 Goto(&out); 1466 } 1467 1468 BIND(&out); 1469 return var_result.value(); 1470} 1471 1472// ES#sec-createregexpstringiterator 1473// CreateRegExpStringIterator ( R, S, global, fullUnicode ) 1474TNode<Object> RegExpMatchAllAssembler::CreateRegExpStringIterator( 1475 TNode<NativeContext> native_context, TNode<Object> regexp, 1476 TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode) { 1477 TNode<Map> map = CAST(LoadContextElement( 1478 native_context, 1479 Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX)); 1480 1481 // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, « 1482 // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]], 1483 // [[Done]] »). 1484 TNode<HeapObject> iterator = Allocate(JSRegExpStringIterator::kHeaderSize); 1485 StoreMapNoWriteBarrier(iterator, map); 1486 StoreObjectFieldRoot(iterator, 1487 JSRegExpStringIterator::kPropertiesOrHashOffset, 1488 RootIndex::kEmptyFixedArray); 1489 StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset, 1490 RootIndex::kEmptyFixedArray); 1491 1492 // 5. Set iterator.[[IteratingRegExp]] to R. 1493 StoreObjectFieldNoWriteBarrier( 1494 iterator, JSRegExpStringIterator::kIteratingRegExpOffset, regexp); 1495 1496 // 6. Set iterator.[[IteratedString]] to S. 1497 StoreObjectFieldNoWriteBarrier( 1498 iterator, JSRegExpStringIterator::kIteratedStringOffset, string); 1499 1500 // 7. Set iterator.[[Global]] to global. 1501 // 8. Set iterator.[[Unicode]] to fullUnicode. 1502 // 9. Set iterator.[[Done]] to false. 
1503 TNode<Int32T> global_flag = 1504 Word32Shl(ReinterpretCast<Int32T>(global), 1505 Int32Constant(JSRegExpStringIterator::GlobalBit::kShift)); 1506 TNode<Int32T> unicode_flag = 1507 Word32Shl(ReinterpretCast<Int32T>(full_unicode), 1508 Int32Constant(JSRegExpStringIterator::UnicodeBit::kShift)); 1509 TNode<Int32T> iterator_flags = Word32Or(global_flag, unicode_flag); 1510 StoreObjectFieldNoWriteBarrier(iterator, JSRegExpStringIterator::kFlagsOffset, 1511 SmiFromInt32(iterator_flags)); 1512 1513 return iterator; 1514} 1515 1516// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky 1517// JSRegExp, {string} is a String, and {limit} is a Smi. 1518TNode<JSArray> RegExpBuiltinsAssembler::RegExpPrototypeSplitBody( 1519 TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string, 1520 const TNode<Smi> limit) { 1521 CSA_DCHECK(this, IsFastRegExpPermissive(context, regexp)); 1522 CSA_DCHECK(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky))); 1523 1524 const TNode<IntPtrT> int_limit = SmiUntag(limit); 1525 1526 const ElementsKind kind = PACKED_ELEMENTS; 1527 1528 const TNode<NativeContext> native_context = LoadNativeContext(context); 1529 TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context); 1530 1531 Label return_empty_array(this, Label::kDeferred); 1532 TVARIABLE(JSArray, var_result); 1533 Label done(this); 1534 1535 // If limit is zero, return an empty array. 1536 { 1537 Label next(this), if_limitiszero(this, Label::kDeferred); 1538 Branch(SmiEqual(limit, SmiZero()), &return_empty_array, &next); 1539 BIND(&next); 1540 } 1541 1542 const TNode<Smi> string_length = LoadStringLengthAsSmi(string); 1543 1544 // If passed the empty {string}, return either an empty array or a singleton 1545 // array depending on whether the {regexp} matches. 
1546 { 1547 Label next(this), if_stringisempty(this, Label::kDeferred); 1548 Branch(SmiEqual(string_length, SmiZero()), &if_stringisempty, &next); 1549 1550 BIND(&if_stringisempty); 1551 { 1552 const TNode<Object> last_match_info = LoadContextElement( 1553 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX); 1554 1555 const TNode<Object> match_indices = 1556 CallBuiltin(Builtin::kRegExpExecInternal, context, regexp, string, 1557 SmiZero(), last_match_info); 1558 1559 Label return_singleton_array(this); 1560 Branch(IsNull(match_indices), &return_singleton_array, 1561 &return_empty_array); 1562 1563 BIND(&return_singleton_array); 1564 { 1565 TNode<Smi> length = SmiConstant(1); 1566 TNode<IntPtrT> capacity = IntPtrConstant(1); 1567 base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt; 1568 var_result = 1569 AllocateJSArray(kind, array_map, capacity, length, allocation_site); 1570 1571 TNode<FixedArray> fixed_array = CAST(LoadElements(var_result.value())); 1572 UnsafeStoreFixedArrayElement(fixed_array, 0, string); 1573 1574 Goto(&done); 1575 } 1576 } 1577 1578 BIND(&next); 1579 } 1580 1581 // Loop preparations. 1582 1583 GrowableFixedArray array(state()); 1584 1585 TVARIABLE(Smi, var_last_matched_until, SmiZero()); 1586 TVARIABLE(Smi, var_next_search_from, SmiZero()); 1587 1588 Label loop(this, {array.var_array(), array.var_length(), array.var_capacity(), 1589 &var_last_matched_until, &var_next_search_from}), 1590 push_suffix_and_out(this), out(this); 1591 Goto(&loop); 1592 1593 BIND(&loop); 1594 { 1595 const TNode<Smi> next_search_from = var_next_search_from.value(); 1596 const TNode<Smi> last_matched_until = var_last_matched_until.value(); 1597 1598 // We're done if we've reached the end of the string. 1599 { 1600 Label next(this); 1601 Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out, 1602 &next); 1603 BIND(&next); 1604 } 1605 1606 // Search for the given {regexp}. 
1607 1608 const TNode<Object> last_match_info = LoadContextElement( 1609 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX); 1610 1611 const TNode<HeapObject> match_indices_ho = RegExpExecInternal( 1612 context, regexp, string, next_search_from, CAST(last_match_info), 1613 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure); 1614 1615 // We're done if no match was found. 1616 { 1617 Label next(this); 1618 Branch(IsNull(match_indices_ho), &push_suffix_and_out, &next); 1619 BIND(&next); 1620 } 1621 1622 TNode<FixedArray> match_indices = CAST(match_indices_ho); 1623 const TNode<Smi> match_from = CAST(UnsafeLoadFixedArrayElement( 1624 match_indices, RegExpMatchInfo::kFirstCaptureIndex)); 1625 const TNode<Smi> match_to = CAST(UnsafeLoadFixedArrayElement( 1626 match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1)); 1627 CSA_DCHECK(this, SmiNotEqual(match_from, string_length)); 1628 1629 // Advance index and continue if the match is empty. 1630 { 1631 Label next(this); 1632 1633 GotoIfNot(SmiEqual(match_to, next_search_from), &next); 1634 GotoIfNot(SmiEqual(match_to, last_matched_until), &next); 1635 1636 const TNode<BoolT> is_unicode = 1637 FastFlagGetter(regexp, JSRegExp::kUnicode); 1638 const TNode<Number> new_next_search_from = 1639 AdvanceStringIndex(string, next_search_from, is_unicode, true); 1640 var_next_search_from = CAST(new_next_search_from); 1641 Goto(&loop); 1642 1643 BIND(&next); 1644 } 1645 1646 // A valid match was found, add the new substring to the array. 1647 { 1648 const TNode<Smi> from = last_matched_until; 1649 const TNode<Smi> to = match_from; 1650 array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to)); 1651 GotoIf(WordEqual(array.length(), int_limit), &out); 1652 } 1653 1654 // Add all captures to the array. 
1655 { 1656 const TNode<Smi> num_registers = CAST(LoadFixedArrayElement( 1657 match_indices, RegExpMatchInfo::kNumberOfCapturesIndex)); 1658 const TNode<IntPtrT> int_num_registers = SmiUntag(num_registers); 1659 1660 TVARIABLE(IntPtrT, var_reg, IntPtrConstant(2)); 1661 1662 Label nested_loop(this, {array.var_array(), array.var_length(), 1663 array.var_capacity(), &var_reg}), 1664 nested_loop_out(this); 1665 Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop, 1666 &nested_loop_out); 1667 1668 BIND(&nested_loop); 1669 { 1670 const TNode<IntPtrT> reg = var_reg.value(); 1671 const TNode<Object> from = LoadFixedArrayElement( 1672 match_indices, reg, 1673 RegExpMatchInfo::kFirstCaptureIndex * kTaggedSize); 1674 const TNode<Smi> to = CAST(LoadFixedArrayElement( 1675 match_indices, reg, 1676 (RegExpMatchInfo::kFirstCaptureIndex + 1) * kTaggedSize)); 1677 1678 Label select_capture(this), select_undefined(this), store_value(this); 1679 TVARIABLE(Object, var_value); 1680 Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined, 1681 &select_capture); 1682 1683 BIND(&select_capture); 1684 { 1685 var_value = 1686 CallBuiltin(Builtin::kSubString, context, string, from, to); 1687 Goto(&store_value); 1688 } 1689 1690 BIND(&select_undefined); 1691 { 1692 var_value = UndefinedConstant(); 1693 Goto(&store_value); 1694 } 1695 1696 BIND(&store_value); 1697 { 1698 array.Push(var_value.value()); 1699 GotoIf(WordEqual(array.length(), int_limit), &out); 1700 1701 const TNode<IntPtrT> new_reg = IntPtrAdd(reg, IntPtrConstant(2)); 1702 var_reg = new_reg; 1703 1704 Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop, 1705 &nested_loop_out); 1706 } 1707 } 1708 1709 BIND(&nested_loop_out); 1710 } 1711 1712 var_last_matched_until = match_to; 1713 var_next_search_from = match_to; 1714 Goto(&loop); 1715 } 1716 1717 BIND(&push_suffix_and_out); 1718 { 1719 const TNode<Smi> from = var_last_matched_until.value(); 1720 const TNode<Smi> to = string_length; 1721 
array.Push(CallBuiltin(Builtin::kSubString, context, string, from, to)); 1722 Goto(&out); 1723 } 1724 1725 BIND(&out); 1726 { 1727 var_result = array.ToJSArray(context); 1728 Goto(&done); 1729 } 1730 1731 BIND(&return_empty_array); 1732 { 1733 TNode<Smi> length = SmiZero(); 1734 TNode<IntPtrT> capacity = IntPtrZero(); 1735 base::Optional<TNode<AllocationSite>> allocation_site = base::nullopt; 1736 var_result = 1737 AllocateJSArray(kind, array_map, capacity, length, allocation_site); 1738 Goto(&done); 1739 } 1740 1741 BIND(&done); 1742 return var_result.value(); 1743} 1744 1745} // namespace internal 1746} // namespace v8 1747