1// Copyright 2014 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <functional> 6 7#include "src/base/small-vector.h" 8#include "src/base/strings.h" 9#include "src/common/message-template.h" 10#include "src/execution/arguments-inl.h" 11#include "src/execution/isolate-inl.h" 12#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop. 13#include "src/logging/counters.h" 14#include "src/numbers/conversions-inl.h" 15#include "src/objects/js-array-inl.h" 16#include "src/objects/js-regexp-inl.h" 17#include "src/regexp/regexp-utils.h" 18#include "src/regexp/regexp.h" 19#include "src/runtime/runtime-utils.h" 20#include "src/strings/string-builder-inl.h" 21#include "src/strings/string-search.h" 22 23namespace v8 { 24namespace internal { 25 26namespace { 27 28// Fairly arbitrary, but intended to fit: 29// 30// - captures 31// - results 32// - parsed replacement pattern parts 33// 34// for small, common cases. 35constexpr int kStaticVectorSlots = 8; 36 37// Returns -1 for failure. 38uint32_t GetArgcForReplaceCallable(uint32_t num_captures, 39 bool has_named_captures) { 40 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2; 41 const uint32_t kAdditionalArgsWithNamedCaptures = 3; 42 if (num_captures > Code::kMaxArguments) return -1; 43 uint32_t argc = has_named_captures 44 ? num_captures + kAdditionalArgsWithNamedCaptures 45 : num_captures + kAdditionalArgsWithoutNamedCaptures; 46 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() - 47 kAdditionalArgsWithNamedCaptures); 48 return (argc > Code::kMaxArguments) ? -1 : argc; 49} 50 51// Looks up the capture of the given name. Returns the (1-based) numbered 52// capture index or -1 on failure. 53int LookupNamedCapture(const std::function<bool(String)>& name_matches, 54 FixedArray capture_name_map) { 55 // TODO(jgruber): Sort capture_name_map and do binary search via 56 // internalized strings. 57 58 int maybe_capture_index = -1; 59 const int named_capture_count = capture_name_map.length() >> 1; 60 for (int j = 0; j < named_capture_count; j++) { 61 // The format of {capture_name_map} is documented at 62 // JSRegExp::kIrregexpCaptureNameMapIndex. 63 const int name_ix = j * 2; 64 const int index_ix = j * 2 + 1; 65 66 String capture_name = String::cast(capture_name_map.get(name_ix)); 67 if (!name_matches(capture_name)) continue; 68 69 maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix)); 70 break; 71 } 72 73 return maybe_capture_index; 74} 75 76} // namespace 77 78class CompiledReplacement { 79 public: 80 // Return whether the replacement is simple. 81 bool Compile(Isolate* isolate, Handle<JSRegExp> regexp, 82 Handle<String> replacement, int capture_count, 83 int subject_length); 84 85 // Use Apply only if Compile returned false. 86 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, 87 int32_t* match); 88 89 // Number of distinct parts of the replacement pattern. 90 int parts() { return static_cast<int>(parts_.size()); } 91 92 private: 93 enum PartType { 94 SUBJECT_PREFIX = 1, 95 SUBJECT_SUFFIX, 96 SUBJECT_CAPTURE, 97 REPLACEMENT_SUBSTRING, 98 REPLACEMENT_STRING, 99 EMPTY_REPLACEMENT, 100 NUMBER_OF_PART_TYPES 101 }; 102 103 struct ReplacementPart { 104 static inline ReplacementPart SubjectMatch() { 105 return ReplacementPart(SUBJECT_CAPTURE, 0); 106 } 107 static inline ReplacementPart SubjectCapture(int capture_index) { 108 return ReplacementPart(SUBJECT_CAPTURE, capture_index); 109 } 110 static inline ReplacementPart SubjectPrefix() { 111 return ReplacementPart(SUBJECT_PREFIX, 0); 112 } 113 static inline ReplacementPart SubjectSuffix(int subject_length) { 114 return ReplacementPart(SUBJECT_SUFFIX, subject_length); 115 } 116 static inline ReplacementPart ReplacementString() { 117 return ReplacementPart(REPLACEMENT_STRING, 0); 118 } 119 static inline ReplacementPart EmptyReplacement() { 120 return ReplacementPart(EMPTY_REPLACEMENT, 0); 121 } 122 static inline ReplacementPart ReplacementSubString(int from, int to) { 123 DCHECK_LE(0, from); 124 DCHECK_GT(to, from); 125 return ReplacementPart(-from, to); 126 } 127 128 // If tag <= 0 then it is the negation of a start index of a substring of 129 // the replacement pattern, otherwise it's a value from PartType. 130 ReplacementPart(int tag, int data) : tag(tag), data(data) { 131 // Must be non-positive or a PartType value. 132 DCHECK(tag < NUMBER_OF_PART_TYPES); 133 } 134 // Either a value of PartType or a non-positive number that is 135 // the negation of an index into the replacement string. 136 int tag; 137 // The data value's interpretation depends on the value of tag: 138 // tag == SUBJECT_PREFIX || 139 // tag == SUBJECT_SUFFIX: data is unused. 140 // tag == SUBJECT_CAPTURE: data is the number of the capture. 141 // tag == REPLACEMENT_SUBSTRING || 142 // tag == REPLACEMENT_STRING: data is index into array of substrings 143 // of the replacement string. 144 // tag == EMPTY_REPLACEMENT: data is unused. 145 // tag <= 0: Temporary representation of the substring of the replacement 146 // string ranging over -tag .. data. 147 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 148 // substring objects. 149 int data; 150 }; 151 152 template <typename Char> 153 bool ParseReplacementPattern(base::Vector<Char> characters, 154 FixedArray capture_name_map, int capture_count, 155 int subject_length) { 156 // Equivalent to String::GetSubstitution, except that this method converts 157 // the replacement string into an internal representation that avoids 158 // repeated parsing when used repeatedly. 159 int length = characters.length(); 160 int last = 0; 161 for (int i = 0; i < length; i++) { 162 Char c = characters[i]; 163 if (c == '$') { 164 int next_index = i + 1; 165 if (next_index == length) { // No next character! 166 break; 167 } 168 Char c2 = characters[next_index]; 169 switch (c2) { 170 case '$': 171 if (i > last) { 172 // There is a substring before. Include the first "$". 173 parts_.emplace_back( 174 ReplacementPart::ReplacementSubString(last, next_index)); 175 last = next_index + 1; // Continue after the second "$". 176 } else { 177 // Let the next substring start with the second "$". 178 last = next_index; 179 } 180 i = next_index; 181 break; 182 case '`': 183 if (i > last) { 184 parts_.emplace_back( 185 ReplacementPart::ReplacementSubString(last, i)); 186 } 187 parts_.emplace_back(ReplacementPart::SubjectPrefix()); 188 i = next_index; 189 last = i + 1; 190 break; 191 case '\'': 192 if (i > last) { 193 parts_.emplace_back( 194 ReplacementPart::ReplacementSubString(last, i)); 195 } 196 parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length)); 197 i = next_index; 198 last = i + 1; 199 break; 200 case '&': 201 if (i > last) { 202 parts_.emplace_back( 203 ReplacementPart::ReplacementSubString(last, i)); 204 } 205 parts_.emplace_back(ReplacementPart::SubjectMatch()); 206 i = next_index; 207 last = i + 1; 208 break; 209 case '0': 210 case '1': 211 case '2': 212 case '3': 213 case '4': 214 case '5': 215 case '6': 216 case '7': 217 case '8': 218 case '9': { 219 int capture_ref = c2 - '0'; 220 if (capture_ref > capture_count) { 221 i = next_index; 222 continue; 223 } 224 int second_digit_index = next_index + 1; 225 if (second_digit_index < length) { 226 // Peek ahead to see if we have two digits. 227 Char c3 = characters[second_digit_index]; 228 if ('0' <= c3 && c3 <= '9') { // Double digits. 229 int double_digit_ref = capture_ref * 10 + c3 - '0'; 230 if (double_digit_ref <= capture_count) { 231 next_index = second_digit_index; 232 capture_ref = double_digit_ref; 233 } 234 } 235 } 236 if (capture_ref > 0) { 237 if (i > last) { 238 parts_.emplace_back( 239 ReplacementPart::ReplacementSubString(last, i)); 240 } 241 DCHECK(capture_ref <= capture_count); 242 parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref)); 243 last = next_index + 1; 244 } 245 i = next_index; 246 break; 247 } 248 case '<': { 249 if (capture_name_map.is_null()) { 250 i = next_index; 251 break; 252 } 253 254 // Scan until the next '>', and let the enclosed substring be the 255 // groupName. 256 257 const int name_start_index = next_index + 1; 258 int closing_bracket_index = -1; 259 for (int j = name_start_index; j < length; j++) { 260 if (characters[j] == '>') { 261 closing_bracket_index = j; 262 break; 263 } 264 } 265 266 // If no closing bracket is found, '$<' is treated as a string 267 // literal. 268 if (closing_bracket_index == -1) { 269 i = next_index; 270 break; 271 } 272 273 base::Vector<Char> requested_name = 274 characters.SubVector(name_start_index, closing_bracket_index); 275 276 // Let capture be ? Get(namedCaptures, groupName). 277 278 const int capture_index = LookupNamedCapture( 279 [=](String capture_name) { 280 return capture_name.IsEqualTo(requested_name); 281 }, 282 capture_name_map); 283 284 // If capture is undefined or does not exist, replace the text 285 // through the following '>' with the empty string. 286 // Otherwise, replace the text through the following '>' with 287 // ? ToString(capture). 288 289 DCHECK(capture_index == -1 || 290 (1 <= capture_index && capture_index <= capture_count)); 291 292 if (i > last) { 293 parts_.emplace_back( 294 ReplacementPart::ReplacementSubString(last, i)); 295 } 296 parts_.emplace_back( 297 (capture_index == -1) 298 ? ReplacementPart::EmptyReplacement() 299 : ReplacementPart::SubjectCapture(capture_index)); 300 last = closing_bracket_index + 1; 301 i = closing_bracket_index; 302 break; 303 } 304 default: 305 i = next_index; 306 break; 307 } 308 } 309 } 310 if (length > last) { 311 if (last == 0) { 312 // Replacement is simple. Do not use Apply to do the replacement. 313 return true; 314 } else { 315 parts_.emplace_back( 316 ReplacementPart::ReplacementSubString(last, length)); 317 } 318 } 319 return false; 320 } 321 322 base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_; 323 base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_; 324}; 325 326bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp, 327 Handle<String> replacement, int capture_count, 328 int subject_length) { 329 { 330 DisallowGarbageCollection no_gc; 331 String::FlatContent content = replacement->GetFlatContent(no_gc); 332 DCHECK(content.IsFlat()); 333 334 FixedArray capture_name_map; 335 if (capture_count > 0) { 336 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag())); 337 Object maybe_capture_name_map = regexp->capture_name_map(); 338 if (maybe_capture_name_map.IsFixedArray()) { 339 capture_name_map = FixedArray::cast(maybe_capture_name_map); 340 } 341 } 342 343 bool simple; 344 if (content.IsOneByte()) { 345 simple = 346 ParseReplacementPattern(content.ToOneByteVector(), capture_name_map, 347 capture_count, subject_length); 348 } else { 349 DCHECK(content.IsTwoByte()); 350 simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map, 351 capture_count, subject_length); 352 } 353 if (simple) return true; 354 } 355 356 // Find substrings of replacement string and create them as String objects. 357 int substring_index = 0; 358 for (ReplacementPart& part : parts_) { 359 int tag = part.tag; 360 if (tag <= 0) { // A replacement string slice. 361 int from = -tag; 362 int to = part.data; 363 replacement_substrings_.emplace_back( 364 isolate->factory()->NewSubString(replacement, from, to)); 365 part.tag = REPLACEMENT_SUBSTRING; 366 part.data = substring_index; 367 substring_index++; 368 } else if (tag == REPLACEMENT_STRING) { 369 replacement_substrings_.emplace_back(replacement); 370 part.data = substring_index; 371 substring_index++; 372 } 373 } 374 return false; 375} 376 377 378void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 379 int match_from, int match_to, int32_t* match) { 380 DCHECK_LT(0, parts_.size()); 381 for (ReplacementPart& part : parts_) { 382 switch (part.tag) { 383 case SUBJECT_PREFIX: 384 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 385 break; 386 case SUBJECT_SUFFIX: { 387 int subject_length = part.data; 388 if (match_to < subject_length) { 389 builder->AddSubjectSlice(match_to, subject_length); 390 } 391 break; 392 } 393 case SUBJECT_CAPTURE: { 394 int capture = part.data; 395 int from = match[capture * 2]; 396 int to = match[capture * 2 + 1]; 397 if (from >= 0 && to > from) { 398 builder->AddSubjectSlice(from, to); 399 } 400 break; 401 } 402 case REPLACEMENT_SUBSTRING: 403 case REPLACEMENT_STRING: 404 builder->AddString(replacement_substrings_[part.data]); 405 break; 406 case EMPTY_REPLACEMENT: 407 break; 408 default: 409 UNREACHABLE(); 410 } 411 } 412} 413 414void FindOneByteStringIndices(base::Vector<const uint8_t> subject, 415 uint8_t pattern, std::vector<int>* indices, 416 unsigned int limit) { 417 DCHECK_LT(0, limit); 418 // Collect indices of pattern in subject using memchr. 419 // Stop after finding at most limit values. 420 const uint8_t* subject_start = subject.begin(); 421 const uint8_t* subject_end = subject_start + subject.length(); 422 const uint8_t* pos = subject_start; 423 while (limit > 0) { 424 pos = reinterpret_cast<const uint8_t*>( 425 memchr(pos, pattern, subject_end - pos)); 426 if (pos == nullptr) return; 427 indices->push_back(static_cast<int>(pos - subject_start)); 428 pos++; 429 limit--; 430 } 431} 432 433void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject, 434 base::uc16 pattern, std::vector<int>* indices, 435 unsigned int limit) { 436 DCHECK_LT(0, limit); 437 const base::uc16* subject_start = subject.begin(); 438 const base::uc16* subject_end = subject_start + subject.length(); 439 for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0; 440 pos++) { 441 if (*pos == pattern) { 442 indices->push_back(static_cast<int>(pos - subject_start)); 443 limit--; 444 } 445 } 446} 447 448template <typename SubjectChar, typename PatternChar> 449void FindStringIndices(Isolate* isolate, 450 base::Vector<const SubjectChar> subject, 451 base::Vector<const PatternChar> pattern, 452 std::vector<int>* indices, unsigned int limit) { 453 DCHECK_LT(0, limit); 454 // Collect indices of pattern in subject. 455 // Stop after finding at most limit values. 456 int pattern_length = pattern.length(); 457 int index = 0; 458 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); 459 while (limit > 0) { 460 index = search.Search(subject, index); 461 if (index < 0) return; 462 indices->push_back(index); 463 index += pattern_length; 464 limit--; 465 } 466} 467 468void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern, 469 std::vector<int>* indices, unsigned int limit) { 470 { 471 DisallowGarbageCollection no_gc; 472 String::FlatContent subject_content = subject.GetFlatContent(no_gc); 473 String::FlatContent pattern_content = pattern.GetFlatContent(no_gc); 474 DCHECK(subject_content.IsFlat()); 475 DCHECK(pattern_content.IsFlat()); 476 if (subject_content.IsOneByte()) { 477 base::Vector<const uint8_t> subject_vector = 478 subject_content.ToOneByteVector(); 479 if (pattern_content.IsOneByte()) { 480 base::Vector<const uint8_t> pattern_vector = 481 pattern_content.ToOneByteVector(); 482 if (pattern_vector.length() == 1) { 483 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, 484 limit); 485 } else { 486 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 487 limit); 488 } 489 } else { 490 FindStringIndices(isolate, subject_vector, 491 pattern_content.ToUC16Vector(), indices, limit); 492 } 493 } else { 494 base::Vector<const base::uc16> subject_vector = 495 subject_content.ToUC16Vector(); 496 if (pattern_content.IsOneByte()) { 497 base::Vector<const uint8_t> pattern_vector = 498 pattern_content.ToOneByteVector(); 499 if (pattern_vector.length() == 1) { 500 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 501 limit); 502 } else { 503 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 504 limit); 505 } 506 } else { 507 base::Vector<const base::uc16> pattern_vector = 508 pattern_content.ToUC16Vector(); 509 if (pattern_vector.length() == 1) { 510 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 511 limit); 512 } else { 513 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 514 limit); 515 } 516 } 517 } 518 } 519} 520 521namespace { 522std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) { 523 std::vector<int>* list = isolate->regexp_indices(); 524 list->clear(); 525 return list; 526} 527 528void TruncateRegexpIndicesList(Isolate* isolate) { 529 // Same size as smallest zone segment, preserving behavior from the 530 // runtime zone. 531 // TODO(jgruber): Consider removing the reusable regexp_indices list and 532 // simply allocating a new list each time. It feels like we're needlessly 533 // optimizing an edge case. 534 static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize; 535 std::vector<int>* indices = isolate->regexp_indices(); 536 if (indices->capacity() > kMaxRegexpIndicesListCapacity) { 537 // Throw away backing storage. 538 indices->clear(); 539 indices->shrink_to_fit(); 540 } 541} 542} // namespace 543 544template <typename ResultSeqString> 545V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString( 546 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, 547 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 548 DCHECK(subject->IsFlat()); 549 DCHECK(replacement->IsFlat()); 550 551 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate); 552 553 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag()); 554 String pattern = pattern_regexp->atom_pattern(); 555 int subject_len = subject->length(); 556 int pattern_len = pattern.length(); 557 int replacement_len = replacement->length(); 558 559 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF); 560 561 if (indices->empty()) return *subject; 562 563 // Detect integer overflow. 564 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - 565 static_cast<int64_t>(pattern_len)) * 566 static_cast<int64_t>(indices->size()) + 567 static_cast<int64_t>(subject_len); 568 int result_len; 569 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { 570 STATIC_ASSERT(String::kMaxLength < kMaxInt); 571 result_len = kMaxInt; // Provoke exception. 572 } else { 573 result_len = static_cast<int>(result_len_64); 574 } 575 if (result_len == 0) { 576 return ReadOnlyRoots(isolate).empty_string(); 577 } 578 579 int subject_pos = 0; 580 int result_pos = 0; 581 582 MaybeHandle<SeqString> maybe_res; 583 if (ResultSeqString::kHasOneByteEncoding) { 584 maybe_res = isolate->factory()->NewRawOneByteString(result_len); 585 } else { 586 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); 587 } 588 Handle<SeqString> untyped_res; 589 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); 590 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); 591 592 DisallowGarbageCollection no_gc; 593 for (int index : *indices) { 594 // Copy non-matched subject content. 595 if (subject_pos < index) { 596 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos, 597 subject_pos, index - subject_pos); 598 result_pos += index - subject_pos; 599 } 600 601 // Replace match. 602 if (replacement_len > 0) { 603 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0, 604 replacement_len); 605 result_pos += replacement_len; 606 } 607 608 subject_pos = index + pattern_len; 609 } 610 // Add remaining subject content at the end. 611 if (subject_pos < subject_len) { 612 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos, 613 subject_pos, subject_len - subject_pos); 614 } 615 616 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len}; 617 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices); 618 619 TruncateRegexpIndicesList(isolate); 620 621 return *result; 622} 623 624V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString( 625 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 626 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 627 DCHECK(subject->IsFlat()); 628 DCHECK(replacement->IsFlat()); 629 630 int capture_count = regexp->capture_count(); 631 int subject_length = subject->length(); 632 633 // Ensure the RegExp is compiled so we can access the capture-name map. 634 if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) { 635 return ReadOnlyRoots(isolate).exception(); 636 } 637 638 CompiledReplacement compiled_replacement; 639 const bool simple_replace = compiled_replacement.Compile( 640 isolate, regexp, replacement, capture_count, subject_length); 641 642 // Shortcut for simple non-regexp global replacements 643 if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) { 644 if (subject->IsOneByteRepresentation() && 645 replacement->IsOneByteRepresentation()) { 646 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 647 isolate, subject, regexp, replacement, last_match_info); 648 } else { 649 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 650 isolate, subject, regexp, replacement, last_match_info); 651 } 652 } 653 654 RegExpGlobalCache global_cache(regexp, subject, isolate); 655 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 656 657 int32_t* current_match = global_cache.FetchNext(); 658 if (current_match == nullptr) { 659 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 660 return *subject; 661 } 662 663 // Guessing the number of parts that the final result string is built 664 // from. Global regexps can match any number of times, so we guess 665 // conservatively. 666 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; 667 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); 668 669 int prev = 0; 670 671 do { 672 int start = current_match[0]; 673 int end = current_match[1]; 674 675 if (prev < start) { 676 builder.AddSubjectSlice(prev, start); 677 } 678 679 if (simple_replace) { 680 builder.AddString(replacement); 681 } else { 682 compiled_replacement.Apply(&builder, start, end, current_match); 683 } 684 prev = end; 685 686 current_match = global_cache.FetchNext(); 687 } while (current_match != nullptr); 688 689 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 690 691 if (prev < subject_length) { 692 builder.AddSubjectSlice(prev, subject_length); 693 } 694 695 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, 696 global_cache.LastSuccessfulMatch()); 697 698 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString()); 699} 700 701template <typename ResultSeqString> 702V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString( 703 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 704 Handle<RegExpMatchInfo> last_match_info) { 705 DCHECK(subject->IsFlat()); 706 707 // Shortcut for simple non-regexp global replacements 708 if (regexp->type_tag() == JSRegExp::ATOM) { 709 Handle<String> empty_string = isolate->factory()->empty_string(); 710 if (subject->IsOneByteRepresentation()) { 711 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 712 isolate, subject, regexp, empty_string, last_match_info); 713 } else { 714 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 715 isolate, subject, regexp, empty_string, last_match_info); 716 } 717 } 718 719 RegExpGlobalCache global_cache(regexp, subject, isolate); 720 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 721 722 int32_t* current_match = global_cache.FetchNext(); 723 if (current_match == nullptr) { 724 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 725 return *subject; 726 } 727 728 int start = current_match[0]; 729 int end = current_match[1]; 730 int capture_count = regexp->capture_count(); 731 int subject_length = subject->length(); 732 733 int new_length = subject_length - (end - start); 734 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string(); 735 736 Handle<ResultSeqString> answer; 737 if (ResultSeqString::kHasOneByteEncoding) { 738 answer = Handle<ResultSeqString>::cast( 739 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); 740 } else { 741 answer = Handle<ResultSeqString>::cast( 742 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); 743 } 744 745 int prev = 0; 746 int position = 0; 747 748 DisallowGarbageCollection no_gc; 749 do { 750 start = current_match[0]; 751 end = current_match[1]; 752 if (prev < start) { 753 // Add substring subject[prev;start] to answer string. 754 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev, 755 start - prev); 756 position += start - prev; 757 } 758 prev = end; 759 760 current_match = global_cache.FetchNext(); 761 } while (current_match != nullptr); 762 763 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 764 765 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, 766 global_cache.LastSuccessfulMatch()); 767 768 if (prev < subject_length) { 769 // Add substring subject[prev;length] to answer string. 770 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev, 771 subject_length - prev); 772 position += subject_length - prev; 773 } 774 775 if (position == 0) return ReadOnlyRoots(isolate).empty_string(); 776 777 // Shorten string and fill 778 int string_size = ResultSeqString::SizeFor(position); 779 int allocated_string_size = ResultSeqString::SizeFor(new_length); 780 int delta = allocated_string_size - string_size; 781 782 answer->set_length(position); 783 if (delta == 0) return *answer; 784 785 Address end_of_string = answer->address() + string_size; 786 Heap* heap = isolate->heap(); 787 788 // The trimming is performed on a newly allocated object, which is on a 789 // freshly allocated page or on an already swept page. Hence, the sweeper 790 // thread can not get confused with the filler creation. No synchronization 791 // needed. 792 // TODO(hpayer): We should shrink the large object page if the size 793 // of the object changed significantly. 794 if (!heap->IsLargeObject(*answer)) { 795 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo); 796 } 797 return *answer; 798} 799 800RUNTIME_FUNCTION(Runtime_StringSplit) { 801 HandleScope handle_scope(isolate); 802 DCHECK_EQ(3, args.length()); 803 Handle<String> subject = args.at<String>(0); 804 Handle<String> pattern = args.at<String>(1); 805 uint32_t limit = NumberToUint32(args[2]); 806 CHECK_LT(0, limit); 807 808 int subject_length = subject->length(); 809 int pattern_length = pattern->length(); 810 CHECK_LT(0, pattern_length); 811 812 if (limit == 0xFFFFFFFFu) { 813 FixedArray last_match_cache_unused; 814 Handle<Object> cached_answer( 815 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, 816 &last_match_cache_unused, 817 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), 818 isolate); 819 if (*cached_answer != Smi::zero()) { 820 // The cache FixedArray is a COW-array and can therefore be reused. 821 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( 822 Handle<FixedArray>::cast(cached_answer)); 823 return *result; 824 } 825 } 826 827 // The limit can be very large (0xFFFFFFFFu), but since the pattern 828 // isn't empty, we can never create more parts than ~half the length 829 // of the subject. 830 831 subject = String::Flatten(isolate, subject); 832 pattern = String::Flatten(isolate, pattern); 833 834 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate); 835 836 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit); 837 838 if (static_cast<uint32_t>(indices->size()) < limit) { 839 indices->push_back(subject_length); 840 } 841 842 // The list indices now contains the end of each part to create. 843 844 // Create JSArray of substrings separated by separator. 845 int part_count = static_cast<int>(indices->size()); 846 847 Handle<JSArray> result = 848 isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count, 849 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); 850 851 DCHECK(result->HasObjectElements()); 852 853 Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate); 854 855 if (part_count == 1 && indices->at(0) == subject_length) { 856 elements->set(0, *subject); 857 } else { 858 int part_start = 0; 859 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, { 860 int part_end = indices->at(i); 861 Handle<String> substring = 862 isolate->factory()->NewProperSubString(subject, part_start, part_end); 863 elements->set(i, *substring); 864 part_start = part_end + pattern_length; 865 }); 866 } 867 868 if (limit == 0xFFFFFFFFu) { 869 if (result->HasObjectElements()) { 870 RegExpResultsCache::Enter(isolate, subject, pattern, elements, 871 isolate->factory()->empty_fixed_array(), 872 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); 873 } 874 } 875 876 TruncateRegexpIndicesList(isolate); 877 878 return *result; 879} 880 881namespace { 882 883MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp, 884 Handle<String> subject, int32_t index, 885 Handle<RegExpMatchInfo> last_match_info, 886 RegExp::ExecQuirks exec_quirks) { 887 // Due to the way the JS calls are constructed this must be less than the 888 // length of a string, i.e. it is always a Smi. We check anyway for security. 889 CHECK_LE(0, index); 890 CHECK_GE(subject->length(), index); 891 isolate->counters()->regexp_entry_runtime()->Increment(); 892 return RegExp::Exec(isolate, regexp, subject, index, last_match_info, 893 exec_quirks); 894} 895 896MaybeHandle<Object> ExperimentalOneshotExec( 897 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 898 int32_t index, Handle<RegExpMatchInfo> last_match_info, 899 RegExp::ExecQuirks exec_quirks) { 900 // Due to the way the JS calls are constructed this must be less than the 901 // length of a string, i.e. it is always a Smi. We check anyway for security. 902 CHECK_LE(0, index); 903 CHECK_GE(subject->length(), index); 904 isolate->counters()->regexp_entry_runtime()->Increment(); 905 return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index, 906 last_match_info, exec_quirks); 907} 908 909} // namespace 910 911RUNTIME_FUNCTION(Runtime_RegExpExec) { 912 HandleScope scope(isolate); 913 DCHECK_EQ(4, args.length()); 914 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 915 Handle<String> subject = args.at<String>(1); 916 int32_t index = 0; 917 CHECK(args[2].ToInt32(&index)); 918 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3); 919 RETURN_RESULT_OR_FAILURE( 920 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info, 921 RegExp::ExecQuirks::kNone)); 922} 923 924RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) { 925 HandleScope scope(isolate); 926 DCHECK_EQ(4, args.length()); 927 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 928 Handle<String> subject = args.at<String>(1); 929 int32_t index = 0; 930 CHECK(args[2].ToInt32(&index)); 931 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3); 932 RETURN_RESULT_OR_FAILURE( 933 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info, 934 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure)); 935} 936 937RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) { 938 HandleScope scope(isolate); 939 DCHECK_EQ(4, args.length()); 940 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 941 Handle<String> subject = args.at<String>(1); 942 int32_t index = 0; 943 CHECK(args[2].ToInt32(&index)); 944 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3); 945 RETURN_RESULT_OR_FAILURE( 946 isolate, 947 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info, 948 RegExp::ExecQuirks::kNone)); 949} 950 951RUNTIME_FUNCTION( 952 Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) { 953 HandleScope scope(isolate); 954 DCHECK_EQ(4, args.length()); 955 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 956 Handle<String> subject = args.at<String>(1); 957 int32_t index = 0; 958 CHECK(args[2].ToInt32(&index)); 959 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3); 960 RETURN_RESULT_OR_FAILURE( 961 isolate, 962 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info, 963 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure)); 964} 965 966RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) { 967 HandleScope scope(isolate); 968 DCHECK_EQ(3, args.length()); 969 Handle<RegExpMatchInfo> match_info = args.at<RegExpMatchInfo>(1); 970 Handle<Object> maybe_names = args.at(2); 971#ifdef DEBUG 972 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 973 DCHECK(regexp->flags() & JSRegExp::kHasIndices); 974#endif 975 976 return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names); 977} 978 979namespace { 980 981class MatchInfoBackedMatch : public String::Match { 982 public: 983 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp, 984 Handle<String> subject, 985 Handle<RegExpMatchInfo> match_info) 986 : isolate_(isolate), match_info_(match_info) { 987 subject_ = String::Flatten(isolate, subject); 988 989 if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) { 990 Object o = regexp->capture_name_map(); 991 has_named_captures_ = o.IsFixedArray(); 992 if (has_named_captures_) { 993 capture_name_map_ = handle(FixedArray::cast(o), isolate); 994 } 995 } else { 996 has_named_captures_ = false; 997 } 998 } 999 1000 Handle<String> GetMatch() override { 1001 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr); 1002 } 1003 1004 Handle<String> GetPrefix() override { 1005 const int match_start = match_info_->Capture(0); 1006 return isolate_->factory()->NewSubString(subject_, 0, match_start); 1007 } 1008 1009 Handle<String> GetSuffix() override { 1010 const int match_end = match_info_->Capture(1); 1011 return isolate_->factory()->NewSubString(subject_, match_end, 1012 subject_->length()); 1013 } 1014 1015 bool HasNamedCaptures() override { return has_named_captures_; } 1016 1017 int CaptureCount() override { 1018 return match_info_->NumberOfCaptureRegisters() / 2; 1019 } 1020 1021 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 1022 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter( 1023 isolate_, match_info_, i, capture_exists); 1024 return (*capture_exists) ? Object::ToString(isolate_, capture_obj) 1025 : isolate_->factory()->empty_string(); 1026 } 1027 1028 MaybeHandle<String> GetNamedCapture(Handle<String> name, 1029 CaptureState* state) override { 1030 DCHECK(has_named_captures_); 1031 const int capture_index = LookupNamedCapture( 1032 [=](String capture_name) { return capture_name.Equals(*name); }, 1033 *capture_name_map_); 1034 1035 if (capture_index == -1) { 1036 *state = UNMATCHED; 1037 return isolate_->factory()->empty_string(); 1038 } 1039 1040 DCHECK(1 <= capture_index && capture_index <= CaptureCount()); 1041 1042 bool capture_exists; 1043 Handle<String> capture_value; 1044 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value, 1045 GetCapture(capture_index, &capture_exists), 1046 String); 1047 1048 if (!capture_exists) { 1049 *state = UNMATCHED; 1050 return isolate_->factory()->empty_string(); 1051 } else { 1052 *state = MATCHED; 1053 return capture_value; 1054 } 1055 } 1056 1057 private: 1058 Isolate* isolate_; 1059 Handle<String> subject_; 1060 Handle<RegExpMatchInfo> match_info_; 1061 1062 bool has_named_captures_; 1063 Handle<FixedArray> capture_name_map_; 1064}; 1065 1066class VectorBackedMatch : public String::Match { 1067 public: 1068 VectorBackedMatch(Isolate* isolate, Handle<String> subject, 1069 Handle<String> match, int match_position, 1070 base::Vector<Handle<Object>> captures, 1071 Handle<Object> groups_obj) 1072 : isolate_(isolate), 1073 match_(match), 1074 match_position_(match_position), 1075 captures_(captures) { 1076 subject_ = String::Flatten(isolate, subject); 1077 1078 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver()); 1079 has_named_captures_ = !groups_obj->IsUndefined(isolate); 1080 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj); 1081 } 1082 1083 Handle<String> GetMatch() override { return match_; } 1084 1085 Handle<String> GetPrefix() override { 1086 return isolate_->factory()->NewSubString(subject_, 0, match_position_); 1087 } 1088 1089 Handle<String> GetSuffix() override { 1090 const int match_end_position = match_position_ + match_->length(); 1091 return isolate_->factory()->NewSubString(subject_, match_end_position, 1092 subject_->length()); 1093 } 1094 1095 bool HasNamedCaptures() override { return has_named_captures_; } 1096 1097 int CaptureCount() override { return captures_.length(); } 1098 1099 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 1100 Handle<Object> capture_obj = captures_[i]; 1101 if (capture_obj->IsUndefined(isolate_)) { 1102 *capture_exists = false; 1103 return isolate_->factory()->empty_string(); 1104 } 1105 *capture_exists = true; 1106 return Object::ToString(isolate_, capture_obj); 1107 } 1108 1109 MaybeHandle<String> GetNamedCapture(Handle<String> name, 1110 CaptureState* state) override { 1111 DCHECK(has_named_captures_); 1112 1113 Handle<Object> capture_obj; 1114 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj, 1115 Object::GetProperty(isolate_, groups_obj_, name), 1116 String); 1117 if (capture_obj->IsUndefined(isolate_)) { 1118 *state = UNMATCHED; 1119 return isolate_->factory()->empty_string(); 1120 } else { 1121 *state = MATCHED; 1122 return Object::ToString(isolate_, capture_obj); 1123 } 1124 } 1125 1126 private: 1127 Isolate* isolate_; 1128 Handle<String> subject_; 1129 Handle<String> match_; 1130 const int match_position_; 1131 base::Vector<Handle<Object>> captures_; 1132 1133 bool has_named_captures_; 1134 Handle<JSReceiver> groups_obj_; 1135}; 1136 1137// Create the groups object (see also the RegExp result creation in 1138// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo). 1139Handle<JSObject> ConstructNamedCaptureGroupsObject( 1140 Isolate* isolate, Handle<FixedArray> capture_map, 1141 const std::function<Object(int)>& f_get_capture) { 1142 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto(); 1143 1144 const int named_capture_count = capture_map->length() >> 1; 1145 for (int i = 0; i < named_capture_count; i++) { 1146 const int name_ix = i * 2; 1147 const int index_ix = i * 2 + 1; 1148 1149 Handle<String> capture_name(String::cast(capture_map->get(name_ix)), 1150 isolate); 1151 const int capture_ix = Smi::ToInt(capture_map->get(index_ix)); 1152 DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1. 1153 1154 Handle<Object> capture_value(f_get_capture(capture_ix), isolate); 1155 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString()); 1156 1157 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE); 1158 } 1159 1160 return groups; 1161} 1162 1163// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 1164// separate last match info. See comment on that function. 1165template <bool has_capture> 1166static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, 1167 Handle<JSRegExp> regexp, 1168 Handle<RegExpMatchInfo> last_match_array, 1169 Handle<JSArray> result_array) { 1170 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1171 DCHECK_NE(has_capture, regexp->capture_count() == 0); 1172 DCHECK(subject->IsFlat()); 1173 1174 // Force tier up to native code for global replaces. The global replace is 1175 // implemented differently for native code and bytecode execution, where the 1176 // native code expects an array to store all the matches, and the bytecode 1177 // matches one at a time, so it's easier to tier-up to native code from the 1178 // start. 1179 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) { 1180 regexp->MarkTierUpForNextExec(); 1181 if (FLAG_trace_regexp_tier_up) { 1182 PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n", 1183 reinterpret_cast<void*>(regexp->ptr())); 1184 } 1185 } 1186 1187 int capture_count = regexp->capture_count(); 1188 int subject_length = subject->length(); 1189 1190 static const int kMinLengthToCache = 0x1000; 1191 1192 if (subject_length > kMinLengthToCache) { 1193 FixedArray last_match_cache; 1194 Object cached_answer = RegExpResultsCache::Lookup( 1195 isolate->heap(), *subject, regexp->data(), &last_match_cache, 1196 RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 1197 if (cached_answer.IsFixedArray()) { 1198 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count); 1199 int32_t* last_match = NewArray<int32_t>(capture_registers); 1200 for (int i = 0; i < capture_registers; i++) { 1201 last_match[i] = Smi::ToInt(last_match_cache.get(i)); 1202 } 1203 Handle<FixedArray> cached_fixed_array = 1204 Handle<FixedArray>(FixedArray::cast(cached_answer), isolate); 1205 // The cache FixedArray is a COW-array and we need to return a copy. 1206 Handle<FixedArray> copied_fixed_array = 1207 isolate->factory()->CopyFixedArrayWithMap( 1208 cached_fixed_array, isolate->factory()->fixed_array_map()); 1209 JSArray::SetContent(result_array, copied_fixed_array); 1210 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, 1211 capture_count, last_match); 1212 DeleteArray(last_match); 1213 return *result_array; 1214 } 1215 } 1216 1217 RegExpGlobalCache global_cache(regexp, subject, isolate); 1218 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 1219 1220 // Ensured in Runtime_RegExpExecMultiple. 1221 DCHECK(result_array->HasObjectElements()); 1222 Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()), 1223 isolate); 1224 if (result_elements->length() < 16) { 1225 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 1226 } 1227 1228 FixedArrayBuilder builder(result_elements); 1229 1230 // Position to search from. 1231 int match_start = -1; 1232 int match_end = 0; 1233 bool first = true; 1234 1235 // Two smis before and after the match, for very long strings. 1236 static const int kMaxBuilderEntriesPerRegExpMatch = 5; 1237 1238 while (true) { 1239 int32_t* current_match = global_cache.FetchNext(); 1240 if (current_match == nullptr) break; 1241 match_start = current_match[0]; 1242 builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch); 1243 if (match_end < match_start) { 1244 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 1245 match_start); 1246 } 1247 match_end = current_match[1]; 1248 { 1249 // Avoid accumulating new handles inside loop. 1250 HandleScope temp_scope(isolate); 1251 Handle<String> match; 1252 if (!first) { 1253 match = isolate->factory()->NewProperSubString(subject, match_start, 1254 match_end); 1255 } else { 1256 match = 1257 isolate->factory()->NewSubString(subject, match_start, match_end); 1258 first = false; 1259 } 1260 1261 if (has_capture) { 1262 // Arguments array to replace function is match, captures, index and 1263 // subject, i.e., 3 + capture count in total. If the RegExp contains 1264 // named captures, they are also passed as the last argument. 1265 1266 Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate); 1267 const bool has_named_captures = maybe_capture_map->IsFixedArray(); 1268 1269 const int argc = 1270 has_named_captures ? 4 + capture_count : 3 + capture_count; 1271 1272 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc); 1273 int cursor = 0; 1274 1275 elements->set(cursor++, *match); 1276 for (int i = 1; i <= capture_count; i++) { 1277 int start = current_match[i * 2]; 1278 if (start >= 0) { 1279 int end = current_match[i * 2 + 1]; 1280 DCHECK(start <= end); 1281 Handle<String> substring = 1282 isolate->factory()->NewSubString(subject, start, end); 1283 elements->set(cursor++, *substring); 1284 } else { 1285 DCHECK_GT(0, current_match[i * 2 + 1]); 1286 elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value()); 1287 } 1288 } 1289 1290 elements->set(cursor++, Smi::FromInt(match_start)); 1291 elements->set(cursor++, *subject); 1292 1293 if (has_named_captures) { 1294 Handle<FixedArray> capture_map = 1295 Handle<FixedArray>::cast(maybe_capture_map); 1296 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject( 1297 isolate, capture_map, [=](int ix) { return elements->get(ix); }); 1298 elements->set(cursor++, *groups); 1299 } 1300 1301 DCHECK_EQ(cursor, argc); 1302 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 1303 } else { 1304 builder.Add(*match); 1305 } 1306 } 1307 } 1308 1309 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); 1310 1311 if (match_start >= 0) { 1312 // Finished matching, with at least one match. 1313 if (match_end < subject_length) { 1314 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 1315 subject_length); 1316 } 1317 1318 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count, 1319 global_cache.LastSuccessfulMatch()); 1320 1321 if (subject_length > kMinLengthToCache) { 1322 // Store the last successful match into the array for caching. 1323 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count); 1324 Handle<FixedArray> last_match_cache = 1325 isolate->factory()->NewFixedArray(capture_registers); 1326 int32_t* last_match = global_cache.LastSuccessfulMatch(); 1327 for (int i = 0; i < capture_registers; i++) { 1328 last_match_cache->set(i, Smi::FromInt(last_match[i])); 1329 } 1330 Handle<FixedArray> result_fixed_array = 1331 FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length()); 1332 // Cache the result and copy the FixedArray into a COW array. 1333 Handle<FixedArray> copied_fixed_array = 1334 isolate->factory()->CopyFixedArrayWithMap( 1335 result_fixed_array, isolate->factory()->fixed_array_map()); 1336 RegExpResultsCache::Enter( 1337 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array, 1338 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 1339 } 1340 return *builder.ToJSArray(result_array); 1341 } else { 1342 return ReadOnlyRoots(isolate).null_value(); // No matches at all. 1343 } 1344} 1345 1346// Legacy implementation of RegExp.prototype[Symbol.replace] which 1347// doesn't properly call the underlying exec method. 1348V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace( 1349 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string, 1350 Handle<String> replace) { 1351 // Functional fast-paths are dispatched directly by replace builtin. 1352 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1353 1354 Factory* factory = isolate->factory(); 1355 1356 const int flags = regexp->flags(); 1357 const bool global = (flags & JSRegExp::kGlobal) != 0; 1358 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1359 1360 replace = String::Flatten(isolate, replace); 1361 1362 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1363 1364 if (!global) { 1365 // Non-global regexp search, string replace. 1366 1367 uint32_t last_index = 0; 1368 if (sticky) { 1369 Handle<Object> last_index_obj(regexp->last_index(), isolate); 1370 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, 1371 Object::ToLength(isolate, last_index_obj), 1372 String); 1373 last_index = PositiveNumberToUint32(*last_index_obj); 1374 } 1375 1376 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), 1377 isolate); 1378 1379 // A lastIndex exceeding the string length always returns null (signalling 1380 // failure) in RegExpBuiltinExec, thus we can skip the call. 1381 if (last_index <= static_cast<uint32_t>(string->length())) { 1382 ASSIGN_RETURN_ON_EXCEPTION( 1383 isolate, match_indices_obj, 1384 RegExp::Exec(isolate, regexp, string, last_index, last_match_info), 1385 String); 1386 } 1387 1388 if (match_indices_obj->IsNull(isolate)) { 1389 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER); 1390 return string; 1391 } 1392 1393 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj); 1394 1395 const int start_index = match_indices->Capture(0); 1396 const int end_index = match_indices->Capture(1); 1397 1398 if (sticky) { 1399 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER); 1400 } 1401 1402 IncrementalStringBuilder builder(isolate); 1403 builder.AppendString(factory->NewSubString(string, 0, start_index)); 1404 1405 if (replace->length() > 0) { 1406 MatchInfoBackedMatch m(isolate, regexp, string, match_indices); 1407 Handle<String> replacement; 1408 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement, 1409 String::GetSubstitution(isolate, &m, replace), 1410 String); 1411 builder.AppendString(replacement); 1412 } 1413 1414 builder.AppendString( 1415 factory->NewSubString(string, end_index, string->length())); 1416 return builder.Finish(); 1417 } else { 1418 // Global regexp search, string replace. 1419 DCHECK(global); 1420 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), 1421 String); 1422 1423 // Force tier up to native code for global replaces. The global replace is 1424 // implemented differently for native code and bytecode execution, where the 1425 // native code expects an array to store all the matches, and the bytecode 1426 // matches one at a time, so it's easier to tier-up to native code from the 1427 // start. 1428 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) { 1429 regexp->MarkTierUpForNextExec(); 1430 if (FLAG_trace_regexp_tier_up) { 1431 PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n", 1432 reinterpret_cast<void*>(regexp->ptr())); 1433 } 1434 } 1435 1436 if (replace->length() == 0) { 1437 if (string->IsOneByteRepresentation()) { 1438 Object result = 1439 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( 1440 isolate, string, regexp, last_match_info); 1441 return handle(String::cast(result), isolate); 1442 } else { 1443 Object result = 1444 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( 1445 isolate, string, regexp, last_match_info); 1446 return handle(String::cast(result), isolate); 1447 } 1448 } 1449 1450 Object result = StringReplaceGlobalRegExpWithString( 1451 isolate, string, regexp, replace, last_match_info); 1452 if (result.IsString()) { 1453 return handle(String::cast(result), isolate); 1454 } else { 1455 return MaybeHandle<String>(); 1456 } 1457 } 1458 1459 UNREACHABLE(); 1460} 1461 1462} // namespace 1463 1464// This is only called for StringReplaceGlobalRegExpWithFunction. 1465RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { 1466 HandleScope handles(isolate); 1467 DCHECK_EQ(4, args.length()); 1468 1469 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 1470 Handle<String> subject = args.at<String>(1); 1471 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(2); 1472 Handle<JSArray> result_array = args.at<JSArray>(3); 1473 1474 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1475 CHECK(result_array->HasObjectElements()); 1476 1477 subject = String::Flatten(isolate, subject); 1478 CHECK(regexp->flags() & JSRegExp::kGlobal); 1479 1480 Object result; 1481 if (regexp->capture_count() == 0) { 1482 result = SearchRegExpMultiple<false>(isolate, subject, regexp, 1483 last_match_info, result_array); 1484 } else { 1485 result = SearchRegExpMultiple<true>(isolate, subject, regexp, 1486 last_match_info, result_array); 1487 } 1488 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1489 return result; 1490} 1491 1492RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) { 1493 HandleScope scope(isolate); 1494 DCHECK_EQ(3, args.length()); 1495 Handle<String> subject = args.at<String>(0); 1496 Handle<JSRegExp> regexp = args.at<JSRegExp>(1); 1497 Handle<JSReceiver> replace_obj = args.at<JSReceiver>(2); 1498 1499 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1500 DCHECK(replace_obj->map().is_callable()); 1501 1502 Factory* factory = isolate->factory(); 1503 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1504 1505 const int flags = regexp->flags(); 1506 DCHECK_EQ(flags & JSRegExp::kGlobal, 0); 1507 1508 // TODO(jgruber): This should be an easy port to CSA with massive payback. 1509 1510 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1511 uint32_t last_index = 0; 1512 if (sticky) { 1513 Handle<Object> last_index_obj(regexp->last_index(), isolate); 1514 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1515 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); 1516 last_index = PositiveNumberToUint32(*last_index_obj); 1517 } 1518 1519 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), 1520 isolate); 1521 1522 // A lastIndex exceeding the string length always returns null (signalling 1523 // failure) in RegExpBuiltinExec, thus we can skip the call. 1524 if (last_index <= static_cast<uint32_t>(subject->length())) { 1525 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1526 isolate, match_indices_obj, 1527 RegExp::Exec(isolate, regexp, subject, last_index, last_match_info)); 1528 } 1529 1530 if (match_indices_obj->IsNull(isolate)) { 1531 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER); 1532 return *subject; 1533 } 1534 1535 Handle<RegExpMatchInfo> match_indices = 1536 Handle<RegExpMatchInfo>::cast(match_indices_obj); 1537 1538 const int index = match_indices->Capture(0); 1539 const int end_of_match = match_indices->Capture(1); 1540 1541 if (sticky) { 1542 regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER); 1543 } 1544 1545 IncrementalStringBuilder builder(isolate); 1546 builder.AppendString(factory->NewSubString(subject, 0, index)); 1547 1548 // Compute the parameter list consisting of the match, captures, index, 1549 // and subject for the replace function invocation. If the RegExp contains 1550 // named captures, they are also passed as the last argument. 1551 1552 // The number of captures plus one for the match. 1553 const int m = match_indices->NumberOfCaptureRegisters() / 2; 1554 1555 bool has_named_captures = false; 1556 Handle<FixedArray> capture_map; 1557 if (m > 1) { 1558 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag())); 1559 1560 Object maybe_capture_map = regexp->capture_name_map(); 1561 if (maybe_capture_map.IsFixedArray()) { 1562 has_named_captures = true; 1563 capture_map = handle(FixedArray::cast(maybe_capture_map), isolate); 1564 } 1565 } 1566 1567 const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures); 1568 if (argc == static_cast<uint32_t>(-1)) { 1569 THROW_NEW_ERROR_RETURN_FAILURE( 1570 isolate, NewRangeError(MessageTemplate::kTooManyArguments)); 1571 } 1572 base::ScopedVector<Handle<Object>> argv(argc); 1573 1574 int cursor = 0; 1575 for (int j = 0; j < m; j++) { 1576 bool ok; 1577 Handle<String> capture = 1578 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); 1579 if (ok) { 1580 argv[cursor++] = capture; 1581 } else { 1582 argv[cursor++] = factory->undefined_value(); 1583 } 1584 } 1585 1586 argv[cursor++] = handle(Smi::FromInt(index), isolate); 1587 argv[cursor++] = subject; 1588 1589 if (has_named_captures) { 1590 argv[cursor++] = ConstructNamedCaptureGroupsObject( 1591 isolate, capture_map, [&argv](int ix) { return *argv[ix]; }); 1592 } 1593 1594 DCHECK_EQ(cursor, argc); 1595 1596 Handle<Object> replacement_obj; 1597 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1598 isolate, replacement_obj, 1599 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, 1600 argv.begin())); 1601 1602 Handle<String> replacement; 1603 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1604 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1605 1606 builder.AppendString(replacement); 1607 builder.AppendString( 1608 factory->NewSubString(subject, end_of_match, subject->length())); 1609 1610 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); 1611} 1612 1613namespace { 1614 1615V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate, 1616 Handle<Object> object, 1617 uint32_t* out) { 1618 if (object->IsUndefined(isolate)) { 1619 *out = kMaxUInt32; 1620 return object; 1621 } 1622 1623 Handle<Object> number; 1624 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object), 1625 Object); 1626 *out = NumberToUint32(*number); 1627 return object; 1628} 1629 1630Handle<JSArray> NewJSArrayWithElements(Isolate* isolate, 1631 Handle<FixedArray> elems, 1632 int num_elems) { 1633 return isolate->factory()->NewJSArrayWithElements( 1634 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems)); 1635} 1636 1637} // namespace 1638 1639// Slow path for: 1640// ES#sec-regexp.prototype-@@replace 1641// RegExp.prototype [ @@split ] ( string, limit ) 1642RUNTIME_FUNCTION(Runtime_RegExpSplit) { 1643 HandleScope scope(isolate); 1644 DCHECK_EQ(3, args.length()); 1645 1646 Handle<JSReceiver> recv = args.at<JSReceiver>(0); 1647 Handle<String> string = args.at<String>(1); 1648 Handle<Object> limit_obj = args.at(2); 1649 1650 Factory* factory = isolate->factory(); 1651 1652 Handle<JSFunction> regexp_fun = isolate->regexp_function(); 1653 Handle<Object> ctor; 1654 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1655 isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun)); 1656 1657 Handle<Object> flags_obj; 1658 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1659 isolate, flags_obj, 1660 JSObject::GetProperty(isolate, recv, factory->flags_string())); 1661 1662 Handle<String> flags; 1663 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags, 1664 Object::ToString(isolate, flags_obj)); 1665 1666 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u'); 1667 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0); 1668 1669 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y'); 1670 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0); 1671 1672 Handle<String> new_flags = flags; 1673 if (!sticky) { 1674 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags, 1675 factory->NewConsString(flags, y_str)); 1676 } 1677 1678 Handle<JSReceiver> splitter; 1679 { 1680 const int argc = 2; 1681 1682 base::ScopedVector<Handle<Object>> argv(argc); 1683 argv[0] = recv; 1684 argv[1] = new_flags; 1685 1686 Handle<Object> splitter_obj; 1687 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1688 isolate, splitter_obj, 1689 Execution::New(isolate, ctor, argc, argv.begin())); 1690 1691 splitter = Handle<JSReceiver>::cast(splitter_obj); 1692 } 1693 1694 uint32_t limit; 1695 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit)); 1696 1697 const uint32_t length = string->length(); 1698 1699 if (limit == 0) return *factory->NewJSArray(0); 1700 1701 if (length == 0) { 1702 Handle<Object> result; 1703 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1704 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, 1705 factory->undefined_value())); 1706 1707 if (!result->IsNull(isolate)) return *factory->NewJSArray(0); 1708 1709 Handle<FixedArray> elems = factory->NewFixedArray(1); 1710 elems->set(0, *string); 1711 return *factory->NewJSArrayWithElements(elems); 1712 } 1713 1714 static const int kInitialArraySize = 8; 1715 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize); 1716 uint32_t num_elems = 0; 1717 1718 uint32_t string_index = 0; 1719 uint32_t prev_string_index = 0; 1720 while (string_index < length) { 1721 RETURN_FAILURE_ON_EXCEPTION( 1722 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index)); 1723 1724 Handle<Object> result; 1725 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1726 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, 1727 factory->undefined_value())); 1728 1729 if (result->IsNull(isolate)) { 1730 string_index = static_cast<uint32_t>( 1731 RegExpUtils::AdvanceStringIndex(string, string_index, unicode)); 1732 continue; 1733 } 1734 1735 Handle<Object> last_index_obj; 1736 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1737 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter)); 1738 1739 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1740 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); 1741 1742 const uint32_t end = 1743 std::min(PositiveNumberToUint32(*last_index_obj), length); 1744 if (end == prev_string_index) { 1745 string_index = static_cast<uint32_t>( 1746 RegExpUtils::AdvanceStringIndex(string, string_index, unicode)); 1747 continue; 1748 } 1749 1750 { 1751 Handle<String> substr = 1752 factory->NewSubString(string, prev_string_index, string_index); 1753 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr); 1754 if (num_elems == limit) { 1755 return *NewJSArrayWithElements(isolate, elems, num_elems); 1756 } 1757 } 1758 1759 prev_string_index = end; 1760 1761 Handle<Object> num_captures_obj; 1762 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1763 isolate, num_captures_obj, 1764 Object::GetProperty(isolate, result, 1765 isolate->factory()->length_string())); 1766 1767 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1768 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj)); 1769 const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj); 1770 1771 for (uint32_t i = 1; i < num_captures; i++) { 1772 Handle<Object> capture; 1773 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1774 isolate, capture, Object::GetElement(isolate, result, i)); 1775 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture); 1776 if (num_elems == limit) { 1777 return *NewJSArrayWithElements(isolate, elems, num_elems); 1778 } 1779 } 1780 1781 string_index = prev_string_index; 1782 } 1783 1784 { 1785 Handle<String> substr = 1786 factory->NewSubString(string, prev_string_index, length); 1787 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr); 1788 } 1789 1790 return *NewJSArrayWithElements(isolate, elems, num_elems); 1791} 1792 1793// Slow path for: 1794// ES#sec-regexp.prototype-@@replace 1795// RegExp.prototype [ @@replace ] ( string, replaceValue ) 1796RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) { 1797 HandleScope scope(isolate); 1798 DCHECK_EQ(3, args.length()); 1799 1800 Handle<JSReceiver> recv = args.at<JSReceiver>(0); 1801 Handle<String> string = args.at<String>(1); 1802 Handle<Object> replace_obj = args.at(2); 1803 1804 Factory* factory = isolate->factory(); 1805 1806 string = String::Flatten(isolate, string); 1807 1808 const bool functional_replace = replace_obj->IsCallable(); 1809 1810 Handle<String> replace; 1811 if (!functional_replace) { 1812 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace, 1813 Object::ToString(isolate, replace_obj)); 1814 } 1815 1816 // Fast-path for unmodified JSRegExps (and non-functional replace). 1817 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) { 1818 // We should never get here with functional replace because unmodified 1819 // regexp and functional replace should be fully handled in CSA code. 1820 CHECK(!functional_replace); 1821 Handle<Object> result; 1822 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1823 isolate, result, 1824 RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace)); 1825 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv)); 1826 return *result; 1827 } 1828 1829 const uint32_t length = string->length(); 1830 1831 Handle<Object> global_obj; 1832 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1833 isolate, global_obj, 1834 JSReceiver::GetProperty(isolate, recv, factory->global_string())); 1835 const bool global = global_obj->BooleanValue(isolate); 1836 1837 bool unicode = false; 1838 if (global) { 1839 Handle<Object> unicode_obj; 1840 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1841 isolate, unicode_obj, 1842 JSReceiver::GetProperty(isolate, recv, factory->unicode_string())); 1843 unicode = unicode_obj->BooleanValue(isolate); 1844 1845 RETURN_FAILURE_ON_EXCEPTION(isolate, 1846 RegExpUtils::SetLastIndex(isolate, recv, 0)); 1847 } 1848 1849 base::SmallVector<Handle<Object>, kStaticVectorSlots> results; 1850 1851 while (true) { 1852 Handle<Object> result; 1853 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1854 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, 1855 factory->undefined_value())); 1856 1857 if (result->IsNull(isolate)) break; 1858 1859 results.emplace_back(result); 1860 if (!global) break; 1861 1862 Handle<Object> match_obj; 1863 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, 1864 Object::GetElement(isolate, result, 0)); 1865 1866 Handle<String> match; 1867 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1868 Object::ToString(isolate, match_obj)); 1869 1870 if (match->length() == 0) { 1871 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex( 1872 isolate, recv, string, unicode)); 1873 } 1874 } 1875 1876 // TODO(jgruber): Look into ReplacementStringBuilder instead. 1877 IncrementalStringBuilder builder(isolate); 1878 uint32_t next_source_position = 0; 1879 1880 for (const auto& result : results) { 1881 HandleScope handle_scope(isolate); 1882 Handle<Object> captures_length_obj; 1883 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1884 isolate, captures_length_obj, 1885 Object::GetProperty(isolate, result, factory->length_string())); 1886 1887 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1888 isolate, captures_length_obj, 1889 Object::ToLength(isolate, captures_length_obj)); 1890 const uint32_t captures_length = 1891 PositiveNumberToUint32(*captures_length_obj); 1892 1893 Handle<Object> match_obj; 1894 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, 1895 Object::GetElement(isolate, result, 0)); 1896 1897 Handle<String> match; 1898 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1899 Object::ToString(isolate, match_obj)); 1900 1901 const int match_length = match->length(); 1902 1903 Handle<Object> position_obj; 1904 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1905 isolate, position_obj, 1906 Object::GetProperty(isolate, result, factory->index_string())); 1907 1908 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1909 isolate, position_obj, Object::ToInteger(isolate, position_obj)); 1910 const uint32_t position = 1911 std::min(PositiveNumberToUint32(*position_obj), length); 1912 1913 // Do not reserve capacity since captures_length is user-controlled. 1914 base::SmallVector<Handle<Object>, kStaticVectorSlots> captures; 1915 1916 for (uint32_t n = 0; n < captures_length; n++) { 1917 Handle<Object> capture; 1918 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1919 isolate, capture, Object::GetElement(isolate, result, n)); 1920 1921 if (!capture->IsUndefined(isolate)) { 1922 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, 1923 Object::ToString(isolate, capture)); 1924 } 1925 captures.emplace_back(capture); 1926 } 1927 1928 Handle<Object> groups_obj = isolate->factory()->undefined_value(); 1929 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1930 isolate, groups_obj, 1931 Object::GetProperty(isolate, result, factory->groups_string())); 1932 1933 const bool has_named_captures = !groups_obj->IsUndefined(isolate); 1934 1935 Handle<String> replacement; 1936 if (functional_replace) { 1937 const uint32_t argc = 1938 GetArgcForReplaceCallable(captures_length, has_named_captures); 1939 if (argc == static_cast<uint32_t>(-1)) { 1940 THROW_NEW_ERROR_RETURN_FAILURE( 1941 isolate, NewRangeError(MessageTemplate::kTooManyArguments)); 1942 } 1943 1944 base::ScopedVector<Handle<Object>> argv(argc); 1945 1946 int cursor = 0; 1947 for (uint32_t j = 0; j < captures_length; j++) { 1948 argv[cursor++] = captures[j]; 1949 } 1950 1951 argv[cursor++] = handle(Smi::FromInt(position), isolate); 1952 argv[cursor++] = string; 1953 if (has_named_captures) argv[cursor++] = groups_obj; 1954 1955 DCHECK_EQ(cursor, argc); 1956 1957 Handle<Object> replacement_obj; 1958 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1959 isolate, replacement_obj, 1960 Execution::Call(isolate, replace_obj, factory->undefined_value(), 1961 argc, argv.begin())); 1962 1963 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1964 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1965 } else { 1966 DCHECK(!functional_replace); 1967 if (!groups_obj->IsUndefined(isolate)) { 1968 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1969 isolate, groups_obj, Object::ToObject(isolate, groups_obj)); 1970 } 1971 VectorBackedMatch m(isolate, string, match, position, 1972 base::VectorOf(captures), groups_obj); 1973 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1974 isolate, replacement, String::GetSubstitution(isolate, &m, replace)); 1975 } 1976 1977 if (position >= next_source_position) { 1978 builder.AppendString( 1979 factory->NewSubString(string, next_source_position, position)); 1980 builder.AppendString(replacement); 1981 1982 next_source_position = position + match_length; 1983 } 1984 } 1985 1986 if (next_source_position < length) { 1987 builder.AppendString( 1988 factory->NewSubString(string, next_source_position, length)); 1989 } 1990 1991 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); 1992} 1993 1994RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { 1995 HandleScope scope(isolate); 1996 DCHECK_EQ(3, args.length()); 1997 // TODO(pwong): To follow the spec more closely and simplify calling code, 1998 // this could handle the canonicalization of pattern and flags. See 1999 // https://tc39.github.io/ecma262/#sec-regexpinitialize 2000 Handle<JSRegExp> regexp = args.at<JSRegExp>(0); 2001 Handle<String> source = args.at<String>(1); 2002 Handle<String> flags = args.at<String>(2); 2003 2004 RETURN_FAILURE_ON_EXCEPTION(isolate, 2005 JSRegExp::Initialize(regexp, source, flags)); 2006 2007 return *regexp; 2008} 2009 2010RUNTIME_FUNCTION(Runtime_IsRegExp) { 2011 SealHandleScope shs(isolate); 2012 DCHECK_EQ(1, args.length()); 2013 Object obj = args[0]; 2014 return isolate->heap()->ToBoolean(obj.IsJSRegExp()); 2015} 2016 2017RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) { 2018 HandleScope scope(isolate); 2019 DCHECK_EQ(1, args.length()); 2020 auto regexp = JSRegExp::cast(args[0]); 2021 Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags()); 2022 return *flags; 2023} 2024 2025} // namespace internal 2026} // namespace v8 2027