1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <functional>
6
7#include "src/base/small-vector.h"
8#include "src/base/strings.h"
9#include "src/common/message-template.h"
10#include "src/execution/arguments-inl.h"
11#include "src/execution/isolate-inl.h"
12#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
13#include "src/logging/counters.h"
14#include "src/numbers/conversions-inl.h"
15#include "src/objects/js-array-inl.h"
16#include "src/objects/js-regexp-inl.h"
17#include "src/regexp/regexp-utils.h"
18#include "src/regexp/regexp.h"
19#include "src/runtime/runtime-utils.h"
20#include "src/strings/string-builder-inl.h"
21#include "src/strings/string-search.h"
22
23namespace v8 {
24namespace internal {
25
26namespace {
27
28// Fairly arbitrary, but intended to fit:
29//
30// - captures
31// - results
32// - parsed replacement pattern parts
33//
34// for small, common cases.
35constexpr int kStaticVectorSlots = 8;
36
37// Returns -1 for failure.
38uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
39                                   bool has_named_captures) {
40  const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
41  const uint32_t kAdditionalArgsWithNamedCaptures = 3;
42  if (num_captures > Code::kMaxArguments) return -1;
43  uint32_t argc = has_named_captures
44                      ? num_captures + kAdditionalArgsWithNamedCaptures
45                      : num_captures + kAdditionalArgsWithoutNamedCaptures;
46  STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
47                                          kAdditionalArgsWithNamedCaptures);
48  return (argc > Code::kMaxArguments) ? -1 : argc;
49}
50
51// Looks up the capture of the given name. Returns the (1-based) numbered
52// capture index or -1 on failure.
53int LookupNamedCapture(const std::function<bool(String)>& name_matches,
54                       FixedArray capture_name_map) {
55  // TODO(jgruber): Sort capture_name_map and do binary search via
56  // internalized strings.
57
58  int maybe_capture_index = -1;
59  const int named_capture_count = capture_name_map.length() >> 1;
60  for (int j = 0; j < named_capture_count; j++) {
61    // The format of {capture_name_map} is documented at
62    // JSRegExp::kIrregexpCaptureNameMapIndex.
63    const int name_ix = j * 2;
64    const int index_ix = j * 2 + 1;
65
66    String capture_name = String::cast(capture_name_map.get(name_ix));
67    if (!name_matches(capture_name)) continue;
68
69    maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
70    break;
71  }
72
73  return maybe_capture_index;
74}
75
76}  // namespace
77
78class CompiledReplacement {
79 public:
80  // Return whether the replacement is simple.
81  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
82               Handle<String> replacement, int capture_count,
83               int subject_length);
84
85  // Use Apply only if Compile returned false.
86  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
87             int32_t* match);
88
89  // Number of distinct parts of the replacement pattern.
90  int parts() { return static_cast<int>(parts_.size()); }
91
92 private:
93  enum PartType {
94    SUBJECT_PREFIX = 1,
95    SUBJECT_SUFFIX,
96    SUBJECT_CAPTURE,
97    REPLACEMENT_SUBSTRING,
98    REPLACEMENT_STRING,
99    EMPTY_REPLACEMENT,
100    NUMBER_OF_PART_TYPES
101  };
102
103  struct ReplacementPart {
104    static inline ReplacementPart SubjectMatch() {
105      return ReplacementPart(SUBJECT_CAPTURE, 0);
106    }
107    static inline ReplacementPart SubjectCapture(int capture_index) {
108      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
109    }
110    static inline ReplacementPart SubjectPrefix() {
111      return ReplacementPart(SUBJECT_PREFIX, 0);
112    }
113    static inline ReplacementPart SubjectSuffix(int subject_length) {
114      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
115    }
116    static inline ReplacementPart ReplacementString() {
117      return ReplacementPart(REPLACEMENT_STRING, 0);
118    }
119    static inline ReplacementPart EmptyReplacement() {
120      return ReplacementPart(EMPTY_REPLACEMENT, 0);
121    }
122    static inline ReplacementPart ReplacementSubString(int from, int to) {
123      DCHECK_LE(0, from);
124      DCHECK_GT(to, from);
125      return ReplacementPart(-from, to);
126    }
127
128    // If tag <= 0 then it is the negation of a start index of a substring of
129    // the replacement pattern, otherwise it's a value from PartType.
130    ReplacementPart(int tag, int data) : tag(tag), data(data) {
131      // Must be non-positive or a PartType value.
132      DCHECK(tag < NUMBER_OF_PART_TYPES);
133    }
134    // Either a value of PartType or a non-positive number that is
135    // the negation of an index into the replacement string.
136    int tag;
137    // The data value's interpretation depends on the value of tag:
138    // tag == SUBJECT_PREFIX ||
139    // tag == SUBJECT_SUFFIX:  data is unused.
140    // tag == SUBJECT_CAPTURE: data is the number of the capture.
141    // tag == REPLACEMENT_SUBSTRING ||
142    // tag == REPLACEMENT_STRING:    data is index into array of substrings
143    //                               of the replacement string.
144    // tag == EMPTY_REPLACEMENT: data is unused.
145    // tag <= 0: Temporary representation of the substring of the replacement
146    //           string ranging over -tag .. data.
147    //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
148    //           substring objects.
149    int data;
150  };
151
152  template <typename Char>
153  bool ParseReplacementPattern(base::Vector<Char> characters,
154                               FixedArray capture_name_map, int capture_count,
155                               int subject_length) {
156    // Equivalent to String::GetSubstitution, except that this method converts
157    // the replacement string into an internal representation that avoids
158    // repeated parsing when used repeatedly.
159    int length = characters.length();
160    int last = 0;
161    for (int i = 0; i < length; i++) {
162      Char c = characters[i];
163      if (c == '$') {
164        int next_index = i + 1;
165        if (next_index == length) {  // No next character!
166          break;
167        }
168        Char c2 = characters[next_index];
169        switch (c2) {
170          case '$':
171            if (i > last) {
172              // There is a substring before. Include the first "$".
173              parts_.emplace_back(
174                  ReplacementPart::ReplacementSubString(last, next_index));
175              last = next_index + 1;  // Continue after the second "$".
176            } else {
177              // Let the next substring start with the second "$".
178              last = next_index;
179            }
180            i = next_index;
181            break;
182          case '`':
183            if (i > last) {
184              parts_.emplace_back(
185                  ReplacementPart::ReplacementSubString(last, i));
186            }
187            parts_.emplace_back(ReplacementPart::SubjectPrefix());
188            i = next_index;
189            last = i + 1;
190            break;
191          case '\'':
192            if (i > last) {
193              parts_.emplace_back(
194                  ReplacementPart::ReplacementSubString(last, i));
195            }
196            parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
197            i = next_index;
198            last = i + 1;
199            break;
200          case '&':
201            if (i > last) {
202              parts_.emplace_back(
203                  ReplacementPart::ReplacementSubString(last, i));
204            }
205            parts_.emplace_back(ReplacementPart::SubjectMatch());
206            i = next_index;
207            last = i + 1;
208            break;
209          case '0':
210          case '1':
211          case '2':
212          case '3':
213          case '4':
214          case '5':
215          case '6':
216          case '7':
217          case '8':
218          case '9': {
219            int capture_ref = c2 - '0';
220            if (capture_ref > capture_count) {
221              i = next_index;
222              continue;
223            }
224            int second_digit_index = next_index + 1;
225            if (second_digit_index < length) {
226              // Peek ahead to see if we have two digits.
227              Char c3 = characters[second_digit_index];
228              if ('0' <= c3 && c3 <= '9') {  // Double digits.
229                int double_digit_ref = capture_ref * 10 + c3 - '0';
230                if (double_digit_ref <= capture_count) {
231                  next_index = second_digit_index;
232                  capture_ref = double_digit_ref;
233                }
234              }
235            }
236            if (capture_ref > 0) {
237              if (i > last) {
238                parts_.emplace_back(
239                    ReplacementPart::ReplacementSubString(last, i));
240              }
241              DCHECK(capture_ref <= capture_count);
242              parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
243              last = next_index + 1;
244            }
245            i = next_index;
246            break;
247          }
248          case '<': {
249            if (capture_name_map.is_null()) {
250              i = next_index;
251              break;
252            }
253
254            // Scan until the next '>', and let the enclosed substring be the
255            // groupName.
256
257            const int name_start_index = next_index + 1;
258            int closing_bracket_index = -1;
259            for (int j = name_start_index; j < length; j++) {
260              if (characters[j] == '>') {
261                closing_bracket_index = j;
262                break;
263              }
264            }
265
266            // If no closing bracket is found, '$<' is treated as a string
267            // literal.
268            if (closing_bracket_index == -1) {
269              i = next_index;
270              break;
271            }
272
273            base::Vector<Char> requested_name =
274                characters.SubVector(name_start_index, closing_bracket_index);
275
276            // Let capture be ? Get(namedCaptures, groupName).
277
278            const int capture_index = LookupNamedCapture(
279                [=](String capture_name) {
280                  return capture_name.IsEqualTo(requested_name);
281                },
282                capture_name_map);
283
284            // If capture is undefined or does not exist, replace the text
285            // through the following '>' with the empty string.
286            // Otherwise, replace the text through the following '>' with
287            // ? ToString(capture).
288
289            DCHECK(capture_index == -1 ||
290                   (1 <= capture_index && capture_index <= capture_count));
291
292            if (i > last) {
293              parts_.emplace_back(
294                  ReplacementPart::ReplacementSubString(last, i));
295            }
296            parts_.emplace_back(
297                (capture_index == -1)
298                    ? ReplacementPart::EmptyReplacement()
299                    : ReplacementPart::SubjectCapture(capture_index));
300            last = closing_bracket_index + 1;
301            i = closing_bracket_index;
302            break;
303          }
304          default:
305            i = next_index;
306            break;
307        }
308      }
309    }
310    if (length > last) {
311      if (last == 0) {
312        // Replacement is simple.  Do not use Apply to do the replacement.
313        return true;
314      } else {
315        parts_.emplace_back(
316            ReplacementPart::ReplacementSubString(last, length));
317      }
318    }
319    return false;
320  }
321
322  base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_;
323  base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_;
324};
325
326bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
327                                  Handle<String> replacement, int capture_count,
328                                  int subject_length) {
329  {
330    DisallowGarbageCollection no_gc;
331    String::FlatContent content = replacement->GetFlatContent(no_gc);
332    DCHECK(content.IsFlat());
333
334    FixedArray capture_name_map;
335    if (capture_count > 0) {
336      DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
337      Object maybe_capture_name_map = regexp->capture_name_map();
338      if (maybe_capture_name_map.IsFixedArray()) {
339        capture_name_map = FixedArray::cast(maybe_capture_name_map);
340      }
341    }
342
343    bool simple;
344    if (content.IsOneByte()) {
345      simple =
346          ParseReplacementPattern(content.ToOneByteVector(), capture_name_map,
347                                  capture_count, subject_length);
348    } else {
349      DCHECK(content.IsTwoByte());
350      simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map,
351                                       capture_count, subject_length);
352    }
353    if (simple) return true;
354  }
355
356  // Find substrings of replacement string and create them as String objects.
357  int substring_index = 0;
358  for (ReplacementPart& part : parts_) {
359    int tag = part.tag;
360    if (tag <= 0) {  // A replacement string slice.
361      int from = -tag;
362      int to = part.data;
363      replacement_substrings_.emplace_back(
364          isolate->factory()->NewSubString(replacement, from, to));
365      part.tag = REPLACEMENT_SUBSTRING;
366      part.data = substring_index;
367      substring_index++;
368    } else if (tag == REPLACEMENT_STRING) {
369      replacement_substrings_.emplace_back(replacement);
370      part.data = substring_index;
371      substring_index++;
372    }
373  }
374  return false;
375}
376
377
378void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
379                                int match_from, int match_to, int32_t* match) {
380  DCHECK_LT(0, parts_.size());
381  for (ReplacementPart& part : parts_) {
382    switch (part.tag) {
383      case SUBJECT_PREFIX:
384        if (match_from > 0) builder->AddSubjectSlice(0, match_from);
385        break;
386      case SUBJECT_SUFFIX: {
387        int subject_length = part.data;
388        if (match_to < subject_length) {
389          builder->AddSubjectSlice(match_to, subject_length);
390        }
391        break;
392      }
393      case SUBJECT_CAPTURE: {
394        int capture = part.data;
395        int from = match[capture * 2];
396        int to = match[capture * 2 + 1];
397        if (from >= 0 && to > from) {
398          builder->AddSubjectSlice(from, to);
399        }
400        break;
401      }
402      case REPLACEMENT_SUBSTRING:
403      case REPLACEMENT_STRING:
404        builder->AddString(replacement_substrings_[part.data]);
405        break;
406      case EMPTY_REPLACEMENT:
407        break;
408      default:
409        UNREACHABLE();
410    }
411  }
412}
413
414void FindOneByteStringIndices(base::Vector<const uint8_t> subject,
415                              uint8_t pattern, std::vector<int>* indices,
416                              unsigned int limit) {
417  DCHECK_LT(0, limit);
418  // Collect indices of pattern in subject using memchr.
419  // Stop after finding at most limit values.
420  const uint8_t* subject_start = subject.begin();
421  const uint8_t* subject_end = subject_start + subject.length();
422  const uint8_t* pos = subject_start;
423  while (limit > 0) {
424    pos = reinterpret_cast<const uint8_t*>(
425        memchr(pos, pattern, subject_end - pos));
426    if (pos == nullptr) return;
427    indices->push_back(static_cast<int>(pos - subject_start));
428    pos++;
429    limit--;
430  }
431}
432
433void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,
434                              base::uc16 pattern, std::vector<int>* indices,
435                              unsigned int limit) {
436  DCHECK_LT(0, limit);
437  const base::uc16* subject_start = subject.begin();
438  const base::uc16* subject_end = subject_start + subject.length();
439  for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
440       pos++) {
441    if (*pos == pattern) {
442      indices->push_back(static_cast<int>(pos - subject_start));
443      limit--;
444    }
445  }
446}
447
448template <typename SubjectChar, typename PatternChar>
449void FindStringIndices(Isolate* isolate,
450                       base::Vector<const SubjectChar> subject,
451                       base::Vector<const PatternChar> pattern,
452                       std::vector<int>* indices, unsigned int limit) {
453  DCHECK_LT(0, limit);
454  // Collect indices of pattern in subject.
455  // Stop after finding at most limit values.
456  int pattern_length = pattern.length();
457  int index = 0;
458  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
459  while (limit > 0) {
460    index = search.Search(subject, index);
461    if (index < 0) return;
462    indices->push_back(index);
463    index += pattern_length;
464    limit--;
465  }
466}
467
468void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
469                               std::vector<int>* indices, unsigned int limit) {
470  {
471    DisallowGarbageCollection no_gc;
472    String::FlatContent subject_content = subject.GetFlatContent(no_gc);
473    String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
474    DCHECK(subject_content.IsFlat());
475    DCHECK(pattern_content.IsFlat());
476    if (subject_content.IsOneByte()) {
477      base::Vector<const uint8_t> subject_vector =
478          subject_content.ToOneByteVector();
479      if (pattern_content.IsOneByte()) {
480        base::Vector<const uint8_t> pattern_vector =
481            pattern_content.ToOneByteVector();
482        if (pattern_vector.length() == 1) {
483          FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
484                                   limit);
485        } else {
486          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487                            limit);
488        }
489      } else {
490        FindStringIndices(isolate, subject_vector,
491                          pattern_content.ToUC16Vector(), indices, limit);
492      }
493    } else {
494      base::Vector<const base::uc16> subject_vector =
495          subject_content.ToUC16Vector();
496      if (pattern_content.IsOneByte()) {
497        base::Vector<const uint8_t> pattern_vector =
498            pattern_content.ToOneByteVector();
499        if (pattern_vector.length() == 1) {
500          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
501                                   limit);
502        } else {
503          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
504                            limit);
505        }
506      } else {
507        base::Vector<const base::uc16> pattern_vector =
508            pattern_content.ToUC16Vector();
509        if (pattern_vector.length() == 1) {
510          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
511                                   limit);
512        } else {
513          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
514                            limit);
515        }
516      }
517    }
518  }
519}
520
521namespace {
522std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
523  std::vector<int>* list = isolate->regexp_indices();
524  list->clear();
525  return list;
526}
527
528void TruncateRegexpIndicesList(Isolate* isolate) {
529  // Same size as smallest zone segment, preserving behavior from the
530  // runtime zone.
531  // TODO(jgruber): Consider removing the reusable regexp_indices list and
532  // simply allocating a new list each time. It feels like we're needlessly
533  // optimizing an edge case.
534  static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
535  std::vector<int>* indices = isolate->regexp_indices();
536  if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
537    // Throw away backing storage.
538    indices->clear();
539    indices->shrink_to_fit();
540  }
541}
542}  // namespace
543
544template <typename ResultSeqString>
545V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
546    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
547    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
548  DCHECK(subject->IsFlat());
549  DCHECK(replacement->IsFlat());
550
551  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
552
553  DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag());
554  String pattern = pattern_regexp->atom_pattern();
555  int subject_len = subject->length();
556  int pattern_len = pattern.length();
557  int replacement_len = replacement->length();
558
559  FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
560
561  if (indices->empty()) return *subject;
562
563  // Detect integer overflow.
564  int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
565                           static_cast<int64_t>(pattern_len)) *
566                              static_cast<int64_t>(indices->size()) +
567                          static_cast<int64_t>(subject_len);
568  int result_len;
569  if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
570    STATIC_ASSERT(String::kMaxLength < kMaxInt);
571    result_len = kMaxInt;  // Provoke exception.
572  } else {
573    result_len = static_cast<int>(result_len_64);
574  }
575  if (result_len == 0) {
576    return ReadOnlyRoots(isolate).empty_string();
577  }
578
579  int subject_pos = 0;
580  int result_pos = 0;
581
582  MaybeHandle<SeqString> maybe_res;
583  if (ResultSeqString::kHasOneByteEncoding) {
584    maybe_res = isolate->factory()->NewRawOneByteString(result_len);
585  } else {
586    maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
587  }
588  Handle<SeqString> untyped_res;
589  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
590  Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
591
592  DisallowGarbageCollection no_gc;
593  for (int index : *indices) {
594    // Copy non-matched subject content.
595    if (subject_pos < index) {
596      String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
597                          subject_pos, index - subject_pos);
598      result_pos += index - subject_pos;
599    }
600
601    // Replace match.
602    if (replacement_len > 0) {
603      String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
604                          replacement_len);
605      result_pos += replacement_len;
606    }
607
608    subject_pos = index + pattern_len;
609  }
610  // Add remaining subject content at the end.
611  if (subject_pos < subject_len) {
612    String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
613                        subject_pos, subject_len - subject_pos);
614  }
615
616  int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
617  RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
618
619  TruncateRegexpIndicesList(isolate);
620
621  return *result;
622}
623
624V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
625    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
626    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
627  DCHECK(subject->IsFlat());
628  DCHECK(replacement->IsFlat());
629
630  int capture_count = regexp->capture_count();
631  int subject_length = subject->length();
632
633  // Ensure the RegExp is compiled so we can access the capture-name map.
634  if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
635    return ReadOnlyRoots(isolate).exception();
636  }
637
638  CompiledReplacement compiled_replacement;
639  const bool simple_replace = compiled_replacement.Compile(
640      isolate, regexp, replacement, capture_count, subject_length);
641
642  // Shortcut for simple non-regexp global replacements
643  if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) {
644    if (subject->IsOneByteRepresentation() &&
645        replacement->IsOneByteRepresentation()) {
646      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
647          isolate, subject, regexp, replacement, last_match_info);
648    } else {
649      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
650          isolate, subject, regexp, replacement, last_match_info);
651    }
652  }
653
654  RegExpGlobalCache global_cache(regexp, subject, isolate);
655  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
656
657  int32_t* current_match = global_cache.FetchNext();
658  if (current_match == nullptr) {
659    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
660    return *subject;
661  }
662
663  // Guessing the number of parts that the final result string is built
664  // from. Global regexps can match any number of times, so we guess
665  // conservatively.
666  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
667  ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
668
669  int prev = 0;
670
671  do {
672    int start = current_match[0];
673    int end = current_match[1];
674
675    if (prev < start) {
676      builder.AddSubjectSlice(prev, start);
677    }
678
679    if (simple_replace) {
680      builder.AddString(replacement);
681    } else {
682      compiled_replacement.Apply(&builder, start, end, current_match);
683    }
684    prev = end;
685
686    current_match = global_cache.FetchNext();
687  } while (current_match != nullptr);
688
689  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
690
691  if (prev < subject_length) {
692    builder.AddSubjectSlice(prev, subject_length);
693  }
694
695  RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
696                           global_cache.LastSuccessfulMatch());
697
698  RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
699}
700
701template <typename ResultSeqString>
702V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
703    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
704    Handle<RegExpMatchInfo> last_match_info) {
705  DCHECK(subject->IsFlat());
706
707  // Shortcut for simple non-regexp global replacements
708  if (regexp->type_tag() == JSRegExp::ATOM) {
709    Handle<String> empty_string = isolate->factory()->empty_string();
710    if (subject->IsOneByteRepresentation()) {
711      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
712          isolate, subject, regexp, empty_string, last_match_info);
713    } else {
714      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
715          isolate, subject, regexp, empty_string, last_match_info);
716    }
717  }
718
719  RegExpGlobalCache global_cache(regexp, subject, isolate);
720  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
721
722  int32_t* current_match = global_cache.FetchNext();
723  if (current_match == nullptr) {
724    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
725    return *subject;
726  }
727
728  int start = current_match[0];
729  int end = current_match[1];
730  int capture_count = regexp->capture_count();
731  int subject_length = subject->length();
732
733  int new_length = subject_length - (end - start);
734  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
735
736  Handle<ResultSeqString> answer;
737  if (ResultSeqString::kHasOneByteEncoding) {
738    answer = Handle<ResultSeqString>::cast(
739        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
740  } else {
741    answer = Handle<ResultSeqString>::cast(
742        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
743  }
744
745  int prev = 0;
746  int position = 0;
747
748  DisallowGarbageCollection no_gc;
749  do {
750    start = current_match[0];
751    end = current_match[1];
752    if (prev < start) {
753      // Add substring subject[prev;start] to answer string.
754      String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
755                          start - prev);
756      position += start - prev;
757    }
758    prev = end;
759
760    current_match = global_cache.FetchNext();
761  } while (current_match != nullptr);
762
763  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
764
765  RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
766                           global_cache.LastSuccessfulMatch());
767
768  if (prev < subject_length) {
769    // Add substring subject[prev;length] to answer string.
770    String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
771                        subject_length - prev);
772    position += subject_length - prev;
773  }
774
775  if (position == 0) return ReadOnlyRoots(isolate).empty_string();
776
777  // Shorten string and fill
778  int string_size = ResultSeqString::SizeFor(position);
779  int allocated_string_size = ResultSeqString::SizeFor(new_length);
780  int delta = allocated_string_size - string_size;
781
782  answer->set_length(position);
783  if (delta == 0) return *answer;
784
785  Address end_of_string = answer->address() + string_size;
786  Heap* heap = isolate->heap();
787
788  // The trimming is performed on a newly allocated object, which is on a
789  // freshly allocated page or on an already swept page. Hence, the sweeper
790  // thread can not get confused with the filler creation. No synchronization
791  // needed.
792  // TODO(hpayer): We should shrink the large object page if the size
793  // of the object changed significantly.
794  if (!heap->IsLargeObject(*answer)) {
795    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
796  }
797  return *answer;
798}
799
800RUNTIME_FUNCTION(Runtime_StringSplit) {
801  HandleScope handle_scope(isolate);
802  DCHECK_EQ(3, args.length());
803  Handle<String> subject = args.at<String>(0);
804  Handle<String> pattern = args.at<String>(1);
805  uint32_t limit = NumberToUint32(args[2]);
806  CHECK_LT(0, limit);
807
808  int subject_length = subject->length();
809  int pattern_length = pattern->length();
810  CHECK_LT(0, pattern_length);
811
812  if (limit == 0xFFFFFFFFu) {
813    FixedArray last_match_cache_unused;
814    Handle<Object> cached_answer(
815        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
816                                   &last_match_cache_unused,
817                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
818        isolate);
819    if (*cached_answer != Smi::zero()) {
820      // The cache FixedArray is a COW-array and can therefore be reused.
821      Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
822          Handle<FixedArray>::cast(cached_answer));
823      return *result;
824    }
825  }
826
827  // The limit can be very large (0xFFFFFFFFu), but since the pattern
828  // isn't empty, we can never create more parts than ~half the length
829  // of the subject.
830
831  subject = String::Flatten(isolate, subject);
832  pattern = String::Flatten(isolate, pattern);
833
834  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
835
836  FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
837
838  if (static_cast<uint32_t>(indices->size()) < limit) {
839    indices->push_back(subject_length);
840  }
841
842  // The list indices now contains the end of each part to create.
843
844  // Create JSArray of substrings separated by separator.
845  int part_count = static_cast<int>(indices->size());
846
847  Handle<JSArray> result =
848      isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
849                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
850
851  DCHECK(result->HasObjectElements());
852
853  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
854
855  if (part_count == 1 && indices->at(0) == subject_length) {
856    elements->set(0, *subject);
857  } else {
858    int part_start = 0;
859    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
860      int part_end = indices->at(i);
861      Handle<String> substring =
862          isolate->factory()->NewProperSubString(subject, part_start, part_end);
863      elements->set(i, *substring);
864      part_start = part_end + pattern_length;
865    });
866  }
867
868  if (limit == 0xFFFFFFFFu) {
869    if (result->HasObjectElements()) {
870      RegExpResultsCache::Enter(isolate, subject, pattern, elements,
871                                isolate->factory()->empty_fixed_array(),
872                                RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
873    }
874  }
875
876  TruncateRegexpIndicesList(isolate);
877
878  return *result;
879}
880
881namespace {
882
883MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp,
884                               Handle<String> subject, int32_t index,
885                               Handle<RegExpMatchInfo> last_match_info,
886                               RegExp::ExecQuirks exec_quirks) {
887  // Due to the way the JS calls are constructed this must be less than the
888  // length of a string, i.e. it is always a Smi.  We check anyway for security.
889  CHECK_LE(0, index);
890  CHECK_GE(subject->length(), index);
891  isolate->counters()->regexp_entry_runtime()->Increment();
892  return RegExp::Exec(isolate, regexp, subject, index, last_match_info,
893                      exec_quirks);
894}
895
896MaybeHandle<Object> ExperimentalOneshotExec(
897    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
898    int32_t index, Handle<RegExpMatchInfo> last_match_info,
899    RegExp::ExecQuirks exec_quirks) {
900  // Due to the way the JS calls are constructed this must be less than the
901  // length of a string, i.e. it is always a Smi.  We check anyway for security.
902  CHECK_LE(0, index);
903  CHECK_GE(subject->length(), index);
904  isolate->counters()->regexp_entry_runtime()->Increment();
905  return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
906                                         last_match_info, exec_quirks);
907}
908
909}  // namespace
910
911RUNTIME_FUNCTION(Runtime_RegExpExec) {
912  HandleScope scope(isolate);
913  DCHECK_EQ(4, args.length());
914  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
915  Handle<String> subject = args.at<String>(1);
916  int32_t index = 0;
917  CHECK(args[2].ToInt32(&index));
918  Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
919  RETURN_RESULT_OR_FAILURE(
920      isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
921                          RegExp::ExecQuirks::kNone));
922}
923
924RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) {
925  HandleScope scope(isolate);
926  DCHECK_EQ(4, args.length());
927  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
928  Handle<String> subject = args.at<String>(1);
929  int32_t index = 0;
930  CHECK(args[2].ToInt32(&index));
931  Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
932  RETURN_RESULT_OR_FAILURE(
933      isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
934                          RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
935}
936
937RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
938  HandleScope scope(isolate);
939  DCHECK_EQ(4, args.length());
940  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
941  Handle<String> subject = args.at<String>(1);
942  int32_t index = 0;
943  CHECK(args[2].ToInt32(&index));
944  Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
945  RETURN_RESULT_OR_FAILURE(
946      isolate,
947      ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
948                              RegExp::ExecQuirks::kNone));
949}
950
951RUNTIME_FUNCTION(
952    Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) {
953  HandleScope scope(isolate);
954  DCHECK_EQ(4, args.length());
955  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
956  Handle<String> subject = args.at<String>(1);
957  int32_t index = 0;
958  CHECK(args[2].ToInt32(&index));
959  Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
960  RETURN_RESULT_OR_FAILURE(
961      isolate,
962      ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
963                              RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
964}
965
966RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
967  HandleScope scope(isolate);
968  DCHECK_EQ(3, args.length());
969  Handle<RegExpMatchInfo> match_info = args.at<RegExpMatchInfo>(1);
970  Handle<Object> maybe_names = args.at(2);
971#ifdef DEBUG
972  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
973  DCHECK(regexp->flags() & JSRegExp::kHasIndices);
974#endif
975
976  return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
977}
978
979namespace {
980
981class MatchInfoBackedMatch : public String::Match {
982 public:
983  MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
984                       Handle<String> subject,
985                       Handle<RegExpMatchInfo> match_info)
986      : isolate_(isolate), match_info_(match_info) {
987    subject_ = String::Flatten(isolate, subject);
988
989    if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) {
990      Object o = regexp->capture_name_map();
991      has_named_captures_ = o.IsFixedArray();
992      if (has_named_captures_) {
993        capture_name_map_ = handle(FixedArray::cast(o), isolate);
994      }
995    } else {
996      has_named_captures_ = false;
997    }
998  }
999
1000  Handle<String> GetMatch() override {
1001    return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
1002  }
1003
1004  Handle<String> GetPrefix() override {
1005    const int match_start = match_info_->Capture(0);
1006    return isolate_->factory()->NewSubString(subject_, 0, match_start);
1007  }
1008
1009  Handle<String> GetSuffix() override {
1010    const int match_end = match_info_->Capture(1);
1011    return isolate_->factory()->NewSubString(subject_, match_end,
1012                                             subject_->length());
1013  }
1014
1015  bool HasNamedCaptures() override { return has_named_captures_; }
1016
1017  int CaptureCount() override {
1018    return match_info_->NumberOfCaptureRegisters() / 2;
1019  }
1020
1021  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1022    Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1023        isolate_, match_info_, i, capture_exists);
1024    return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1025                             : isolate_->factory()->empty_string();
1026  }
1027
1028  MaybeHandle<String> GetNamedCapture(Handle<String> name,
1029                                      CaptureState* state) override {
1030    DCHECK(has_named_captures_);
1031    const int capture_index = LookupNamedCapture(
1032        [=](String capture_name) { return capture_name.Equals(*name); },
1033        *capture_name_map_);
1034
1035    if (capture_index == -1) {
1036      *state = UNMATCHED;
1037      return isolate_->factory()->empty_string();
1038    }
1039
1040    DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1041
1042    bool capture_exists;
1043    Handle<String> capture_value;
1044    ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1045                               GetCapture(capture_index, &capture_exists),
1046                               String);
1047
1048    if (!capture_exists) {
1049      *state = UNMATCHED;
1050      return isolate_->factory()->empty_string();
1051    } else {
1052      *state = MATCHED;
1053      return capture_value;
1054    }
1055  }
1056
1057 private:
1058  Isolate* isolate_;
1059  Handle<String> subject_;
1060  Handle<RegExpMatchInfo> match_info_;
1061
1062  bool has_named_captures_;
1063  Handle<FixedArray> capture_name_map_;
1064};
1065
1066class VectorBackedMatch : public String::Match {
1067 public:
1068  VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1069                    Handle<String> match, int match_position,
1070                    base::Vector<Handle<Object>> captures,
1071                    Handle<Object> groups_obj)
1072      : isolate_(isolate),
1073        match_(match),
1074        match_position_(match_position),
1075        captures_(captures) {
1076    subject_ = String::Flatten(isolate, subject);
1077
1078    DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1079    has_named_captures_ = !groups_obj->IsUndefined(isolate);
1080    if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1081  }
1082
1083  Handle<String> GetMatch() override { return match_; }
1084
1085  Handle<String> GetPrefix() override {
1086    return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1087  }
1088
1089  Handle<String> GetSuffix() override {
1090    const int match_end_position = match_position_ + match_->length();
1091    return isolate_->factory()->NewSubString(subject_, match_end_position,
1092                                             subject_->length());
1093  }
1094
1095  bool HasNamedCaptures() override { return has_named_captures_; }
1096
1097  int CaptureCount() override { return captures_.length(); }
1098
1099  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1100    Handle<Object> capture_obj = captures_[i];
1101    if (capture_obj->IsUndefined(isolate_)) {
1102      *capture_exists = false;
1103      return isolate_->factory()->empty_string();
1104    }
1105    *capture_exists = true;
1106    return Object::ToString(isolate_, capture_obj);
1107  }
1108
1109  MaybeHandle<String> GetNamedCapture(Handle<String> name,
1110                                      CaptureState* state) override {
1111    DCHECK(has_named_captures_);
1112
1113    Handle<Object> capture_obj;
1114    ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1115                               Object::GetProperty(isolate_, groups_obj_, name),
1116                               String);
1117    if (capture_obj->IsUndefined(isolate_)) {
1118      *state = UNMATCHED;
1119      return isolate_->factory()->empty_string();
1120    } else {
1121      *state = MATCHED;
1122      return Object::ToString(isolate_, capture_obj);
1123    }
1124  }
1125
1126 private:
1127  Isolate* isolate_;
1128  Handle<String> subject_;
1129  Handle<String> match_;
1130  const int match_position_;
1131  base::Vector<Handle<Object>> captures_;
1132
1133  bool has_named_captures_;
1134  Handle<JSReceiver> groups_obj_;
1135};
1136
1137// Create the groups object (see also the RegExp result creation in
1138// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1139Handle<JSObject> ConstructNamedCaptureGroupsObject(
1140    Isolate* isolate, Handle<FixedArray> capture_map,
1141    const std::function<Object(int)>& f_get_capture) {
1142  Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1143
1144  const int named_capture_count = capture_map->length() >> 1;
1145  for (int i = 0; i < named_capture_count; i++) {
1146    const int name_ix = i * 2;
1147    const int index_ix = i * 2 + 1;
1148
1149    Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1150                                isolate);
1151    const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1152    DCHECK_GE(capture_ix, 1);  // Explicit groups start at index 1.
1153
1154    Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1155    DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1156
1157    JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1158  }
1159
1160  return groups;
1161}
1162
1163// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1164// separate last match info.  See comment on that function.
1165template <bool has_capture>
1166static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1167                                   Handle<JSRegExp> regexp,
1168                                   Handle<RegExpMatchInfo> last_match_array,
1169                                   Handle<JSArray> result_array) {
1170  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1171  DCHECK_NE(has_capture, regexp->capture_count() == 0);
1172  DCHECK(subject->IsFlat());
1173
1174  // Force tier up to native code for global replaces. The global replace is
1175  // implemented differently for native code and bytecode execution, where the
1176  // native code expects an array to store all the matches, and the bytecode
1177  // matches one at a time, so it's easier to tier-up to native code from the
1178  // start.
1179  if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1180    regexp->MarkTierUpForNextExec();
1181    if (FLAG_trace_regexp_tier_up) {
1182      PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1183             reinterpret_cast<void*>(regexp->ptr()));
1184    }
1185  }
1186
1187  int capture_count = regexp->capture_count();
1188  int subject_length = subject->length();
1189
1190  static const int kMinLengthToCache = 0x1000;
1191
1192  if (subject_length > kMinLengthToCache) {
1193    FixedArray last_match_cache;
1194    Object cached_answer = RegExpResultsCache::Lookup(
1195        isolate->heap(), *subject, regexp->data(), &last_match_cache,
1196        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1197    if (cached_answer.IsFixedArray()) {
1198      int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1199      int32_t* last_match = NewArray<int32_t>(capture_registers);
1200      for (int i = 0; i < capture_registers; i++) {
1201        last_match[i] = Smi::ToInt(last_match_cache.get(i));
1202      }
1203      Handle<FixedArray> cached_fixed_array =
1204          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1205      // The cache FixedArray is a COW-array and we need to return a copy.
1206      Handle<FixedArray> copied_fixed_array =
1207          isolate->factory()->CopyFixedArrayWithMap(
1208              cached_fixed_array, isolate->factory()->fixed_array_map());
1209      JSArray::SetContent(result_array, copied_fixed_array);
1210      RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1211                               capture_count, last_match);
1212      DeleteArray(last_match);
1213      return *result_array;
1214    }
1215  }
1216
1217  RegExpGlobalCache global_cache(regexp, subject, isolate);
1218  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1219
1220  // Ensured in Runtime_RegExpExecMultiple.
1221  DCHECK(result_array->HasObjectElements());
1222  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1223                                     isolate);
1224  if (result_elements->length() < 16) {
1225    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1226  }
1227
1228  FixedArrayBuilder builder(result_elements);
1229
1230  // Position to search from.
1231  int match_start = -1;
1232  int match_end = 0;
1233  bool first = true;
1234
1235  // Two smis before and after the match, for very long strings.
1236  static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1237
1238  while (true) {
1239    int32_t* current_match = global_cache.FetchNext();
1240    if (current_match == nullptr) break;
1241    match_start = current_match[0];
1242    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1243    if (match_end < match_start) {
1244      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1245                                                match_start);
1246    }
1247    match_end = current_match[1];
1248    {
1249      // Avoid accumulating new handles inside loop.
1250      HandleScope temp_scope(isolate);
1251      Handle<String> match;
1252      if (!first) {
1253        match = isolate->factory()->NewProperSubString(subject, match_start,
1254                                                       match_end);
1255      } else {
1256        match =
1257            isolate->factory()->NewSubString(subject, match_start, match_end);
1258        first = false;
1259      }
1260
1261      if (has_capture) {
1262        // Arguments array to replace function is match, captures, index and
1263        // subject, i.e., 3 + capture count in total. If the RegExp contains
1264        // named captures, they are also passed as the last argument.
1265
1266        Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate);
1267        const bool has_named_captures = maybe_capture_map->IsFixedArray();
1268
1269        const int argc =
1270            has_named_captures ? 4 + capture_count : 3 + capture_count;
1271
1272        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1273        int cursor = 0;
1274
1275        elements->set(cursor++, *match);
1276        for (int i = 1; i <= capture_count; i++) {
1277          int start = current_match[i * 2];
1278          if (start >= 0) {
1279            int end = current_match[i * 2 + 1];
1280            DCHECK(start <= end);
1281            Handle<String> substring =
1282                isolate->factory()->NewSubString(subject, start, end);
1283            elements->set(cursor++, *substring);
1284          } else {
1285            DCHECK_GT(0, current_match[i * 2 + 1]);
1286            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1287          }
1288        }
1289
1290        elements->set(cursor++, Smi::FromInt(match_start));
1291        elements->set(cursor++, *subject);
1292
1293        if (has_named_captures) {
1294          Handle<FixedArray> capture_map =
1295              Handle<FixedArray>::cast(maybe_capture_map);
1296          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1297              isolate, capture_map, [=](int ix) { return elements->get(ix); });
1298          elements->set(cursor++, *groups);
1299        }
1300
1301        DCHECK_EQ(cursor, argc);
1302        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1303      } else {
1304        builder.Add(*match);
1305      }
1306    }
1307  }
1308
1309  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1310
1311  if (match_start >= 0) {
1312    // Finished matching, with at least one match.
1313    if (match_end < subject_length) {
1314      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1315                                                subject_length);
1316    }
1317
1318    RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1319                             global_cache.LastSuccessfulMatch());
1320
1321    if (subject_length > kMinLengthToCache) {
1322      // Store the last successful match into the array for caching.
1323      int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1324      Handle<FixedArray> last_match_cache =
1325          isolate->factory()->NewFixedArray(capture_registers);
1326      int32_t* last_match = global_cache.LastSuccessfulMatch();
1327      for (int i = 0; i < capture_registers; i++) {
1328        last_match_cache->set(i, Smi::FromInt(last_match[i]));
1329      }
1330      Handle<FixedArray> result_fixed_array =
1331          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1332      // Cache the result and copy the FixedArray into a COW array.
1333      Handle<FixedArray> copied_fixed_array =
1334          isolate->factory()->CopyFixedArrayWithMap(
1335              result_fixed_array, isolate->factory()->fixed_array_map());
1336      RegExpResultsCache::Enter(
1337          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1338          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1339    }
1340    return *builder.ToJSArray(result_array);
1341  } else {
1342    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
1343  }
1344}
1345
1346// Legacy implementation of RegExp.prototype[Symbol.replace] which
1347// doesn't properly call the underlying exec method.
1348V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1349    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1350    Handle<String> replace) {
1351  // Functional fast-paths are dispatched directly by replace builtin.
1352  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1353
1354  Factory* factory = isolate->factory();
1355
1356  const int flags = regexp->flags();
1357  const bool global = (flags & JSRegExp::kGlobal) != 0;
1358  const bool sticky = (flags & JSRegExp::kSticky) != 0;
1359
1360  replace = String::Flatten(isolate, replace);
1361
1362  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1363
1364  if (!global) {
1365    // Non-global regexp search, string replace.
1366
1367    uint32_t last_index = 0;
1368    if (sticky) {
1369      Handle<Object> last_index_obj(regexp->last_index(), isolate);
1370      ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1371                                 Object::ToLength(isolate, last_index_obj),
1372                                 String);
1373      last_index = PositiveNumberToUint32(*last_index_obj);
1374    }
1375
1376    Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1377                                     isolate);
1378
1379    // A lastIndex exceeding the string length always returns null (signalling
1380    // failure) in RegExpBuiltinExec, thus we can skip the call.
1381    if (last_index <= static_cast<uint32_t>(string->length())) {
1382      ASSIGN_RETURN_ON_EXCEPTION(
1383          isolate, match_indices_obj,
1384          RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1385          String);
1386    }
1387
1388    if (match_indices_obj->IsNull(isolate)) {
1389      if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1390      return string;
1391    }
1392
1393    auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1394
1395    const int start_index = match_indices->Capture(0);
1396    const int end_index = match_indices->Capture(1);
1397
1398    if (sticky) {
1399      regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1400    }
1401
1402    IncrementalStringBuilder builder(isolate);
1403    builder.AppendString(factory->NewSubString(string, 0, start_index));
1404
1405    if (replace->length() > 0) {
1406      MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1407      Handle<String> replacement;
1408      ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1409                                 String::GetSubstitution(isolate, &m, replace),
1410                                 String);
1411      builder.AppendString(replacement);
1412    }
1413
1414    builder.AppendString(
1415        factory->NewSubString(string, end_index, string->length()));
1416    return builder.Finish();
1417  } else {
1418    // Global regexp search, string replace.
1419    DCHECK(global);
1420    RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1421                        String);
1422
1423    // Force tier up to native code for global replaces. The global replace is
1424    // implemented differently for native code and bytecode execution, where the
1425    // native code expects an array to store all the matches, and the bytecode
1426    // matches one at a time, so it's easier to tier-up to native code from the
1427    // start.
1428    if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1429      regexp->MarkTierUpForNextExec();
1430      if (FLAG_trace_regexp_tier_up) {
1431        PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1432               reinterpret_cast<void*>(regexp->ptr()));
1433      }
1434    }
1435
1436    if (replace->length() == 0) {
1437      if (string->IsOneByteRepresentation()) {
1438        Object result =
1439            StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1440                isolate, string, regexp, last_match_info);
1441        return handle(String::cast(result), isolate);
1442      } else {
1443        Object result =
1444            StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1445                isolate, string, regexp, last_match_info);
1446        return handle(String::cast(result), isolate);
1447      }
1448    }
1449
1450    Object result = StringReplaceGlobalRegExpWithString(
1451        isolate, string, regexp, replace, last_match_info);
1452    if (result.IsString()) {
1453      return handle(String::cast(result), isolate);
1454    } else {
1455      return MaybeHandle<String>();
1456    }
1457  }
1458
1459  UNREACHABLE();
1460}
1461
1462}  // namespace
1463
1464// This is only called for StringReplaceGlobalRegExpWithFunction.
1465RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1466  HandleScope handles(isolate);
1467  DCHECK_EQ(4, args.length());
1468
1469  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
1470  Handle<String> subject = args.at<String>(1);
1471  Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(2);
1472  Handle<JSArray> result_array = args.at<JSArray>(3);
1473
1474  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1475  CHECK(result_array->HasObjectElements());
1476
1477  subject = String::Flatten(isolate, subject);
1478  CHECK(regexp->flags() & JSRegExp::kGlobal);
1479
1480  Object result;
1481  if (regexp->capture_count() == 0) {
1482    result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1483                                         last_match_info, result_array);
1484  } else {
1485    result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1486                                        last_match_info, result_array);
1487  }
1488  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1489  return result;
1490}
1491
1492RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1493  HandleScope scope(isolate);
1494  DCHECK_EQ(3, args.length());
1495  Handle<String> subject = args.at<String>(0);
1496  Handle<JSRegExp> regexp = args.at<JSRegExp>(1);
1497  Handle<JSReceiver> replace_obj = args.at<JSReceiver>(2);
1498
1499  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1500  DCHECK(replace_obj->map().is_callable());
1501
1502  Factory* factory = isolate->factory();
1503  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1504
1505  const int flags = regexp->flags();
1506  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1507
1508  // TODO(jgruber): This should be an easy port to CSA with massive payback.
1509
1510  const bool sticky = (flags & JSRegExp::kSticky) != 0;
1511  uint32_t last_index = 0;
1512  if (sticky) {
1513    Handle<Object> last_index_obj(regexp->last_index(), isolate);
1514    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1515        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1516    last_index = PositiveNumberToUint32(*last_index_obj);
1517  }
1518
1519  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1520                                   isolate);
1521
1522  // A lastIndex exceeding the string length always returns null (signalling
1523  // failure) in RegExpBuiltinExec, thus we can skip the call.
1524  if (last_index <= static_cast<uint32_t>(subject->length())) {
1525    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1526        isolate, match_indices_obj,
1527        RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
1528  }
1529
1530  if (match_indices_obj->IsNull(isolate)) {
1531    if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1532    return *subject;
1533  }
1534
1535  Handle<RegExpMatchInfo> match_indices =
1536      Handle<RegExpMatchInfo>::cast(match_indices_obj);
1537
1538  const int index = match_indices->Capture(0);
1539  const int end_of_match = match_indices->Capture(1);
1540
1541  if (sticky) {
1542    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1543  }
1544
1545  IncrementalStringBuilder builder(isolate);
1546  builder.AppendString(factory->NewSubString(subject, 0, index));
1547
1548  // Compute the parameter list consisting of the match, captures, index,
1549  // and subject for the replace function invocation. If the RegExp contains
1550  // named captures, they are also passed as the last argument.
1551
1552  // The number of captures plus one for the match.
1553  const int m = match_indices->NumberOfCaptureRegisters() / 2;
1554
1555  bool has_named_captures = false;
1556  Handle<FixedArray> capture_map;
1557  if (m > 1) {
1558    DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
1559
1560    Object maybe_capture_map = regexp->capture_name_map();
1561    if (maybe_capture_map.IsFixedArray()) {
1562      has_named_captures = true;
1563      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1564    }
1565  }
1566
1567  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1568  if (argc == static_cast<uint32_t>(-1)) {
1569    THROW_NEW_ERROR_RETURN_FAILURE(
1570        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1571  }
1572  base::ScopedVector<Handle<Object>> argv(argc);
1573
1574  int cursor = 0;
1575  for (int j = 0; j < m; j++) {
1576    bool ok;
1577    Handle<String> capture =
1578        RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1579    if (ok) {
1580      argv[cursor++] = capture;
1581    } else {
1582      argv[cursor++] = factory->undefined_value();
1583    }
1584  }
1585
1586  argv[cursor++] = handle(Smi::FromInt(index), isolate);
1587  argv[cursor++] = subject;
1588
1589  if (has_named_captures) {
1590    argv[cursor++] = ConstructNamedCaptureGroupsObject(
1591        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1592  }
1593
1594  DCHECK_EQ(cursor, argc);
1595
1596  Handle<Object> replacement_obj;
1597  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1598      isolate, replacement_obj,
1599      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1600                      argv.begin()));
1601
1602  Handle<String> replacement;
1603  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1604      isolate, replacement, Object::ToString(isolate, replacement_obj));
1605
1606  builder.AppendString(replacement);
1607  builder.AppendString(
1608      factory->NewSubString(subject, end_of_match, subject->length()));
1609
1610  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1611}
1612
1613namespace {
1614
1615V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1616                                                   Handle<Object> object,
1617                                                   uint32_t* out) {
1618  if (object->IsUndefined(isolate)) {
1619    *out = kMaxUInt32;
1620    return object;
1621  }
1622
1623  Handle<Object> number;
1624  ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1625                             Object);
1626  *out = NumberToUint32(*number);
1627  return object;
1628}
1629
1630Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1631                                       Handle<FixedArray> elems,
1632                                       int num_elems) {
1633  return isolate->factory()->NewJSArrayWithElements(
1634      FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1635}
1636
1637}  // namespace
1638
1639// Slow path for:
1640// ES#sec-regexp.prototype-@@replace
1641// RegExp.prototype [ @@split ] ( string, limit )
1642RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1643  HandleScope scope(isolate);
1644  DCHECK_EQ(3, args.length());
1645
1646  Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1647  Handle<String> string = args.at<String>(1);
1648  Handle<Object> limit_obj = args.at(2);
1649
1650  Factory* factory = isolate->factory();
1651
1652  Handle<JSFunction> regexp_fun = isolate->regexp_function();
1653  Handle<Object> ctor;
1654  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655      isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1656
1657  Handle<Object> flags_obj;
1658  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1659      isolate, flags_obj,
1660      JSObject::GetProperty(isolate, recv, factory->flags_string()));
1661
1662  Handle<String> flags;
1663  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1664                                     Object::ToString(isolate, flags_obj));
1665
1666  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1667  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1668
1669  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1670  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1671
1672  Handle<String> new_flags = flags;
1673  if (!sticky) {
1674    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1675                                       factory->NewConsString(flags, y_str));
1676  }
1677
1678  Handle<JSReceiver> splitter;
1679  {
1680    const int argc = 2;
1681
1682    base::ScopedVector<Handle<Object>> argv(argc);
1683    argv[0] = recv;
1684    argv[1] = new_flags;
1685
1686    Handle<Object> splitter_obj;
1687    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1688        isolate, splitter_obj,
1689        Execution::New(isolate, ctor, argc, argv.begin()));
1690
1691    splitter = Handle<JSReceiver>::cast(splitter_obj);
1692  }
1693
1694  uint32_t limit;
1695  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1696
1697  const uint32_t length = string->length();
1698
1699  if (limit == 0) return *factory->NewJSArray(0);
1700
1701  if (length == 0) {
1702    Handle<Object> result;
1703    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1704        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1705                                                 factory->undefined_value()));
1706
1707    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1708
1709    Handle<FixedArray> elems = factory->NewFixedArray(1);
1710    elems->set(0, *string);
1711    return *factory->NewJSArrayWithElements(elems);
1712  }
1713
1714  static const int kInitialArraySize = 8;
1715  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1716  uint32_t num_elems = 0;
1717
1718  uint32_t string_index = 0;
1719  uint32_t prev_string_index = 0;
1720  while (string_index < length) {
1721    RETURN_FAILURE_ON_EXCEPTION(
1722        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1723
1724    Handle<Object> result;
1725    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1726        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1727                                                 factory->undefined_value()));
1728
1729    if (result->IsNull(isolate)) {
1730      string_index = static_cast<uint32_t>(
1731          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1732      continue;
1733    }
1734
1735    Handle<Object> last_index_obj;
1736    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1737        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1738
1739    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1740        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1741
1742    const uint32_t end =
1743        std::min(PositiveNumberToUint32(*last_index_obj), length);
1744    if (end == prev_string_index) {
1745      string_index = static_cast<uint32_t>(
1746          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1747      continue;
1748    }
1749
1750    {
1751      Handle<String> substr =
1752          factory->NewSubString(string, prev_string_index, string_index);
1753      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1754      if (num_elems == limit) {
1755        return *NewJSArrayWithElements(isolate, elems, num_elems);
1756      }
1757    }
1758
1759    prev_string_index = end;
1760
1761    Handle<Object> num_captures_obj;
1762    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1763        isolate, num_captures_obj,
1764        Object::GetProperty(isolate, result,
1765                            isolate->factory()->length_string()));
1766
1767    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1768        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1769    const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1770
1771    for (uint32_t i = 1; i < num_captures; i++) {
1772      Handle<Object> capture;
1773      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1774          isolate, capture, Object::GetElement(isolate, result, i));
1775      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1776      if (num_elems == limit) {
1777        return *NewJSArrayWithElements(isolate, elems, num_elems);
1778      }
1779    }
1780
1781    string_index = prev_string_index;
1782  }
1783
1784  {
1785    Handle<String> substr =
1786        factory->NewSubString(string, prev_string_index, length);
1787    elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1788  }
1789
1790  return *NewJSArrayWithElements(isolate, elems, num_elems);
1791}
1792
1793// Slow path for:
1794// ES#sec-regexp.prototype-@@replace
1795// RegExp.prototype [ @@replace ] ( string, replaceValue )
1796RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1797  HandleScope scope(isolate);
1798  DCHECK_EQ(3, args.length());
1799
1800  Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1801  Handle<String> string = args.at<String>(1);
1802  Handle<Object> replace_obj = args.at(2);
1803
1804  Factory* factory = isolate->factory();
1805
1806  string = String::Flatten(isolate, string);
1807
1808  const bool functional_replace = replace_obj->IsCallable();
1809
1810  Handle<String> replace;
1811  if (!functional_replace) {
1812    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1813                                       Object::ToString(isolate, replace_obj));
1814  }
1815
1816  // Fast-path for unmodified JSRegExps (and non-functional replace).
1817  if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1818    // We should never get here with functional replace because unmodified
1819    // regexp and functional replace should be fully handled in CSA code.
1820    CHECK(!functional_replace);
1821    Handle<Object> result;
1822    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1823        isolate, result,
1824        RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1825    DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1826    return *result;
1827  }
1828
1829  const uint32_t length = string->length();
1830
1831  Handle<Object> global_obj;
1832  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1833      isolate, global_obj,
1834      JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1835  const bool global = global_obj->BooleanValue(isolate);
1836
1837  bool unicode = false;
1838  if (global) {
1839    Handle<Object> unicode_obj;
1840    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1841        isolate, unicode_obj,
1842        JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1843    unicode = unicode_obj->BooleanValue(isolate);
1844
1845    RETURN_FAILURE_ON_EXCEPTION(isolate,
1846                                RegExpUtils::SetLastIndex(isolate, recv, 0));
1847  }
1848
1849  base::SmallVector<Handle<Object>, kStaticVectorSlots> results;
1850
1851  while (true) {
1852    Handle<Object> result;
1853    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1854        isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1855                                                 factory->undefined_value()));
1856
1857    if (result->IsNull(isolate)) break;
1858
1859    results.emplace_back(result);
1860    if (!global) break;
1861
1862    Handle<Object> match_obj;
1863    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1864                                       Object::GetElement(isolate, result, 0));
1865
1866    Handle<String> match;
1867    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1868                                       Object::ToString(isolate, match_obj));
1869
1870    if (match->length() == 0) {
1871      RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1872                                               isolate, recv, string, unicode));
1873    }
1874  }
1875
1876  // TODO(jgruber): Look into ReplacementStringBuilder instead.
1877  IncrementalStringBuilder builder(isolate);
1878  uint32_t next_source_position = 0;
1879
1880  for (const auto& result : results) {
1881    HandleScope handle_scope(isolate);
1882    Handle<Object> captures_length_obj;
1883    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884        isolate, captures_length_obj,
1885        Object::GetProperty(isolate, result, factory->length_string()));
1886
1887    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1888        isolate, captures_length_obj,
1889        Object::ToLength(isolate, captures_length_obj));
1890    const uint32_t captures_length =
1891        PositiveNumberToUint32(*captures_length_obj);
1892
1893    Handle<Object> match_obj;
1894    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1895                                       Object::GetElement(isolate, result, 0));
1896
1897    Handle<String> match;
1898    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1899                                       Object::ToString(isolate, match_obj));
1900
1901    const int match_length = match->length();
1902
1903    Handle<Object> position_obj;
1904    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1905        isolate, position_obj,
1906        Object::GetProperty(isolate, result, factory->index_string()));
1907
1908    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1909        isolate, position_obj, Object::ToInteger(isolate, position_obj));
1910    const uint32_t position =
1911        std::min(PositiveNumberToUint32(*position_obj), length);
1912
1913    // Do not reserve capacity since captures_length is user-controlled.
1914    base::SmallVector<Handle<Object>, kStaticVectorSlots> captures;
1915
1916    for (uint32_t n = 0; n < captures_length; n++) {
1917      Handle<Object> capture;
1918      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1919          isolate, capture, Object::GetElement(isolate, result, n));
1920
1921      if (!capture->IsUndefined(isolate)) {
1922        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1923                                           Object::ToString(isolate, capture));
1924      }
1925      captures.emplace_back(capture);
1926    }
1927
1928    Handle<Object> groups_obj = isolate->factory()->undefined_value();
1929    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1930        isolate, groups_obj,
1931        Object::GetProperty(isolate, result, factory->groups_string()));
1932
1933    const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1934
1935    Handle<String> replacement;
1936    if (functional_replace) {
1937      const uint32_t argc =
1938          GetArgcForReplaceCallable(captures_length, has_named_captures);
1939      if (argc == static_cast<uint32_t>(-1)) {
1940        THROW_NEW_ERROR_RETURN_FAILURE(
1941            isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1942      }
1943
1944      base::ScopedVector<Handle<Object>> argv(argc);
1945
1946      int cursor = 0;
1947      for (uint32_t j = 0; j < captures_length; j++) {
1948        argv[cursor++] = captures[j];
1949      }
1950
1951      argv[cursor++] = handle(Smi::FromInt(position), isolate);
1952      argv[cursor++] = string;
1953      if (has_named_captures) argv[cursor++] = groups_obj;
1954
1955      DCHECK_EQ(cursor, argc);
1956
1957      Handle<Object> replacement_obj;
1958      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1959          isolate, replacement_obj,
1960          Execution::Call(isolate, replace_obj, factory->undefined_value(),
1961                          argc, argv.begin()));
1962
1963      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1964          isolate, replacement, Object::ToString(isolate, replacement_obj));
1965    } else {
1966      DCHECK(!functional_replace);
1967      if (!groups_obj->IsUndefined(isolate)) {
1968        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1969            isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1970      }
1971      VectorBackedMatch m(isolate, string, match, position,
1972                          base::VectorOf(captures), groups_obj);
1973      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1974          isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1975    }
1976
1977    if (position >= next_source_position) {
1978      builder.AppendString(
1979          factory->NewSubString(string, next_source_position, position));
1980      builder.AppendString(replacement);
1981
1982      next_source_position = position + match_length;
1983    }
1984  }
1985
1986  if (next_source_position < length) {
1987    builder.AppendString(
1988        factory->NewSubString(string, next_source_position, length));
1989  }
1990
1991  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1992}
1993
1994RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1995  HandleScope scope(isolate);
1996  DCHECK_EQ(3, args.length());
1997  // TODO(pwong): To follow the spec more closely and simplify calling code,
1998  // this could handle the canonicalization of pattern and flags. See
1999  // https://tc39.github.io/ecma262/#sec-regexpinitialize
2000  Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
2001  Handle<String> source = args.at<String>(1);
2002  Handle<String> flags = args.at<String>(2);
2003
2004  RETURN_FAILURE_ON_EXCEPTION(isolate,
2005                              JSRegExp::Initialize(regexp, source, flags));
2006
2007  return *regexp;
2008}
2009
2010RUNTIME_FUNCTION(Runtime_IsRegExp) {
2011  SealHandleScope shs(isolate);
2012  DCHECK_EQ(1, args.length());
2013  Object obj = args[0];
2014  return isolate->heap()->ToBoolean(obj.IsJSRegExp());
2015}
2016
2017RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2018  HandleScope scope(isolate);
2019  DCHECK_EQ(1, args.length());
2020  auto regexp = JSRegExp::cast(args[0]);
2021  Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags());
2022  return *flags;
2023}
2024
2025}  // namespace internal
2026}  // namespace v8
2027