1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <functional>
6 
7 #include "src/base/small-vector.h"
8 #include "src/base/strings.h"
9 #include "src/common/message-template.h"
10 #include "src/execution/arguments-inl.h"
11 #include "src/execution/isolate-inl.h"
12 #include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
13 #include "src/logging/counters.h"
14 #include "src/numbers/conversions-inl.h"
15 #include "src/objects/js-array-inl.h"
16 #include "src/objects/js-regexp-inl.h"
17 #include "src/regexp/regexp-utils.h"
18 #include "src/regexp/regexp.h"
19 #include "src/runtime/runtime-utils.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-search.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 namespace {
27 
28 // Fairly arbitrary, but intended to fit:
29 //
30 // - captures
31 // - results
32 // - parsed replacement pattern parts
33 //
34 // for small, common cases.
35 constexpr int kStaticVectorSlots = 8;
36 
37 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures, bool has_named_captures)38 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
39                                    bool has_named_captures) {
40   const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
41   const uint32_t kAdditionalArgsWithNamedCaptures = 3;
42   if (num_captures > Code::kMaxArguments) return -1;
43   uint32_t argc = has_named_captures
44                       ? num_captures + kAdditionalArgsWithNamedCaptures
45                       : num_captures + kAdditionalArgsWithoutNamedCaptures;
46   STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
47                                           kAdditionalArgsWithNamedCaptures);
48   return (argc > Code::kMaxArguments) ? -1 : argc;
49 }
50 
51 // Looks up the capture of the given name. Returns the (1-based) numbered
52 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool(String)>& name_matches, FixedArray capture_name_map)53 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
54                        FixedArray capture_name_map) {
55   // TODO(jgruber): Sort capture_name_map and do binary search via
56   // internalized strings.
57 
58   int maybe_capture_index = -1;
59   const int named_capture_count = capture_name_map.length() >> 1;
60   for (int j = 0; j < named_capture_count; j++) {
61     // The format of {capture_name_map} is documented at
62     // JSRegExp::kIrregexpCaptureNameMapIndex.
63     const int name_ix = j * 2;
64     const int index_ix = j * 2 + 1;
65 
66     String capture_name = String::cast(capture_name_map.get(name_ix));
67     if (!name_matches(capture_name)) continue;
68 
69     maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
70     break;
71   }
72 
73   return maybe_capture_index;
74 }
75 
76 }  // namespace
77 
78 class CompiledReplacement {
79  public:
80   // Return whether the replacement is simple.
81   bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
82                Handle<String> replacement, int capture_count,
83                int subject_length);
84 
85   // Use Apply only if Compile returned false.
86   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
87              int32_t* match);
88 
89   // Number of distinct parts of the replacement pattern.
parts()90   int parts() { return static_cast<int>(parts_.size()); }
91 
92  private:
93   enum PartType {
94     SUBJECT_PREFIX = 1,
95     SUBJECT_SUFFIX,
96     SUBJECT_CAPTURE,
97     REPLACEMENT_SUBSTRING,
98     REPLACEMENT_STRING,
99     EMPTY_REPLACEMENT,
100     NUMBER_OF_PART_TYPES
101   };
102 
103   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart104     static inline ReplacementPart SubjectMatch() {
105       return ReplacementPart(SUBJECT_CAPTURE, 0);
106     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart107     static inline ReplacementPart SubjectCapture(int capture_index) {
108       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
109     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart110     static inline ReplacementPart SubjectPrefix() {
111       return ReplacementPart(SUBJECT_PREFIX, 0);
112     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart113     static inline ReplacementPart SubjectSuffix(int subject_length) {
114       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
115     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart116     static inline ReplacementPart ReplacementString() {
117       return ReplacementPart(REPLACEMENT_STRING, 0);
118     }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart119     static inline ReplacementPart EmptyReplacement() {
120       return ReplacementPart(EMPTY_REPLACEMENT, 0);
121     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart122     static inline ReplacementPart ReplacementSubString(int from, int to) {
123       DCHECK_LE(0, from);
124       DCHECK_GT(to, from);
125       return ReplacementPart(-from, to);
126     }
127 
128     // If tag <= 0 then it is the negation of a start index of a substring of
129     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart130     ReplacementPart(int tag, int data) : tag(tag), data(data) {
131       // Must be non-positive or a PartType value.
132       DCHECK(tag < NUMBER_OF_PART_TYPES);
133     }
134     // Either a value of PartType or a non-positive number that is
135     // the negation of an index into the replacement string.
136     int tag;
137     // The data value's interpretation depends on the value of tag:
138     // tag == SUBJECT_PREFIX ||
139     // tag == SUBJECT_SUFFIX:  data is unused.
140     // tag == SUBJECT_CAPTURE: data is the number of the capture.
141     // tag == REPLACEMENT_SUBSTRING ||
142     // tag == REPLACEMENT_STRING:    data is index into array of substrings
143     //                               of the replacement string.
144     // tag == EMPTY_REPLACEMENT: data is unused.
145     // tag <= 0: Temporary representation of the substring of the replacement
146     //           string ranging over -tag .. data.
147     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
148     //           substring objects.
149     int data;
150   };
151 
152   template <typename Char>
153   bool ParseReplacementPattern(base::Vector<Char> characters,
154                                FixedArray capture_name_map, int capture_count,
155                                int subject_length) {
156     // Equivalent to String::GetSubstitution, except that this method converts
157     // the replacement string into an internal representation that avoids
158     // repeated parsing when used repeatedly.
159     int length = characters.length();
160     int last = 0;
161     for (int i = 0; i < length; i++) {
162       Char c = characters[i];
163       if (c == '$') {
164         int next_index = i + 1;
165         if (next_index == length) {  // No next character!
166           break;
167         }
168         Char c2 = characters[next_index];
169         switch (c2) {
170           case '$':
171             if (i > last) {
172               // There is a substring before. Include the first "$".
173               parts_.emplace_back(
174                   ReplacementPart::ReplacementSubString(last, next_index));
175               last = next_index + 1;  // Continue after the second "$".
176             } else {
177               // Let the next substring start with the second "$".
178               last = next_index;
179             }
180             i = next_index;
181             break;
182           case '`':
183             if (i > last) {
184               parts_.emplace_back(
185                   ReplacementPart::ReplacementSubString(last, i));
186             }
187             parts_.emplace_back(ReplacementPart::SubjectPrefix());
188             i = next_index;
189             last = i + 1;
190             break;
191           case '\'':
192             if (i > last) {
193               parts_.emplace_back(
194                   ReplacementPart::ReplacementSubString(last, i));
195             }
196             parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
197             i = next_index;
198             last = i + 1;
199             break;
200           case '&':
201             if (i > last) {
202               parts_.emplace_back(
203                   ReplacementPart::ReplacementSubString(last, i));
204             }
205             parts_.emplace_back(ReplacementPart::SubjectMatch());
206             i = next_index;
207             last = i + 1;
208             break;
209           case '0':
210           case '1':
211           case '2':
212           case '3':
213           case '4':
214           case '5':
215           case '6':
216           case '7':
217           case '8':
218           case '9': {
219             int capture_ref = c2 - '0';
220             if (capture_ref > capture_count) {
221               i = next_index;
222               continue;
223             }
224             int second_digit_index = next_index + 1;
225             if (second_digit_index < length) {
226               // Peek ahead to see if we have two digits.
227               Char c3 = characters[second_digit_index];
228               if ('0' <= c3 && c3 <= '9') {  // Double digits.
229                 int double_digit_ref = capture_ref * 10 + c3 - '0';
230                 if (double_digit_ref <= capture_count) {
231                   next_index = second_digit_index;
232                   capture_ref = double_digit_ref;
233                 }
234               }
235             }
236             if (capture_ref > 0) {
237               if (i > last) {
238                 parts_.emplace_back(
239                     ReplacementPart::ReplacementSubString(last, i));
240               }
241               DCHECK(capture_ref <= capture_count);
242               parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
243               last = next_index + 1;
244             }
245             i = next_index;
246             break;
247           }
248           case '<': {
249             if (capture_name_map.is_null()) {
250               i = next_index;
251               break;
252             }
253 
254             // Scan until the next '>', and let the enclosed substring be the
255             // groupName.
256 
257             const int name_start_index = next_index + 1;
258             int closing_bracket_index = -1;
259             for (int j = name_start_index; j < length; j++) {
260               if (characters[j] == '>') {
261                 closing_bracket_index = j;
262                 break;
263               }
264             }
265 
266             // If no closing bracket is found, '$<' is treated as a string
267             // literal.
268             if (closing_bracket_index == -1) {
269               i = next_index;
270               break;
271             }
272 
273             base::Vector<Char> requested_name =
274                 characters.SubVector(name_start_index, closing_bracket_index);
275 
276             // Let capture be ? Get(namedCaptures, groupName).
277 
278             const int capture_index = LookupNamedCapture(
279                 [=](String capture_name) {
280                   return capture_name.IsEqualTo(requested_name);
281                 },
282                 capture_name_map);
283 
284             // If capture is undefined or does not exist, replace the text
285             // through the following '>' with the empty string.
286             // Otherwise, replace the text through the following '>' with
287             // ? ToString(capture).
288 
289             DCHECK(capture_index == -1 ||
290                    (1 <= capture_index && capture_index <= capture_count));
291 
292             if (i > last) {
293               parts_.emplace_back(
294                   ReplacementPart::ReplacementSubString(last, i));
295             }
296             parts_.emplace_back(
297                 (capture_index == -1)
298                     ? ReplacementPart::EmptyReplacement()
299                     : ReplacementPart::SubjectCapture(capture_index));
300             last = closing_bracket_index + 1;
301             i = closing_bracket_index;
302             break;
303           }
304           default:
305             i = next_index;
306             break;
307         }
308       }
309     }
310     if (length > last) {
311       if (last == 0) {
312         // Replacement is simple.  Do not use Apply to do the replacement.
313         return true;
314       } else {
315         parts_.emplace_back(
316             ReplacementPart::ReplacementSubString(last, length));
317       }
318     }
319     return false;
320   }
321 
322   base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_;
323   base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_;
324 };
325 
Compile(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> replacement, int capture_count, int subject_length)326 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
327                                   Handle<String> replacement, int capture_count,
328                                   int subject_length) {
329   {
330     DisallowGarbageCollection no_gc;
331     String::FlatContent content = replacement->GetFlatContent(no_gc);
332     DCHECK(content.IsFlat());
333 
334     FixedArray capture_name_map;
335     if (capture_count > 0) {
336       DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
337       Object maybe_capture_name_map = regexp->capture_name_map();
338       if (maybe_capture_name_map.IsFixedArray()) {
339         capture_name_map = FixedArray::cast(maybe_capture_name_map);
340       }
341     }
342 
343     bool simple;
344     if (content.IsOneByte()) {
345       simple =
346           ParseReplacementPattern(content.ToOneByteVector(), capture_name_map,
347                                   capture_count, subject_length);
348     } else {
349       DCHECK(content.IsTwoByte());
350       simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map,
351                                        capture_count, subject_length);
352     }
353     if (simple) return true;
354   }
355 
356   // Find substrings of replacement string and create them as String objects.
357   int substring_index = 0;
358   for (ReplacementPart& part : parts_) {
359     int tag = part.tag;
360     if (tag <= 0) {  // A replacement string slice.
361       int from = -tag;
362       int to = part.data;
363       replacement_substrings_.emplace_back(
364           isolate->factory()->NewSubString(replacement, from, to));
365       part.tag = REPLACEMENT_SUBSTRING;
366       part.data = substring_index;
367       substring_index++;
368     } else if (tag == REPLACEMENT_STRING) {
369       replacement_substrings_.emplace_back(replacement);
370       part.data = substring_index;
371       substring_index++;
372     }
373   }
374   return false;
375 }
376 
377 
Apply(ReplacementStringBuilder* builder, int match_from, int match_to, int32_t* match)378 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
379                                 int match_from, int match_to, int32_t* match) {
380   DCHECK_LT(0, parts_.size());
381   for (ReplacementPart& part : parts_) {
382     switch (part.tag) {
383       case SUBJECT_PREFIX:
384         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
385         break;
386       case SUBJECT_SUFFIX: {
387         int subject_length = part.data;
388         if (match_to < subject_length) {
389           builder->AddSubjectSlice(match_to, subject_length);
390         }
391         break;
392       }
393       case SUBJECT_CAPTURE: {
394         int capture = part.data;
395         int from = match[capture * 2];
396         int to = match[capture * 2 + 1];
397         if (from >= 0 && to > from) {
398           builder->AddSubjectSlice(from, to);
399         }
400         break;
401       }
402       case REPLACEMENT_SUBSTRING:
403       case REPLACEMENT_STRING:
404         builder->AddString(replacement_substrings_[part.data]);
405         break;
406       case EMPTY_REPLACEMENT:
407         break;
408       default:
409         UNREACHABLE();
410     }
411   }
412 }
413 
FindOneByteStringIndices(base::Vector<const uint8_t> subject, uint8_t pattern, std::vector<int>* indices, unsigned int limit)414 void FindOneByteStringIndices(base::Vector<const uint8_t> subject,
415                               uint8_t pattern, std::vector<int>* indices,
416                               unsigned int limit) {
417   DCHECK_LT(0, limit);
418   // Collect indices of pattern in subject using memchr.
419   // Stop after finding at most limit values.
420   const uint8_t* subject_start = subject.begin();
421   const uint8_t* subject_end = subject_start + subject.length();
422   const uint8_t* pos = subject_start;
423   while (limit > 0) {
424     pos = reinterpret_cast<const uint8_t*>(
425         memchr(pos, pattern, subject_end - pos));
426     if (pos == nullptr) return;
427     indices->push_back(static_cast<int>(pos - subject_start));
428     pos++;
429     limit--;
430   }
431 }
432 
FindTwoByteStringIndices(const base::Vector<const base::uc16> subject, base::uc16 pattern, std::vector<int>* indices, unsigned int limit)433 void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,
434                               base::uc16 pattern, std::vector<int>* indices,
435                               unsigned int limit) {
436   DCHECK_LT(0, limit);
437   const base::uc16* subject_start = subject.begin();
438   const base::uc16* subject_end = subject_start + subject.length();
439   for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
440        pos++) {
441     if (*pos == pattern) {
442       indices->push_back(static_cast<int>(pos - subject_start));
443       limit--;
444     }
445   }
446 }
447 
448 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate* isolate, base::Vector<const SubjectChar> subject, base::Vector<const PatternChar> pattern, std::vector<int>* indices, unsigned int limit)449 void FindStringIndices(Isolate* isolate,
450                        base::Vector<const SubjectChar> subject,
451                        base::Vector<const PatternChar> pattern,
452                        std::vector<int>* indices, unsigned int limit) {
453   DCHECK_LT(0, limit);
454   // Collect indices of pattern in subject.
455   // Stop after finding at most limit values.
456   int pattern_length = pattern.length();
457   int index = 0;
458   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
459   while (limit > 0) {
460     index = search.Search(subject, index);
461     if (index < 0) return;
462     indices->push_back(index);
463     index += pattern_length;
464     limit--;
465   }
466 }
467 
FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern, std::vector<int>* indices, unsigned int limit)468 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
469                                std::vector<int>* indices, unsigned int limit) {
470   {
471     DisallowGarbageCollection no_gc;
472     String::FlatContent subject_content = subject.GetFlatContent(no_gc);
473     String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
474     DCHECK(subject_content.IsFlat());
475     DCHECK(pattern_content.IsFlat());
476     if (subject_content.IsOneByte()) {
477       base::Vector<const uint8_t> subject_vector =
478           subject_content.ToOneByteVector();
479       if (pattern_content.IsOneByte()) {
480         base::Vector<const uint8_t> pattern_vector =
481             pattern_content.ToOneByteVector();
482         if (pattern_vector.length() == 1) {
483           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
484                                    limit);
485         } else {
486           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487                             limit);
488         }
489       } else {
490         FindStringIndices(isolate, subject_vector,
491                           pattern_content.ToUC16Vector(), indices, limit);
492       }
493     } else {
494       base::Vector<const base::uc16> subject_vector =
495           subject_content.ToUC16Vector();
496       if (pattern_content.IsOneByte()) {
497         base::Vector<const uint8_t> pattern_vector =
498             pattern_content.ToOneByteVector();
499         if (pattern_vector.length() == 1) {
500           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
501                                    limit);
502         } else {
503           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
504                             limit);
505         }
506       } else {
507         base::Vector<const base::uc16> pattern_vector =
508             pattern_content.ToUC16Vector();
509         if (pattern_vector.length() == 1) {
510           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
511                                    limit);
512         } else {
513           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
514                             limit);
515         }
516       }
517     }
518   }
519 }
520 
521 namespace {
GetRewoundRegexpIndicesList(Isolate* isolate)522 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
523   std::vector<int>* list = isolate->regexp_indices();
524   list->clear();
525   return list;
526 }
527 
TruncateRegexpIndicesList(Isolate* isolate)528 void TruncateRegexpIndicesList(Isolate* isolate) {
529   // Same size as smallest zone segment, preserving behavior from the
530   // runtime zone.
531   // TODO(jgruber): Consider removing the reusable regexp_indices list and
532   // simply allocating a new list each time. It feels like we're needlessly
533   // optimizing an edge case.
534   static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
535   std::vector<int>* indices = isolate->regexp_indices();
536   if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
537     // Throw away backing storage.
538     indices->clear();
539     indices->shrink_to_fit();
540   }
541 }
542 }  // namespace
543 
544 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info)545 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
546     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
547     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
548   DCHECK(subject->IsFlat());
549   DCHECK(replacement->IsFlat());
550 
551   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
552 
553   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag());
554   String pattern = pattern_regexp->atom_pattern();
555   int subject_len = subject->length();
556   int pattern_len = pattern.length();
557   int replacement_len = replacement->length();
558 
559   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
560 
561   if (indices->empty()) return *subject;
562 
563   // Detect integer overflow.
564   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
565                            static_cast<int64_t>(pattern_len)) *
566                               static_cast<int64_t>(indices->size()) +
567                           static_cast<int64_t>(subject_len);
568   int result_len;
569   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
570     STATIC_ASSERT(String::kMaxLength < kMaxInt);
571     result_len = kMaxInt;  // Provoke exception.
572   } else {
573     result_len = static_cast<int>(result_len_64);
574   }
575   if (result_len == 0) {
576     return ReadOnlyRoots(isolate).empty_string();
577   }
578 
579   int subject_pos = 0;
580   int result_pos = 0;
581 
582   MaybeHandle<SeqString> maybe_res;
583   if (ResultSeqString::kHasOneByteEncoding) {
584     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
585   } else {
586     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
587   }
588   Handle<SeqString> untyped_res;
589   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
590   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
591 
592   DisallowGarbageCollection no_gc;
593   for (int index : *indices) {
594     // Copy non-matched subject content.
595     if (subject_pos < index) {
596       String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
597                           subject_pos, index - subject_pos);
598       result_pos += index - subject_pos;
599     }
600 
601     // Replace match.
602     if (replacement_len > 0) {
603       String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
604                           replacement_len);
605       result_pos += replacement_len;
606     }
607 
608     subject_pos = index + pattern_len;
609   }
610   // Add remaining subject content at the end.
611   if (subject_pos < subject_len) {
612     String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
613                         subject_pos, subject_len - subject_pos);
614   }
615 
616   int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
617   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
618 
619   TruncateRegexpIndicesList(isolate);
620 
621   return *result;
622 }
623 
StringReplaceGlobalRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info)624 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
625     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
626     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
627   DCHECK(subject->IsFlat());
628   DCHECK(replacement->IsFlat());
629 
630   int capture_count = regexp->capture_count();
631   int subject_length = subject->length();
632 
633   // Ensure the RegExp is compiled so we can access the capture-name map.
634   if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
635     return ReadOnlyRoots(isolate).exception();
636   }
637 
638   CompiledReplacement compiled_replacement;
639   const bool simple_replace = compiled_replacement.Compile(
640       isolate, regexp, replacement, capture_count, subject_length);
641 
642   // Shortcut for simple non-regexp global replacements
643   if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) {
644     if (subject->IsOneByteRepresentation() &&
645         replacement->IsOneByteRepresentation()) {
646       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
647           isolate, subject, regexp, replacement, last_match_info);
648     } else {
649       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
650           isolate, subject, regexp, replacement, last_match_info);
651     }
652   }
653 
654   RegExpGlobalCache global_cache(regexp, subject, isolate);
655   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
656 
657   int32_t* current_match = global_cache.FetchNext();
658   if (current_match == nullptr) {
659     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
660     return *subject;
661   }
662 
663   // Guessing the number of parts that the final result string is built
664   // from. Global regexps can match any number of times, so we guess
665   // conservatively.
666   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
667   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
668 
669   int prev = 0;
670 
671   do {
672     int start = current_match[0];
673     int end = current_match[1];
674 
675     if (prev < start) {
676       builder.AddSubjectSlice(prev, start);
677     }
678 
679     if (simple_replace) {
680       builder.AddString(replacement);
681     } else {
682       compiled_replacement.Apply(&builder, start, end, current_match);
683     }
684     prev = end;
685 
686     current_match = global_cache.FetchNext();
687   } while (current_match != nullptr);
688 
689   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
690 
691   if (prev < subject_length) {
692     builder.AddSubjectSlice(prev, subject_length);
693   }
694 
695   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
696                            global_cache.LastSuccessfulMatch());
697 
698   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
699 }
700 
701 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_info)702 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
703     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
704     Handle<RegExpMatchInfo> last_match_info) {
705   DCHECK(subject->IsFlat());
706 
707   // Shortcut for simple non-regexp global replacements
708   if (regexp->type_tag() == JSRegExp::ATOM) {
709     Handle<String> empty_string = isolate->factory()->empty_string();
710     if (subject->IsOneByteRepresentation()) {
711       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
712           isolate, subject, regexp, empty_string, last_match_info);
713     } else {
714       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
715           isolate, subject, regexp, empty_string, last_match_info);
716     }
717   }
718 
719   RegExpGlobalCache global_cache(regexp, subject, isolate);
720   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
721 
722   int32_t* current_match = global_cache.FetchNext();
723   if (current_match == nullptr) {
724     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
725     return *subject;
726   }
727 
728   int start = current_match[0];
729   int end = current_match[1];
730   int capture_count = regexp->capture_count();
731   int subject_length = subject->length();
732 
733   int new_length = subject_length - (end - start);
734   if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
735 
736   Handle<ResultSeqString> answer;
737   if (ResultSeqString::kHasOneByteEncoding) {
738     answer = Handle<ResultSeqString>::cast(
739         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
740   } else {
741     answer = Handle<ResultSeqString>::cast(
742         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
743   }
744 
745   int prev = 0;
746   int position = 0;
747 
748   DisallowGarbageCollection no_gc;
749   do {
750     start = current_match[0];
751     end = current_match[1];
752     if (prev < start) {
753       // Add substring subject[prev;start] to answer string.
754       String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
755                           start - prev);
756       position += start - prev;
757     }
758     prev = end;
759 
760     current_match = global_cache.FetchNext();
761   } while (current_match != nullptr);
762 
763   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
764 
765   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
766                            global_cache.LastSuccessfulMatch());
767 
768   if (prev < subject_length) {
769     // Add substring subject[prev;length] to answer string.
770     String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
771                         subject_length - prev);
772     position += subject_length - prev;
773   }
774 
775   if (position == 0) return ReadOnlyRoots(isolate).empty_string();
776 
777   // Shorten string and fill
778   int string_size = ResultSeqString::SizeFor(position);
779   int allocated_string_size = ResultSeqString::SizeFor(new_length);
780   int delta = allocated_string_size - string_size;
781 
782   answer->set_length(position);
783   if (delta == 0) return *answer;
784 
785   Address end_of_string = answer->address() + string_size;
786   Heap* heap = isolate->heap();
787 
788   // The trimming is performed on a newly allocated object, which is on a
789   // freshly allocated page or on an already swept page. Hence, the sweeper
790   // thread can not get confused with the filler creation. No synchronization
791   // needed.
792   // TODO(hpayer): We should shrink the large object page if the size
793   // of the object changed significantly.
794   if (!heap->IsLargeObject(*answer)) {
795     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
796   }
797   return *answer;
798 }
799 
RUNTIME_FUNCTION(Runtime_StringSplit)800 RUNTIME_FUNCTION(Runtime_StringSplit) {
801   HandleScope handle_scope(isolate);
802   DCHECK_EQ(3, args.length());
803   Handle<String> subject = args.at<String>(0);
804   Handle<String> pattern = args.at<String>(1);
805   uint32_t limit = NumberToUint32(args[2]);
806   CHECK_LT(0, limit);
807 
808   int subject_length = subject->length();
809   int pattern_length = pattern->length();
810   CHECK_LT(0, pattern_length);
811 
812   if (limit == 0xFFFFFFFFu) {
813     FixedArray last_match_cache_unused;
814     Handle<Object> cached_answer(
815         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
816                                    &last_match_cache_unused,
817                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
818         isolate);
819     if (*cached_answer != Smi::zero()) {
820       // The cache FixedArray is a COW-array and can therefore be reused.
821       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
822           Handle<FixedArray>::cast(cached_answer));
823       return *result;
824     }
825   }
826 
827   // The limit can be very large (0xFFFFFFFFu), but since the pattern
828   // isn't empty, we can never create more parts than ~half the length
829   // of the subject.
830 
831   subject = String::Flatten(isolate, subject);
832   pattern = String::Flatten(isolate, pattern);
833 
834   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
835 
836   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
837 
838   if (static_cast<uint32_t>(indices->size()) < limit) {
839     indices->push_back(subject_length);
840   }
841 
842   // The list indices now contains the end of each part to create.
843 
844   // Create JSArray of substrings separated by separator.
845   int part_count = static_cast<int>(indices->size());
846 
847   Handle<JSArray> result =
848       isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
849                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
850 
851   DCHECK(result->HasObjectElements());
852 
853   Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
854 
855   if (part_count == 1 && indices->at(0) == subject_length) {
856     elements->set(0, *subject);
857   } else {
858     int part_start = 0;
859     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
860       int part_end = indices->at(i);
861       Handle<String> substring =
862           isolate->factory()->NewProperSubString(subject, part_start, part_end);
863       elements->set(i, *substring);
864       part_start = part_end + pattern_length;
865     });
866   }
867 
868   if (limit == 0xFFFFFFFFu) {
869     if (result->HasObjectElements()) {
870       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
871                                 isolate->factory()->empty_fixed_array(),
872                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
873     }
874   }
875 
876   TruncateRegexpIndicesList(isolate);
877 
878   return *result;
879 }
880 
881 namespace {
882 
RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)883 MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp,
884                                Handle<String> subject, int32_t index,
885                                Handle<RegExpMatchInfo> last_match_info,
886                                RegExp::ExecQuirks exec_quirks) {
887   // Due to the way the JS calls are constructed this must be less than the
888   // length of a string, i.e. it is always a Smi.  We check anyway for security.
889   CHECK_LE(0, index);
890   CHECK_GE(subject->length(), index);
891   isolate->counters()->regexp_entry_runtime()->Increment();
892   return RegExp::Exec(isolate, regexp, subject, index, last_match_info,
893                       exec_quirks);
894 }
895 
ExperimentalOneshotExec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)896 MaybeHandle<Object> ExperimentalOneshotExec(
897     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
898     int32_t index, Handle<RegExpMatchInfo> last_match_info,
899     RegExp::ExecQuirks exec_quirks) {
900   // Due to the way the JS calls are constructed this must be less than the
901   // length of a string, i.e. it is always a Smi.  We check anyway for security.
902   CHECK_LE(0, index);
903   CHECK_GE(subject->length(), index);
904   isolate->counters()->regexp_entry_runtime()->Increment();
905   return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
906                                          last_match_info, exec_quirks);
907 }
908 
909 }  // namespace
910 
RUNTIME_FUNCTION(Runtime_RegExpExec)911 RUNTIME_FUNCTION(Runtime_RegExpExec) {
912   HandleScope scope(isolate);
913   DCHECK_EQ(4, args.length());
914   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
915   Handle<String> subject = args.at<String>(1);
916   int32_t index = 0;
917   CHECK(args[2].ToInt32(&index));
918   Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
919   RETURN_RESULT_OR_FAILURE(
920       isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
921                           RegExp::ExecQuirks::kNone));
922 }
923 
RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure)924 RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) {
925   HandleScope scope(isolate);
926   DCHECK_EQ(4, args.length());
927   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
928   Handle<String> subject = args.at<String>(1);
929   int32_t index = 0;
930   CHECK(args[2].ToInt32(&index));
931   Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
932   RETURN_RESULT_OR_FAILURE(
933       isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
934                           RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
935 }
936 
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec)937 RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
938   HandleScope scope(isolate);
939   DCHECK_EQ(4, args.length());
940   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
941   Handle<String> subject = args.at<String>(1);
942   int32_t index = 0;
943   CHECK(args[2].ToInt32(&index));
944   Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
945   RETURN_RESULT_OR_FAILURE(
946       isolate,
947       ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
948                               RegExp::ExecQuirks::kNone));
949 }
950 
RUNTIME_FUNCTION( Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure)951 RUNTIME_FUNCTION(
952     Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) {
953   HandleScope scope(isolate);
954   DCHECK_EQ(4, args.length());
955   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
956   Handle<String> subject = args.at<String>(1);
957   int32_t index = 0;
958   CHECK(args[2].ToInt32(&index));
959   Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
960   RETURN_RESULT_OR_FAILURE(
961       isolate,
962       ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
963                               RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
964 }
965 
RUNTIME_FUNCTION(Runtime_RegExpBuildIndices)966 RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
967   HandleScope scope(isolate);
968   DCHECK_EQ(3, args.length());
969   Handle<RegExpMatchInfo> match_info = args.at<RegExpMatchInfo>(1);
970   Handle<Object> maybe_names = args.at(2);
971 #ifdef DEBUG
972   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
973   DCHECK(regexp->flags() & JSRegExp::kHasIndices);
974 #endif
975 
976   return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
977 }
978 
979 namespace {
980 
981 class MatchInfoBackedMatch : public String::Match {
982  public:
MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, Handle<RegExpMatchInfo> match_info)983   MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
984                        Handle<String> subject,
985                        Handle<RegExpMatchInfo> match_info)
986       : isolate_(isolate), match_info_(match_info) {
987     subject_ = String::Flatten(isolate, subject);
988 
989     if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) {
990       Object o = regexp->capture_name_map();
991       has_named_captures_ = o.IsFixedArray();
992       if (has_named_captures_) {
993         capture_name_map_ = handle(FixedArray::cast(o), isolate);
994       }
995     } else {
996       has_named_captures_ = false;
997     }
998   }
999 
1000   Handle<String> GetMatch() override {
1001     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
1002   }
1003 
1004   Handle<String> GetPrefix() override {
1005     const int match_start = match_info_->Capture(0);
1006     return isolate_->factory()->NewSubString(subject_, 0, match_start);
1007   }
1008 
1009   Handle<String> GetSuffix() override {
1010     const int match_end = match_info_->Capture(1);
1011     return isolate_->factory()->NewSubString(subject_, match_end,
1012                                              subject_->length());
1013   }
1014 
1015   bool HasNamedCaptures() override { return has_named_captures_; }
1016 
1017   int CaptureCount() override {
1018     return match_info_->NumberOfCaptureRegisters() / 2;
1019   }
1020 
1021   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1022     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1023         isolate_, match_info_, i, capture_exists);
1024     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1025                              : isolate_->factory()->empty_string();
1026   }
1027 
1028   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1029                                       CaptureState* state) override {
1030     DCHECK(has_named_captures_);
1031     const int capture_index = LookupNamedCapture(
1032         [=](String capture_name) { return capture_name.Equals(*name); },
1033         *capture_name_map_);
1034 
1035     if (capture_index == -1) {
1036       *state = UNMATCHED;
1037       return isolate_->factory()->empty_string();
1038     }
1039 
1040     DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1041 
1042     bool capture_exists;
1043     Handle<String> capture_value;
1044     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1045                                GetCapture(capture_index, &capture_exists),
1046                                String);
1047 
1048     if (!capture_exists) {
1049       *state = UNMATCHED;
1050       return isolate_->factory()->empty_string();
1051     } else {
1052       *state = MATCHED;
1053       return capture_value;
1054     }
1055   }
1056 
1057  private:
1058   Isolate* isolate_;
1059   Handle<String> subject_;
1060   Handle<RegExpMatchInfo> match_info_;
1061 
1062   bool has_named_captures_;
1063   Handle<FixedArray> capture_name_map_;
1064 };
1065 
1066 class VectorBackedMatch : public String::Match {
1067  public:
VectorBackedMatch(Isolate* isolate, Handle<String> subject, Handle<String> match, int match_position, base::Vector<Handle<Object>> captures, Handle<Object> groups_obj)1068   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1069                     Handle<String> match, int match_position,
1070                     base::Vector<Handle<Object>> captures,
1071                     Handle<Object> groups_obj)
1072       : isolate_(isolate),
1073         match_(match),
1074         match_position_(match_position),
1075         captures_(captures) {
1076     subject_ = String::Flatten(isolate, subject);
1077 
1078     DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1079     has_named_captures_ = !groups_obj->IsUndefined(isolate);
1080     if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1081   }
1082 
1083   Handle<String> GetMatch() override { return match_; }
1084 
1085   Handle<String> GetPrefix() override {
1086     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1087   }
1088 
1089   Handle<String> GetSuffix() override {
1090     const int match_end_position = match_position_ + match_->length();
1091     return isolate_->factory()->NewSubString(subject_, match_end_position,
1092                                              subject_->length());
1093   }
1094 
1095   bool HasNamedCaptures() override { return has_named_captures_; }
1096 
1097   int CaptureCount() override { return captures_.length(); }
1098 
1099   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1100     Handle<Object> capture_obj = captures_[i];
1101     if (capture_obj->IsUndefined(isolate_)) {
1102       *capture_exists = false;
1103       return isolate_->factory()->empty_string();
1104     }
1105     *capture_exists = true;
1106     return Object::ToString(isolate_, capture_obj);
1107   }
1108 
1109   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1110                                       CaptureState* state) override {
1111     DCHECK(has_named_captures_);
1112 
1113     Handle<Object> capture_obj;
1114     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1115                                Object::GetProperty(isolate_, groups_obj_, name),
1116                                String);
1117     if (capture_obj->IsUndefined(isolate_)) {
1118       *state = UNMATCHED;
1119       return isolate_->factory()->empty_string();
1120     } else {
1121       *state = MATCHED;
1122       return Object::ToString(isolate_, capture_obj);
1123     }
1124   }
1125 
1126  private:
1127   Isolate* isolate_;
1128   Handle<String> subject_;
1129   Handle<String> match_;
1130   const int match_position_;
1131   base::Vector<Handle<Object>> captures_;
1132 
1133   bool has_named_captures_;
1134   Handle<JSReceiver> groups_obj_;
1135 };
1136 
1137 // Create the groups object (see also the RegExp result creation in
1138 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject( Isolate* isolate, Handle<FixedArray> capture_map, const std::function<Object(int)>& f_get_capture)1139 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1140     Isolate* isolate, Handle<FixedArray> capture_map,
1141     const std::function<Object(int)>& f_get_capture) {
1142   Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1143 
1144   const int named_capture_count = capture_map->length() >> 1;
1145   for (int i = 0; i < named_capture_count; i++) {
1146     const int name_ix = i * 2;
1147     const int index_ix = i * 2 + 1;
1148 
1149     Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1150                                 isolate);
1151     const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1152     DCHECK_GE(capture_ix, 1);  // Explicit groups start at index 1.
1153 
1154     Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1155     DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1156 
1157     JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1158   }
1159 
1160   return groups;
1161 }
1162 
1163 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1164 // separate last match info.  See comment on that function.
1165 template <bool has_capture>
SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_array, Handle<JSArray> result_array)1166 static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1167                                    Handle<JSRegExp> regexp,
1168                                    Handle<RegExpMatchInfo> last_match_array,
1169                                    Handle<JSArray> result_array) {
1170   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1171   DCHECK_NE(has_capture, regexp->capture_count() == 0);
1172   DCHECK(subject->IsFlat());
1173 
1174   // Force tier up to native code for global replaces. The global replace is
1175   // implemented differently for native code and bytecode execution, where the
1176   // native code expects an array to store all the matches, and the bytecode
1177   // matches one at a time, so it's easier to tier-up to native code from the
1178   // start.
1179   if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1180     regexp->MarkTierUpForNextExec();
1181     if (FLAG_trace_regexp_tier_up) {
1182       PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1183              reinterpret_cast<void*>(regexp->ptr()));
1184     }
1185   }
1186 
1187   int capture_count = regexp->capture_count();
1188   int subject_length = subject->length();
1189 
1190   static const int kMinLengthToCache = 0x1000;
1191 
1192   if (subject_length > kMinLengthToCache) {
1193     FixedArray last_match_cache;
1194     Object cached_answer = RegExpResultsCache::Lookup(
1195         isolate->heap(), *subject, regexp->data(), &last_match_cache,
1196         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1197     if (cached_answer.IsFixedArray()) {
1198       int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1199       int32_t* last_match = NewArray<int32_t>(capture_registers);
1200       for (int i = 0; i < capture_registers; i++) {
1201         last_match[i] = Smi::ToInt(last_match_cache.get(i));
1202       }
1203       Handle<FixedArray> cached_fixed_array =
1204           Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1205       // The cache FixedArray is a COW-array and we need to return a copy.
1206       Handle<FixedArray> copied_fixed_array =
1207           isolate->factory()->CopyFixedArrayWithMap(
1208               cached_fixed_array, isolate->factory()->fixed_array_map());
1209       JSArray::SetContent(result_array, copied_fixed_array);
1210       RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1211                                capture_count, last_match);
1212       DeleteArray(last_match);
1213       return *result_array;
1214     }
1215   }
1216 
1217   RegExpGlobalCache global_cache(regexp, subject, isolate);
1218   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1219 
1220   // Ensured in Runtime_RegExpExecMultiple.
1221   DCHECK(result_array->HasObjectElements());
1222   Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1223                                      isolate);
1224   if (result_elements->length() < 16) {
1225     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1226   }
1227 
1228   FixedArrayBuilder builder(result_elements);
1229 
1230   // Position to search from.
1231   int match_start = -1;
1232   int match_end = 0;
1233   bool first = true;
1234 
1235   // Two smis before and after the match, for very long strings.
1236   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1237 
1238   while (true) {
1239     int32_t* current_match = global_cache.FetchNext();
1240     if (current_match == nullptr) break;
1241     match_start = current_match[0];
1242     builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1243     if (match_end < match_start) {
1244       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1245                                                 match_start);
1246     }
1247     match_end = current_match[1];
1248     {
1249       // Avoid accumulating new handles inside loop.
1250       HandleScope temp_scope(isolate);
1251       Handle<String> match;
1252       if (!first) {
1253         match = isolate->factory()->NewProperSubString(subject, match_start,
1254                                                        match_end);
1255       } else {
1256         match =
1257             isolate->factory()->NewSubString(subject, match_start, match_end);
1258         first = false;
1259       }
1260 
1261       if (has_capture) {
1262         // Arguments array to replace function is match, captures, index and
1263         // subject, i.e., 3 + capture count in total. If the RegExp contains
1264         // named captures, they are also passed as the last argument.
1265 
1266         Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate);
1267         const bool has_named_captures = maybe_capture_map->IsFixedArray();
1268 
1269         const int argc =
1270             has_named_captures ? 4 + capture_count : 3 + capture_count;
1271 
1272         Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1273         int cursor = 0;
1274 
1275         elements->set(cursor++, *match);
1276         for (int i = 1; i <= capture_count; i++) {
1277           int start = current_match[i * 2];
1278           if (start >= 0) {
1279             int end = current_match[i * 2 + 1];
1280             DCHECK(start <= end);
1281             Handle<String> substring =
1282                 isolate->factory()->NewSubString(subject, start, end);
1283             elements->set(cursor++, *substring);
1284           } else {
1285             DCHECK_GT(0, current_match[i * 2 + 1]);
1286             elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1287           }
1288         }
1289 
1290         elements->set(cursor++, Smi::FromInt(match_start));
1291         elements->set(cursor++, *subject);
1292 
1293         if (has_named_captures) {
1294           Handle<FixedArray> capture_map =
1295               Handle<FixedArray>::cast(maybe_capture_map);
1296           Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1297               isolate, capture_map, [=](int ix) { return elements->get(ix); });
1298           elements->set(cursor++, *groups);
1299         }
1300 
1301         DCHECK_EQ(cursor, argc);
1302         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1303       } else {
1304         builder.Add(*match);
1305       }
1306     }
1307   }
1308 
1309   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1310 
1311   if (match_start >= 0) {
1312     // Finished matching, with at least one match.
1313     if (match_end < subject_length) {
1314       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1315                                                 subject_length);
1316     }
1317 
1318     RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1319                              global_cache.LastSuccessfulMatch());
1320 
1321     if (subject_length > kMinLengthToCache) {
1322       // Store the last successful match into the array for caching.
1323       int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1324       Handle<FixedArray> last_match_cache =
1325           isolate->factory()->NewFixedArray(capture_registers);
1326       int32_t* last_match = global_cache.LastSuccessfulMatch();
1327       for (int i = 0; i < capture_registers; i++) {
1328         last_match_cache->set(i, Smi::FromInt(last_match[i]));
1329       }
1330       Handle<FixedArray> result_fixed_array =
1331           FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1332       // Cache the result and copy the FixedArray into a COW array.
1333       Handle<FixedArray> copied_fixed_array =
1334           isolate->factory()->CopyFixedArrayWithMap(
1335               result_fixed_array, isolate->factory()->fixed_array_map());
1336       RegExpResultsCache::Enter(
1337           isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1338           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1339     }
1340     return *builder.ToJSArray(result_array);
1341   } else {
1342     return ReadOnlyRoots(isolate).null_value();  // No matches at all.
1343   }
1344 }
1345 
1346 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1347 // doesn't properly call the underlying exec method.
RegExpReplace( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string, Handle<String> replace)1348 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1349     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1350     Handle<String> replace) {
1351   // Functional fast-paths are dispatched directly by replace builtin.
1352   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1353 
1354   Factory* factory = isolate->factory();
1355 
1356   const int flags = regexp->flags();
1357   const bool global = (flags & JSRegExp::kGlobal) != 0;
1358   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1359 
1360   replace = String::Flatten(isolate, replace);
1361 
1362   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1363 
1364   if (!global) {
1365     // Non-global regexp search, string replace.
1366 
1367     uint32_t last_index = 0;
1368     if (sticky) {
1369       Handle<Object> last_index_obj(regexp->last_index(), isolate);
1370       ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1371                                  Object::ToLength(isolate, last_index_obj),
1372                                  String);
1373       last_index = PositiveNumberToUint32(*last_index_obj);
1374     }
1375 
1376     Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1377                                      isolate);
1378 
1379     // A lastIndex exceeding the string length always returns null (signalling
1380     // failure) in RegExpBuiltinExec, thus we can skip the call.
1381     if (last_index <= static_cast<uint32_t>(string->length())) {
1382       ASSIGN_RETURN_ON_EXCEPTION(
1383           isolate, match_indices_obj,
1384           RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1385           String);
1386     }
1387 
1388     if (match_indices_obj->IsNull(isolate)) {
1389       if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1390       return string;
1391     }
1392 
1393     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1394 
1395     const int start_index = match_indices->Capture(0);
1396     const int end_index = match_indices->Capture(1);
1397 
1398     if (sticky) {
1399       regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1400     }
1401 
1402     IncrementalStringBuilder builder(isolate);
1403     builder.AppendString(factory->NewSubString(string, 0, start_index));
1404 
1405     if (replace->length() > 0) {
1406       MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1407       Handle<String> replacement;
1408       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1409                                  String::GetSubstitution(isolate, &m, replace),
1410                                  String);
1411       builder.AppendString(replacement);
1412     }
1413 
1414     builder.AppendString(
1415         factory->NewSubString(string, end_index, string->length()));
1416     return builder.Finish();
1417   } else {
1418     // Global regexp search, string replace.
1419     DCHECK(global);
1420     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1421                         String);
1422 
1423     // Force tier up to native code for global replaces. The global replace is
1424     // implemented differently for native code and bytecode execution, where the
1425     // native code expects an array to store all the matches, and the bytecode
1426     // matches one at a time, so it's easier to tier-up to native code from the
1427     // start.
1428     if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1429       regexp->MarkTierUpForNextExec();
1430       if (FLAG_trace_regexp_tier_up) {
1431         PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1432                reinterpret_cast<void*>(regexp->ptr()));
1433       }
1434     }
1435 
1436     if (replace->length() == 0) {
1437       if (string->IsOneByteRepresentation()) {
1438         Object result =
1439             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1440                 isolate, string, regexp, last_match_info);
1441         return handle(String::cast(result), isolate);
1442       } else {
1443         Object result =
1444             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1445                 isolate, string, regexp, last_match_info);
1446         return handle(String::cast(result), isolate);
1447       }
1448     }
1449 
1450     Object result = StringReplaceGlobalRegExpWithString(
1451         isolate, string, regexp, replace, last_match_info);
1452     if (result.IsString()) {
1453       return handle(String::cast(result), isolate);
1454     } else {
1455       return MaybeHandle<String>();
1456     }
1457   }
1458 
1459   UNREACHABLE();
1460 }
1461 
1462 }  // namespace
1463 
1464 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1465 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1466   HandleScope handles(isolate);
1467   DCHECK_EQ(4, args.length());
1468 
1469   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
1470   Handle<String> subject = args.at<String>(1);
1471   Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(2);
1472   Handle<JSArray> result_array = args.at<JSArray>(3);
1473 
1474   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1475   CHECK(result_array->HasObjectElements());
1476 
1477   subject = String::Flatten(isolate, subject);
1478   CHECK(regexp->flags() & JSRegExp::kGlobal);
1479 
1480   Object result;
1481   if (regexp->capture_count() == 0) {
1482     result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1483                                          last_match_info, result_array);
1484   } else {
1485     result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1486                                         last_match_info, result_array);
1487   }
1488   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1489   return result;
1490 }
1491 
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1492 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1493   HandleScope scope(isolate);
1494   DCHECK_EQ(3, args.length());
1495   Handle<String> subject = args.at<String>(0);
1496   Handle<JSRegExp> regexp = args.at<JSRegExp>(1);
1497   Handle<JSReceiver> replace_obj = args.at<JSReceiver>(2);
1498 
1499   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1500   DCHECK(replace_obj->map().is_callable());
1501 
1502   Factory* factory = isolate->factory();
1503   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1504 
1505   const int flags = regexp->flags();
1506   DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1507 
1508   // TODO(jgruber): This should be an easy port to CSA with massive payback.
1509 
1510   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1511   uint32_t last_index = 0;
1512   if (sticky) {
1513     Handle<Object> last_index_obj(regexp->last_index(), isolate);
1514     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1515         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1516     last_index = PositiveNumberToUint32(*last_index_obj);
1517   }
1518 
1519   Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1520                                    isolate);
1521 
1522   // A lastIndex exceeding the string length always returns null (signalling
1523   // failure) in RegExpBuiltinExec, thus we can skip the call.
1524   if (last_index <= static_cast<uint32_t>(subject->length())) {
1525     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1526         isolate, match_indices_obj,
1527         RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
1528   }
1529 
1530   if (match_indices_obj->IsNull(isolate)) {
1531     if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1532     return *subject;
1533   }
1534 
1535   Handle<RegExpMatchInfo> match_indices =
1536       Handle<RegExpMatchInfo>::cast(match_indices_obj);
1537 
1538   const int index = match_indices->Capture(0);
1539   const int end_of_match = match_indices->Capture(1);
1540 
1541   if (sticky) {
1542     regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1543   }
1544 
1545   IncrementalStringBuilder builder(isolate);
1546   builder.AppendString(factory->NewSubString(subject, 0, index));
1547 
1548   // Compute the parameter list consisting of the match, captures, index,
1549   // and subject for the replace function invocation. If the RegExp contains
1550   // named captures, they are also passed as the last argument.
1551 
1552   // The number of captures plus one for the match.
1553   const int m = match_indices->NumberOfCaptureRegisters() / 2;
1554 
1555   bool has_named_captures = false;
1556   Handle<FixedArray> capture_map;
1557   if (m > 1) {
1558     DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
1559 
1560     Object maybe_capture_map = regexp->capture_name_map();
1561     if (maybe_capture_map.IsFixedArray()) {
1562       has_named_captures = true;
1563       capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1564     }
1565   }
1566 
1567   const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1568   if (argc == static_cast<uint32_t>(-1)) {
1569     THROW_NEW_ERROR_RETURN_FAILURE(
1570         isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1571   }
1572   base::ScopedVector<Handle<Object>> argv(argc);
1573 
1574   int cursor = 0;
1575   for (int j = 0; j < m; j++) {
1576     bool ok;
1577     Handle<String> capture =
1578         RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1579     if (ok) {
1580       argv[cursor++] = capture;
1581     } else {
1582       argv[cursor++] = factory->undefined_value();
1583     }
1584   }
1585 
1586   argv[cursor++] = handle(Smi::FromInt(index), isolate);
1587   argv[cursor++] = subject;
1588 
1589   if (has_named_captures) {
1590     argv[cursor++] = ConstructNamedCaptureGroupsObject(
1591         isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1592   }
1593 
1594   DCHECK_EQ(cursor, argc);
1595 
1596   Handle<Object> replacement_obj;
1597   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1598       isolate, replacement_obj,
1599       Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1600                       argv.begin()));
1601 
1602   Handle<String> replacement;
1603   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1604       isolate, replacement, Object::ToString(isolate, replacement_obj));
1605 
1606   builder.AppendString(replacement);
1607   builder.AppendString(
1608       factory->NewSubString(subject, end_of_match, subject->length()));
1609 
1610   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1611 }
1612 
1613 namespace {
1614 
ToUint32(Isolate* isolate, Handle<Object> object, uint32_t* out)1615 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1616                                                    Handle<Object> object,
1617                                                    uint32_t* out) {
1618   if (object->IsUndefined(isolate)) {
1619     *out = kMaxUInt32;
1620     return object;
1621   }
1622 
1623   Handle<Object> number;
1624   ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1625                              Object);
1626   *out = NumberToUint32(*number);
1627   return object;
1628 }
1629 
NewJSArrayWithElements(Isolate* isolate, Handle<FixedArray> elems, int num_elems)1630 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1631                                        Handle<FixedArray> elems,
1632                                        int num_elems) {
1633   return isolate->factory()->NewJSArrayWithElements(
1634       FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1635 }
1636 
1637 }  // namespace
1638 
1639 // Slow path for:
1640 // ES#sec-regexp.prototype-@@replace
1641 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1642 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1643   HandleScope scope(isolate);
1644   DCHECK_EQ(3, args.length());
1645 
1646   Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1647   Handle<String> string = args.at<String>(1);
1648   Handle<Object> limit_obj = args.at(2);
1649 
1650   Factory* factory = isolate->factory();
1651 
1652   Handle<JSFunction> regexp_fun = isolate->regexp_function();
1653   Handle<Object> ctor;
1654   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655       isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1656 
1657   Handle<Object> flags_obj;
1658   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1659       isolate, flags_obj,
1660       JSObject::GetProperty(isolate, recv, factory->flags_string()));
1661 
1662   Handle<String> flags;
1663   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1664                                      Object::ToString(isolate, flags_obj));
1665 
1666   Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1667   const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1668 
1669   Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1670   const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1671 
1672   Handle<String> new_flags = flags;
1673   if (!sticky) {
1674     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1675                                        factory->NewConsString(flags, y_str));
1676   }
1677 
1678   Handle<JSReceiver> splitter;
1679   {
1680     const int argc = 2;
1681 
1682     base::ScopedVector<Handle<Object>> argv(argc);
1683     argv[0] = recv;
1684     argv[1] = new_flags;
1685 
1686     Handle<Object> splitter_obj;
1687     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1688         isolate, splitter_obj,
1689         Execution::New(isolate, ctor, argc, argv.begin()));
1690 
1691     splitter = Handle<JSReceiver>::cast(splitter_obj);
1692   }
1693 
1694   uint32_t limit;
1695   RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1696 
1697   const uint32_t length = string->length();
1698 
1699   if (limit == 0) return *factory->NewJSArray(0);
1700 
1701   if (length == 0) {
1702     Handle<Object> result;
1703     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1704         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1705                                                  factory->undefined_value()));
1706 
1707     if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1708 
1709     Handle<FixedArray> elems = factory->NewFixedArray(1);
1710     elems->set(0, *string);
1711     return *factory->NewJSArrayWithElements(elems);
1712   }
1713 
1714   static const int kInitialArraySize = 8;
1715   Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1716   uint32_t num_elems = 0;
1717 
1718   uint32_t string_index = 0;
1719   uint32_t prev_string_index = 0;
1720   while (string_index < length) {
1721     RETURN_FAILURE_ON_EXCEPTION(
1722         isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1723 
1724     Handle<Object> result;
1725     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1726         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1727                                                  factory->undefined_value()));
1728 
1729     if (result->IsNull(isolate)) {
1730       string_index = static_cast<uint32_t>(
1731           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1732       continue;
1733     }
1734 
1735     Handle<Object> last_index_obj;
1736     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1737         isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1738 
1739     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1740         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1741 
1742     const uint32_t end =
1743         std::min(PositiveNumberToUint32(*last_index_obj), length);
1744     if (end == prev_string_index) {
1745       string_index = static_cast<uint32_t>(
1746           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1747       continue;
1748     }
1749 
1750     {
1751       Handle<String> substr =
1752           factory->NewSubString(string, prev_string_index, string_index);
1753       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1754       if (num_elems == limit) {
1755         return *NewJSArrayWithElements(isolate, elems, num_elems);
1756       }
1757     }
1758 
1759     prev_string_index = end;
1760 
1761     Handle<Object> num_captures_obj;
1762     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1763         isolate, num_captures_obj,
1764         Object::GetProperty(isolate, result,
1765                             isolate->factory()->length_string()));
1766 
1767     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1768         isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1769     const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1770 
1771     for (uint32_t i = 1; i < num_captures; i++) {
1772       Handle<Object> capture;
1773       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1774           isolate, capture, Object::GetElement(isolate, result, i));
1775       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1776       if (num_elems == limit) {
1777         return *NewJSArrayWithElements(isolate, elems, num_elems);
1778       }
1779     }
1780 
1781     string_index = prev_string_index;
1782   }
1783 
1784   {
1785     Handle<String> substr =
1786         factory->NewSubString(string, prev_string_index, length);
1787     elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1788   }
1789 
1790   return *NewJSArrayWithElements(isolate, elems, num_elems);
1791 }
1792 
1793 // Slow path for:
1794 // ES#sec-regexp.prototype-@@replace
1795 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1796 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1797   HandleScope scope(isolate);
1798   DCHECK_EQ(3, args.length());
1799 
1800   Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1801   Handle<String> string = args.at<String>(1);
1802   Handle<Object> replace_obj = args.at(2);
1803 
1804   Factory* factory = isolate->factory();
1805 
1806   string = String::Flatten(isolate, string);
1807 
1808   const bool functional_replace = replace_obj->IsCallable();
1809 
1810   Handle<String> replace;
1811   if (!functional_replace) {
1812     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1813                                        Object::ToString(isolate, replace_obj));
1814   }
1815 
1816   // Fast-path for unmodified JSRegExps (and non-functional replace).
1817   if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1818     // We should never get here with functional replace because unmodified
1819     // regexp and functional replace should be fully handled in CSA code.
1820     CHECK(!functional_replace);
1821     Handle<Object> result;
1822     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1823         isolate, result,
1824         RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1825     DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1826     return *result;
1827   }
1828 
1829   const uint32_t length = string->length();
1830 
1831   Handle<Object> global_obj;
1832   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1833       isolate, global_obj,
1834       JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1835   const bool global = global_obj->BooleanValue(isolate);
1836 
1837   bool unicode = false;
1838   if (global) {
1839     Handle<Object> unicode_obj;
1840     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1841         isolate, unicode_obj,
1842         JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1843     unicode = unicode_obj->BooleanValue(isolate);
1844 
1845     RETURN_FAILURE_ON_EXCEPTION(isolate,
1846                                 RegExpUtils::SetLastIndex(isolate, recv, 0));
1847   }
1848 
1849   base::SmallVector<Handle<Object>, kStaticVectorSlots> results;
1850 
1851   while (true) {
1852     Handle<Object> result;
1853     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1854         isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1855                                                  factory->undefined_value()));
1856 
1857     if (result->IsNull(isolate)) break;
1858 
1859     results.emplace_back(result);
1860     if (!global) break;
1861 
1862     Handle<Object> match_obj;
1863     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1864                                        Object::GetElement(isolate, result, 0));
1865 
1866     Handle<String> match;
1867     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1868                                        Object::ToString(isolate, match_obj));
1869 
1870     if (match->length() == 0) {
1871       RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1872                                                isolate, recv, string, unicode));
1873     }
1874   }
1875 
1876   // TODO(jgruber): Look into ReplacementStringBuilder instead.
1877   IncrementalStringBuilder builder(isolate);
1878   uint32_t next_source_position = 0;
1879 
1880   for (const auto& result : results) {
1881     HandleScope handle_scope(isolate);
1882     Handle<Object> captures_length_obj;
1883     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884         isolate, captures_length_obj,
1885         Object::GetProperty(isolate, result, factory->length_string()));
1886 
1887     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1888         isolate, captures_length_obj,
1889         Object::ToLength(isolate, captures_length_obj));
1890     const uint32_t captures_length =
1891         PositiveNumberToUint32(*captures_length_obj);
1892 
1893     Handle<Object> match_obj;
1894     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1895                                        Object::GetElement(isolate, result, 0));
1896 
1897     Handle<String> match;
1898     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1899                                        Object::ToString(isolate, match_obj));
1900 
1901     const int match_length = match->length();
1902 
1903     Handle<Object> position_obj;
1904     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1905         isolate, position_obj,
1906         Object::GetProperty(isolate, result, factory->index_string()));
1907 
1908     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1909         isolate, position_obj, Object::ToInteger(isolate, position_obj));
1910     const uint32_t position =
1911         std::min(PositiveNumberToUint32(*position_obj), length);
1912 
1913     // Do not reserve capacity since captures_length is user-controlled.
1914     base::SmallVector<Handle<Object>, kStaticVectorSlots> captures;
1915 
1916     for (uint32_t n = 0; n < captures_length; n++) {
1917       Handle<Object> capture;
1918       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1919           isolate, capture, Object::GetElement(isolate, result, n));
1920 
1921       if (!capture->IsUndefined(isolate)) {
1922         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1923                                            Object::ToString(isolate, capture));
1924       }
1925       captures.emplace_back(capture);
1926     }
1927 
1928     Handle<Object> groups_obj = isolate->factory()->undefined_value();
1929     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1930         isolate, groups_obj,
1931         Object::GetProperty(isolate, result, factory->groups_string()));
1932 
1933     const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1934 
1935     Handle<String> replacement;
1936     if (functional_replace) {
1937       const uint32_t argc =
1938           GetArgcForReplaceCallable(captures_length, has_named_captures);
1939       if (argc == static_cast<uint32_t>(-1)) {
1940         THROW_NEW_ERROR_RETURN_FAILURE(
1941             isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1942       }
1943 
1944       base::ScopedVector<Handle<Object>> argv(argc);
1945 
1946       int cursor = 0;
1947       for (uint32_t j = 0; j < captures_length; j++) {
1948         argv[cursor++] = captures[j];
1949       }
1950 
1951       argv[cursor++] = handle(Smi::FromInt(position), isolate);
1952       argv[cursor++] = string;
1953       if (has_named_captures) argv[cursor++] = groups_obj;
1954 
1955       DCHECK_EQ(cursor, argc);
1956 
1957       Handle<Object> replacement_obj;
1958       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1959           isolate, replacement_obj,
1960           Execution::Call(isolate, replace_obj, factory->undefined_value(),
1961                           argc, argv.begin()));
1962 
1963       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1964           isolate, replacement, Object::ToString(isolate, replacement_obj));
1965     } else {
1966       DCHECK(!functional_replace);
1967       if (!groups_obj->IsUndefined(isolate)) {
1968         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1969             isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1970       }
1971       VectorBackedMatch m(isolate, string, match, position,
1972                           base::VectorOf(captures), groups_obj);
1973       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1974           isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1975     }
1976 
1977     if (position >= next_source_position) {
1978       builder.AppendString(
1979           factory->NewSubString(string, next_source_position, position));
1980       builder.AppendString(replacement);
1981 
1982       next_source_position = position + match_length;
1983     }
1984   }
1985 
1986   if (next_source_position < length) {
1987     builder.AppendString(
1988         factory->NewSubString(string, next_source_position, length));
1989   }
1990 
1991   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1992 }
1993 
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1994 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1995   HandleScope scope(isolate);
1996   DCHECK_EQ(3, args.length());
1997   // TODO(pwong): To follow the spec more closely and simplify calling code,
1998   // this could handle the canonicalization of pattern and flags. See
1999   // https://tc39.github.io/ecma262/#sec-regexpinitialize
2000   Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
2001   Handle<String> source = args.at<String>(1);
2002   Handle<String> flags = args.at<String>(2);
2003 
2004   RETURN_FAILURE_ON_EXCEPTION(isolate,
2005                               JSRegExp::Initialize(regexp, source, flags));
2006 
2007   return *regexp;
2008 }
2009 
RUNTIME_FUNCTION(Runtime_IsRegExp)2010 RUNTIME_FUNCTION(Runtime_IsRegExp) {
2011   SealHandleScope shs(isolate);
2012   DCHECK_EQ(1, args.length());
2013   Object obj = args[0];
2014   return isolate->heap()->ToBoolean(obj.IsJSRegExp());
2015 }
2016 
RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags)2017 RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2018   HandleScope scope(isolate);
2019   DCHECK_EQ(1, args.length());
2020   auto regexp = JSRegExp::cast(args[0]);
2021   Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags());
2022   return *flags;
2023 }
2024 
2025 }  // namespace internal
2026 }  // namespace v8
2027