1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <functional>
6
7 #include "src/base/small-vector.h"
8 #include "src/base/strings.h"
9 #include "src/common/message-template.h"
10 #include "src/execution/arguments-inl.h"
11 #include "src/execution/isolate-inl.h"
12 #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
13 #include "src/logging/counters.h"
14 #include "src/numbers/conversions-inl.h"
15 #include "src/objects/js-array-inl.h"
16 #include "src/objects/js-regexp-inl.h"
17 #include "src/regexp/regexp-utils.h"
18 #include "src/regexp/regexp.h"
19 #include "src/runtime/runtime-utils.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-search.h"
22
23 namespace v8 {
24 namespace internal {
25
26 namespace {
27
28 // Fairly arbitrary, but intended to fit:
29 //
30 // - captures
31 // - results
32 // - parsed replacement pattern parts
33 //
34 // for small, common cases.
35 constexpr int kStaticVectorSlots = 8;
36
37 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures, bool has_named_captures)38 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
39 bool has_named_captures) {
40 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
41 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
42 if (num_captures > Code::kMaxArguments) return -1;
43 uint32_t argc = has_named_captures
44 ? num_captures + kAdditionalArgsWithNamedCaptures
45 : num_captures + kAdditionalArgsWithoutNamedCaptures;
46 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
47 kAdditionalArgsWithNamedCaptures);
48 return (argc > Code::kMaxArguments) ? -1 : argc;
49 }
50
51 // Looks up the capture of the given name. Returns the (1-based) numbered
52 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool(String)>& name_matches, FixedArray capture_name_map)53 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
54 FixedArray capture_name_map) {
55 // TODO(jgruber): Sort capture_name_map and do binary search via
56 // internalized strings.
57
58 int maybe_capture_index = -1;
59 const int named_capture_count = capture_name_map.length() >> 1;
60 for (int j = 0; j < named_capture_count; j++) {
61 // The format of {capture_name_map} is documented at
62 // JSRegExp::kIrregexpCaptureNameMapIndex.
63 const int name_ix = j * 2;
64 const int index_ix = j * 2 + 1;
65
66 String capture_name = String::cast(capture_name_map.get(name_ix));
67 if (!name_matches(capture_name)) continue;
68
69 maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
70 break;
71 }
72
73 return maybe_capture_index;
74 }
75
76 } // namespace
77
78 class CompiledReplacement {
79 public:
80 // Return whether the replacement is simple.
81 bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
82 Handle<String> replacement, int capture_count,
83 int subject_length);
84
85 // Use Apply only if Compile returned false.
86 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
87 int32_t* match);
88
89 // Number of distinct parts of the replacement pattern.
parts()90 int parts() { return static_cast<int>(parts_.size()); }
91
92 private:
93 enum PartType {
94 SUBJECT_PREFIX = 1,
95 SUBJECT_SUFFIX,
96 SUBJECT_CAPTURE,
97 REPLACEMENT_SUBSTRING,
98 REPLACEMENT_STRING,
99 EMPTY_REPLACEMENT,
100 NUMBER_OF_PART_TYPES
101 };
102
103 struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart104 static inline ReplacementPart SubjectMatch() {
105 return ReplacementPart(SUBJECT_CAPTURE, 0);
106 }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart107 static inline ReplacementPart SubjectCapture(int capture_index) {
108 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
109 }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart110 static inline ReplacementPart SubjectPrefix() {
111 return ReplacementPart(SUBJECT_PREFIX, 0);
112 }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart113 static inline ReplacementPart SubjectSuffix(int subject_length) {
114 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
115 }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart116 static inline ReplacementPart ReplacementString() {
117 return ReplacementPart(REPLACEMENT_STRING, 0);
118 }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart119 static inline ReplacementPart EmptyReplacement() {
120 return ReplacementPart(EMPTY_REPLACEMENT, 0);
121 }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart122 static inline ReplacementPart ReplacementSubString(int from, int to) {
123 DCHECK_LE(0, from);
124 DCHECK_GT(to, from);
125 return ReplacementPart(-from, to);
126 }
127
128 // If tag <= 0 then it is the negation of a start index of a substring of
129 // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart130 ReplacementPart(int tag, int data) : tag(tag), data(data) {
131 // Must be non-positive or a PartType value.
132 DCHECK(tag < NUMBER_OF_PART_TYPES);
133 }
134 // Either a value of PartType or a non-positive number that is
135 // the negation of an index into the replacement string.
136 int tag;
137 // The data value's interpretation depends on the value of tag:
138 // tag == SUBJECT_PREFIX ||
139 // tag == SUBJECT_SUFFIX: data is unused.
140 // tag == SUBJECT_CAPTURE: data is the number of the capture.
141 // tag == REPLACEMENT_SUBSTRING ||
142 // tag == REPLACEMENT_STRING: data is index into array of substrings
143 // of the replacement string.
144 // tag == EMPTY_REPLACEMENT: data is unused.
145 // tag <= 0: Temporary representation of the substring of the replacement
146 // string ranging over -tag .. data.
147 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
148 // substring objects.
149 int data;
150 };
151
152 template <typename Char>
153 bool ParseReplacementPattern(base::Vector<Char> characters,
154 FixedArray capture_name_map, int capture_count,
155 int subject_length) {
156 // Equivalent to String::GetSubstitution, except that this method converts
157 // the replacement string into an internal representation that avoids
158 // repeated parsing when used repeatedly.
159 int length = characters.length();
160 int last = 0;
161 for (int i = 0; i < length; i++) {
162 Char c = characters[i];
163 if (c == '$') {
164 int next_index = i + 1;
165 if (next_index == length) { // No next character!
166 break;
167 }
168 Char c2 = characters[next_index];
169 switch (c2) {
170 case '$':
171 if (i > last) {
172 // There is a substring before. Include the first "$".
173 parts_.emplace_back(
174 ReplacementPart::ReplacementSubString(last, next_index));
175 last = next_index + 1; // Continue after the second "$".
176 } else {
177 // Let the next substring start with the second "$".
178 last = next_index;
179 }
180 i = next_index;
181 break;
182 case '`':
183 if (i > last) {
184 parts_.emplace_back(
185 ReplacementPart::ReplacementSubString(last, i));
186 }
187 parts_.emplace_back(ReplacementPart::SubjectPrefix());
188 i = next_index;
189 last = i + 1;
190 break;
191 case '\'':
192 if (i > last) {
193 parts_.emplace_back(
194 ReplacementPart::ReplacementSubString(last, i));
195 }
196 parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
197 i = next_index;
198 last = i + 1;
199 break;
200 case '&':
201 if (i > last) {
202 parts_.emplace_back(
203 ReplacementPart::ReplacementSubString(last, i));
204 }
205 parts_.emplace_back(ReplacementPart::SubjectMatch());
206 i = next_index;
207 last = i + 1;
208 break;
209 case '0':
210 case '1':
211 case '2':
212 case '3':
213 case '4':
214 case '5':
215 case '6':
216 case '7':
217 case '8':
218 case '9': {
219 int capture_ref = c2 - '0';
220 if (capture_ref > capture_count) {
221 i = next_index;
222 continue;
223 }
224 int second_digit_index = next_index + 1;
225 if (second_digit_index < length) {
226 // Peek ahead to see if we have two digits.
227 Char c3 = characters[second_digit_index];
228 if ('0' <= c3 && c3 <= '9') { // Double digits.
229 int double_digit_ref = capture_ref * 10 + c3 - '0';
230 if (double_digit_ref <= capture_count) {
231 next_index = second_digit_index;
232 capture_ref = double_digit_ref;
233 }
234 }
235 }
236 if (capture_ref > 0) {
237 if (i > last) {
238 parts_.emplace_back(
239 ReplacementPart::ReplacementSubString(last, i));
240 }
241 DCHECK(capture_ref <= capture_count);
242 parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
243 last = next_index + 1;
244 }
245 i = next_index;
246 break;
247 }
248 case '<': {
249 if (capture_name_map.is_null()) {
250 i = next_index;
251 break;
252 }
253
254 // Scan until the next '>', and let the enclosed substring be the
255 // groupName.
256
257 const int name_start_index = next_index + 1;
258 int closing_bracket_index = -1;
259 for (int j = name_start_index; j < length; j++) {
260 if (characters[j] == '>') {
261 closing_bracket_index = j;
262 break;
263 }
264 }
265
266 // If no closing bracket is found, '$<' is treated as a string
267 // literal.
268 if (closing_bracket_index == -1) {
269 i = next_index;
270 break;
271 }
272
273 base::Vector<Char> requested_name =
274 characters.SubVector(name_start_index, closing_bracket_index);
275
276 // Let capture be ? Get(namedCaptures, groupName).
277
278 const int capture_index = LookupNamedCapture(
279 [=](String capture_name) {
280 return capture_name.IsEqualTo(requested_name);
281 },
282 capture_name_map);
283
284 // If capture is undefined or does not exist, replace the text
285 // through the following '>' with the empty string.
286 // Otherwise, replace the text through the following '>' with
287 // ? ToString(capture).
288
289 DCHECK(capture_index == -1 ||
290 (1 <= capture_index && capture_index <= capture_count));
291
292 if (i > last) {
293 parts_.emplace_back(
294 ReplacementPart::ReplacementSubString(last, i));
295 }
296 parts_.emplace_back(
297 (capture_index == -1)
298 ? ReplacementPart::EmptyReplacement()
299 : ReplacementPart::SubjectCapture(capture_index));
300 last = closing_bracket_index + 1;
301 i = closing_bracket_index;
302 break;
303 }
304 default:
305 i = next_index;
306 break;
307 }
308 }
309 }
310 if (length > last) {
311 if (last == 0) {
312 // Replacement is simple. Do not use Apply to do the replacement.
313 return true;
314 } else {
315 parts_.emplace_back(
316 ReplacementPart::ReplacementSubString(last, length));
317 }
318 }
319 return false;
320 }
321
322 base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_;
323 base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_;
324 };
325
Compile(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> replacement, int capture_count, int subject_length)326 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
327 Handle<String> replacement, int capture_count,
328 int subject_length) {
329 {
330 DisallowGarbageCollection no_gc;
331 String::FlatContent content = replacement->GetFlatContent(no_gc);
332 DCHECK(content.IsFlat());
333
334 FixedArray capture_name_map;
335 if (capture_count > 0) {
336 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
337 Object maybe_capture_name_map = regexp->capture_name_map();
338 if (maybe_capture_name_map.IsFixedArray()) {
339 capture_name_map = FixedArray::cast(maybe_capture_name_map);
340 }
341 }
342
343 bool simple;
344 if (content.IsOneByte()) {
345 simple =
346 ParseReplacementPattern(content.ToOneByteVector(), capture_name_map,
347 capture_count, subject_length);
348 } else {
349 DCHECK(content.IsTwoByte());
350 simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map,
351 capture_count, subject_length);
352 }
353 if (simple) return true;
354 }
355
356 // Find substrings of replacement string and create them as String objects.
357 int substring_index = 0;
358 for (ReplacementPart& part : parts_) {
359 int tag = part.tag;
360 if (tag <= 0) { // A replacement string slice.
361 int from = -tag;
362 int to = part.data;
363 replacement_substrings_.emplace_back(
364 isolate->factory()->NewSubString(replacement, from, to));
365 part.tag = REPLACEMENT_SUBSTRING;
366 part.data = substring_index;
367 substring_index++;
368 } else if (tag == REPLACEMENT_STRING) {
369 replacement_substrings_.emplace_back(replacement);
370 part.data = substring_index;
371 substring_index++;
372 }
373 }
374 return false;
375 }
376
377
Apply(ReplacementStringBuilder* builder, int match_from, int match_to, int32_t* match)378 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
379 int match_from, int match_to, int32_t* match) {
380 DCHECK_LT(0, parts_.size());
381 for (ReplacementPart& part : parts_) {
382 switch (part.tag) {
383 case SUBJECT_PREFIX:
384 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
385 break;
386 case SUBJECT_SUFFIX: {
387 int subject_length = part.data;
388 if (match_to < subject_length) {
389 builder->AddSubjectSlice(match_to, subject_length);
390 }
391 break;
392 }
393 case SUBJECT_CAPTURE: {
394 int capture = part.data;
395 int from = match[capture * 2];
396 int to = match[capture * 2 + 1];
397 if (from >= 0 && to > from) {
398 builder->AddSubjectSlice(from, to);
399 }
400 break;
401 }
402 case REPLACEMENT_SUBSTRING:
403 case REPLACEMENT_STRING:
404 builder->AddString(replacement_substrings_[part.data]);
405 break;
406 case EMPTY_REPLACEMENT:
407 break;
408 default:
409 UNREACHABLE();
410 }
411 }
412 }
413
FindOneByteStringIndices(base::Vector<const uint8_t> subject, uint8_t pattern, std::vector<int>* indices, unsigned int limit)414 void FindOneByteStringIndices(base::Vector<const uint8_t> subject,
415 uint8_t pattern, std::vector<int>* indices,
416 unsigned int limit) {
417 DCHECK_LT(0, limit);
418 // Collect indices of pattern in subject using memchr.
419 // Stop after finding at most limit values.
420 const uint8_t* subject_start = subject.begin();
421 const uint8_t* subject_end = subject_start + subject.length();
422 const uint8_t* pos = subject_start;
423 while (limit > 0) {
424 pos = reinterpret_cast<const uint8_t*>(
425 memchr(pos, pattern, subject_end - pos));
426 if (pos == nullptr) return;
427 indices->push_back(static_cast<int>(pos - subject_start));
428 pos++;
429 limit--;
430 }
431 }
432
FindTwoByteStringIndices(const base::Vector<const base::uc16> subject, base::uc16 pattern, std::vector<int>* indices, unsigned int limit)433 void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,
434 base::uc16 pattern, std::vector<int>* indices,
435 unsigned int limit) {
436 DCHECK_LT(0, limit);
437 const base::uc16* subject_start = subject.begin();
438 const base::uc16* subject_end = subject_start + subject.length();
439 for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
440 pos++) {
441 if (*pos == pattern) {
442 indices->push_back(static_cast<int>(pos - subject_start));
443 limit--;
444 }
445 }
446 }
447
448 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate* isolate, base::Vector<const SubjectChar> subject, base::Vector<const PatternChar> pattern, std::vector<int>* indices, unsigned int limit)449 void FindStringIndices(Isolate* isolate,
450 base::Vector<const SubjectChar> subject,
451 base::Vector<const PatternChar> pattern,
452 std::vector<int>* indices, unsigned int limit) {
453 DCHECK_LT(0, limit);
454 // Collect indices of pattern in subject.
455 // Stop after finding at most limit values.
456 int pattern_length = pattern.length();
457 int index = 0;
458 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
459 while (limit > 0) {
460 index = search.Search(subject, index);
461 if (index < 0) return;
462 indices->push_back(index);
463 index += pattern_length;
464 limit--;
465 }
466 }
467
FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern, std::vector<int>* indices, unsigned int limit)468 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
469 std::vector<int>* indices, unsigned int limit) {
470 {
471 DisallowGarbageCollection no_gc;
472 String::FlatContent subject_content = subject.GetFlatContent(no_gc);
473 String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
474 DCHECK(subject_content.IsFlat());
475 DCHECK(pattern_content.IsFlat());
476 if (subject_content.IsOneByte()) {
477 base::Vector<const uint8_t> subject_vector =
478 subject_content.ToOneByteVector();
479 if (pattern_content.IsOneByte()) {
480 base::Vector<const uint8_t> pattern_vector =
481 pattern_content.ToOneByteVector();
482 if (pattern_vector.length() == 1) {
483 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
484 limit);
485 } else {
486 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 limit);
488 }
489 } else {
490 FindStringIndices(isolate, subject_vector,
491 pattern_content.ToUC16Vector(), indices, limit);
492 }
493 } else {
494 base::Vector<const base::uc16> subject_vector =
495 subject_content.ToUC16Vector();
496 if (pattern_content.IsOneByte()) {
497 base::Vector<const uint8_t> pattern_vector =
498 pattern_content.ToOneByteVector();
499 if (pattern_vector.length() == 1) {
500 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
501 limit);
502 } else {
503 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
504 limit);
505 }
506 } else {
507 base::Vector<const base::uc16> pattern_vector =
508 pattern_content.ToUC16Vector();
509 if (pattern_vector.length() == 1) {
510 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
511 limit);
512 } else {
513 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
514 limit);
515 }
516 }
517 }
518 }
519 }
520
521 namespace {
GetRewoundRegexpIndicesList(Isolate* isolate)522 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
523 std::vector<int>* list = isolate->regexp_indices();
524 list->clear();
525 return list;
526 }
527
TruncateRegexpIndicesList(Isolate* isolate)528 void TruncateRegexpIndicesList(Isolate* isolate) {
529 // Same size as smallest zone segment, preserving behavior from the
530 // runtime zone.
531 // TODO(jgruber): Consider removing the reusable regexp_indices list and
532 // simply allocating a new list each time. It feels like we're needlessly
533 // optimizing an edge case.
534 static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
535 std::vector<int>* indices = isolate->regexp_indices();
536 if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
537 // Throw away backing storage.
538 indices->clear();
539 indices->shrink_to_fit();
540 }
541 }
542 } // namespace
543
544 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info)545 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
546 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
547 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
548 DCHECK(subject->IsFlat());
549 DCHECK(replacement->IsFlat());
550
551 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
552
553 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag());
554 String pattern = pattern_regexp->atom_pattern();
555 int subject_len = subject->length();
556 int pattern_len = pattern.length();
557 int replacement_len = replacement->length();
558
559 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
560
561 if (indices->empty()) return *subject;
562
563 // Detect integer overflow.
564 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
565 static_cast<int64_t>(pattern_len)) *
566 static_cast<int64_t>(indices->size()) +
567 static_cast<int64_t>(subject_len);
568 int result_len;
569 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
570 STATIC_ASSERT(String::kMaxLength < kMaxInt);
571 result_len = kMaxInt; // Provoke exception.
572 } else {
573 result_len = static_cast<int>(result_len_64);
574 }
575 if (result_len == 0) {
576 return ReadOnlyRoots(isolate).empty_string();
577 }
578
579 int subject_pos = 0;
580 int result_pos = 0;
581
582 MaybeHandle<SeqString> maybe_res;
583 if (ResultSeqString::kHasOneByteEncoding) {
584 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
585 } else {
586 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
587 }
588 Handle<SeqString> untyped_res;
589 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
590 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
591
592 DisallowGarbageCollection no_gc;
593 for (int index : *indices) {
594 // Copy non-matched subject content.
595 if (subject_pos < index) {
596 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
597 subject_pos, index - subject_pos);
598 result_pos += index - subject_pos;
599 }
600
601 // Replace match.
602 if (replacement_len > 0) {
603 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
604 replacement_len);
605 result_pos += replacement_len;
606 }
607
608 subject_pos = index + pattern_len;
609 }
610 // Add remaining subject content at the end.
611 if (subject_pos < subject_len) {
612 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
613 subject_pos, subject_len - subject_pos);
614 }
615
616 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
617 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
618
619 TruncateRegexpIndicesList(isolate);
620
621 return *result;
622 }
623
StringReplaceGlobalRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info)624 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
625 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
626 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
627 DCHECK(subject->IsFlat());
628 DCHECK(replacement->IsFlat());
629
630 int capture_count = regexp->capture_count();
631 int subject_length = subject->length();
632
633 // Ensure the RegExp is compiled so we can access the capture-name map.
634 if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
635 return ReadOnlyRoots(isolate).exception();
636 }
637
638 CompiledReplacement compiled_replacement;
639 const bool simple_replace = compiled_replacement.Compile(
640 isolate, regexp, replacement, capture_count, subject_length);
641
642 // Shortcut for simple non-regexp global replacements
643 if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) {
644 if (subject->IsOneByteRepresentation() &&
645 replacement->IsOneByteRepresentation()) {
646 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
647 isolate, subject, regexp, replacement, last_match_info);
648 } else {
649 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
650 isolate, subject, regexp, replacement, last_match_info);
651 }
652 }
653
654 RegExpGlobalCache global_cache(regexp, subject, isolate);
655 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
656
657 int32_t* current_match = global_cache.FetchNext();
658 if (current_match == nullptr) {
659 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
660 return *subject;
661 }
662
663 // Guessing the number of parts that the final result string is built
664 // from. Global regexps can match any number of times, so we guess
665 // conservatively.
666 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
667 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
668
669 int prev = 0;
670
671 do {
672 int start = current_match[0];
673 int end = current_match[1];
674
675 if (prev < start) {
676 builder.AddSubjectSlice(prev, start);
677 }
678
679 if (simple_replace) {
680 builder.AddString(replacement);
681 } else {
682 compiled_replacement.Apply(&builder, start, end, current_match);
683 }
684 prev = end;
685
686 current_match = global_cache.FetchNext();
687 } while (current_match != nullptr);
688
689 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
690
691 if (prev < subject_length) {
692 builder.AddSubjectSlice(prev, subject_length);
693 }
694
695 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
696 global_cache.LastSuccessfulMatch());
697
698 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
699 }
700
701 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_info)702 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
703 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
704 Handle<RegExpMatchInfo> last_match_info) {
705 DCHECK(subject->IsFlat());
706
707 // Shortcut for simple non-regexp global replacements
708 if (regexp->type_tag() == JSRegExp::ATOM) {
709 Handle<String> empty_string = isolate->factory()->empty_string();
710 if (subject->IsOneByteRepresentation()) {
711 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
712 isolate, subject, regexp, empty_string, last_match_info);
713 } else {
714 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
715 isolate, subject, regexp, empty_string, last_match_info);
716 }
717 }
718
719 RegExpGlobalCache global_cache(regexp, subject, isolate);
720 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
721
722 int32_t* current_match = global_cache.FetchNext();
723 if (current_match == nullptr) {
724 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
725 return *subject;
726 }
727
728 int start = current_match[0];
729 int end = current_match[1];
730 int capture_count = regexp->capture_count();
731 int subject_length = subject->length();
732
733 int new_length = subject_length - (end - start);
734 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
735
736 Handle<ResultSeqString> answer;
737 if (ResultSeqString::kHasOneByteEncoding) {
738 answer = Handle<ResultSeqString>::cast(
739 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
740 } else {
741 answer = Handle<ResultSeqString>::cast(
742 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
743 }
744
745 int prev = 0;
746 int position = 0;
747
748 DisallowGarbageCollection no_gc;
749 do {
750 start = current_match[0];
751 end = current_match[1];
752 if (prev < start) {
753 // Add substring subject[prev;start] to answer string.
754 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
755 start - prev);
756 position += start - prev;
757 }
758 prev = end;
759
760 current_match = global_cache.FetchNext();
761 } while (current_match != nullptr);
762
763 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
764
765 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
766 global_cache.LastSuccessfulMatch());
767
768 if (prev < subject_length) {
769 // Add substring subject[prev;length] to answer string.
770 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
771 subject_length - prev);
772 position += subject_length - prev;
773 }
774
775 if (position == 0) return ReadOnlyRoots(isolate).empty_string();
776
777 // Shorten string and fill
778 int string_size = ResultSeqString::SizeFor(position);
779 int allocated_string_size = ResultSeqString::SizeFor(new_length);
780 int delta = allocated_string_size - string_size;
781
782 answer->set_length(position);
783 if (delta == 0) return *answer;
784
785 Address end_of_string = answer->address() + string_size;
786 Heap* heap = isolate->heap();
787
788 // The trimming is performed on a newly allocated object, which is on a
789 // freshly allocated page or on an already swept page. Hence, the sweeper
790 // thread can not get confused with the filler creation. No synchronization
791 // needed.
792 // TODO(hpayer): We should shrink the large object page if the size
793 // of the object changed significantly.
794 if (!heap->IsLargeObject(*answer)) {
795 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
796 }
797 return *answer;
798 }
799
RUNTIME_FUNCTION(Runtime_StringSplit)800 RUNTIME_FUNCTION(Runtime_StringSplit) {
801 HandleScope handle_scope(isolate);
802 DCHECK_EQ(3, args.length());
803 Handle<String> subject = args.at<String>(0);
804 Handle<String> pattern = args.at<String>(1);
805 uint32_t limit = NumberToUint32(args[2]);
806 CHECK_LT(0, limit);
807
808 int subject_length = subject->length();
809 int pattern_length = pattern->length();
810 CHECK_LT(0, pattern_length);
811
812 if (limit == 0xFFFFFFFFu) {
813 FixedArray last_match_cache_unused;
814 Handle<Object> cached_answer(
815 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
816 &last_match_cache_unused,
817 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
818 isolate);
819 if (*cached_answer != Smi::zero()) {
820 // The cache FixedArray is a COW-array and can therefore be reused.
821 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
822 Handle<FixedArray>::cast(cached_answer));
823 return *result;
824 }
825 }
826
827 // The limit can be very large (0xFFFFFFFFu), but since the pattern
828 // isn't empty, we can never create more parts than ~half the length
829 // of the subject.
830
831 subject = String::Flatten(isolate, subject);
832 pattern = String::Flatten(isolate, pattern);
833
834 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
835
836 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
837
838 if (static_cast<uint32_t>(indices->size()) < limit) {
839 indices->push_back(subject_length);
840 }
841
842 // The list indices now contains the end of each part to create.
843
844 // Create JSArray of substrings separated by separator.
845 int part_count = static_cast<int>(indices->size());
846
847 Handle<JSArray> result =
848 isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
849 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
850
851 DCHECK(result->HasObjectElements());
852
853 Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
854
855 if (part_count == 1 && indices->at(0) == subject_length) {
856 elements->set(0, *subject);
857 } else {
858 int part_start = 0;
859 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
860 int part_end = indices->at(i);
861 Handle<String> substring =
862 isolate->factory()->NewProperSubString(subject, part_start, part_end);
863 elements->set(i, *substring);
864 part_start = part_end + pattern_length;
865 });
866 }
867
868 if (limit == 0xFFFFFFFFu) {
869 if (result->HasObjectElements()) {
870 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
871 isolate->factory()->empty_fixed_array(),
872 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
873 }
874 }
875
876 TruncateRegexpIndicesList(isolate);
877
878 return *result;
879 }
880
881 namespace {
882
RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)883 MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp,
884 Handle<String> subject, int32_t index,
885 Handle<RegExpMatchInfo> last_match_info,
886 RegExp::ExecQuirks exec_quirks) {
887 // Due to the way the JS calls are constructed this must be less than the
888 // length of a string, i.e. it is always a Smi. We check anyway for security.
889 CHECK_LE(0, index);
890 CHECK_GE(subject->length(), index);
891 isolate->counters()->regexp_entry_runtime()->Increment();
892 return RegExp::Exec(isolate, regexp, subject, index, last_match_info,
893 exec_quirks);
894 }
895
ExperimentalOneshotExec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks)896 MaybeHandle<Object> ExperimentalOneshotExec(
897 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
898 int32_t index, Handle<RegExpMatchInfo> last_match_info,
899 RegExp::ExecQuirks exec_quirks) {
900 // Due to the way the JS calls are constructed this must be less than the
901 // length of a string, i.e. it is always a Smi. We check anyway for security.
902 CHECK_LE(0, index);
903 CHECK_GE(subject->length(), index);
904 isolate->counters()->regexp_entry_runtime()->Increment();
905 return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
906 last_match_info, exec_quirks);
907 }
908
909 } // namespace
910
RUNTIME_FUNCTION(Runtime_RegExpExec)911 RUNTIME_FUNCTION(Runtime_RegExpExec) {
912 HandleScope scope(isolate);
913 DCHECK_EQ(4, args.length());
914 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
915 Handle<String> subject = args.at<String>(1);
916 int32_t index = 0;
917 CHECK(args[2].ToInt32(&index));
918 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
919 RETURN_RESULT_OR_FAILURE(
920 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
921 RegExp::ExecQuirks::kNone));
922 }
923
RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure)924 RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) {
925 HandleScope scope(isolate);
926 DCHECK_EQ(4, args.length());
927 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
928 Handle<String> subject = args.at<String>(1);
929 int32_t index = 0;
930 CHECK(args[2].ToInt32(&index));
931 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
932 RETURN_RESULT_OR_FAILURE(
933 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
934 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
935 }
936
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec)937 RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
938 HandleScope scope(isolate);
939 DCHECK_EQ(4, args.length());
940 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
941 Handle<String> subject = args.at<String>(1);
942 int32_t index = 0;
943 CHECK(args[2].ToInt32(&index));
944 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
945 RETURN_RESULT_OR_FAILURE(
946 isolate,
947 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
948 RegExp::ExecQuirks::kNone));
949 }
950
RUNTIME_FUNCTION( Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure)951 RUNTIME_FUNCTION(
952 Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) {
953 HandleScope scope(isolate);
954 DCHECK_EQ(4, args.length());
955 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
956 Handle<String> subject = args.at<String>(1);
957 int32_t index = 0;
958 CHECK(args[2].ToInt32(&index));
959 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(3);
960 RETURN_RESULT_OR_FAILURE(
961 isolate,
962 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
963 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
964 }
965
RUNTIME_FUNCTION(Runtime_RegExpBuildIndices)966 RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
967 HandleScope scope(isolate);
968 DCHECK_EQ(3, args.length());
969 Handle<RegExpMatchInfo> match_info = args.at<RegExpMatchInfo>(1);
970 Handle<Object> maybe_names = args.at(2);
971 #ifdef DEBUG
972 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
973 DCHECK(regexp->flags() & JSRegExp::kHasIndices);
974 #endif
975
976 return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
977 }
978
979 namespace {
980
981 class MatchInfoBackedMatch : public String::Match {
982 public:
MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, Handle<RegExpMatchInfo> match_info)983 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
984 Handle<String> subject,
985 Handle<RegExpMatchInfo> match_info)
986 : isolate_(isolate), match_info_(match_info) {
987 subject_ = String::Flatten(isolate, subject);
988
989 if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) {
990 Object o = regexp->capture_name_map();
991 has_named_captures_ = o.IsFixedArray();
992 if (has_named_captures_) {
993 capture_name_map_ = handle(FixedArray::cast(o), isolate);
994 }
995 } else {
996 has_named_captures_ = false;
997 }
998 }
999
1000 Handle<String> GetMatch() override {
1001 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
1002 }
1003
1004 Handle<String> GetPrefix() override {
1005 const int match_start = match_info_->Capture(0);
1006 return isolate_->factory()->NewSubString(subject_, 0, match_start);
1007 }
1008
1009 Handle<String> GetSuffix() override {
1010 const int match_end = match_info_->Capture(1);
1011 return isolate_->factory()->NewSubString(subject_, match_end,
1012 subject_->length());
1013 }
1014
1015 bool HasNamedCaptures() override { return has_named_captures_; }
1016
1017 int CaptureCount() override {
1018 return match_info_->NumberOfCaptureRegisters() / 2;
1019 }
1020
1021 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1022 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1023 isolate_, match_info_, i, capture_exists);
1024 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1025 : isolate_->factory()->empty_string();
1026 }
1027
1028 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1029 CaptureState* state) override {
1030 DCHECK(has_named_captures_);
1031 const int capture_index = LookupNamedCapture(
1032 [=](String capture_name) { return capture_name.Equals(*name); },
1033 *capture_name_map_);
1034
1035 if (capture_index == -1) {
1036 *state = UNMATCHED;
1037 return isolate_->factory()->empty_string();
1038 }
1039
1040 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1041
1042 bool capture_exists;
1043 Handle<String> capture_value;
1044 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1045 GetCapture(capture_index, &capture_exists),
1046 String);
1047
1048 if (!capture_exists) {
1049 *state = UNMATCHED;
1050 return isolate_->factory()->empty_string();
1051 } else {
1052 *state = MATCHED;
1053 return capture_value;
1054 }
1055 }
1056
1057 private:
1058 Isolate* isolate_;
1059 Handle<String> subject_;
1060 Handle<RegExpMatchInfo> match_info_;
1061
1062 bool has_named_captures_;
1063 Handle<FixedArray> capture_name_map_;
1064 };
1065
1066 class VectorBackedMatch : public String::Match {
1067 public:
VectorBackedMatch(Isolate* isolate, Handle<String> subject, Handle<String> match, int match_position, base::Vector<Handle<Object>> captures, Handle<Object> groups_obj)1068 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1069 Handle<String> match, int match_position,
1070 base::Vector<Handle<Object>> captures,
1071 Handle<Object> groups_obj)
1072 : isolate_(isolate),
1073 match_(match),
1074 match_position_(match_position),
1075 captures_(captures) {
1076 subject_ = String::Flatten(isolate, subject);
1077
1078 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1079 has_named_captures_ = !groups_obj->IsUndefined(isolate);
1080 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1081 }
1082
1083 Handle<String> GetMatch() override { return match_; }
1084
1085 Handle<String> GetPrefix() override {
1086 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1087 }
1088
1089 Handle<String> GetSuffix() override {
1090 const int match_end_position = match_position_ + match_->length();
1091 return isolate_->factory()->NewSubString(subject_, match_end_position,
1092 subject_->length());
1093 }
1094
1095 bool HasNamedCaptures() override { return has_named_captures_; }
1096
1097 int CaptureCount() override { return captures_.length(); }
1098
1099 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1100 Handle<Object> capture_obj = captures_[i];
1101 if (capture_obj->IsUndefined(isolate_)) {
1102 *capture_exists = false;
1103 return isolate_->factory()->empty_string();
1104 }
1105 *capture_exists = true;
1106 return Object::ToString(isolate_, capture_obj);
1107 }
1108
1109 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1110 CaptureState* state) override {
1111 DCHECK(has_named_captures_);
1112
1113 Handle<Object> capture_obj;
1114 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1115 Object::GetProperty(isolate_, groups_obj_, name),
1116 String);
1117 if (capture_obj->IsUndefined(isolate_)) {
1118 *state = UNMATCHED;
1119 return isolate_->factory()->empty_string();
1120 } else {
1121 *state = MATCHED;
1122 return Object::ToString(isolate_, capture_obj);
1123 }
1124 }
1125
1126 private:
1127 Isolate* isolate_;
1128 Handle<String> subject_;
1129 Handle<String> match_;
1130 const int match_position_;
1131 base::Vector<Handle<Object>> captures_;
1132
1133 bool has_named_captures_;
1134 Handle<JSReceiver> groups_obj_;
1135 };
1136
1137 // Create the groups object (see also the RegExp result creation in
1138 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject( Isolate* isolate, Handle<FixedArray> capture_map, const std::function<Object(int)>& f_get_capture)1139 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1140 Isolate* isolate, Handle<FixedArray> capture_map,
1141 const std::function<Object(int)>& f_get_capture) {
1142 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1143
1144 const int named_capture_count = capture_map->length() >> 1;
1145 for (int i = 0; i < named_capture_count; i++) {
1146 const int name_ix = i * 2;
1147 const int index_ix = i * 2 + 1;
1148
1149 Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1150 isolate);
1151 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1152 DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1.
1153
1154 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1155 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1156
1157 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1158 }
1159
1160 return groups;
1161 }
1162
1163 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1164 // separate last match info. See comment on that function.
1165 template <bool has_capture>
SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_array, Handle<JSArray> result_array)1166 static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1167 Handle<JSRegExp> regexp,
1168 Handle<RegExpMatchInfo> last_match_array,
1169 Handle<JSArray> result_array) {
1170 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1171 DCHECK_NE(has_capture, regexp->capture_count() == 0);
1172 DCHECK(subject->IsFlat());
1173
1174 // Force tier up to native code for global replaces. The global replace is
1175 // implemented differently for native code and bytecode execution, where the
1176 // native code expects an array to store all the matches, and the bytecode
1177 // matches one at a time, so it's easier to tier-up to native code from the
1178 // start.
1179 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1180 regexp->MarkTierUpForNextExec();
1181 if (FLAG_trace_regexp_tier_up) {
1182 PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1183 reinterpret_cast<void*>(regexp->ptr()));
1184 }
1185 }
1186
1187 int capture_count = regexp->capture_count();
1188 int subject_length = subject->length();
1189
1190 static const int kMinLengthToCache = 0x1000;
1191
1192 if (subject_length > kMinLengthToCache) {
1193 FixedArray last_match_cache;
1194 Object cached_answer = RegExpResultsCache::Lookup(
1195 isolate->heap(), *subject, regexp->data(), &last_match_cache,
1196 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1197 if (cached_answer.IsFixedArray()) {
1198 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1199 int32_t* last_match = NewArray<int32_t>(capture_registers);
1200 for (int i = 0; i < capture_registers; i++) {
1201 last_match[i] = Smi::ToInt(last_match_cache.get(i));
1202 }
1203 Handle<FixedArray> cached_fixed_array =
1204 Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1205 // The cache FixedArray is a COW-array and we need to return a copy.
1206 Handle<FixedArray> copied_fixed_array =
1207 isolate->factory()->CopyFixedArrayWithMap(
1208 cached_fixed_array, isolate->factory()->fixed_array_map());
1209 JSArray::SetContent(result_array, copied_fixed_array);
1210 RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1211 capture_count, last_match);
1212 DeleteArray(last_match);
1213 return *result_array;
1214 }
1215 }
1216
1217 RegExpGlobalCache global_cache(regexp, subject, isolate);
1218 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1219
1220 // Ensured in Runtime_RegExpExecMultiple.
1221 DCHECK(result_array->HasObjectElements());
1222 Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1223 isolate);
1224 if (result_elements->length() < 16) {
1225 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1226 }
1227
1228 FixedArrayBuilder builder(result_elements);
1229
1230 // Position to search from.
1231 int match_start = -1;
1232 int match_end = 0;
1233 bool first = true;
1234
1235 // Two smis before and after the match, for very long strings.
1236 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1237
1238 while (true) {
1239 int32_t* current_match = global_cache.FetchNext();
1240 if (current_match == nullptr) break;
1241 match_start = current_match[0];
1242 builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1243 if (match_end < match_start) {
1244 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1245 match_start);
1246 }
1247 match_end = current_match[1];
1248 {
1249 // Avoid accumulating new handles inside loop.
1250 HandleScope temp_scope(isolate);
1251 Handle<String> match;
1252 if (!first) {
1253 match = isolate->factory()->NewProperSubString(subject, match_start,
1254 match_end);
1255 } else {
1256 match =
1257 isolate->factory()->NewSubString(subject, match_start, match_end);
1258 first = false;
1259 }
1260
1261 if (has_capture) {
1262 // Arguments array to replace function is match, captures, index and
1263 // subject, i.e., 3 + capture count in total. If the RegExp contains
1264 // named captures, they are also passed as the last argument.
1265
1266 Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate);
1267 const bool has_named_captures = maybe_capture_map->IsFixedArray();
1268
1269 const int argc =
1270 has_named_captures ? 4 + capture_count : 3 + capture_count;
1271
1272 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1273 int cursor = 0;
1274
1275 elements->set(cursor++, *match);
1276 for (int i = 1; i <= capture_count; i++) {
1277 int start = current_match[i * 2];
1278 if (start >= 0) {
1279 int end = current_match[i * 2 + 1];
1280 DCHECK(start <= end);
1281 Handle<String> substring =
1282 isolate->factory()->NewSubString(subject, start, end);
1283 elements->set(cursor++, *substring);
1284 } else {
1285 DCHECK_GT(0, current_match[i * 2 + 1]);
1286 elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1287 }
1288 }
1289
1290 elements->set(cursor++, Smi::FromInt(match_start));
1291 elements->set(cursor++, *subject);
1292
1293 if (has_named_captures) {
1294 Handle<FixedArray> capture_map =
1295 Handle<FixedArray>::cast(maybe_capture_map);
1296 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1297 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1298 elements->set(cursor++, *groups);
1299 }
1300
1301 DCHECK_EQ(cursor, argc);
1302 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1303 } else {
1304 builder.Add(*match);
1305 }
1306 }
1307 }
1308
1309 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1310
1311 if (match_start >= 0) {
1312 // Finished matching, with at least one match.
1313 if (match_end < subject_length) {
1314 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1315 subject_length);
1316 }
1317
1318 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1319 global_cache.LastSuccessfulMatch());
1320
1321 if (subject_length > kMinLengthToCache) {
1322 // Store the last successful match into the array for caching.
1323 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1324 Handle<FixedArray> last_match_cache =
1325 isolate->factory()->NewFixedArray(capture_registers);
1326 int32_t* last_match = global_cache.LastSuccessfulMatch();
1327 for (int i = 0; i < capture_registers; i++) {
1328 last_match_cache->set(i, Smi::FromInt(last_match[i]));
1329 }
1330 Handle<FixedArray> result_fixed_array =
1331 FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1332 // Cache the result and copy the FixedArray into a COW array.
1333 Handle<FixedArray> copied_fixed_array =
1334 isolate->factory()->CopyFixedArrayWithMap(
1335 result_fixed_array, isolate->factory()->fixed_array_map());
1336 RegExpResultsCache::Enter(
1337 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1338 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1339 }
1340 return *builder.ToJSArray(result_array);
1341 } else {
1342 return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1343 }
1344 }
1345
1346 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1347 // doesn't properly call the underlying exec method.
RegExpReplace( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string, Handle<String> replace)1348 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1349 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1350 Handle<String> replace) {
1351 // Functional fast-paths are dispatched directly by replace builtin.
1352 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1353
1354 Factory* factory = isolate->factory();
1355
1356 const int flags = regexp->flags();
1357 const bool global = (flags & JSRegExp::kGlobal) != 0;
1358 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1359
1360 replace = String::Flatten(isolate, replace);
1361
1362 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1363
1364 if (!global) {
1365 // Non-global regexp search, string replace.
1366
1367 uint32_t last_index = 0;
1368 if (sticky) {
1369 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1370 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1371 Object::ToLength(isolate, last_index_obj),
1372 String);
1373 last_index = PositiveNumberToUint32(*last_index_obj);
1374 }
1375
1376 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1377 isolate);
1378
1379 // A lastIndex exceeding the string length always returns null (signalling
1380 // failure) in RegExpBuiltinExec, thus we can skip the call.
1381 if (last_index <= static_cast<uint32_t>(string->length())) {
1382 ASSIGN_RETURN_ON_EXCEPTION(
1383 isolate, match_indices_obj,
1384 RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1385 String);
1386 }
1387
1388 if (match_indices_obj->IsNull(isolate)) {
1389 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1390 return string;
1391 }
1392
1393 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1394
1395 const int start_index = match_indices->Capture(0);
1396 const int end_index = match_indices->Capture(1);
1397
1398 if (sticky) {
1399 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1400 }
1401
1402 IncrementalStringBuilder builder(isolate);
1403 builder.AppendString(factory->NewSubString(string, 0, start_index));
1404
1405 if (replace->length() > 0) {
1406 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1407 Handle<String> replacement;
1408 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1409 String::GetSubstitution(isolate, &m, replace),
1410 String);
1411 builder.AppendString(replacement);
1412 }
1413
1414 builder.AppendString(
1415 factory->NewSubString(string, end_index, string->length()));
1416 return builder.Finish();
1417 } else {
1418 // Global regexp search, string replace.
1419 DCHECK(global);
1420 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1421 String);
1422
1423 // Force tier up to native code for global replaces. The global replace is
1424 // implemented differently for native code and bytecode execution, where the
1425 // native code expects an array to store all the matches, and the bytecode
1426 // matches one at a time, so it's easier to tier-up to native code from the
1427 // start.
1428 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1429 regexp->MarkTierUpForNextExec();
1430 if (FLAG_trace_regexp_tier_up) {
1431 PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1432 reinterpret_cast<void*>(regexp->ptr()));
1433 }
1434 }
1435
1436 if (replace->length() == 0) {
1437 if (string->IsOneByteRepresentation()) {
1438 Object result =
1439 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1440 isolate, string, regexp, last_match_info);
1441 return handle(String::cast(result), isolate);
1442 } else {
1443 Object result =
1444 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1445 isolate, string, regexp, last_match_info);
1446 return handle(String::cast(result), isolate);
1447 }
1448 }
1449
1450 Object result = StringReplaceGlobalRegExpWithString(
1451 isolate, string, regexp, replace, last_match_info);
1452 if (result.IsString()) {
1453 return handle(String::cast(result), isolate);
1454 } else {
1455 return MaybeHandle<String>();
1456 }
1457 }
1458
1459 UNREACHABLE();
1460 }
1461
1462 } // namespace
1463
1464 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1465 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1466 HandleScope handles(isolate);
1467 DCHECK_EQ(4, args.length());
1468
1469 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
1470 Handle<String> subject = args.at<String>(1);
1471 Handle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(2);
1472 Handle<JSArray> result_array = args.at<JSArray>(3);
1473
1474 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1475 CHECK(result_array->HasObjectElements());
1476
1477 subject = String::Flatten(isolate, subject);
1478 CHECK(regexp->flags() & JSRegExp::kGlobal);
1479
1480 Object result;
1481 if (regexp->capture_count() == 0) {
1482 result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1483 last_match_info, result_array);
1484 } else {
1485 result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1486 last_match_info, result_array);
1487 }
1488 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1489 return result;
1490 }
1491
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1492 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1493 HandleScope scope(isolate);
1494 DCHECK_EQ(3, args.length());
1495 Handle<String> subject = args.at<String>(0);
1496 Handle<JSRegExp> regexp = args.at<JSRegExp>(1);
1497 Handle<JSReceiver> replace_obj = args.at<JSReceiver>(2);
1498
1499 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1500 DCHECK(replace_obj->map().is_callable());
1501
1502 Factory* factory = isolate->factory();
1503 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1504
1505 const int flags = regexp->flags();
1506 DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1507
1508 // TODO(jgruber): This should be an easy port to CSA with massive payback.
1509
1510 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1511 uint32_t last_index = 0;
1512 if (sticky) {
1513 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1514 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1515 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1516 last_index = PositiveNumberToUint32(*last_index_obj);
1517 }
1518
1519 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1520 isolate);
1521
1522 // A lastIndex exceeding the string length always returns null (signalling
1523 // failure) in RegExpBuiltinExec, thus we can skip the call.
1524 if (last_index <= static_cast<uint32_t>(subject->length())) {
1525 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1526 isolate, match_indices_obj,
1527 RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
1528 }
1529
1530 if (match_indices_obj->IsNull(isolate)) {
1531 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1532 return *subject;
1533 }
1534
1535 Handle<RegExpMatchInfo> match_indices =
1536 Handle<RegExpMatchInfo>::cast(match_indices_obj);
1537
1538 const int index = match_indices->Capture(0);
1539 const int end_of_match = match_indices->Capture(1);
1540
1541 if (sticky) {
1542 regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1543 }
1544
1545 IncrementalStringBuilder builder(isolate);
1546 builder.AppendString(factory->NewSubString(subject, 0, index));
1547
1548 // Compute the parameter list consisting of the match, captures, index,
1549 // and subject for the replace function invocation. If the RegExp contains
1550 // named captures, they are also passed as the last argument.
1551
1552 // The number of captures plus one for the match.
1553 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1554
1555 bool has_named_captures = false;
1556 Handle<FixedArray> capture_map;
1557 if (m > 1) {
1558 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
1559
1560 Object maybe_capture_map = regexp->capture_name_map();
1561 if (maybe_capture_map.IsFixedArray()) {
1562 has_named_captures = true;
1563 capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1564 }
1565 }
1566
1567 const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1568 if (argc == static_cast<uint32_t>(-1)) {
1569 THROW_NEW_ERROR_RETURN_FAILURE(
1570 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1571 }
1572 base::ScopedVector<Handle<Object>> argv(argc);
1573
1574 int cursor = 0;
1575 for (int j = 0; j < m; j++) {
1576 bool ok;
1577 Handle<String> capture =
1578 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1579 if (ok) {
1580 argv[cursor++] = capture;
1581 } else {
1582 argv[cursor++] = factory->undefined_value();
1583 }
1584 }
1585
1586 argv[cursor++] = handle(Smi::FromInt(index), isolate);
1587 argv[cursor++] = subject;
1588
1589 if (has_named_captures) {
1590 argv[cursor++] = ConstructNamedCaptureGroupsObject(
1591 isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1592 }
1593
1594 DCHECK_EQ(cursor, argc);
1595
1596 Handle<Object> replacement_obj;
1597 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1598 isolate, replacement_obj,
1599 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1600 argv.begin()));
1601
1602 Handle<String> replacement;
1603 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1604 isolate, replacement, Object::ToString(isolate, replacement_obj));
1605
1606 builder.AppendString(replacement);
1607 builder.AppendString(
1608 factory->NewSubString(subject, end_of_match, subject->length()));
1609
1610 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1611 }
1612
1613 namespace {
1614
ToUint32(Isolate* isolate, Handle<Object> object, uint32_t* out)1615 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1616 Handle<Object> object,
1617 uint32_t* out) {
1618 if (object->IsUndefined(isolate)) {
1619 *out = kMaxUInt32;
1620 return object;
1621 }
1622
1623 Handle<Object> number;
1624 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1625 Object);
1626 *out = NumberToUint32(*number);
1627 return object;
1628 }
1629
NewJSArrayWithElements(Isolate* isolate, Handle<FixedArray> elems, int num_elems)1630 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1631 Handle<FixedArray> elems,
1632 int num_elems) {
1633 return isolate->factory()->NewJSArrayWithElements(
1634 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1635 }
1636
1637 } // namespace
1638
1639 // Slow path for:
1640 // ES#sec-regexp.prototype-@@replace
1641 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1642 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1643 HandleScope scope(isolate);
1644 DCHECK_EQ(3, args.length());
1645
1646 Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1647 Handle<String> string = args.at<String>(1);
1648 Handle<Object> limit_obj = args.at(2);
1649
1650 Factory* factory = isolate->factory();
1651
1652 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1653 Handle<Object> ctor;
1654 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655 isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1656
1657 Handle<Object> flags_obj;
1658 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1659 isolate, flags_obj,
1660 JSObject::GetProperty(isolate, recv, factory->flags_string()));
1661
1662 Handle<String> flags;
1663 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1664 Object::ToString(isolate, flags_obj));
1665
1666 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1667 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1668
1669 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1670 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1671
1672 Handle<String> new_flags = flags;
1673 if (!sticky) {
1674 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1675 factory->NewConsString(flags, y_str));
1676 }
1677
1678 Handle<JSReceiver> splitter;
1679 {
1680 const int argc = 2;
1681
1682 base::ScopedVector<Handle<Object>> argv(argc);
1683 argv[0] = recv;
1684 argv[1] = new_flags;
1685
1686 Handle<Object> splitter_obj;
1687 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1688 isolate, splitter_obj,
1689 Execution::New(isolate, ctor, argc, argv.begin()));
1690
1691 splitter = Handle<JSReceiver>::cast(splitter_obj);
1692 }
1693
1694 uint32_t limit;
1695 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1696
1697 const uint32_t length = string->length();
1698
1699 if (limit == 0) return *factory->NewJSArray(0);
1700
1701 if (length == 0) {
1702 Handle<Object> result;
1703 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1704 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1705 factory->undefined_value()));
1706
1707 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1708
1709 Handle<FixedArray> elems = factory->NewFixedArray(1);
1710 elems->set(0, *string);
1711 return *factory->NewJSArrayWithElements(elems);
1712 }
1713
1714 static const int kInitialArraySize = 8;
1715 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1716 uint32_t num_elems = 0;
1717
1718 uint32_t string_index = 0;
1719 uint32_t prev_string_index = 0;
1720 while (string_index < length) {
1721 RETURN_FAILURE_ON_EXCEPTION(
1722 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1723
1724 Handle<Object> result;
1725 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1726 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1727 factory->undefined_value()));
1728
1729 if (result->IsNull(isolate)) {
1730 string_index = static_cast<uint32_t>(
1731 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1732 continue;
1733 }
1734
1735 Handle<Object> last_index_obj;
1736 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1737 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1738
1739 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1740 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1741
1742 const uint32_t end =
1743 std::min(PositiveNumberToUint32(*last_index_obj), length);
1744 if (end == prev_string_index) {
1745 string_index = static_cast<uint32_t>(
1746 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1747 continue;
1748 }
1749
1750 {
1751 Handle<String> substr =
1752 factory->NewSubString(string, prev_string_index, string_index);
1753 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1754 if (num_elems == limit) {
1755 return *NewJSArrayWithElements(isolate, elems, num_elems);
1756 }
1757 }
1758
1759 prev_string_index = end;
1760
1761 Handle<Object> num_captures_obj;
1762 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1763 isolate, num_captures_obj,
1764 Object::GetProperty(isolate, result,
1765 isolate->factory()->length_string()));
1766
1767 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1768 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1769 const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1770
1771 for (uint32_t i = 1; i < num_captures; i++) {
1772 Handle<Object> capture;
1773 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1774 isolate, capture, Object::GetElement(isolate, result, i));
1775 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1776 if (num_elems == limit) {
1777 return *NewJSArrayWithElements(isolate, elems, num_elems);
1778 }
1779 }
1780
1781 string_index = prev_string_index;
1782 }
1783
1784 {
1785 Handle<String> substr =
1786 factory->NewSubString(string, prev_string_index, length);
1787 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1788 }
1789
1790 return *NewJSArrayWithElements(isolate, elems, num_elems);
1791 }
1792
1793 // Slow path for:
1794 // ES#sec-regexp.prototype-@@replace
1795 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1796 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1797 HandleScope scope(isolate);
1798 DCHECK_EQ(3, args.length());
1799
1800 Handle<JSReceiver> recv = args.at<JSReceiver>(0);
1801 Handle<String> string = args.at<String>(1);
1802 Handle<Object> replace_obj = args.at(2);
1803
1804 Factory* factory = isolate->factory();
1805
1806 string = String::Flatten(isolate, string);
1807
1808 const bool functional_replace = replace_obj->IsCallable();
1809
1810 Handle<String> replace;
1811 if (!functional_replace) {
1812 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1813 Object::ToString(isolate, replace_obj));
1814 }
1815
1816 // Fast-path for unmodified JSRegExps (and non-functional replace).
1817 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1818 // We should never get here with functional replace because unmodified
1819 // regexp and functional replace should be fully handled in CSA code.
1820 CHECK(!functional_replace);
1821 Handle<Object> result;
1822 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1823 isolate, result,
1824 RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1825 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1826 return *result;
1827 }
1828
1829 const uint32_t length = string->length();
1830
1831 Handle<Object> global_obj;
1832 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1833 isolate, global_obj,
1834 JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1835 const bool global = global_obj->BooleanValue(isolate);
1836
1837 bool unicode = false;
1838 if (global) {
1839 Handle<Object> unicode_obj;
1840 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1841 isolate, unicode_obj,
1842 JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1843 unicode = unicode_obj->BooleanValue(isolate);
1844
1845 RETURN_FAILURE_ON_EXCEPTION(isolate,
1846 RegExpUtils::SetLastIndex(isolate, recv, 0));
1847 }
1848
1849 base::SmallVector<Handle<Object>, kStaticVectorSlots> results;
1850
1851 while (true) {
1852 Handle<Object> result;
1853 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1854 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1855 factory->undefined_value()));
1856
1857 if (result->IsNull(isolate)) break;
1858
1859 results.emplace_back(result);
1860 if (!global) break;
1861
1862 Handle<Object> match_obj;
1863 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1864 Object::GetElement(isolate, result, 0));
1865
1866 Handle<String> match;
1867 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1868 Object::ToString(isolate, match_obj));
1869
1870 if (match->length() == 0) {
1871 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1872 isolate, recv, string, unicode));
1873 }
1874 }
1875
1876 // TODO(jgruber): Look into ReplacementStringBuilder instead.
1877 IncrementalStringBuilder builder(isolate);
1878 uint32_t next_source_position = 0;
1879
1880 for (const auto& result : results) {
1881 HandleScope handle_scope(isolate);
1882 Handle<Object> captures_length_obj;
1883 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884 isolate, captures_length_obj,
1885 Object::GetProperty(isolate, result, factory->length_string()));
1886
1887 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1888 isolate, captures_length_obj,
1889 Object::ToLength(isolate, captures_length_obj));
1890 const uint32_t captures_length =
1891 PositiveNumberToUint32(*captures_length_obj);
1892
1893 Handle<Object> match_obj;
1894 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1895 Object::GetElement(isolate, result, 0));
1896
1897 Handle<String> match;
1898 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1899 Object::ToString(isolate, match_obj));
1900
1901 const int match_length = match->length();
1902
1903 Handle<Object> position_obj;
1904 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1905 isolate, position_obj,
1906 Object::GetProperty(isolate, result, factory->index_string()));
1907
1908 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1909 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1910 const uint32_t position =
1911 std::min(PositiveNumberToUint32(*position_obj), length);
1912
1913 // Do not reserve capacity since captures_length is user-controlled.
1914 base::SmallVector<Handle<Object>, kStaticVectorSlots> captures;
1915
1916 for (uint32_t n = 0; n < captures_length; n++) {
1917 Handle<Object> capture;
1918 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1919 isolate, capture, Object::GetElement(isolate, result, n));
1920
1921 if (!capture->IsUndefined(isolate)) {
1922 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1923 Object::ToString(isolate, capture));
1924 }
1925 captures.emplace_back(capture);
1926 }
1927
1928 Handle<Object> groups_obj = isolate->factory()->undefined_value();
1929 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1930 isolate, groups_obj,
1931 Object::GetProperty(isolate, result, factory->groups_string()));
1932
1933 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1934
1935 Handle<String> replacement;
1936 if (functional_replace) {
1937 const uint32_t argc =
1938 GetArgcForReplaceCallable(captures_length, has_named_captures);
1939 if (argc == static_cast<uint32_t>(-1)) {
1940 THROW_NEW_ERROR_RETURN_FAILURE(
1941 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1942 }
1943
1944 base::ScopedVector<Handle<Object>> argv(argc);
1945
1946 int cursor = 0;
1947 for (uint32_t j = 0; j < captures_length; j++) {
1948 argv[cursor++] = captures[j];
1949 }
1950
1951 argv[cursor++] = handle(Smi::FromInt(position), isolate);
1952 argv[cursor++] = string;
1953 if (has_named_captures) argv[cursor++] = groups_obj;
1954
1955 DCHECK_EQ(cursor, argc);
1956
1957 Handle<Object> replacement_obj;
1958 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1959 isolate, replacement_obj,
1960 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1961 argc, argv.begin()));
1962
1963 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1964 isolate, replacement, Object::ToString(isolate, replacement_obj));
1965 } else {
1966 DCHECK(!functional_replace);
1967 if (!groups_obj->IsUndefined(isolate)) {
1968 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1969 isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1970 }
1971 VectorBackedMatch m(isolate, string, match, position,
1972 base::VectorOf(captures), groups_obj);
1973 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1974 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1975 }
1976
1977 if (position >= next_source_position) {
1978 builder.AppendString(
1979 factory->NewSubString(string, next_source_position, position));
1980 builder.AppendString(replacement);
1981
1982 next_source_position = position + match_length;
1983 }
1984 }
1985
1986 if (next_source_position < length) {
1987 builder.AppendString(
1988 factory->NewSubString(string, next_source_position, length));
1989 }
1990
1991 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1992 }
1993
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1994 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1995 HandleScope scope(isolate);
1996 DCHECK_EQ(3, args.length());
1997 // TODO(pwong): To follow the spec more closely and simplify calling code,
1998 // this could handle the canonicalization of pattern and flags. See
1999 // https://tc39.github.io/ecma262/#sec-regexpinitialize
2000 Handle<JSRegExp> regexp = args.at<JSRegExp>(0);
2001 Handle<String> source = args.at<String>(1);
2002 Handle<String> flags = args.at<String>(2);
2003
2004 RETURN_FAILURE_ON_EXCEPTION(isolate,
2005 JSRegExp::Initialize(regexp, source, flags));
2006
2007 return *regexp;
2008 }
2009
RUNTIME_FUNCTION(Runtime_IsRegExp)2010 RUNTIME_FUNCTION(Runtime_IsRegExp) {
2011 SealHandleScope shs(isolate);
2012 DCHECK_EQ(1, args.length());
2013 Object obj = args[0];
2014 return isolate->heap()->ToBoolean(obj.IsJSRegExp());
2015 }
2016
RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags)2017 RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2018 HandleScope scope(isolate);
2019 DCHECK_EQ(1, args.length());
2020 auto regexp = JSRegExp::cast(args[0]);
2021 Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags());
2022 return *flags;
2023 }
2024
2025 } // namespace internal
2026 } // namespace v8
2027