11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/objects/js-regexp.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/base/strings.h" 81cb0ef41Sopenharmony_ci#include "src/common/globals.h" 91cb0ef41Sopenharmony_ci#include "src/objects/code.h" 101cb0ef41Sopenharmony_ci#include "src/objects/js-array-inl.h" 111cb0ef41Sopenharmony_ci#include "src/objects/js-regexp-inl.h" 121cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h" 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_cinamespace v8 { 151cb0ef41Sopenharmony_cinamespace internal { 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ciHandle<JSRegExpResultIndices> JSRegExpResultIndices::BuildIndices( 181cb0ef41Sopenharmony_ci Isolate* isolate, Handle<RegExpMatchInfo> match_info, 191cb0ef41Sopenharmony_ci Handle<Object> maybe_names) { 201cb0ef41Sopenharmony_ci Handle<JSRegExpResultIndices> indices(Handle<JSRegExpResultIndices>::cast( 211cb0ef41Sopenharmony_ci isolate->factory()->NewJSObjectFromMap( 221cb0ef41Sopenharmony_ci isolate->regexp_result_indices_map()))); 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ci // Initialize indices length to avoid having a partially initialized object 251cb0ef41Sopenharmony_ci // should GC be triggered by creating a NewFixedArray. 261cb0ef41Sopenharmony_ci indices->set_length(Smi::zero()); 271cb0ef41Sopenharmony_ci 281cb0ef41Sopenharmony_ci // Build indices array from RegExpMatchInfo. 291cb0ef41Sopenharmony_ci int num_indices = match_info->NumberOfCaptureRegisters(); 301cb0ef41Sopenharmony_ci int num_results = num_indices >> 1; 311cb0ef41Sopenharmony_ci Handle<FixedArray> indices_array = 321cb0ef41Sopenharmony_ci isolate->factory()->NewFixedArray(num_results); 331cb0ef41Sopenharmony_ci JSArray::SetContent(indices, indices_array); 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_ci for (int i = 0; i < num_results; i++) { 361cb0ef41Sopenharmony_ci int base_offset = i * 2; 371cb0ef41Sopenharmony_ci int start_offset = match_info->Capture(base_offset); 381cb0ef41Sopenharmony_ci int end_offset = match_info->Capture(base_offset + 1); 391cb0ef41Sopenharmony_ci 401cb0ef41Sopenharmony_ci // Any unmatched captures are set to undefined, otherwise we set them to a 411cb0ef41Sopenharmony_ci // subarray of the indices. 421cb0ef41Sopenharmony_ci if (start_offset == -1) { 431cb0ef41Sopenharmony_ci indices_array->set(i, ReadOnlyRoots(isolate).undefined_value()); 441cb0ef41Sopenharmony_ci } else { 451cb0ef41Sopenharmony_ci Handle<FixedArray> indices_sub_array( 461cb0ef41Sopenharmony_ci isolate->factory()->NewFixedArray(2)); 471cb0ef41Sopenharmony_ci indices_sub_array->set(0, Smi::FromInt(start_offset)); 481cb0ef41Sopenharmony_ci indices_sub_array->set(1, Smi::FromInt(end_offset)); 491cb0ef41Sopenharmony_ci Handle<JSArray> indices_sub_jsarray = 501cb0ef41Sopenharmony_ci isolate->factory()->NewJSArrayWithElements(indices_sub_array, 511cb0ef41Sopenharmony_ci PACKED_SMI_ELEMENTS, 2); 521cb0ef41Sopenharmony_ci indices_array->set(i, *indices_sub_jsarray); 531cb0ef41Sopenharmony_ci } 541cb0ef41Sopenharmony_ci } 551cb0ef41Sopenharmony_ci 561cb0ef41Sopenharmony_ci // If there are no capture groups, set the groups property to undefined. 571cb0ef41Sopenharmony_ci FieldIndex groups_index = FieldIndex::ForDescriptor( 581cb0ef41Sopenharmony_ci indices->map(), InternalIndex(kGroupsDescriptorIndex)); 591cb0ef41Sopenharmony_ci if (maybe_names->IsUndefined(isolate)) { 601cb0ef41Sopenharmony_ci indices->FastPropertyAtPut(groups_index, 611cb0ef41Sopenharmony_ci ReadOnlyRoots(isolate).undefined_value()); 621cb0ef41Sopenharmony_ci return indices; 631cb0ef41Sopenharmony_ci } 641cb0ef41Sopenharmony_ci 651cb0ef41Sopenharmony_ci // Create a groups property which returns a dictionary of named captures to 661cb0ef41Sopenharmony_ci // their corresponding capture indices. 671cb0ef41Sopenharmony_ci Handle<FixedArray> names(Handle<FixedArray>::cast(maybe_names)); 681cb0ef41Sopenharmony_ci int num_names = names->length() >> 1; 691cb0ef41Sopenharmony_ci Handle<HeapObject> group_names; 701cb0ef41Sopenharmony_ci if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) { 711cb0ef41Sopenharmony_ci group_names = isolate->factory()->NewSwissNameDictionary(num_names); 721cb0ef41Sopenharmony_ci } else { 731cb0ef41Sopenharmony_ci group_names = isolate->factory()->NewNameDictionary(num_names); 741cb0ef41Sopenharmony_ci } 751cb0ef41Sopenharmony_ci for (int i = 0; i < num_names; i++) { 761cb0ef41Sopenharmony_ci int base_offset = i * 2; 771cb0ef41Sopenharmony_ci int name_offset = base_offset; 781cb0ef41Sopenharmony_ci int index_offset = base_offset + 1; 791cb0ef41Sopenharmony_ci Handle<String> name(String::cast(names->get(name_offset)), isolate); 801cb0ef41Sopenharmony_ci Handle<Smi> smi_index(Smi::cast(names->get(index_offset)), isolate); 811cb0ef41Sopenharmony_ci Handle<Object> capture_indices(indices_array->get(smi_index->value()), 821cb0ef41Sopenharmony_ci isolate); 831cb0ef41Sopenharmony_ci if (!capture_indices->IsUndefined(isolate)) { 841cb0ef41Sopenharmony_ci capture_indices = Handle<JSArray>::cast(capture_indices); 851cb0ef41Sopenharmony_ci } 861cb0ef41Sopenharmony_ci if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) { 871cb0ef41Sopenharmony_ci group_names = SwissNameDictionary::Add( 881cb0ef41Sopenharmony_ci isolate, Handle<SwissNameDictionary>::cast(group_names), name, 891cb0ef41Sopenharmony_ci capture_indices, PropertyDetails::Empty()); 901cb0ef41Sopenharmony_ci } else { 911cb0ef41Sopenharmony_ci group_names = NameDictionary::Add( 921cb0ef41Sopenharmony_ci isolate, Handle<NameDictionary>::cast(group_names), name, 931cb0ef41Sopenharmony_ci capture_indices, PropertyDetails::Empty()); 941cb0ef41Sopenharmony_ci } 951cb0ef41Sopenharmony_ci } 961cb0ef41Sopenharmony_ci 971cb0ef41Sopenharmony_ci // Convert group_names to a JSObject and store at the groups property of the 981cb0ef41Sopenharmony_ci // result indices. 991cb0ef41Sopenharmony_ci Handle<FixedArrayBase> elements = isolate->factory()->empty_fixed_array(); 1001cb0ef41Sopenharmony_ci Handle<HeapObject> null = 1011cb0ef41Sopenharmony_ci Handle<HeapObject>::cast(isolate->factory()->null_value()); 1021cb0ef41Sopenharmony_ci Handle<JSObject> js_group_names = 1031cb0ef41Sopenharmony_ci isolate->factory()->NewSlowJSObjectWithPropertiesAndElements( 1041cb0ef41Sopenharmony_ci null, group_names, elements); 1051cb0ef41Sopenharmony_ci indices->FastPropertyAtPut(groups_index, *js_group_names); 1061cb0ef41Sopenharmony_ci return indices; 1071cb0ef41Sopenharmony_ci} 1081cb0ef41Sopenharmony_ci 1091cb0ef41Sopenharmony_ciuint32_t JSRegExp::backtrack_limit() const { 1101cb0ef41Sopenharmony_ci CHECK_EQ(type_tag(), IRREGEXP); 1111cb0ef41Sopenharmony_ci return static_cast<uint32_t>(Smi::ToInt(DataAt(kIrregexpBacktrackLimit))); 1121cb0ef41Sopenharmony_ci} 1131cb0ef41Sopenharmony_ci 1141cb0ef41Sopenharmony_ci// static 1151cb0ef41Sopenharmony_cibase::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString( 1161cb0ef41Sopenharmony_ci Isolate* isolate, Handle<String> flags) { 1171cb0ef41Sopenharmony_ci const int length = flags->length(); 1181cb0ef41Sopenharmony_ci 1191cb0ef41Sopenharmony_ci // A longer flags string cannot be valid. 1201cb0ef41Sopenharmony_ci if (length > JSRegExp::kFlagCount) return {}; 1211cb0ef41Sopenharmony_ci 1221cb0ef41Sopenharmony_ci RegExpFlags value; 1231cb0ef41Sopenharmony_ci FlatStringReader reader(isolate, String::Flatten(isolate, flags)); 1241cb0ef41Sopenharmony_ci 1251cb0ef41Sopenharmony_ci for (int i = 0; i < length; i++) { 1261cb0ef41Sopenharmony_ci base::Optional<RegExpFlag> flag = JSRegExp::FlagFromChar(reader.Get(i)); 1271cb0ef41Sopenharmony_ci if (!flag.has_value()) return {}; 1281cb0ef41Sopenharmony_ci if (value & flag.value()) return {}; // Duplicate. 1291cb0ef41Sopenharmony_ci value |= flag.value(); 1301cb0ef41Sopenharmony_ci } 1311cb0ef41Sopenharmony_ci 1321cb0ef41Sopenharmony_ci return JSRegExp::AsJSRegExpFlags(value); 1331cb0ef41Sopenharmony_ci} 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_ci// static 1361cb0ef41Sopenharmony_ciHandle<String> JSRegExp::StringFromFlags(Isolate* isolate, 1371cb0ef41Sopenharmony_ci JSRegExp::Flags flags) { 1381cb0ef41Sopenharmony_ci static constexpr int kStringTerminator = 1; 1391cb0ef41Sopenharmony_ci int cursor = 0; 1401cb0ef41Sopenharmony_ci char buffer[kFlagCount + kStringTerminator]; 1411cb0ef41Sopenharmony_ci#define V(Lower, Camel, LowerCamel, Char, Bit) \ 1421cb0ef41Sopenharmony_ci if (flags & JSRegExp::k##Camel) buffer[cursor++] = Char; 1431cb0ef41Sopenharmony_ci REGEXP_FLAG_LIST(V) 1441cb0ef41Sopenharmony_ci#undef V 1451cb0ef41Sopenharmony_ci buffer[cursor++] = '\0'; 1461cb0ef41Sopenharmony_ci DCHECK_LE(cursor, kFlagCount + kStringTerminator); 1471cb0ef41Sopenharmony_ci return isolate->factory()->NewStringFromAsciiChecked(buffer); 1481cb0ef41Sopenharmony_ci} 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_ci// static 1511cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::New(Isolate* isolate, Handle<String> pattern, 1521cb0ef41Sopenharmony_ci Flags flags, uint32_t backtrack_limit) { 1531cb0ef41Sopenharmony_ci Handle<JSFunction> constructor = isolate->regexp_function(); 1541cb0ef41Sopenharmony_ci Handle<JSRegExp> regexp = 1551cb0ef41Sopenharmony_ci Handle<JSRegExp>::cast(isolate->factory()->NewJSObject(constructor)); 1561cb0ef41Sopenharmony_ci 1571cb0ef41Sopenharmony_ci return JSRegExp::Initialize(regexp, pattern, flags, backtrack_limit); 1581cb0ef41Sopenharmony_ci} 1591cb0ef41Sopenharmony_ci 1601cb0ef41Sopenharmony_ciObject JSRegExp::code(bool is_latin1) const { 1611cb0ef41Sopenharmony_ci DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP); 1621cb0ef41Sopenharmony_ci Object value = DataAt(code_index(is_latin1)); 1631cb0ef41Sopenharmony_ci DCHECK_IMPLIES(V8_EXTERNAL_CODE_SPACE_BOOL, value.IsSmi() || value.IsCodeT()); 1641cb0ef41Sopenharmony_ci return value; 1651cb0ef41Sopenharmony_ci} 1661cb0ef41Sopenharmony_ci 1671cb0ef41Sopenharmony_civoid JSRegExp::set_code(bool is_latin1, Handle<Code> code) { 1681cb0ef41Sopenharmony_ci SetDataAt(code_index(is_latin1), ToCodeT(*code)); 1691cb0ef41Sopenharmony_ci} 1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ciObject JSRegExp::bytecode(bool is_latin1) const { 1721cb0ef41Sopenharmony_ci DCHECK(type_tag() == JSRegExp::IRREGEXP || 1731cb0ef41Sopenharmony_ci type_tag() == JSRegExp::EXPERIMENTAL); 1741cb0ef41Sopenharmony_ci return DataAt(bytecode_index(is_latin1)); 1751cb0ef41Sopenharmony_ci} 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_civoid JSRegExp::set_bytecode_and_trampoline(Isolate* isolate, 1781cb0ef41Sopenharmony_ci Handle<ByteArray> bytecode) { 1791cb0ef41Sopenharmony_ci SetDataAt(kIrregexpLatin1BytecodeIndex, *bytecode); 1801cb0ef41Sopenharmony_ci SetDataAt(kIrregexpUC16BytecodeIndex, *bytecode); 1811cb0ef41Sopenharmony_ci 1821cb0ef41Sopenharmony_ci Handle<CodeT> trampoline = 1831cb0ef41Sopenharmony_ci BUILTIN_CODE(isolate, RegExpExperimentalTrampoline); 1841cb0ef41Sopenharmony_ci SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline); 1851cb0ef41Sopenharmony_ci SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline); 1861cb0ef41Sopenharmony_ci} 1871cb0ef41Sopenharmony_ci 1881cb0ef41Sopenharmony_cibool JSRegExp::ShouldProduceBytecode() { 1891cb0ef41Sopenharmony_ci return FLAG_regexp_interpret_all || 1901cb0ef41Sopenharmony_ci (FLAG_regexp_tier_up && !MarkedForTierUp()); 1911cb0ef41Sopenharmony_ci} 1921cb0ef41Sopenharmony_ci 1931cb0ef41Sopenharmony_ci// Only irregexps are subject to tier-up. 1941cb0ef41Sopenharmony_cibool JSRegExp::CanTierUp() { 1951cb0ef41Sopenharmony_ci return FLAG_regexp_tier_up && type_tag() == JSRegExp::IRREGEXP; 1961cb0ef41Sopenharmony_ci} 1971cb0ef41Sopenharmony_ci 1981cb0ef41Sopenharmony_ci// An irregexp is considered to be marked for tier up if the tier-up ticks 1991cb0ef41Sopenharmony_ci// value reaches zero. 2001cb0ef41Sopenharmony_cibool JSRegExp::MarkedForTierUp() { 2011cb0ef41Sopenharmony_ci DCHECK(data().IsFixedArray()); 2021cb0ef41Sopenharmony_ci 2031cb0ef41Sopenharmony_ci if (!CanTierUp()) { 2041cb0ef41Sopenharmony_ci return false; 2051cb0ef41Sopenharmony_ci } 2061cb0ef41Sopenharmony_ci 2071cb0ef41Sopenharmony_ci return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0; 2081cb0ef41Sopenharmony_ci} 2091cb0ef41Sopenharmony_ci 2101cb0ef41Sopenharmony_civoid JSRegExp::ResetLastTierUpTick() { 2111cb0ef41Sopenharmony_ci DCHECK(FLAG_regexp_tier_up); 2121cb0ef41Sopenharmony_ci DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP); 2131cb0ef41Sopenharmony_ci int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) + 1; 2141cb0ef41Sopenharmony_ci FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex, 2151cb0ef41Sopenharmony_ci Smi::FromInt(tier_up_ticks)); 2161cb0ef41Sopenharmony_ci} 2171cb0ef41Sopenharmony_ci 2181cb0ef41Sopenharmony_civoid JSRegExp::TierUpTick() { 2191cb0ef41Sopenharmony_ci DCHECK(FLAG_regexp_tier_up); 2201cb0ef41Sopenharmony_ci DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP); 2211cb0ef41Sopenharmony_ci int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)); 2221cb0ef41Sopenharmony_ci if (tier_up_ticks == 0) { 2231cb0ef41Sopenharmony_ci return; 2241cb0ef41Sopenharmony_ci } 2251cb0ef41Sopenharmony_ci FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex, 2261cb0ef41Sopenharmony_ci Smi::FromInt(tier_up_ticks - 1)); 2271cb0ef41Sopenharmony_ci} 2281cb0ef41Sopenharmony_ci 2291cb0ef41Sopenharmony_civoid JSRegExp::MarkTierUpForNextExec() { 2301cb0ef41Sopenharmony_ci DCHECK(FLAG_regexp_tier_up); 2311cb0ef41Sopenharmony_ci DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP); 2321cb0ef41Sopenharmony_ci FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex, 2331cb0ef41Sopenharmony_ci Smi::zero()); 2341cb0ef41Sopenharmony_ci} 2351cb0ef41Sopenharmony_ci 2361cb0ef41Sopenharmony_ci// static 2371cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp, 2381cb0ef41Sopenharmony_ci Handle<String> source, 2391cb0ef41Sopenharmony_ci Handle<String> flags_string) { 2401cb0ef41Sopenharmony_ci Isolate* isolate = regexp->GetIsolate(); 2411cb0ef41Sopenharmony_ci base::Optional<Flags> flags = 2421cb0ef41Sopenharmony_ci JSRegExp::FlagsFromString(isolate, flags_string); 2431cb0ef41Sopenharmony_ci if (!flags.has_value()) { 2441cb0ef41Sopenharmony_ci THROW_NEW_ERROR( 2451cb0ef41Sopenharmony_ci isolate, 2461cb0ef41Sopenharmony_ci NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string), 2471cb0ef41Sopenharmony_ci JSRegExp); 2481cb0ef41Sopenharmony_ci } 2491cb0ef41Sopenharmony_ci return Initialize(regexp, source, flags.value()); 2501cb0ef41Sopenharmony_ci} 2511cb0ef41Sopenharmony_ci 2521cb0ef41Sopenharmony_cinamespace { 2531cb0ef41Sopenharmony_ci 2541cb0ef41Sopenharmony_cibool IsLineTerminator(int c) { 2551cb0ef41Sopenharmony_ci // Expected to return true for '\n', '\r', 0x2028, and 0x2029. 2561cb0ef41Sopenharmony_ci return unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c)); 2571cb0ef41Sopenharmony_ci} 2581cb0ef41Sopenharmony_ci 2591cb0ef41Sopenharmony_ci// TODO(jgruber): Consider merging CountAdditionalEscapeChars and 2601cb0ef41Sopenharmony_ci// WriteEscapedRegExpSource into a single function to deduplicate dispatch logic 2611cb0ef41Sopenharmony_ci// and move related code closer to each other. 2621cb0ef41Sopenharmony_citemplate <typename Char> 2631cb0ef41Sopenharmony_ciint CountAdditionalEscapeChars(Handle<String> source, bool* needs_escapes_out) { 2641cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 2651cb0ef41Sopenharmony_ci int escapes = 0; 2661cb0ef41Sopenharmony_ci bool needs_escapes = false; 2671cb0ef41Sopenharmony_ci bool in_char_class = false; 2681cb0ef41Sopenharmony_ci base::Vector<const Char> src = source->GetCharVector<Char>(no_gc); 2691cb0ef41Sopenharmony_ci for (int i = 0; i < src.length(); i++) { 2701cb0ef41Sopenharmony_ci const Char c = src[i]; 2711cb0ef41Sopenharmony_ci if (c == '\\') { 2721cb0ef41Sopenharmony_ci if (i + 1 < src.length() && IsLineTerminator(src[i + 1])) { 2731cb0ef41Sopenharmony_ci // This '\' is ignored since the next character itself will be escaped. 2741cb0ef41Sopenharmony_ci escapes--; 2751cb0ef41Sopenharmony_ci } else { 2761cb0ef41Sopenharmony_ci // Escape. Skip next character, which will be copied verbatim; 2771cb0ef41Sopenharmony_ci i++; 2781cb0ef41Sopenharmony_ci } 2791cb0ef41Sopenharmony_ci } else if (c == '/' && !in_char_class) { 2801cb0ef41Sopenharmony_ci // Not escaped forward-slash needs escape. 2811cb0ef41Sopenharmony_ci needs_escapes = true; 2821cb0ef41Sopenharmony_ci escapes++; 2831cb0ef41Sopenharmony_ci } else if (c == '[') { 2841cb0ef41Sopenharmony_ci in_char_class = true; 2851cb0ef41Sopenharmony_ci } else if (c == ']') { 2861cb0ef41Sopenharmony_ci in_char_class = false; 2871cb0ef41Sopenharmony_ci } else if (c == '\n') { 2881cb0ef41Sopenharmony_ci needs_escapes = true; 2891cb0ef41Sopenharmony_ci escapes++; 2901cb0ef41Sopenharmony_ci } else if (c == '\r') { 2911cb0ef41Sopenharmony_ci needs_escapes = true; 2921cb0ef41Sopenharmony_ci escapes++; 2931cb0ef41Sopenharmony_ci } else if (static_cast<int>(c) == 0x2028) { 2941cb0ef41Sopenharmony_ci needs_escapes = true; 2951cb0ef41Sopenharmony_ci escapes += std::strlen("\\u2028") - 1; 2961cb0ef41Sopenharmony_ci } else if (static_cast<int>(c) == 0x2029) { 2971cb0ef41Sopenharmony_ci needs_escapes = true; 2981cb0ef41Sopenharmony_ci escapes += std::strlen("\\u2029") - 1; 2991cb0ef41Sopenharmony_ci } else { 3001cb0ef41Sopenharmony_ci DCHECK(!IsLineTerminator(c)); 3011cb0ef41Sopenharmony_ci } 3021cb0ef41Sopenharmony_ci } 3031cb0ef41Sopenharmony_ci DCHECK(!in_char_class); 3041cb0ef41Sopenharmony_ci DCHECK_GE(escapes, 0); 3051cb0ef41Sopenharmony_ci DCHECK_IMPLIES(escapes != 0, needs_escapes); 3061cb0ef41Sopenharmony_ci *needs_escapes_out = needs_escapes; 3071cb0ef41Sopenharmony_ci return escapes; 3081cb0ef41Sopenharmony_ci} 3091cb0ef41Sopenharmony_ci 3101cb0ef41Sopenharmony_citemplate <typename Char> 3111cb0ef41Sopenharmony_civoid WriteStringToCharVector(base::Vector<Char> v, int* d, const char* string) { 3121cb0ef41Sopenharmony_ci int s = 0; 3131cb0ef41Sopenharmony_ci while (string[s] != '\0') v[(*d)++] = string[s++]; 3141cb0ef41Sopenharmony_ci} 3151cb0ef41Sopenharmony_ci 3161cb0ef41Sopenharmony_citemplate <typename Char, typename StringType> 3171cb0ef41Sopenharmony_ciHandle<StringType> WriteEscapedRegExpSource(Handle<String> source, 3181cb0ef41Sopenharmony_ci Handle<StringType> result) { 3191cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 3201cb0ef41Sopenharmony_ci base::Vector<const Char> src = source->GetCharVector<Char>(no_gc); 3211cb0ef41Sopenharmony_ci base::Vector<Char> dst(result->GetChars(no_gc), result->length()); 3221cb0ef41Sopenharmony_ci int s = 0; 3231cb0ef41Sopenharmony_ci int d = 0; 3241cb0ef41Sopenharmony_ci bool in_char_class = false; 3251cb0ef41Sopenharmony_ci while (s < src.length()) { 3261cb0ef41Sopenharmony_ci const Char c = src[s]; 3271cb0ef41Sopenharmony_ci if (c == '\\') { 3281cb0ef41Sopenharmony_ci if (s + 1 < src.length() && IsLineTerminator(src[s + 1])) { 3291cb0ef41Sopenharmony_ci // This '\' is ignored since the next character itself will be escaped. 3301cb0ef41Sopenharmony_ci s++; 3311cb0ef41Sopenharmony_ci continue; 3321cb0ef41Sopenharmony_ci } else { 3331cb0ef41Sopenharmony_ci // Escape. Copy this and next character. 3341cb0ef41Sopenharmony_ci dst[d++] = src[s++]; 3351cb0ef41Sopenharmony_ci } 3361cb0ef41Sopenharmony_ci if (s == src.length()) break; 3371cb0ef41Sopenharmony_ci } else if (c == '/' && !in_char_class) { 3381cb0ef41Sopenharmony_ci // Not escaped forward-slash needs escape. 3391cb0ef41Sopenharmony_ci dst[d++] = '\\'; 3401cb0ef41Sopenharmony_ci } else if (c == '[') { 3411cb0ef41Sopenharmony_ci in_char_class = true; 3421cb0ef41Sopenharmony_ci } else if (c == ']') { 3431cb0ef41Sopenharmony_ci in_char_class = false; 3441cb0ef41Sopenharmony_ci } else if (c == '\n') { 3451cb0ef41Sopenharmony_ci WriteStringToCharVector(dst, &d, "\\n"); 3461cb0ef41Sopenharmony_ci s++; 3471cb0ef41Sopenharmony_ci continue; 3481cb0ef41Sopenharmony_ci } else if (c == '\r') { 3491cb0ef41Sopenharmony_ci WriteStringToCharVector(dst, &d, "\\r"); 3501cb0ef41Sopenharmony_ci s++; 3511cb0ef41Sopenharmony_ci continue; 3521cb0ef41Sopenharmony_ci } else if (static_cast<int>(c) == 0x2028) { 3531cb0ef41Sopenharmony_ci WriteStringToCharVector(dst, &d, "\\u2028"); 3541cb0ef41Sopenharmony_ci s++; 3551cb0ef41Sopenharmony_ci continue; 3561cb0ef41Sopenharmony_ci } else if (static_cast<int>(c) == 0x2029) { 3571cb0ef41Sopenharmony_ci WriteStringToCharVector(dst, &d, "\\u2029"); 3581cb0ef41Sopenharmony_ci s++; 3591cb0ef41Sopenharmony_ci continue; 3601cb0ef41Sopenharmony_ci } else { 3611cb0ef41Sopenharmony_ci DCHECK(!IsLineTerminator(c)); 3621cb0ef41Sopenharmony_ci } 3631cb0ef41Sopenharmony_ci dst[d++] = src[s++]; 3641cb0ef41Sopenharmony_ci } 3651cb0ef41Sopenharmony_ci DCHECK_EQ(result->length(), d); 3661cb0ef41Sopenharmony_ci DCHECK(!in_char_class); 3671cb0ef41Sopenharmony_ci return result; 3681cb0ef41Sopenharmony_ci} 3691cb0ef41Sopenharmony_ci 3701cb0ef41Sopenharmony_ciMaybeHandle<String> EscapeRegExpSource(Isolate* isolate, 3711cb0ef41Sopenharmony_ci Handle<String> source) { 3721cb0ef41Sopenharmony_ci DCHECK(source->IsFlat()); 3731cb0ef41Sopenharmony_ci if (source->length() == 0) return isolate->factory()->query_colon_string(); 3741cb0ef41Sopenharmony_ci bool one_byte = String::IsOneByteRepresentationUnderneath(*source); 3751cb0ef41Sopenharmony_ci bool needs_escapes = false; 3761cb0ef41Sopenharmony_ci int additional_escape_chars = 3771cb0ef41Sopenharmony_ci one_byte ? CountAdditionalEscapeChars<uint8_t>(source, &needs_escapes) 3781cb0ef41Sopenharmony_ci : CountAdditionalEscapeChars<base::uc16>(source, &needs_escapes); 3791cb0ef41Sopenharmony_ci if (!needs_escapes) return source; 3801cb0ef41Sopenharmony_ci int length = source->length() + additional_escape_chars; 3811cb0ef41Sopenharmony_ci if (one_byte) { 3821cb0ef41Sopenharmony_ci Handle<SeqOneByteString> result; 3831cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION(isolate, result, 3841cb0ef41Sopenharmony_ci isolate->factory()->NewRawOneByteString(length), 3851cb0ef41Sopenharmony_ci String); 3861cb0ef41Sopenharmony_ci return WriteEscapedRegExpSource<uint8_t>(source, result); 3871cb0ef41Sopenharmony_ci } else { 3881cb0ef41Sopenharmony_ci Handle<SeqTwoByteString> result; 3891cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION(isolate, result, 3901cb0ef41Sopenharmony_ci isolate->factory()->NewRawTwoByteString(length), 3911cb0ef41Sopenharmony_ci String); 3921cb0ef41Sopenharmony_ci return WriteEscapedRegExpSource<base::uc16>(source, result); 3931cb0ef41Sopenharmony_ci } 3941cb0ef41Sopenharmony_ci} 3951cb0ef41Sopenharmony_ci 3961cb0ef41Sopenharmony_ci} // namespace 3971cb0ef41Sopenharmony_ci 3981cb0ef41Sopenharmony_ci// static 3991cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp, 4001cb0ef41Sopenharmony_ci Handle<String> source, Flags flags, 4011cb0ef41Sopenharmony_ci uint32_t backtrack_limit) { 4021cb0ef41Sopenharmony_ci Isolate* isolate = regexp->GetIsolate(); 4031cb0ef41Sopenharmony_ci Factory* factory = isolate->factory(); 4041cb0ef41Sopenharmony_ci // If source is the empty string we set it to "(?:)" instead as 4051cb0ef41Sopenharmony_ci // suggested by ECMA-262, 5th, section 15.10.4.1. 4061cb0ef41Sopenharmony_ci if (source->length() == 0) source = factory->query_colon_string(); 4071cb0ef41Sopenharmony_ci 4081cb0ef41Sopenharmony_ci source = String::Flatten(isolate, source); 4091cb0ef41Sopenharmony_ci 4101cb0ef41Sopenharmony_ci RETURN_ON_EXCEPTION( 4111cb0ef41Sopenharmony_ci isolate, 4121cb0ef41Sopenharmony_ci RegExp::Compile(isolate, regexp, source, JSRegExp::AsRegExpFlags(flags), 4131cb0ef41Sopenharmony_ci backtrack_limit), 4141cb0ef41Sopenharmony_ci JSRegExp); 4151cb0ef41Sopenharmony_ci 4161cb0ef41Sopenharmony_ci Handle<String> escaped_source; 4171cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source, 4181cb0ef41Sopenharmony_ci EscapeRegExpSource(isolate, source), JSRegExp); 4191cb0ef41Sopenharmony_ci 4201cb0ef41Sopenharmony_ci regexp->set_source(*escaped_source); 4211cb0ef41Sopenharmony_ci regexp->set_flags(Smi::FromInt(flags)); 4221cb0ef41Sopenharmony_ci 4231cb0ef41Sopenharmony_ci Map map = regexp->map(); 4241cb0ef41Sopenharmony_ci Object constructor = map.GetConstructor(); 4251cb0ef41Sopenharmony_ci if (constructor.IsJSFunction() && 4261cb0ef41Sopenharmony_ci JSFunction::cast(constructor).initial_map() == map) { 4271cb0ef41Sopenharmony_ci // If we still have the original map, set in-object properties directly. 4281cb0ef41Sopenharmony_ci regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex, 4291cb0ef41Sopenharmony_ci Smi::FromInt(kInitialLastIndexValue), 4301cb0ef41Sopenharmony_ci SKIP_WRITE_BARRIER); 4311cb0ef41Sopenharmony_ci } else { 4321cb0ef41Sopenharmony_ci // Map has changed, so use generic, but slower, method. 4331cb0ef41Sopenharmony_ci RETURN_ON_EXCEPTION( 4341cb0ef41Sopenharmony_ci isolate, 4351cb0ef41Sopenharmony_ci Object::SetProperty( 4361cb0ef41Sopenharmony_ci isolate, regexp, factory->lastIndex_string(), 4371cb0ef41Sopenharmony_ci Handle<Smi>(Smi::FromInt(kInitialLastIndexValue), isolate)), 4381cb0ef41Sopenharmony_ci JSRegExp); 4391cb0ef41Sopenharmony_ci } 4401cb0ef41Sopenharmony_ci 4411cb0ef41Sopenharmony_ci return regexp; 4421cb0ef41Sopenharmony_ci} 4431cb0ef41Sopenharmony_ci 4441cb0ef41Sopenharmony_ci} // namespace internal 4451cb0ef41Sopenharmony_ci} // namespace v8 446