11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#include "src/objects/js-regexp.h"
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "src/base/strings.h"
81cb0ef41Sopenharmony_ci#include "src/common/globals.h"
91cb0ef41Sopenharmony_ci#include "src/objects/code.h"
101cb0ef41Sopenharmony_ci#include "src/objects/js-array-inl.h"
111cb0ef41Sopenharmony_ci#include "src/objects/js-regexp-inl.h"
121cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h"
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_cinamespace v8 {
151cb0ef41Sopenharmony_cinamespace internal {
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ciHandle<JSRegExpResultIndices> JSRegExpResultIndices::BuildIndices(
181cb0ef41Sopenharmony_ci    Isolate* isolate, Handle<RegExpMatchInfo> match_info,
191cb0ef41Sopenharmony_ci    Handle<Object> maybe_names) {
201cb0ef41Sopenharmony_ci  Handle<JSRegExpResultIndices> indices(Handle<JSRegExpResultIndices>::cast(
211cb0ef41Sopenharmony_ci      isolate->factory()->NewJSObjectFromMap(
221cb0ef41Sopenharmony_ci          isolate->regexp_result_indices_map())));
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_ci  // Initialize indices length to avoid having a partially initialized object
251cb0ef41Sopenharmony_ci  // should GC be triggered by creating a NewFixedArray.
261cb0ef41Sopenharmony_ci  indices->set_length(Smi::zero());
271cb0ef41Sopenharmony_ci
281cb0ef41Sopenharmony_ci  // Build indices array from RegExpMatchInfo.
291cb0ef41Sopenharmony_ci  int num_indices = match_info->NumberOfCaptureRegisters();
301cb0ef41Sopenharmony_ci  int num_results = num_indices >> 1;
311cb0ef41Sopenharmony_ci  Handle<FixedArray> indices_array =
321cb0ef41Sopenharmony_ci      isolate->factory()->NewFixedArray(num_results);
331cb0ef41Sopenharmony_ci  JSArray::SetContent(indices, indices_array);
341cb0ef41Sopenharmony_ci
351cb0ef41Sopenharmony_ci  for (int i = 0; i < num_results; i++) {
361cb0ef41Sopenharmony_ci    int base_offset = i * 2;
371cb0ef41Sopenharmony_ci    int start_offset = match_info->Capture(base_offset);
381cb0ef41Sopenharmony_ci    int end_offset = match_info->Capture(base_offset + 1);
391cb0ef41Sopenharmony_ci
401cb0ef41Sopenharmony_ci    // Any unmatched captures are set to undefined, otherwise we set them to a
411cb0ef41Sopenharmony_ci    // subarray of the indices.
421cb0ef41Sopenharmony_ci    if (start_offset == -1) {
431cb0ef41Sopenharmony_ci      indices_array->set(i, ReadOnlyRoots(isolate).undefined_value());
441cb0ef41Sopenharmony_ci    } else {
451cb0ef41Sopenharmony_ci      Handle<FixedArray> indices_sub_array(
461cb0ef41Sopenharmony_ci          isolate->factory()->NewFixedArray(2));
471cb0ef41Sopenharmony_ci      indices_sub_array->set(0, Smi::FromInt(start_offset));
481cb0ef41Sopenharmony_ci      indices_sub_array->set(1, Smi::FromInt(end_offset));
491cb0ef41Sopenharmony_ci      Handle<JSArray> indices_sub_jsarray =
501cb0ef41Sopenharmony_ci          isolate->factory()->NewJSArrayWithElements(indices_sub_array,
511cb0ef41Sopenharmony_ci                                                     PACKED_SMI_ELEMENTS, 2);
521cb0ef41Sopenharmony_ci      indices_array->set(i, *indices_sub_jsarray);
531cb0ef41Sopenharmony_ci    }
541cb0ef41Sopenharmony_ci  }
551cb0ef41Sopenharmony_ci
561cb0ef41Sopenharmony_ci  // If there are no capture groups, set the groups property to undefined.
571cb0ef41Sopenharmony_ci  FieldIndex groups_index = FieldIndex::ForDescriptor(
581cb0ef41Sopenharmony_ci      indices->map(), InternalIndex(kGroupsDescriptorIndex));
591cb0ef41Sopenharmony_ci  if (maybe_names->IsUndefined(isolate)) {
601cb0ef41Sopenharmony_ci    indices->FastPropertyAtPut(groups_index,
611cb0ef41Sopenharmony_ci                               ReadOnlyRoots(isolate).undefined_value());
621cb0ef41Sopenharmony_ci    return indices;
631cb0ef41Sopenharmony_ci  }
641cb0ef41Sopenharmony_ci
651cb0ef41Sopenharmony_ci  // Create a groups property which returns a dictionary of named captures to
661cb0ef41Sopenharmony_ci  // their corresponding capture indices.
671cb0ef41Sopenharmony_ci  Handle<FixedArray> names(Handle<FixedArray>::cast(maybe_names));
681cb0ef41Sopenharmony_ci  int num_names = names->length() >> 1;
691cb0ef41Sopenharmony_ci  Handle<HeapObject> group_names;
701cb0ef41Sopenharmony_ci  if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) {
711cb0ef41Sopenharmony_ci    group_names = isolate->factory()->NewSwissNameDictionary(num_names);
721cb0ef41Sopenharmony_ci  } else {
731cb0ef41Sopenharmony_ci    group_names = isolate->factory()->NewNameDictionary(num_names);
741cb0ef41Sopenharmony_ci  }
751cb0ef41Sopenharmony_ci  for (int i = 0; i < num_names; i++) {
761cb0ef41Sopenharmony_ci    int base_offset = i * 2;
771cb0ef41Sopenharmony_ci    int name_offset = base_offset;
781cb0ef41Sopenharmony_ci    int index_offset = base_offset + 1;
791cb0ef41Sopenharmony_ci    Handle<String> name(String::cast(names->get(name_offset)), isolate);
801cb0ef41Sopenharmony_ci    Handle<Smi> smi_index(Smi::cast(names->get(index_offset)), isolate);
811cb0ef41Sopenharmony_ci    Handle<Object> capture_indices(indices_array->get(smi_index->value()),
821cb0ef41Sopenharmony_ci                                   isolate);
831cb0ef41Sopenharmony_ci    if (!capture_indices->IsUndefined(isolate)) {
841cb0ef41Sopenharmony_ci      capture_indices = Handle<JSArray>::cast(capture_indices);
851cb0ef41Sopenharmony_ci    }
861cb0ef41Sopenharmony_ci    if (V8_ENABLE_SWISS_NAME_DICTIONARY_BOOL) {
871cb0ef41Sopenharmony_ci      group_names = SwissNameDictionary::Add(
881cb0ef41Sopenharmony_ci          isolate, Handle<SwissNameDictionary>::cast(group_names), name,
891cb0ef41Sopenharmony_ci          capture_indices, PropertyDetails::Empty());
901cb0ef41Sopenharmony_ci    } else {
911cb0ef41Sopenharmony_ci      group_names = NameDictionary::Add(
921cb0ef41Sopenharmony_ci          isolate, Handle<NameDictionary>::cast(group_names), name,
931cb0ef41Sopenharmony_ci          capture_indices, PropertyDetails::Empty());
941cb0ef41Sopenharmony_ci    }
951cb0ef41Sopenharmony_ci  }
961cb0ef41Sopenharmony_ci
971cb0ef41Sopenharmony_ci  // Convert group_names to a JSObject and store at the groups property of the
981cb0ef41Sopenharmony_ci  // result indices.
991cb0ef41Sopenharmony_ci  Handle<FixedArrayBase> elements = isolate->factory()->empty_fixed_array();
1001cb0ef41Sopenharmony_ci  Handle<HeapObject> null =
1011cb0ef41Sopenharmony_ci      Handle<HeapObject>::cast(isolate->factory()->null_value());
1021cb0ef41Sopenharmony_ci  Handle<JSObject> js_group_names =
1031cb0ef41Sopenharmony_ci      isolate->factory()->NewSlowJSObjectWithPropertiesAndElements(
1041cb0ef41Sopenharmony_ci          null, group_names, elements);
1051cb0ef41Sopenharmony_ci  indices->FastPropertyAtPut(groups_index, *js_group_names);
1061cb0ef41Sopenharmony_ci  return indices;
1071cb0ef41Sopenharmony_ci}
1081cb0ef41Sopenharmony_ci
1091cb0ef41Sopenharmony_ciuint32_t JSRegExp::backtrack_limit() const {
1101cb0ef41Sopenharmony_ci  CHECK_EQ(type_tag(), IRREGEXP);
1111cb0ef41Sopenharmony_ci  return static_cast<uint32_t>(Smi::ToInt(DataAt(kIrregexpBacktrackLimit)));
1121cb0ef41Sopenharmony_ci}
1131cb0ef41Sopenharmony_ci
1141cb0ef41Sopenharmony_ci// static
1151cb0ef41Sopenharmony_cibase::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString(
1161cb0ef41Sopenharmony_ci    Isolate* isolate, Handle<String> flags) {
1171cb0ef41Sopenharmony_ci  const int length = flags->length();
1181cb0ef41Sopenharmony_ci
1191cb0ef41Sopenharmony_ci  // A longer flags string cannot be valid.
1201cb0ef41Sopenharmony_ci  if (length > JSRegExp::kFlagCount) return {};
1211cb0ef41Sopenharmony_ci
1221cb0ef41Sopenharmony_ci  RegExpFlags value;
1231cb0ef41Sopenharmony_ci  FlatStringReader reader(isolate, String::Flatten(isolate, flags));
1241cb0ef41Sopenharmony_ci
1251cb0ef41Sopenharmony_ci  for (int i = 0; i < length; i++) {
1261cb0ef41Sopenharmony_ci    base::Optional<RegExpFlag> flag = JSRegExp::FlagFromChar(reader.Get(i));
1271cb0ef41Sopenharmony_ci    if (!flag.has_value()) return {};
1281cb0ef41Sopenharmony_ci    if (value & flag.value()) return {};  // Duplicate.
1291cb0ef41Sopenharmony_ci    value |= flag.value();
1301cb0ef41Sopenharmony_ci  }
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_ci  return JSRegExp::AsJSRegExpFlags(value);
1331cb0ef41Sopenharmony_ci}
1341cb0ef41Sopenharmony_ci
1351cb0ef41Sopenharmony_ci// static
1361cb0ef41Sopenharmony_ciHandle<String> JSRegExp::StringFromFlags(Isolate* isolate,
1371cb0ef41Sopenharmony_ci                                         JSRegExp::Flags flags) {
1381cb0ef41Sopenharmony_ci  static constexpr int kStringTerminator = 1;
1391cb0ef41Sopenharmony_ci  int cursor = 0;
1401cb0ef41Sopenharmony_ci  char buffer[kFlagCount + kStringTerminator];
1411cb0ef41Sopenharmony_ci#define V(Lower, Camel, LowerCamel, Char, Bit) \
1421cb0ef41Sopenharmony_ci  if (flags & JSRegExp::k##Camel) buffer[cursor++] = Char;
1431cb0ef41Sopenharmony_ci  REGEXP_FLAG_LIST(V)
1441cb0ef41Sopenharmony_ci#undef V
1451cb0ef41Sopenharmony_ci  buffer[cursor++] = '\0';
1461cb0ef41Sopenharmony_ci  DCHECK_LE(cursor, kFlagCount + kStringTerminator);
1471cb0ef41Sopenharmony_ci  return isolate->factory()->NewStringFromAsciiChecked(buffer);
1481cb0ef41Sopenharmony_ci}
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ci// static
1511cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::New(Isolate* isolate, Handle<String> pattern,
1521cb0ef41Sopenharmony_ci                                    Flags flags, uint32_t backtrack_limit) {
1531cb0ef41Sopenharmony_ci  Handle<JSFunction> constructor = isolate->regexp_function();
1541cb0ef41Sopenharmony_ci  Handle<JSRegExp> regexp =
1551cb0ef41Sopenharmony_ci      Handle<JSRegExp>::cast(isolate->factory()->NewJSObject(constructor));
1561cb0ef41Sopenharmony_ci
1571cb0ef41Sopenharmony_ci  return JSRegExp::Initialize(regexp, pattern, flags, backtrack_limit);
1581cb0ef41Sopenharmony_ci}
1591cb0ef41Sopenharmony_ci
1601cb0ef41Sopenharmony_ciObject JSRegExp::code(bool is_latin1) const {
1611cb0ef41Sopenharmony_ci  DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP);
1621cb0ef41Sopenharmony_ci  Object value = DataAt(code_index(is_latin1));
1631cb0ef41Sopenharmony_ci  DCHECK_IMPLIES(V8_EXTERNAL_CODE_SPACE_BOOL, value.IsSmi() || value.IsCodeT());
1641cb0ef41Sopenharmony_ci  return value;
1651cb0ef41Sopenharmony_ci}
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_civoid JSRegExp::set_code(bool is_latin1, Handle<Code> code) {
1681cb0ef41Sopenharmony_ci  SetDataAt(code_index(is_latin1), ToCodeT(*code));
1691cb0ef41Sopenharmony_ci}
1701cb0ef41Sopenharmony_ci
1711cb0ef41Sopenharmony_ciObject JSRegExp::bytecode(bool is_latin1) const {
1721cb0ef41Sopenharmony_ci  DCHECK(type_tag() == JSRegExp::IRREGEXP ||
1731cb0ef41Sopenharmony_ci         type_tag() == JSRegExp::EXPERIMENTAL);
1741cb0ef41Sopenharmony_ci  return DataAt(bytecode_index(is_latin1));
1751cb0ef41Sopenharmony_ci}
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_civoid JSRegExp::set_bytecode_and_trampoline(Isolate* isolate,
1781cb0ef41Sopenharmony_ci                                           Handle<ByteArray> bytecode) {
1791cb0ef41Sopenharmony_ci  SetDataAt(kIrregexpLatin1BytecodeIndex, *bytecode);
1801cb0ef41Sopenharmony_ci  SetDataAt(kIrregexpUC16BytecodeIndex, *bytecode);
1811cb0ef41Sopenharmony_ci
1821cb0ef41Sopenharmony_ci  Handle<CodeT> trampoline =
1831cb0ef41Sopenharmony_ci      BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);
1841cb0ef41Sopenharmony_ci  SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline);
1851cb0ef41Sopenharmony_ci  SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline);
1861cb0ef41Sopenharmony_ci}
1871cb0ef41Sopenharmony_ci
1881cb0ef41Sopenharmony_cibool JSRegExp::ShouldProduceBytecode() {
1891cb0ef41Sopenharmony_ci  return FLAG_regexp_interpret_all ||
1901cb0ef41Sopenharmony_ci         (FLAG_regexp_tier_up && !MarkedForTierUp());
1911cb0ef41Sopenharmony_ci}
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci// Only irregexps are subject to tier-up.
1941cb0ef41Sopenharmony_cibool JSRegExp::CanTierUp() {
1951cb0ef41Sopenharmony_ci  return FLAG_regexp_tier_up && type_tag() == JSRegExp::IRREGEXP;
1961cb0ef41Sopenharmony_ci}
1971cb0ef41Sopenharmony_ci
1981cb0ef41Sopenharmony_ci// An irregexp is considered to be marked for tier up if the tier-up ticks
1991cb0ef41Sopenharmony_ci// value reaches zero.
2001cb0ef41Sopenharmony_cibool JSRegExp::MarkedForTierUp() {
2011cb0ef41Sopenharmony_ci  DCHECK(data().IsFixedArray());
2021cb0ef41Sopenharmony_ci
2031cb0ef41Sopenharmony_ci  if (!CanTierUp()) {
2041cb0ef41Sopenharmony_ci    return false;
2051cb0ef41Sopenharmony_ci  }
2061cb0ef41Sopenharmony_ci
2071cb0ef41Sopenharmony_ci  return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0;
2081cb0ef41Sopenharmony_ci}
2091cb0ef41Sopenharmony_ci
2101cb0ef41Sopenharmony_civoid JSRegExp::ResetLastTierUpTick() {
2111cb0ef41Sopenharmony_ci  DCHECK(FLAG_regexp_tier_up);
2121cb0ef41Sopenharmony_ci  DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP);
2131cb0ef41Sopenharmony_ci  int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) + 1;
2141cb0ef41Sopenharmony_ci  FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
2151cb0ef41Sopenharmony_ci                               Smi::FromInt(tier_up_ticks));
2161cb0ef41Sopenharmony_ci}
2171cb0ef41Sopenharmony_ci
2181cb0ef41Sopenharmony_civoid JSRegExp::TierUpTick() {
2191cb0ef41Sopenharmony_ci  DCHECK(FLAG_regexp_tier_up);
2201cb0ef41Sopenharmony_ci  DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP);
2211cb0ef41Sopenharmony_ci  int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex));
2221cb0ef41Sopenharmony_ci  if (tier_up_ticks == 0) {
2231cb0ef41Sopenharmony_ci    return;
2241cb0ef41Sopenharmony_ci  }
2251cb0ef41Sopenharmony_ci  FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
2261cb0ef41Sopenharmony_ci                               Smi::FromInt(tier_up_ticks - 1));
2271cb0ef41Sopenharmony_ci}
2281cb0ef41Sopenharmony_ci
2291cb0ef41Sopenharmony_civoid JSRegExp::MarkTierUpForNextExec() {
2301cb0ef41Sopenharmony_ci  DCHECK(FLAG_regexp_tier_up);
2311cb0ef41Sopenharmony_ci  DCHECK_EQ(type_tag(), JSRegExp::IRREGEXP);
2321cb0ef41Sopenharmony_ci  FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
2331cb0ef41Sopenharmony_ci                               Smi::zero());
2341cb0ef41Sopenharmony_ci}
2351cb0ef41Sopenharmony_ci
2361cb0ef41Sopenharmony_ci// static
2371cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
2381cb0ef41Sopenharmony_ci                                           Handle<String> source,
2391cb0ef41Sopenharmony_ci                                           Handle<String> flags_string) {
2401cb0ef41Sopenharmony_ci  Isolate* isolate = regexp->GetIsolate();
2411cb0ef41Sopenharmony_ci  base::Optional<Flags> flags =
2421cb0ef41Sopenharmony_ci      JSRegExp::FlagsFromString(isolate, flags_string);
2431cb0ef41Sopenharmony_ci  if (!flags.has_value()) {
2441cb0ef41Sopenharmony_ci    THROW_NEW_ERROR(
2451cb0ef41Sopenharmony_ci        isolate,
2461cb0ef41Sopenharmony_ci        NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
2471cb0ef41Sopenharmony_ci        JSRegExp);
2481cb0ef41Sopenharmony_ci  }
2491cb0ef41Sopenharmony_ci  return Initialize(regexp, source, flags.value());
2501cb0ef41Sopenharmony_ci}
2511cb0ef41Sopenharmony_ci
2521cb0ef41Sopenharmony_cinamespace {
2531cb0ef41Sopenharmony_ci
2541cb0ef41Sopenharmony_cibool IsLineTerminator(int c) {
2551cb0ef41Sopenharmony_ci  // Expected to return true for '\n', '\r', 0x2028, and 0x2029.
2561cb0ef41Sopenharmony_ci  return unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c));
2571cb0ef41Sopenharmony_ci}
2581cb0ef41Sopenharmony_ci
2591cb0ef41Sopenharmony_ci// TODO(jgruber): Consider merging CountAdditionalEscapeChars and
2601cb0ef41Sopenharmony_ci// WriteEscapedRegExpSource into a single function to deduplicate dispatch logic
2611cb0ef41Sopenharmony_ci// and move related code closer to each other.
2621cb0ef41Sopenharmony_citemplate <typename Char>
2631cb0ef41Sopenharmony_ciint CountAdditionalEscapeChars(Handle<String> source, bool* needs_escapes_out) {
2641cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
2651cb0ef41Sopenharmony_ci  int escapes = 0;
2661cb0ef41Sopenharmony_ci  bool needs_escapes = false;
2671cb0ef41Sopenharmony_ci  bool in_char_class = false;
2681cb0ef41Sopenharmony_ci  base::Vector<const Char> src = source->GetCharVector<Char>(no_gc);
2691cb0ef41Sopenharmony_ci  for (int i = 0; i < src.length(); i++) {
2701cb0ef41Sopenharmony_ci    const Char c = src[i];
2711cb0ef41Sopenharmony_ci    if (c == '\\') {
2721cb0ef41Sopenharmony_ci      if (i + 1 < src.length() && IsLineTerminator(src[i + 1])) {
2731cb0ef41Sopenharmony_ci        // This '\' is ignored since the next character itself will be escaped.
2741cb0ef41Sopenharmony_ci        escapes--;
2751cb0ef41Sopenharmony_ci      } else {
2761cb0ef41Sopenharmony_ci        // Escape. Skip next character, which will be copied verbatim;
2771cb0ef41Sopenharmony_ci        i++;
2781cb0ef41Sopenharmony_ci      }
2791cb0ef41Sopenharmony_ci    } else if (c == '/' && !in_char_class) {
2801cb0ef41Sopenharmony_ci      // Not escaped forward-slash needs escape.
2811cb0ef41Sopenharmony_ci      needs_escapes = true;
2821cb0ef41Sopenharmony_ci      escapes++;
2831cb0ef41Sopenharmony_ci    } else if (c == '[') {
2841cb0ef41Sopenharmony_ci      in_char_class = true;
2851cb0ef41Sopenharmony_ci    } else if (c == ']') {
2861cb0ef41Sopenharmony_ci      in_char_class = false;
2871cb0ef41Sopenharmony_ci    } else if (c == '\n') {
2881cb0ef41Sopenharmony_ci      needs_escapes = true;
2891cb0ef41Sopenharmony_ci      escapes++;
2901cb0ef41Sopenharmony_ci    } else if (c == '\r') {
2911cb0ef41Sopenharmony_ci      needs_escapes = true;
2921cb0ef41Sopenharmony_ci      escapes++;
2931cb0ef41Sopenharmony_ci    } else if (static_cast<int>(c) == 0x2028) {
2941cb0ef41Sopenharmony_ci      needs_escapes = true;
2951cb0ef41Sopenharmony_ci      escapes += std::strlen("\\u2028") - 1;
2961cb0ef41Sopenharmony_ci    } else if (static_cast<int>(c) == 0x2029) {
2971cb0ef41Sopenharmony_ci      needs_escapes = true;
2981cb0ef41Sopenharmony_ci      escapes += std::strlen("\\u2029") - 1;
2991cb0ef41Sopenharmony_ci    } else {
3001cb0ef41Sopenharmony_ci      DCHECK(!IsLineTerminator(c));
3011cb0ef41Sopenharmony_ci    }
3021cb0ef41Sopenharmony_ci  }
3031cb0ef41Sopenharmony_ci  DCHECK(!in_char_class);
3041cb0ef41Sopenharmony_ci  DCHECK_GE(escapes, 0);
3051cb0ef41Sopenharmony_ci  DCHECK_IMPLIES(escapes != 0, needs_escapes);
3061cb0ef41Sopenharmony_ci  *needs_escapes_out = needs_escapes;
3071cb0ef41Sopenharmony_ci  return escapes;
3081cb0ef41Sopenharmony_ci}
3091cb0ef41Sopenharmony_ci
3101cb0ef41Sopenharmony_citemplate <typename Char>
3111cb0ef41Sopenharmony_civoid WriteStringToCharVector(base::Vector<Char> v, int* d, const char* string) {
3121cb0ef41Sopenharmony_ci  int s = 0;
3131cb0ef41Sopenharmony_ci  while (string[s] != '\0') v[(*d)++] = string[s++];
3141cb0ef41Sopenharmony_ci}
3151cb0ef41Sopenharmony_ci
3161cb0ef41Sopenharmony_citemplate <typename Char, typename StringType>
3171cb0ef41Sopenharmony_ciHandle<StringType> WriteEscapedRegExpSource(Handle<String> source,
3181cb0ef41Sopenharmony_ci                                            Handle<StringType> result) {
3191cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
3201cb0ef41Sopenharmony_ci  base::Vector<const Char> src = source->GetCharVector<Char>(no_gc);
3211cb0ef41Sopenharmony_ci  base::Vector<Char> dst(result->GetChars(no_gc), result->length());
3221cb0ef41Sopenharmony_ci  int s = 0;
3231cb0ef41Sopenharmony_ci  int d = 0;
3241cb0ef41Sopenharmony_ci  bool in_char_class = false;
3251cb0ef41Sopenharmony_ci  while (s < src.length()) {
3261cb0ef41Sopenharmony_ci    const Char c = src[s];
3271cb0ef41Sopenharmony_ci    if (c == '\\') {
3281cb0ef41Sopenharmony_ci      if (s + 1 < src.length() && IsLineTerminator(src[s + 1])) {
3291cb0ef41Sopenharmony_ci        // This '\' is ignored since the next character itself will be escaped.
3301cb0ef41Sopenharmony_ci        s++;
3311cb0ef41Sopenharmony_ci        continue;
3321cb0ef41Sopenharmony_ci      } else {
3331cb0ef41Sopenharmony_ci        // Escape. Copy this and next character.
3341cb0ef41Sopenharmony_ci        dst[d++] = src[s++];
3351cb0ef41Sopenharmony_ci      }
3361cb0ef41Sopenharmony_ci      if (s == src.length()) break;
3371cb0ef41Sopenharmony_ci    } else if (c == '/' && !in_char_class) {
3381cb0ef41Sopenharmony_ci      // Not escaped forward-slash needs escape.
3391cb0ef41Sopenharmony_ci      dst[d++] = '\\';
3401cb0ef41Sopenharmony_ci    } else if (c == '[') {
3411cb0ef41Sopenharmony_ci      in_char_class = true;
3421cb0ef41Sopenharmony_ci    } else if (c == ']') {
3431cb0ef41Sopenharmony_ci      in_char_class = false;
3441cb0ef41Sopenharmony_ci    } else if (c == '\n') {
3451cb0ef41Sopenharmony_ci      WriteStringToCharVector(dst, &d, "\\n");
3461cb0ef41Sopenharmony_ci      s++;
3471cb0ef41Sopenharmony_ci      continue;
3481cb0ef41Sopenharmony_ci    } else if (c == '\r') {
3491cb0ef41Sopenharmony_ci      WriteStringToCharVector(dst, &d, "\\r");
3501cb0ef41Sopenharmony_ci      s++;
3511cb0ef41Sopenharmony_ci      continue;
3521cb0ef41Sopenharmony_ci    } else if (static_cast<int>(c) == 0x2028) {
3531cb0ef41Sopenharmony_ci      WriteStringToCharVector(dst, &d, "\\u2028");
3541cb0ef41Sopenharmony_ci      s++;
3551cb0ef41Sopenharmony_ci      continue;
3561cb0ef41Sopenharmony_ci    } else if (static_cast<int>(c) == 0x2029) {
3571cb0ef41Sopenharmony_ci      WriteStringToCharVector(dst, &d, "\\u2029");
3581cb0ef41Sopenharmony_ci      s++;
3591cb0ef41Sopenharmony_ci      continue;
3601cb0ef41Sopenharmony_ci    } else {
3611cb0ef41Sopenharmony_ci      DCHECK(!IsLineTerminator(c));
3621cb0ef41Sopenharmony_ci    }
3631cb0ef41Sopenharmony_ci    dst[d++] = src[s++];
3641cb0ef41Sopenharmony_ci  }
3651cb0ef41Sopenharmony_ci  DCHECK_EQ(result->length(), d);
3661cb0ef41Sopenharmony_ci  DCHECK(!in_char_class);
3671cb0ef41Sopenharmony_ci  return result;
3681cb0ef41Sopenharmony_ci}
3691cb0ef41Sopenharmony_ci
3701cb0ef41Sopenharmony_ciMaybeHandle<String> EscapeRegExpSource(Isolate* isolate,
3711cb0ef41Sopenharmony_ci                                       Handle<String> source) {
3721cb0ef41Sopenharmony_ci  DCHECK(source->IsFlat());
3731cb0ef41Sopenharmony_ci  if (source->length() == 0) return isolate->factory()->query_colon_string();
3741cb0ef41Sopenharmony_ci  bool one_byte = String::IsOneByteRepresentationUnderneath(*source);
3751cb0ef41Sopenharmony_ci  bool needs_escapes = false;
3761cb0ef41Sopenharmony_ci  int additional_escape_chars =
3771cb0ef41Sopenharmony_ci      one_byte ? CountAdditionalEscapeChars<uint8_t>(source, &needs_escapes)
3781cb0ef41Sopenharmony_ci               : CountAdditionalEscapeChars<base::uc16>(source, &needs_escapes);
3791cb0ef41Sopenharmony_ci  if (!needs_escapes) return source;
3801cb0ef41Sopenharmony_ci  int length = source->length() + additional_escape_chars;
3811cb0ef41Sopenharmony_ci  if (one_byte) {
3821cb0ef41Sopenharmony_ci    Handle<SeqOneByteString> result;
3831cb0ef41Sopenharmony_ci    ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
3841cb0ef41Sopenharmony_ci                               isolate->factory()->NewRawOneByteString(length),
3851cb0ef41Sopenharmony_ci                               String);
3861cb0ef41Sopenharmony_ci    return WriteEscapedRegExpSource<uint8_t>(source, result);
3871cb0ef41Sopenharmony_ci  } else {
3881cb0ef41Sopenharmony_ci    Handle<SeqTwoByteString> result;
3891cb0ef41Sopenharmony_ci    ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
3901cb0ef41Sopenharmony_ci                               isolate->factory()->NewRawTwoByteString(length),
3911cb0ef41Sopenharmony_ci                               String);
3921cb0ef41Sopenharmony_ci    return WriteEscapedRegExpSource<base::uc16>(source, result);
3931cb0ef41Sopenharmony_ci  }
3941cb0ef41Sopenharmony_ci}
3951cb0ef41Sopenharmony_ci
3961cb0ef41Sopenharmony_ci}  // namespace
3971cb0ef41Sopenharmony_ci
3981cb0ef41Sopenharmony_ci// static
3991cb0ef41Sopenharmony_ciMaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
4001cb0ef41Sopenharmony_ci                                           Handle<String> source, Flags flags,
4011cb0ef41Sopenharmony_ci                                           uint32_t backtrack_limit) {
4021cb0ef41Sopenharmony_ci  Isolate* isolate = regexp->GetIsolate();
4031cb0ef41Sopenharmony_ci  Factory* factory = isolate->factory();
4041cb0ef41Sopenharmony_ci  // If source is the empty string we set it to "(?:)" instead as
4051cb0ef41Sopenharmony_ci  // suggested by ECMA-262, 5th, section 15.10.4.1.
4061cb0ef41Sopenharmony_ci  if (source->length() == 0) source = factory->query_colon_string();
4071cb0ef41Sopenharmony_ci
4081cb0ef41Sopenharmony_ci  source = String::Flatten(isolate, source);
4091cb0ef41Sopenharmony_ci
4101cb0ef41Sopenharmony_ci  RETURN_ON_EXCEPTION(
4111cb0ef41Sopenharmony_ci      isolate,
4121cb0ef41Sopenharmony_ci      RegExp::Compile(isolate, regexp, source, JSRegExp::AsRegExpFlags(flags),
4131cb0ef41Sopenharmony_ci                      backtrack_limit),
4141cb0ef41Sopenharmony_ci      JSRegExp);
4151cb0ef41Sopenharmony_ci
4161cb0ef41Sopenharmony_ci  Handle<String> escaped_source;
4171cb0ef41Sopenharmony_ci  ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source,
4181cb0ef41Sopenharmony_ci                             EscapeRegExpSource(isolate, source), JSRegExp);
4191cb0ef41Sopenharmony_ci
4201cb0ef41Sopenharmony_ci  regexp->set_source(*escaped_source);
4211cb0ef41Sopenharmony_ci  regexp->set_flags(Smi::FromInt(flags));
4221cb0ef41Sopenharmony_ci
4231cb0ef41Sopenharmony_ci  Map map = regexp->map();
4241cb0ef41Sopenharmony_ci  Object constructor = map.GetConstructor();
4251cb0ef41Sopenharmony_ci  if (constructor.IsJSFunction() &&
4261cb0ef41Sopenharmony_ci      JSFunction::cast(constructor).initial_map() == map) {
4271cb0ef41Sopenharmony_ci    // If we still have the original map, set in-object properties directly.
4281cb0ef41Sopenharmony_ci    regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
4291cb0ef41Sopenharmony_ci                                  Smi::FromInt(kInitialLastIndexValue),
4301cb0ef41Sopenharmony_ci                                  SKIP_WRITE_BARRIER);
4311cb0ef41Sopenharmony_ci  } else {
4321cb0ef41Sopenharmony_ci    // Map has changed, so use generic, but slower, method.
4331cb0ef41Sopenharmony_ci    RETURN_ON_EXCEPTION(
4341cb0ef41Sopenharmony_ci        isolate,
4351cb0ef41Sopenharmony_ci        Object::SetProperty(
4361cb0ef41Sopenharmony_ci            isolate, regexp, factory->lastIndex_string(),
4371cb0ef41Sopenharmony_ci            Handle<Smi>(Smi::FromInt(kInitialLastIndexValue), isolate)),
4381cb0ef41Sopenharmony_ci        JSRegExp);
4391cb0ef41Sopenharmony_ci  }
4401cb0ef41Sopenharmony_ci
4411cb0ef41Sopenharmony_ci  return regexp;
4421cb0ef41Sopenharmony_ci}
4431cb0ef41Sopenharmony_ci
4441cb0ef41Sopenharmony_ci}  // namespace internal
4451cb0ef41Sopenharmony_ci}  // namespace v8
446