// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h"
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "src/codegen/assembler.h"
81cb0ef41Sopenharmony_ci#include "src/codegen/label.h"
91cb0ef41Sopenharmony_ci#include "src/execution/isolate-inl.h"
101cb0ef41Sopenharmony_ci#include "src/execution/pointer-authentication.h"
111cb0ef41Sopenharmony_ci#include "src/execution/simulator.h"
121cb0ef41Sopenharmony_ci#include "src/regexp/regexp-stack.h"
131cb0ef41Sopenharmony_ci#include "src/regexp/special-case.h"
141cb0ef41Sopenharmony_ci#include "src/strings/unicode-inl.h"
151cb0ef41Sopenharmony_ci
161cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
171cb0ef41Sopenharmony_ci#include "unicode/uchar.h"
181cb0ef41Sopenharmony_ci#include "unicode/unistr.h"
191cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_cinamespace v8 {
221cb0ef41Sopenharmony_cinamespace internal {
231cb0ef41Sopenharmony_ci
// Constructs a macro assembler bound to |isolate| and |zone|. The assembler
// starts with the slow-safe compiler flag cleared, no backtrack limit
// (JSRegExp::kNoBacktrackLimit) and the global mode set to NOT_GLOBAL.
241cb0ef41Sopenharmony_ciRegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
251cb0ef41Sopenharmony_ci    : slow_safe_compiler_(false),
261cb0ef41Sopenharmony_ci      backtrack_limit_(JSRegExp::kNoBacktrackLimit),
271cb0ef41Sopenharmony_ci      global_mode_(NOT_GLOBAL),
281cb0ef41Sopenharmony_ci      isolate_(isolate),
291cb0ef41Sopenharmony_ci      zone_(zone) {}
301cb0ef41Sopenharmony_ci
311cb0ef41Sopenharmony_cibool RegExpMacroAssembler::has_backtrack_limit() const {
321cb0ef41Sopenharmony_ci  return backtrack_limit_ != JSRegExp::kNoBacktrackLimit;
331cb0ef41Sopenharmony_ci}
341cb0ef41Sopenharmony_ci
351cb0ef41Sopenharmony_ci// static
361cb0ef41Sopenharmony_ciint RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(Address byte_offset1,
371cb0ef41Sopenharmony_ci                                                           Address byte_offset2,
381cb0ef41Sopenharmony_ci                                                           size_t byte_length,
391cb0ef41Sopenharmony_ci                                                           Isolate* isolate) {
401cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
411cb0ef41Sopenharmony_ci  // This function is not allowed to cause a garbage collection.
421cb0ef41Sopenharmony_ci  // A GC might move the calling generated code and invalidate the
431cb0ef41Sopenharmony_ci  // return address on the stack.
441cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
451cb0ef41Sopenharmony_ci  DCHECK_EQ(0, byte_length % 2);
461cb0ef41Sopenharmony_ci  size_t length = byte_length / 2;
471cb0ef41Sopenharmony_ci  base::uc16* substring1 = reinterpret_cast<base::uc16*>(byte_offset1);
481cb0ef41Sopenharmony_ci  base::uc16* substring2 = reinterpret_cast<base::uc16*>(byte_offset2);
491cb0ef41Sopenharmony_ci
501cb0ef41Sopenharmony_ci  for (size_t i = 0; i < length; i++) {
511cb0ef41Sopenharmony_ci    UChar32 c1 = RegExpCaseFolding::Canonicalize(substring1[i]);
521cb0ef41Sopenharmony_ci    UChar32 c2 = RegExpCaseFolding::Canonicalize(substring2[i]);
531cb0ef41Sopenharmony_ci    if (c1 != c2) {
541cb0ef41Sopenharmony_ci      return 0;
551cb0ef41Sopenharmony_ci    }
561cb0ef41Sopenharmony_ci  }
571cb0ef41Sopenharmony_ci  return 1;
581cb0ef41Sopenharmony_ci#else
591cb0ef41Sopenharmony_ci  return CaseInsensitiveCompareUnicode(byte_offset1, byte_offset2, byte_length,
601cb0ef41Sopenharmony_ci                                       isolate);
611cb0ef41Sopenharmony_ci#endif
621cb0ef41Sopenharmony_ci}
631cb0ef41Sopenharmony_ci
641cb0ef41Sopenharmony_ci// static
651cb0ef41Sopenharmony_ciint RegExpMacroAssembler::CaseInsensitiveCompareUnicode(Address byte_offset1,
661cb0ef41Sopenharmony_ci                                                        Address byte_offset2,
671cb0ef41Sopenharmony_ci                                                        size_t byte_length,
681cb0ef41Sopenharmony_ci                                                        Isolate* isolate) {
691cb0ef41Sopenharmony_ci  // This function is not allowed to cause a garbage collection.
701cb0ef41Sopenharmony_ci  // A GC might move the calling generated code and invalidate the
711cb0ef41Sopenharmony_ci  // return address on the stack.
721cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
731cb0ef41Sopenharmony_ci  DCHECK_EQ(0, byte_length % 2);
741cb0ef41Sopenharmony_ci
751cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
761cb0ef41Sopenharmony_ci  int32_t length = static_cast<int32_t>(byte_length >> 1);
771cb0ef41Sopenharmony_ci  icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1),
781cb0ef41Sopenharmony_ci                               length);
791cb0ef41Sopenharmony_ci  return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2),
801cb0ef41Sopenharmony_ci                               length, U_FOLD_CASE_DEFAULT) == 0;
811cb0ef41Sopenharmony_ci#else
821cb0ef41Sopenharmony_ci  base::uc16* substring1 = reinterpret_cast<base::uc16*>(byte_offset1);
831cb0ef41Sopenharmony_ci  base::uc16* substring2 = reinterpret_cast<base::uc16*>(byte_offset2);
841cb0ef41Sopenharmony_ci  size_t length = byte_length >> 1;
851cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(isolate);
861cb0ef41Sopenharmony_ci  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
871cb0ef41Sopenharmony_ci      isolate->regexp_macro_assembler_canonicalize();
881cb0ef41Sopenharmony_ci  for (size_t i = 0; i < length; i++) {
891cb0ef41Sopenharmony_ci    unibrow::uchar c1 = substring1[i];
901cb0ef41Sopenharmony_ci    unibrow::uchar c2 = substring2[i];
911cb0ef41Sopenharmony_ci    if (c1 != c2) {
921cb0ef41Sopenharmony_ci      unibrow::uchar s1[1] = {c1};
931cb0ef41Sopenharmony_ci      canonicalize->get(c1, '\0', s1);
941cb0ef41Sopenharmony_ci      if (s1[0] != c2) {
951cb0ef41Sopenharmony_ci        unibrow::uchar s2[1] = {c2};
961cb0ef41Sopenharmony_ci        canonicalize->get(c2, '\0', s2);
971cb0ef41Sopenharmony_ci        if (s1[0] != s2[0]) {
981cb0ef41Sopenharmony_ci          return 0;
991cb0ef41Sopenharmony_ci        }
1001cb0ef41Sopenharmony_ci      }
1011cb0ef41Sopenharmony_ci    }
1021cb0ef41Sopenharmony_ci  }
1031cb0ef41Sopenharmony_ci  return 1;
1041cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
1051cb0ef41Sopenharmony_ci}
1061cb0ef41Sopenharmony_ci
1071cb0ef41Sopenharmony_cinamespace {
1081cb0ef41Sopenharmony_ci
1091cb0ef41Sopenharmony_ciuint32_t Hash(const ZoneList<CharacterRange>* ranges) {
1101cb0ef41Sopenharmony_ci  size_t seed = 0;
1111cb0ef41Sopenharmony_ci  for (int i = 0; i < ranges->length(); i++) {
1121cb0ef41Sopenharmony_ci    const CharacterRange& r = ranges->at(i);
1131cb0ef41Sopenharmony_ci    seed = base::hash_combine(seed, r.from(), r.to());
1141cb0ef41Sopenharmony_ci  }
1151cb0ef41Sopenharmony_ci  return static_cast<uint32_t>(seed);
1161cb0ef41Sopenharmony_ci}
1171cb0ef41Sopenharmony_ci
1181cb0ef41Sopenharmony_ciconstexpr base::uc32 MaskEndOfRangeMarker(base::uc32 c) {
1191cb0ef41Sopenharmony_ci  // CharacterRanges may use 0x10ffff as the end-of-range marker irrespective
1201cb0ef41Sopenharmony_ci  // of whether the regexp IsUnicode or not; translate the marker value here.
1211cb0ef41Sopenharmony_ci  DCHECK_IMPLIES(c > kMaxUInt16, c == String::kMaxCodePoint);
1221cb0ef41Sopenharmony_ci  return c & 0xffff;
1231cb0ef41Sopenharmony_ci}
1241cb0ef41Sopenharmony_ci
1251cb0ef41Sopenharmony_ciint RangeArrayLengthFor(const ZoneList<CharacterRange>* ranges) {
1261cb0ef41Sopenharmony_ci  const int ranges_length = ranges->length();
1271cb0ef41Sopenharmony_ci  return MaskEndOfRangeMarker(ranges->at(ranges_length - 1).to()) == kMaxUInt16
1281cb0ef41Sopenharmony_ci             ? ranges_length * 2 - 1
1291cb0ef41Sopenharmony_ci             : ranges_length * 2;
1301cb0ef41Sopenharmony_ci}
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_cibool Equals(const ZoneList<CharacterRange>* lhs, const Handle<ByteArray>& rhs) {
1331cb0ef41Sopenharmony_ci  DCHECK_EQ(rhs->length() % kUInt16Size, 0);  // uc16 elements.
1341cb0ef41Sopenharmony_ci  const int rhs_length = rhs->length() / kUInt16Size;
1351cb0ef41Sopenharmony_ci  if (rhs_length != RangeArrayLengthFor(lhs)) return false;
1361cb0ef41Sopenharmony_ci  for (int i = 0; i < lhs->length(); i++) {
1371cb0ef41Sopenharmony_ci    const CharacterRange& r = lhs->at(i);
1381cb0ef41Sopenharmony_ci    if (rhs->get_uint16(i * 2 + 0) != r.from()) return false;
1391cb0ef41Sopenharmony_ci    if (i * 2 + 1 == rhs_length) break;
1401cb0ef41Sopenharmony_ci    if (rhs->get_uint16(i * 2 + 1) != r.to() + 1) return false;
1411cb0ef41Sopenharmony_ci  }
1421cb0ef41Sopenharmony_ci  return true;
1431cb0ef41Sopenharmony_ci}
1441cb0ef41Sopenharmony_ci
1451cb0ef41Sopenharmony_ciHandle<ByteArray> MakeRangeArray(Isolate* isolate,
1461cb0ef41Sopenharmony_ci                                 const ZoneList<CharacterRange>* ranges) {
1471cb0ef41Sopenharmony_ci  const int ranges_length = ranges->length();
1481cb0ef41Sopenharmony_ci  const int byte_array_length = RangeArrayLengthFor(ranges);
1491cb0ef41Sopenharmony_ci  const int size_in_bytes = byte_array_length * kUInt16Size;
1501cb0ef41Sopenharmony_ci  Handle<ByteArray> range_array =
1511cb0ef41Sopenharmony_ci      isolate->factory()->NewByteArray(size_in_bytes);
1521cb0ef41Sopenharmony_ci  for (int i = 0; i < ranges_length; i++) {
1531cb0ef41Sopenharmony_ci    const CharacterRange& r = ranges->at(i);
1541cb0ef41Sopenharmony_ci    DCHECK_LE(r.from(), kMaxUInt16);
1551cb0ef41Sopenharmony_ci    range_array->set_uint16(i * 2 + 0, r.from());
1561cb0ef41Sopenharmony_ci    const base::uc32 to = MaskEndOfRangeMarker(r.to());
1571cb0ef41Sopenharmony_ci    if (i == ranges_length - 1 && to == kMaxUInt16) {
1581cb0ef41Sopenharmony_ci      DCHECK_EQ(byte_array_length, ranges_length * 2 - 1);
1591cb0ef41Sopenharmony_ci      break;  // Avoid overflow by leaving the last range open-ended.
1601cb0ef41Sopenharmony_ci    }
1611cb0ef41Sopenharmony_ci    DCHECK_LT(to, kMaxUInt16);
1621cb0ef41Sopenharmony_ci    range_array->set_uint16(i * 2 + 1, to + 1);  // Exclusive.
1631cb0ef41Sopenharmony_ci  }
1641cb0ef41Sopenharmony_ci  return range_array;
1651cb0ef41Sopenharmony_ci}
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci}  // namespace
1681cb0ef41Sopenharmony_ci
1691cb0ef41Sopenharmony_ciHandle<ByteArray> NativeRegExpMacroAssembler::GetOrAddRangeArray(
1701cb0ef41Sopenharmony_ci    const ZoneList<CharacterRange>* ranges) {
1711cb0ef41Sopenharmony_ci  const uint32_t hash = Hash(ranges);
1721cb0ef41Sopenharmony_ci
1731cb0ef41Sopenharmony_ci  if (range_array_cache_.count(hash) != 0) {
1741cb0ef41Sopenharmony_ci    Handle<ByteArray> range_array = range_array_cache_[hash];
1751cb0ef41Sopenharmony_ci    if (Equals(ranges, range_array)) return range_array;
1761cb0ef41Sopenharmony_ci  }
1771cb0ef41Sopenharmony_ci
1781cb0ef41Sopenharmony_ci  Handle<ByteArray> range_array = MakeRangeArray(isolate(), ranges);
1791cb0ef41Sopenharmony_ci  range_array_cache_[hash] = range_array;
1801cb0ef41Sopenharmony_ci  return range_array;
1811cb0ef41Sopenharmony_ci}
1821cb0ef41Sopenharmony_ci
1831cb0ef41Sopenharmony_ci// static
1841cb0ef41Sopenharmony_ciuint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
1851cb0ef41Sopenharmony_ci                                                       Address raw_byte_array,
1861cb0ef41Sopenharmony_ci                                                       Isolate* isolate) {
1871cb0ef41Sopenharmony_ci  // Use uint32_t to avoid complexity around bool return types (which may be
1881cb0ef41Sopenharmony_ci  // optimized to use only the least significant byte).
1891cb0ef41Sopenharmony_ci  static constexpr uint32_t kTrue = 1;
1901cb0ef41Sopenharmony_ci  static constexpr uint32_t kFalse = 0;
1911cb0ef41Sopenharmony_ci
1921cb0ef41Sopenharmony_ci  ByteArray ranges = ByteArray::cast(Object(raw_byte_array));
1931cb0ef41Sopenharmony_ci
1941cb0ef41Sopenharmony_ci  DCHECK_EQ(ranges.length() % kUInt16Size, 0);  // uc16 elements.
1951cb0ef41Sopenharmony_ci  const int length = ranges.length() / kUInt16Size;
1961cb0ef41Sopenharmony_ci  DCHECK_GE(length, 1);
1971cb0ef41Sopenharmony_ci
1981cb0ef41Sopenharmony_ci  // Shortcut for fully out of range chars.
1991cb0ef41Sopenharmony_ci  if (current_char < ranges.get_uint16(0)) return kFalse;
2001cb0ef41Sopenharmony_ci  if (current_char >= ranges.get_uint16(length - 1)) {
2011cb0ef41Sopenharmony_ci    // The last range may be open-ended.
2021cb0ef41Sopenharmony_ci    return (length % 2) == 0 ? kFalse : kTrue;
2031cb0ef41Sopenharmony_ci  }
2041cb0ef41Sopenharmony_ci
2051cb0ef41Sopenharmony_ci  // Binary search for the matching range. `ranges` is encoded as
2061cb0ef41Sopenharmony_ci  // [from0, to0, from1, to1, ..., fromN, toN], or
2071cb0ef41Sopenharmony_ci  // [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
2081cb0ef41Sopenharmony_ci
2091cb0ef41Sopenharmony_ci  int mid, lower = 0, upper = length;
2101cb0ef41Sopenharmony_ci  do {
2111cb0ef41Sopenharmony_ci    mid = lower + (upper - lower) / 2;
2121cb0ef41Sopenharmony_ci    const base::uc16 elem = ranges.get_uint16(mid);
2131cb0ef41Sopenharmony_ci    if (current_char < elem) {
2141cb0ef41Sopenharmony_ci      upper = mid;
2151cb0ef41Sopenharmony_ci    } else if (current_char > elem) {
2161cb0ef41Sopenharmony_ci      lower = mid + 1;
2171cb0ef41Sopenharmony_ci    } else {
2181cb0ef41Sopenharmony_ci      DCHECK_EQ(current_char, elem);
2191cb0ef41Sopenharmony_ci      break;
2201cb0ef41Sopenharmony_ci    }
2211cb0ef41Sopenharmony_ci  } while (lower < upper);
2221cb0ef41Sopenharmony_ci
2231cb0ef41Sopenharmony_ci  const bool current_char_ge_last_elem = current_char >= ranges.get_uint16(mid);
2241cb0ef41Sopenharmony_ci  const int current_range_start_index =
2251cb0ef41Sopenharmony_ci      current_char_ge_last_elem ? mid : mid - 1;
2261cb0ef41Sopenharmony_ci
2271cb0ef41Sopenharmony_ci  // Ranges start at even indices and end at odd indices.
2281cb0ef41Sopenharmony_ci  return (current_range_start_index % 2) == 0 ? kTrue : kFalse;
2291cb0ef41Sopenharmony_ci}
2301cb0ef41Sopenharmony_ci
2311cb0ef41Sopenharmony_civoid RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
2321cb0ef41Sopenharmony_ci                                                   Label* on_failure) {
2331cb0ef41Sopenharmony_ci  Label ok;
2341cb0ef41Sopenharmony_ci  // Check that current character is not a trail surrogate.
2351cb0ef41Sopenharmony_ci  LoadCurrentCharacter(cp_offset, &ok);
2361cb0ef41Sopenharmony_ci  CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok);
2371cb0ef41Sopenharmony_ci  // Check that previous character is not a lead surrogate.
2381cb0ef41Sopenharmony_ci  LoadCurrentCharacter(cp_offset - 1, &ok);
2391cb0ef41Sopenharmony_ci  CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure);
2401cb0ef41Sopenharmony_ci  Bind(&ok);
2411cb0ef41Sopenharmony_ci}
2421cb0ef41Sopenharmony_ci
2431cb0ef41Sopenharmony_civoid RegExpMacroAssembler::CheckPosition(int cp_offset,
2441cb0ef41Sopenharmony_ci                                         Label* on_outside_input) {
2451cb0ef41Sopenharmony_ci  LoadCurrentCharacter(cp_offset, on_outside_input, true);
2461cb0ef41Sopenharmony_ci}
2471cb0ef41Sopenharmony_ci
2481cb0ef41Sopenharmony_civoid RegExpMacroAssembler::LoadCurrentCharacter(int cp_offset,
2491cb0ef41Sopenharmony_ci                                                Label* on_end_of_input,
2501cb0ef41Sopenharmony_ci                                                bool check_bounds,
2511cb0ef41Sopenharmony_ci                                                int characters,
2521cb0ef41Sopenharmony_ci                                                int eats_at_least) {
2531cb0ef41Sopenharmony_ci  // By default, eats_at_least = characters.
2541cb0ef41Sopenharmony_ci  if (eats_at_least == kUseCharactersValue) {
2551cb0ef41Sopenharmony_ci    eats_at_least = characters;
2561cb0ef41Sopenharmony_ci  }
2571cb0ef41Sopenharmony_ci
2581cb0ef41Sopenharmony_ci  LoadCurrentCharacterImpl(cp_offset, on_end_of_input, check_bounds, characters,
2591cb0ef41Sopenharmony_ci                           eats_at_least);
2601cb0ef41Sopenharmony_ci}
2611cb0ef41Sopenharmony_ci
2621cb0ef41Sopenharmony_civoid NativeRegExpMacroAssembler::LoadCurrentCharacterImpl(
2631cb0ef41Sopenharmony_ci    int cp_offset, Label* on_end_of_input, bool check_bounds, int characters,
2641cb0ef41Sopenharmony_ci    int eats_at_least) {
2651cb0ef41Sopenharmony_ci  // It's possible to preload a small number of characters when each success
2661cb0ef41Sopenharmony_ci  // path requires a large number of characters, but not the reverse.
2671cb0ef41Sopenharmony_ci  DCHECK_GE(eats_at_least, characters);
2681cb0ef41Sopenharmony_ci
2691cb0ef41Sopenharmony_ci  DCHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset));
2701cb0ef41Sopenharmony_ci  if (check_bounds) {
2711cb0ef41Sopenharmony_ci    if (cp_offset >= 0) {
2721cb0ef41Sopenharmony_ci      CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
2731cb0ef41Sopenharmony_ci    } else {
2741cb0ef41Sopenharmony_ci      CheckPosition(cp_offset, on_end_of_input);
2751cb0ef41Sopenharmony_ci    }
2761cb0ef41Sopenharmony_ci  }
2771cb0ef41Sopenharmony_ci  LoadCurrentCharacterUnchecked(cp_offset, characters);
2781cb0ef41Sopenharmony_ci}
2791cb0ef41Sopenharmony_ci
2801cb0ef41Sopenharmony_cibool NativeRegExpMacroAssembler::CanReadUnaligned() const {
2811cb0ef41Sopenharmony_ci  return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
2821cb0ef41Sopenharmony_ci}
2831cb0ef41Sopenharmony_ci
2841cb0ef41Sopenharmony_ci#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
2851cb0ef41Sopenharmony_ci
2861cb0ef41Sopenharmony_ci// This method may only be called after an interrupt.
2871cb0ef41Sopenharmony_ci// static
2881cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::CheckStackGuardState(
2891cb0ef41Sopenharmony_ci    Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
2901cb0ef41Sopenharmony_ci    Address* return_address, Code re_code, Address* subject,
2911cb0ef41Sopenharmony_ci    const byte** input_start, const byte** input_end) {
2921cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
2931cb0ef41Sopenharmony_ci  Address old_pc = PointerAuthentication::AuthenticatePC(return_address, 0);
2941cb0ef41Sopenharmony_ci  DCHECK_LE(re_code.raw_instruction_start(), old_pc);
2951cb0ef41Sopenharmony_ci  DCHECK_LE(old_pc, re_code.raw_instruction_end());
2961cb0ef41Sopenharmony_ci
2971cb0ef41Sopenharmony_ci  StackLimitCheck check(isolate);
2981cb0ef41Sopenharmony_ci  bool js_has_overflowed = check.JsHasOverflowed();
2991cb0ef41Sopenharmony_ci
3001cb0ef41Sopenharmony_ci  if (call_origin == RegExp::CallOrigin::kFromJs) {
3011cb0ef41Sopenharmony_ci    // Direct calls from JavaScript can be interrupted in two ways:
3021cb0ef41Sopenharmony_ci    // 1. A real stack overflow, in which case we let the caller throw the
3031cb0ef41Sopenharmony_ci    //    exception.
3041cb0ef41Sopenharmony_ci    // 2. The stack guard was used to interrupt execution for another purpose,
3051cb0ef41Sopenharmony_ci    //    forcing the call through the runtime system.
3061cb0ef41Sopenharmony_ci
3071cb0ef41Sopenharmony_ci    // Bug(v8:9540) Investigate why this method is called from JS although no
3081cb0ef41Sopenharmony_ci    // stackoverflow or interrupt is pending on ARM64. We return 0 in this case
3091cb0ef41Sopenharmony_ci    // to continue execution normally.
3101cb0ef41Sopenharmony_ci    if (js_has_overflowed) {
3111cb0ef41Sopenharmony_ci      return EXCEPTION;
3121cb0ef41Sopenharmony_ci    } else if (check.InterruptRequested()) {
3131cb0ef41Sopenharmony_ci      return RETRY;
3141cb0ef41Sopenharmony_ci    } else {
3151cb0ef41Sopenharmony_ci      return 0;
3161cb0ef41Sopenharmony_ci    }
3171cb0ef41Sopenharmony_ci  }
3181cb0ef41Sopenharmony_ci  DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
3191cb0ef41Sopenharmony_ci
3201cb0ef41Sopenharmony_ci  // Prepare for possible GC.
3211cb0ef41Sopenharmony_ci  HandleScope handles(isolate);
3221cb0ef41Sopenharmony_ci  Handle<Code> code_handle(re_code, isolate);
3231cb0ef41Sopenharmony_ci  Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
3241cb0ef41Sopenharmony_ci  bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
3251cb0ef41Sopenharmony_ci  int return_value = 0;
3261cb0ef41Sopenharmony_ci
3271cb0ef41Sopenharmony_ci  {
3281cb0ef41Sopenharmony_ci    DisableGCMole no_gc_mole;
3291cb0ef41Sopenharmony_ci    if (js_has_overflowed) {
3301cb0ef41Sopenharmony_ci      AllowGarbageCollection yes_gc;
3311cb0ef41Sopenharmony_ci      isolate->StackOverflow();
3321cb0ef41Sopenharmony_ci      return_value = EXCEPTION;
3331cb0ef41Sopenharmony_ci    } else if (check.InterruptRequested()) {
3341cb0ef41Sopenharmony_ci      AllowGarbageCollection yes_gc;
3351cb0ef41Sopenharmony_ci      Object result = isolate->stack_guard()->HandleInterrupts();
3361cb0ef41Sopenharmony_ci      if (result.IsException(isolate)) return_value = EXCEPTION;
3371cb0ef41Sopenharmony_ci    }
3381cb0ef41Sopenharmony_ci
3391cb0ef41Sopenharmony_ci    if (*code_handle != re_code) {  // Return address no longer valid
3401cb0ef41Sopenharmony_ci      // Overwrite the return address on the stack.
3411cb0ef41Sopenharmony_ci      intptr_t delta = code_handle->address() - re_code.address();
3421cb0ef41Sopenharmony_ci      Address new_pc = old_pc + delta;
3431cb0ef41Sopenharmony_ci      // TODO(v8:10026): avoid replacing a signed pointer.
3441cb0ef41Sopenharmony_ci      PointerAuthentication::ReplacePC(return_address, new_pc, 0);
3451cb0ef41Sopenharmony_ci    }
3461cb0ef41Sopenharmony_ci  }
3471cb0ef41Sopenharmony_ci
3481cb0ef41Sopenharmony_ci  // If we continue, we need to update the subject string addresses.
3491cb0ef41Sopenharmony_ci  if (return_value == 0) {
3501cb0ef41Sopenharmony_ci    // String encoding might have changed.
3511cb0ef41Sopenharmony_ci    if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
3521cb0ef41Sopenharmony_ci        is_one_byte) {
3531cb0ef41Sopenharmony_ci      // If we changed between an LATIN1 and an UC16 string, the specialized
3541cb0ef41Sopenharmony_ci      // code cannot be used, and we need to restart regexp matching from
3551cb0ef41Sopenharmony_ci      // scratch (including, potentially, compiling a new version of the code).
3561cb0ef41Sopenharmony_ci      return_value = RETRY;
3571cb0ef41Sopenharmony_ci    } else {
3581cb0ef41Sopenharmony_ci      *subject = subject_handle->ptr();
3591cb0ef41Sopenharmony_ci      intptr_t byte_length = *input_end - *input_start;
3601cb0ef41Sopenharmony_ci      *input_start = subject_handle->AddressOfCharacterAt(start_index, no_gc);
3611cb0ef41Sopenharmony_ci      *input_end = *input_start + byte_length;
3621cb0ef41Sopenharmony_ci    }
3631cb0ef41Sopenharmony_ci  }
3641cb0ef41Sopenharmony_ci  return return_value;
3651cb0ef41Sopenharmony_ci}
3661cb0ef41Sopenharmony_ci
3671cb0ef41Sopenharmony_ci// Returns a {Result} sentinel, or the number of successful matches.
3681cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
3691cb0ef41Sopenharmony_ci                                      Handle<String> subject,
3701cb0ef41Sopenharmony_ci                                      int* offsets_vector,
3711cb0ef41Sopenharmony_ci                                      int offsets_vector_length,
3721cb0ef41Sopenharmony_ci                                      int previous_index, Isolate* isolate) {
3731cb0ef41Sopenharmony_ci  DCHECK(subject->IsFlat());
3741cb0ef41Sopenharmony_ci  DCHECK_LE(0, previous_index);
3751cb0ef41Sopenharmony_ci  DCHECK_LE(previous_index, subject->length());
3761cb0ef41Sopenharmony_ci
3771cb0ef41Sopenharmony_ci  // No allocations before calling the regexp, but we can't use
3781cb0ef41Sopenharmony_ci  // DisallowGarbageCollection, since regexps might be preempted, and another
3791cb0ef41Sopenharmony_ci  // thread might do allocation anyway.
3801cb0ef41Sopenharmony_ci
3811cb0ef41Sopenharmony_ci  String subject_ptr = *subject;
3821cb0ef41Sopenharmony_ci  // Character offsets into string.
3831cb0ef41Sopenharmony_ci  int start_offset = previous_index;
3841cb0ef41Sopenharmony_ci  int char_length = subject_ptr.length() - start_offset;
3851cb0ef41Sopenharmony_ci  int slice_offset = 0;
3861cb0ef41Sopenharmony_ci
3871cb0ef41Sopenharmony_ci  // The string has been flattened, so if it is a cons string it contains the
3881cb0ef41Sopenharmony_ci  // full string in the first part.
3891cb0ef41Sopenharmony_ci  if (StringShape(subject_ptr).IsCons()) {
3901cb0ef41Sopenharmony_ci    DCHECK_EQ(0, ConsString::cast(subject_ptr).second().length());
3911cb0ef41Sopenharmony_ci    subject_ptr = ConsString::cast(subject_ptr).first();
3921cb0ef41Sopenharmony_ci  } else if (StringShape(subject_ptr).IsSliced()) {
3931cb0ef41Sopenharmony_ci    SlicedString slice = SlicedString::cast(subject_ptr);
3941cb0ef41Sopenharmony_ci    subject_ptr = slice.parent();
3951cb0ef41Sopenharmony_ci    slice_offset = slice.offset();
3961cb0ef41Sopenharmony_ci  }
3971cb0ef41Sopenharmony_ci  if (StringShape(subject_ptr).IsThin()) {
3981cb0ef41Sopenharmony_ci    subject_ptr = ThinString::cast(subject_ptr).actual();
3991cb0ef41Sopenharmony_ci  }
4001cb0ef41Sopenharmony_ci  // Ensure that an underlying string has the same representation.
4011cb0ef41Sopenharmony_ci  bool is_one_byte = subject_ptr.IsOneByteRepresentation();
4021cb0ef41Sopenharmony_ci  DCHECK(subject_ptr.IsExternalString() || subject_ptr.IsSeqString());
4031cb0ef41Sopenharmony_ci  // String is now either Sequential or External
4041cb0ef41Sopenharmony_ci  int char_size_shift = is_one_byte ? 0 : 1;
4051cb0ef41Sopenharmony_ci
4061cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
4071cb0ef41Sopenharmony_ci  const byte* input_start =
4081cb0ef41Sopenharmony_ci      subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc);
4091cb0ef41Sopenharmony_ci  int byte_length = char_length << char_size_shift;
4101cb0ef41Sopenharmony_ci  const byte* input_end = input_start + byte_length;
4111cb0ef41Sopenharmony_ci  return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
4121cb0ef41Sopenharmony_ci                 offsets_vector_length, isolate, *regexp);
4131cb0ef41Sopenharmony_ci}
4141cb0ef41Sopenharmony_ci
4151cb0ef41Sopenharmony_ci// static
4161cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::ExecuteForTesting(
4171cb0ef41Sopenharmony_ci    String input, int start_offset, const byte* input_start,
4181cb0ef41Sopenharmony_ci    const byte* input_end, int* output, int output_size, Isolate* isolate,
4191cb0ef41Sopenharmony_ci    JSRegExp regexp) {
4201cb0ef41Sopenharmony_ci  return Execute(input, start_offset, input_start, input_end, output,
4211cb0ef41Sopenharmony_ci                 output_size, isolate, regexp);
4221cb0ef41Sopenharmony_ci}
4231cb0ef41Sopenharmony_ci
4241cb0ef41Sopenharmony_ci// Returns a {Result} sentinel, or the number of successful matches.
4251cb0ef41Sopenharmony_ci// TODO(pthier): The JSRegExp object is passed to native irregexp code to match
4261cb0ef41Sopenharmony_ci// the signature of the interpreter. We should get rid of JS objects passed to
4271cb0ef41Sopenharmony_ci// internal methods.
4281cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::Execute(
4291cb0ef41Sopenharmony_ci    String input,  // This needs to be the unpacked (sliced, cons) string.
4301cb0ef41Sopenharmony_ci    int start_offset, const byte* input_start, const byte* input_end,
4311cb0ef41Sopenharmony_ci    int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
4321cb0ef41Sopenharmony_ci  RegExpStackScope stack_scope(isolate);
4331cb0ef41Sopenharmony_ci
4341cb0ef41Sopenharmony_ci  bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
4351cb0ef41Sopenharmony_ci  Code code = FromCodeT(CodeT::cast(regexp.code(is_one_byte)));
4361cb0ef41Sopenharmony_ci  RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
4371cb0ef41Sopenharmony_ci
4381cb0ef41Sopenharmony_ci  using RegexpMatcherSig =
4391cb0ef41Sopenharmony_ci      // NOLINTNEXTLINE(readability/casting)
4401cb0ef41Sopenharmony_ci      int(Address input_string, int start_offset, const byte* input_start,
4411cb0ef41Sopenharmony_ci          const byte* input_end, int* output, int output_size, int call_origin,
4421cb0ef41Sopenharmony_ci          Isolate* isolate, Address regexp);
4431cb0ef41Sopenharmony_ci
4441cb0ef41Sopenharmony_ci  auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
4451cb0ef41Sopenharmony_ci  int result = fn.Call(input.ptr(), start_offset, input_start, input_end,
4461cb0ef41Sopenharmony_ci                       output, output_size, call_origin, isolate, regexp.ptr());
4471cb0ef41Sopenharmony_ci  DCHECK_GE(result, SMALLEST_REGEXP_RESULT);
4481cb0ef41Sopenharmony_ci
4491cb0ef41Sopenharmony_ci  if (result == EXCEPTION && !isolate->has_pending_exception()) {
4501cb0ef41Sopenharmony_ci    // We detected a stack overflow (on the backtrack stack) in RegExp code,
4511cb0ef41Sopenharmony_ci    // but haven't created the exception yet. Additionally, we allow heap
4521cb0ef41Sopenharmony_ci    // allocation because even though it invalidates {input_start} and
4531cb0ef41Sopenharmony_ci    // {input_end}, we are about to return anyway.
4541cb0ef41Sopenharmony_ci    AllowGarbageCollection allow_allocation;
4551cb0ef41Sopenharmony_ci    isolate->StackOverflow();
4561cb0ef41Sopenharmony_ci  }
4571cb0ef41Sopenharmony_ci  return result;
4581cb0ef41Sopenharmony_ci}
4591cb0ef41Sopenharmony_ci
4601cb0ef41Sopenharmony_ci#endif  // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
4611cb0ef41Sopenharmony_ci
4621cb0ef41Sopenharmony_ci// clang-format off
// Lookup table with one entry per character code 0..255 (32 rows of 8
// entries). An entry is 0xFF for the ASCII word characters '0'-'9', 'A'-'Z',
// '_' and 'a'-'z', and 0x00 for every other code, including the entire upper
// (Latin-1) half of the table.
4631cb0ef41Sopenharmony_ciconst byte NativeRegExpMacroAssembler::word_character_map[] = {
4641cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4651cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4661cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4671cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4681cb0ef41Sopenharmony_ci
4691cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4701cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4711cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // '0' - '7'
4721cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
4731cb0ef41Sopenharmony_ci
4741cb0ef41Sopenharmony_ci    0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'A' - 'G'
4751cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'H' - 'O'
4761cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'P' - 'W'
4771cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu,  // 'X' - 'Z', '_'
4781cb0ef41Sopenharmony_ci
4791cb0ef41Sopenharmony_ci    0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'a' - 'g'
4801cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'h' - 'o'
4811cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,  // 'p' - 'w'
4821cb0ef41Sopenharmony_ci    0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
4831cb0ef41Sopenharmony_ci    // Latin-1 range
4841cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4851cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4861cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4871cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4881cb0ef41Sopenharmony_ci
4891cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4901cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4911cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4921cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4931cb0ef41Sopenharmony_ci
4941cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4951cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4961cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4971cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
4981cb0ef41Sopenharmony_ci
4991cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
5001cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
5011cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
5021cb0ef41Sopenharmony_ci    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
5031cb0ef41Sopenharmony_ci};
5041cb0ef41Sopenharmony_ci// clang-format on
5051cb0ef41Sopenharmony_ci
5061cb0ef41Sopenharmony_ci// static
5071cb0ef41Sopenharmony_ciAddress NativeRegExpMacroAssembler::GrowStack(Isolate* isolate) {
5081cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
5091cb0ef41Sopenharmony_ci
5101cb0ef41Sopenharmony_ci  RegExpStack* regexp_stack = isolate->regexp_stack();
5111cb0ef41Sopenharmony_ci  const size_t old_size = regexp_stack->memory_size();
5121cb0ef41Sopenharmony_ci
5131cb0ef41Sopenharmony_ci#ifdef DEBUG
5141cb0ef41Sopenharmony_ci  const Address old_stack_top = regexp_stack->memory_top();
5151cb0ef41Sopenharmony_ci  const Address old_stack_pointer = regexp_stack->stack_pointer();
5161cb0ef41Sopenharmony_ci  CHECK_LE(old_stack_pointer, old_stack_top);
5171cb0ef41Sopenharmony_ci  CHECK_LE(static_cast<size_t>(old_stack_top - old_stack_pointer), old_size);
5181cb0ef41Sopenharmony_ci#endif  // DEBUG
5191cb0ef41Sopenharmony_ci
5201cb0ef41Sopenharmony_ci  Address new_stack_base = regexp_stack->EnsureCapacity(old_size * 2);
5211cb0ef41Sopenharmony_ci  if (new_stack_base == kNullAddress) return kNullAddress;
5221cb0ef41Sopenharmony_ci
5231cb0ef41Sopenharmony_ci  return regexp_stack->stack_pointer();
5241cb0ef41Sopenharmony_ci}
5251cb0ef41Sopenharmony_ci
5261cb0ef41Sopenharmony_ci}  // namespace internal
5271cb0ef41Sopenharmony_ci}  // namespace v8
528