11cb0ef41Sopenharmony_ci// Copyright 2012 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/codegen/assembler.h" 81cb0ef41Sopenharmony_ci#include "src/codegen/label.h" 91cb0ef41Sopenharmony_ci#include "src/execution/isolate-inl.h" 101cb0ef41Sopenharmony_ci#include "src/execution/pointer-authentication.h" 111cb0ef41Sopenharmony_ci#include "src/execution/simulator.h" 121cb0ef41Sopenharmony_ci#include "src/regexp/regexp-stack.h" 131cb0ef41Sopenharmony_ci#include "src/regexp/special-case.h" 141cb0ef41Sopenharmony_ci#include "src/strings/unicode-inl.h" 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 171cb0ef41Sopenharmony_ci#include "unicode/uchar.h" 181cb0ef41Sopenharmony_ci#include "unicode/unistr.h" 191cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_cinamespace v8 { 221cb0ef41Sopenharmony_cinamespace internal { 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ciRegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) 251cb0ef41Sopenharmony_ci : slow_safe_compiler_(false), 261cb0ef41Sopenharmony_ci backtrack_limit_(JSRegExp::kNoBacktrackLimit), 271cb0ef41Sopenharmony_ci global_mode_(NOT_GLOBAL), 281cb0ef41Sopenharmony_ci isolate_(isolate), 291cb0ef41Sopenharmony_ci zone_(zone) {} 301cb0ef41Sopenharmony_ci 311cb0ef41Sopenharmony_cibool RegExpMacroAssembler::has_backtrack_limit() const { 321cb0ef41Sopenharmony_ci return backtrack_limit_ != JSRegExp::kNoBacktrackLimit; 331cb0ef41Sopenharmony_ci} 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_ci// static 361cb0ef41Sopenharmony_ciint RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(Address byte_offset1, 371cb0ef41Sopenharmony_ci Address byte_offset2, 381cb0ef41Sopenharmony_ci size_t byte_length, 391cb0ef41Sopenharmony_ci Isolate* isolate) { 401cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 411cb0ef41Sopenharmony_ci // This function is not allowed to cause a garbage collection. 421cb0ef41Sopenharmony_ci // A GC might move the calling generated code and invalidate the 431cb0ef41Sopenharmony_ci // return address on the stack. 441cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 451cb0ef41Sopenharmony_ci DCHECK_EQ(0, byte_length % 2); 461cb0ef41Sopenharmony_ci size_t length = byte_length / 2; 471cb0ef41Sopenharmony_ci base::uc16* substring1 = reinterpret_cast<base::uc16*>(byte_offset1); 481cb0ef41Sopenharmony_ci base::uc16* substring2 = reinterpret_cast<base::uc16*>(byte_offset2); 491cb0ef41Sopenharmony_ci 501cb0ef41Sopenharmony_ci for (size_t i = 0; i < length; i++) { 511cb0ef41Sopenharmony_ci UChar32 c1 = RegExpCaseFolding::Canonicalize(substring1[i]); 521cb0ef41Sopenharmony_ci UChar32 c2 = RegExpCaseFolding::Canonicalize(substring2[i]); 531cb0ef41Sopenharmony_ci if (c1 != c2) { 541cb0ef41Sopenharmony_ci return 0; 551cb0ef41Sopenharmony_ci } 561cb0ef41Sopenharmony_ci } 571cb0ef41Sopenharmony_ci return 1; 581cb0ef41Sopenharmony_ci#else 591cb0ef41Sopenharmony_ci return CaseInsensitiveCompareUnicode(byte_offset1, byte_offset2, byte_length, 601cb0ef41Sopenharmony_ci isolate); 611cb0ef41Sopenharmony_ci#endif 621cb0ef41Sopenharmony_ci} 631cb0ef41Sopenharmony_ci 641cb0ef41Sopenharmony_ci// static 651cb0ef41Sopenharmony_ciint RegExpMacroAssembler::CaseInsensitiveCompareUnicode(Address byte_offset1, 661cb0ef41Sopenharmony_ci Address byte_offset2, 671cb0ef41Sopenharmony_ci size_t byte_length, 681cb0ef41Sopenharmony_ci Isolate* isolate) { 691cb0ef41Sopenharmony_ci // This function is not allowed to cause a garbage collection. 701cb0ef41Sopenharmony_ci // A GC might move the calling generated code and invalidate the 711cb0ef41Sopenharmony_ci // return address on the stack. 721cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 731cb0ef41Sopenharmony_ci DCHECK_EQ(0, byte_length % 2); 741cb0ef41Sopenharmony_ci 751cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 761cb0ef41Sopenharmony_ci int32_t length = static_cast<int32_t>(byte_length >> 1); 771cb0ef41Sopenharmony_ci icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1), 781cb0ef41Sopenharmony_ci length); 791cb0ef41Sopenharmony_ci return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2), 801cb0ef41Sopenharmony_ci length, U_FOLD_CASE_DEFAULT) == 0; 811cb0ef41Sopenharmony_ci#else 821cb0ef41Sopenharmony_ci base::uc16* substring1 = reinterpret_cast<base::uc16*>(byte_offset1); 831cb0ef41Sopenharmony_ci base::uc16* substring2 = reinterpret_cast<base::uc16*>(byte_offset2); 841cb0ef41Sopenharmony_ci size_t length = byte_length >> 1; 851cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(isolate); 861cb0ef41Sopenharmony_ci unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = 871cb0ef41Sopenharmony_ci isolate->regexp_macro_assembler_canonicalize(); 881cb0ef41Sopenharmony_ci for (size_t i = 0; i < length; i++) { 891cb0ef41Sopenharmony_ci unibrow::uchar c1 = substring1[i]; 901cb0ef41Sopenharmony_ci unibrow::uchar c2 = substring2[i]; 911cb0ef41Sopenharmony_ci if (c1 != c2) { 921cb0ef41Sopenharmony_ci unibrow::uchar s1[1] = {c1}; 931cb0ef41Sopenharmony_ci canonicalize->get(c1, '\0', s1); 941cb0ef41Sopenharmony_ci if (s1[0] != c2) { 951cb0ef41Sopenharmony_ci unibrow::uchar s2[1] = {c2}; 961cb0ef41Sopenharmony_ci canonicalize->get(c2, '\0', s2); 971cb0ef41Sopenharmony_ci if (s1[0] != s2[0]) { 981cb0ef41Sopenharmony_ci return 0; 991cb0ef41Sopenharmony_ci } 1001cb0ef41Sopenharmony_ci } 1011cb0ef41Sopenharmony_ci } 1021cb0ef41Sopenharmony_ci } 1031cb0ef41Sopenharmony_ci return 1; 1041cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 1051cb0ef41Sopenharmony_ci} 1061cb0ef41Sopenharmony_ci 1071cb0ef41Sopenharmony_cinamespace { 1081cb0ef41Sopenharmony_ci 1091cb0ef41Sopenharmony_ciuint32_t Hash(const ZoneList<CharacterRange>* ranges) { 1101cb0ef41Sopenharmony_ci size_t seed = 0; 1111cb0ef41Sopenharmony_ci for (int i = 0; i < ranges->length(); i++) { 1121cb0ef41Sopenharmony_ci const CharacterRange& r = ranges->at(i); 1131cb0ef41Sopenharmony_ci seed = base::hash_combine(seed, r.from(), r.to()); 1141cb0ef41Sopenharmony_ci } 1151cb0ef41Sopenharmony_ci return static_cast<uint32_t>(seed); 1161cb0ef41Sopenharmony_ci} 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ciconstexpr base::uc32 MaskEndOfRangeMarker(base::uc32 c) { 1191cb0ef41Sopenharmony_ci // CharacterRanges may use 0x10ffff as the end-of-range marker irrespective 1201cb0ef41Sopenharmony_ci // of whether the regexp IsUnicode or not; translate the marker value here. 1211cb0ef41Sopenharmony_ci DCHECK_IMPLIES(c > kMaxUInt16, c == String::kMaxCodePoint); 1221cb0ef41Sopenharmony_ci return c & 0xffff; 1231cb0ef41Sopenharmony_ci} 1241cb0ef41Sopenharmony_ci 1251cb0ef41Sopenharmony_ciint RangeArrayLengthFor(const ZoneList<CharacterRange>* ranges) { 1261cb0ef41Sopenharmony_ci const int ranges_length = ranges->length(); 1271cb0ef41Sopenharmony_ci return MaskEndOfRangeMarker(ranges->at(ranges_length - 1).to()) == kMaxUInt16 1281cb0ef41Sopenharmony_ci ? ranges_length * 2 - 1 1291cb0ef41Sopenharmony_ci : ranges_length * 2; 1301cb0ef41Sopenharmony_ci} 1311cb0ef41Sopenharmony_ci 1321cb0ef41Sopenharmony_cibool Equals(const ZoneList<CharacterRange>* lhs, const Handle<ByteArray>& rhs) { 1331cb0ef41Sopenharmony_ci DCHECK_EQ(rhs->length() % kUInt16Size, 0); // uc16 elements. 1341cb0ef41Sopenharmony_ci const int rhs_length = rhs->length() / kUInt16Size; 1351cb0ef41Sopenharmony_ci if (rhs_length != RangeArrayLengthFor(lhs)) return false; 1361cb0ef41Sopenharmony_ci for (int i = 0; i < lhs->length(); i++) { 1371cb0ef41Sopenharmony_ci const CharacterRange& r = lhs->at(i); 1381cb0ef41Sopenharmony_ci if (rhs->get_uint16(i * 2 + 0) != r.from()) return false; 1391cb0ef41Sopenharmony_ci if (i * 2 + 1 == rhs_length) break; 1401cb0ef41Sopenharmony_ci if (rhs->get_uint16(i * 2 + 1) != r.to() + 1) return false; 1411cb0ef41Sopenharmony_ci } 1421cb0ef41Sopenharmony_ci return true; 1431cb0ef41Sopenharmony_ci} 1441cb0ef41Sopenharmony_ci 1451cb0ef41Sopenharmony_ciHandle<ByteArray> MakeRangeArray(Isolate* isolate, 1461cb0ef41Sopenharmony_ci const ZoneList<CharacterRange>* ranges) { 1471cb0ef41Sopenharmony_ci const int ranges_length = ranges->length(); 1481cb0ef41Sopenharmony_ci const int byte_array_length = RangeArrayLengthFor(ranges); 1491cb0ef41Sopenharmony_ci const int size_in_bytes = byte_array_length * kUInt16Size; 1501cb0ef41Sopenharmony_ci Handle<ByteArray> range_array = 1511cb0ef41Sopenharmony_ci isolate->factory()->NewByteArray(size_in_bytes); 1521cb0ef41Sopenharmony_ci for (int i = 0; i < ranges_length; i++) { 1531cb0ef41Sopenharmony_ci const CharacterRange& r = ranges->at(i); 1541cb0ef41Sopenharmony_ci DCHECK_LE(r.from(), kMaxUInt16); 1551cb0ef41Sopenharmony_ci range_array->set_uint16(i * 2 + 0, r.from()); 1561cb0ef41Sopenharmony_ci const base::uc32 to = MaskEndOfRangeMarker(r.to()); 1571cb0ef41Sopenharmony_ci if (i == ranges_length - 1 && to == kMaxUInt16) { 1581cb0ef41Sopenharmony_ci DCHECK_EQ(byte_array_length, ranges_length * 2 - 1); 1591cb0ef41Sopenharmony_ci break; // Avoid overflow by leaving the last range open-ended. 1601cb0ef41Sopenharmony_ci } 1611cb0ef41Sopenharmony_ci DCHECK_LT(to, kMaxUInt16); 1621cb0ef41Sopenharmony_ci range_array->set_uint16(i * 2 + 1, to + 1); // Exclusive. 1631cb0ef41Sopenharmony_ci } 1641cb0ef41Sopenharmony_ci return range_array; 1651cb0ef41Sopenharmony_ci} 1661cb0ef41Sopenharmony_ci 1671cb0ef41Sopenharmony_ci} // namespace 1681cb0ef41Sopenharmony_ci 1691cb0ef41Sopenharmony_ciHandle<ByteArray> NativeRegExpMacroAssembler::GetOrAddRangeArray( 1701cb0ef41Sopenharmony_ci const ZoneList<CharacterRange>* ranges) { 1711cb0ef41Sopenharmony_ci const uint32_t hash = Hash(ranges); 1721cb0ef41Sopenharmony_ci 1731cb0ef41Sopenharmony_ci if (range_array_cache_.count(hash) != 0) { 1741cb0ef41Sopenharmony_ci Handle<ByteArray> range_array = range_array_cache_[hash]; 1751cb0ef41Sopenharmony_ci if (Equals(ranges, range_array)) return range_array; 1761cb0ef41Sopenharmony_ci } 1771cb0ef41Sopenharmony_ci 1781cb0ef41Sopenharmony_ci Handle<ByteArray> range_array = MakeRangeArray(isolate(), ranges); 1791cb0ef41Sopenharmony_ci range_array_cache_[hash] = range_array; 1801cb0ef41Sopenharmony_ci return range_array; 1811cb0ef41Sopenharmony_ci} 1821cb0ef41Sopenharmony_ci 1831cb0ef41Sopenharmony_ci// static 1841cb0ef41Sopenharmony_ciuint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char, 1851cb0ef41Sopenharmony_ci Address raw_byte_array, 1861cb0ef41Sopenharmony_ci Isolate* isolate) { 1871cb0ef41Sopenharmony_ci // Use uint32_t to avoid complexity around bool return types (which may be 1881cb0ef41Sopenharmony_ci // optimized to use only the least significant byte). 1891cb0ef41Sopenharmony_ci static constexpr uint32_t kTrue = 1; 1901cb0ef41Sopenharmony_ci static constexpr uint32_t kFalse = 0; 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ci ByteArray ranges = ByteArray::cast(Object(raw_byte_array)); 1931cb0ef41Sopenharmony_ci 1941cb0ef41Sopenharmony_ci DCHECK_EQ(ranges.length() % kUInt16Size, 0); // uc16 elements. 1951cb0ef41Sopenharmony_ci const int length = ranges.length() / kUInt16Size; 1961cb0ef41Sopenharmony_ci DCHECK_GE(length, 1); 1971cb0ef41Sopenharmony_ci 1981cb0ef41Sopenharmony_ci // Shortcut for fully out of range chars. 1991cb0ef41Sopenharmony_ci if (current_char < ranges.get_uint16(0)) return kFalse; 2001cb0ef41Sopenharmony_ci if (current_char >= ranges.get_uint16(length - 1)) { 2011cb0ef41Sopenharmony_ci // The last range may be open-ended. 2021cb0ef41Sopenharmony_ci return (length % 2) == 0 ? kFalse : kTrue; 2031cb0ef41Sopenharmony_ci } 2041cb0ef41Sopenharmony_ci 2051cb0ef41Sopenharmony_ci // Binary search for the matching range. `ranges` is encoded as 2061cb0ef41Sopenharmony_ci // [from0, to0, from1, to1, ..., fromN, toN], or 2071cb0ef41Sopenharmony_ci // [from0, to0, from1, to1, ..., fromN] (open-ended last interval). 2081cb0ef41Sopenharmony_ci 2091cb0ef41Sopenharmony_ci int mid, lower = 0, upper = length; 2101cb0ef41Sopenharmony_ci do { 2111cb0ef41Sopenharmony_ci mid = lower + (upper - lower) / 2; 2121cb0ef41Sopenharmony_ci const base::uc16 elem = ranges.get_uint16(mid); 2131cb0ef41Sopenharmony_ci if (current_char < elem) { 2141cb0ef41Sopenharmony_ci upper = mid; 2151cb0ef41Sopenharmony_ci } else if (current_char > elem) { 2161cb0ef41Sopenharmony_ci lower = mid + 1; 2171cb0ef41Sopenharmony_ci } else { 2181cb0ef41Sopenharmony_ci DCHECK_EQ(current_char, elem); 2191cb0ef41Sopenharmony_ci break; 2201cb0ef41Sopenharmony_ci } 2211cb0ef41Sopenharmony_ci } while (lower < upper); 2221cb0ef41Sopenharmony_ci 2231cb0ef41Sopenharmony_ci const bool current_char_ge_last_elem = current_char >= ranges.get_uint16(mid); 2241cb0ef41Sopenharmony_ci const int current_range_start_index = 2251cb0ef41Sopenharmony_ci current_char_ge_last_elem ? mid : mid - 1; 2261cb0ef41Sopenharmony_ci 2271cb0ef41Sopenharmony_ci // Ranges start at even indices and end at odd indices. 2281cb0ef41Sopenharmony_ci return (current_range_start_index % 2) == 0 ? kTrue : kFalse; 2291cb0ef41Sopenharmony_ci} 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_civoid RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset, 2321cb0ef41Sopenharmony_ci Label* on_failure) { 2331cb0ef41Sopenharmony_ci Label ok; 2341cb0ef41Sopenharmony_ci // Check that current character is not a trail surrogate. 2351cb0ef41Sopenharmony_ci LoadCurrentCharacter(cp_offset, &ok); 2361cb0ef41Sopenharmony_ci CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok); 2371cb0ef41Sopenharmony_ci // Check that previous character is not a lead surrogate. 2381cb0ef41Sopenharmony_ci LoadCurrentCharacter(cp_offset - 1, &ok); 2391cb0ef41Sopenharmony_ci CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure); 2401cb0ef41Sopenharmony_ci Bind(&ok); 2411cb0ef41Sopenharmony_ci} 2421cb0ef41Sopenharmony_ci 2431cb0ef41Sopenharmony_civoid RegExpMacroAssembler::CheckPosition(int cp_offset, 2441cb0ef41Sopenharmony_ci Label* on_outside_input) { 2451cb0ef41Sopenharmony_ci LoadCurrentCharacter(cp_offset, on_outside_input, true); 2461cb0ef41Sopenharmony_ci} 2471cb0ef41Sopenharmony_ci 2481cb0ef41Sopenharmony_civoid RegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, 2491cb0ef41Sopenharmony_ci Label* on_end_of_input, 2501cb0ef41Sopenharmony_ci bool check_bounds, 2511cb0ef41Sopenharmony_ci int characters, 2521cb0ef41Sopenharmony_ci int eats_at_least) { 2531cb0ef41Sopenharmony_ci // By default, eats_at_least = characters. 2541cb0ef41Sopenharmony_ci if (eats_at_least == kUseCharactersValue) { 2551cb0ef41Sopenharmony_ci eats_at_least = characters; 2561cb0ef41Sopenharmony_ci } 2571cb0ef41Sopenharmony_ci 2581cb0ef41Sopenharmony_ci LoadCurrentCharacterImpl(cp_offset, on_end_of_input, check_bounds, characters, 2591cb0ef41Sopenharmony_ci eats_at_least); 2601cb0ef41Sopenharmony_ci} 2611cb0ef41Sopenharmony_ci 2621cb0ef41Sopenharmony_civoid NativeRegExpMacroAssembler::LoadCurrentCharacterImpl( 2631cb0ef41Sopenharmony_ci int cp_offset, Label* on_end_of_input, bool check_bounds, int characters, 2641cb0ef41Sopenharmony_ci int eats_at_least) { 2651cb0ef41Sopenharmony_ci // It's possible to preload a small number of characters when each success 2661cb0ef41Sopenharmony_ci // path requires a large number of characters, but not the reverse. 2671cb0ef41Sopenharmony_ci DCHECK_GE(eats_at_least, characters); 2681cb0ef41Sopenharmony_ci 2691cb0ef41Sopenharmony_ci DCHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset)); 2701cb0ef41Sopenharmony_ci if (check_bounds) { 2711cb0ef41Sopenharmony_ci if (cp_offset >= 0) { 2721cb0ef41Sopenharmony_ci CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); 2731cb0ef41Sopenharmony_ci } else { 2741cb0ef41Sopenharmony_ci CheckPosition(cp_offset, on_end_of_input); 2751cb0ef41Sopenharmony_ci } 2761cb0ef41Sopenharmony_ci } 2771cb0ef41Sopenharmony_ci LoadCurrentCharacterUnchecked(cp_offset, characters); 2781cb0ef41Sopenharmony_ci} 2791cb0ef41Sopenharmony_ci 2801cb0ef41Sopenharmony_cibool NativeRegExpMacroAssembler::CanReadUnaligned() const { 2811cb0ef41Sopenharmony_ci return FLAG_enable_regexp_unaligned_accesses && !slow_safe(); 2821cb0ef41Sopenharmony_ci} 2831cb0ef41Sopenharmony_ci 2841cb0ef41Sopenharmony_ci#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER 2851cb0ef41Sopenharmony_ci 2861cb0ef41Sopenharmony_ci// This method may only be called after an interrupt. 2871cb0ef41Sopenharmony_ci// static 2881cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::CheckStackGuardState( 2891cb0ef41Sopenharmony_ci Isolate* isolate, int start_index, RegExp::CallOrigin call_origin, 2901cb0ef41Sopenharmony_ci Address* return_address, Code re_code, Address* subject, 2911cb0ef41Sopenharmony_ci const byte** input_start, const byte** input_end) { 2921cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 2931cb0ef41Sopenharmony_ci Address old_pc = PointerAuthentication::AuthenticatePC(return_address, 0); 2941cb0ef41Sopenharmony_ci DCHECK_LE(re_code.raw_instruction_start(), old_pc); 2951cb0ef41Sopenharmony_ci DCHECK_LE(old_pc, re_code.raw_instruction_end()); 2961cb0ef41Sopenharmony_ci 2971cb0ef41Sopenharmony_ci StackLimitCheck check(isolate); 2981cb0ef41Sopenharmony_ci bool js_has_overflowed = check.JsHasOverflowed(); 2991cb0ef41Sopenharmony_ci 3001cb0ef41Sopenharmony_ci if (call_origin == RegExp::CallOrigin::kFromJs) { 3011cb0ef41Sopenharmony_ci // Direct calls from JavaScript can be interrupted in two ways: 3021cb0ef41Sopenharmony_ci // 1. A real stack overflow, in which case we let the caller throw the 3031cb0ef41Sopenharmony_ci // exception. 3041cb0ef41Sopenharmony_ci // 2. The stack guard was used to interrupt execution for another purpose, 3051cb0ef41Sopenharmony_ci // forcing the call through the runtime system. 3061cb0ef41Sopenharmony_ci 3071cb0ef41Sopenharmony_ci // Bug(v8:9540) Investigate why this method is called from JS although no 3081cb0ef41Sopenharmony_ci // stackoverflow or interrupt is pending on ARM64. We return 0 in this case 3091cb0ef41Sopenharmony_ci // to continue execution normally. 3101cb0ef41Sopenharmony_ci if (js_has_overflowed) { 3111cb0ef41Sopenharmony_ci return EXCEPTION; 3121cb0ef41Sopenharmony_ci } else if (check.InterruptRequested()) { 3131cb0ef41Sopenharmony_ci return RETRY; 3141cb0ef41Sopenharmony_ci } else { 3151cb0ef41Sopenharmony_ci return 0; 3161cb0ef41Sopenharmony_ci } 3171cb0ef41Sopenharmony_ci } 3181cb0ef41Sopenharmony_ci DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime); 3191cb0ef41Sopenharmony_ci 3201cb0ef41Sopenharmony_ci // Prepare for possible GC. 3211cb0ef41Sopenharmony_ci HandleScope handles(isolate); 3221cb0ef41Sopenharmony_ci Handle<Code> code_handle(re_code, isolate); 3231cb0ef41Sopenharmony_ci Handle<String> subject_handle(String::cast(Object(*subject)), isolate); 3241cb0ef41Sopenharmony_ci bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle); 3251cb0ef41Sopenharmony_ci int return_value = 0; 3261cb0ef41Sopenharmony_ci 3271cb0ef41Sopenharmony_ci { 3281cb0ef41Sopenharmony_ci DisableGCMole no_gc_mole; 3291cb0ef41Sopenharmony_ci if (js_has_overflowed) { 3301cb0ef41Sopenharmony_ci AllowGarbageCollection yes_gc; 3311cb0ef41Sopenharmony_ci isolate->StackOverflow(); 3321cb0ef41Sopenharmony_ci return_value = EXCEPTION; 3331cb0ef41Sopenharmony_ci } else if (check.InterruptRequested()) { 3341cb0ef41Sopenharmony_ci AllowGarbageCollection yes_gc; 3351cb0ef41Sopenharmony_ci Object result = isolate->stack_guard()->HandleInterrupts(); 3361cb0ef41Sopenharmony_ci if (result.IsException(isolate)) return_value = EXCEPTION; 3371cb0ef41Sopenharmony_ci } 3381cb0ef41Sopenharmony_ci 3391cb0ef41Sopenharmony_ci if (*code_handle != re_code) { // Return address no longer valid 3401cb0ef41Sopenharmony_ci // Overwrite the return address on the stack. 3411cb0ef41Sopenharmony_ci intptr_t delta = code_handle->address() - re_code.address(); 3421cb0ef41Sopenharmony_ci Address new_pc = old_pc + delta; 3431cb0ef41Sopenharmony_ci // TODO(v8:10026): avoid replacing a signed pointer. 3441cb0ef41Sopenharmony_ci PointerAuthentication::ReplacePC(return_address, new_pc, 0); 3451cb0ef41Sopenharmony_ci } 3461cb0ef41Sopenharmony_ci } 3471cb0ef41Sopenharmony_ci 3481cb0ef41Sopenharmony_ci // If we continue, we need to update the subject string addresses. 3491cb0ef41Sopenharmony_ci if (return_value == 0) { 3501cb0ef41Sopenharmony_ci // String encoding might have changed. 3511cb0ef41Sopenharmony_ci if (String::IsOneByteRepresentationUnderneath(*subject_handle) != 3521cb0ef41Sopenharmony_ci is_one_byte) { 3531cb0ef41Sopenharmony_ci // If we changed between an LATIN1 and an UC16 string, the specialized 3541cb0ef41Sopenharmony_ci // code cannot be used, and we need to restart regexp matching from 3551cb0ef41Sopenharmony_ci // scratch (including, potentially, compiling a new version of the code). 3561cb0ef41Sopenharmony_ci return_value = RETRY; 3571cb0ef41Sopenharmony_ci } else { 3581cb0ef41Sopenharmony_ci *subject = subject_handle->ptr(); 3591cb0ef41Sopenharmony_ci intptr_t byte_length = *input_end - *input_start; 3601cb0ef41Sopenharmony_ci *input_start = subject_handle->AddressOfCharacterAt(start_index, no_gc); 3611cb0ef41Sopenharmony_ci *input_end = *input_start + byte_length; 3621cb0ef41Sopenharmony_ci } 3631cb0ef41Sopenharmony_ci } 3641cb0ef41Sopenharmony_ci return return_value; 3651cb0ef41Sopenharmony_ci} 3661cb0ef41Sopenharmony_ci 3671cb0ef41Sopenharmony_ci// Returns a {Result} sentinel, or the number of successful matches. 3681cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp, 3691cb0ef41Sopenharmony_ci Handle<String> subject, 3701cb0ef41Sopenharmony_ci int* offsets_vector, 3711cb0ef41Sopenharmony_ci int offsets_vector_length, 3721cb0ef41Sopenharmony_ci int previous_index, Isolate* isolate) { 3731cb0ef41Sopenharmony_ci DCHECK(subject->IsFlat()); 3741cb0ef41Sopenharmony_ci DCHECK_LE(0, previous_index); 3751cb0ef41Sopenharmony_ci DCHECK_LE(previous_index, subject->length()); 3761cb0ef41Sopenharmony_ci 3771cb0ef41Sopenharmony_ci // No allocations before calling the regexp, but we can't use 3781cb0ef41Sopenharmony_ci // DisallowGarbageCollection, since regexps might be preempted, and another 3791cb0ef41Sopenharmony_ci // thread might do allocation anyway. 3801cb0ef41Sopenharmony_ci 3811cb0ef41Sopenharmony_ci String subject_ptr = *subject; 3821cb0ef41Sopenharmony_ci // Character offsets into string. 3831cb0ef41Sopenharmony_ci int start_offset = previous_index; 3841cb0ef41Sopenharmony_ci int char_length = subject_ptr.length() - start_offset; 3851cb0ef41Sopenharmony_ci int slice_offset = 0; 3861cb0ef41Sopenharmony_ci 3871cb0ef41Sopenharmony_ci // The string has been flattened, so if it is a cons string it contains the 3881cb0ef41Sopenharmony_ci // full string in the first part. 3891cb0ef41Sopenharmony_ci if (StringShape(subject_ptr).IsCons()) { 3901cb0ef41Sopenharmony_ci DCHECK_EQ(0, ConsString::cast(subject_ptr).second().length()); 3911cb0ef41Sopenharmony_ci subject_ptr = ConsString::cast(subject_ptr).first(); 3921cb0ef41Sopenharmony_ci } else if (StringShape(subject_ptr).IsSliced()) { 3931cb0ef41Sopenharmony_ci SlicedString slice = SlicedString::cast(subject_ptr); 3941cb0ef41Sopenharmony_ci subject_ptr = slice.parent(); 3951cb0ef41Sopenharmony_ci slice_offset = slice.offset(); 3961cb0ef41Sopenharmony_ci } 3971cb0ef41Sopenharmony_ci if (StringShape(subject_ptr).IsThin()) { 3981cb0ef41Sopenharmony_ci subject_ptr = ThinString::cast(subject_ptr).actual(); 3991cb0ef41Sopenharmony_ci } 4001cb0ef41Sopenharmony_ci // Ensure that an underlying string has the same representation. 4011cb0ef41Sopenharmony_ci bool is_one_byte = subject_ptr.IsOneByteRepresentation(); 4021cb0ef41Sopenharmony_ci DCHECK(subject_ptr.IsExternalString() || subject_ptr.IsSeqString()); 4031cb0ef41Sopenharmony_ci // String is now either Sequential or External 4041cb0ef41Sopenharmony_ci int char_size_shift = is_one_byte ? 0 : 1; 4051cb0ef41Sopenharmony_ci 4061cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 4071cb0ef41Sopenharmony_ci const byte* input_start = 4081cb0ef41Sopenharmony_ci subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc); 4091cb0ef41Sopenharmony_ci int byte_length = char_length << char_size_shift; 4101cb0ef41Sopenharmony_ci const byte* input_end = input_start + byte_length; 4111cb0ef41Sopenharmony_ci return Execute(*subject, start_offset, input_start, input_end, offsets_vector, 4121cb0ef41Sopenharmony_ci offsets_vector_length, isolate, *regexp); 4131cb0ef41Sopenharmony_ci} 4141cb0ef41Sopenharmony_ci 4151cb0ef41Sopenharmony_ci// static 4161cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::ExecuteForTesting( 4171cb0ef41Sopenharmony_ci String input, int start_offset, const byte* input_start, 4181cb0ef41Sopenharmony_ci const byte* input_end, int* output, int output_size, Isolate* isolate, 4191cb0ef41Sopenharmony_ci JSRegExp regexp) { 4201cb0ef41Sopenharmony_ci return Execute(input, start_offset, input_start, input_end, output, 4211cb0ef41Sopenharmony_ci output_size, isolate, regexp); 4221cb0ef41Sopenharmony_ci} 4231cb0ef41Sopenharmony_ci 4241cb0ef41Sopenharmony_ci// Returns a {Result} sentinel, or the number of successful matches. 4251cb0ef41Sopenharmony_ci// TODO(pthier): The JSRegExp object is passed to native irregexp code to match 4261cb0ef41Sopenharmony_ci// the signature of the interpreter. We should get rid of JS objects passed to 4271cb0ef41Sopenharmony_ci// internal methods. 4281cb0ef41Sopenharmony_ciint NativeRegExpMacroAssembler::Execute( 4291cb0ef41Sopenharmony_ci String input, // This needs to be the unpacked (sliced, cons) string. 4301cb0ef41Sopenharmony_ci int start_offset, const byte* input_start, const byte* input_end, 4311cb0ef41Sopenharmony_ci int* output, int output_size, Isolate* isolate, JSRegExp regexp) { 4321cb0ef41Sopenharmony_ci RegExpStackScope stack_scope(isolate); 4331cb0ef41Sopenharmony_ci 4341cb0ef41Sopenharmony_ci bool is_one_byte = String::IsOneByteRepresentationUnderneath(input); 4351cb0ef41Sopenharmony_ci Code code = FromCodeT(CodeT::cast(regexp.code(is_one_byte))); 4361cb0ef41Sopenharmony_ci RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime; 4371cb0ef41Sopenharmony_ci 4381cb0ef41Sopenharmony_ci using RegexpMatcherSig = 4391cb0ef41Sopenharmony_ci // NOLINTNEXTLINE(readability/casting) 4401cb0ef41Sopenharmony_ci int(Address input_string, int start_offset, const byte* input_start, 4411cb0ef41Sopenharmony_ci const byte* input_end, int* output, int output_size, int call_origin, 4421cb0ef41Sopenharmony_ci Isolate* isolate, Address regexp); 4431cb0ef41Sopenharmony_ci 4441cb0ef41Sopenharmony_ci auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code); 4451cb0ef41Sopenharmony_ci int result = fn.Call(input.ptr(), start_offset, input_start, input_end, 4461cb0ef41Sopenharmony_ci output, output_size, call_origin, isolate, regexp.ptr()); 4471cb0ef41Sopenharmony_ci DCHECK_GE(result, SMALLEST_REGEXP_RESULT); 4481cb0ef41Sopenharmony_ci 4491cb0ef41Sopenharmony_ci if (result == EXCEPTION && !isolate->has_pending_exception()) { 4501cb0ef41Sopenharmony_ci // We detected a stack overflow (on the backtrack stack) in RegExp code, 4511cb0ef41Sopenharmony_ci // but haven't created the exception yet. Additionally, we allow heap 4521cb0ef41Sopenharmony_ci // allocation because even though it invalidates {input_start} and 4531cb0ef41Sopenharmony_ci // {input_end}, we are about to return anyway. 4541cb0ef41Sopenharmony_ci AllowGarbageCollection allow_allocation; 4551cb0ef41Sopenharmony_ci isolate->StackOverflow(); 4561cb0ef41Sopenharmony_ci } 4571cb0ef41Sopenharmony_ci return result; 4581cb0ef41Sopenharmony_ci} 4591cb0ef41Sopenharmony_ci 4601cb0ef41Sopenharmony_ci#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER 4611cb0ef41Sopenharmony_ci 4621cb0ef41Sopenharmony_ci// clang-format off 4631cb0ef41Sopenharmony_ciconst byte NativeRegExpMacroAssembler::word_character_map[] = { 4641cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4651cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4661cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4671cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4681cb0ef41Sopenharmony_ci 4691cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4701cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4711cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // '0' - '7' 4721cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' 4731cb0ef41Sopenharmony_ci 4741cb0ef41Sopenharmony_ci 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'A' - 'G' 4751cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'H' - 'O' 4761cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'P' - 'W' 4771cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu, // 'X' - 'Z', '_' 4781cb0ef41Sopenharmony_ci 4791cb0ef41Sopenharmony_ci 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'a' - 'g' 4801cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'h' - 'o' 4811cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'p' - 'w' 4821cb0ef41Sopenharmony_ci 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' 4831cb0ef41Sopenharmony_ci // Latin-1 range 4841cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4851cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4861cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4871cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4881cb0ef41Sopenharmony_ci 4891cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4901cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4911cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4921cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4931cb0ef41Sopenharmony_ci 4941cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4951cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4961cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4971cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 4981cb0ef41Sopenharmony_ci 4991cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 5001cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 5011cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 5021cb0ef41Sopenharmony_ci 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 5031cb0ef41Sopenharmony_ci}; 5041cb0ef41Sopenharmony_ci// clang-format on 5051cb0ef41Sopenharmony_ci 5061cb0ef41Sopenharmony_ci// static 5071cb0ef41Sopenharmony_ciAddress NativeRegExpMacroAssembler::GrowStack(Isolate* isolate) { 5081cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 5091cb0ef41Sopenharmony_ci 5101cb0ef41Sopenharmony_ci RegExpStack* regexp_stack = isolate->regexp_stack(); 5111cb0ef41Sopenharmony_ci const size_t old_size = regexp_stack->memory_size(); 5121cb0ef41Sopenharmony_ci 5131cb0ef41Sopenharmony_ci#ifdef DEBUG 5141cb0ef41Sopenharmony_ci const Address old_stack_top = regexp_stack->memory_top(); 5151cb0ef41Sopenharmony_ci const Address old_stack_pointer = regexp_stack->stack_pointer(); 5161cb0ef41Sopenharmony_ci CHECK_LE(old_stack_pointer, old_stack_top); 5171cb0ef41Sopenharmony_ci CHECK_LE(static_cast<size_t>(old_stack_top - old_stack_pointer), old_size); 5181cb0ef41Sopenharmony_ci#endif // DEBUG 5191cb0ef41Sopenharmony_ci 5201cb0ef41Sopenharmony_ci Address new_stack_base = regexp_stack->EnsureCapacity(old_size * 2); 5211cb0ef41Sopenharmony_ci if (new_stack_base == kNullAddress) return kNullAddress; 5221cb0ef41Sopenharmony_ci 5231cb0ef41Sopenharmony_ci return regexp_stack->stack_pointer(); 5241cb0ef41Sopenharmony_ci} 5251cb0ef41Sopenharmony_ci 5261cb0ef41Sopenharmony_ci} // namespace internal 5271cb0ef41Sopenharmony_ci} // namespace v8 528