11cb0ef41Sopenharmony_ci// Copyright 2016 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-utils.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/execution/isolate.h" 81cb0ef41Sopenharmony_ci#include "src/execution/protectors-inl.h" 91cb0ef41Sopenharmony_ci#include "src/heap/factory.h" 101cb0ef41Sopenharmony_ci#include "src/objects/js-regexp-inl.h" 111cb0ef41Sopenharmony_ci#include "src/objects/objects-inl.h" 121cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h" 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_cinamespace v8 { 151cb0ef41Sopenharmony_cinamespace internal { 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ciHandle<String> RegExpUtils::GenericCaptureGetter( 181cb0ef41Sopenharmony_ci Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture, 191cb0ef41Sopenharmony_ci bool* ok) { 201cb0ef41Sopenharmony_ci const int index = capture * 2; 211cb0ef41Sopenharmony_ci if (index >= match_info->NumberOfCaptureRegisters()) { 221cb0ef41Sopenharmony_ci if (ok != nullptr) *ok = false; 231cb0ef41Sopenharmony_ci return isolate->factory()->empty_string(); 241cb0ef41Sopenharmony_ci } 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ci const int match_start = match_info->Capture(index); 271cb0ef41Sopenharmony_ci const int match_end = match_info->Capture(index + 1); 281cb0ef41Sopenharmony_ci if (match_start == -1 || match_end == -1) { 291cb0ef41Sopenharmony_ci if (ok != nullptr) *ok = false; 301cb0ef41Sopenharmony_ci return isolate->factory()->empty_string(); 311cb0ef41Sopenharmony_ci } 321cb0ef41Sopenharmony_ci 331cb0ef41Sopenharmony_ci if (ok != nullptr) *ok = true; 341cb0ef41Sopenharmony_ci Handle<String> last_subject(match_info->LastSubject(), isolate); 351cb0ef41Sopenharmony_ci return isolate->factory()->NewSubString(last_subject, match_start, match_end); 361cb0ef41Sopenharmony_ci} 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_cinamespace { 391cb0ef41Sopenharmony_ci 401cb0ef41Sopenharmony_ciV8_INLINE bool HasInitialRegExpMap(Isolate* isolate, JSReceiver recv) { 411cb0ef41Sopenharmony_ci return recv.map() == isolate->regexp_function()->initial_map(); 421cb0ef41Sopenharmony_ci} 431cb0ef41Sopenharmony_ci 441cb0ef41Sopenharmony_ci} // namespace 451cb0ef41Sopenharmony_ci 461cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate, 471cb0ef41Sopenharmony_ci Handle<JSReceiver> recv, 481cb0ef41Sopenharmony_ci uint64_t value) { 491cb0ef41Sopenharmony_ci Handle<Object> value_as_object = 501cb0ef41Sopenharmony_ci isolate->factory()->NewNumberFromInt64(value); 511cb0ef41Sopenharmony_ci if (HasInitialRegExpMap(isolate, *recv)) { 521cb0ef41Sopenharmony_ci JSRegExp::cast(*recv).set_last_index(*value_as_object, 531cb0ef41Sopenharmony_ci UPDATE_WRITE_BARRIER); 541cb0ef41Sopenharmony_ci return recv; 551cb0ef41Sopenharmony_ci } else { 561cb0ef41Sopenharmony_ci return Object::SetProperty( 571cb0ef41Sopenharmony_ci isolate, recv, isolate->factory()->lastIndex_string(), value_as_object, 581cb0ef41Sopenharmony_ci StoreOrigin::kMaybeKeyed, Just(kThrowOnError)); 591cb0ef41Sopenharmony_ci } 601cb0ef41Sopenharmony_ci} 611cb0ef41Sopenharmony_ci 621cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate, 631cb0ef41Sopenharmony_ci Handle<JSReceiver> recv) { 641cb0ef41Sopenharmony_ci if (HasInitialRegExpMap(isolate, *recv)) { 651cb0ef41Sopenharmony_ci return handle(JSRegExp::cast(*recv).last_index(), isolate); 661cb0ef41Sopenharmony_ci } else { 671cb0ef41Sopenharmony_ci return Object::GetProperty(isolate, recv, 681cb0ef41Sopenharmony_ci isolate->factory()->lastIndex_string()); 691cb0ef41Sopenharmony_ci } 701cb0ef41Sopenharmony_ci} 711cb0ef41Sopenharmony_ci 721cb0ef41Sopenharmony_ci// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S ) 731cb0ef41Sopenharmony_ci// Also takes an optional exec method in case our caller 741cb0ef41Sopenharmony_ci// has already fetched exec. 751cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate, 761cb0ef41Sopenharmony_ci Handle<JSReceiver> regexp, 771cb0ef41Sopenharmony_ci Handle<String> string, 781cb0ef41Sopenharmony_ci Handle<Object> exec) { 791cb0ef41Sopenharmony_ci if (exec->IsUndefined(isolate)) { 801cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION( 811cb0ef41Sopenharmony_ci isolate, exec, 821cb0ef41Sopenharmony_ci Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()), 831cb0ef41Sopenharmony_ci Object); 841cb0ef41Sopenharmony_ci } 851cb0ef41Sopenharmony_ci 861cb0ef41Sopenharmony_ci if (exec->IsCallable()) { 871cb0ef41Sopenharmony_ci const int argc = 1; 881cb0ef41Sopenharmony_ci base::ScopedVector<Handle<Object>> argv(argc); 891cb0ef41Sopenharmony_ci argv[0] = string; 901cb0ef41Sopenharmony_ci 911cb0ef41Sopenharmony_ci Handle<Object> result; 921cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION( 931cb0ef41Sopenharmony_ci isolate, result, 941cb0ef41Sopenharmony_ci Execution::Call(isolate, exec, regexp, argc, argv.begin()), Object); 951cb0ef41Sopenharmony_ci 961cb0ef41Sopenharmony_ci if (!result->IsJSReceiver() && !result->IsNull(isolate)) { 971cb0ef41Sopenharmony_ci THROW_NEW_ERROR(isolate, 981cb0ef41Sopenharmony_ci NewTypeError(MessageTemplate::kInvalidRegExpExecResult), 991cb0ef41Sopenharmony_ci Object); 1001cb0ef41Sopenharmony_ci } 1011cb0ef41Sopenharmony_ci return result; 1021cb0ef41Sopenharmony_ci } 1031cb0ef41Sopenharmony_ci 1041cb0ef41Sopenharmony_ci if (!regexp->IsJSRegExp()) { 1051cb0ef41Sopenharmony_ci THROW_NEW_ERROR(isolate, 1061cb0ef41Sopenharmony_ci NewTypeError(MessageTemplate::kIncompatibleMethodReceiver, 1071cb0ef41Sopenharmony_ci isolate->factory()->NewStringFromAsciiChecked( 1081cb0ef41Sopenharmony_ci "RegExp.prototype.exec"), 1091cb0ef41Sopenharmony_ci regexp), 1101cb0ef41Sopenharmony_ci Object); 1111cb0ef41Sopenharmony_ci } 1121cb0ef41Sopenharmony_ci 1131cb0ef41Sopenharmony_ci { 1141cb0ef41Sopenharmony_ci Handle<JSFunction> regexp_exec = isolate->regexp_exec_function(); 1151cb0ef41Sopenharmony_ci 1161cb0ef41Sopenharmony_ci const int argc = 1; 1171cb0ef41Sopenharmony_ci base::ScopedVector<Handle<Object>> argv(argc); 1181cb0ef41Sopenharmony_ci argv[0] = string; 1191cb0ef41Sopenharmony_ci 1201cb0ef41Sopenharmony_ci return Execution::Call(isolate, regexp_exec, regexp, argc, argv.begin()); 1211cb0ef41Sopenharmony_ci } 1221cb0ef41Sopenharmony_ci} 1231cb0ef41Sopenharmony_ci 1241cb0ef41Sopenharmony_cibool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) { 1251cb0ef41Sopenharmony_ci#ifdef V8_ENABLE_FORCE_SLOW_PATH 1261cb0ef41Sopenharmony_ci if (isolate->force_slow_path()) return false; 1271cb0ef41Sopenharmony_ci#endif 1281cb0ef41Sopenharmony_ci 1291cb0ef41Sopenharmony_ci if (!obj->IsJSReceiver()) return false; 1301cb0ef41Sopenharmony_ci 1311cb0ef41Sopenharmony_ci JSReceiver recv = JSReceiver::cast(*obj); 1321cb0ef41Sopenharmony_ci 1331cb0ef41Sopenharmony_ci if (!HasInitialRegExpMap(isolate, recv)) return false; 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_ci // Check the receiver's prototype's map. 1361cb0ef41Sopenharmony_ci Object proto = recv.map().prototype(); 1371cb0ef41Sopenharmony_ci if (!proto.IsJSReceiver()) return false; 1381cb0ef41Sopenharmony_ci 1391cb0ef41Sopenharmony_ci Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map(); 1401cb0ef41Sopenharmony_ci Map proto_map = JSReceiver::cast(proto).map(); 1411cb0ef41Sopenharmony_ci if (proto_map != *initial_proto_initial_map) { 1421cb0ef41Sopenharmony_ci return false; 1431cb0ef41Sopenharmony_ci } 1441cb0ef41Sopenharmony_ci 1451cb0ef41Sopenharmony_ci // Check that the "exec" method is unmodified. 1461cb0ef41Sopenharmony_ci // Check that the index refers to "exec" method (this has to be consistent 1471cb0ef41Sopenharmony_ci // with the init order in the bootstrapper). 1481cb0ef41Sopenharmony_ci InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex); 1491cb0ef41Sopenharmony_ci DCHECK_EQ(*(isolate->factory()->exec_string()), 1501cb0ef41Sopenharmony_ci proto_map.instance_descriptors(isolate).GetKey(kExecIndex)); 1511cb0ef41Sopenharmony_ci if (proto_map.instance_descriptors(isolate) 1521cb0ef41Sopenharmony_ci .GetDetails(kExecIndex) 1531cb0ef41Sopenharmony_ci .constness() != PropertyConstness::kConst) { 1541cb0ef41Sopenharmony_ci return false; 1551cb0ef41Sopenharmony_ci } 1561cb0ef41Sopenharmony_ci 1571cb0ef41Sopenharmony_ci // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this 1581cb0ef41Sopenharmony_ci // does not go on to check the actual value of the exec property. This would 1591cb0ef41Sopenharmony_ci // not be valid since this method is called from places that access the flags 1601cb0ef41Sopenharmony_ci // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this 1611cb0ef41Sopenharmony_ci // case. 1621cb0ef41Sopenharmony_ci 1631cb0ef41Sopenharmony_ci if (!Protectors::IsRegExpSpeciesLookupChainIntact(isolate)) return false; 1641cb0ef41Sopenharmony_ci 1651cb0ef41Sopenharmony_ci // The smi check is required to omit ToLength(lastIndex) calls with possible 1661cb0ef41Sopenharmony_ci // user-code execution on the fast path. 1671cb0ef41Sopenharmony_ci Object last_index = JSRegExp::cast(recv).last_index(); 1681cb0ef41Sopenharmony_ci return last_index.IsSmi() && Smi::ToInt(last_index) >= 0; 1691cb0ef41Sopenharmony_ci} 1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ciuint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index, 1721cb0ef41Sopenharmony_ci bool unicode) { 1731cb0ef41Sopenharmony_ci DCHECK_LE(static_cast<double>(index), kMaxSafeInteger); 1741cb0ef41Sopenharmony_ci const uint64_t string_length = static_cast<uint64_t>(string->length()); 1751cb0ef41Sopenharmony_ci if (unicode && index < string_length) { 1761cb0ef41Sopenharmony_ci const uint16_t first = string->Get(static_cast<uint32_t>(index)); 1771cb0ef41Sopenharmony_ci if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) { 1781cb0ef41Sopenharmony_ci DCHECK_LT(index, std::numeric_limits<uint64_t>::max()); 1791cb0ef41Sopenharmony_ci const uint16_t second = string->Get(static_cast<uint32_t>(index + 1)); 1801cb0ef41Sopenharmony_ci if (second >= 0xDC00 && second <= 0xDFFF) { 1811cb0ef41Sopenharmony_ci return index + 2; 1821cb0ef41Sopenharmony_ci } 1831cb0ef41Sopenharmony_ci } 1841cb0ef41Sopenharmony_ci } 1851cb0ef41Sopenharmony_ci 1861cb0ef41Sopenharmony_ci return index + 1; 1871cb0ef41Sopenharmony_ci} 1881cb0ef41Sopenharmony_ci 1891cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex( 1901cb0ef41Sopenharmony_ci Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string, 1911cb0ef41Sopenharmony_ci bool unicode) { 1921cb0ef41Sopenharmony_ci Handle<Object> last_index_obj; 1931cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION( 1941cb0ef41Sopenharmony_ci isolate, last_index_obj, 1951cb0ef41Sopenharmony_ci Object::GetProperty(isolate, regexp, 1961cb0ef41Sopenharmony_ci isolate->factory()->lastIndex_string()), 1971cb0ef41Sopenharmony_ci Object); 1981cb0ef41Sopenharmony_ci 1991cb0ef41Sopenharmony_ci ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, 2001cb0ef41Sopenharmony_ci Object::ToLength(isolate, last_index_obj), Object); 2011cb0ef41Sopenharmony_ci const uint64_t last_index = PositiveNumberToUint64(*last_index_obj); 2021cb0ef41Sopenharmony_ci const uint64_t new_last_index = 2031cb0ef41Sopenharmony_ci AdvanceStringIndex(string, last_index, unicode); 2041cb0ef41Sopenharmony_ci 2051cb0ef41Sopenharmony_ci return SetLastIndex(isolate, regexp, new_last_index); 2061cb0ef41Sopenharmony_ci} 2071cb0ef41Sopenharmony_ci 2081cb0ef41Sopenharmony_ci} // namespace internal 2091cb0ef41Sopenharmony_ci} // namespace v8 210