// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-utils.h"
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "src/execution/isolate.h"
81cb0ef41Sopenharmony_ci#include "src/execution/protectors-inl.h"
91cb0ef41Sopenharmony_ci#include "src/heap/factory.h"
101cb0ef41Sopenharmony_ci#include "src/objects/js-regexp-inl.h"
111cb0ef41Sopenharmony_ci#include "src/objects/objects-inl.h"
121cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h"
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_cinamespace v8 {
151cb0ef41Sopenharmony_cinamespace internal {
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ciHandle<String> RegExpUtils::GenericCaptureGetter(
181cb0ef41Sopenharmony_ci    Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
191cb0ef41Sopenharmony_ci    bool* ok) {
201cb0ef41Sopenharmony_ci  const int index = capture * 2;
211cb0ef41Sopenharmony_ci  if (index >= match_info->NumberOfCaptureRegisters()) {
221cb0ef41Sopenharmony_ci    if (ok != nullptr) *ok = false;
231cb0ef41Sopenharmony_ci    return isolate->factory()->empty_string();
241cb0ef41Sopenharmony_ci  }
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_ci  const int match_start = match_info->Capture(index);
271cb0ef41Sopenharmony_ci  const int match_end = match_info->Capture(index + 1);
281cb0ef41Sopenharmony_ci  if (match_start == -1 || match_end == -1) {
291cb0ef41Sopenharmony_ci    if (ok != nullptr) *ok = false;
301cb0ef41Sopenharmony_ci    return isolate->factory()->empty_string();
311cb0ef41Sopenharmony_ci  }
321cb0ef41Sopenharmony_ci
331cb0ef41Sopenharmony_ci  if (ok != nullptr) *ok = true;
341cb0ef41Sopenharmony_ci  Handle<String> last_subject(match_info->LastSubject(), isolate);
351cb0ef41Sopenharmony_ci  return isolate->factory()->NewSubString(last_subject, match_start, match_end);
361cb0ef41Sopenharmony_ci}
371cb0ef41Sopenharmony_ci
381cb0ef41Sopenharmony_cinamespace {
391cb0ef41Sopenharmony_ci
401cb0ef41Sopenharmony_ciV8_INLINE bool HasInitialRegExpMap(Isolate* isolate, JSReceiver recv) {
411cb0ef41Sopenharmony_ci  return recv.map() == isolate->regexp_function()->initial_map();
421cb0ef41Sopenharmony_ci}
431cb0ef41Sopenharmony_ci
441cb0ef41Sopenharmony_ci}  // namespace
451cb0ef41Sopenharmony_ci
461cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
471cb0ef41Sopenharmony_ci                                              Handle<JSReceiver> recv,
481cb0ef41Sopenharmony_ci                                              uint64_t value) {
491cb0ef41Sopenharmony_ci  Handle<Object> value_as_object =
501cb0ef41Sopenharmony_ci      isolate->factory()->NewNumberFromInt64(value);
511cb0ef41Sopenharmony_ci  if (HasInitialRegExpMap(isolate, *recv)) {
521cb0ef41Sopenharmony_ci    JSRegExp::cast(*recv).set_last_index(*value_as_object,
531cb0ef41Sopenharmony_ci                                         UPDATE_WRITE_BARRIER);
541cb0ef41Sopenharmony_ci    return recv;
551cb0ef41Sopenharmony_ci  } else {
561cb0ef41Sopenharmony_ci    return Object::SetProperty(
571cb0ef41Sopenharmony_ci        isolate, recv, isolate->factory()->lastIndex_string(), value_as_object,
581cb0ef41Sopenharmony_ci        StoreOrigin::kMaybeKeyed, Just(kThrowOnError));
591cb0ef41Sopenharmony_ci  }
601cb0ef41Sopenharmony_ci}
611cb0ef41Sopenharmony_ci
621cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
631cb0ef41Sopenharmony_ci                                              Handle<JSReceiver> recv) {
641cb0ef41Sopenharmony_ci  if (HasInitialRegExpMap(isolate, *recv)) {
651cb0ef41Sopenharmony_ci    return handle(JSRegExp::cast(*recv).last_index(), isolate);
661cb0ef41Sopenharmony_ci  } else {
671cb0ef41Sopenharmony_ci    return Object::GetProperty(isolate, recv,
681cb0ef41Sopenharmony_ci                               isolate->factory()->lastIndex_string());
691cb0ef41Sopenharmony_ci  }
701cb0ef41Sopenharmony_ci}
711cb0ef41Sopenharmony_ci
721cb0ef41Sopenharmony_ci// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
731cb0ef41Sopenharmony_ci// Also takes an optional exec method in case our caller
741cb0ef41Sopenharmony_ci// has already fetched exec.
751cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
761cb0ef41Sopenharmony_ci                                            Handle<JSReceiver> regexp,
771cb0ef41Sopenharmony_ci                                            Handle<String> string,
781cb0ef41Sopenharmony_ci                                            Handle<Object> exec) {
791cb0ef41Sopenharmony_ci  if (exec->IsUndefined(isolate)) {
801cb0ef41Sopenharmony_ci    ASSIGN_RETURN_ON_EXCEPTION(
811cb0ef41Sopenharmony_ci        isolate, exec,
821cb0ef41Sopenharmony_ci        Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
831cb0ef41Sopenharmony_ci        Object);
841cb0ef41Sopenharmony_ci  }
851cb0ef41Sopenharmony_ci
861cb0ef41Sopenharmony_ci  if (exec->IsCallable()) {
871cb0ef41Sopenharmony_ci    const int argc = 1;
881cb0ef41Sopenharmony_ci    base::ScopedVector<Handle<Object>> argv(argc);
891cb0ef41Sopenharmony_ci    argv[0] = string;
901cb0ef41Sopenharmony_ci
911cb0ef41Sopenharmony_ci    Handle<Object> result;
921cb0ef41Sopenharmony_ci    ASSIGN_RETURN_ON_EXCEPTION(
931cb0ef41Sopenharmony_ci        isolate, result,
941cb0ef41Sopenharmony_ci        Execution::Call(isolate, exec, regexp, argc, argv.begin()), Object);
951cb0ef41Sopenharmony_ci
961cb0ef41Sopenharmony_ci    if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
971cb0ef41Sopenharmony_ci      THROW_NEW_ERROR(isolate,
981cb0ef41Sopenharmony_ci                      NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
991cb0ef41Sopenharmony_ci                      Object);
1001cb0ef41Sopenharmony_ci    }
1011cb0ef41Sopenharmony_ci    return result;
1021cb0ef41Sopenharmony_ci  }
1031cb0ef41Sopenharmony_ci
1041cb0ef41Sopenharmony_ci  if (!regexp->IsJSRegExp()) {
1051cb0ef41Sopenharmony_ci    THROW_NEW_ERROR(isolate,
1061cb0ef41Sopenharmony_ci                    NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
1071cb0ef41Sopenharmony_ci                                 isolate->factory()->NewStringFromAsciiChecked(
1081cb0ef41Sopenharmony_ci                                     "RegExp.prototype.exec"),
1091cb0ef41Sopenharmony_ci                                 regexp),
1101cb0ef41Sopenharmony_ci                    Object);
1111cb0ef41Sopenharmony_ci  }
1121cb0ef41Sopenharmony_ci
1131cb0ef41Sopenharmony_ci  {
1141cb0ef41Sopenharmony_ci    Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
1151cb0ef41Sopenharmony_ci
1161cb0ef41Sopenharmony_ci    const int argc = 1;
1171cb0ef41Sopenharmony_ci    base::ScopedVector<Handle<Object>> argv(argc);
1181cb0ef41Sopenharmony_ci    argv[0] = string;
1191cb0ef41Sopenharmony_ci
1201cb0ef41Sopenharmony_ci    return Execution::Call(isolate, regexp_exec, regexp, argc, argv.begin());
1211cb0ef41Sopenharmony_ci  }
1221cb0ef41Sopenharmony_ci}
1231cb0ef41Sopenharmony_ci
1241cb0ef41Sopenharmony_cibool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
1251cb0ef41Sopenharmony_ci#ifdef V8_ENABLE_FORCE_SLOW_PATH
1261cb0ef41Sopenharmony_ci  if (isolate->force_slow_path()) return false;
1271cb0ef41Sopenharmony_ci#endif
1281cb0ef41Sopenharmony_ci
1291cb0ef41Sopenharmony_ci  if (!obj->IsJSReceiver()) return false;
1301cb0ef41Sopenharmony_ci
1311cb0ef41Sopenharmony_ci  JSReceiver recv = JSReceiver::cast(*obj);
1321cb0ef41Sopenharmony_ci
1331cb0ef41Sopenharmony_ci  if (!HasInitialRegExpMap(isolate, recv)) return false;
1341cb0ef41Sopenharmony_ci
1351cb0ef41Sopenharmony_ci  // Check the receiver's prototype's map.
1361cb0ef41Sopenharmony_ci  Object proto = recv.map().prototype();
1371cb0ef41Sopenharmony_ci  if (!proto.IsJSReceiver()) return false;
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ci  Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
1401cb0ef41Sopenharmony_ci  Map proto_map = JSReceiver::cast(proto).map();
1411cb0ef41Sopenharmony_ci  if (proto_map != *initial_proto_initial_map) {
1421cb0ef41Sopenharmony_ci    return false;
1431cb0ef41Sopenharmony_ci  }
1441cb0ef41Sopenharmony_ci
1451cb0ef41Sopenharmony_ci  // Check that the "exec" method is unmodified.
1461cb0ef41Sopenharmony_ci  // Check that the index refers to "exec" method (this has to be consistent
1471cb0ef41Sopenharmony_ci  // with the init order in the bootstrapper).
1481cb0ef41Sopenharmony_ci  InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex);
1491cb0ef41Sopenharmony_ci  DCHECK_EQ(*(isolate->factory()->exec_string()),
1501cb0ef41Sopenharmony_ci            proto_map.instance_descriptors(isolate).GetKey(kExecIndex));
1511cb0ef41Sopenharmony_ci  if (proto_map.instance_descriptors(isolate)
1521cb0ef41Sopenharmony_ci          .GetDetails(kExecIndex)
1531cb0ef41Sopenharmony_ci          .constness() != PropertyConstness::kConst) {
1541cb0ef41Sopenharmony_ci    return false;
1551cb0ef41Sopenharmony_ci  }
1561cb0ef41Sopenharmony_ci
1571cb0ef41Sopenharmony_ci  // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this
1581cb0ef41Sopenharmony_ci  // does not go on to check the actual value of the exec property. This would
1591cb0ef41Sopenharmony_ci  // not be valid since this method is called from places that access the flags
1601cb0ef41Sopenharmony_ci  // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this
1611cb0ef41Sopenharmony_ci  // case.
1621cb0ef41Sopenharmony_ci
1631cb0ef41Sopenharmony_ci  if (!Protectors::IsRegExpSpeciesLookupChainIntact(isolate)) return false;
1641cb0ef41Sopenharmony_ci
1651cb0ef41Sopenharmony_ci  // The smi check is required to omit ToLength(lastIndex) calls with possible
1661cb0ef41Sopenharmony_ci  // user-code execution on the fast path.
1671cb0ef41Sopenharmony_ci  Object last_index = JSRegExp::cast(recv).last_index();
1681cb0ef41Sopenharmony_ci  return last_index.IsSmi() && Smi::ToInt(last_index) >= 0;
1691cb0ef41Sopenharmony_ci}
1701cb0ef41Sopenharmony_ci
1711cb0ef41Sopenharmony_ciuint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
1721cb0ef41Sopenharmony_ci                                         bool unicode) {
1731cb0ef41Sopenharmony_ci  DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
1741cb0ef41Sopenharmony_ci  const uint64_t string_length = static_cast<uint64_t>(string->length());
1751cb0ef41Sopenharmony_ci  if (unicode && index < string_length) {
1761cb0ef41Sopenharmony_ci    const uint16_t first = string->Get(static_cast<uint32_t>(index));
1771cb0ef41Sopenharmony_ci    if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
1781cb0ef41Sopenharmony_ci      DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
1791cb0ef41Sopenharmony_ci      const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
1801cb0ef41Sopenharmony_ci      if (second >= 0xDC00 && second <= 0xDFFF) {
1811cb0ef41Sopenharmony_ci        return index + 2;
1821cb0ef41Sopenharmony_ci      }
1831cb0ef41Sopenharmony_ci    }
1841cb0ef41Sopenharmony_ci  }
1851cb0ef41Sopenharmony_ci
1861cb0ef41Sopenharmony_ci  return index + 1;
1871cb0ef41Sopenharmony_ci}
1881cb0ef41Sopenharmony_ci
1891cb0ef41Sopenharmony_ciMaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
1901cb0ef41Sopenharmony_ci    Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
1911cb0ef41Sopenharmony_ci    bool unicode) {
1921cb0ef41Sopenharmony_ci  Handle<Object> last_index_obj;
1931cb0ef41Sopenharmony_ci  ASSIGN_RETURN_ON_EXCEPTION(
1941cb0ef41Sopenharmony_ci      isolate, last_index_obj,
1951cb0ef41Sopenharmony_ci      Object::GetProperty(isolate, regexp,
1961cb0ef41Sopenharmony_ci                          isolate->factory()->lastIndex_string()),
1971cb0ef41Sopenharmony_ci      Object);
1981cb0ef41Sopenharmony_ci
1991cb0ef41Sopenharmony_ci  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
2001cb0ef41Sopenharmony_ci                             Object::ToLength(isolate, last_index_obj), Object);
2011cb0ef41Sopenharmony_ci  const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
2021cb0ef41Sopenharmony_ci  const uint64_t new_last_index =
2031cb0ef41Sopenharmony_ci      AdvanceStringIndex(string, last_index, unicode);
2041cb0ef41Sopenharmony_ci
2051cb0ef41Sopenharmony_ci  return SetLastIndex(isolate, regexp, new_last_index);
2061cb0ef41Sopenharmony_ci}
2071cb0ef41Sopenharmony_ci
2081cb0ef41Sopenharmony_ci}  // namespace internal
2091cb0ef41Sopenharmony_ci}  // namespace v8
210