1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/regexp/regexp-utils.h"
6
7#include "src/execution/isolate.h"
8#include "src/execution/protectors-inl.h"
9#include "src/heap/factory.h"
10#include "src/objects/js-regexp-inl.h"
11#include "src/objects/objects-inl.h"
12#include "src/regexp/regexp.h"
13
14namespace v8 {
15namespace internal {
16
17Handle<String> RegExpUtils::GenericCaptureGetter(
18    Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
19    bool* ok) {
20  const int index = capture * 2;
21  if (index >= match_info->NumberOfCaptureRegisters()) {
22    if (ok != nullptr) *ok = false;
23    return isolate->factory()->empty_string();
24  }
25
26  const int match_start = match_info->Capture(index);
27  const int match_end = match_info->Capture(index + 1);
28  if (match_start == -1 || match_end == -1) {
29    if (ok != nullptr) *ok = false;
30    return isolate->factory()->empty_string();
31  }
32
33  if (ok != nullptr) *ok = true;
34  Handle<String> last_subject(match_info->LastSubject(), isolate);
35  return isolate->factory()->NewSubString(last_subject, match_start, match_end);
36}
37
38namespace {
39
40V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, JSReceiver recv) {
41  return recv.map() == isolate->regexp_function()->initial_map();
42}
43
44}  // namespace
45
46MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
47                                              Handle<JSReceiver> recv,
48                                              uint64_t value) {
49  Handle<Object> value_as_object =
50      isolate->factory()->NewNumberFromInt64(value);
51  if (HasInitialRegExpMap(isolate, *recv)) {
52    JSRegExp::cast(*recv).set_last_index(*value_as_object,
53                                         UPDATE_WRITE_BARRIER);
54    return recv;
55  } else {
56    return Object::SetProperty(
57        isolate, recv, isolate->factory()->lastIndex_string(), value_as_object,
58        StoreOrigin::kMaybeKeyed, Just(kThrowOnError));
59  }
60}
61
62MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
63                                              Handle<JSReceiver> recv) {
64  if (HasInitialRegExpMap(isolate, *recv)) {
65    return handle(JSRegExp::cast(*recv).last_index(), isolate);
66  } else {
67    return Object::GetProperty(isolate, recv,
68                               isolate->factory()->lastIndex_string());
69  }
70}
71
72// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
73// Also takes an optional exec method in case our caller
74// has already fetched exec.
75MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
76                                            Handle<JSReceiver> regexp,
77                                            Handle<String> string,
78                                            Handle<Object> exec) {
79  if (exec->IsUndefined(isolate)) {
80    ASSIGN_RETURN_ON_EXCEPTION(
81        isolate, exec,
82        Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
83        Object);
84  }
85
86  if (exec->IsCallable()) {
87    const int argc = 1;
88    base::ScopedVector<Handle<Object>> argv(argc);
89    argv[0] = string;
90
91    Handle<Object> result;
92    ASSIGN_RETURN_ON_EXCEPTION(
93        isolate, result,
94        Execution::Call(isolate, exec, regexp, argc, argv.begin()), Object);
95
96    if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
97      THROW_NEW_ERROR(isolate,
98                      NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
99                      Object);
100    }
101    return result;
102  }
103
104  if (!regexp->IsJSRegExp()) {
105    THROW_NEW_ERROR(isolate,
106                    NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
107                                 isolate->factory()->NewStringFromAsciiChecked(
108                                     "RegExp.prototype.exec"),
109                                 regexp),
110                    Object);
111  }
112
113  {
114    Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
115
116    const int argc = 1;
117    base::ScopedVector<Handle<Object>> argv(argc);
118    argv[0] = string;
119
120    return Execution::Call(isolate, regexp_exec, regexp, argc, argv.begin());
121  }
122}
123
124bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
125#ifdef V8_ENABLE_FORCE_SLOW_PATH
126  if (isolate->force_slow_path()) return false;
127#endif
128
129  if (!obj->IsJSReceiver()) return false;
130
131  JSReceiver recv = JSReceiver::cast(*obj);
132
133  if (!HasInitialRegExpMap(isolate, recv)) return false;
134
135  // Check the receiver's prototype's map.
136  Object proto = recv.map().prototype();
137  if (!proto.IsJSReceiver()) return false;
138
139  Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
140  Map proto_map = JSReceiver::cast(proto).map();
141  if (proto_map != *initial_proto_initial_map) {
142    return false;
143  }
144
145  // Check that the "exec" method is unmodified.
146  // Check that the index refers to "exec" method (this has to be consistent
147  // with the init order in the bootstrapper).
148  InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex);
149  DCHECK_EQ(*(isolate->factory()->exec_string()),
150            proto_map.instance_descriptors(isolate).GetKey(kExecIndex));
151  if (proto_map.instance_descriptors(isolate)
152          .GetDetails(kExecIndex)
153          .constness() != PropertyConstness::kConst) {
154    return false;
155  }
156
157  // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this
158  // does not go on to check the actual value of the exec property. This would
159  // not be valid since this method is called from places that access the flags
160  // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this
161  // case.
162
163  if (!Protectors::IsRegExpSpeciesLookupChainIntact(isolate)) return false;
164
165  // The smi check is required to omit ToLength(lastIndex) calls with possible
166  // user-code execution on the fast path.
167  Object last_index = JSRegExp::cast(recv).last_index();
168  return last_index.IsSmi() && Smi::ToInt(last_index) >= 0;
169}
170
171uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
172                                         bool unicode) {
173  DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
174  const uint64_t string_length = static_cast<uint64_t>(string->length());
175  if (unicode && index < string_length) {
176    const uint16_t first = string->Get(static_cast<uint32_t>(index));
177    if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
178      DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
179      const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
180      if (second >= 0xDC00 && second <= 0xDFFF) {
181        return index + 2;
182      }
183    }
184  }
185
186  return index + 1;
187}
188
189MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
190    Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
191    bool unicode) {
192  Handle<Object> last_index_obj;
193  ASSIGN_RETURN_ON_EXCEPTION(
194      isolate, last_index_obj,
195      Object::GetProperty(isolate, regexp,
196                          isolate->factory()->lastIndex_string()),
197      Object);
198
199  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
200                             Object::ToLength(isolate, last_index_obj), Object);
201  const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
202  const uint64_t new_last_index =
203      AdvanceStringIndex(string, last_index, unicode);
204
205  return SetLastIndex(isolate, regexp, new_last_index);
206}
207
208}  // namespace internal
209}  // namespace v8
210