1// Copyright 2016 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/regexp/regexp-utils.h" 6 7#include "src/execution/isolate.h" 8#include "src/execution/protectors-inl.h" 9#include "src/heap/factory.h" 10#include "src/objects/js-regexp-inl.h" 11#include "src/objects/objects-inl.h" 12#include "src/regexp/regexp.h" 13 14namespace v8 { 15namespace internal { 16 17Handle<String> RegExpUtils::GenericCaptureGetter( 18 Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture, 19 bool* ok) { 20 const int index = capture * 2; 21 if (index >= match_info->NumberOfCaptureRegisters()) { 22 if (ok != nullptr) *ok = false; 23 return isolate->factory()->empty_string(); 24 } 25 26 const int match_start = match_info->Capture(index); 27 const int match_end = match_info->Capture(index + 1); 28 if (match_start == -1 || match_end == -1) { 29 if (ok != nullptr) *ok = false; 30 return isolate->factory()->empty_string(); 31 } 32 33 if (ok != nullptr) *ok = true; 34 Handle<String> last_subject(match_info->LastSubject(), isolate); 35 return isolate->factory()->NewSubString(last_subject, match_start, match_end); 36} 37 38namespace { 39 40V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, JSReceiver recv) { 41 return recv.map() == isolate->regexp_function()->initial_map(); 42} 43 44} // namespace 45 46MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate, 47 Handle<JSReceiver> recv, 48 uint64_t value) { 49 Handle<Object> value_as_object = 50 isolate->factory()->NewNumberFromInt64(value); 51 if (HasInitialRegExpMap(isolate, *recv)) { 52 JSRegExp::cast(*recv).set_last_index(*value_as_object, 53 UPDATE_WRITE_BARRIER); 54 return recv; 55 } else { 56 return Object::SetProperty( 57 isolate, recv, isolate->factory()->lastIndex_string(), value_as_object, 58 StoreOrigin::kMaybeKeyed, Just(kThrowOnError)); 59 } 60} 61 62MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate, 63 Handle<JSReceiver> recv) { 64 if (HasInitialRegExpMap(isolate, *recv)) { 65 return handle(JSRegExp::cast(*recv).last_index(), isolate); 66 } else { 67 return Object::GetProperty(isolate, recv, 68 isolate->factory()->lastIndex_string()); 69 } 70} 71 72// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S ) 73// Also takes an optional exec method in case our caller 74// has already fetched exec. 75MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate, 76 Handle<JSReceiver> regexp, 77 Handle<String> string, 78 Handle<Object> exec) { 79 if (exec->IsUndefined(isolate)) { 80 ASSIGN_RETURN_ON_EXCEPTION( 81 isolate, exec, 82 Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()), 83 Object); 84 } 85 86 if (exec->IsCallable()) { 87 const int argc = 1; 88 base::ScopedVector<Handle<Object>> argv(argc); 89 argv[0] = string; 90 91 Handle<Object> result; 92 ASSIGN_RETURN_ON_EXCEPTION( 93 isolate, result, 94 Execution::Call(isolate, exec, regexp, argc, argv.begin()), Object); 95 96 if (!result->IsJSReceiver() && !result->IsNull(isolate)) { 97 THROW_NEW_ERROR(isolate, 98 NewTypeError(MessageTemplate::kInvalidRegExpExecResult), 99 Object); 100 } 101 return result; 102 } 103 104 if (!regexp->IsJSRegExp()) { 105 THROW_NEW_ERROR(isolate, 106 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver, 107 isolate->factory()->NewStringFromAsciiChecked( 108 "RegExp.prototype.exec"), 109 regexp), 110 Object); 111 } 112 113 { 114 Handle<JSFunction> regexp_exec = isolate->regexp_exec_function(); 115 116 const int argc = 1; 117 base::ScopedVector<Handle<Object>> argv(argc); 118 argv[0] = string; 119 120 return Execution::Call(isolate, regexp_exec, regexp, argc, argv.begin()); 121 } 122} 123 124bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) { 125#ifdef V8_ENABLE_FORCE_SLOW_PATH 126 if (isolate->force_slow_path()) return false; 127#endif 128 129 if (!obj->IsJSReceiver()) return false; 130 131 JSReceiver recv = JSReceiver::cast(*obj); 132 133 if (!HasInitialRegExpMap(isolate, recv)) return false; 134 135 // Check the receiver's prototype's map. 136 Object proto = recv.map().prototype(); 137 if (!proto.IsJSReceiver()) return false; 138 139 Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map(); 140 Map proto_map = JSReceiver::cast(proto).map(); 141 if (proto_map != *initial_proto_initial_map) { 142 return false; 143 } 144 145 // Check that the "exec" method is unmodified. 146 // Check that the index refers to "exec" method (this has to be consistent 147 // with the init order in the bootstrapper). 148 InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex); 149 DCHECK_EQ(*(isolate->factory()->exec_string()), 150 proto_map.instance_descriptors(isolate).GetKey(kExecIndex)); 151 if (proto_map.instance_descriptors(isolate) 152 .GetDetails(kExecIndex) 153 .constness() != PropertyConstness::kConst) { 154 return false; 155 } 156 157 // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this 158 // does not go on to check the actual value of the exec property. This would 159 // not be valid since this method is called from places that access the flags 160 // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this 161 // case. 162 163 if (!Protectors::IsRegExpSpeciesLookupChainIntact(isolate)) return false; 164 165 // The smi check is required to omit ToLength(lastIndex) calls with possible 166 // user-code execution on the fast path. 167 Object last_index = JSRegExp::cast(recv).last_index(); 168 return last_index.IsSmi() && Smi::ToInt(last_index) >= 0; 169} 170 171uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index, 172 bool unicode) { 173 DCHECK_LE(static_cast<double>(index), kMaxSafeInteger); 174 const uint64_t string_length = static_cast<uint64_t>(string->length()); 175 if (unicode && index < string_length) { 176 const uint16_t first = string->Get(static_cast<uint32_t>(index)); 177 if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) { 178 DCHECK_LT(index, std::numeric_limits<uint64_t>::max()); 179 const uint16_t second = string->Get(static_cast<uint32_t>(index + 1)); 180 if (second >= 0xDC00 && second <= 0xDFFF) { 181 return index + 2; 182 } 183 } 184 } 185 186 return index + 1; 187} 188 189MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex( 190 Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string, 191 bool unicode) { 192 Handle<Object> last_index_obj; 193 ASSIGN_RETURN_ON_EXCEPTION( 194 isolate, last_index_obj, 195 Object::GetProperty(isolate, regexp, 196 isolate->factory()->lastIndex_string()), 197 Object); 198 199 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, 200 Object::ToLength(isolate, last_index_obj), Object); 201 const uint64_t last_index = PositiveNumberToUint64(*last_index_obj); 202 const uint64_t new_last_index = 203 AdvanceStringIndex(string, last_index, unicode); 204 205 return SetLastIndex(isolate, regexp, new_last_index); 206} 207 208} // namespace internal 209} // namespace v8 210