1// Copyright 2017 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_OBJECTS_JS_REGEXP_H_ 6#define V8_OBJECTS_JS_REGEXP_H_ 7 8#include "include/v8-regexp.h" 9#include "src/objects/contexts.h" 10#include "src/objects/js-array.h" 11#include "src/regexp/regexp-flags.h" 12#include "torque-generated/bit-fields.h" 13 14// Has to be the last include (doesn't have include guards): 15#include "src/objects/object-macros.h" 16 17namespace v8 { 18namespace internal { 19 20#include "torque-generated/src/objects/js-regexp-tq.inc" 21 22// Regular expressions 23// The regular expression holds a single reference to a FixedArray in 24// the kDataOffset field. 25// The FixedArray contains the following data: 26// - tag : type of regexp implementation (not compiled yet, atom or irregexp) 27// - reference to the original source string 28// - reference to the original flag string 29// If it is an atom regexp 30// - a reference to a literal string to search for 31// If it is an irregexp regexp: 32// - a reference to code for Latin1 inputs (bytecode or compiled), or a smi 33// used for tracking the last usage (used for regexp code flushing). 34// - a reference to code for UC16 inputs (bytecode or compiled), or a smi 35// used for tracking the last usage (used for regexp code flushing). 36// - max number of registers used by irregexp implementations. 37// - number of capture registers (output values) of the regexp. 38class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { 39 public: 40 enum Type { 41 NOT_COMPILED, // Initial value. No data array has been set yet. 42 ATOM, // A simple string match. 43 IRREGEXP, // Compiled with Irregexp (code or bytecode). 44 EXPERIMENTAL, // Compiled to use the experimental linear time engine. 45 }; 46 DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS() 47 48 V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New( 49 Isolate* isolate, Handle<String> source, Flags flags, 50 uint32_t backtrack_limit = kNoBacktrackLimit); 51 52 static MaybeHandle<JSRegExp> Initialize( 53 Handle<JSRegExp> regexp, Handle<String> source, Flags flags, 54 uint32_t backtrack_limit = kNoBacktrackLimit); 55 static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp, 56 Handle<String> source, 57 Handle<String> flags_string); 58 59 DECL_ACCESSORS(last_index, Object) 60 61 // Instance fields accessors. 62 inline String source() const; 63 inline Flags flags() const; 64 65 // Data array field accessors. 66 67 inline Type type_tag() const; 68 inline String atom_pattern() const; 69 // This could be a Smi kUninitializedValue or Code. 70 V8_EXPORT_PRIVATE Object code(bool is_latin1) const; 71 V8_EXPORT_PRIVATE void set_code(bool is_unicode, Handle<Code> code); 72 // This could be a Smi kUninitializedValue or ByteArray. 73 V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const; 74 // Sets the bytecode as well as initializing trampoline slots to the 75 // RegExpInterpreterTrampoline. 76 void set_bytecode_and_trampoline(Isolate* isolate, 77 Handle<ByteArray> bytecode); 78 inline int max_register_count() const; 79 // Number of captures (without the match itself). 80 inline int capture_count() const; 81 inline Object capture_name_map(); 82 inline void set_capture_name_map(Handle<FixedArray> capture_name_map); 83 uint32_t backtrack_limit() const; 84 85 static constexpr Flag AsJSRegExpFlag(RegExpFlag f) { 86 return static_cast<Flag>(f); 87 } 88 static constexpr Flags AsJSRegExpFlags(RegExpFlags f) { 89 return Flags{static_cast<int>(f)}; 90 } 91 static constexpr RegExpFlags AsRegExpFlags(Flags f) { 92 return RegExpFlags{static_cast<int>(f)}; 93 } 94 95 static base::Optional<RegExpFlag> FlagFromChar(char c) { 96 base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c); 97 if (!f.has_value()) return f; 98 if (f.value() == RegExpFlag::kLinear && 99 !FLAG_enable_experimental_regexp_engine) { 100 return {}; 101 } 102 return f; 103 } 104 105 STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone); 106#define V(_, Camel, ...) \ 107 STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \ 108 STATIC_ASSERT(static_cast<int>(k##Camel) == \ 109 static_cast<int>(RegExpFlag::k##Camel)); 110 REGEXP_FLAG_LIST(V) 111#undef V 112 STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount); 113 STATIC_ASSERT(kFlagCount == kRegExpFlagCount); 114 115 static base::Optional<Flags> FlagsFromString(Isolate* isolate, 116 Handle<String> flags); 117 118 V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate, 119 Flags flags); 120 121 inline String EscapedPattern(); 122 123 bool CanTierUp(); 124 bool MarkedForTierUp(); 125 void ResetLastTierUpTick(); 126 void TierUpTick(); 127 void MarkTierUpForNextExec(); 128 129 bool ShouldProduceBytecode(); 130 inline bool HasCompiledCode() const; 131 inline void DiscardCompiledCodeForSerialization(); 132 133 static constexpr bool TypeSupportsCaptures(Type t) { 134 return t == IRREGEXP || t == EXPERIMENTAL; 135 } 136 137 // Each capture (including the match itself) needs two registers. 138 static constexpr int RegistersForCaptureCount(int count) { 139 return (count + 1) * 2; 140 } 141 142 static constexpr int code_index(bool is_latin1) { 143 return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex; 144 } 145 146 static constexpr int bytecode_index(bool is_latin1) { 147 return is_latin1 ? kIrregexpLatin1BytecodeIndex 148 : kIrregexpUC16BytecodeIndex; 149 } 150 151 // Dispatched behavior. 152 DECL_PRINTER(JSRegExp) 153 DECL_VERIFIER(JSRegExp) 154 155 /* This is already an in-object field. */ 156 // TODO(v8:8944): improve handling of in-object fields 157 static constexpr int kLastIndexOffset = kHeaderSize; 158 159 // The initial value of the last_index field on a new JSRegExp instance. 160 static constexpr int kInitialLastIndexValue = 0; 161 162 // Indices in the data array. 163 static constexpr int kTagIndex = 0; 164 static constexpr int kSourceIndex = kTagIndex + 1; 165 static constexpr int kFlagsIndex = kSourceIndex + 1; 166 static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1; 167 static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex; 168 169 // The data fields are used in different ways depending on the 170 // value of the tag. 171 // Atom regexps (literal strings). 172 static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex; 173 static constexpr int kAtomDataSize = kAtomPatternIndex + 1; 174 175 // A Code object or a Smi marker value equal to kUninitializedValue. 176 static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex; 177 static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1; 178 // A ByteArray object or a Smi marker value equal to kUninitializedValue. 179 static constexpr int kIrregexpLatin1BytecodeIndex = 180 kIrregexpUC16CodeIndex + 1; 181 static constexpr int kIrregexpUC16BytecodeIndex = 182 kIrregexpLatin1BytecodeIndex + 1; 183 // Maximal number of registers used by either Latin1 or UC16. 184 // Only used to check that there is enough stack space 185 static constexpr int kIrregexpMaxRegisterCountIndex = 186 kIrregexpUC16BytecodeIndex + 1; 187 // Number of captures in the compiled regexp. 188 static constexpr int kIrregexpCaptureCountIndex = 189 kIrregexpMaxRegisterCountIndex + 1; 190 // Maps names of named capture groups (at indices 2i) to their corresponding 191 // (1-based) capture group indices (at indices 2i + 1). 192 static constexpr int kIrregexpCaptureNameMapIndex = 193 kIrregexpCaptureCountIndex + 1; 194 // Tier-up ticks are set to the value of the tier-up ticks flag. The value is 195 // decremented on each execution of the bytecode, so that the tier-up 196 // happens once the ticks reach zero. 197 // This value is ignored if the regexp-tier-up flag isn't turned on. 198 static constexpr int kIrregexpTicksUntilTierUpIndex = 199 kIrregexpCaptureNameMapIndex + 1; 200 // A smi containing either the backtracking limit or kNoBacktrackLimit. 201 // TODO(jgruber): If needed, this limit could be packed into other fields 202 // above to save space. 203 static constexpr int kIrregexpBacktrackLimit = 204 kIrregexpTicksUntilTierUpIndex + 1; 205 static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1; 206 207 // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms 208 // to the format of an IRREGEXP data array, with most fields set to some 209 // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP 210 // regexps take the same code path in `RegExpExecInternal`, which reads off 211 // various fields from the data array. `RegExpExecInternal` should probably 212 // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of 213 // all the IRREGEXP only fields. 214 static constexpr int kExperimentalDataSize = kIrregexpDataSize; 215 216 // In-object fields. 217 static constexpr int kLastIndexFieldIndex = 0; 218 static constexpr int kInObjectFieldCount = 1; 219 220 // The actual object size including in-object fields. 221 static constexpr int Size() { 222 return kHeaderSize + kInObjectFieldCount * kTaggedSize; 223 } 224 225 // Descriptor array index to important methods in the prototype. 226 static constexpr int kExecFunctionDescriptorIndex = 1; 227 static constexpr int kSymbolMatchFunctionDescriptorIndex = 14; 228 static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15; 229 static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16; 230 static constexpr int kSymbolSearchFunctionDescriptorIndex = 17; 231 static constexpr int kSymbolSplitFunctionDescriptorIndex = 18; 232 233 // The uninitialized value for a regexp code object. 234 static constexpr int kUninitializedValue = -1; 235 236 // If the backtrack limit is set to this marker value, no limit is applied. 237 static constexpr uint32_t kNoBacktrackLimit = 0; 238 239 // The heuristic value for the length of the subject string for which we 240 // tier-up to the compiler immediately, instead of using the interpreter. 241 static constexpr int kTierUpForSubjectLengthValue = 1000; 242 243 // Maximum number of captures allowed. 244 static constexpr int kMaxCaptures = 1 << 16; 245 246 private: 247 inline Object DataAt(int index) const; 248 inline void SetDataAt(int index, Object value); 249 250 TQ_OBJECT_CONSTRUCTORS(JSRegExp) 251}; 252 253DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags) 254 255// JSRegExpResult is just a JSArray with a specific initial map. 256// This initial map adds in-object properties for "index" and "input" 257// properties, as assigned by RegExp.prototype.exec, which allows 258// faster creation of RegExp exec results. 259// This class just holds constants used when creating the result. 260// After creation the result must be treated as a JSArray in all regards. 261class JSRegExpResult 262 : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> { 263 public: 264 // TODO(joshualitt): We would like to add printers and verifiers to 265 // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same 266 // instance type as JSArray. 267 268 // Indices of in-object properties. 269 static constexpr int kIndexIndex = 0; 270 static constexpr int kInputIndex = 1; 271 static constexpr int kGroupsIndex = 2; 272 273 // Private internal only fields. 274 static constexpr int kNamesIndex = 3; 275 static constexpr int kRegExpInputIndex = 4; 276 static constexpr int kRegExpLastIndex = 5; 277 static constexpr int kInObjectPropertyCount = 6; 278 279 static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX; 280 281 TQ_OBJECT_CONSTRUCTORS(JSRegExpResult) 282}; 283 284class JSRegExpResultWithIndices 285 : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices, 286 JSRegExpResult> { 287 public: 288 static_assert( 289 JSRegExpResult::kInObjectPropertyCount == 6, 290 "JSRegExpResultWithIndices must be a subclass of JSRegExpResult"); 291 static constexpr int kIndicesIndex = 6; 292 static constexpr int kInObjectPropertyCount = 7; 293 294 TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices) 295}; 296 297// JSRegExpResultIndices is just a JSArray with a specific initial map. 298// This initial map adds in-object properties for "group" 299// properties, as assigned by RegExp.prototype.exec, which allows 300// faster creation of RegExp exec results. 301// This class just holds constants used when creating the result. 302// After creation the result must be treated as a JSArray in all regards. 303class JSRegExpResultIndices 304 : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices, 305 JSArray> { 306 public: 307 static Handle<JSRegExpResultIndices> BuildIndices( 308 Isolate* isolate, Handle<RegExpMatchInfo> match_info, 309 Handle<Object> maybe_names); 310 311 // Indices of in-object properties. 312 static constexpr int kGroupsIndex = 0; 313 static constexpr int kInObjectPropertyCount = 1; 314 315 // Descriptor index of groups. 316 static constexpr int kGroupsDescriptorIndex = 1; 317 318 TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices) 319}; 320 321} // namespace internal 322} // namespace v8 323 324#include "src/objects/object-macros-undef.h" 325 326#endif // V8_OBJECTS_JS_REGEXP_H_ 327