xref: /third_party/node/deps/v8/src/objects/js-regexp.h (revision 1cb0ef41)
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_OBJECTS_JS_REGEXP_H_
6#define V8_OBJECTS_JS_REGEXP_H_
7
8#include "include/v8-regexp.h"
9#include "src/objects/contexts.h"
10#include "src/objects/js-array.h"
11#include "src/regexp/regexp-flags.h"
12#include "torque-generated/bit-fields.h"
13
14// Has to be the last include (doesn't have include guards):
15#include "src/objects/object-macros.h"
16
17namespace v8 {
18namespace internal {
19
20#include "torque-generated/src/objects/js-regexp-tq.inc"
21
22// Regular expressions
23// The regular expression holds a single reference to a FixedArray in
24// the kDataOffset field.
25// The FixedArray contains the following data:
26// - tag : type of regexp implementation (not compiled yet, atom or irregexp)
27// - reference to the original source string
28// - reference to the original flag string
29// If it is an atom regexp
30// - a reference to a literal string to search for
31// If it is an irregexp regexp:
32// - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
33// used for tracking the last usage (used for regexp code flushing).
34// - a reference to code for UC16 inputs (bytecode or compiled), or a smi
35// used for tracking the last usage (used for regexp code flushing).
36// - max number of registers used by irregexp implementations.
37// - number of capture registers (output values) of the regexp.
38class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
39 public:
40  enum Type {
41    NOT_COMPILED,  // Initial value. No data array has been set yet.
42    ATOM,          // A simple string match.
43    IRREGEXP,      // Compiled with Irregexp (code or bytecode).
44    EXPERIMENTAL,  // Compiled to use the experimental linear time engine.
45  };
46  DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
47
48  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
49      Isolate* isolate, Handle<String> source, Flags flags,
50      uint32_t backtrack_limit = kNoBacktrackLimit);
51
52  static MaybeHandle<JSRegExp> Initialize(
53      Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
54      uint32_t backtrack_limit = kNoBacktrackLimit);
55  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
56                                          Handle<String> source,
57                                          Handle<String> flags_string);
58
59  DECL_ACCESSORS(last_index, Object)
60
61  // Instance fields accessors.
62  inline String source() const;
63  inline Flags flags() const;
64
65  // Data array field accessors.
66
67  inline Type type_tag() const;
68  inline String atom_pattern() const;
69  // This could be a Smi kUninitializedValue or Code.
70  V8_EXPORT_PRIVATE Object code(bool is_latin1) const;
71  V8_EXPORT_PRIVATE void set_code(bool is_unicode, Handle<Code> code);
72  // This could be a Smi kUninitializedValue or ByteArray.
73  V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const;
74  // Sets the bytecode as well as initializing trampoline slots to the
75  // RegExpInterpreterTrampoline.
76  void set_bytecode_and_trampoline(Isolate* isolate,
77                                   Handle<ByteArray> bytecode);
78  inline int max_register_count() const;
79  // Number of captures (without the match itself).
80  inline int capture_count() const;
81  inline Object capture_name_map();
82  inline void set_capture_name_map(Handle<FixedArray> capture_name_map);
83  uint32_t backtrack_limit() const;
84
85  static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
86    return static_cast<Flag>(f);
87  }
88  static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
89    return Flags{static_cast<int>(f)};
90  }
91  static constexpr RegExpFlags AsRegExpFlags(Flags f) {
92    return RegExpFlags{static_cast<int>(f)};
93  }
94
95  static base::Optional<RegExpFlag> FlagFromChar(char c) {
96    base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
97    if (!f.has_value()) return f;
98    if (f.value() == RegExpFlag::kLinear &&
99        !FLAG_enable_experimental_regexp_engine) {
100      return {};
101    }
102    return f;
103  }
104
105  STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
106#define V(_, Camel, ...)                                             \
107  STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
108  STATIC_ASSERT(static_cast<int>(k##Camel) ==                        \
109                static_cast<int>(RegExpFlag::k##Camel));
110  REGEXP_FLAG_LIST(V)
111#undef V
112  STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
113  STATIC_ASSERT(kFlagCount == kRegExpFlagCount);
114
115  static base::Optional<Flags> FlagsFromString(Isolate* isolate,
116                                               Handle<String> flags);
117
118  V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
119                                                          Flags flags);
120
121  inline String EscapedPattern();
122
123  bool CanTierUp();
124  bool MarkedForTierUp();
125  void ResetLastTierUpTick();
126  void TierUpTick();
127  void MarkTierUpForNextExec();
128
129  bool ShouldProduceBytecode();
130  inline bool HasCompiledCode() const;
131  inline void DiscardCompiledCodeForSerialization();
132
133  static constexpr bool TypeSupportsCaptures(Type t) {
134    return t == IRREGEXP || t == EXPERIMENTAL;
135  }
136
137  // Each capture (including the match itself) needs two registers.
138  static constexpr int RegistersForCaptureCount(int count) {
139    return (count + 1) * 2;
140  }
141
142  static constexpr int code_index(bool is_latin1) {
143    return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
144  }
145
146  static constexpr int bytecode_index(bool is_latin1) {
147    return is_latin1 ? kIrregexpLatin1BytecodeIndex
148                     : kIrregexpUC16BytecodeIndex;
149  }
150
151  // Dispatched behavior.
152  DECL_PRINTER(JSRegExp)
153  DECL_VERIFIER(JSRegExp)
154
155  /* This is already an in-object field. */
156  // TODO(v8:8944): improve handling of in-object fields
157  static constexpr int kLastIndexOffset = kHeaderSize;
158
159  // The initial value of the last_index field on a new JSRegExp instance.
160  static constexpr int kInitialLastIndexValue = 0;
161
162  // Indices in the data array.
163  static constexpr int kTagIndex = 0;
164  static constexpr int kSourceIndex = kTagIndex + 1;
165  static constexpr int kFlagsIndex = kSourceIndex + 1;
166  static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1;
167  static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex;
168
169  // The data fields are used in different ways depending on the
170  // value of the tag.
171  // Atom regexps (literal strings).
172  static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex;
173  static constexpr int kAtomDataSize = kAtomPatternIndex + 1;
174
175  // A Code object or a Smi marker value equal to kUninitializedValue.
176  static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex;
177  static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1;
178  // A ByteArray object or a Smi marker value equal to kUninitializedValue.
179  static constexpr int kIrregexpLatin1BytecodeIndex =
180      kIrregexpUC16CodeIndex + 1;
181  static constexpr int kIrregexpUC16BytecodeIndex =
182      kIrregexpLatin1BytecodeIndex + 1;
183  // Maximal number of registers used by either Latin1 or UC16.
184  // Only used to check that there is enough stack space
185  static constexpr int kIrregexpMaxRegisterCountIndex =
186      kIrregexpUC16BytecodeIndex + 1;
187  // Number of captures in the compiled regexp.
188  static constexpr int kIrregexpCaptureCountIndex =
189      kIrregexpMaxRegisterCountIndex + 1;
190  // Maps names of named capture groups (at indices 2i) to their corresponding
191  // (1-based) capture group indices (at indices 2i + 1).
192  static constexpr int kIrregexpCaptureNameMapIndex =
193      kIrregexpCaptureCountIndex + 1;
194  // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
195  // decremented on each execution of the bytecode, so that the tier-up
196  // happens once the ticks reach zero.
197  // This value is ignored if the regexp-tier-up flag isn't turned on.
198  static constexpr int kIrregexpTicksUntilTierUpIndex =
199      kIrregexpCaptureNameMapIndex + 1;
200  // A smi containing either the backtracking limit or kNoBacktrackLimit.
201  // TODO(jgruber): If needed, this limit could be packed into other fields
202  // above to save space.
203  static constexpr int kIrregexpBacktrackLimit =
204      kIrregexpTicksUntilTierUpIndex + 1;
205  static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1;
206
207  // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
208  // to the format of an IRREGEXP data array, with most fields set to some
209  // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
210  // regexps take the same code path in `RegExpExecInternal`, which reads off
211  // various fields from the data array. `RegExpExecInternal` should probably
212  // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
213  // all the IRREGEXP only fields.
214  static constexpr int kExperimentalDataSize = kIrregexpDataSize;
215
216  // In-object fields.
217  static constexpr int kLastIndexFieldIndex = 0;
218  static constexpr int kInObjectFieldCount = 1;
219
220  // The actual object size including in-object fields.
221  static constexpr int Size() {
222    return kHeaderSize + kInObjectFieldCount * kTaggedSize;
223  }
224
225  // Descriptor array index to important methods in the prototype.
226  static constexpr int kExecFunctionDescriptorIndex = 1;
227  static constexpr int kSymbolMatchFunctionDescriptorIndex = 14;
228  static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15;
229  static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16;
230  static constexpr int kSymbolSearchFunctionDescriptorIndex = 17;
231  static constexpr int kSymbolSplitFunctionDescriptorIndex = 18;
232
233  // The uninitialized value for a regexp code object.
234  static constexpr int kUninitializedValue = -1;
235
236  // If the backtrack limit is set to this marker value, no limit is applied.
237  static constexpr uint32_t kNoBacktrackLimit = 0;
238
239  // The heuristic value for the length of the subject string for which we
240  // tier-up to the compiler immediately, instead of using the interpreter.
241  static constexpr int kTierUpForSubjectLengthValue = 1000;
242
243  // Maximum number of captures allowed.
244  static constexpr int kMaxCaptures = 1 << 16;
245
246 private:
247  inline Object DataAt(int index) const;
248  inline void SetDataAt(int index, Object value);
249
250  TQ_OBJECT_CONSTRUCTORS(JSRegExp)
251};
252
253DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)
254
255// JSRegExpResult is just a JSArray with a specific initial map.
256// This initial map adds in-object properties for "index" and "input"
257// properties, as assigned by RegExp.prototype.exec, which allows
258// faster creation of RegExp exec results.
259// This class just holds constants used when creating the result.
260// After creation the result must be treated as a JSArray in all regards.
261class JSRegExpResult
262    : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> {
263 public:
264  // TODO(joshualitt): We would like to add printers and verifiers to
265  // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
266  // instance type as JSArray.
267
268  // Indices of in-object properties.
269  static constexpr int kIndexIndex = 0;
270  static constexpr int kInputIndex = 1;
271  static constexpr int kGroupsIndex = 2;
272
273  // Private internal only fields.
274  static constexpr int kNamesIndex = 3;
275  static constexpr int kRegExpInputIndex = 4;
276  static constexpr int kRegExpLastIndex = 5;
277  static constexpr int kInObjectPropertyCount = 6;
278
279  static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX;
280
281  TQ_OBJECT_CONSTRUCTORS(JSRegExpResult)
282};
283
284class JSRegExpResultWithIndices
285    : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices,
286                                                      JSRegExpResult> {
287 public:
288  static_assert(
289      JSRegExpResult::kInObjectPropertyCount == 6,
290      "JSRegExpResultWithIndices must be a subclass of JSRegExpResult");
291  static constexpr int kIndicesIndex = 6;
292  static constexpr int kInObjectPropertyCount = 7;
293
294  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices)
295};
296
297// JSRegExpResultIndices is just a JSArray with a specific initial map.
298// This initial map adds in-object properties for "group"
299// properties, as assigned by RegExp.prototype.exec, which allows
300// faster creation of RegExp exec results.
301// This class just holds constants used when creating the result.
302// After creation the result must be treated as a JSArray in all regards.
303class JSRegExpResultIndices
304    : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices,
305                                                  JSArray> {
306 public:
307  static Handle<JSRegExpResultIndices> BuildIndices(
308      Isolate* isolate, Handle<RegExpMatchInfo> match_info,
309      Handle<Object> maybe_names);
310
311  // Indices of in-object properties.
312  static constexpr int kGroupsIndex = 0;
313  static constexpr int kInObjectPropertyCount = 1;
314
315  // Descriptor index of groups.
316  static constexpr int kGroupsDescriptorIndex = 1;
317
318  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices)
319};
320
321}  // namespace internal
322}  // namespace v8
323
324#include "src/objects/object-macros-undef.h"
325
326#endif  // V8_OBJECTS_JS_REGEXP_H_
327