1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
6 #define V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
7 
8 #include "src/base/strings.h"
9 #include "src/codegen/label.h"
10 #include "src/regexp/regexp-macro-assembler.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 // An assembler/generator for the Irregexp byte code.
16 class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
17  public:
18   // Create an assembler. Instructions and relocation information are emitted
19   // into a buffer, with the instructions starting from the beginning and the
20   // relocation information starting from the end of the buffer. See CodeDesc
21   // for a detailed comment on the layout (globals.h).
22   //
23   // The assembler allocates and grows its own buffer, and buffer_size
24   // determines the initial buffer size. The buffer is owned by the assembler
25   // and deallocated upon destruction of the assembler.
26   RegExpBytecodeGenerator(Isolate* isolate, Zone* zone);
27   ~RegExpBytecodeGenerator() override;
28   // The byte-code interpreter checks on each push anyway.
29   int stack_limit_slack() override { return 1; }
30   bool CanReadUnaligned() const override { return false; }
31   void Bind(Label* label) override;
32   void AdvanceCurrentPosition(int by) override;  // Signed cp change.
33   void PopCurrentPosition() override;
34   void PushCurrentPosition() override;
35   void Backtrack() override;
36   void GoTo(Label* label) override;
37   void PushBacktrack(Label* label) override;
38   bool Succeed() override;
39   void Fail() override;
40   void PopRegister(int register_index) override;
41   void PushRegister(int register_index,
42                     StackCheckFlag check_stack_limit) override;
43   void AdvanceRegister(int reg, int by) override;  // r[reg] += by.
44   void SetCurrentPositionFromEnd(int by) override;
45   void SetRegister(int register_index, int to) override;
46   void WriteCurrentPositionToRegister(int reg, int cp_offset) override;
47   void ClearRegisters(int reg_from, int reg_to) override;
48   void ReadCurrentPositionFromRegister(int reg) override;
49   void WriteStackPointerToRegister(int reg) override;
50   void ReadStackPointerFromRegister(int reg) override;
51   void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
52                                 bool check_bounds, int characters,
53                                 int eats_at_least) override;
54   void CheckCharacter(unsigned c, Label* on_equal) override;
55   void CheckCharacterAfterAnd(unsigned c, unsigned mask,
56                               Label* on_equal) override;
57   void CheckCharacterGT(base::uc16 limit, Label* on_greater) override;
58   void CheckCharacterLT(base::uc16 limit, Label* on_less) override;
59   void CheckGreedyLoop(Label* on_tos_equals_current_position) override;
60   void CheckAtStart(int cp_offset, Label* on_at_start) override;
61   void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override;
62   void CheckNotCharacter(unsigned c, Label* on_not_equal) override;
63   void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
64                                  Label* on_not_equal) override;
65   void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus,
66                                       base::uc16 mask,
67                                       Label* on_not_equal) override;
68   void CheckCharacterInRange(base::uc16 from, base::uc16 to,
69                              Label* on_in_range) override;
70   void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
71                                 Label* on_not_in_range) override;
72   bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
73                                   Label* on_in_range) override {
74     // Disabled in the interpreter, because 1) there is no constant pool that
75     // could store the ByteArray pointer, 2) bytecode size limits are not as
76     // restrictive as code (e.g. branch distances on arm), 3) bytecode for
77     // large character classes is already quite compact.
78     // TODO(jgruber): Consider using BytecodeArrays (with a constant pool)
79     // instead of plain ByteArrays; then we could implement
80     // CheckCharacterInRangeArray in the interpreter.
81     return false;
82   }
83   bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
84                                      Label* on_not_in_range) override {
85     return false;
86   }
87   void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
88   void CheckNotBackReference(int start_reg, bool read_backward,
89                              Label* on_no_match) override;
90   void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
91                                        bool unicode,
92                                        Label* on_no_match) override;
93   void IfRegisterLT(int register_index, int comparand, Label* if_lt) override;
94   void IfRegisterGE(int register_index, int comparand, Label* if_ge) override;
95   void IfRegisterEqPos(int register_index, Label* if_eq) override;
96 
97   IrregexpImplementation Implementation() override;
98   Handle<HeapObject> GetCode(Handle<String> source) override;
99 
100  private:
101   void ExpandBuffer();
102 
103   // Code and bitmap emission.
104   inline void EmitOrLink(Label* label);
105   inline void Emit32(uint32_t x);
106   inline void Emit16(uint32_t x);
107   inline void Emit8(uint32_t x);
108   inline void Emit(uint32_t bc, uint32_t arg);
109   inline void Emit(uint32_t bc, int32_t arg);
110   // Bytecode buffer.
111   int length();
112   void Copy(byte* a);
113 
114   // The buffer into which code and relocation info are generated.
115   static constexpr int kInitialBufferSize = 1024;
116   ZoneVector<byte> buffer_;
117 
118   // The program counter.
119   int pc_;
120   Label backtrack_;
121 
122   int advance_current_start_;
123   int advance_current_offset_;
124   int advance_current_end_;
125 
126   // Stores jump edges emitted for the bytecode (used by
127   // RegExpBytecodePeepholeOptimization).
128   // Key: jump source (offset in buffer_ where jump destination is stored).
129   // Value: jump destination (offset in buffer_ to jump to).
130   ZoneUnorderedMap<int, int> jump_edges_;
131 
132   Isolate* isolate_;
133 
134   static const int kInvalidPC = -1;
135 
136   DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodeGenerator);
137 };
138 
139 }  // namespace internal
140 }  // namespace v8
141 
142 #endif  // V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
143