1// Copyright 2008-2009 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/regexp/regexp-bytecode-generator.h"
6
7#include "src/ast/ast.h"
8#include "src/objects/fixed-array-inl.h"
9#include "src/regexp/regexp-bytecode-generator-inl.h"
10#include "src/regexp/regexp-bytecode-peephole.h"
11#include "src/regexp/regexp-bytecodes.h"
12#include "src/regexp/regexp-macro-assembler.h"
13
14namespace v8 {
15namespace internal {
16
17RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
18    : RegExpMacroAssembler(isolate, zone),
19      buffer_(kInitialBufferSize, zone),
20      pc_(0),
21      advance_current_end_(kInvalidPC),
22      jump_edges_(zone),
23      isolate_(isolate) {}
24
25RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
26  if (backtrack_.is_linked()) backtrack_.Unuse();
27}
28
29RegExpBytecodeGenerator::IrregexpImplementation
30RegExpBytecodeGenerator::Implementation() {
31  return kBytecodeImplementation;
32}
33
34void RegExpBytecodeGenerator::Bind(Label* l) {
35  advance_current_end_ = kInvalidPC;
36  DCHECK(!l->is_bound());
37  if (l->is_linked()) {
38    int pos = l->pos();
39    while (pos != 0) {
40      int fixup = pos;
41      pos = *reinterpret_cast<int32_t*>(buffer_.data() + fixup);
42      *reinterpret_cast<uint32_t*>(buffer_.data() + fixup) = pc_;
43      jump_edges_.emplace(fixup, pc_);
44    }
45  }
46  l->bind_to(pc_);
47}
48
49void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
50  if (l == nullptr) l = &backtrack_;
51  int pos = 0;
52  if (l->is_bound()) {
53    pos = l->pos();
54    jump_edges_.emplace(pc_, pos);
55  } else {
56    if (l->is_linked()) {
57      pos = l->pos();
58    }
59    l->link_to(pc_);
60  }
61  Emit32(pos);
62}
63
64void RegExpBytecodeGenerator::PopRegister(int register_index) {
65  DCHECK_LE(0, register_index);
66  DCHECK_GE(kMaxRegister, register_index);
67  Emit(BC_POP_REGISTER, register_index);
68}
69
70void RegExpBytecodeGenerator::PushRegister(int register_index,
71                                           StackCheckFlag check_stack_limit) {
72  DCHECK_LE(0, register_index);
73  DCHECK_GE(kMaxRegister, register_index);
74  Emit(BC_PUSH_REGISTER, register_index);
75}
76
77void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index,
78                                                             int cp_offset) {
79  DCHECK_LE(0, register_index);
80  DCHECK_GE(kMaxRegister, register_index);
81  Emit(BC_SET_REGISTER_TO_CP, register_index);
82  Emit32(cp_offset);  // Current position offset.
83}
84
85void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) {
86  DCHECK(reg_from <= reg_to);
87  for (int reg = reg_from; reg <= reg_to; reg++) {
88    SetRegister(reg, -1);
89  }
90}
91
92void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister(
93    int register_index) {
94  DCHECK_LE(0, register_index);
95  DCHECK_GE(kMaxRegister, register_index);
96  Emit(BC_SET_CP_TO_REGISTER, register_index);
97}
98
99void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) {
100  DCHECK_LE(0, register_index);
101  DCHECK_GE(kMaxRegister, register_index);
102  Emit(BC_SET_REGISTER_TO_SP, register_index);
103}
104
105void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) {
106  DCHECK_LE(0, register_index);
107  DCHECK_GE(kMaxRegister, register_index);
108  Emit(BC_SET_SP_TO_REGISTER, register_index);
109}
110
111void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) {
112  DCHECK(is_uint24(by));
113  Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
114}
115
116void RegExpBytecodeGenerator::SetRegister(int register_index, int to) {
117  DCHECK_LE(0, register_index);
118  DCHECK_GE(kMaxRegister, register_index);
119  Emit(BC_SET_REGISTER, register_index);
120  Emit32(to);
121}
122
123void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) {
124  DCHECK_LE(0, register_index);
125  DCHECK_GE(kMaxRegister, register_index);
126  Emit(BC_ADVANCE_REGISTER, register_index);
127  Emit32(by);
128}
129
130void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); }
131
132void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); }
133
134void RegExpBytecodeGenerator::Backtrack() {
135  int error_code =
136      can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE;
137  Emit(BC_POP_BT, error_code);
138}
139
140void RegExpBytecodeGenerator::GoTo(Label* l) {
141  if (advance_current_end_ == pc_) {
142    // Combine advance current and goto.
143    pc_ = advance_current_start_;
144    Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
145    EmitOrLink(l);
146    advance_current_end_ = kInvalidPC;
147  } else {
148    // Regular goto.
149    Emit(BC_GOTO, 0);
150    EmitOrLink(l);
151  }
152}
153
154void RegExpBytecodeGenerator::PushBacktrack(Label* l) {
155  Emit(BC_PUSH_BT, 0);
156  EmitOrLink(l);
157}
158
159bool RegExpBytecodeGenerator::Succeed() {
160  Emit(BC_SUCCEED, 0);
161  return false;  // Restart matching for global regexp not supported.
162}
163
164void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); }
165
166void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) {
167  // TODO(chromium:1166138): Turn back into DCHECKs once the underlying issue
168  // is fixed.
169  CHECK_LE(kMinCPOffset, by);
170  CHECK_GE(kMaxCPOffset, by);
171  advance_current_start_ = pc_;
172  advance_current_offset_ = by;
173  Emit(BC_ADVANCE_CP, by);
174  advance_current_end_ = pc_;
175}
176
177void RegExpBytecodeGenerator::CheckGreedyLoop(
178    Label* on_tos_equals_current_position) {
179  Emit(BC_CHECK_GREEDY, 0);
180  EmitOrLink(on_tos_equals_current_position);
181}
182
183void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
184                                                       Label* on_failure,
185                                                       bool check_bounds,
186                                                       int characters,
187                                                       int eats_at_least) {
188  DCHECK_GE(eats_at_least, characters);
189  if (eats_at_least > characters && check_bounds) {
190    DCHECK(is_int24(cp_offset + eats_at_least));
191    Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
192    EmitOrLink(on_failure);
193    check_bounds = false;  // Load below doesn't need to check.
194  }
195
196  DCHECK_LE(kMinCPOffset, cp_offset);
197  DCHECK_GE(kMaxCPOffset, cp_offset);
198  int bytecode;
199  if (check_bounds) {
200    if (characters == 4) {
201      bytecode = BC_LOAD_4_CURRENT_CHARS;
202    } else if (characters == 2) {
203      bytecode = BC_LOAD_2_CURRENT_CHARS;
204    } else {
205      DCHECK_EQ(1, characters);
206      bytecode = BC_LOAD_CURRENT_CHAR;
207    }
208  } else {
209    if (characters == 4) {
210      bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
211    } else if (characters == 2) {
212      bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
213    } else {
214      DCHECK_EQ(1, characters);
215      bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
216    }
217  }
218  Emit(bytecode, cp_offset);
219  if (check_bounds) EmitOrLink(on_failure);
220}
221
222void RegExpBytecodeGenerator::CheckCharacterLT(base::uc16 limit,
223                                               Label* on_less) {
224  Emit(BC_CHECK_LT, limit);
225  EmitOrLink(on_less);
226}
227
228void RegExpBytecodeGenerator::CheckCharacterGT(base::uc16 limit,
229                                               Label* on_greater) {
230  Emit(BC_CHECK_GT, limit);
231  EmitOrLink(on_greater);
232}
233
234void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) {
235  if (c > MAX_FIRST_ARG) {
236    Emit(BC_CHECK_4_CHARS, 0);
237    Emit32(c);
238  } else {
239    Emit(BC_CHECK_CHAR, c);
240  }
241  EmitOrLink(on_equal);
242}
243
244void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) {
245  Emit(BC_CHECK_AT_START, cp_offset);
246  EmitOrLink(on_at_start);
247}
248
249void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset,
250                                              Label* on_not_at_start) {
251  Emit(BC_CHECK_NOT_AT_START, cp_offset);
252  EmitOrLink(on_not_at_start);
253}
254
255void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c,
256                                                Label* on_not_equal) {
257  if (c > MAX_FIRST_ARG) {
258    Emit(BC_CHECK_NOT_4_CHARS, 0);
259    Emit32(c);
260  } else {
261    Emit(BC_CHECK_NOT_CHAR, c);
262  }
263  EmitOrLink(on_not_equal);
264}
265
266void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
267                                                     Label* on_equal) {
268  if (c > MAX_FIRST_ARG) {
269    Emit(BC_AND_CHECK_4_CHARS, 0);
270    Emit32(c);
271  } else {
272    Emit(BC_AND_CHECK_CHAR, c);
273  }
274  Emit32(mask);
275  EmitOrLink(on_equal);
276}
277
278void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c,
279                                                        uint32_t mask,
280                                                        Label* on_not_equal) {
281  if (c > MAX_FIRST_ARG) {
282    Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
283    Emit32(c);
284  } else {
285    Emit(BC_AND_CHECK_NOT_CHAR, c);
286  }
287  Emit32(mask);
288  EmitOrLink(on_not_equal);
289}
290
291void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd(
292    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
293  Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
294  Emit16(minus);
295  Emit16(mask);
296  EmitOrLink(on_not_equal);
297}
298
299void RegExpBytecodeGenerator::CheckCharacterInRange(base::uc16 from,
300                                                    base::uc16 to,
301                                                    Label* on_in_range) {
302  Emit(BC_CHECK_CHAR_IN_RANGE, 0);
303  Emit16(from);
304  Emit16(to);
305  EmitOrLink(on_in_range);
306}
307
308void RegExpBytecodeGenerator::CheckCharacterNotInRange(base::uc16 from,
309                                                       base::uc16 to,
310                                                       Label* on_not_in_range) {
311  Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
312  Emit16(from);
313  Emit16(to);
314  EmitOrLink(on_not_in_range);
315}
316
317void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table,
318                                              Label* on_bit_set) {
319  Emit(BC_CHECK_BIT_IN_TABLE, 0);
320  EmitOrLink(on_bit_set);
321  for (int i = 0; i < kTableSize; i += kBitsPerByte) {
322    int byte = 0;
323    for (int j = 0; j < kBitsPerByte; j++) {
324      if (table->get(i + j) != 0) byte |= 1 << j;
325    }
326    Emit8(byte);
327  }
328}
329
330void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
331                                                    bool read_backward,
332                                                    Label* on_not_equal) {
333  DCHECK_LE(0, start_reg);
334  DCHECK_GE(kMaxRegister, start_reg);
335  Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
336       start_reg);
337  EmitOrLink(on_not_equal);
338}
339
340void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
341    int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
342  DCHECK_LE(0, start_reg);
343  DCHECK_GE(kMaxRegister, start_reg);
344  Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
345                                : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
346                     : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
347                                : BC_CHECK_NOT_BACK_REF_NO_CASE),
348       start_reg);
349  EmitOrLink(on_not_equal);
350}
351
352void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand,
353                                           Label* on_less_than) {
354  DCHECK_LE(0, register_index);
355  DCHECK_GE(kMaxRegister, register_index);
356  Emit(BC_CHECK_REGISTER_LT, register_index);
357  Emit32(comparand);
358  EmitOrLink(on_less_than);
359}
360
361void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand,
362                                           Label* on_greater_or_equal) {
363  DCHECK_LE(0, register_index);
364  DCHECK_GE(kMaxRegister, register_index);
365  Emit(BC_CHECK_REGISTER_GE, register_index);
366  Emit32(comparand);
367  EmitOrLink(on_greater_or_equal);
368}
369
370void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
371                                              Label* on_eq) {
372  DCHECK_LE(0, register_index);
373  DCHECK_GE(kMaxRegister, register_index);
374  Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
375  EmitOrLink(on_eq);
376}
377
378Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
379  Bind(&backtrack_);
380  Backtrack();
381
382  Handle<ByteArray> array;
383  if (FLAG_regexp_peephole_optimization) {
384    array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
385        isolate_, zone(), source, buffer_.data(), length(), jump_edges_);
386  } else {
387    array = isolate_->factory()->NewByteArray(length());
388    Copy(array->GetDataStartAddress());
389  }
390
391  return array;
392}
393
394int RegExpBytecodeGenerator::length() { return pc_; }
395
396void RegExpBytecodeGenerator::Copy(byte* a) {
397  MemCopy(a, buffer_.data(), length());
398}
399
400void RegExpBytecodeGenerator::ExpandBuffer() {
401  // TODO(jgruber): The growth strategy could be smarter for large sizes.
402  // TODO(jgruber): It's not necessary to default-initialize new elements.
403  buffer_.resize(buffer_.size() * 2);
404}
405
406}  // namespace internal
407}  // namespace v8
408