1// Copyright 2008-2009 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/regexp/regexp-bytecode-generator.h" 6 7#include "src/ast/ast.h" 8#include "src/objects/fixed-array-inl.h" 9#include "src/regexp/regexp-bytecode-generator-inl.h" 10#include "src/regexp/regexp-bytecode-peephole.h" 11#include "src/regexp/regexp-bytecodes.h" 12#include "src/regexp/regexp-macro-assembler.h" 13 14namespace v8 { 15namespace internal { 16 17RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone) 18 : RegExpMacroAssembler(isolate, zone), 19 buffer_(kInitialBufferSize, zone), 20 pc_(0), 21 advance_current_end_(kInvalidPC), 22 jump_edges_(zone), 23 isolate_(isolate) {} 24 25RegExpBytecodeGenerator::~RegExpBytecodeGenerator() { 26 if (backtrack_.is_linked()) backtrack_.Unuse(); 27} 28 29RegExpBytecodeGenerator::IrregexpImplementation 30RegExpBytecodeGenerator::Implementation() { 31 return kBytecodeImplementation; 32} 33 34void RegExpBytecodeGenerator::Bind(Label* l) { 35 advance_current_end_ = kInvalidPC; 36 DCHECK(!l->is_bound()); 37 if (l->is_linked()) { 38 int pos = l->pos(); 39 while (pos != 0) { 40 int fixup = pos; 41 pos = *reinterpret_cast<int32_t*>(buffer_.data() + fixup); 42 *reinterpret_cast<uint32_t*>(buffer_.data() + fixup) = pc_; 43 jump_edges_.emplace(fixup, pc_); 44 } 45 } 46 l->bind_to(pc_); 47} 48 49void RegExpBytecodeGenerator::EmitOrLink(Label* l) { 50 if (l == nullptr) l = &backtrack_; 51 int pos = 0; 52 if (l->is_bound()) { 53 pos = l->pos(); 54 jump_edges_.emplace(pc_, pos); 55 } else { 56 if (l->is_linked()) { 57 pos = l->pos(); 58 } 59 l->link_to(pc_); 60 } 61 Emit32(pos); 62} 63 64void RegExpBytecodeGenerator::PopRegister(int register_index) { 65 DCHECK_LE(0, register_index); 66 DCHECK_GE(kMaxRegister, register_index); 67 Emit(BC_POP_REGISTER, register_index); 68} 69 70void RegExpBytecodeGenerator::PushRegister(int register_index, 71 StackCheckFlag check_stack_limit) { 72 DCHECK_LE(0, register_index); 73 DCHECK_GE(kMaxRegister, register_index); 74 Emit(BC_PUSH_REGISTER, register_index); 75} 76 77void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index, 78 int cp_offset) { 79 DCHECK_LE(0, register_index); 80 DCHECK_GE(kMaxRegister, register_index); 81 Emit(BC_SET_REGISTER_TO_CP, register_index); 82 Emit32(cp_offset); // Current position offset. 83} 84 85void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) { 86 DCHECK(reg_from <= reg_to); 87 for (int reg = reg_from; reg <= reg_to; reg++) { 88 SetRegister(reg, -1); 89 } 90} 91 92void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister( 93 int register_index) { 94 DCHECK_LE(0, register_index); 95 DCHECK_GE(kMaxRegister, register_index); 96 Emit(BC_SET_CP_TO_REGISTER, register_index); 97} 98 99void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) { 100 DCHECK_LE(0, register_index); 101 DCHECK_GE(kMaxRegister, register_index); 102 Emit(BC_SET_REGISTER_TO_SP, register_index); 103} 104 105void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) { 106 DCHECK_LE(0, register_index); 107 DCHECK_GE(kMaxRegister, register_index); 108 Emit(BC_SET_SP_TO_REGISTER, register_index); 109} 110 111void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) { 112 DCHECK(is_uint24(by)); 113 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 114} 115 116void RegExpBytecodeGenerator::SetRegister(int register_index, int to) { 117 DCHECK_LE(0, register_index); 118 DCHECK_GE(kMaxRegister, register_index); 119 Emit(BC_SET_REGISTER, register_index); 120 Emit32(to); 121} 122 123void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) { 124 DCHECK_LE(0, register_index); 125 DCHECK_GE(kMaxRegister, register_index); 126 Emit(BC_ADVANCE_REGISTER, register_index); 127 Emit32(by); 128} 129 130void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); } 131 132void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); } 133 134void RegExpBytecodeGenerator::Backtrack() { 135 int error_code = 136 can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE; 137 Emit(BC_POP_BT, error_code); 138} 139 140void RegExpBytecodeGenerator::GoTo(Label* l) { 141 if (advance_current_end_ == pc_) { 142 // Combine advance current and goto. 143 pc_ = advance_current_start_; 144 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 145 EmitOrLink(l); 146 advance_current_end_ = kInvalidPC; 147 } else { 148 // Regular goto. 149 Emit(BC_GOTO, 0); 150 EmitOrLink(l); 151 } 152} 153 154void RegExpBytecodeGenerator::PushBacktrack(Label* l) { 155 Emit(BC_PUSH_BT, 0); 156 EmitOrLink(l); 157} 158 159bool RegExpBytecodeGenerator::Succeed() { 160 Emit(BC_SUCCEED, 0); 161 return false; // Restart matching for global regexp not supported. 162} 163 164void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); } 165 166void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) { 167 // TODO(chromium:1166138): Turn back into DCHECKs once the underlying issue 168 // is fixed. 169 CHECK_LE(kMinCPOffset, by); 170 CHECK_GE(kMaxCPOffset, by); 171 advance_current_start_ = pc_; 172 advance_current_offset_ = by; 173 Emit(BC_ADVANCE_CP, by); 174 advance_current_end_ = pc_; 175} 176 177void RegExpBytecodeGenerator::CheckGreedyLoop( 178 Label* on_tos_equals_current_position) { 179 Emit(BC_CHECK_GREEDY, 0); 180 EmitOrLink(on_tos_equals_current_position); 181} 182 183void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset, 184 Label* on_failure, 185 bool check_bounds, 186 int characters, 187 int eats_at_least) { 188 DCHECK_GE(eats_at_least, characters); 189 if (eats_at_least > characters && check_bounds) { 190 DCHECK(is_int24(cp_offset + eats_at_least)); 191 Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least); 192 EmitOrLink(on_failure); 193 check_bounds = false; // Load below doesn't need to check. 194 } 195 196 DCHECK_LE(kMinCPOffset, cp_offset); 197 DCHECK_GE(kMaxCPOffset, cp_offset); 198 int bytecode; 199 if (check_bounds) { 200 if (characters == 4) { 201 bytecode = BC_LOAD_4_CURRENT_CHARS; 202 } else if (characters == 2) { 203 bytecode = BC_LOAD_2_CURRENT_CHARS; 204 } else { 205 DCHECK_EQ(1, characters); 206 bytecode = BC_LOAD_CURRENT_CHAR; 207 } 208 } else { 209 if (characters == 4) { 210 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 211 } else if (characters == 2) { 212 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 213 } else { 214 DCHECK_EQ(1, characters); 215 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 216 } 217 } 218 Emit(bytecode, cp_offset); 219 if (check_bounds) EmitOrLink(on_failure); 220} 221 222void RegExpBytecodeGenerator::CheckCharacterLT(base::uc16 limit, 223 Label* on_less) { 224 Emit(BC_CHECK_LT, limit); 225 EmitOrLink(on_less); 226} 227 228void RegExpBytecodeGenerator::CheckCharacterGT(base::uc16 limit, 229 Label* on_greater) { 230 Emit(BC_CHECK_GT, limit); 231 EmitOrLink(on_greater); 232} 233 234void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) { 235 if (c > MAX_FIRST_ARG) { 236 Emit(BC_CHECK_4_CHARS, 0); 237 Emit32(c); 238 } else { 239 Emit(BC_CHECK_CHAR, c); 240 } 241 EmitOrLink(on_equal); 242} 243 244void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) { 245 Emit(BC_CHECK_AT_START, cp_offset); 246 EmitOrLink(on_at_start); 247} 248 249void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset, 250 Label* on_not_at_start) { 251 Emit(BC_CHECK_NOT_AT_START, cp_offset); 252 EmitOrLink(on_not_at_start); 253} 254 255void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c, 256 Label* on_not_equal) { 257 if (c > MAX_FIRST_ARG) { 258 Emit(BC_CHECK_NOT_4_CHARS, 0); 259 Emit32(c); 260 } else { 261 Emit(BC_CHECK_NOT_CHAR, c); 262 } 263 EmitOrLink(on_not_equal); 264} 265 266void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask, 267 Label* on_equal) { 268 if (c > MAX_FIRST_ARG) { 269 Emit(BC_AND_CHECK_4_CHARS, 0); 270 Emit32(c); 271 } else { 272 Emit(BC_AND_CHECK_CHAR, c); 273 } 274 Emit32(mask); 275 EmitOrLink(on_equal); 276} 277 278void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c, 279 uint32_t mask, 280 Label* on_not_equal) { 281 if (c > MAX_FIRST_ARG) { 282 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 283 Emit32(c); 284 } else { 285 Emit(BC_AND_CHECK_NOT_CHAR, c); 286 } 287 Emit32(mask); 288 EmitOrLink(on_not_equal); 289} 290 291void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd( 292 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) { 293 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 294 Emit16(minus); 295 Emit16(mask); 296 EmitOrLink(on_not_equal); 297} 298 299void RegExpBytecodeGenerator::CheckCharacterInRange(base::uc16 from, 300 base::uc16 to, 301 Label* on_in_range) { 302 Emit(BC_CHECK_CHAR_IN_RANGE, 0); 303 Emit16(from); 304 Emit16(to); 305 EmitOrLink(on_in_range); 306} 307 308void RegExpBytecodeGenerator::CheckCharacterNotInRange(base::uc16 from, 309 base::uc16 to, 310 Label* on_not_in_range) { 311 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); 312 Emit16(from); 313 Emit16(to); 314 EmitOrLink(on_not_in_range); 315} 316 317void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table, 318 Label* on_bit_set) { 319 Emit(BC_CHECK_BIT_IN_TABLE, 0); 320 EmitOrLink(on_bit_set); 321 for (int i = 0; i < kTableSize; i += kBitsPerByte) { 322 int byte = 0; 323 for (int j = 0; j < kBitsPerByte; j++) { 324 if (table->get(i + j) != 0) byte |= 1 << j; 325 } 326 Emit8(byte); 327 } 328} 329 330void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg, 331 bool read_backward, 332 Label* on_not_equal) { 333 DCHECK_LE(0, start_reg); 334 DCHECK_GE(kMaxRegister, start_reg); 335 Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, 336 start_reg); 337 EmitOrLink(on_not_equal); 338} 339 340void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase( 341 int start_reg, bool read_backward, bool unicode, Label* on_not_equal) { 342 DCHECK_LE(0, start_reg); 343 DCHECK_GE(kMaxRegister, start_reg); 344 Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD 345 : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) 346 : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE 347 : BC_CHECK_NOT_BACK_REF_NO_CASE), 348 start_reg); 349 EmitOrLink(on_not_equal); 350} 351 352void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand, 353 Label* on_less_than) { 354 DCHECK_LE(0, register_index); 355 DCHECK_GE(kMaxRegister, register_index); 356 Emit(BC_CHECK_REGISTER_LT, register_index); 357 Emit32(comparand); 358 EmitOrLink(on_less_than); 359} 360 361void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand, 362 Label* on_greater_or_equal) { 363 DCHECK_LE(0, register_index); 364 DCHECK_GE(kMaxRegister, register_index); 365 Emit(BC_CHECK_REGISTER_GE, register_index); 366 Emit32(comparand); 367 EmitOrLink(on_greater_or_equal); 368} 369 370void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index, 371 Label* on_eq) { 372 DCHECK_LE(0, register_index); 373 DCHECK_GE(kMaxRegister, register_index); 374 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 375 EmitOrLink(on_eq); 376} 377 378Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) { 379 Bind(&backtrack_); 380 Backtrack(); 381 382 Handle<ByteArray> array; 383 if (FLAG_regexp_peephole_optimization) { 384 array = RegExpBytecodePeepholeOptimization::OptimizeBytecode( 385 isolate_, zone(), source, buffer_.data(), length(), jump_edges_); 386 } else { 387 array = isolate_->factory()->NewByteArray(length()); 388 Copy(array->GetDataStartAddress()); 389 } 390 391 return array; 392} 393 394int RegExpBytecodeGenerator::length() { return pc_; } 395 396void RegExpBytecodeGenerator::Copy(byte* a) { 397 MemCopy(a, buffer_.data(), length()); 398} 399 400void RegExpBytecodeGenerator::ExpandBuffer() { 401 // TODO(jgruber): The growth strategy could be smarter for large sizes. 402 // TODO(jgruber): It's not necessary to default-initialize new elements. 403 buffer_.resize(buffer_.size() * 2); 404} 405 406} // namespace internal 407} // namespace v8 408