1// Copyright 2013 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#if V8_TARGET_ARCH_ARM64 6 7#include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 8 9#include "src/codegen/arm64/macro-assembler-arm64-inl.h" 10#include "src/codegen/macro-assembler.h" 11#include "src/logging/log.h" 12#include "src/objects/objects-inl.h" 13#include "src/regexp/regexp-macro-assembler.h" 14#include "src/regexp/regexp-stack.h" 15#include "src/snapshot/embedded/embedded-data.h" 16#include "src/strings/unicode.h" 17 18namespace v8 { 19namespace internal { 20 21/* 22 * This assembler uses the following register assignment convention: 23 * - w19 : Used to temporarely store a value before a call to C code. 24 * See CheckNotBackReferenceIgnoreCase. 25 * - x20 : Pointer to the current Code object, 26 * it includes the heap object tag. 27 * - w21 : Current position in input, as negative offset from 28 * the end of the string. Please notice that this is 29 * the byte offset, not the character offset! 30 * - w22 : Currently loaded character. Must be loaded using 31 * LoadCurrentCharacter before using any of the dispatch methods. 32 * - x23 : Points to tip of backtrack stack. 33 * - w24 : Position of the first character minus one: non_position_value. 34 * Used to initialize capture registers. 35 * - x25 : Address at the end of the input string: input_end. 36 * Points to byte after last character in input. 37 * - x26 : Address at the start of the input string: input_start. 38 * - w27 : Where to start in the input string. 39 * - x28 : Output array pointer. 40 * - x29/fp : Frame pointer. Used to access arguments, local variables and 41 * RegExp registers. 42 * - x16/x17 : IP registers, used by assembler. Very volatile. 43 * - sp : Points to tip of C stack. 44 * 45 * - x0-x7 : Used as a cache to store 32 bit capture registers. These 46 * registers need to be retained every time a call to C code 47 * is done. 48 * 49 * The remaining registers are free for computations. 50 * Each call to a public method should retain this convention. 51 * 52 * The stack will have the following structure: 53 * 54 * Location Name Description 55 * (as referred to 56 * in the code) 57 * 58 * - fp[104] Address regexp Address of the JSRegExp object. Unused in 59 * native code, passed to match signature of 60 * the interpreter. 61 * - fp[96] isolate Address of the current isolate. 62 * ^^^^^^^^^ sp when called ^^^^^^^^^ 63 * - fp[16..88] r19-r28 Backup of CalleeSaved registers. 64 * - fp[8] lr Return from the RegExp code. 65 * - fp[0] fp Old frame pointer. 66 * ^^^^^^^^^ fp ^^^^^^^^^ 67 * - fp[-8] direct_call 1 => Direct call from JavaScript code. 68 * 0 => Call through the runtime system. 69 * - fp[-16] output_size Output may fit multiple sets of matches. 70 * - fp[-24] input Handle containing the input string. 71 * - fp[-32] success_counter 72 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^ 73 * - fp[-40] register N Capture registers initialized with 74 * - fp[-44] register N + 1 non_position_value. 75 * ... The first kNumCachedRegisters (N) registers 76 * ... are cached in x0 to x7. 77 * ... Only positions must be stored in the first 78 * - ... num_saved_registers_ registers. 79 * - ... 80 * - register N + num_registers - 1 81 * ^^^^^^^^^ sp ^^^^^^^^^ 82 * 83 * The first num_saved_registers_ registers are initialized to point to 84 * "character -1" in the string (i.e., char_size() bytes before the first 85 * character of the string). The remaining registers start out as garbage. 86 * 87 * The data up to the return address must be placed there by the calling 88 * code and the remaining arguments are passed in registers, e.g. by calling the 89 * code entry as cast to a function with the signature: 90 * int (*match)(String input_string, 91 * int start_index, 92 * Address start, 93 * Address end, 94 * int* capture_output_array, 95 * int num_capture_registers, 96 * bool direct_call = false, 97 * Isolate* isolate, 98 * Address regexp); 99 * The call is performed by NativeRegExpMacroAssembler::Execute() 100 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 101 */ 102 103#define __ ACCESS_MASM(masm_) 104 105const int RegExpMacroAssemblerARM64::kRegExpCodeSize; 106 107RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate, 108 Zone* zone, Mode mode, 109 int registers_to_save) 110 : NativeRegExpMacroAssembler(isolate, zone), 111 masm_(std::make_unique<MacroAssembler>( 112 isolate, CodeObjectRequired::kYes, 113 NewAssemblerBuffer(kRegExpCodeSize))), 114 no_root_array_scope_(masm_.get()), 115 mode_(mode), 116 num_registers_(registers_to_save), 117 num_saved_registers_(registers_to_save), 118 entry_label_(), 119 start_label_(), 120 success_label_(), 121 backtrack_label_(), 122 exit_label_() { 123 DCHECK_EQ(0, registers_to_save % 2); 124 // We can cache at most 16 W registers in x0-x7. 125 STATIC_ASSERT(kNumCachedRegisters <= 16); 126 STATIC_ASSERT((kNumCachedRegisters % 2) == 0); 127 __ CallTarget(); 128 129 __ B(&entry_label_); // We'll write the entry code later. 130 __ Bind(&start_label_); // And then continue from here. 131} 132 133RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() = default; 134 135void RegExpMacroAssemblerARM64::AbortedCodeGeneration() { 136 masm_->AbortedCodeGeneration(); 137 entry_label_.Unuse(); 138 start_label_.Unuse(); 139 success_label_.Unuse(); 140 backtrack_label_.Unuse(); 141 exit_label_.Unuse(); 142 check_preempt_label_.Unuse(); 143 stack_overflow_label_.Unuse(); 144 fallback_label_.Unuse(); 145} 146 147int RegExpMacroAssemblerARM64::stack_limit_slack() { 148 return RegExpStack::kStackLimitSlack; 149} 150 151 152void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) { 153 if (by != 0) { 154 __ Add(current_input_offset(), 155 current_input_offset(), by * char_size()); 156 } 157} 158 159 160void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) { 161 DCHECK((reg >= 0) && (reg < num_registers_)); 162 if (by != 0) { 163 RegisterState register_state = GetRegisterState(reg); 164 switch (register_state) { 165 case STACKED: 166 __ Ldr(w10, register_location(reg)); 167 __ Add(w10, w10, by); 168 __ Str(w10, register_location(reg)); 169 break; 170 case CACHED_LSW: { 171 Register to_advance = GetCachedRegister(reg); 172 __ Add(to_advance, to_advance, by); 173 break; 174 } 175 case CACHED_MSW: { 176 Register to_advance = GetCachedRegister(reg); 177 // Sign-extend to int64, shift as uint64, cast back to int64. 178 __ Add( 179 to_advance, to_advance, 180 static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by)) 181 << kWRegSizeInBits)); 182 break; 183 } 184 default: 185 UNREACHABLE(); 186 } 187 } 188} 189 190 191void RegExpMacroAssemblerARM64::Backtrack() { 192 CheckPreemption(); 193 if (has_backtrack_limit()) { 194 Label next; 195 UseScratchRegisterScope temps(masm_.get()); 196 Register scratch = temps.AcquireW(); 197 __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount)); 198 __ Add(scratch, scratch, 1); 199 __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount)); 200 __ Cmp(scratch, Operand(backtrack_limit())); 201 __ B(ne, &next); 202 203 // Backtrack limit exceeded. 204 if (can_fallback()) { 205 __ B(&fallback_label_); 206 } else { 207 // Can't fallback, so we treat it as a failed match. 208 Fail(); 209 } 210 211 __ bind(&next); 212 } 213 Pop(w10); 214 __ Add(x10, code_pointer(), Operand(w10, UXTW)); 215 __ Br(x10); 216} 217 218 219void RegExpMacroAssemblerARM64::Bind(Label* label) { 220 __ Bind(label); 221} 222 223void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) { 224 __ BindJumpTarget(label); 225} 226 227void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) { 228 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal); 229} 230 231void RegExpMacroAssemblerARM64::CheckCharacterGT(base::uc16 limit, 232 Label* on_greater) { 233 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater); 234} 235 236void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset, 237 Label* on_at_start) { 238 __ Add(w10, current_input_offset(), 239 Operand(-char_size() + cp_offset * char_size())); 240 __ Cmp(w10, string_start_minus_one()); 241 BranchOrBacktrack(eq, on_at_start); 242} 243 244void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset, 245 Label* on_not_at_start) { 246 __ Add(w10, current_input_offset(), 247 Operand(-char_size() + cp_offset * char_size())); 248 __ Cmp(w10, string_start_minus_one()); 249 BranchOrBacktrack(ne, on_not_at_start); 250} 251 252void RegExpMacroAssemblerARM64::CheckCharacterLT(base::uc16 limit, 253 Label* on_less) { 254 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less); 255} 256 257void RegExpMacroAssemblerARM64::CheckCharacters( 258 base::Vector<const base::uc16> str, int cp_offset, Label* on_failure, 259 bool check_end_of_string) { 260 // This method is only ever called from the cctests. 261 262 if (check_end_of_string) { 263 // Is last character of required match inside string. 264 CheckPosition(cp_offset + str.length() - 1, on_failure); 265 } 266 267 Register characters_address = x11; 268 269 __ Add(characters_address, 270 input_end(), 271 Operand(current_input_offset(), SXTW)); 272 if (cp_offset != 0) { 273 __ Add(characters_address, characters_address, cp_offset * char_size()); 274 } 275 276 for (int i = 0; i < str.length(); i++) { 277 if (mode_ == LATIN1) { 278 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex)); 279 DCHECK_GE(String::kMaxOneByteCharCode, str[i]); 280 } else { 281 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex)); 282 } 283 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure); 284 } 285} 286 287void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { 288 __ Ldr(w10, MemOperand(backtrack_stackpointer())); 289 __ Cmp(current_input_offset(), w10); 290 __ Cset(x11, eq); 291 __ Add(backtrack_stackpointer(), 292 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2)); 293 BranchOrBacktrack(eq, on_equal); 294} 295 296void RegExpMacroAssemblerARM64::PushCachedRegisters() { 297 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 298 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2); 299 __ PushCPURegList(cached_registers); 300} 301 302void RegExpMacroAssemblerARM64::PopCachedRegisters() { 303 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 304 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2); 305 __ PopCPURegList(cached_registers); 306} 307 308void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( 309 int start_reg, bool read_backward, bool unicode, Label* on_no_match) { 310 Label fallthrough; 311 312 Register capture_start_offset = w10; 313 // Save the capture length in a callee-saved register so it will 314 // be preserved if we call a C helper. 315 Register capture_length = w19; 316 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length)); 317 318 // Find length of back-referenced capture. 319 DCHECK_EQ(0, start_reg % 2); 320 if (start_reg < kNumCachedRegisters) { 321 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg)); 322 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 323 } else { 324 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10)); 325 } 326 __ Sub(capture_length, w11, capture_start_offset); // Length to check. 327 328 // At this point, the capture registers are either both set or both cleared. 329 // If the capture length is zero, then the capture is either empty or cleared. 330 // Fall through in both cases. 331 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 332 333 // Check that there are enough characters left in the input. 334 if (read_backward) { 335 __ Add(w12, string_start_minus_one(), capture_length); 336 __ Cmp(current_input_offset(), w12); 337 BranchOrBacktrack(le, on_no_match); 338 } else { 339 __ Cmn(capture_length, current_input_offset()); 340 BranchOrBacktrack(gt, on_no_match); 341 } 342 343 if (mode_ == LATIN1) { 344 Label success; 345 Label fail; 346 Label loop_check; 347 348 Register capture_start_address = x12; 349 Register capture_end_addresss = x13; 350 Register current_position_address = x14; 351 352 __ Add(capture_start_address, 353 input_end(), 354 Operand(capture_start_offset, SXTW)); 355 __ Add(capture_end_addresss, 356 capture_start_address, 357 Operand(capture_length, SXTW)); 358 __ Add(current_position_address, 359 input_end(), 360 Operand(current_input_offset(), SXTW)); 361 if (read_backward) { 362 // Offset by length when matching backwards. 363 __ Sub(current_position_address, current_position_address, 364 Operand(capture_length, SXTW)); 365 } 366 367 Label loop; 368 __ Bind(&loop); 369 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 370 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 371 __ Cmp(w10, w11); 372 __ B(eq, &loop_check); 373 374 // Mismatch, try case-insensitive match (converting letters to lower-case). 375 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case. 376 __ Orr(w11, w11, 0x20); // Also convert input character. 377 __ Cmp(w11, w10); 378 __ B(ne, &fail); 379 __ Sub(w10, w10, 'a'); 380 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter? 381 __ B(ls, &loop_check); // In range 'a'-'z'. 382 // Latin-1: Check for values in range [224,254] but not 247. 383 __ Sub(w10, w10, 224 - 'a'); 384 __ Cmp(w10, 254 - 224); 385 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247. 386 __ B(eq, &fail); // Weren't Latin-1 letters. 387 388 __ Bind(&loop_check); 389 __ Cmp(capture_start_address, capture_end_addresss); 390 __ B(lt, &loop); 391 __ B(&success); 392 393 __ Bind(&fail); 394 BranchOrBacktrack(al, on_no_match); 395 396 __ Bind(&success); 397 // Compute new value of character position after the matched part. 398 __ Sub(current_input_offset().X(), current_position_address, input_end()); 399 if (read_backward) { 400 __ Sub(current_input_offset().X(), current_input_offset().X(), 401 Operand(capture_length, SXTW)); 402 } 403 if (FLAG_debug_code) { 404 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 405 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 406 // The current input offset should be <= 0, and fit in a W register. 407 __ Check(le, AbortReason::kOffsetOutOfRange); 408 } 409 } else { 410 DCHECK(mode_ == UC16); 411 int argument_count = 4; 412 413 PushCachedRegisters(); 414 415 // Put arguments into arguments registers. 416 // Parameters are 417 // x0: Address byte_offset1 - Address captured substring's start. 418 // x1: Address byte_offset2 - Address of current character position. 419 // w2: size_t byte_length - length of capture in bytes(!) 420 // x3: Isolate* isolate. 421 422 // Address of start of capture. 423 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW)); 424 // Length of capture. 425 __ Mov(w2, capture_length); 426 // Address of current input position. 427 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW)); 428 if (read_backward) { 429 __ Sub(x1, x1, Operand(capture_length, SXTW)); 430 } 431 // Isolate. 432 __ Mov(x3, ExternalReference::isolate_address(isolate())); 433 434 { 435 AllowExternalCallThatCantCauseGC scope(masm_.get()); 436 ExternalReference function = 437 unicode 438 ? ExternalReference::re_case_insensitive_compare_unicode() 439 : ExternalReference::re_case_insensitive_compare_non_unicode(); 440 __ CallCFunction(function, argument_count); 441 } 442 443 // Check if function returned non-zero for success or zero for failure. 444 // x0 is one of the registers used as a cache so it must be tested before 445 // the cache is restored. 446 __ Cmp(x0, 0); 447 PopCachedRegisters(); 448 BranchOrBacktrack(eq, on_no_match); 449 450 // On success, advance position by length of capture. 451 if (read_backward) { 452 __ Sub(current_input_offset(), current_input_offset(), capture_length); 453 } else { 454 __ Add(current_input_offset(), current_input_offset(), capture_length); 455 } 456 } 457 458 __ Bind(&fallthrough); 459} 460 461void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg, 462 bool read_backward, 463 Label* on_no_match) { 464 Label fallthrough; 465 466 Register capture_start_address = x12; 467 Register capture_end_address = x13; 468 Register current_position_address = x14; 469 Register capture_length = w15; 470 471 // Find length of back-referenced capture. 472 DCHECK_EQ(0, start_reg % 2); 473 if (start_reg < kNumCachedRegisters) { 474 __ Mov(x10, GetCachedRegister(start_reg)); 475 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 476 } else { 477 __ Ldp(w11, w10, capture_location(start_reg, x10)); 478 } 479 __ Sub(capture_length, w11, w10); // Length to check. 480 481 // At this point, the capture registers are either both set or both cleared. 482 // If the capture length is zero, then the capture is either empty or cleared. 483 // Fall through in both cases. 484 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 485 486 // Check that there are enough characters left in the input. 487 if (read_backward) { 488 __ Add(w12, string_start_minus_one(), capture_length); 489 __ Cmp(current_input_offset(), w12); 490 BranchOrBacktrack(le, on_no_match); 491 } else { 492 __ Cmn(capture_length, current_input_offset()); 493 BranchOrBacktrack(gt, on_no_match); 494 } 495 496 // Compute pointers to match string and capture string 497 __ Add(capture_start_address, input_end(), Operand(w10, SXTW)); 498 __ Add(capture_end_address, 499 capture_start_address, 500 Operand(capture_length, SXTW)); 501 __ Add(current_position_address, 502 input_end(), 503 Operand(current_input_offset(), SXTW)); 504 if (read_backward) { 505 // Offset by length when matching backwards. 506 __ Sub(current_position_address, current_position_address, 507 Operand(capture_length, SXTW)); 508 } 509 510 Label loop; 511 __ Bind(&loop); 512 if (mode_ == LATIN1) { 513 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 514 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 515 } else { 516 DCHECK(mode_ == UC16); 517 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex)); 518 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex)); 519 } 520 __ Cmp(w10, w11); 521 BranchOrBacktrack(ne, on_no_match); 522 __ Cmp(capture_start_address, capture_end_address); 523 __ B(lt, &loop); 524 525 // Move current character position to position after match. 526 __ Sub(current_input_offset().X(), current_position_address, input_end()); 527 if (read_backward) { 528 __ Sub(current_input_offset().X(), current_input_offset().X(), 529 Operand(capture_length, SXTW)); 530 } 531 532 if (FLAG_debug_code) { 533 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 534 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 535 // The current input offset should be <= 0, and fit in a W register. 536 __ Check(le, AbortReason::kOffsetOutOfRange); 537 } 538 __ Bind(&fallthrough); 539} 540 541 542void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c, 543 Label* on_not_equal) { 544 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal); 545} 546 547 548void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c, 549 uint32_t mask, 550 Label* on_equal) { 551 __ And(w10, current_character(), mask); 552 CompareAndBranchOrBacktrack(w10, c, eq, on_equal); 553} 554 555 556void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c, 557 unsigned mask, 558 Label* on_not_equal) { 559 __ And(w10, current_character(), mask); 560 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 561} 562 563void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd( 564 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) { 565 DCHECK_GT(String::kMaxUtf16CodeUnit, minus); 566 __ Sub(w10, current_character(), minus); 567 __ And(w10, w10, mask); 568 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 569} 570 571void RegExpMacroAssemblerARM64::CheckCharacterInRange(base::uc16 from, 572 base::uc16 to, 573 Label* on_in_range) { 574 __ Sub(w10, current_character(), from); 575 // Unsigned lower-or-same condition. 576 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range); 577} 578 579void RegExpMacroAssemblerARM64::CheckCharacterNotInRange( 580 base::uc16 from, base::uc16 to, Label* on_not_in_range) { 581 __ Sub(w10, current_character(), from); 582 // Unsigned higher condition. 583 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range); 584} 585 586void RegExpMacroAssemblerARM64::CallIsCharacterInRangeArray( 587 const ZoneList<CharacterRange>* ranges) { 588 static const int kNumArguments = 3; 589 __ Mov(w0, current_character()); 590 __ Mov(x1, GetOrAddRangeArray(ranges)); 591 __ Mov(x2, ExternalReference::isolate_address(isolate())); 592 593 { 594 // We have a frame (set up in GetCode), but the assembler doesn't know. 595 FrameScope scope(masm_.get(), StackFrame::MANUAL); 596 __ CallCFunction(ExternalReference::re_is_character_in_range_array(), 597 kNumArguments); 598 } 599 600 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 601} 602 603bool RegExpMacroAssemblerARM64::CheckCharacterInRangeArray( 604 const ZoneList<CharacterRange>* ranges, Label* on_in_range) { 605 // Note: due to the arm64 oddity of x0 being a 'cached register', 606 // pushing/popping registers must happen outside of CallIsCharacterInRange 607 // s.t. we can compare the return value to 0 before popping x0. 608 PushCachedRegisters(); 609 CallIsCharacterInRangeArray(ranges); 610 __ Cmp(x0, 0); 611 PopCachedRegisters(); 612 BranchOrBacktrack(ne, on_in_range); 613 return true; 614} 615 616bool RegExpMacroAssemblerARM64::CheckCharacterNotInRangeArray( 617 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) { 618 // Note: due to the arm64 oddity of x0 being a 'cached register', 619 // pushing/popping registers must happen outside of CallIsCharacterInRange 620 // s.t. we can compare the return value to 0 before popping x0. 621 PushCachedRegisters(); 622 CallIsCharacterInRangeArray(ranges); 623 __ Cmp(x0, 0); 624 PopCachedRegisters(); 625 BranchOrBacktrack(eq, on_not_in_range); 626 return true; 627} 628 629void RegExpMacroAssemblerARM64::CheckBitInTable( 630 Handle<ByteArray> table, 631 Label* on_bit_set) { 632 __ Mov(x11, Operand(table)); 633 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) { 634 __ And(w10, current_character(), kTableMask); 635 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag); 636 } else { 637 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag); 638 } 639 __ Ldrb(w11, MemOperand(x11, w10, UXTW)); 640 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set); 641} 642 643bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass( 644 StandardCharacterSet type, Label* on_no_match) { 645 // Range checks (c in min..max) are generally implemented by an unsigned 646 // (c - min) <= (max - min) check 647 // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16). 648 switch (type) { 649 case StandardCharacterSet::kWhitespace: 650 // Match space-characters. 651 if (mode_ == LATIN1) { 652 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 653 Label success; 654 // Check for ' ' or 0x00A0. 655 __ Cmp(current_character(), ' '); 656 __ Ccmp(current_character(), 0x00A0, ZFlag, ne); 657 __ B(eq, &success); 658 // Check range 0x09..0x0D. 659 __ Sub(w10, current_character(), '\t'); 660 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match); 661 __ Bind(&success); 662 return true; 663 } 664 return false; 665 case StandardCharacterSet::kNotWhitespace: 666 // The emitted code for generic character classes is good enough. 667 return false; 668 case StandardCharacterSet::kDigit: 669 // Match ASCII digits ('0'..'9'). 670 __ Sub(w10, current_character(), '0'); 671 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match); 672 return true; 673 case StandardCharacterSet::kNotDigit: 674 // Match ASCII non-digits. 675 __ Sub(w10, current_character(), '0'); 676 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match); 677 return true; 678 case StandardCharacterSet::kNotLineTerminator: { 679 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) 680 // Here we emit the conditional branch only once at the end to make branch 681 // prediction more efficient, even though we could branch out of here 682 // as soon as a character matches. 683 __ Cmp(current_character(), 0x0A); 684 __ Ccmp(current_character(), 0x0D, ZFlag, ne); 685 if (mode_ == UC16) { 686 __ Sub(w10, current_character(), 0x2028); 687 // If the Z flag was set we clear the flags to force a branch. 688 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 689 // ls -> !((C==1) && (Z==0)) 690 BranchOrBacktrack(ls, on_no_match); 691 } else { 692 BranchOrBacktrack(eq, on_no_match); 693 } 694 return true; 695 } 696 case StandardCharacterSet::kLineTerminator: { 697 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) 698 // We have to check all 4 newline characters before emitting 699 // the conditional branch. 700 __ Cmp(current_character(), 0x0A); 701 __ Ccmp(current_character(), 0x0D, ZFlag, ne); 702 if (mode_ == UC16) { 703 __ Sub(w10, current_character(), 0x2028); 704 // If the Z flag was set we clear the flags to force a fall-through. 705 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 706 // hi -> (C==1) && (Z==0) 707 BranchOrBacktrack(hi, on_no_match); 708 } else { 709 BranchOrBacktrack(ne, on_no_match); 710 } 711 return true; 712 } 713 case StandardCharacterSet::kWord: { 714 if (mode_ != LATIN1) { 715 // Table is 256 entries, so all Latin1 characters can be tested. 716 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match); 717 } 718 ExternalReference map = ExternalReference::re_word_character_map(); 719 __ Mov(x10, map); 720 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 721 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match); 722 return true; 723 } 724 case StandardCharacterSet::kNotWord: { 725 Label done; 726 if (mode_ != LATIN1) { 727 // Table is 256 entries, so all Latin1 characters can be tested. 728 __ Cmp(current_character(), 'z'); 729 __ B(hi, &done); 730 } 731 ExternalReference map = ExternalReference::re_word_character_map(); 732 __ Mov(x10, map); 733 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 734 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match); 735 __ Bind(&done); 736 return true; 737 } 738 case StandardCharacterSet::kEverything: 739 // Match any character. 740 return true; 741 } 742} 743 744void RegExpMacroAssemblerARM64::Fail() { 745 __ Mov(w0, FAILURE); 746 __ B(&exit_label_); 747} 748 749void RegExpMacroAssemblerARM64::LoadRegExpStackPointerFromMemory(Register dst) { 750 ExternalReference ref = 751 ExternalReference::address_of_regexp_stack_stack_pointer(isolate()); 752 __ Mov(dst, ref); 753 __ Ldr(dst, MemOperand(dst)); 754} 755 756void RegExpMacroAssemblerARM64::StoreRegExpStackPointerToMemory( 757 Register src, Register scratch) { 758 ExternalReference ref = 759 ExternalReference::address_of_regexp_stack_stack_pointer(isolate()); 760 __ Mov(scratch, ref); 761 __ Str(src, MemOperand(scratch)); 762} 763 764void RegExpMacroAssemblerARM64::PushRegExpBasePointer(Register stack_pointer, 765 Register scratch) { 766 ExternalReference ref = 767 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 768 __ Mov(scratch, ref); 769 __ Ldr(scratch, MemOperand(scratch)); 770 __ Sub(scratch, stack_pointer, scratch); 771 __ Str(scratch, MemOperand(frame_pointer(), kRegExpStackBasePointer)); 772} 773 774void RegExpMacroAssemblerARM64::PopRegExpBasePointer(Register stack_pointer_out, 775 Register scratch) { 776 ExternalReference ref = 777 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 778 __ Ldr(stack_pointer_out, 779 MemOperand(frame_pointer(), kRegExpStackBasePointer)); 780 __ Mov(scratch, ref); 781 __ Ldr(scratch, MemOperand(scratch)); 782 __ Add(stack_pointer_out, stack_pointer_out, scratch); 783 StoreRegExpStackPointerToMemory(stack_pointer_out, scratch); 784} 785 786Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { 787 Label return_w0; 788 // Finalize code - write the entry point code now we know how many 789 // registers we need. 790 791 // Entry code: 792 __ Bind(&entry_label_); 793 794 // Arguments on entry: 795 // x0: String input 796 // x1: int start_offset 797 // x2: byte* input_start 798 // x3: byte* input_end 799 // x4: int* output array 800 // x5: int output array size 801 // x6: int direct_call 802 // x7: Isolate* isolate 803 // 804 // sp[0]: secondary link/return address used by native call 805 806 // Tell the system that we have a stack frame. Because the type is MANUAL, no 807 // code is generated. 808 FrameScope scope(masm_.get(), StackFrame::MANUAL); 809 810 // Push registers on the stack, only push the argument registers that we need. 811 CPURegList argument_registers(x0, x5, x6, x7); 812 813 CPURegList registers_to_retain = kCalleeSaved; 814 DCHECK_EQ(registers_to_retain.Count(), kNumCalleeSavedRegisters); 815 816 __ PushCPURegList(registers_to_retain); 817 __ Push<TurboAssembler::kSignLR>(lr, fp); 818 __ PushCPURegList(argument_registers); 819 820 // Set frame pointer in place. 821 __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize); 822 823 // Initialize callee-saved registers. 824 __ Mov(start_offset(), w1); 825 __ Mov(input_start(), x2); 826 __ Mov(input_end(), x3); 827 __ Mov(output_array(), x4); 828 829 // Make sure the stack alignment will be respected. 830 const int alignment = masm_->ActivationFrameAlignment(); 831 DCHECK_EQ(alignment % 16, 0); 832 const int align_mask = (alignment / kWRegSize) - 1; 833 834 // Make room for stack locals. 835 static constexpr int kWRegPerXReg = kXRegSize / kWRegSize; 836 DCHECK_EQ(kNumberOfStackLocals * kWRegPerXReg, 837 ((kNumberOfStackLocals * kWRegPerXReg) + align_mask) & ~align_mask); 838 __ Claim(kNumberOfStackLocals * kWRegPerXReg); 839 840 // Initialize backtrack stack pointer. It must not be clobbered from here on. 841 // Note the backtrack_stackpointer is callee-saved. 842 STATIC_ASSERT(backtrack_stackpointer() == x23); 843 LoadRegExpStackPointerFromMemory(backtrack_stackpointer()); 844 845 // Store the regexp base pointer - we'll later restore it / write it to 846 // memory when returning from this irregexp code object. 847 PushRegExpBasePointer(backtrack_stackpointer(), x11); 848 849 // Set the number of registers we will need to allocate, that is: 850 // - (num_registers_ - kNumCachedRegisters) (W registers) 851 const int num_stack_registers = 852 std::max(0, num_registers_ - kNumCachedRegisters); 853 const int num_wreg_to_allocate = 854 (num_stack_registers + align_mask) & ~align_mask; 855 856 { 857 // Check if we have space on the stack. 858 Label stack_limit_hit, stack_ok; 859 860 ExternalReference stack_limit = 861 ExternalReference::address_of_jslimit(isolate()); 862 __ Mov(x10, stack_limit); 863 __ Ldr(x10, MemOperand(x10)); 864 __ Subs(x10, sp, x10); 865 866 // Handle it if the stack pointer is already below the stack limit. 867 __ B(ls, &stack_limit_hit); 868 869 // Check if there is room for the variable number of registers above 870 // the stack limit. 871 __ Cmp(x10, num_wreg_to_allocate * kWRegSize); 872 __ B(hs, &stack_ok); 873 874 // Exit with OutOfMemory exception. There is not enough space on the stack 875 // for our working registers. 876 __ Mov(w0, EXCEPTION); 877 __ B(&return_w0); 878 879 __ Bind(&stack_limit_hit); 880 CallCheckStackGuardState(x10); 881 // If returned value is non-zero, we exit with the returned value as result. 882 __ Cbnz(w0, &return_w0); 883 884 __ Bind(&stack_ok); 885 } 886 887 // Allocate space on stack. 888 __ Claim(num_wreg_to_allocate, kWRegSize); 889 890 // Initialize success_counter and kBacktrackCount with 0. 891 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter)); 892 __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount)); 893 894 // Find negative length (offset of start relative to end). 895 __ Sub(x10, input_start(), input_end()); 896 if (FLAG_debug_code) { 897 // Check that the size of the input string chars is in range. 898 __ Neg(x11, x10); 899 __ Cmp(x11, SeqTwoByteString::kMaxCharsSize); 900 __ Check(ls, AbortReason::kInputStringTooLong); 901 } 902 __ Mov(current_input_offset(), w10); 903 904 // The non-position value is used as a clearing value for the 905 // capture registers, it corresponds to the position of the first character 906 // minus one. 907 __ Sub(string_start_minus_one(), current_input_offset(), char_size()); 908 __ Sub(string_start_minus_one(), string_start_minus_one(), 909 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); 910 // We can store this value twice in an X register for initializing 911 // on-stack registers later. 912 __ Orr(twice_non_position_value(), string_start_minus_one().X(), 913 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits)); 914 915 // Initialize code pointer register. 916 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 917 918 Label load_char_start_regexp; 919 { 920 Label start_regexp; 921 // Load newline if index is at start, previous character otherwise. 922 __ Cbnz(start_offset(), &load_char_start_regexp); 923 __ Mov(current_character(), '\n'); 924 __ B(&start_regexp); 925 926 // Global regexp restarts matching here. 927 __ Bind(&load_char_start_regexp); 928 // Load previous char as initial value of current character register. 929 LoadCurrentCharacterUnchecked(-1, 1); 930 __ Bind(&start_regexp); 931 } 932 933 // Initialize on-stack registers. 934 if (num_saved_registers_ > 0) { 935 ClearRegisters(0, num_saved_registers_ - 1); 936 } 937 938 // Execute. 939 __ B(&start_label_); 940 941 if (backtrack_label_.is_linked()) { 942 __ Bind(&backtrack_label_); 943 Backtrack(); 944 } 945 946 if (success_label_.is_linked()) { 947 Register first_capture_start = w15; 948 949 // Save captures when successful. 950 __ Bind(&success_label_); 951 952 if (num_saved_registers_ > 0) { 953 // V8 expects the output to be an int32_t array. 954 Register capture_start = w12; 955 Register capture_end = w13; 956 Register input_length = w14; 957 958 // Copy captures to output. 959 960 // Get string length. 961 __ Sub(x10, input_end(), input_start()); 962 if (FLAG_debug_code) { 963 // Check that the size of the input string chars is in range. 964 __ Cmp(x10, SeqTwoByteString::kMaxCharsSize); 965 __ Check(ls, AbortReason::kInputStringTooLong); 966 } 967 // input_start has a start_offset offset on entry. We need to include 968 // it when computing the length of the whole string. 969 if (mode_ == UC16) { 970 __ Add(input_length, start_offset(), Operand(w10, LSR, 1)); 971 } else { 972 __ Add(input_length, start_offset(), w10); 973 } 974 975 // Copy the results to the output array from the cached registers first. 976 for (int i = 0; 977 (i < num_saved_registers_) && (i < kNumCachedRegisters); 978 i += 2) { 979 __ Mov(capture_start.X(), GetCachedRegister(i)); 980 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits); 981 if ((i == 0) && global_with_zero_length_check()) { 982 // Keep capture start for the zero-length check later. 983 __ Mov(first_capture_start, capture_start); 984 } 985 // Offsets need to be relative to the start of the string. 986 if (mode_ == UC16) { 987 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 988 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 989 } else { 990 __ Add(capture_start, input_length, capture_start); 991 __ Add(capture_end, input_length, capture_end); 992 } 993 // The output pointer advances for a possible global match. 994 __ Stp(capture_start, capture_end, 995 MemOperand(output_array(), kSystemPointerSize, PostIndex)); 996 } 997 998 // Only carry on if there are more than kNumCachedRegisters capture 999 // registers. 1000 int num_registers_left_on_stack = 1001 num_saved_registers_ - kNumCachedRegisters; 1002 if (num_registers_left_on_stack > 0) { 1003 Register base = x10; 1004 // There are always an even number of capture registers. A couple of 1005 // registers determine one match with two offsets. 1006 DCHECK_EQ(0, num_registers_left_on_stack % 2); 1007 __ Add(base, frame_pointer(), kFirstCaptureOnStack); 1008 1009 // We can unroll the loop here, we should not unroll for less than 2 1010 // registers. 1011 STATIC_ASSERT(kNumRegistersToUnroll > 2); 1012 if (num_registers_left_on_stack <= kNumRegistersToUnroll) { 1013 for (int i = 0; i < num_registers_left_on_stack / 2; i++) { 1014 __ Ldp(capture_end, capture_start, 1015 MemOperand(base, -kSystemPointerSize, PostIndex)); 1016 if ((i == 0) && global_with_zero_length_check()) { 1017 // Keep capture start for the zero-length check later. 1018 __ Mov(first_capture_start, capture_start); 1019 } 1020 // Offsets need to be relative to the start of the string. 1021 if (mode_ == UC16) { 1022 __ Add(capture_start, 1023 input_length, 1024 Operand(capture_start, ASR, 1)); 1025 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 1026 } else { 1027 __ Add(capture_start, input_length, capture_start); 1028 __ Add(capture_end, input_length, capture_end); 1029 } 1030 // The output pointer advances for a possible global match. 1031 __ Stp(capture_start, capture_end, 1032 MemOperand(output_array(), kSystemPointerSize, PostIndex)); 1033 } 1034 } else { 1035 Label loop, start; 1036 __ Mov(x11, num_registers_left_on_stack); 1037 1038 __ Ldp(capture_end, capture_start, 1039 MemOperand(base, -kSystemPointerSize, PostIndex)); 1040 if (global_with_zero_length_check()) { 1041 __ Mov(first_capture_start, capture_start); 1042 } 1043 __ B(&start); 1044 1045 __ Bind(&loop); 1046 __ Ldp(capture_end, capture_start, 1047 MemOperand(base, -kSystemPointerSize, PostIndex)); 1048 __ Bind(&start); 1049 if (mode_ == UC16) { 1050 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 1051 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 1052 } else { 1053 __ Add(capture_start, input_length, capture_start); 1054 __ Add(capture_end, input_length, capture_end); 1055 } 1056 // The output pointer advances for a possible global match. 1057 __ Stp(capture_start, capture_end, 1058 MemOperand(output_array(), kSystemPointerSize, PostIndex)); 1059 __ Sub(x11, x11, 2); 1060 __ Cbnz(x11, &loop); 1061 } 1062 } 1063 } 1064 1065 if (global()) { 1066 Register success_counter = w0; 1067 Register output_size = x10; 1068 // Restart matching if the regular expression is flagged as global. 1069 1070 // Increment success counter. 1071 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 1072 __ Add(success_counter, success_counter, 1); 1073 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 1074 1075 // Capture results have been stored, so the number of remaining global 1076 // output registers is reduced by the number of stored captures. 1077 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize)); 1078 __ Sub(output_size, output_size, num_saved_registers_); 1079 // Check whether we have enough room for another set of capture results. 1080 __ Cmp(output_size, num_saved_registers_); 1081 __ B(lt, &return_w0); 1082 1083 // The output pointer is already set to the next field in the output 1084 // array. 1085 // Update output size on the frame before we restart matching. 1086 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize)); 1087 1088 // Restore the original regexp stack pointer value (effectively, pop the 1089 // stored base pointer). 1090 PopRegExpBasePointer(backtrack_stackpointer(), x11); 1091 1092 if (global_with_zero_length_check()) { 1093 // Special case for zero-length matches. 1094 __ Cmp(current_input_offset(), first_capture_start); 1095 // Not a zero-length match, restart. 1096 __ B(ne, &load_char_start_regexp); 1097 // Offset from the end is zero if we already reached the end. 1098 __ Cbz(current_input_offset(), &return_w0); 1099 // Advance current position after a zero-length match. 1100 Label advance; 1101 __ bind(&advance); 1102 __ Add(current_input_offset(), current_input_offset(), 1103 Operand((mode_ == UC16) ? 2 : 1)); 1104 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 1105 } 1106 1107 __ B(&load_char_start_regexp); 1108 } else { 1109 __ Mov(w0, SUCCESS); 1110 } 1111 } 1112 1113 if (exit_label_.is_linked()) { 1114 // Exit and return w0. 1115 __ Bind(&exit_label_); 1116 if (global()) { 1117 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter)); 1118 } 1119 } 1120 1121 __ Bind(&return_w0); 1122 // Restore the original regexp stack pointer value (effectively, pop the 1123 // stored base pointer). 1124 PopRegExpBasePointer(backtrack_stackpointer(), x11); 1125 1126 // Set stack pointer back to first register to retain. 1127 __ Mov(sp, fp); 1128 __ Pop<TurboAssembler::kAuthLR>(fp, lr); 1129 1130 // Restore registers. 1131 __ PopCPURegList(registers_to_retain); 1132 1133 __ Ret(); 1134 1135 Label exit_with_exception; 1136 if (check_preempt_label_.is_linked()) { 1137 __ Bind(&check_preempt_label_); 1138 1139 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10); 1140 1141 SaveLinkRegister(); 1142 PushCachedRegisters(); 1143 CallCheckStackGuardState(x10); 1144 // Returning from the regexp code restores the stack (sp <- fp) 1145 // so we don't need to drop the link register from it before exiting. 1146 __ Cbnz(w0, &return_w0); 1147 // Reset the cached registers. 1148 PopCachedRegisters(); 1149 1150 LoadRegExpStackPointerFromMemory(backtrack_stackpointer()); 1151 1152 RestoreLinkRegister(); 1153 __ Ret(); 1154 } 1155 1156 if (stack_overflow_label_.is_linked()) { 1157 __ Bind(&stack_overflow_label_); 1158 1159 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10); 1160 1161 SaveLinkRegister(); 1162 PushCachedRegisters(); 1163 // Call GrowStack(isolate). 1164 static constexpr int kNumArguments = 1; 1165 __ Mov(x0, ExternalReference::isolate_address(isolate())); 1166 __ CallCFunction(ExternalReference::re_grow_stack(), kNumArguments); 1167 // If return nullptr, we have failed to grow the stack, and must exit with 1168 // a stack-overflow exception. Returning from the regexp code restores the 1169 // stack (sp <- fp) so we don't need to drop the link register from it 1170 // before exiting. 1171 __ Cbz(w0, &exit_with_exception); 1172 // Otherwise use return value as new stack pointer. 1173 __ Mov(backtrack_stackpointer(), x0); 1174 PopCachedRegisters(); 1175 RestoreLinkRegister(); 1176 __ Ret(); 1177 } 1178 1179 if (exit_with_exception.is_linked()) { 1180 __ Bind(&exit_with_exception); 1181 __ Mov(w0, EXCEPTION); 1182 __ B(&return_w0); 1183 } 1184 1185 if (fallback_label_.is_linked()) { 1186 __ Bind(&fallback_label_); 1187 __ Mov(w0, FALLBACK_TO_EXPERIMENTAL); 1188 __ B(&return_w0); 1189 } 1190 1191 CodeDesc code_desc; 1192 masm_->GetCode(isolate(), &code_desc); 1193 Handle<Code> code = 1194 Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP) 1195 .set_self_reference(masm_->CodeObject()) 1196 .Build(); 1197 PROFILE(masm_->isolate(), 1198 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source)); 1199 return Handle<HeapObject>::cast(code); 1200} 1201 1202 1203void RegExpMacroAssemblerARM64::GoTo(Label* to) { 1204 BranchOrBacktrack(al, to); 1205} 1206 1207void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand, 1208 Label* if_ge) { 1209 Register to_compare = GetRegister(reg, w10); 1210 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge); 1211} 1212 1213 1214void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand, 1215 Label* if_lt) { 1216 Register to_compare = GetRegister(reg, w10); 1217 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt); 1218} 1219 1220 1221void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) { 1222 Register to_compare = GetRegister(reg, w10); 1223 __ Cmp(to_compare, current_input_offset()); 1224 BranchOrBacktrack(eq, if_eq); 1225} 1226 1227RegExpMacroAssembler::IrregexpImplementation 1228 RegExpMacroAssemblerARM64::Implementation() { 1229 return kARM64Implementation; 1230} 1231 1232 1233void RegExpMacroAssemblerARM64::PopCurrentPosition() { 1234 Pop(current_input_offset()); 1235} 1236 1237 1238void RegExpMacroAssemblerARM64::PopRegister(int register_index) { 1239 Pop(w10); 1240 StoreRegister(register_index, w10); 1241} 1242 1243 1244void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) { 1245 if (label->is_bound()) { 1246 int target = label->pos(); 1247 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag); 1248 } else { 1249 __ Adr(x10, label, MacroAssembler::kAdrFar); 1250 __ Sub(x10, x10, code_pointer()); 1251 if (FLAG_debug_code) { 1252 __ Cmp(x10, kWRegMask); 1253 // The code offset has to fit in a W register. 1254 __ Check(ls, AbortReason::kOffsetOutOfRange); 1255 } 1256 } 1257 Push(w10); 1258 CheckStackLimit(); 1259} 1260 1261 1262void RegExpMacroAssemblerARM64::PushCurrentPosition() { 1263 Push(current_input_offset()); 1264} 1265 1266 1267void RegExpMacroAssemblerARM64::PushRegister(int register_index, 1268 StackCheckFlag check_stack_limit) { 1269 Register to_push = GetRegister(register_index, w10); 1270 Push(to_push); 1271 if (check_stack_limit) CheckStackLimit(); 1272} 1273 1274 1275void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) { 1276 RegisterState register_state = GetRegisterState(reg); 1277 switch (register_state) { 1278 case STACKED: 1279 __ Ldr(current_input_offset(), register_location(reg)); 1280 break; 1281 case CACHED_LSW: 1282 __ Mov(current_input_offset(), GetCachedRegister(reg).W()); 1283 break; 1284 case CACHED_MSW: 1285 __ Lsr(current_input_offset().X(), GetCachedRegister(reg), 1286 kWRegSizeInBits); 1287 break; 1288 default: 1289 UNREACHABLE(); 1290 } 1291} 1292 1293void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) { 1294 ExternalReference ref = 1295 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 1296 __ Mov(x10, ref); 1297 __ Ldr(x10, MemOperand(x10)); 1298 __ Sub(x10, backtrack_stackpointer(), x10); 1299 if (FLAG_debug_code) { 1300 __ Cmp(x10, Operand(w10, SXTW)); 1301 // The stack offset needs to fit in a W register. 1302 __ Check(eq, AbortReason::kOffsetOutOfRange); 1303 } 1304 StoreRegister(reg, w10); 1305} 1306 1307void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) { 1308 ExternalReference ref = 1309 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 1310 Register read_from = GetRegister(reg, w10); 1311 __ Mov(x11, ref); 1312 __ Ldr(x11, MemOperand(x11)); 1313 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW)); 1314} 1315 1316void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) { 1317 Label after_position; 1318 __ Cmp(current_input_offset(), -by * char_size()); 1319 __ B(ge, &after_position); 1320 __ Mov(current_input_offset(), -by * char_size()); 1321 // On RegExp code entry (where this operation is used), the character before 1322 // the current position is expected to be already loaded. 1323 // We have advanced the position, so it's safe to read backwards. 1324 LoadCurrentCharacterUnchecked(-1, 1); 1325 __ Bind(&after_position); 1326} 1327 1328 1329void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) { 1330 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1331 Register set_to = wzr; 1332 if (to != 0) { 1333 set_to = w10; 1334 __ Mov(set_to, to); 1335 } 1336 StoreRegister(register_index, set_to); 1337} 1338 1339 1340bool RegExpMacroAssemblerARM64::Succeed() { 1341 __ B(&success_label_); 1342 return global(); 1343} 1344 1345 1346void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg, 1347 int cp_offset) { 1348 Register position = current_input_offset(); 1349 if (cp_offset != 0) { 1350 position = w10; 1351 __ Add(position, current_input_offset(), cp_offset * char_size()); 1352 } 1353 StoreRegister(reg, position); 1354} 1355 1356 1357void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { 1358 DCHECK(reg_from <= reg_to); 1359 int num_registers = reg_to - reg_from + 1; 1360 1361 // If the first capture register is cached in a hardware register but not 1362 // aligned on a 64-bit one, we need to clear the first one specifically. 1363 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) { 1364 StoreRegister(reg_from, string_start_minus_one()); 1365 num_registers--; 1366 reg_from++; 1367 } 1368 1369 // Clear cached registers in pairs as far as possible. 1370 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) { 1371 DCHECK(GetRegisterState(reg_from) == CACHED_LSW); 1372 __ Mov(GetCachedRegister(reg_from), twice_non_position_value()); 1373 reg_from += 2; 1374 num_registers -= 2; 1375 } 1376 1377 if ((num_registers % 2) == 1) { 1378 StoreRegister(reg_from, string_start_minus_one()); 1379 num_registers--; 1380 reg_from++; 1381 } 1382 1383 if (num_registers > 0) { 1384 // If there are some remaining registers, they are stored on the stack. 1385 DCHECK_LE(kNumCachedRegisters, reg_from); 1386 1387 // Move down the indexes of the registers on stack to get the correct offset 1388 // in memory. 1389 reg_from -= kNumCachedRegisters; 1390 reg_to -= kNumCachedRegisters; 1391 // We should not unroll the loop for less than 2 registers. 1392 STATIC_ASSERT(kNumRegistersToUnroll > 2); 1393 // We position the base pointer to (reg_from + 1). 1394 int base_offset = kFirstRegisterOnStack - 1395 kWRegSize - (kWRegSize * reg_from); 1396 if (num_registers > kNumRegistersToUnroll) { 1397 Register base = x10; 1398 __ Add(base, frame_pointer(), base_offset); 1399 1400 Label loop; 1401 __ Mov(x11, num_registers); 1402 __ Bind(&loop); 1403 __ Str(twice_non_position_value(), 1404 MemOperand(base, -kSystemPointerSize, PostIndex)); 1405 __ Sub(x11, x11, 2); 1406 __ Cbnz(x11, &loop); 1407 } else { 1408 for (int i = reg_from; i <= reg_to; i += 2) { 1409 __ Str(twice_non_position_value(), 1410 MemOperand(frame_pointer(), base_offset)); 1411 base_offset -= kWRegSize * 2; 1412 } 1413 } 1414 } 1415} 1416 1417// Helper function for reading a value out of a stack frame. 1418template <typename T> 1419static T& frame_entry(Address re_frame, int frame_offset) { 1420 return *reinterpret_cast<T*>(re_frame + frame_offset); 1421} 1422 1423 1424template <typename T> 1425static T* frame_entry_address(Address re_frame, int frame_offset) { 1426 return reinterpret_cast<T*>(re_frame + frame_offset); 1427} 1428 1429int RegExpMacroAssemblerARM64::CheckStackGuardState( 1430 Address* return_address, Address raw_code, Address re_frame, 1431 int start_index, const byte** input_start, const byte** input_end) { 1432 Code re_code = Code::cast(Object(raw_code)); 1433 return NativeRegExpMacroAssembler::CheckStackGuardState( 1434 frame_entry<Isolate*>(re_frame, kIsolate), start_index, 1435 static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), 1436 return_address, re_code, frame_entry_address<Address>(re_frame, kInput), 1437 input_start, input_end); 1438} 1439 1440 1441void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset, 1442 Label* on_outside_input) { 1443 if (cp_offset >= 0) { 1444 CompareAndBranchOrBacktrack(current_input_offset(), 1445 -cp_offset * char_size(), ge, on_outside_input); 1446 } else { 1447 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size())); 1448 __ Cmp(w12, string_start_minus_one()); 1449 BranchOrBacktrack(le, on_outside_input); 1450 } 1451} 1452 1453 1454// Private methods: 1455 1456void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) { 1457 DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins()); 1458 DCHECK(!masm_->options().isolate_independent_code); 1459 1460 // Allocate space on the stack to store the return address. The 1461 // CheckStackGuardState C++ function will override it if the code 1462 // moved. Allocate extra space for 2 arguments passed by pointers. 1463 // AAPCS64 requires the stack to be 16 byte aligned. 1464 int alignment = masm_->ActivationFrameAlignment(); 1465 DCHECK_EQ(alignment % 16, 0); 1466 int align_mask = (alignment / kXRegSize) - 1; 1467 int xreg_to_claim = (3 + align_mask) & ~align_mask; 1468 1469 __ Claim(xreg_to_claim); 1470 1471 // CheckStackGuardState needs the end and start addresses of the input string. 1472 __ Poke(input_end(), 2 * kSystemPointerSize); 1473 __ Add(x5, sp, 2 * kSystemPointerSize); 1474 __ Poke(input_start(), kSystemPointerSize); 1475 __ Add(x4, sp, kSystemPointerSize); 1476 1477 __ Mov(w3, start_offset()); 1478 // RegExp code frame pointer. 1479 __ Mov(x2, frame_pointer()); 1480 // Code of self. 1481 __ Mov(x1, Operand(masm_->CodeObject())); 1482 1483 // We need to pass a pointer to the return address as first argument. 1484 // DirectCEntry will place the return address on the stack before calling so 1485 // the stack pointer will point to it. 1486 __ Mov(x0, sp); 1487 1488 DCHECK_EQ(scratch, x10); 1489 ExternalReference check_stack_guard_state = 1490 ExternalReference::re_check_stack_guard_state(); 1491 __ Mov(scratch, check_stack_guard_state); 1492 1493 __ CallBuiltin(Builtin::kDirectCEntry); 1494 1495 // The input string may have been moved in memory, we need to reload it. 1496 __ Peek(input_start(), kSystemPointerSize); 1497 __ Peek(input_end(), 2 * kSystemPointerSize); 1498 1499 __ Drop(xreg_to_claim); 1500 1501 // Reload the Code pointer. 1502 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 1503} 1504 1505void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition, 1506 Label* to) { 1507 if (condition == al) { // Unconditional. 1508 if (to == nullptr) { 1509 Backtrack(); 1510 return; 1511 } 1512 __ B(to); 1513 return; 1514 } 1515 if (to == nullptr) { 1516 to = &backtrack_label_; 1517 } 1518 __ B(condition, to); 1519} 1520 1521void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg, 1522 int immediate, 1523 Condition condition, 1524 Label* to) { 1525 if ((immediate == 0) && ((condition == eq) || (condition == ne))) { 1526 if (to == nullptr) { 1527 to = &backtrack_label_; 1528 } 1529 if (condition == eq) { 1530 __ Cbz(reg, to); 1531 } else { 1532 __ Cbnz(reg, to); 1533 } 1534 } else { 1535 __ Cmp(reg, immediate); 1536 BranchOrBacktrack(condition, to); 1537 } 1538} 1539 1540 1541void RegExpMacroAssemblerARM64::CheckPreemption() { 1542 // Check for preemption. 1543 ExternalReference stack_limit = 1544 ExternalReference::address_of_jslimit(isolate()); 1545 __ Mov(x10, stack_limit); 1546 __ Ldr(x10, MemOperand(x10)); 1547 __ Cmp(sp, x10); 1548 CallIf(&check_preempt_label_, ls); 1549} 1550 1551 1552void RegExpMacroAssemblerARM64::CheckStackLimit() { 1553 ExternalReference stack_limit = 1554 ExternalReference::address_of_regexp_stack_limit_address(isolate()); 1555 __ Mov(x10, stack_limit); 1556 __ Ldr(x10, MemOperand(x10)); 1557 __ Cmp(backtrack_stackpointer(), x10); 1558 CallIf(&stack_overflow_label_, ls); 1559} 1560 1561 1562void RegExpMacroAssemblerARM64::Push(Register source) { 1563 DCHECK(source.Is32Bits()); 1564 DCHECK_NE(source, backtrack_stackpointer()); 1565 __ Str(source, 1566 MemOperand(backtrack_stackpointer(), 1567 -static_cast<int>(kWRegSize), 1568 PreIndex)); 1569} 1570 1571 1572void RegExpMacroAssemblerARM64::Pop(Register target) { 1573 DCHECK(target.Is32Bits()); 1574 DCHECK_NE(target, backtrack_stackpointer()); 1575 __ Ldr(target, 1576 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex)); 1577} 1578 1579 1580Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) { 1581 DCHECK_GT(kNumCachedRegisters, register_index); 1582 return Register::Create(register_index / 2, kXRegSizeInBits); 1583} 1584 1585 1586Register RegExpMacroAssemblerARM64::GetRegister(int register_index, 1587 Register maybe_result) { 1588 DCHECK(maybe_result.Is32Bits()); 1589 DCHECK_LE(0, register_index); 1590 if (num_registers_ <= register_index) { 1591 num_registers_ = register_index + 1; 1592 } 1593 Register result = NoReg; 1594 RegisterState register_state = GetRegisterState(register_index); 1595 switch (register_state) { 1596 case STACKED: 1597 __ Ldr(maybe_result, register_location(register_index)); 1598 result = maybe_result; 1599 break; 1600 case CACHED_LSW: 1601 result = GetCachedRegister(register_index).W(); 1602 break; 1603 case CACHED_MSW: 1604 __ Lsr(maybe_result.X(), GetCachedRegister(register_index), 1605 kWRegSizeInBits); 1606 result = maybe_result; 1607 break; 1608 default: 1609 UNREACHABLE(); 1610 } 1611 DCHECK(result.Is32Bits()); 1612 return result; 1613} 1614 1615 1616void RegExpMacroAssemblerARM64::StoreRegister(int register_index, 1617 Register source) { 1618 DCHECK(source.Is32Bits()); 1619 DCHECK_LE(0, register_index); 1620 if (num_registers_ <= register_index) { 1621 num_registers_ = register_index + 1; 1622 } 1623 1624 RegisterState register_state = GetRegisterState(register_index); 1625 switch (register_state) { 1626 case STACKED: 1627 __ Str(source, register_location(register_index)); 1628 break; 1629 case CACHED_LSW: { 1630 Register cached_register = GetCachedRegister(register_index); 1631 if (source != cached_register.W()) { 1632 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits); 1633 } 1634 break; 1635 } 1636 case CACHED_MSW: { 1637 Register cached_register = GetCachedRegister(register_index); 1638 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits); 1639 break; 1640 } 1641 default: 1642 UNREACHABLE(); 1643 } 1644} 1645 1646 1647void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) { 1648 Label skip_call; 1649 if (condition != al) __ B(&skip_call, NegateCondition(condition)); 1650 __ Bl(to); 1651 __ Bind(&skip_call); 1652} 1653 1654 1655void RegExpMacroAssemblerARM64::RestoreLinkRegister() { 1656 // TODO(v8:10026): Remove when we stop compacting for code objects that are 1657 // active on the call stack. 1658 __ Pop<TurboAssembler::kAuthLR>(padreg, lr); 1659 __ Add(lr, lr, Operand(masm_->CodeObject())); 1660} 1661 1662 1663void RegExpMacroAssemblerARM64::SaveLinkRegister() { 1664 __ Sub(lr, lr, Operand(masm_->CodeObject())); 1665 __ Push<TurboAssembler::kSignLR>(lr, padreg); 1666} 1667 1668 1669MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) { 1670 DCHECK(register_index < (1<<30)); 1671 DCHECK_LE(kNumCachedRegisters, register_index); 1672 if (num_registers_ <= register_index) { 1673 num_registers_ = register_index + 1; 1674 } 1675 register_index -= kNumCachedRegisters; 1676 int offset = kFirstRegisterOnStack - register_index * kWRegSize; 1677 return MemOperand(frame_pointer(), offset); 1678} 1679 1680MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index, 1681 Register scratch) { 1682 DCHECK(register_index < (1<<30)); 1683 DCHECK(register_index < num_saved_registers_); 1684 DCHECK_LE(kNumCachedRegisters, register_index); 1685 DCHECK_EQ(register_index % 2, 0); 1686 register_index -= kNumCachedRegisters; 1687 int offset = kFirstCaptureOnStack - register_index * kWRegSize; 1688 // capture_location is used with Stp instructions to load/store 2 registers. 1689 // The immediate field in the encoding is limited to 7 bits (signed). 1690 if (is_int7(offset)) { 1691 return MemOperand(frame_pointer(), offset); 1692 } else { 1693 __ Add(scratch, frame_pointer(), offset); 1694 return MemOperand(scratch); 1695 } 1696} 1697 1698void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset, 1699 int characters) { 1700 Register offset = current_input_offset(); 1701 1702 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU 1703 // and the operating system running on the target allow it. 1704 // If unaligned load/stores are not supported then this function must only 1705 // be used to load a single character at a time. 1706 1707 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to 1708 // disable it. 1709 // TODO(pielan): See whether or not we should disable unaligned accesses. 1710 if (!CanReadUnaligned()) { 1711 DCHECK_EQ(1, characters); 1712 } 1713 1714 if (cp_offset != 0) { 1715 if (FLAG_debug_code) { 1716 __ Mov(x10, cp_offset * char_size()); 1717 __ Add(x10, x10, Operand(current_input_offset(), SXTW)); 1718 __ Cmp(x10, Operand(w10, SXTW)); 1719 // The offset needs to fit in a W register. 1720 __ Check(eq, AbortReason::kOffsetOutOfRange); 1721 } else { 1722 __ Add(w10, current_input_offset(), cp_offset * char_size()); 1723 } 1724 offset = w10; 1725 } 1726 1727 if (mode_ == LATIN1) { 1728 if (characters == 4) { 1729 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1730 } else if (characters == 2) { 1731 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1732 } else { 1733 DCHECK_EQ(1, characters); 1734 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW)); 1735 } 1736 } else { 1737 DCHECK(mode_ == UC16); 1738 if (characters == 2) { 1739 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1740 } else { 1741 DCHECK_EQ(1, characters); 1742 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1743 } 1744 } 1745} 1746 1747} // namespace internal 1748} // namespace v8 1749 1750#undef __ 1751 1752#endif // V8_TARGET_ARCH_ARM64 1753