1// Copyright 2021 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#if V8_TARGET_ARCH_RISCV64 6 7#include "src/regexp/riscv64/regexp-macro-assembler-riscv64.h" 8 9#include "src/codegen/assembler-inl.h" 10#include "src/codegen/macro-assembler.h" 11#include "src/logging/log.h" 12#include "src/objects/objects-inl.h" 13#include "src/regexp/regexp-macro-assembler.h" 14#include "src/regexp/regexp-stack.h" 15#include "src/snapshot/embedded/embedded-data-inl.h" 16#include "src/strings/unicode.h" 17 18namespace v8 { 19namespace internal { 20 21/* clang-format off 22 * 23 * This assembler uses the following register assignment convention 24 * - s3 : kScratchReg. Temporarily stores the index of capture start after a matching pass 25 * for a global regexp. 26 * - s4 : Pointer to current Code object including heap object tag. 27 * - s1 : Current position in input, as negative offset from end of string. 28 * Please notice that this is the byte offset, not the character offset! 29 * - s2 : Currently loaded character. Must be loaded using 30 * LoadCurrentCharacter before using any of the dispatch methods. 31 * - t0 : Points to tip of backtrack stack 32 * - t1 : Unused. 33 * - t2 : End of input (points to byte after last character in input). 34 * - fp : Frame pointer. Used to access arguments, local variables and 35 * RegExp registers. 36 * - sp : Points to tip of C stack. 37 * 38 * The remaining registers are free for computations. 39 * Each call to a public method should retain this convention. 40 * 41 * The stack will have the following structure: 42 * 43 * kStackFrameHeader 44 * --- sp when called --- 45 * - fp[72] ra Return from RegExp code (ra). kReturnAddress 46 * - fp[64] s9, old-fp Old fp, callee saved(s9). 47 * - fp[0..63] fp..s7 Callee-saved registers fp..s7. 
48 * --- frame pointer ---- 49 * - fp[-8] Isolate* isolate (address of the current isolate) kIsolate 50 * - fp[-16] direct_call (1 = direct call from JS, 0 = from runtime) kDirectCall 51 * - fp[-24] output_size (may fit multiple sets of matches) kNumOutputRegisters 52 * - fp[-32] int* output (int[num_saved_registers_], for output). kRegisterOutput 53 * - fp[-40] end of input (address of end of string). kInputEnd 54 * - fp[-48] start of input (address of first character in string). kInputStart 55 * - fp[-56] start index (character index of start). kStartIndex 56 * - fp[-64] void* input_string (location of a handle containing the string). kInputString 57 * - fp[-72] success counter (only for global regexps to count matches). kSuccessfulCaptures 58 * - fp[-80] Offset of location before start of input (effectively character kStringStartMinusOne 59 * position -1). Used to initialize capture registers to a 60 * non-position. 61 * --------- The following output registers are 32-bit values. --------- 62 * - fp[-88] register 0 (Only positions must be stored in the first kRegisterZero 63 * - register 1 num_saved_registers_ registers) 64 * - ... 65 * - register num_registers-1 66 * --- sp --- 67 * 68 * The first num_saved_registers_ registers are initialized to point to 69 * "character -1" in the string (i.e., char_size() bytes before the first 70 * character of the string). The remaining registers start out as garbage. 71 * 72 * The data up to the return address must be placed there by the calling 73 * code and the remaining arguments are passed in registers, e.g. 
by calling the 74 * code entry as cast to a function with the signature: 75 * int (*match)(String input_string, 76 * int start_index, 77 * Address start, 78 * Address end, 79 * int* output, 80 * int output_size, 81 * bool direct_call = false, 82 * Isolate* isolate, 83 * Address regexp); 84 * The call is performed by NativeRegExpMacroAssembler::Execute() 85 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 86 * 87 * clang-format on 88 */ 89 90#define __ ACCESS_MASM(masm_) 91 92const int RegExpMacroAssemblerRISCV::kRegExpCodeSize; 93 94RegExpMacroAssemblerRISCV::RegExpMacroAssemblerRISCV(Isolate* isolate, 95 Zone* zone, Mode mode, 96 int registers_to_save) 97 : NativeRegExpMacroAssembler(isolate, zone), 98 masm_(std::make_unique<MacroAssembler>( 99 isolate, CodeObjectRequired::kYes, 100 NewAssemblerBuffer(kRegExpCodeSize))), 101 no_root_array_scope_(masm_.get()), 102 mode_(mode), 103 num_registers_(registers_to_save), 104 num_saved_registers_(registers_to_save), 105 entry_label_(), 106 start_label_(), 107 success_label_(), 108 backtrack_label_(), 109 exit_label_(), 110 internal_failure_label_() { 111 DCHECK_EQ(0, registers_to_save % 2); 112 __ jmp(&entry_label_); // We'll write the entry code later. 113 // If the code gets too big or corrupted, an internal exception will be 114 // raised, and we will exit right away. 115 __ bind(&internal_failure_label_); 116 __ li(a0, Operand(FAILURE)); 117 __ Ret(); 118 __ bind(&start_label_); // And then continue from here. 119} 120 121RegExpMacroAssemblerRISCV::~RegExpMacroAssemblerRISCV() { 122 // Unuse labels in case we throw away the assembler without calling GetCode. 
123 entry_label_.Unuse(); 124 start_label_.Unuse(); 125 success_label_.Unuse(); 126 backtrack_label_.Unuse(); 127 exit_label_.Unuse(); 128 check_preempt_label_.Unuse(); 129 stack_overflow_label_.Unuse(); 130 internal_failure_label_.Unuse(); 131 fallback_label_.Unuse(); 132} 133 134int RegExpMacroAssemblerRISCV::stack_limit_slack() { 135 return RegExpStack::kStackLimitSlack; 136} 137 138void RegExpMacroAssemblerRISCV::AdvanceCurrentPosition(int by) { 139 if (by != 0) { 140 __ Add64(current_input_offset(), current_input_offset(), 141 Operand(by * char_size())); 142 } 143} 144 145void RegExpMacroAssemblerRISCV::AdvanceRegister(int reg, int by) { 146 DCHECK_LE(0, reg); 147 DCHECK_GT(num_registers_, reg); 148 if (by != 0) { 149 __ Ld(a0, register_location(reg)); 150 __ Add64(a0, a0, Operand(by)); 151 __ Sd(a0, register_location(reg)); 152 } 153} 154 155void RegExpMacroAssemblerRISCV::Backtrack() { 156 CheckPreemption(); 157 if (has_backtrack_limit()) { 158 Label next; 159 __ Ld(a0, MemOperand(frame_pointer(), kBacktrackCount)); 160 __ Add64(a0, a0, Operand(1)); 161 __ Sd(a0, MemOperand(frame_pointer(), kBacktrackCount)); 162 __ BranchShort(&next, ne, a0, Operand(backtrack_limit())); 163 164 // Backtrack limit exceeded. 165 if (can_fallback()) { 166 __ jmp(&fallback_label_); 167 } else { 168 // Can't fallback, so we treat it as a failed match. 169 Fail(); 170 } 171 172 __ bind(&next); 173 } 174 // Pop Code offset from backtrack stack, add Code and jump to location. 
175 Pop(a0); 176 __ Add64(a0, a0, code_pointer()); 177 __ Jump(a0); 178} 179 180void RegExpMacroAssemblerRISCV::Bind(Label* label) { __ bind(label); } 181 182void RegExpMacroAssemblerRISCV::CheckCharacter(uint32_t c, Label* on_equal) { 183 BranchOrBacktrack(on_equal, eq, current_character(), Operand(c)); 184} 185 186void RegExpMacroAssemblerRISCV::CheckCharacterGT(base::uc16 limit, 187 Label* on_greater) { 188 BranchOrBacktrack(on_greater, gt, current_character(), Operand(limit)); 189} 190 191void RegExpMacroAssemblerRISCV::CheckAtStart(int cp_offset, 192 Label* on_at_start) { 193 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); 194 __ Add64(a0, current_input_offset(), 195 Operand(-char_size() + cp_offset * char_size())); 196 BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); 197} 198 199void RegExpMacroAssemblerRISCV::CheckNotAtStart(int cp_offset, 200 Label* on_not_at_start) { 201 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); 202 __ Add64(a0, current_input_offset(), 203 Operand(-char_size() + cp_offset * char_size())); 204 BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1)); 205} 206 207void RegExpMacroAssemblerRISCV::CheckCharacterLT(base::uc16 limit, 208 Label* on_less) { 209 BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); 210} 211 212void RegExpMacroAssemblerRISCV::CheckGreedyLoop(Label* on_equal) { 213 Label backtrack_non_equal; 214 __ Lw(a0, MemOperand(backtrack_stackpointer(), 0)); 215 __ BranchShort(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); 216 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), 217 Operand(kIntSize)); 218 __ bind(&backtrack_non_equal); 219 BranchOrBacktrack(on_equal, eq, current_input_offset(), Operand(a0)); 220} 221 222// Push (pop) caller-saved registers used by irregexp. 
// Saves the four registers that carry regexp state (current input offset,
// current character, end-of-input address, backtrack stack pointer) with a
// single MultiPush. Paired with PopCallerSavedRegisters.
void RegExpMacroAssemblerRISCV::PushCallerSavedRegisters() {
  RegList caller_saved_regexp = {current_input_offset(), current_character(),
                                 end_of_input_address(),
                                 backtrack_stackpointer()};
  __ MultiPush(caller_saved_regexp);
}

// Restores the registers saved by PushCallerSavedRegisters. The register list
// must stay identical to the one used there.
void RegExpMacroAssemblerRISCV::PopCallerSavedRegisters() {
  RegList caller_saved_regexp = {current_input_offset(), current_character(),
                                 end_of_input_address(),
                                 backtrack_stackpointer()};
  __ MultiPop(caller_saved_regexp);
}

// Emits a C call to re_is_character_in_range_array with the arguments
//   a0: current character,
//   a1: the range array obtained from GetOrAddRangeArray(ranges),
//   a2: isolate address.
// The regexp state registers are saved around the call and code_pointer() is
// reloaded afterwards. Callers in this file test the result in a0.
void RegExpMacroAssemblerRISCV::CallIsCharacterInRangeArray(
    const ZoneList<CharacterRange>* ranges) {
  PushCallerSavedRegisters();
  static const int kNumArguments = 3;
  __ PrepareCallCFunction(kNumArguments, a0);

  __ mv(a0, current_character());
  __ li(a1, Operand(GetOrAddRangeArray(ranges)));
  __ li(a2, Operand(ExternalReference::isolate_address(isolate())));

  {
    // We have a frame (set up in GetCode), but the assembler doesn't know.
    FrameScope scope(masm_.get(), StackFrame::MANUAL);
    __ CallCFunction(ExternalReference::re_is_character_in_range_array(),
                     kNumArguments);
  }
  PopCallerSavedRegisters();
  __ li(code_pointer(), Operand(masm_->CodeObject()));
}

// Branches to |on_in_range| when the range-array helper returns non-zero for
// the current character. Always returns true.
bool RegExpMacroAssemblerRISCV::CheckCharacterInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
  CallIsCharacterInRangeArray(ranges);
  BranchOrBacktrack(on_in_range, ne, a0, Operand(zero_reg));
  return true;
}

// Branches to |on_not_in_range| when the range-array helper returns zero for
// the current character. Always returns true.
bool RegExpMacroAssemblerRISCV::CheckCharacterNotInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
  CallIsCharacterInRangeArray(ranges);
  BranchOrBacktrack(on_not_in_range, eq, a0, Operand(zero_reg));
  return true;
}

// Emits a case-insensitive back-reference check against the capture stored in
// registers |start_reg| (start) and |start_reg| + 1 (end).
//   read_backward: match the capture ending at the current position instead
//                  of starting at it.
//   unicode:       in UC16 mode, selects which C comparison helper is called.
//   on_no_match:   target taken when the input does not match the capture.
// A zero-length capture falls through without consuming input. On a match the
// current input offset is moved past (or, reading backward, before) the
// matched text. LATIN1 mode compares inline; UC16 mode calls into C.
void RegExpMacroAssemblerRISCV::CheckNotBackReferenceIgnoreCase(
    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  __ Ld(a0, register_location(start_reg));      // Index of start of capture.
  __ Ld(a1, register_location(start_reg + 1));  // Index of end of capture.
  __ Sub64(a1, a1, a0);                         // Length of capture.

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));

  if (read_backward) {
    // No match if the current offset is not beyond
    // (string start - 1) + capture length.
    __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
    __ Add64(t1, t1, a1);
    BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
  } else {
    __ Add64(t1, a1, current_input_offset());
    // Check that there are enough characters left in the input.
    BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
  }

  if (mode_ == LATIN1) {
    Label success;
    Label fail;
    Label loop_check;

    // a0 - offset of start of capture.
    // a1 - length of capture.
    __ Add64(a0, a0, Operand(end_of_input_address()));
    __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
    if (read_backward) {
      __ Sub64(a2, a2, Operand(a1));
    }
    __ Add64(a1, a0, Operand(a1));

    // a0 - Address of start of capture.
    // a1 - Address of end of capture.
    // a2 - Address of current input position.

    Label loop;
    __ bind(&loop);
    __ Lbu(a3, MemOperand(a0, 0));
    __ addi(a0, a0, char_size());
    __ Lbu(a4, MemOperand(a2, 0));
    __ addi(a2, a2, char_size());

    __ BranchShort(&loop_check, eq, a4, Operand(a3));

    // Mismatch, try case-insensitive match (converting letters to lower-case).
    __ Or(a3, a3, Operand(0x20));  // Convert capture character to lower-case.
    __ Or(a4, a4, Operand(0x20));  // Also convert input character.
    __ BranchShort(&fail, ne, a4, Operand(a3));
    // The folded characters agree; accept only if they are actual letters.
    __ Sub64(a3, a3, Operand('a'));
    __ BranchShort(&loop_check, Uless_equal, a3, Operand('z' - 'a'));
    // Latin-1: Check for values in range [224,254] but not 247.
    __ Sub64(a3, a3, Operand(224 - 'a'));
    // Weren't Latin-1 letters.
    __ BranchShort(&fail, Ugreater, a3, Operand(254 - 224));
    // Check for 247.
    __ BranchShort(&fail, eq, a3, Operand(247 - 224));

    __ bind(&loop_check);
    __ Branch(&loop, lt, a0, Operand(a1));
    __ jmp(&success);

    __ bind(&fail);
    GoTo(on_no_match);

    __ bind(&success);
    // Compute new value of character position after the matched part.
    __ Sub64(current_input_offset(), a2, end_of_input_address());
    if (read_backward) {
      // Reading backward, the position ends up before the match: undo the
      // forward walk by adding (capture start - capture end).
      __ Ld(t1, register_location(start_reg));  // Index of start of capture.
      __ Ld(a2, register_location(start_reg + 1));  // Index of end of capture.
      __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
      __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
    }
  } else {
    DCHECK(mode_ == UC16);
    PushCallerSavedRegisters();

    int argument_count = 4;
    __ PrepareCallCFunction(argument_count, a2);

    // a0 - offset of start of capture.
    // a1 - length of capture.

    // Put arguments into arguments registers.
    // Parameters are
    //   a0: Address byte_offset1 - Address captured substring's start.
    //   a1: Address byte_offset2 - Address of current character position.
    //   a2: size_t byte_length - length of capture in bytes(!).
    //   a3: Isolate* isolate.

    // Address of start of capture.
    __ Add64(a0, a0, Operand(end_of_input_address()));
    // Length of capture.
    __ mv(a2, a1);
    // Save length in callee-save register for use on return.
    __ mv(s3, a1);
    // Address of current input position.
    __ Add64(a1, current_input_offset(), Operand(end_of_input_address()));
    if (read_backward) {
      __ Sub64(a1, a1, Operand(s3));
    }
    // Isolate.
    __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));

    {
      AllowExternalCallThatCantCauseGC scope(masm_.get());
      ExternalReference function =
          unicode
              ? ExternalReference::re_case_insensitive_compare_unicode()
              : ExternalReference::re_case_insensitive_compare_non_unicode();
      __ CallCFunction(function, argument_count);
    }

    // Restore regexp engine registers.
    PopCallerSavedRegisters();
    __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
    __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));

    // Check if function returned non-zero for success or zero for failure.
    BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
    // On success, increment position by length of capture.
    if (read_backward) {
      __ Sub64(current_input_offset(), current_input_offset(), Operand(s3));
    } else {
      __ Add64(current_input_offset(), current_input_offset(), Operand(s3));
    }
  }

  __ bind(&fallthrough);
}

// Emits a case-sensitive back-reference check against the capture stored in
// registers |start_reg| (start) and |start_reg| + 1 (end).
//   read_backward: match the capture ending at the current position instead
//                  of starting at it.
//   on_no_match:   target taken when the input does not match the capture.
// A zero-length capture falls through without consuming input. Characters are
// compared one by one (bytes in LATIN1 mode, half-words in UC16 mode); on a
// match the current input offset is updated to the position after the match
// (or before it when reading backward).
void RegExpMacroAssemblerRISCV::CheckNotBackReference(int start_reg,
                                                      bool read_backward,
                                                      Label* on_no_match) {
  Label fallthrough;

  // Find length of back-referenced capture.
  __ Ld(a0, register_location(start_reg));
  __ Ld(a1, register_location(start_reg + 1));
  __ Sub64(a1, a1, a0);  // Length to check.

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));

  if (read_backward) {
    // No match if the current offset is not beyond
    // (string start - 1) + capture length.
    __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
    __ Add64(t1, t1, a1);
    BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
  } else {
    __ Add64(t1, a1, current_input_offset());
    // Check that there are enough characters left in the input.
    BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
  }

  // Compute pointers to match string and capture string.
  // Afterwards: a0 = address of capture start, a1 = address of capture end,
  // a2 = address of the input position being compared.
  __ Add64(a0, a0, Operand(end_of_input_address()));
  __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
  if (read_backward) {
    __ Sub64(a2, a2, Operand(a1));
  }
  __ Add64(a1, a1, Operand(a0));

  Label loop;
  __ bind(&loop);
  if (mode_ == LATIN1) {
    __ Lbu(a3, MemOperand(a0, 0));
    __ addi(a0, a0, char_size());
    __ Lbu(a4, MemOperand(a2, 0));
    __ addi(a2, a2, char_size());
  } else {
    DCHECK(mode_ == UC16);
    __ Lhu(a3, MemOperand(a0, 0));
    __ addi(a0, a0, char_size());
    __ Lhu(a4, MemOperand(a2, 0));
    __ addi(a2, a2, char_size());
  }
  BranchOrBacktrack(on_no_match, ne, a3, Operand(a4));
  __ Branch(&loop, lt, a0, Operand(a1));

  // Move current character position to position after match.
  __ Sub64(current_input_offset(), a2, end_of_input_address());
  if (read_backward) {
    // Reading backward, the position ends up before the match: undo the
    // forward walk by adding (capture start - capture end).
    __ Ld(t1, register_location(start_reg));  // Index of start of capture.
    __ Ld(a2, register_location(start_reg + 1));  // Index of end of capture.
    __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
    __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
  }
  __ bind(&fallthrough);
}

// Branches to |on_not_equal| when the current character differs from |c|.
void RegExpMacroAssemblerRISCV::CheckNotCharacter(uint32_t c,
                                                  Label* on_not_equal) {
  BranchOrBacktrack(on_not_equal, ne, current_character(), Operand(c));
}

// Branches to |on_equal| when (current character & mask) == c. The masked
// value is computed in a0; a zero |c| is compared against zero_reg rather
// than an immediate.
void RegExpMacroAssemblerRISCV::CheckCharacterAfterAnd(uint32_t c,
                                                       uint32_t mask,
                                                       Label* on_equal) {
  __ And(a0, current_character(), Operand(mask));
  Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
  BranchOrBacktrack(on_equal, eq, a0, rhs);
}

// Branches to |on_not_equal| when (current character & mask) != c. The masked
// value is computed in a0; a zero |c| is compared against zero_reg rather
// than an immediate.
void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterAnd(uint32_t c,
                                                          uint32_t mask,
                                                          Label* on_not_equal) {
  __ And(a0, current_character(), Operand(mask));
  Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
  BranchOrBacktrack(on_not_equal, ne, a0, rhs);
}

// Branches to |on_not_equal| when ((current character - minus) & mask) != c.
// |minus| must be below String::kMaxUtf16CodeUnit.
void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterMinusAnd(
    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
  DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
  __ Sub64(a0, current_character(), Operand(minus));
  __ And(a0, a0, Operand(mask));
  BranchOrBacktrack(on_not_equal, ne, a0, Operand(c));
}

// Branches to |on_in_range| when the current character lies in [from, to].
// Implemented as a single unsigned comparison: (c - from) <= (to - from).
void RegExpMacroAssemblerRISCV::CheckCharacterInRange(base::uc16 from,
                                                      base::uc16 to,
                                                      Label* on_in_range) {
  __ Sub64(a0, current_character(), Operand(from));
  // Unsigned lower-or-same condition.
  BranchOrBacktrack(on_in_range, Uless_equal, a0, Operand(to - from));
}

// Branches to |on_not_in_range| when the current character lies outside
// [from, to]. Implemented as a single unsigned comparison:
// (c - from) > (to - from).
void RegExpMacroAssemblerRISCV::CheckCharacterNotInRange(
    base::uc16 from, base::uc16 to, Label* on_not_in_range) {
  __ Sub64(a0, current_character(), Operand(from));
  // Unsigned higher condition.
  BranchOrBacktrack(on_not_in_range, Ugreater, a0, Operand(to - from));
}

// Branches to |on_bit_set| when the byte of |table| selected by the current
// character is non-zero. The index is the character masked with
// kTableSize - 1, except in LATIN1 mode with
// kTableMask == String::kMaxOneByteCharCode, where the character is used
// unmasked.
void RegExpMacroAssemblerRISCV::CheckBitInTable(Handle<ByteArray> table,
                                                Label* on_bit_set) {
  __ li(a0, Operand(table));
  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
    __ And(a1, current_character(), Operand(kTableSize - 1));
    __ Add64(a0, a0, a1);
  } else {
    __ Add64(a0, a0, current_character());
  }

  __ Lbu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize));
  BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
}

// Tries to emit a specialised test of the current character against the
// standard character class |type|, branching to |on_no_match| when the
// character is not in the class.
// Returns true when code for the class was emitted (for kEverything nothing
// needs to be emitted) and false when this assembler has no custom
// implementation for the class/mode combination.
bool RegExpMacroAssemblerRISCV::CheckSpecialCharacterClass(
    StandardCharacterSet type, Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check.
  switch (type) {
    case StandardCharacterSet::kWhitespace:
      // Match space-characters.
      if (mode_ == LATIN1) {
        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
        Label success;
        __ BranchShort(&success, eq, current_character(), Operand(' '));
        // Check range 0x09..0x0D.
        __ Sub64(a0, current_character(), Operand('\t'));
        __ BranchShort(&success, Uless_equal, a0, Operand('\r' - '\t'));
        // \u00a0 (NBSP).
        // a0 still holds (c - '\t'), hence the adjusted constant.
        BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
        __ bind(&success);
        return true;
      }
      return false;
    case StandardCharacterSet::kNotWhitespace:
      // The emitted code for generic character classes is good enough.
      return false;
    case StandardCharacterSet::kDigit:
      // Match Latin1 digits ('0'..'9').
      __ Sub64(a0, current_character(), Operand('0'));
      BranchOrBacktrack(on_no_match, Ugreater, a0, Operand('9' - '0'));
      return true;
    case StandardCharacterSet::kNotDigit:
      // Match non Latin1-digits.
      __ Sub64(a0, current_character(), Operand('0'));
      BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand('9' - '0'));
      return true;
    case StandardCharacterSet::kNotLineTerminator: {
      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
      __ Xor(a0, current_character(), Operand(0x01));
      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
      __ Sub64(a0, a0, Operand(0x0B));
      BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(0x0C - 0x0B));
      if (mode_ == UC16) {
        // Compare original value to 0x2028 and 0x2029, using the already
        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
        // 0x201D (0x2028 - 0x0B) or 0x201E.
        __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
        BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(1));
      }
      return true;
    }
    case StandardCharacterSet::kLineTerminator: {
      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
      __ Xor(a0, current_character(), Operand(0x01));
      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
      __ Sub64(a0, a0, Operand(0x0B));
      if (mode_ == LATIN1) {
        BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(0x0C - 0x0B));
      } else {
        Label done;
        BranchOrBacktrack(&done, Uless_equal, a0, Operand(0x0C - 0x0B));
        // Compare original value to 0x2028 and 0x2029, using the already
        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
        // 0x201D (0x2028 - 0x0B) or 0x201E.
        __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
        BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(1));
        __ bind(&done);
      }
      return true;
    }
    case StandardCharacterSet::kWord: {
      if (mode_ != LATIN1) {
        // Table is 256 entries, so all Latin1 characters can be tested.
        // Outside LATIN1 mode, anything above 'z' is rejected before the
        // table lookup.
        BranchOrBacktrack(on_no_match, Ugreater, current_character(),
                          Operand('z'));
      }
      // Look the character up in the word-character map; a zero entry means
      // the character is not a word character.
      ExternalReference map = ExternalReference::re_word_character_map();
      __ li(a0, Operand(map));
      __ Add64(a0, a0, current_character());
      __ Lbu(a0, MemOperand(a0, 0));
      BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
      return true;
    }
    case StandardCharacterSet::kNotWord: {
      Label done;
      if (mode_ != LATIN1) {
        // Table is 256 entries, so all Latin1 characters can be tested.
        // Outside LATIN1 mode, anything above 'z' skips the table lookup and
        // is accepted as a non-word character.
        __ BranchShort(&done, Ugreater, current_character(), Operand('z'));
      }
      ExternalReference map = ExternalReference::re_word_character_map();
      __ li(a0, Operand(map));
      __ Add64(a0, a0, current_character());
      __ Lbu(a0, MemOperand(a0, 0));
      BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
      if (mode_ != LATIN1) {
        __ bind(&done);
      }
      return true;
    }
    case StandardCharacterSet::kEverything:
      // Match any character.
      return true;
    // No custom implementation (yet): s(UC16), S(UC16).
    default:
      return false;
  }
}

// Emits the failure exit: loads FAILURE into a0 and jumps to exit_label_.
void RegExpMacroAssemblerRISCV::Fail() {
  __ li(a0, Operand(FAILURE));
  __ jmp(&exit_label_);
}

// Loads into |dst| the value stored at the isolate's regexp stack
// stack-pointer address. |dst| is also used to hold that address first.
void RegExpMacroAssemblerRISCV::LoadRegExpStackPointerFromMemory(Register dst) {
  ExternalReference ref =
      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
  __ li(dst, Operand(ref));
  __ Ld(dst, MemOperand(dst));
}

// Stores |src| to the isolate's regexp stack stack-pointer address.
// |scratch| is clobbered with that address.
void RegExpMacroAssemblerRISCV::StoreRegExpStackPointerToMemory(
    Register src, Register scratch) {
  ExternalReference ref =
      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
  __ li(scratch, Operand(ref));
  __ Sd(src, MemOperand(scratch));
}

// Records the current regexp stack pointer in the kRegExpStackBasePointer
// frame slot as an offset relative to the regexp stack memory top
// (stack pointer - memory top). Both scratch registers are clobbered.
void RegExpMacroAssemblerRISCV::PushRegExpBasePointer(Register scratch1,
                                                      Register scratch2) {
  LoadRegExpStackPointerFromMemory(scratch1);
  ExternalReference ref =
      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
  __ li(scratch2, Operand(ref));
  __ Ld(scratch2, MemOperand(scratch2));
  __ Sub64(scratch2, scratch1, scratch2);
  __ Sd(scratch2, MemOperand(frame_pointer(), kRegExpStackBasePointer));
}

// Inverse of PushRegExpBasePointer: adds the current regexp stack memory top
// to the offset saved in the kRegExpStackBasePointer frame slot and writes
// the result back as the regexp stack pointer. Both scratch registers are
// clobbered.
void RegExpMacroAssemblerRISCV::PopRegExpBasePointer(Register scratch1,
                                                     Register scratch2) {
  ExternalReference ref =
      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
  __ Ld(scratch1, MemOperand(frame_pointer(), kRegExpStackBasePointer));
  __ li(scratch2, ref);
  __ Ld(scratch2, MemOperand(scratch2));
  __ Add64(scratch1, scratch1, scratch2);
  StoreRegExpStackPointerToMemory(scratch1, scratch2);
}

Handle<HeapObject> RegExpMacroAssemblerRISCV::GetCode(Handle<String> source) {
  Label return_a0;
  if (masm_->has_exception()) {
    // If the code gets corrupted due to long regular expressions and lack of
    // space on trampolines, an internal exception flag is set. If this case
    // is detected, we will jump into exit sequence right away.
679 __ bind_to(&entry_label_, internal_failure_label_.pos()); 680 } else { 681 // Finalize code - write the entry point code now we know how many 682 // registers we need. 683 684 // Entry code: 685 __ bind(&entry_label_); 686 687 // Tell the system that we have a stack frame. Because the type is MANUAL, 688 // no is generated. 689 FrameScope scope(masm_.get(), StackFrame::MANUAL); 690 691 // Actually emit code to start a new stack frame. 692 // Push arguments 693 // Save callee-save registers. 694 // Start new stack frame. 695 // Store link register in existing stack-cell. 696 // Order here should correspond to order of offset constants in header file. 697 // TODO(plind): we save fp..s11, but ONLY use s3 here - use the regs 698 // or dont save. 699 RegList registers_to_retain = {fp, s1, s2, s3, s4, 700 s5, s6, s7, s8 /*, s9, s10, s11*/}; 701 DCHECK(registers_to_retain.Count() == kNumCalleeRegsToRetain); 702 703 // The remaining arguments are passed in registers, e.g.by calling the code 704 // entry as cast to a function with the signature: 705 // 706 // *int(*match)(String input_string, // a0 707 // int start_offset, // a1 708 // byte* input_start, // a2 709 // byte* input_end, // a3 710 // int* output, // a4 711 // int output_size, // a5 712 // int call_origin, // a6 713 // Isolate* isolate, // a7 714 // Address regexp); // on the stack 715 RegList argument_registers = {a0, a1, a2, a3, a4, a5, a6, a7}; 716 717 // According to MultiPush implementation, registers will be pushed in the 718 // order of ra, fp, then s8, ..., s1, and finally a7,...a0 719 __ MultiPush(RegList{ra} | registers_to_retain | argument_registers); 720 721 // Set frame pointer in space for it if this is not a direct call 722 // from generated code. 
723 __ Add64(frame_pointer(), sp, 724 Operand(argument_registers.Count() * kSystemPointerSize)); 725 726 STATIC_ASSERT(kSuccessfulCaptures == kInputString - kSystemPointerSize); 727 __ mv(a0, zero_reg); 728 __ push(a0); // Make room for success counter and initialize it to 0. 729 STATIC_ASSERT(kStringStartMinusOne == 730 kSuccessfulCaptures - kSystemPointerSize); 731 __ push(a0); // Make room for "string start - 1" constant. 732 STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize); 733 __ push(a0); // The backtrack counter 734 STATIC_ASSERT(kRegExpStackBasePointer == 735 kBacktrackCount - kSystemPointerSize); 736 __ push(a0); // The regexp stack base ptr. 737 // Store the regexp base pointer - we'll later restore it / write it to 738 // memory when returning from this irregexp code object. 739 PushRegExpBasePointer(a0, a1); 740 741 // Check if we have space on the stack for registers. 742 Label stack_limit_hit; 743 Label stack_ok; 744 745 ExternalReference stack_limit = 746 ExternalReference::address_of_jslimit(masm_->isolate()); 747 __ li(a0, Operand(stack_limit)); 748 __ Ld(a0, MemOperand(a0)); 749 __ Sub64(a0, sp, a0); 750 // Handle it if the stack pointer is already below the stack limit. 751 __ BranchShort(&stack_limit_hit, le, a0, Operand(zero_reg)); 752 // Check if there is room for the variable number of registers above 753 // the stack limit. 754 __ BranchShort(&stack_ok, Ugreater_equal, a0, 755 Operand(num_registers_ * kSystemPointerSize)); 756 // Exit with OutOfMemory exception. There is not enough space on the stack 757 // for our working registers. 758 __ li(a0, Operand(EXCEPTION)); 759 __ jmp(&return_a0); 760 761 __ bind(&stack_limit_hit); 762 CallCheckStackGuardState(a0); 763 // If returned value is non-zero, we exit with the returned value as result. 764 __ Branch(&return_a0, ne, a0, Operand(zero_reg)); 765 766 __ bind(&stack_ok); 767 // Allocate space on stack for registers. 
768 __ Sub64(sp, sp, Operand(num_registers_ * kSystemPointerSize)); 769 // Load string end. 770 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); 771 // Load input start. 772 __ Ld(a0, MemOperand(frame_pointer(), kInputStart)); 773 // Find negative length (offset of start relative to end). 774 __ Sub64(current_input_offset(), a0, end_of_input_address()); 775 // Set a0 to address of char before start of the input string 776 // (effectively string position -1). 777 __ Ld(a1, MemOperand(frame_pointer(), kStartIndex)); 778 __ Sub64(a0, current_input_offset(), Operand(char_size())); 779 __ slli(t1, a1, (mode_ == UC16) ? 1 : 0); 780 __ Sub64(a0, a0, t1); 781 // Store this value in a local variable, for use when clearing 782 // position registers. 783 __ Sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); 784 785 // Initialize code pointer register 786 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); 787 788 Label load_char_start_regexp, start_regexp; 789 // Load newline if index is at start, previous character otherwise. 790 __ BranchShort(&load_char_start_regexp, ne, a1, Operand(zero_reg)); 791 __ li(current_character(), Operand('\n')); 792 __ jmp(&start_regexp); 793 794 // Global regexp restarts matching here. 795 __ bind(&load_char_start_regexp); 796 // Load previous char as initial value of current character register. 797 LoadCurrentCharacterUnchecked(-1, 1); 798 __ bind(&start_regexp); 799 800 // Initialize on-stack registers. 801 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. 802 // Fill saved registers with initial value = start offset - 1. 803 if (num_saved_registers_ > 8) { 804 // Address of register 0. 
805 __ Add64(a1, frame_pointer(), Operand(kRegisterZero)); 806 __ li(a2, Operand(num_saved_registers_)); 807 Label init_loop; 808 __ bind(&init_loop); 809 __ Sd(a0, MemOperand(a1)); 810 __ Add64(a1, a1, Operand(-kSystemPointerSize)); 811 __ Sub64(a2, a2, Operand(1)); 812 __ Branch(&init_loop, ne, a2, Operand(zero_reg)); 813 } else { 814 for (int i = 0; i < num_saved_registers_; i++) { 815 __ Sd(a0, register_location(i)); 816 } 817 } 818 } 819 820 // Initialize backtrack stack pointer. 821 LoadRegExpStackPointerFromMemory(backtrack_stackpointer()); 822 823 __ jmp(&start_label_); 824 825 // Exit code: 826 if (success_label_.is_linked()) { 827 // Save captures when successful. 828 __ bind(&success_label_); 829 if (num_saved_registers_ > 0) { 830 // Copy captures to output. 831 __ Ld(a1, MemOperand(frame_pointer(), kInputStart)); 832 __ Ld(a0, MemOperand(frame_pointer(), kRegisterOutput)); 833 __ Ld(a2, MemOperand(frame_pointer(), kStartIndex)); 834 __ Sub64(a1, end_of_input_address(), a1); 835 // a1 is length of input in bytes. 836 if (mode_ == UC16) { 837 __ srli(a1, a1, 1); 838 } 839 // a1 is length of input in characters. 840 __ Add64(a1, a1, Operand(a2)); 841 // a1 is length of string in characters. 842 843 DCHECK_EQ(0, num_saved_registers_ % 2); 844 // Always an even number of capture registers. This allows us to 845 // unroll the loop once to add an operation between a load of a 846 // register and the following use of that register. 847 for (int i = 0; i < num_saved_registers_; i += 2) { 848 __ Ld(a2, register_location(i)); 849 __ Ld(a3, register_location(i + 1)); 850 if (i == 0 && global_with_zero_length_check()) { 851 // Keep capture start in a4 for the zero-length check later. 
852 __ mv(s3, a2); 853 } 854 if (mode_ == UC16) { 855 __ srai(a2, a2, 1); 856 __ Add64(a2, a2, a1); 857 __ srai(a3, a3, 1); 858 __ Add64(a3, a3, a1); 859 } else { 860 __ Add64(a2, a1, Operand(a2)); 861 __ Add64(a3, a1, Operand(a3)); 862 } 863 // V8 expects the output to be an int32_t array. 864 __ Sw(a2, MemOperand(a0)); 865 __ Add64(a0, a0, kIntSize); 866 __ Sw(a3, MemOperand(a0)); 867 __ Add64(a0, a0, kIntSize); 868 } 869 } 870 871 if (global()) { 872 // Restart matching if the regular expression is flagged as global. 873 __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 874 __ Ld(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); 875 __ Ld(a2, MemOperand(frame_pointer(), kRegisterOutput)); 876 // Increment success counter. 877 __ Add64(a0, a0, 1); 878 __ Sd(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 879 // Capture results have been stored, so the number of remaining global 880 // output registers is reduced by the number of stored captures. 881 __ Sub64(a1, a1, num_saved_registers_); 882 // Check whether we have enough room for another set of capture results. 883 __ Branch(&return_a0, lt, a1, Operand(num_saved_registers_)); 884 885 __ Sd(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); 886 // Advance the location for output. 887 __ Add64(a2, a2, num_saved_registers_ * kIntSize); 888 __ Sd(a2, MemOperand(frame_pointer(), kRegisterOutput)); 889 890 // Prepare a0 to initialize registers with its value in the next run. 891 __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); 892 893 if (global_with_zero_length_check()) { 894 // Special case for zero-length matches. 895 // s3: capture start index 896 // Not a zero-length match, restart. 897 __ Branch(&load_char_start_regexp, ne, current_input_offset(), 898 Operand(s3)); 899 // Offset from the end is zero if we already reached the end. 900 __ Branch(&exit_label_, eq, current_input_offset(), 901 Operand(zero_reg)); 902 // Advance current position after a zero-length match. 
903 Label advance; 904 __ bind(&advance); 905 __ Add64(current_input_offset(), current_input_offset(), 906 Operand((mode_ == UC16) ? 2 : 1)); 907 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 908 } 909 910 __ Branch(&load_char_start_regexp); 911 } else { 912 __ li(a0, Operand(SUCCESS)); 913 } 914 } 915 // Exit and return a0. 916 __ bind(&exit_label_); 917 if (global()) { 918 __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 919 } 920 921 __ bind(&return_a0); 922 // Restore the original regexp stack pointer value (effectively, pop the 923 // stored base pointer). 924 PopRegExpBasePointer(a1, a2); 925 // Skip sp past regexp registers and local variables.. 926 __ mv(sp, frame_pointer()); 927 928 // Restore registers fp..s11 and return (restoring ra to pc). 929 __ MultiPop(registers_to_retain | ra); 930 931 __ Ret(); 932 933 // Backtrack code (branch target for conditional backtracks). 934 if (backtrack_label_.is_linked()) { 935 __ bind(&backtrack_label_); 936 Backtrack(); 937 } 938 939 Label exit_with_exception; 940 941 // Preempt-code. 942 if (check_preempt_label_.is_linked()) { 943 SafeCallTarget(&check_preempt_label_); 944 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), a1); 945 // Put regexp engine registers on stack. 946 PushCallerSavedRegisters(); 947 CallCheckStackGuardState(a0); 948 PopCallerSavedRegisters(); 949 // If returning non-zero, we should end execution with the given 950 // result as return value. 951 __ Branch(&return_a0, ne, a0, Operand(zero_reg)); 952 LoadRegExpStackPointerFromMemory(backtrack_stackpointer()); 953 // String might have moved: Reload end of string from frame. 954 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); 955 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); 956 SafeReturn(); 957 } 958 959 // Backtrack stack overflow code. 960 if (stack_overflow_label_.is_linked()) { 961 SafeCallTarget(&stack_overflow_label_); 962 // Call GrowStack(isolate). 
963 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), 964 a1); 965 966 static constexpr int kNumArguments = 1; 967 __ PrepareCallCFunction(kNumArguments, 0, a0); 968 __ li(a0, ExternalReference::isolate_address(isolate())); 969 ExternalReference grow_stack = ExternalReference::re_grow_stack(); 970 __ CallCFunction(grow_stack, kNumArguments); 971 // If nullptr is returned, we have failed to grow the stack, and must exit 972 // with a stack-overflow exception. 973 __ BranchShort(&exit_with_exception, eq, a0, Operand(zero_reg)); 974 // Otherwise use return value as new stack pointer. 975 __ mv(backtrack_stackpointer(), a0); 976 // Restore saved registers and continue. 977 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); 978 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); 979 SafeReturn(); 980 } 981 982 if (exit_with_exception.is_linked()) { 983 // If any of the code above needed to exit with an exception. 984 __ bind(&exit_with_exception); 985 // Exit with Result EXCEPTION(-1) to signal thrown exception. 
986 __ li(a0, Operand(EXCEPTION)); 987 __ jmp(&return_a0); 988 } 989 990 if (fallback_label_.is_linked()) { 991 __ bind(&fallback_label_); 992 __ li(a0, Operand(FALLBACK_TO_EXPERIMENTAL)); 993 __ jmp(&return_a0); 994 } 995 } 996 997 CodeDesc code_desc; 998 masm_->GetCode(isolate(), &code_desc); 999 Handle<Code> code = 1000 Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP) 1001 .set_self_reference(masm_->CodeObject()) 1002 .Build(); 1003 LOG(masm_->isolate(), 1004 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source)); 1005 return Handle<HeapObject>::cast(code); 1006} 1007 1008void RegExpMacroAssemblerRISCV::GoTo(Label* to) { 1009 if (to == nullptr) { 1010 Backtrack(); 1011 return; 1012 } 1013 __ jmp(to); 1014 return; 1015} 1016 1017void RegExpMacroAssemblerRISCV::IfRegisterGE(int reg, int comparand, 1018 Label* if_ge) { 1019 __ Ld(a0, register_location(reg)); 1020 BranchOrBacktrack(if_ge, ge, a0, Operand(comparand)); 1021} 1022 1023void RegExpMacroAssemblerRISCV::IfRegisterLT(int reg, int comparand, 1024 Label* if_lt) { 1025 __ Ld(a0, register_location(reg)); 1026 BranchOrBacktrack(if_lt, lt, a0, Operand(comparand)); 1027} 1028 1029void RegExpMacroAssemblerRISCV::IfRegisterEqPos(int reg, Label* if_eq) { 1030 __ Ld(a0, register_location(reg)); 1031 BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset())); 1032} 1033 1034RegExpMacroAssembler::IrregexpImplementation 1035RegExpMacroAssemblerRISCV::Implementation() { 1036 return kRISCVImplementation; 1037} 1038 1039void RegExpMacroAssemblerRISCV::PopCurrentPosition() { 1040 Pop(current_input_offset()); 1041} 1042 1043void RegExpMacroAssemblerRISCV::PopRegister(int register_index) { 1044 Pop(a0); 1045 __ Sd(a0, register_location(register_index)); 1046} 1047 1048void RegExpMacroAssemblerRISCV::PushBacktrack(Label* label) { 1049 if (label->is_bound()) { 1050 int target = label->pos(); 1051 __ li(a0, Operand(target + Code::kHeaderSize - kHeapObjectTag)); 1052 } else { 1053 
Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm_.get()); 1054 Label after_constant; 1055 __ BranchShort(&after_constant); 1056 int offset = masm_->pc_offset(); 1057 int cp_offset = offset + Code::kHeaderSize - kHeapObjectTag; 1058 __ emit(0); 1059 masm_->label_at_put(label, offset); 1060 __ bind(&after_constant); 1061 if (is_int16(cp_offset)) { 1062 __ Lwu(a0, MemOperand(code_pointer(), cp_offset)); 1063 } else { 1064 __ Add64(a0, code_pointer(), cp_offset); 1065 __ Lwu(a0, MemOperand(a0, 0)); 1066 } 1067 } 1068 Push(a0); 1069 CheckStackLimit(); 1070} 1071 1072void RegExpMacroAssemblerRISCV::PushCurrentPosition() { 1073 Push(current_input_offset()); 1074} 1075 1076void RegExpMacroAssemblerRISCV::PushRegister(int register_index, 1077 StackCheckFlag check_stack_limit) { 1078 __ Ld(a0, register_location(register_index)); 1079 Push(a0); 1080 if (check_stack_limit) CheckStackLimit(); 1081} 1082 1083void RegExpMacroAssemblerRISCV::ReadCurrentPositionFromRegister(int reg) { 1084 __ Ld(current_input_offset(), register_location(reg)); 1085} 1086 1087void RegExpMacroAssemblerRISCV::WriteStackPointerToRegister(int reg) { 1088 ExternalReference ref = 1089 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 1090 __ li(a0, ref); 1091 __ Ld(a0, MemOperand(a0)); 1092 __ Sub64(a0, backtrack_stackpointer(), a0); 1093 __ Sw(a0, register_location(reg)); 1094} 1095 1096void RegExpMacroAssemblerRISCV::ReadStackPointerFromRegister(int reg) { 1097 ExternalReference ref = 1098 ExternalReference::address_of_regexp_stack_memory_top_address(isolate()); 1099 __ li(a1, ref); 1100 __ Ld(a1, MemOperand(a1)); 1101 __ Lw(backtrack_stackpointer(), register_location(reg)); 1102 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), a1); 1103} 1104 1105void RegExpMacroAssemblerRISCV::SetCurrentPositionFromEnd(int by) { 1106 Label after_position; 1107 __ BranchShort(&after_position, ge, current_input_offset(), 1108 Operand(-by * char_size())); 1109 __ 
li(current_input_offset(), -by * char_size()); 1110 // On RegExp code entry (where this operation is used), the character before 1111 // the current position is expected to be already loaded. 1112 // We have advanced the position, so it's safe to read backwards. 1113 LoadCurrentCharacterUnchecked(-1, 1); 1114 __ bind(&after_position); 1115} 1116 1117void RegExpMacroAssemblerRISCV::SetRegister(int register_index, int to) { 1118 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1119 __ li(a0, Operand(to)); 1120 __ Sd(a0, register_location(register_index)); 1121} 1122 1123bool RegExpMacroAssemblerRISCV::Succeed() { 1124 __ jmp(&success_label_); 1125 return global(); 1126} 1127 1128void RegExpMacroAssemblerRISCV::WriteCurrentPositionToRegister(int reg, 1129 int cp_offset) { 1130 if (cp_offset == 0) { 1131 __ Sd(current_input_offset(), register_location(reg)); 1132 } else { 1133 __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size())); 1134 __ Sd(a0, register_location(reg)); 1135 } 1136} 1137 1138void RegExpMacroAssemblerRISCV::ClearRegisters(int reg_from, int reg_to) { 1139 DCHECK(reg_from <= reg_to); 1140 __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); 1141 for (int reg = reg_from; reg <= reg_to; reg++) { 1142 __ Sd(a0, register_location(reg)); 1143 } 1144} 1145#ifdef RISCV_HAS_NO_UNALIGNED 1146bool RegExpMacroAssemblerRISCV::CanReadUnaligned() const { return false; } 1147#endif 1148// Private methods: 1149 1150void RegExpMacroAssemblerRISCV::CallCheckStackGuardState(Register scratch) { 1151 DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins()); 1152 DCHECK(!masm_->options().isolate_independent_code); 1153 1154 int stack_alignment = base::OS::ActivationFrameAlignment(); 1155 1156 // Align the stack pointer and save the original sp value on the stack. 
1157 __ mv(scratch, sp); 1158 __ Sub64(sp, sp, Operand(kSystemPointerSize)); 1159 DCHECK(base::bits::IsPowerOfTwo(stack_alignment)); 1160 __ And(sp, sp, Operand(-stack_alignment)); 1161 __ Sd(scratch, MemOperand(sp)); 1162 1163 __ mv(a2, frame_pointer()); 1164 // Code of self. 1165 __ li(a1, Operand(masm_->CodeObject()), CONSTANT_SIZE); 1166 1167 // We need to make room for the return address on the stack. 1168 DCHECK(IsAligned(stack_alignment, kSystemPointerSize)); 1169 __ Sub64(sp, sp, Operand(stack_alignment)); 1170 1171 // The stack pointer now points to cell where the return address will be 1172 // written. Arguments are in registers, meaning we treat the return address as 1173 // argument 5. Since DirectCEntry will handle allocating space for the C 1174 // argument slots, we don't need to care about that here. This is how the 1175 // stack will look (sp meaning the value of sp at this moment): 1176 // [sp + 3] - empty slot if needed for alignment. 1177 // [sp + 2] - saved sp. 1178 // [sp + 1] - second word reserved for return value. 1179 // [sp + 0] - first word reserved for return value. 1180 1181 // a0 will point to the return address, placed by DirectCEntry. 1182 __ mv(a0, sp); 1183 1184 ExternalReference stack_guard_check = 1185 ExternalReference::re_check_stack_guard_state(); 1186 __ li(t6, Operand(stack_guard_check)); 1187 1188 EmbeddedData d = EmbeddedData::FromBlob(); 1189 CHECK(Builtins::IsIsolateIndependent(Builtin::kDirectCEntry)); 1190 Address entry = d.InstructionStartOfBuiltin(Builtin::kDirectCEntry); 1191 __ li(kScratchReg, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); 1192 __ Call(kScratchReg); 1193 1194 // DirectCEntry allocated space for the C argument slots so we have to 1195 // drop them with the return address from the stack with loading saved sp. 1196 // At this point stack must look: 1197 // [sp + 7] - empty slot if needed for alignment. 1198 // [sp + 6] - saved sp. 1199 // [sp + 5] - second word reserved for return value. 
1200 // [sp + 4] - first word reserved for return value. 1201 // [sp + 3] - C argument slot. 1202 // [sp + 2] - C argument slot. 1203 // [sp + 1] - C argument slot. 1204 // [sp + 0] - C argument slot. 1205 __ Ld(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize)); 1206 1207 __ li(code_pointer(), Operand(masm_->CodeObject())); 1208} 1209 1210// Helper function for reading a value out of a stack frame. 1211template <typename T> 1212static T& frame_entry(Address re_frame, int frame_offset) { 1213 return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset)); 1214} 1215 1216template <typename T> 1217static T* frame_entry_address(Address re_frame, int frame_offset) { 1218 return reinterpret_cast<T*>(re_frame + frame_offset); 1219} 1220 1221int64_t RegExpMacroAssemblerRISCV::CheckStackGuardState(Address* return_address, 1222 Address raw_code, 1223 Address re_frame) { 1224 Code re_code = Code::cast(Object(raw_code)); 1225 return NativeRegExpMacroAssembler::CheckStackGuardState( 1226 frame_entry<Isolate*>(re_frame, kIsolate), 1227 static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)), 1228 static_cast<RegExp::CallOrigin>( 1229 frame_entry<int64_t>(re_frame, kDirectCall)), 1230 return_address, re_code, 1231 frame_entry_address<Address>(re_frame, kInputString), 1232 frame_entry_address<const byte*>(re_frame, kInputStart), 1233 frame_entry_address<const byte*>(re_frame, kInputEnd)); 1234} 1235 1236MemOperand RegExpMacroAssemblerRISCV::register_location(int register_index) { 1237 DCHECK(register_index < (1 << 30)); 1238 if (num_registers_ <= register_index) { 1239 num_registers_ = register_index + 1; 1240 } 1241 return MemOperand(frame_pointer(), 1242 kRegisterZero - register_index * kSystemPointerSize); 1243} 1244 1245void RegExpMacroAssemblerRISCV::CheckPosition(int cp_offset, 1246 Label* on_outside_input) { 1247 if (cp_offset >= 0) { 1248 BranchOrBacktrack(on_outside_input, ge, current_input_offset(), 1249 Operand(-cp_offset * char_size())); 1250 } else { 
1251 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); 1252 __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size())); 1253 BranchOrBacktrack(on_outside_input, le, a0, Operand(a1)); 1254 } 1255} 1256 1257void RegExpMacroAssemblerRISCV::BranchOrBacktrack(Label* to, 1258 Condition condition, 1259 Register rs, 1260 const Operand& rt) { 1261 if (condition == al) { // Unconditional. 1262 if (to == nullptr) { 1263 Backtrack(); 1264 return; 1265 } 1266 __ jmp(to); 1267 return; 1268 } 1269 if (to == nullptr) { 1270 __ Branch(&backtrack_label_, condition, rs, rt); 1271 return; 1272 } 1273 __ Branch(to, condition, rs, rt); 1274} 1275 1276void RegExpMacroAssemblerRISCV::SafeCall(Label* to, Condition cond, Register rs, 1277 const Operand& rt) { 1278 __ BranchAndLink(to, cond, rs, rt); 1279} 1280 1281void RegExpMacroAssemblerRISCV::SafeReturn() { 1282 __ pop(ra); 1283 __ Add64(t1, ra, Operand(masm_->CodeObject())); 1284 __ Jump(t1); 1285} 1286 1287void RegExpMacroAssemblerRISCV::SafeCallTarget(Label* name) { 1288 __ bind(name); 1289 __ Sub64(ra, ra, Operand(masm_->CodeObject())); 1290 __ push(ra); 1291} 1292 1293void RegExpMacroAssemblerRISCV::Push(Register source) { 1294 DCHECK(source != backtrack_stackpointer()); 1295 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), 1296 Operand(-kIntSize)); 1297 __ Sw(source, MemOperand(backtrack_stackpointer())); 1298} 1299 1300void RegExpMacroAssemblerRISCV::Pop(Register target) { 1301 DCHECK(target != backtrack_stackpointer()); 1302 __ Lw(target, MemOperand(backtrack_stackpointer())); 1303 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize); 1304} 1305 1306void RegExpMacroAssemblerRISCV::CheckPreemption() { 1307 // Check for preemption. 
1308 ExternalReference stack_limit = 1309 ExternalReference::address_of_jslimit(masm_->isolate()); 1310 __ li(a0, Operand(stack_limit)); 1311 __ Ld(a0, MemOperand(a0)); 1312 SafeCall(&check_preempt_label_, Uless_equal, sp, Operand(a0)); 1313} 1314 1315void RegExpMacroAssemblerRISCV::CheckStackLimit() { 1316 ExternalReference stack_limit = 1317 ExternalReference::address_of_regexp_stack_limit_address( 1318 masm_->isolate()); 1319 1320 __ li(a0, Operand(stack_limit)); 1321 __ Ld(a0, MemOperand(a0)); 1322 SafeCall(&stack_overflow_label_, Uless_equal, backtrack_stackpointer(), 1323 Operand(a0)); 1324} 1325 1326void RegExpMacroAssemblerRISCV::LoadCurrentCharacterUnchecked(int cp_offset, 1327 int characters) { 1328 Register offset = current_input_offset(); 1329 1330 // If unaligned load/stores are not supported then this function must only 1331 // be used to load a single character at a time. 1332 if (!CanReadUnaligned()) { 1333 DCHECK_EQ(1, characters); 1334 } 1335 if (cp_offset != 0) { 1336 // t3 is not being used to store the capture start index at this point. 
1337 __ Add64(t3, current_input_offset(), Operand(cp_offset * char_size())); 1338 offset = t3; 1339 } 1340 1341 if (mode_ == LATIN1) { 1342 if (characters == 4) { 1343 __ Add64(kScratchReg, end_of_input_address(), offset); 1344 __ Lwu(current_character(), MemOperand(kScratchReg)); 1345 } else if (characters == 2) { 1346 __ Add64(kScratchReg, end_of_input_address(), offset); 1347 __ Lhu(current_character(), MemOperand(kScratchReg)); 1348 } else { 1349 DCHECK_EQ(1, characters); 1350 __ Add64(kScratchReg, end_of_input_address(), offset); 1351 __ Lbu(current_character(), MemOperand(kScratchReg)); 1352 } 1353 } else { 1354 DCHECK(mode_ == UC16); 1355 if (characters == 2) { 1356 __ Add64(kScratchReg, end_of_input_address(), offset); 1357 __ Lwu(current_character(), MemOperand(kScratchReg)); 1358 } else { 1359 DCHECK_EQ(1, characters); 1360 __ Add64(kScratchReg, end_of_input_address(), offset); 1361 __ Lhu(current_character(), MemOperand(kScratchReg)); 1362 } 1363 } 1364} 1365 1366#undef __ 1367 1368} // namespace internal 1369} // namespace v8 1370 1371#endif // V8_TARGET_ARCH_RISCV64 1372