// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <limits.h>  // For LONG_MIN, LONG_MAX.

#if V8_TARGET_ARCH_RISCV64

#include "src/base/bits.h"
#include "src/base/division-by-constant.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/code-factory.h"
#include "src/codegen/external-reference-table.h"
#include "src/codegen/interface-descriptors-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/register-configuration.h"
#include "src/debug/debug.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/frames-inl.h"
#include "src/heap/memory-chunk.h"
#include "src/init/bootstrapper.h"
#include "src/logging/counters.h"
#include "src/objects/heap-number.h"
#include "src/runtime/runtime.h"
#include "src/snapshot/snapshot.h"
#include "src/wasm/wasm-code-manager.h"

// Satisfy cpplint check, but don't include platform-specific header. It is
// included recursively via macro-assembler.h.
#if 0
#include "src/codegen/riscv64/macro-assembler-riscv64.h"
#endif

namespace v8 {
namespace internal {

static inline bool IsZero(const Operand& rt) {
  if (rt.is_reg()) {
    return rt.rm() == zero_reg;
  } else {
    return rt.immediate() == 0;
  }
}

int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
                                                    Register exclusion1,
                                                    Register exclusion2,
                                                    Register exclusion3) const {
  int bytes = 0;

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  bytes += list.Count() * kSystemPointerSize;

  if (fp_mode == SaveFPRegsMode::kSave) {
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  return bytes;
}

int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
                                    Register exclusion2, Register exclusion3) {
  int bytes = 0;

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  MultiPush(list);
  bytes += list.Count() * kSystemPointerSize;

  if (fp_mode == SaveFPRegsMode::kSave) {
    MultiPushFPU(kCallerSavedFPU);
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  return bytes;
}

int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
                                   Register exclusion2, Register exclusion3) {
  int bytes = 0;
  if (fp_mode == SaveFPRegsMode::kSave) {
    MultiPopFPU(kCallerSavedFPU);
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  MultiPop(list);
  bytes += list.Count() * kSystemPointerSize;

  return bytes;
}

void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
  Ld(destination,
     MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
}

void TurboAssembler::LoadRoot(Register destination, RootIndex index,
                              Condition cond, Register src1,
                              const Operand& src2) {
  Label skip;
  BranchShort(&skip, NegateCondition(cond), src1, src2);
  Ld(destination,
     MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
  bind(&skip);
}

void TurboAssembler::PushCommonFrame(Register marker_reg) {
  if (marker_reg.is_valid()) {
    Push(ra, fp, marker_reg);
    Add64(fp, sp, Operand(kSystemPointerSize));
  } else {
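    // No frame marker: the frame holds only {ra, fp}, and fp is left pointing
    // at the saved fp slot.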
    Push(ra, fp);
    Mv(fp, sp);
  }
}

void TurboAssembler::PushStandardFrame(Register function_reg) {
  int offset = -StandardFrameConstants::kContextOffset;
  if (function_reg.is_valid()) {
    Push(ra, fp, cp, function_reg, kJavaScriptCallArgCountRegister);
    offset += 2 * kSystemPointerSize;
  } else {
    Push(ra, fp, cp, kJavaScriptCallArgCountRegister);
    offset += kSystemPointerSize;
  }
  Add64(fp, sp, Operand(offset));
}

int MacroAssembler::SafepointRegisterStackIndex(int reg_code) {
  // The registers are pushed starting with the highest encoding,
  // which means that lowest encodings are closest to the stack pointer.
  return kSafepointRegisterStackIndexMap[reg_code];
}

// Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved)
// The register 'object' contains a heap object pointer. The heap object
// tag is shifted away.
void MacroAssembler::RecordWriteField(Register object, int offset,
                                      Register value, RAStatus ra_status,
                                      SaveFPRegsMode save_fp,
                                      RememberedSetAction remembered_set_action,
                                      SmiCheck smi_check) {
  DCHECK(!AreAliased(object, value));
  // First, check if a write barrier is even needed. The tests below
  // catch stores of Smis.
  Label done;

  // Skip the barrier if writing a smi.
  if (smi_check == SmiCheck::kInline) {
    JumpIfSmi(value, &done);
  }

  // Although the object register is tagged, the offset is relative to the start
  // of the object, so offset must be a multiple of kTaggedSize.
  DCHECK(IsAligned(offset, kTaggedSize));

  if (FLAG_debug_code) {
    Label ok;
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(!AreAliased(object, value, scratch));
    Add64(scratch, object, offset - kHeapObjectTag);
    And(scratch, scratch, Operand(kTaggedSize - 1));
    BranchShort(&ok, eq, scratch, Operand(zero_reg));
    Abort(AbortReason::kUnalignedCellInWriteBarrier);
    bind(&ok);
  }

  RecordWrite(object, Operand(offset - kHeapObjectTag), value, ra_status,
              save_fp, remembered_set_action, SmiCheck::kOmit);

  bind(&done);
}

void TurboAssembler::MaybeSaveRegisters(RegList registers) {
  if (registers.is_empty()) return;
  MultiPush(registers);
}

void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
  if (registers.is_empty()) return;
  MultiPop(registers);
}

void TurboAssembler::CallEphemeronKeyBarrier(Register object,
                                             Register slot_address,
                                             SaveFPRegsMode fp_mode) {
  DCHECK(!AreAliased(object, slot_address));
  RegList registers =
      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
  MaybeSaveRegisters(registers);

  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
  Register slot_address_parameter =
      WriteBarrierDescriptor::SlotAddressRegister();

  Push(object);
  Push(slot_address);
  Pop(slot_address_parameter);
  Pop(object_parameter);

  Call(isolate()->builtins()->code_handle(
           Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
       RelocInfo::CODE_TARGET);
  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStubSaveRegisters(
    Register object, Register slot_address,
    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
    StubCallMode mode) {
  DCHECK(!AreAliased(object, slot_address));
  RegList registers =
      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
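  // MaybeSaveRegisters spills exactly the caller-saved set the write-barrier
  // descriptor says must be preserved around the stub call.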
  MaybeSaveRegisters(registers);

  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
  Register slot_address_parameter =
      WriteBarrierDescriptor::SlotAddressRegister();

  Push(object);
  Push(slot_address);
  Pop(slot_address_parameter);
  Pop(object_parameter);

  CallRecordWriteStub(object_parameter, slot_address_parameter,
                      remembered_set_action, fp_mode, mode);

  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStub(
    Register object, Register slot_address,
    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
    StubCallMode mode) {
  // Use CallRecordWriteStubSaveRegisters if the object and slot registers
  // need to be caller saved.
  DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
  DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
  if (mode == StubCallMode::kCallWasmRuntimeStub) {
    auto wasm_target =
        wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
    Call(wasm_target, RelocInfo::WASM_STUB_CALL);
  } else {
    auto builtin = Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
    if (options().inline_offheap_trampolines) {
      // Inline the trampoline.
      RecordCommentForOffHeapTrampoline(builtin);

      UseScratchRegisterScope temps(this);
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Register scratch = temps.Acquire();
      li(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
      Call(scratch);
      RecordComment("]");
    } else {
      Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
      Call(code_target, RelocInfo::CODE_TARGET);
    }
  }
}

// Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved)
// The register 'object' contains a heap object pointer. The heap object
// tag is shifted away.
void MacroAssembler::RecordWrite(Register object, Operand offset,
                                 Register value, RAStatus ra_status,
                                 SaveFPRegsMode fp_mode,
                                 RememberedSetAction remembered_set_action,
                                 SmiCheck smi_check) {
  DCHECK(!AreAliased(object, value));

  if (FLAG_debug_code) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.Acquire();
    DCHECK(!AreAliased(object, value, temp));
    Add64(temp, object, offset);
    LoadTaggedPointerField(temp, MemOperand(temp));
    Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, temp,
           Operand(value));
  }

  if ((remembered_set_action == RememberedSetAction::kOmit &&
       !FLAG_incremental_marking) ||
      FLAG_disable_write_barriers) {
    return;
  }

  // First, check if a write barrier is even needed. The tests below
  // catch stores of smis and stores into the young generation.
  Label done;

  if (smi_check == SmiCheck::kInline) {
    DCHECK_EQ(0, kSmiTag);
    JumpIfSmi(value, &done);
  }

  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.Acquire();
    CheckPageFlag(value,
                  temp,  // Used as scratch.
                  MemoryChunk::kPointersToHereAreInterestingMask,
                  eq,  // In RISC-V, cc compares the masked flags with zero,
                       // so with eq the branch to done is taken when no bits
                       // are set.
                  &done);

    CheckPageFlag(object,
                  temp,  // Used as scratch.
                  MemoryChunk::kPointersFromHereAreInterestingMask,
                  eq,  // In RISC-V, cc compares the masked flags with zero,
                       // so with eq the branch to done is taken when no bits
                       // are set.
                  &done);
  }
  // Record the actual write.
  if (ra_status == kRAHasNotBeenSaved) {
    push(ra);
  }
  Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
  DCHECK(!AreAliased(object, slot_address, value));
  // TODO(cbruni): Turn offset into int.
  DCHECK(offset.IsImmediate());
  Add64(slot_address, object, offset);
  CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
  if (ra_status == kRAHasNotBeenSaved) {
    pop(ra);
  }
  if (FLAG_debug_code) li(slot_address, Operand(kZapValue));

  bind(&done);
}

// ---------------------------------------------------------------------------
// Instruction macros.

void TurboAssembler::Add32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_addw(rd, rt.rm());
    } else {
      addw(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        (rd.code() == rs.code()) && (rd != zero_reg) &&
        !MustUseReg(rt.rmode())) {
      c_addiw(rd, static_cast<int8_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      addiw(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
               (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
      addiw(rd, rs, rt.immediate() / 2);
      addiw(rd, rd, rt.immediate() - (rt.immediate() / 2));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      addw(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Add64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rt.rm() != zero_reg) && (rs != zero_reg)) {
      c_add(rd, rt.rm());
    } else {
      add(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        (rd.code() == rs.code()) && (rd != zero_reg) && (rt.immediate() != 0) &&
        !MustUseReg(rt.rmode())) {
      c_addi(rd, static_cast<int8_t>(rt.immediate()));
    } else if (FLAG_riscv_c_extension && is_int10(rt.immediate()) &&
               (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
               (rd.code() == rs.code()) && (rd == sp) &&
               !MustUseReg(rt.rmode())) {
      c_addi16sp(static_cast<int16_t>(rt.immediate()));
    } else if (FLAG_riscv_c_extension && ((rd.code() & 0b11000) == 0b01000) &&
               (rs == sp) && is_uint10(rt.immediate()) &&
               (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
      c_addi4spn(rd, static_cast<uint16_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      addi(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
               (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
      addi(rd, rs, rt.immediate() / 2);
      addi(rd, rd, rt.immediate() - (rt.immediate() / 2));
    } else {
      // li handles the relocation.
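      // Fall back: materialize the immediate (or relocatable value) in a
      // scratch register and use a register-register add.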
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      add(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Sub32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_subw(rd, rt.rm());
    } else {
      subw(rd, rs, rt.rm());
    }
  } else {
    DCHECK(is_int32(rt.immediate()));
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rd != zero_reg) && is_int6(-rt.immediate()) &&
        !MustUseReg(rt.rmode())) {
      c_addiw(
          rd,
          static_cast<int8_t>(
              -rt.immediate()));  // No c_subiw instr, use c_addiw(x, y, -imm).
    } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
      addiw(rd, rs,
            static_cast<int32_t>(
                -rt.immediate()));  // No subiw instr, use addiw(x, y, -imm).
    } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
               (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
      addiw(rd, rs, -rt.immediate() / 2);
      addiw(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
    } else {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      if (-rt.immediate() >> 12 == 0 && !MustUseReg(rt.rmode())) {
        // Use load -imm and addw when loading -imm generates one instruction.
        Li(scratch, -rt.immediate());
        addw(rd, rs, scratch);
      } else {
        // li handles the relocation.
        Li(scratch, rt.immediate());
        subw(rd, rs, scratch);
      }
    }
  }
}

void TurboAssembler::Sub64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_sub(rd, rt.rm());
    } else {
      sub(rd, rs, rt.rm());
    }
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             (rd != zero_reg) && is_int6(-rt.immediate()) &&
             (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
    c_addi(rd,
           static_cast<int8_t>(
               -rt.immediate()));  // No c_subi instr, use c_addi(x, y, -imm).

  } else if (FLAG_riscv_c_extension && is_int10(-rt.immediate()) &&
             (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
             (rd.code() == rs.code()) && (rd == sp) &&
             !MustUseReg(rt.rmode())) {
    c_addi16sp(static_cast<int16_t>(-rt.immediate()));
  } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
    addi(rd, rs,
         static_cast<int32_t>(
             -rt.immediate()));  // No subi instr, use addi(x, y, -imm).
  } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
             (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
    addi(rd, rs, -rt.immediate() / 2);
    addi(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
  } else {
    int li_count = InstrCountForLi64Bit(rt.immediate());
    int li_neg_count = InstrCountForLi64Bit(-rt.immediate());
    if (li_neg_count < li_count && !MustUseReg(rt.rmode())) {
      // Use load -imm and add when loading -imm takes fewer instructions.
      DCHECK(rt.immediate() != std::numeric_limits<int32_t>::min());
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, -rt.immediate());
      add(rd, rs, scratch);
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      sub(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Mul32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mulw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mulw(rd, rs, scratch);
  }
}

void TurboAssembler::Mulh32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mul(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mul(rd, rs, scratch);
  }
  srai(rd, rd, 32);
}

void TurboAssembler::Mulhu32(Register rd, Register rs, const Operand& rt,
                             Register rsz, Register rtz) {
  slli(rsz, rs, 32);
  if (rt.is_reg()) {
    slli(rtz, rt.rm(), 32);
  } else {
    Li(rtz, rt.immediate() << 32);
  }
  mulhu(rd, rsz, rtz);
  srai(rd, rd, 32);
}

void TurboAssembler::Mul64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mul(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mul(rd, rs, scratch);
  }
}

void TurboAssembler::Mulh64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mulh(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mulh(rd, rs, scratch);
  }
}

void TurboAssembler::Div32(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divw(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divw(res, rs, scratch);
  }
}

void TurboAssembler::Mod32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remw(rd, rs, scratch);
  }
}

void TurboAssembler::Modu32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remuw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remuw(rd, rs, scratch);
  }
}

void TurboAssembler::Div64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    div(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    div(rd, rs, scratch);
  }
}

void TurboAssembler::Divu32(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divuw(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divuw(res, rs, scratch);
  }
}

void TurboAssembler::Divu64(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divu(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divu(res, rs, scratch);
  }
}

void TurboAssembler::Mod64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    rem(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    rem(rd, rs, scratch);
  }
}

void TurboAssembler::Modu64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remu(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remu(rd, rs, scratch);
  }
}

void TurboAssembler::And(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_and(rd, rt.rm());
    } else {
      and_(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        !MustUseReg(rt.rmode()) && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000)) {
      c_andi(rd, static_cast<int8_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      andi(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      and_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Or(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_or(rd, rt.rm());
    } else {
      or_(rd, rs, rt.rm());
    }
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      ori(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      or_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Xor(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_xor(rd, rt.rm());
    } else {
      xor_(rd, rs, rt.rm());
    }
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      xori(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      xor_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Nor(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    or_(rd, rs, rt.rm());
    not_(rd, rd);
  } else {
    Or(rd, rs, rt);
    not_(rd, rd);
  }
}

void TurboAssembler::Neg(Register rs, const Operand& rt) {
  DCHECK(rt.is_reg());
  neg(rs, rt.rm());
}

void TurboAssembler::Seqz(Register rd, const Operand& rt) {
  if (rt.is_reg()) {
    seqz(rd, rt.rm());
  } else {
    li(rd, rt.immediate() == 0);
  }
}

void TurboAssembler::Snez(Register rd, const Operand& rt) {
  if (rt.is_reg()) {
    snez(rd, rt.rm());
  } else {
    li(rd, rt.immediate() != 0);
  }
}

void TurboAssembler::Seq(Register rd, Register rs, const Operand& rt) {
  if (rs == zero_reg) {
    Seqz(rd, rt);
  } else if (IsZero(rt)) {
    seqz(rd, rs);
  } else {
    Sub64(rd, rs, rt);
    seqz(rd, rd);
  }
}

void TurboAssembler::Sne(Register rd, Register rs, const Operand& rt) {
  if (rs == zero_reg) {
    Snez(rd, rt);
  } else if (IsZero(rt)) {
    snez(rd, rs);
  } else {
    Sub64(rd, rs, rt);
    snez(rd, rd);
  }
}

void TurboAssembler::Slt(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rs, rt.rm());
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      slti(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      slt(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Sltu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rs, rt.rm());
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      sltiu(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      sltu(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Sle(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    slt(rd, scratch, rs);
  }
  xori(rd, rd, 1);
}

void TurboAssembler::Sleu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    sltu(rd, scratch, rs);
  }
  xori(rd, rd, 1);
}

void TurboAssembler::Sge(Register rd, Register rs, const Operand& rt) {
  Slt(rd, rs, rt);
  xori(rd, rd, 1);
}

void TurboAssembler::Sgeu(Register rd, Register rs, const Operand& rt) {
  Sltu(rd, rs, rt);
  xori(rd, rd, 1);
}

void TurboAssembler::Sgt(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    slt(rd, scratch, rs);
  }
}

void TurboAssembler::Sgtu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    sltu(rd, scratch, rs);
  }
}

void TurboAssembler::Sll32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sllw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    slliw(rd, rs, shamt);
  }
}

void TurboAssembler::Sra32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sraw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    sraiw(rd, rs, shamt);
  }
}

void TurboAssembler::Srl32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    srlw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srliw(rd, rs, shamt);
  }
}

void TurboAssembler::Sra64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sra(rd, rs, rt.rm());
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    c_srai(rd, shamt);
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srai(rd, rs, shamt);
  }
}

void TurboAssembler::Srl64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    srl(rd, rs, rt.rm());
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    c_srli(rd, shamt);
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srli(rd, rs, shamt);
  }
}

void TurboAssembler::Sll64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sll(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rd != zero_reg) && (shamt != 0) && is_uint6(shamt)) {
      c_slli(rd, shamt);
    } else {
      slli(rd, rs, shamt);
    }
  }
}

void TurboAssembler::Li(Register rd, int64_t imm) {
  if (FLAG_riscv_c_extension && (rd != zero_reg) && is_int6(imm)) {
    c_li(rd, imm);
  } else {
    RV_li(rd, imm);
  }
}

void TurboAssembler::Mv(Register rd, const Operand& rt) {
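  // Use the compressed c.mv encoding when the C extension is enabled and
  // neither operand is the zero register.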
  if (FLAG_riscv_c_extension && (rd != zero_reg) && (rt.rm() != zero_reg)) {
    c_mv(rd, rt.rm());
  } else {
    mv(rd, rt.rm());
  }
}

void TurboAssembler::Ror(Register rd, Register rs, const Operand& rt) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (rt.is_reg()) {
    negw(scratch, rt.rm());
    sllw(scratch, rs, scratch);
    srlw(rd, rs, rt.rm());
    or_(rd, scratch, rd);
    sext_w(rd, rd);
  } else {
    int64_t ror_value = rt.immediate() % 32;
    if (ror_value == 0) {
      Mv(rd, rs);
      return;
    } else if (ror_value < 0) {
      ror_value += 32;
    }
    srliw(scratch, rs, ror_value);
    slliw(rd, rs, 32 - ror_value);
    or_(rd, scratch, rd);
    sext_w(rd, rd);
  }
}

void TurboAssembler::Dror(Register rd, Register rs, const Operand& rt) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (rt.is_reg()) {
    negw(scratch, rt.rm());
    sll(scratch, rs, scratch);
    srl(rd, rs, rt.rm());
    or_(rd, scratch, rd);
  } else {
    int64_t dror_value = rt.immediate() % 64;
    if (dror_value == 0) {
      Mv(rd, rs);
      return;
    } else if (dror_value < 0) {
      dror_value += 64;
    }
    srli(scratch, rs, dror_value);
    slli(rd, rs, 64 - dror_value);
    or_(rd, scratch, rd);
  }
}

void TurboAssembler::CalcScaledAddress(Register rd, Register rt, Register rs,
                                       uint8_t sa) {
  DCHECK(sa >= 1 && sa <= 31);
  UseScratchRegisterScope temps(this);
  Register tmp = rd == rt ? temps.Acquire() : rd;
  DCHECK(tmp != rt);
  slli(tmp, rs, sa);
  Add64(rd, rt, tmp);
}

// ------------Pseudo-instructions-------------
// Change endianness
void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size,
                              Register scratch) {
  DCHECK_NE(scratch, rs);
  DCHECK_NE(scratch, rd);
  DCHECK(operand_size == 4 || operand_size == 8);
  if (operand_size == 4) {
    // uint32_t x1 = 0x00FF00FF;
    // x0 = (x0 << 16 | x0 >> 16);
    // x0 = (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8));
    UseScratchRegisterScope temps(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    DCHECK((rd != t6) && (rs != t6));
    Register x0 = temps.Acquire();
    Register x1 = temps.Acquire();
    Register x2 = scratch;
    li(x1, 0x00FF00FF);
    slliw(x0, rs, 16);
    srliw(rd, rs, 16);
    or_(x0, rd, x0);   // x0 <- x0 << 16 | x0 >> 16
    and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF
    slliw(x2, x2, 8);  // x2 <- (x0 & x1) << 8
    slliw(x1, x1, 8);  // x1 <- 0xFF00FF00
    and_(rd, x0, x1);  // x0 & 0xFF00FF00
    srliw(rd, rd, 8);
    or_(rd, rd, x2);  // (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8))
  } else {
    // uint64_t x1 = 0x0000FFFF0000FFFFl;
    // uint64_t x1 = 0x00FF00FF00FF00FFl;
    // x0 = (x0 << 32 | x0 >> 32);
    // x0 = (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
    // x0 = (x0 & x1) << 8 | (x0 & (x1 << 8)) >> 8;
    UseScratchRegisterScope temps(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    DCHECK((rd != t6) && (rs != t6));
    Register x0 = temps.Acquire();
    Register x1 = temps.Acquire();
    Register x2 = scratch;
    li(x1, 0x0000FFFF0000FFFFl);
    slli(x0, rs, 32);
    srli(rd, rs, 32);
    or_(x0, rd, x0);   // x0 <- x0 << 32 | x0 >> 32
    and_(x2, x0, x1);  // x2 <- x0 & 0x0000FFFF0000FFFF
    slli(x2, x2, 16);  // x2 <- (x0 & 0x0000FFFF0000FFFF) << 16
    slli(x1, x1, 16);  // x1 <- 0xFFFF0000FFFF0000
    and_(rd, x0, x1);  // rd <- x0 & 0xFFFF0000FFFF0000
    srli(rd, rd, 16);  // rd <- x0 & (x1 << 16)) >> 16
    or_(x0, rd, x2);   // (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
    li(x1, 0x00FF00FF00FF00FFl);
    and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF00FF00FF
    slli(x2, x2, 8);   // x2 <- (x0 & x1) << 8
    slli(x1, x1, 8);   // x1 <- 0xFF00FF00FF00FF00
    and_(rd, x0, x1);
    srli(rd, rd, 8);  // rd <- (x0 & (x1 << 8)) >> 8
    or_(rd, rd, x2);  // (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8))
  }
}

template <int NBYTES, bool LOAD_SIGNED>
void TurboAssembler::LoadNBytes(Register rd, const MemOperand& rs,
                                Register scratch) {
  DCHECK(rd != rs.rm() && rd != scratch);
  DCHECK_LE(NBYTES, 8);

  // load the most significant byte
  if (LOAD_SIGNED) {
    lb(rd, rs.rm(), rs.offset() + (NBYTES - 1));
  } else {
    lbu(rd, rs.rm(), rs.offset() + (NBYTES - 1));
  }

  // load remaining (nbytes-1) bytes from higher to lower
  slli(rd, rd, 8 * (NBYTES - 1));
  for (int i = (NBYTES - 2); i >= 0; i--) {
    lbu(scratch, rs.rm(), rs.offset() + i);
    if (i) slli(scratch, scratch, i * 8);
    or_(rd, rd, scratch);
  }
}

template <int NBYTES, bool LOAD_SIGNED>
void TurboAssembler::LoadNBytesOverwritingBaseReg(const MemOperand& rs,
                                                  Register scratch0,
                                                  Register scratch1) {
  // This function loads nbytes from memory specified by rs and into rs.rm()
  DCHECK(rs.rm() != scratch0 && rs.rm() != scratch1 && scratch0 != scratch1);
  DCHECK_LE(NBYTES, 8);

  // load the most significant byte
  if (LOAD_SIGNED) {
    lb(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
  } else {
    lbu(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
  }

  // load remaining (nbytes-1) bytes from higher to lower
  slli(scratch0, scratch0, 8 * (NBYTES - 1));
  for (int i = (NBYTES - 2); i >= 0; i--) {
    lbu(scratch1, rs.rm(), rs.offset() + i);
    if (i) {
      slli(scratch1, scratch1, i * 8);
      or_(scratch0, scratch0, scratch1);
    } else {
      // write to rs.rm() when processing the last byte
      or_(rs.rm(), scratch0, scratch1);
    }
  }
}

template <int NBYTES, bool IS_SIGNED>
void TurboAssembler::UnalignedLoadHelper(Register rd, const MemOperand& rs) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  UseScratchRegisterScope temps(this);

  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    // Adjust offset for two accesses and check that offset + NBYTES - 1 fits
    // into int12.
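    // The access is emitted as NBYTES single-byte loads (see LoadNBytes), so
    // rebasing once here keeps every per-byte offset within the 12-bit
    // immediate range.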
    MemOperand source = rs;
    Register scratch_base = temps.Acquire();
    DCHECK(scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);

    // Since source.rm() is scratch_base, assume rd != source.rm()
    DCHECK(rd != source.rm());
    Register scratch_other = temps.Acquire();
    LoadNBytes<NBYTES, IS_SIGNED>(rd, source, scratch_other);
  } else {
    // no need to adjust base-and-offset
    if (rd != rs.rm()) {
      Register scratch = temps.Acquire();
      LoadNBytes<NBYTES, IS_SIGNED>(rd, rs, scratch);
    } else {  // rd == rs.rm()
      Register scratch = temps.Acquire();
      Register scratch2 = temps.Acquire();
      LoadNBytesOverwritingBaseReg<NBYTES, IS_SIGNED>(rs, scratch, scratch2);
    }
  }
}

template <int NBYTES>
void TurboAssembler::UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
                                          Register scratch_base) {
  DCHECK(NBYTES == 4 || NBYTES == 8);
  DCHECK_NE(scratch_base, rs.rm());
  BlockTrampolinePoolScope block_trampoline_pool(this);
  MemOperand source = rs;
  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    // Adjust offset for two accesses and check that offset + NBYTES - 1 fits
    // into int12.
    DCHECK(scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);
  }
  UseScratchRegisterScope temps(this);
  Register scratch_other = temps.Acquire();
  Register scratch = temps.Acquire();
  DCHECK(scratch != rs.rm() && scratch_other != scratch &&
         scratch_other != rs.rm());
  LoadNBytes<NBYTES, true>(scratch, source, scratch_other);
  if (NBYTES == 4)
    fmv_w_x(frd, scratch);
  else
    fmv_d_x(frd, scratch);
}

template <int NBYTES>
void TurboAssembler::UnalignedStoreHelper(Register rd, const MemOperand& rs,
                                          Register scratch_other) {
  DCHECK(scratch_other != rs.rm());
  DCHECK_LE(NBYTES, 8);
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  Register scratch_base = temps.Acquire();
  // Adjust offset for two accesses and check that offset + NBYTES - 1 fits
  // into int12.
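  // The store below is emitted byte-by-byte (lowest byte first), so the
  // rebased offset only needs offset + NBYTES - 1 to stay within int12 range.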
  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    DCHECK(scratch_base != rd && scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);
  }

  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (scratch_other == no_reg) {
    if (temps.hasAvailable()) {
      scratch_other = temps.Acquire();
    } else {
      push(t2);
      scratch_other = t2;
    }
  }

  DCHECK(scratch_other != rd && scratch_other != rs.rm() &&
         scratch_other != source.rm());

  sb(rd, source.rm(), source.offset());
  for (size_t i = 1; i <= (NBYTES - 1); i++) {
    srli(scratch_other, rd, i * 8);
    sb(scratch_other, source.rm(), source.offset() + i);
  }
  if (scratch_other == t2) {
    pop(t2);
  }
}

template <int NBYTES>
void TurboAssembler::UnalignedFStoreHelper(FPURegister frd,
                                           const MemOperand& rs,
                                           Register scratch) {
  DCHECK(NBYTES == 8 || NBYTES == 4);
  DCHECK_NE(scratch, rs.rm());
  if (NBYTES == 4) {
    fmv_x_w(scratch, frd);
  } else {
    fmv_x_d(scratch, frd);
  }
  UnalignedStoreHelper<NBYTES>(scratch, rs);
}

template <typename Reg_T, typename Func>
void TurboAssembler::AlignedLoadHelper(Reg_T target, const MemOperand& rs,
                                       Func generator) {
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (NeedAdjustBaseAndOffset(source)) {
    Register scratch = temps.Acquire();
    DCHECK(scratch != rs.rm());
    AdjustBaseAndOffset(&source, scratch);
  }
  generator(target, source);
}

template <typename Reg_T, typename Func>
void TurboAssembler::AlignedStoreHelper(Reg_T value, const MemOperand& rs,
                                        Func generator) {
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (NeedAdjustBaseAndOffset(source)) {
    Register scratch = temps.Acquire();
    // make sure scratch does not overwrite value
    if (std::is_same<Reg_T, Register>::value)
      DCHECK(scratch.code() != value.code());
    DCHECK(scratch != rs.rm());
    AdjustBaseAndOffset(&source, scratch);
  }
  generator(value, source);
}

void TurboAssembler::Ulw(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<4, true>(rd, rs);
}

void TurboAssembler::Ulwu(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<4, false>(rd, rs);
}

void TurboAssembler::Usw(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<4>(rd, rs);
}

void TurboAssembler::Ulh(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<2, true>(rd, rs);
}

void TurboAssembler::Ulhu(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<2, false>(rd, rs);
}

void TurboAssembler::Ush(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<2>(rd, rs);
}

void TurboAssembler::Uld(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<8, true>(rd, rs);
}

// Load consecutive 32-bit word pair in 64-bit reg. and put first word in low
// bits, second word in high bits.
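// I.e. for two adjacent words {lo, hi} in memory, rd ends up holding
// (hi << 32) | lo.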
void MacroAssembler::LoadWordPair(Register rd, const MemOperand& rs) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Lwu(rd, rs);
  Lw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
  slli(scratch, scratch, 32);
  Add64(rd, rd, scratch);
}

void TurboAssembler::Usd(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<8>(rd, rs);
}

// Do 64-bit store as two consecutive 32-bit stores to unaligned address.
void MacroAssembler::StoreWordPair(Register rd, const MemOperand& rs) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Sw(rd, rs);
  srai(scratch, rd, 32);
  Sw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
}

void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs,
                                Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFLoadHelper<4>(fd, rs, scratch);
}

void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs,
                                 Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFStoreHelper<4>(fd, rs, scratch);
}

void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs,
                                 Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFLoadHelper<8>(fd, rs, scratch);
}

void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs,
                                  Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFStoreHelper<8>(fd, rs, scratch);
}

void TurboAssembler::Lb(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lb(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lbu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lbu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sb(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    this->sb(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Lh(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lh(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lhu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lhu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sh(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    this->sh(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Lw(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
      this->c_lw(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
               is_uint8(source.offset()) && (source.rm() == sp) &&
               ((source.offset() & 0x3) == 0)) {
      this->c_lwsp(target, source.offset());
    } else {
      this->lw(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lwu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lwu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sw(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
      this->c_sw(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint8(source.offset()) && (((source.offset() & 0x3) == 0))) {
      this->c_swsp(value, source.offset());
    } else {
      this->sw(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Ld(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_ld(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
               is_uint9(source.offset()) && (source.rm() == sp) &&
               ((source.offset() & 0x7) == 0)) {
      this->c_ldsp(target, source.offset());
    } else {
      this->ld(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sd(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_sd(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_sdsp(value, source.offset());
    } else {
      this->sd(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::LoadFloat(FPURegister fd, const MemOperand& src) {
  auto fn = [this](FPURegister target, const MemOperand& source) {
    this->flw(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(fd, src, fn);
}

void TurboAssembler::StoreFloat(FPURegister fs, const MemOperand& src) {
  auto fn = [this](FPURegister value, const MemOperand& source) {
    this->fsw(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(fs, src, fn);
}

void TurboAssembler::LoadDouble(FPURegister fd, const MemOperand& src) {
  auto fn = [this](FPURegister target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fld(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fldsp(target, source.offset());
    } else {
      this->fld(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(fd, src, fn);
}

void TurboAssembler::StoreDouble(FPURegister fs, const MemOperand& src) {
  auto fn = [this](FPURegister value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fsd(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fsdsp(value, source.offset());
    } else {
      this->fsd(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(fs, src, fn);
}

void TurboAssembler::Ll(Register rd, const MemOperand& rs) {
  bool is_one_instruction = rs.offset() == 0;
  if (is_one_instruction) {
    lr_w(false, false, rd, rs.rm());
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs.rm(), rs.offset());
    lr_w(false, false, rd, scratch);
  }
}

void TurboAssembler::Lld(Register rd, const MemOperand& rs) {
  bool is_one_instruction = rs.offset() == 0;
  if (is_one_instruction) {
    lr_d(false, false, rd, rs.rm());
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs.rm(), rs.offset());
    lr_d(false, false, rd, scratch);
  }
}

void TurboAssembler::Sc(Register rd, const MemOperand& rs) {
  bool is_one_instruction = rs.offset() == 0;
  if (is_one_instruction) {
    sc_w(false, false, rd, rs.rm(), rd);
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs.rm(), rs.offset());
    sc_w(false, false, rd, scratch, rd);
  }
}

void TurboAssembler::Scd(Register rd, const MemOperand& rs) {
  bool is_one_instruction = rs.offset() == 0;
  if (is_one_instruction) {
    sc_d(false, false, rd, rs.rm(), rd);
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs.rm(), rs.offset());
    sc_d(false, false, rd, scratch, rd);
  }
}

void TurboAssembler::li(Register dst, Handle<HeapObject> value,
                        RelocInfo::Mode rmode) {
  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
  // non-isolate-independent code. In many cases it might be cheaper than
  // embedding the relocatable value.
  if (root_array_available_ && options().isolate_independent_code) {
    IndirectLoadConstant(dst, value);
    return;
  } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
    EmbeddedObjectIndex index = AddEmbeddedObject(value);
    DCHECK(is_uint32(index));
    li(dst, Operand(index, rmode));
  } else {
    DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
    li(dst, Operand(value.address(), rmode));
  }
}

void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) {
  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
  // non-isolate-independent code. In many cases it might be cheaper than
  // embedding the relocatable value.
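  // Isolate-independent code cannot embed the raw external address, so it is
  // loaded indirectly through a kRootRegister-relative slot instead.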
  if (root_array_available_ && options().isolate_independent_code) {
    IndirectLoadExternalReference(dst, value);
    return;
  }
  li(dst, Operand(value), mode);
}

void TurboAssembler::li(Register dst, const StringConstantBase* string,
                        LiFlags mode) {
  li(dst, Operand::EmbeddedStringConstant(string), mode);
}

static inline int InstrCountForLiLower32Bit(int64_t value) {
  int64_t Hi20 = ((value + 0x800) >> 12);
  int64_t Lo12 = value << 52 >> 52;
  if (Hi20 == 0 || Lo12 == 0) {
    return 1;
  }
  return 2;
}

int TurboAssembler::InstrCountForLi64Bit(int64_t value) {
  if (is_int32(value + 0x800)) {
    return InstrCountForLiLower32Bit(value);
  } else {
    return li_estimate(value);
  }
  UNREACHABLE();
  return INT_MAX;
}

void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
  DCHECK(!j.is_reg());
  DCHECK(!MustUseReg(j.rmode()));
  DCHECK(mode == OPTIMIZE_SIZE);
  Li(rd, j.immediate());
}

void TurboAssembler::li(Register rd, Operand j, LiFlags mode) {
  DCHECK(!j.is_reg());
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) {
    UseScratchRegisterScope temps(this);
    int count = li_estimate(j.immediate(), temps.hasAvailable());
    int reverse_count = li_estimate(~j.immediate(), temps.hasAvailable());
    if (FLAG_riscv_constant_pool && count >= 4 && reverse_count >= 4) {
      // Ld a Address from a constant pool.
      RecordEntry((uint64_t)j.immediate(), j.rmode());
      auipc(rd, 0);
      // Record a value into constant pool.
      ld(rd, rd, 0);
    } else {
      if ((count - reverse_count) > 1) {
        Li(rd, ~j.immediate());
        not_(rd, rd);
      } else {
        Li(rd, j.immediate());
      }
    }
  } else if (MustUseReg(j.rmode())) {
    int64_t immediate;
    if (j.IsHeapObjectRequest()) {
      RequestHeapObject(j.heap_object_request());
      immediate = 0;
    } else {
      immediate = j.immediate();
    }

    RecordRelocInfo(j.rmode(), immediate);
    li_ptr(rd, immediate);
  } else if (mode == ADDRESS_LOAD) {
    // We always need the same number of instructions as we may need to patch
    // this code to load another value which may need all 6 instructions.
    RecordRelocInfo(j.rmode());
    li_ptr(rd, j.immediate());
  } else {  // Always emit the same 48 bit instruction
            // sequence.
    li_ptr(rd, j.immediate());
  }
}

static RegList t_regs = {t0, t1, t2, t3, t4, t5, t6};
static RegList a_regs = {a0, a1, a2, a3, a4, a5, a6, a7};
static RegList s_regs = {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11};

void TurboAssembler::MultiPush(RegList regs) {
  int16_t num_to_push = regs.Count();
  int16_t stack_offset = num_to_push * kSystemPointerSize;

#define TEST_AND_PUSH_REG(reg)               \
  if (regs.has(reg)) {                       \
    stack_offset -= kSystemPointerSize;      \
    Sd(reg, MemOperand(sp, stack_offset));   \
    regs.clear(reg);                         \
  }

#define T_REGS(V) V(t6) V(t5) V(t4) V(t3) V(t2) V(t1) V(t0)
#define A_REGS(V) V(a7) V(a6) V(a5) V(a4) V(a3) V(a2) V(a1) V(a0)
#define S_REGS(V) \
  V(s11) V(s10) V(s9) V(s8) V(s7) V(s6) V(s5) V(s4) V(s3) V(s2) V(s1)

  Sub64(sp, sp, Operand(stack_offset));

  // Certain usage of MultiPush requires that registers are pushed onto the
  // stack in a particular order: ra, fp, sp, gp, .... (basically in the
  // decreasing order of register numbers according to MIPS register numbers)
  TEST_AND_PUSH_REG(ra);
  TEST_AND_PUSH_REG(fp);
  TEST_AND_PUSH_REG(sp);
  TEST_AND_PUSH_REG(gp);
  TEST_AND_PUSH_REG(tp);
  if (!(regs & s_regs).is_empty()) {
    S_REGS(TEST_AND_PUSH_REG)
  }
  if (!(regs & a_regs).is_empty()) {
    A_REGS(TEST_AND_PUSH_REG)
  }
  if (!(regs & t_regs).is_empty()) {
    T_REGS(TEST_AND_PUSH_REG)
  }

  DCHECK(regs.is_empty());

#undef TEST_AND_PUSH_REG
#undef T_REGS
#undef A_REGS
#undef S_REGS
}

void TurboAssembler::MultiPop(RegList regs) {
  int16_t stack_offset = 0;

#define TEST_AND_POP_REG(reg)                \
  if (regs.has(reg)) {                       \
    Ld(reg, MemOperand(sp, stack_offset));   \
    stack_offset += kSystemPointerSize;      \
    regs.clear(reg);                         \
  }

#define T_REGS(V) V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6)
#define A_REGS(V) V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7)
#define S_REGS(V) \
  V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) V(s9) V(s10) V(s11)

  // MultiPop pops from the stack in the reverse order of MultiPush.
  if (!(regs & t_regs).is_empty()) {
    T_REGS(TEST_AND_POP_REG)
  }
  if (!(regs & a_regs).is_empty()) {
    A_REGS(TEST_AND_POP_REG)
  }
  if (!(regs & s_regs).is_empty()) {
    S_REGS(TEST_AND_POP_REG)
  }
  TEST_AND_POP_REG(tp);
  TEST_AND_POP_REG(gp);
  TEST_AND_POP_REG(sp);
  TEST_AND_POP_REG(fp);
  TEST_AND_POP_REG(ra);

  DCHECK(regs.is_empty());

  addi(sp, sp, stack_offset);

#undef TEST_AND_POP_REG
#undef T_REGS
#undef S_REGS
#undef A_REGS
}

void TurboAssembler::MultiPushFPU(DoubleRegList regs) {
  int16_t num_to_push = regs.Count();
  int16_t stack_offset = num_to_push * kDoubleSize;

  Sub64(sp, sp, Operand(stack_offset));
  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
    if ((regs.bits() & (1 << i)) != 0) {
      stack_offset -= kDoubleSize;
      StoreDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
    }
  }
}

void TurboAssembler::MultiPopFPU(DoubleRegList regs) {
  int16_t stack_offset = 0;

  for (int16_t i = 0; i < kNumRegisters; i++) {
    if ((regs.bits() & (1 << i)) != 0) {
      LoadDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
      stack_offset += kDoubleSize;
    }
  }
  addi(sp, sp, stack_offset);
}

void TurboAssembler::ExtractBits(Register rt, Register rs, uint16_t pos,
                                 uint16_t size, bool sign_extend) {
  DCHECK(pos < 64 && 0 < size && size <= 64 && 0 < pos + size &&
         pos + size <= 64);
  slli(rt, rs, 64 - (pos + size));
  if (sign_extend) {
    srai(rt, rt, 64 - size);
  } else {
    srli(rt, rt, 64 - size);
  }
}

void TurboAssembler::InsertBits(Register dest, Register source, Register pos,
                                int size) {
  DCHECK_LT(size, 64);
  UseScratchRegisterScope temps(this);
  Register mask = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register source_ = temps.Acquire();
  // Create a mask of the length=size.
  li(mask, 1);
  slli(mask, mask, size);
  addi(mask, mask, -1);
  and_(source_, mask, source);
  sll(source_, source_, pos);
  // Make a mask containing 0's. 0's start at "pos" with length=size.
  sll(mask, mask, pos);
  not_(mask, mask);
  // cut area for insertion of source.
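  // Net effect: dest = (dest & ~(((1 << size) - 1) << pos)) |
  //                    ((source & ((1 << size) - 1)) << pos)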
1799 and_(dest, mask, dest); 1800 // insert source 1801 or_(dest, dest, source_); 1802} 1803 1804void TurboAssembler::Neg_s(FPURegister fd, FPURegister fs) { fneg_s(fd, fs); } 1805 1806void TurboAssembler::Neg_d(FPURegister fd, FPURegister fs) { fneg_d(fd, fs); } 1807 1808void TurboAssembler::Cvt_d_uw(FPURegister fd, Register rs) { 1809 // Convert rs to a FP value in fd. 1810 fcvt_d_wu(fd, rs); 1811} 1812 1813void TurboAssembler::Cvt_d_w(FPURegister fd, Register rs) { 1814 // Convert rs to a FP value in fd. 1815 fcvt_d_w(fd, rs); 1816} 1817 1818void TurboAssembler::Cvt_d_ul(FPURegister fd, Register rs) { 1819 // Convert rs to a FP value in fd. 1820 fcvt_d_lu(fd, rs); 1821} 1822 1823void TurboAssembler::Cvt_s_uw(FPURegister fd, Register rs) { 1824 // Convert rs to a FP value in fd. 1825 fcvt_s_wu(fd, rs); 1826} 1827 1828void TurboAssembler::Cvt_s_w(FPURegister fd, Register rs) { 1829 // Convert rs to a FP value in fd. 1830 fcvt_s_w(fd, rs); 1831} 1832 1833void TurboAssembler::Cvt_s_ul(FPURegister fd, Register rs) { 1834 // Convert rs to a FP value in fd. 1835 fcvt_s_lu(fd, rs); 1836} 1837 1838template <typename CvtFunc> 1839void TurboAssembler::RoundFloatingPointToInteger(Register rd, FPURegister fs, 1840 Register result, 1841 CvtFunc fcvt_generator) { 1842 // Save csr_fflags to scratch & clear exception flags 1843 if (result.is_valid()) { 1844 BlockTrampolinePoolScope block_trampoline_pool(this); 1845 UseScratchRegisterScope temps(this); 1846 Register scratch = temps.Acquire(); 1847 1848 int exception_flags = kInvalidOperation; 1849 csrrci(scratch, csr_fflags, exception_flags); 1850 1851 // actual conversion instruction 1852 fcvt_generator(this, rd, fs); 1853 1854 // check kInvalidOperation flag (out-of-range, NaN) 1855 // set result to 1 if normal, otherwise set result to 0 for abnormal 1856 frflags(result); 1857 andi(result, result, exception_flags); 1858 seqz(result, result); // result <-- 1 (normal), result <-- 0 (abnormal) 1859 1860 // restore csr_fflags 1861 csrw(csr_fflags, scratch); 1862 } else { 1863 // actual conversion instruction 1864 fcvt_generator(this, rd, fs); 1865 } 1866} 1867 1868void TurboAssembler::Clear_if_nan_d(Register rd, FPURegister fs) { 1869 Label no_nan; 1870 feq_d(kScratchReg, fs, fs); 1871 bnez(kScratchReg, &no_nan); 1872 Move(rd, zero_reg); 1873 bind(&no_nan); 1874} 1875 1876void TurboAssembler::Clear_if_nan_s(Register rd, FPURegister fs) { 1877 Label no_nan; 1878 feq_s(kScratchReg, fs, fs); 1879 bnez(kScratchReg, &no_nan); 1880 Move(rd, zero_reg); 1881 bind(&no_nan); 1882} 1883 1884void TurboAssembler::Trunc_uw_d(Register rd, FPURegister fs, Register result) { 1885 RoundFloatingPointToInteger( 1886 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1887 tasm->fcvt_wu_d(dst, src, RTZ); 1888 }); 1889} 1890 1891void TurboAssembler::Trunc_w_d(Register rd, FPURegister fs, Register result) { 1892 RoundFloatingPointToInteger( 1893 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1894 tasm->fcvt_w_d(dst, src, RTZ); 1895 }); 1896} 1897 1898void TurboAssembler::Trunc_uw_s(Register rd, FPURegister fs, Register result) { 1899 RoundFloatingPointToInteger( 1900 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1901 tasm->fcvt_wu_s(dst, src, RTZ); 1902 }); 1903} 1904 1905void TurboAssembler::Trunc_w_s(Register rd, FPURegister fs, Register result) { 1906 RoundFloatingPointToInteger( 1907 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1908 tasm->fcvt_w_s(dst, src, RTZ); 1909 
}); 1910} 1911 1912void TurboAssembler::Trunc_ul_d(Register rd, FPURegister fs, Register result) { 1913 RoundFloatingPointToInteger( 1914 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1915 tasm->fcvt_lu_d(dst, src, RTZ); 1916 }); 1917} 1918 1919void TurboAssembler::Trunc_l_d(Register rd, FPURegister fs, Register result) { 1920 RoundFloatingPointToInteger( 1921 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1922 tasm->fcvt_l_d(dst, src, RTZ); 1923 }); 1924} 1925 1926void TurboAssembler::Trunc_ul_s(Register rd, FPURegister fs, Register result) { 1927 RoundFloatingPointToInteger( 1928 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1929 tasm->fcvt_lu_s(dst, src, RTZ); 1930 }); 1931} 1932 1933void TurboAssembler::Trunc_l_s(Register rd, FPURegister fs, Register result) { 1934 RoundFloatingPointToInteger( 1935 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1936 tasm->fcvt_l_s(dst, src, RTZ); 1937 }); 1938} 1939 1940void TurboAssembler::Round_w_s(Register rd, FPURegister fs, Register result) { 1941 RoundFloatingPointToInteger( 1942 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1943 tasm->fcvt_w_s(dst, src, RNE); 1944 }); 1945} 1946 1947void TurboAssembler::Round_w_d(Register rd, FPURegister fs, Register result) { 1948 RoundFloatingPointToInteger( 1949 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1950 tasm->fcvt_w_d(dst, src, RNE); 1951 }); 1952} 1953 1954void TurboAssembler::Ceil_w_s(Register rd, FPURegister fs, Register result) { 1955 RoundFloatingPointToInteger( 1956 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1957 tasm->fcvt_w_s(dst, src, RUP); 1958 }); 1959} 1960 1961void TurboAssembler::Ceil_w_d(Register rd, FPURegister fs, Register result) { 1962 RoundFloatingPointToInteger( 1963 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1964 tasm->fcvt_w_d(dst, src, RUP); 1965 }); 1966} 1967 1968void TurboAssembler::Floor_w_s(Register rd, FPURegister fs, Register result) { 1969 RoundFloatingPointToInteger( 1970 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1971 tasm->fcvt_w_s(dst, src, RDN); 1972 }); 1973} 1974 1975void TurboAssembler::Floor_w_d(Register rd, FPURegister fs, Register result) { 1976 RoundFloatingPointToInteger( 1977 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) { 1978 tasm->fcvt_w_d(dst, src, RDN); 1979 }); 1980} 1981 1982// According to JS ECMA specification, for floating-point round operations, if 1983// the input is NaN, +/-infinity, or +/-0, the same input is returned as the 1984// rounded result; this differs from behavior of RISCV fcvt instructions (which 1985// round out-of-range values to the nearest max or min value), therefore special 1986// handling is needed by NaN, +/-Infinity, +/-0 1987template <typename F> 1988void TurboAssembler::RoundHelper(FPURegister dst, FPURegister src, 1989 FPURegister fpu_scratch, RoundingMode frm) { 1990 BlockTrampolinePoolScope block_trampoline_pool(this); 1991 UseScratchRegisterScope temps(this); 1992 Register scratch2 = temps.Acquire(); 1993 1994 DCHECK((std::is_same<float, F>::value) || (std::is_same<double, F>::value)); 1995 // Need at least two FPRs, so check against dst == src == fpu_scratch 1996 DCHECK(!(dst == src && dst == fpu_scratch)); 1997 1998 const int kFloatMantissaBits = 1999 sizeof(F) == 4 ? 
kFloat32MantissaBits : kFloat64MantissaBits;
2000   const int kFloatExponentBits =
2001       sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
2002   const int kFloatExponentBias =
2003       sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
2004   Label done;
2005
2006   {
2007     UseScratchRegisterScope temps2(this);
2008     Register scratch = temps2.Acquire();
2009     // extract exponent value of the source floating-point to scratch
2010     if (std::is_same<F, double>::value) {
2011       fmv_x_d(scratch, src);
2012     } else {
2013       fmv_x_w(scratch, src);
2014     }
2015     ExtractBits(scratch2, scratch, kFloatMantissaBits, kFloatExponentBits);
2016   }
2017
2018   // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
2019   // in mantissa, the result is the same as src, so move src to dest (to avoid
2020   // generating another branch)
2021   if (dst != src) {
2022     if (std::is_same<F, double>::value) {
2023       fmv_d(dst, src);
2024     } else {
2025       fmv_s(dst, src);
2026     }
2027   }
2028   {
2029     Label not_NaN;
2030     UseScratchRegisterScope temps2(this);
2031     Register scratch = temps2.Acquire();
2032     // According to the wasm spec
2033     // (https://webassembly.github.io/spec/core/exec/numerics.html#aux-nans)
2034     // if input is canonical NaN, then output is canonical NaN, and if input is
2035     // any other NaN, then output is any NaN with most significant bit of
2036     // payload is 1. In RISC-V, feq_d will set scratch to 0 if src is a NaN. If
2037     // src is not a NaN, branch to the label and do nothing, but if it is,
2038     // fmin_d will set dst to the canonical NaN.
2039     if (std::is_same<F, double>::value) {
2040       feq_d(scratch, src, src);
2041       bnez(scratch, &not_NaN);
2042       fmin_d(dst, src, src);
2043     } else {
2044       feq_s(scratch, src, src);
2045       bnez(scratch, &not_NaN);
2046       fmin_s(dst, src, src);
2047     }
2048     bind(&not_NaN);
2049   }
2050
2051   // If the real exponent (i.e., scratch2 - kFloatExponentBias) is greater than
2052   // or equal to kFloatMantissaBits, the floating-point value has no fractional
2053   // part, thus the input is already rounded; jump to done. Note that NaN and
2054   // Infinity have the maximal exponent value in floating-point representation,
2055   // so they also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits),
2056   // and JS round semantics specify that rounding of NaN (Infinity) returns NaN
2057   // (Infinity), so NaN and Infinity are considered rounded values too.
2058   Branch(&done, greater_equal, scratch2,
2059          Operand(kFloatExponentBias + kFloatMantissaBits));
2060
2061   // Actual rounding is needed along this path
2062
2063   // old_src holds the original input, needed for the case of src == dst
2064   FPURegister old_src = src;
2065   if (src == dst) {
2066     DCHECK(fpu_scratch != dst);
2067     Move(fpu_scratch, src);
2068     old_src = fpu_scratch;
2069   }
2070
2071   // Since only input whose real exponent value is less than kMantissaBits
2072   // (i.e., 23 or 52-bits) falls into this path, the value range of the input
2073   // falls into that of 23- or 53-bit integers. So we round the input to integer
2074   // values, then convert them back to floating-point.
2075   {
2076     UseScratchRegisterScope temps(this);
2077     Register scratch = temps.Acquire();
2078     if (std::is_same<F, double>::value) {
2079       fcvt_l_d(scratch, src, frm);
2080       fcvt_d_l(dst, scratch, frm);
2081     } else {
2082       fcvt_w_s(scratch, src, frm);
2083       fcvt_s_w(dst, scratch, frm);
2084     }
2085   }
2086   // A special handling is needed if the input is a very small positive/negative
2087   // number that rounds to zero.
JS semantics requires that the rounded result 2088 // retains the sign of the input, so a very small positive (negative) 2089 // floating-point number should be rounded to positive (negative) 0. 2090 // Therefore, we use sign-bit injection to produce +/-0 correctly. Instead of 2091 // testing for zero w/ a branch, we just insert sign-bit for everyone on this 2092 // path (this is where old_src is needed) 2093 if (std::is_same<F, double>::value) { 2094 fsgnj_d(dst, dst, old_src); 2095 } else { 2096 fsgnj_s(dst, dst, old_src); 2097 } 2098 2099 bind(&done); 2100} 2101 2102// According to JS ECMA specification, for floating-point round operations, if 2103// the input is NaN, +/-infinity, or +/-0, the same input is returned as the 2104// rounded result; this differs from behavior of RISCV fcvt instructions (which 2105// round out-of-range values to the nearest max or min value), therefore special 2106// handling is needed by NaN, +/-Infinity, +/-0 2107template <typename F> 2108void TurboAssembler::RoundHelper(VRegister dst, VRegister src, Register scratch, 2109 VRegister v_scratch, RoundingMode frm) { 2110 VU.set(scratch, std::is_same<F, float>::value ? E32 : E64, m1); 2111 // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits 2112 // in mantissa, the result is the same as src, so move src to dest (to avoid 2113 // generating another branch) 2114 2115 // If real exponent (i.e., scratch2 - kFloatExponentBias) is greater than 2116 // kFloat32MantissaBits, it means the floating-point value has no fractional 2117 // part, thus the input is already rounded, jump to done. Note that, NaN and 2118 // Infinity in floating-point representation sets maximal exponent value, so 2119 // they also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits), 2120 // and JS round semantics specify that rounding of NaN (Infinity) returns NaN 2121 // (Infinity), so NaN and Infinity are considered rounded value too. 2122 const int kFloatMantissaBits = 2123 sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits; 2124 const int kFloatExponentBits = 2125 sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits; 2126 const int kFloatExponentBias = 2127 sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias; 2128 2129 // slli(rt, rs, 64 - (pos + size)); 2130 // if (sign_extend) { 2131 // srai(rt, rt, 64 - size); 2132 // } else { 2133 // srli(rt, rt, 64 - size); 2134 // } 2135 2136 li(scratch, 64 - kFloatMantissaBits - kFloatExponentBits); 2137 vsll_vx(v_scratch, src, scratch); 2138 li(scratch, 64 - kFloatExponentBits); 2139 vsrl_vx(v_scratch, v_scratch, scratch); 2140 li(scratch, kFloatExponentBias + kFloatMantissaBits); 2141 vmslt_vx(v0, v_scratch, scratch); 2142 2143 VU.set(frm); 2144 vmv_vv(dst, src); 2145 if (dst == src) { 2146 vmv_vv(v_scratch, src); 2147 } 2148 vfcvt_x_f_v(dst, src, MaskType::Mask); 2149 vfcvt_f_x_v(dst, dst, MaskType::Mask); 2150 2151 // A special handling is needed if the input is a very small positive/negative 2152 // number that rounds to zero. JS semantics requires that the rounded result 2153 // retains the sign of the input, so a very small positive (negative) 2154 // floating-point number should be rounded to positive (negative) 0. 
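// The sign injection below takes the sign from the original input (kept in
// v_scratch when dst aliases src), so such values come out as +/-0 with the
// correct sign.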
2155 if (dst == src) { 2156 vfsngj_vv(dst, dst, v_scratch); 2157 } else { 2158 vfsngj_vv(dst, dst, src); 2159 } 2160} 2161 2162void TurboAssembler::Ceil_f(VRegister vdst, VRegister vsrc, Register scratch, 2163 VRegister v_scratch) { 2164 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RUP); 2165} 2166 2167void TurboAssembler::Ceil_d(VRegister vdst, VRegister vsrc, Register scratch, 2168 VRegister v_scratch) { 2169 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RUP); 2170} 2171 2172void TurboAssembler::Floor_f(VRegister vdst, VRegister vsrc, Register scratch, 2173 VRegister v_scratch) { 2174 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RDN); 2175} 2176 2177void TurboAssembler::Floor_d(VRegister vdst, VRegister vsrc, Register scratch, 2178 VRegister v_scratch) { 2179 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RDN); 2180} 2181 2182void TurboAssembler::Trunc_d(VRegister vdst, VRegister vsrc, Register scratch, 2183 VRegister v_scratch) { 2184 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RTZ); 2185} 2186 2187void TurboAssembler::Trunc_f(VRegister vdst, VRegister vsrc, Register scratch, 2188 VRegister v_scratch) { 2189 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RTZ); 2190} 2191 2192void TurboAssembler::Round_f(VRegister vdst, VRegister vsrc, Register scratch, 2193 VRegister v_scratch) { 2194 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RNE); 2195} 2196 2197void TurboAssembler::Round_d(VRegister vdst, VRegister vsrc, Register scratch, 2198 VRegister v_scratch) { 2199 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RNE); 2200} 2201 2202void TurboAssembler::Floor_d_d(FPURegister dst, FPURegister src, 2203 FPURegister fpu_scratch) { 2204 RoundHelper<double>(dst, src, fpu_scratch, RDN); 2205} 2206 2207void TurboAssembler::Ceil_d_d(FPURegister dst, FPURegister src, 2208 FPURegister fpu_scratch) { 2209 RoundHelper<double>(dst, src, fpu_scratch, RUP); 2210} 2211 2212void TurboAssembler::Trunc_d_d(FPURegister dst, FPURegister src, 2213 FPURegister fpu_scratch) { 2214 RoundHelper<double>(dst, src, fpu_scratch, RTZ); 2215} 2216 2217void TurboAssembler::Round_d_d(FPURegister dst, FPURegister src, 2218 FPURegister fpu_scratch) { 2219 RoundHelper<double>(dst, src, fpu_scratch, RNE); 2220} 2221 2222void TurboAssembler::Floor_s_s(FPURegister dst, FPURegister src, 2223 FPURegister fpu_scratch) { 2224 RoundHelper<float>(dst, src, fpu_scratch, RDN); 2225} 2226 2227void TurboAssembler::Ceil_s_s(FPURegister dst, FPURegister src, 2228 FPURegister fpu_scratch) { 2229 RoundHelper<float>(dst, src, fpu_scratch, RUP); 2230} 2231 2232void TurboAssembler::Trunc_s_s(FPURegister dst, FPURegister src, 2233 FPURegister fpu_scratch) { 2234 RoundHelper<float>(dst, src, fpu_scratch, RTZ); 2235} 2236 2237void TurboAssembler::Round_s_s(FPURegister dst, FPURegister src, 2238 FPURegister fpu_scratch) { 2239 RoundHelper<float>(dst, src, fpu_scratch, RNE); 2240} 2241 2242void MacroAssembler::Madd_s(FPURegister fd, FPURegister fr, FPURegister fs, 2243 FPURegister ft) { 2244 fmadd_s(fd, fs, ft, fr); 2245} 2246 2247void MacroAssembler::Madd_d(FPURegister fd, FPURegister fr, FPURegister fs, 2248 FPURegister ft) { 2249 fmadd_d(fd, fs, ft, fr); 2250} 2251 2252void MacroAssembler::Msub_s(FPURegister fd, FPURegister fr, FPURegister fs, 2253 FPURegister ft) { 2254 fmsub_s(fd, fs, ft, fr); 2255} 2256 2257void MacroAssembler::Msub_d(FPURegister fd, FPURegister fr, FPURegister fs, 2258 FPURegister ft) { 2259 fmsub_d(fd, fs, ft, fr); 2260} 2261 2262void TurboAssembler::CompareF32(Register rd, 
FPUCondition cc, FPURegister cmp1, 2263 FPURegister cmp2) { 2264 switch (cc) { 2265 case EQ: 2266 feq_s(rd, cmp1, cmp2); 2267 break; 2268 case NE: 2269 feq_s(rd, cmp1, cmp2); 2270 NegateBool(rd, rd); 2271 break; 2272 case LT: 2273 flt_s(rd, cmp1, cmp2); 2274 break; 2275 case GE: 2276 fle_s(rd, cmp2, cmp1); 2277 break; 2278 case LE: 2279 fle_s(rd, cmp1, cmp2); 2280 break; 2281 case GT: 2282 flt_s(rd, cmp2, cmp1); 2283 break; 2284 default: 2285 UNREACHABLE(); 2286 } 2287} 2288 2289void TurboAssembler::CompareF64(Register rd, FPUCondition cc, FPURegister cmp1, 2290 FPURegister cmp2) { 2291 switch (cc) { 2292 case EQ: 2293 feq_d(rd, cmp1, cmp2); 2294 break; 2295 case NE: 2296 feq_d(rd, cmp1, cmp2); 2297 NegateBool(rd, rd); 2298 break; 2299 case LT: 2300 flt_d(rd, cmp1, cmp2); 2301 break; 2302 case GE: 2303 fle_d(rd, cmp2, cmp1); 2304 break; 2305 case LE: 2306 fle_d(rd, cmp1, cmp2); 2307 break; 2308 case GT: 2309 flt_d(rd, cmp2, cmp1); 2310 break; 2311 default: 2312 UNREACHABLE(); 2313 } 2314} 2315 2316void TurboAssembler::CompareIsNotNanF32(Register rd, FPURegister cmp1, 2317 FPURegister cmp2) { 2318 UseScratchRegisterScope temps(this); 2319 BlockTrampolinePoolScope block_trampoline_pool(this); 2320 Register scratch = temps.Acquire(); 2321 2322 feq_s(rd, cmp1, cmp1); // rd <- !isNan(cmp1) 2323 feq_s(scratch, cmp2, cmp2); // scratch <- !isNaN(cmp2) 2324 And(rd, rd, scratch); // rd <- !isNan(cmp1) && !isNan(cmp2) 2325} 2326 2327void TurboAssembler::CompareIsNotNanF64(Register rd, FPURegister cmp1, 2328 FPURegister cmp2) { 2329 UseScratchRegisterScope temps(this); 2330 BlockTrampolinePoolScope block_trampoline_pool(this); 2331 Register scratch = temps.Acquire(); 2332 2333 feq_d(rd, cmp1, cmp1); // rd <- !isNan(cmp1) 2334 feq_d(scratch, cmp2, cmp2); // scratch <- !isNaN(cmp2) 2335 And(rd, rd, scratch); // rd <- !isNan(cmp1) && !isNan(cmp2) 2336} 2337 2338void TurboAssembler::CompareIsNanF32(Register rd, FPURegister cmp1, 2339 FPURegister cmp2) { 2340 CompareIsNotNanF32(rd, cmp1, cmp2); // rd <- !isNan(cmp1) && !isNan(cmp2) 2341 Xor(rd, rd, 1); // rd <- isNan(cmp1) || isNan(cmp2) 2342} 2343 2344void TurboAssembler::CompareIsNanF64(Register rd, FPURegister cmp1, 2345 FPURegister cmp2) { 2346 CompareIsNotNanF64(rd, cmp1, cmp2); // rd <- !isNan(cmp1) && !isNan(cmp2) 2347 Xor(rd, rd, 1); // rd <- isNan(cmp1) || isNan(cmp2) 2348} 2349 2350void TurboAssembler::BranchTrueShortF(Register rs, Label* target) { 2351 Branch(target, not_equal, rs, Operand(zero_reg)); 2352} 2353 2354void TurboAssembler::BranchFalseShortF(Register rs, Label* target) { 2355 Branch(target, equal, rs, Operand(zero_reg)); 2356} 2357 2358void TurboAssembler::BranchTrueF(Register rs, Label* target) { 2359 bool long_branch = 2360 target->is_bound() ? !is_near(target) : is_trampoline_emitted(); 2361 if (long_branch) { 2362 Label skip; 2363 BranchFalseShortF(rs, &skip); 2364 BranchLong(target); 2365 bind(&skip); 2366 } else { 2367 BranchTrueShortF(rs, target); 2368 } 2369} 2370 2371void TurboAssembler::BranchFalseF(Register rs, Label* target) { 2372 bool long_branch = 2373 target->is_bound() ? 
!is_near(target) : is_trampoline_emitted(); 2374 if (long_branch) { 2375 Label skip; 2376 BranchTrueShortF(rs, &skip); 2377 BranchLong(target); 2378 bind(&skip); 2379 } else { 2380 BranchFalseShortF(rs, target); 2381 } 2382} 2383 2384void TurboAssembler::InsertHighWordF64(FPURegister dst, Register src_high) { 2385 UseScratchRegisterScope temps(this); 2386 Register scratch = temps.Acquire(); 2387 Register scratch2 = temps.Acquire(); 2388 BlockTrampolinePoolScope block_trampoline_pool(this); 2389 2390 DCHECK(src_high != scratch2 && src_high != scratch); 2391 2392 fmv_x_d(scratch, dst); 2393 slli(scratch2, src_high, 32); 2394 slli(scratch, scratch, 32); 2395 srli(scratch, scratch, 32); 2396 or_(scratch, scratch, scratch2); 2397 fmv_d_x(dst, scratch); 2398} 2399 2400void TurboAssembler::InsertLowWordF64(FPURegister dst, Register src_low) { 2401 UseScratchRegisterScope temps(this); 2402 Register scratch = temps.Acquire(); 2403 Register scratch2 = temps.Acquire(); 2404 BlockTrampolinePoolScope block_trampoline_pool(this); 2405 2406 DCHECK(src_low != scratch && src_low != scratch2); 2407 fmv_x_d(scratch, dst); 2408 slli(scratch2, src_low, 32); 2409 srli(scratch2, scratch2, 32); 2410 srli(scratch, scratch, 32); 2411 slli(scratch, scratch, 32); 2412 or_(scratch, scratch, scratch2); 2413 fmv_d_x(dst, scratch); 2414} 2415 2416void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint32_t src) { 2417 // Handle special values first. 2418 if (src == bit_cast<uint32_t>(0.0f) && has_single_zero_reg_set_) { 2419 if (dst != kDoubleRegZero) fmv_s(dst, kDoubleRegZero); 2420 } else if (src == bit_cast<uint32_t>(-0.0f) && has_single_zero_reg_set_) { 2421 Neg_s(dst, kDoubleRegZero); 2422 } else { 2423 if (dst == kDoubleRegZero) { 2424 DCHECK(src == bit_cast<uint32_t>(0.0f)); 2425 fmv_w_x(dst, zero_reg); 2426 has_single_zero_reg_set_ = true; 2427 has_double_zero_reg_set_ = false; 2428 } else { 2429 UseScratchRegisterScope temps(this); 2430 Register scratch = temps.Acquire(); 2431 li(scratch, Operand(static_cast<int32_t>(src))); 2432 fmv_w_x(dst, scratch); 2433 } 2434 } 2435} 2436 2437void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint64_t src) { 2438 // Handle special values first. 2439 if (src == bit_cast<uint64_t>(0.0) && has_double_zero_reg_set_) { 2440 if (dst != kDoubleRegZero) fmv_d(dst, kDoubleRegZero); 2441 } else if (src == bit_cast<uint64_t>(-0.0) && has_double_zero_reg_set_) { 2442 Neg_d(dst, kDoubleRegZero); 2443 } else { 2444 if (dst == kDoubleRegZero) { 2445 DCHECK(src == bit_cast<uint64_t>(0.0)); 2446 fmv_d_x(dst, zero_reg); 2447 has_double_zero_reg_set_ = true; 2448 has_single_zero_reg_set_ = false; 2449 } else { 2450 UseScratchRegisterScope temps(this); 2451 Register scratch = temps.Acquire(); 2452 li(scratch, Operand(src)); 2453 fmv_d_x(dst, scratch); 2454 } 2455 } 2456} 2457 2458void TurboAssembler::CompareI(Register rd, Register rs, const Operand& rt, 2459 Condition cond) { 2460 switch (cond) { 2461 case eq: 2462 Seq(rd, rs, rt); 2463 break; 2464 case ne: 2465 Sne(rd, rs, rt); 2466 break; 2467 2468 // Signed comparison. 2469 case greater: 2470 Sgt(rd, rs, rt); 2471 break; 2472 case greater_equal: 2473 Sge(rd, rs, rt); // rs >= rt 2474 break; 2475 case less: 2476 Slt(rd, rs, rt); // rs < rt 2477 break; 2478 case less_equal: 2479 Sle(rd, rs, rt); // rs <= rt 2480 break; 2481 2482 // Unsigned comparison. 
2483 case Ugreater: 2484 Sgtu(rd, rs, rt); // rs > rt 2485 break; 2486 case Ugreater_equal: 2487 Sgeu(rd, rs, rt); // rs >= rt 2488 break; 2489 case Uless: 2490 Sltu(rd, rs, rt); // rs < rt 2491 break; 2492 case Uless_equal: 2493 Sleu(rd, rs, rt); // rs <= rt 2494 break; 2495 case cc_always: 2496 UNREACHABLE(); 2497 default: 2498 UNREACHABLE(); 2499 } 2500} 2501 2502// dest <- (condition != 0 ? zero : dest) 2503void TurboAssembler::LoadZeroIfConditionNotZero(Register dest, 2504 Register condition) { 2505 UseScratchRegisterScope temps(this); 2506 Register scratch = temps.Acquire(); 2507 seqz(scratch, condition); 2508 // neg + and may be more efficient than mul(dest, dest, scratch) 2509 neg(scratch, scratch); // 0 is still 0, 1 becomes all 1s 2510 and_(dest, dest, scratch); 2511} 2512 2513// dest <- (condition == 0 ? 0 : dest) 2514void TurboAssembler::LoadZeroIfConditionZero(Register dest, 2515 Register condition) { 2516 UseScratchRegisterScope temps(this); 2517 Register scratch = temps.Acquire(); 2518 snez(scratch, condition); 2519 // neg + and may be more efficient than mul(dest, dest, scratch); 2520 neg(scratch, scratch); // 0 is still 0, 1 becomes all 1s 2521 and_(dest, dest, scratch); 2522} 2523 2524void TurboAssembler::Clz32(Register rd, Register xx) { 2525 // 32 bit unsigned in lower word: count number of leading zeros. 2526 // int n = 32; 2527 // unsigned y; 2528 2529 // y = x >>16; if (y != 0) { n = n -16; x = y; } 2530 // y = x >> 8; if (y != 0) { n = n - 8; x = y; } 2531 // y = x >> 4; if (y != 0) { n = n - 4; x = y; } 2532 // y = x >> 2; if (y != 0) { n = n - 2; x = y; } 2533 // y = x >> 1; if (y != 0) {rd = n - 2; return;} 2534 // rd = n - x; 2535 2536 Label L0, L1, L2, L3, L4; 2537 UseScratchRegisterScope temps(this); 2538 BlockTrampolinePoolScope block_trampoline_pool(this); 2539 Register x = rd; 2540 Register y = temps.Acquire(); 2541 Register n = temps.Acquire(); 2542 DCHECK(xx != y && xx != n); 2543 Move(x, xx); 2544 li(n, Operand(32)); 2545 srliw(y, x, 16); 2546 BranchShort(&L0, eq, y, Operand(zero_reg)); 2547 Move(x, y); 2548 addiw(n, n, -16); 2549 bind(&L0); 2550 srliw(y, x, 8); 2551 BranchShort(&L1, eq, y, Operand(zero_reg)); 2552 addiw(n, n, -8); 2553 Move(x, y); 2554 bind(&L1); 2555 srliw(y, x, 4); 2556 BranchShort(&L2, eq, y, Operand(zero_reg)); 2557 addiw(n, n, -4); 2558 Move(x, y); 2559 bind(&L2); 2560 srliw(y, x, 2); 2561 BranchShort(&L3, eq, y, Operand(zero_reg)); 2562 addiw(n, n, -2); 2563 Move(x, y); 2564 bind(&L3); 2565 srliw(y, x, 1); 2566 subw(rd, n, x); 2567 BranchShort(&L4, eq, y, Operand(zero_reg)); 2568 addiw(rd, n, -2); 2569 bind(&L4); 2570} 2571 2572void TurboAssembler::Clz64(Register rd, Register xx) { 2573 // 64 bit: count number of leading zeros. 
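// Example: xx = 0x00000000000000F0 has its highest set bit at bit 7, so the
// sequence below yields rd = 56.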
2574 // int n = 64; 2575 // unsigned y; 2576 2577 // y = x >>32; if (y != 0) { n = n - 32; x = y; } 2578 // y = x >>16; if (y != 0) { n = n - 16; x = y; } 2579 // y = x >> 8; if (y != 0) { n = n - 8; x = y; } 2580 // y = x >> 4; if (y != 0) { n = n - 4; x = y; } 2581 // y = x >> 2; if (y != 0) { n = n - 2; x = y; } 2582 // y = x >> 1; if (y != 0) {rd = n - 2; return;} 2583 // rd = n - x; 2584 2585 Label L0, L1, L2, L3, L4, L5; 2586 UseScratchRegisterScope temps(this); 2587 BlockTrampolinePoolScope block_trampoline_pool(this); 2588 Register x = rd; 2589 Register y = temps.Acquire(); 2590 Register n = temps.Acquire(); 2591 DCHECK(xx != y && xx != n); 2592 Move(x, xx); 2593 li(n, Operand(64)); 2594 srli(y, x, 32); 2595 BranchShort(&L0, eq, y, Operand(zero_reg)); 2596 addiw(n, n, -32); 2597 Move(x, y); 2598 bind(&L0); 2599 srli(y, x, 16); 2600 BranchShort(&L1, eq, y, Operand(zero_reg)); 2601 addiw(n, n, -16); 2602 Move(x, y); 2603 bind(&L1); 2604 srli(y, x, 8); 2605 BranchShort(&L2, eq, y, Operand(zero_reg)); 2606 addiw(n, n, -8); 2607 Move(x, y); 2608 bind(&L2); 2609 srli(y, x, 4); 2610 BranchShort(&L3, eq, y, Operand(zero_reg)); 2611 addiw(n, n, -4); 2612 Move(x, y); 2613 bind(&L3); 2614 srli(y, x, 2); 2615 BranchShort(&L4, eq, y, Operand(zero_reg)); 2616 addiw(n, n, -2); 2617 Move(x, y); 2618 bind(&L4); 2619 srli(y, x, 1); 2620 subw(rd, n, x); 2621 BranchShort(&L5, eq, y, Operand(zero_reg)); 2622 addiw(rd, n, -2); 2623 bind(&L5); 2624} 2625 2626void TurboAssembler::Ctz32(Register rd, Register rs) { 2627 // Convert trailing zeroes to trailing ones, and bits to their left 2628 // to zeroes. 2629 2630 BlockTrampolinePoolScope block_trampoline_pool(this); 2631 { 2632 UseScratchRegisterScope temps(this); 2633 Register scratch = temps.Acquire(); 2634 Add64(scratch, rs, -1); 2635 Xor(rd, scratch, rs); 2636 And(rd, rd, scratch); 2637 // Count number of leading zeroes. 2638 } 2639 Clz32(rd, rd); 2640 { 2641 // Subtract number of leading zeroes from 32 to get number of trailing 2642 // ones. Remember that the trailing ones were formerly trailing zeroes. 2643 UseScratchRegisterScope temps(this); 2644 Register scratch = temps.Acquire(); 2645 li(scratch, 32); 2646 Sub32(rd, scratch, rd); 2647 } 2648} 2649 2650void TurboAssembler::Ctz64(Register rd, Register rs) { 2651 // Convert trailing zeroes to trailing ones, and bits to their left 2652 // to zeroes. 2653 2654 BlockTrampolinePoolScope block_trampoline_pool(this); 2655 { 2656 UseScratchRegisterScope temps(this); 2657 Register scratch = temps.Acquire(); 2658 Add64(scratch, rs, -1); 2659 Xor(rd, scratch, rs); 2660 And(rd, rd, scratch); 2661 // Count number of leading zeroes. 2662 } 2663 Clz64(rd, rd); 2664 { 2665 // Subtract number of leading zeroes from 64 to get number of trailing 2666 // ones. Remember that the trailing ones were formerly trailing zeroes. 
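// Example: rs = 0b...01000 leaves rd = 0b111 above, Clz64 then returns 61,
// and 64 - 61 = 3 trailing zeros below.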
2667 UseScratchRegisterScope temps(this); 2668 Register scratch = temps.Acquire(); 2669 li(scratch, 64); 2670 Sub64(rd, scratch, rd); 2671 } 2672} 2673 2674void TurboAssembler::Popcnt32(Register rd, Register rs, Register scratch) { 2675 DCHECK_NE(scratch, rs); 2676 DCHECK_NE(scratch, rd); 2677 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 2678 // 2679 // A generalization of the best bit counting method to integers of 2680 // bit-widths up to 128 (parameterized by type T) is this: 2681 // 2682 // v = v - ((v >> 1) & (T)~(T)0/3); // temp 2683 // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp 2684 // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp 2685 // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count 2686 // 2687 // There are algorithms which are faster in the cases where very few 2688 // bits are set but the algorithm here attempts to minimize the total 2689 // number of instructions executed even when a large number of bits 2690 // are set. 2691 // The number of instruction is 20. 2692 // uint32_t B0 = 0x55555555; // (T)~(T)0/3 2693 // uint32_t B1 = 0x33333333; // (T)~(T)0/15*3 2694 // uint32_t B2 = 0x0F0F0F0F; // (T)~(T)0/255*15 2695 // uint32_t value = 0x01010101; // (T)~(T)0/255 2696 2697 uint32_t shift = 24; 2698 UseScratchRegisterScope temps(this); 2699 BlockTrampolinePoolScope block_trampoline_pool(this); 2700 Register scratch2 = temps.Acquire(); 2701 Register value = temps.Acquire(); 2702 DCHECK((rd != value) && (rs != value)); 2703 li(value, 0x01010101); // value = 0x01010101; 2704 li(scratch2, 0x55555555); // B0 = 0x55555555; 2705 Srl32(scratch, rs, 1); 2706 And(scratch, scratch, scratch2); 2707 Sub32(scratch, rs, scratch); 2708 li(scratch2, 0x33333333); // B1 = 0x33333333; 2709 slli(rd, scratch2, 4); 2710 or_(scratch2, scratch2, rd); 2711 And(rd, scratch, scratch2); 2712 Srl32(scratch, scratch, 2); 2713 And(scratch, scratch, scratch2); 2714 Add32(scratch, rd, scratch); 2715 srliw(rd, scratch, 4); 2716 Add32(rd, rd, scratch); 2717 li(scratch2, 0xF); 2718 Mul32(scratch2, value, scratch2); // B2 = 0x0F0F0F0F; 2719 And(rd, rd, scratch2); 2720 Mul32(rd, rd, value); 2721 Srl32(rd, rd, shift); 2722} 2723 2724void TurboAssembler::Popcnt64(Register rd, Register rs, Register scratch) { 2725 DCHECK_NE(scratch, rs); 2726 DCHECK_NE(scratch, rd); 2727 // uint64_t B0 = 0x5555555555555555l; // (T)~(T)0/3 2728 // uint64_t B1 = 0x3333333333333333l; // (T)~(T)0/15*3 2729 // uint64_t B2 = 0x0F0F0F0F0F0F0F0Fl; // (T)~(T)0/255*15 2730 // uint64_t value = 0x0101010101010101l; // (T)~(T)0/255 2731 // uint64_t shift = 24; // (sizeof(T) - 1) * BITS_PER_BYTE 2732 2733 uint64_t shift = 24; 2734 UseScratchRegisterScope temps(this); 2735 BlockTrampolinePoolScope block_trampoline_pool(this); 2736 Register scratch2 = temps.Acquire(); 2737 Register value = temps.Acquire(); 2738 DCHECK((rd != value) && (rs != value)); 2739 li(value, 0x1111111111111111l); // value = 0x1111111111111111l; 2740 li(scratch2, 5); 2741 Mul64(scratch2, value, scratch2); // B0 = 0x5555555555555555l; 2742 Srl64(scratch, rs, 1); 2743 And(scratch, scratch, scratch2); 2744 Sub64(scratch, rs, scratch); 2745 li(scratch2, 3); 2746 Mul64(scratch2, value, scratch2); // B1 = 0x3333333333333333l; 2747 And(rd, scratch, scratch2); 2748 Srl64(scratch, scratch, 2); 2749 And(scratch, scratch, scratch2); 2750 Add64(scratch, rd, scratch); 2751 Srl64(rd, scratch, 4); 2752 Add64(rd, rd, scratch); 2753 li(scratch2, 0xF); 2754 li(value, 0x0101010101010101l); // value = 0x0101010101010101l; 
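// rd now holds scratch + (scratch >> 4), i.e. per-byte bit counts in the low
// nibbles; masking with B2 below keeps those counts, and multiplying by
// 0x0101010101010101 sums every byte into the top byte, which the final shift
// by 56 (32 + shift) extracts.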
2755 Mul64(scratch2, value, scratch2); // B2 = 0x0F0F0F0F0F0F0F0Fl; 2756 And(rd, rd, scratch2); 2757 Mul64(rd, rd, value); 2758 srli(rd, rd, 32 + shift); 2759} 2760 2761void TurboAssembler::TryInlineTruncateDoubleToI(Register result, 2762 DoubleRegister double_input, 2763 Label* done) { 2764 UseScratchRegisterScope temps(this); 2765 Register scratch = temps.Acquire(); 2766 // if scratch == 1, exception happens during truncation 2767 Trunc_w_d(result, double_input, scratch); 2768 // If we had no exceptions (i.e., scratch==1) we are done. 2769 Branch(done, eq, scratch, Operand(1)); 2770} 2771 2772void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone, 2773 Register result, 2774 DoubleRegister double_input, 2775 StubCallMode stub_mode) { 2776 Label done; 2777 2778 TryInlineTruncateDoubleToI(result, double_input, &done); 2779 2780 // If we fell through then inline version didn't succeed - call stub 2781 // instead. 2782 push(ra); 2783 Sub64(sp, sp, Operand(kDoubleSize)); // Put input on stack. 2784 fsd(double_input, sp, 0); 2785 2786 if (stub_mode == StubCallMode::kCallWasmRuntimeStub) { 2787 Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); 2788 } else { 2789 Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET); 2790 } 2791 ld(result, sp, 0); 2792 2793 Add64(sp, sp, Operand(kDoubleSize)); 2794 pop(ra); 2795 2796 bind(&done); 2797} 2798 2799// BRANCH_ARGS_CHECK checks that conditional jump arguments are correct. 2800#define BRANCH_ARGS_CHECK(cond, rs, rt) \ 2801 DCHECK((cond == cc_always && rs == zero_reg && rt.rm() == zero_reg) || \ 2802 (cond != cc_always && (rs != zero_reg || rt.rm() != zero_reg))) 2803 2804void TurboAssembler::Branch(int32_t offset) { 2805 DCHECK(is_int21(offset)); 2806 BranchShort(offset); 2807} 2808 2809void TurboAssembler::Branch(int32_t offset, Condition cond, Register rs, 2810 const Operand& rt, Label::Distance near_jump) { 2811 bool is_near = BranchShortCheck(offset, nullptr, cond, rs, rt); 2812 DCHECK(is_near); 2813 USE(is_near); 2814} 2815 2816void TurboAssembler::Branch(Label* L) { 2817 if (L->is_bound()) { 2818 if (is_near(L)) { 2819 BranchShort(L); 2820 } else { 2821 BranchLong(L); 2822 } 2823 } else { 2824 if (is_trampoline_emitted()) { 2825 BranchLong(L); 2826 } else { 2827 BranchShort(L); 2828 } 2829 } 2830} 2831 2832void TurboAssembler::Branch(Label* L, Condition cond, Register rs, 2833 const Operand& rt, Label::Distance near_jump) { 2834 if (L->is_bound()) { 2835 if (!BranchShortCheck(0, L, cond, rs, rt)) { 2836 if (cond != cc_always) { 2837 Label skip; 2838 Condition neg_cond = NegateCondition(cond); 2839 BranchShort(&skip, neg_cond, rs, rt); 2840 BranchLong(L); 2841 bind(&skip); 2842 } else { 2843 BranchLong(L); 2844 EmitConstPoolWithJumpIfNeeded(); 2845 } 2846 } 2847 } else { 2848 if (is_trampoline_emitted() && near_jump == Label::Distance::kFar) { 2849 if (cond != cc_always) { 2850 Label skip; 2851 Condition neg_cond = NegateCondition(cond); 2852 BranchShort(&skip, neg_cond, rs, rt); 2853 BranchLong(L); 2854 bind(&skip); 2855 } else { 2856 BranchLong(L); 2857 EmitConstPoolWithJumpIfNeeded(); 2858 } 2859 } else { 2860 BranchShort(L, cond, rs, rt); 2861 } 2862 } 2863} 2864 2865void TurboAssembler::Branch(Label* L, Condition cond, Register rs, 2866 RootIndex index) { 2867 UseScratchRegisterScope temps(this); 2868 Register scratch = temps.Acquire(); 2869 LoadRoot(scratch, index); 2870 Branch(L, cond, rs, Operand(scratch)); 2871} 2872 2873void TurboAssembler::BranchShortHelper(int32_t offset, Label* L) { 2874 DCHECK(L == 
nullptr || offset == 0); 2875 offset = GetOffset(offset, L, OffsetSize::kOffset21); 2876 j(offset); 2877} 2878 2879void TurboAssembler::BranchShort(int32_t offset) { 2880 DCHECK(is_int21(offset)); 2881 BranchShortHelper(offset, nullptr); 2882} 2883 2884void TurboAssembler::BranchShort(Label* L) { BranchShortHelper(0, L); } 2885 2886int32_t TurboAssembler::GetOffset(int32_t offset, Label* L, OffsetSize bits) { 2887 if (L) { 2888 offset = branch_offset_helper(L, bits); 2889 } else { 2890 DCHECK(is_intn(offset, bits)); 2891 } 2892 return offset; 2893} 2894 2895Register TurboAssembler::GetRtAsRegisterHelper(const Operand& rt, 2896 Register scratch) { 2897 Register r2 = no_reg; 2898 if (rt.is_reg()) { 2899 r2 = rt.rm(); 2900 } else { 2901 r2 = scratch; 2902 li(r2, rt); 2903 } 2904 2905 return r2; 2906} 2907 2908bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset, 2909 OffsetSize bits) { 2910 if (!is_near(L, bits)) return false; 2911 *offset = GetOffset(*offset, L, bits); 2912 return true; 2913} 2914 2915bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset, OffsetSize bits, 2916 Register* scratch, const Operand& rt) { 2917 if (!is_near(L, bits)) return false; 2918 *scratch = GetRtAsRegisterHelper(rt, *scratch); 2919 *offset = GetOffset(*offset, L, bits); 2920 return true; 2921} 2922 2923bool TurboAssembler::BranchShortHelper(int32_t offset, Label* L, Condition cond, 2924 Register rs, const Operand& rt) { 2925 DCHECK(L == nullptr || offset == 0); 2926 UseScratchRegisterScope temps(this); 2927 BlockTrampolinePoolScope block_trampoline_pool(this); 2928 Register scratch = no_reg; 2929 if (!rt.is_reg()) { 2930 scratch = temps.Acquire(); 2931 li(scratch, rt); 2932 } else { 2933 scratch = rt.rm(); 2934 } 2935 { 2936 BlockTrampolinePoolScope block_trampoline_pool(this); 2937 switch (cond) { 2938 case cc_always: 2939 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 2940 j(offset); 2941 EmitConstPoolWithJumpIfNeeded(); 2942 break; 2943 case eq: 2944 // rs == rt 2945 if (rt.is_reg() && rs == rt.rm()) { 2946 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 2947 j(offset); 2948 } else { 2949 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2950 beq(rs, scratch, offset); 2951 } 2952 break; 2953 case ne: 2954 // rs != rt 2955 if (rt.is_reg() && rs == rt.rm()) { 2956 break; // No code needs to be emitted 2957 } else { 2958 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2959 bne(rs, scratch, offset); 2960 } 2961 break; 2962 2963 // Signed comparison. 2964 case greater: 2965 // rs > rt 2966 if (rt.is_reg() && rs == rt.rm()) { 2967 break; // No code needs to be emitted. 2968 } else { 2969 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2970 bgt(rs, scratch, offset); 2971 } 2972 break; 2973 case greater_equal: 2974 // rs >= rt 2975 if (rt.is_reg() && rs == rt.rm()) { 2976 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 2977 j(offset); 2978 } else { 2979 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2980 bge(rs, scratch, offset); 2981 } 2982 break; 2983 case less: 2984 // rs < rt 2985 if (rt.is_reg() && rs == rt.rm()) { 2986 break; // No code needs to be emitted. 
2987 } else { 2988 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2989 blt(rs, scratch, offset); 2990 } 2991 break; 2992 case less_equal: 2993 // rs <= rt 2994 if (rt.is_reg() && rs == rt.rm()) { 2995 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 2996 j(offset); 2997 } else { 2998 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 2999 ble(rs, scratch, offset); 3000 } 3001 break; 3002 3003 // Unsigned comparison. 3004 case Ugreater: 3005 // rs > rt 3006 if (rt.is_reg() && rs == rt.rm()) { 3007 break; // No code needs to be emitted. 3008 } else { 3009 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 3010 bgtu(rs, scratch, offset); 3011 } 3012 break; 3013 case Ugreater_equal: 3014 // rs >= rt 3015 if (rt.is_reg() && rs == rt.rm()) { 3016 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 3017 j(offset); 3018 } else { 3019 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 3020 bgeu(rs, scratch, offset); 3021 } 3022 break; 3023 case Uless: 3024 // rs < rt 3025 if (rt.is_reg() && rs == rt.rm()) { 3026 break; // No code needs to be emitted. 3027 } else { 3028 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 3029 bltu(rs, scratch, offset); 3030 } 3031 break; 3032 case Uless_equal: 3033 // rs <= rt 3034 if (rt.is_reg() && rs == rt.rm()) { 3035 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false; 3036 j(offset); 3037 } else { 3038 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false; 3039 bleu(rs, scratch, offset); 3040 } 3041 break; 3042 default: 3043 UNREACHABLE(); 3044 } 3045 } 3046 3047 CheckTrampolinePoolQuick(1); 3048 return true; 3049} 3050 3051bool TurboAssembler::BranchShortCheck(int32_t offset, Label* L, Condition cond, 3052 Register rs, const Operand& rt) { 3053 BRANCH_ARGS_CHECK(cond, rs, rt); 3054 3055 if (!L) { 3056 DCHECK(is_int13(offset)); 3057 return BranchShortHelper(offset, nullptr, cond, rs, rt); 3058 } else { 3059 DCHECK_EQ(offset, 0); 3060 return BranchShortHelper(0, L, cond, rs, rt); 3061 } 3062} 3063 3064void TurboAssembler::BranchShort(int32_t offset, Condition cond, Register rs, 3065 const Operand& rt) { 3066 BranchShortCheck(offset, nullptr, cond, rs, rt); 3067} 3068 3069void TurboAssembler::BranchShort(Label* L, Condition cond, Register rs, 3070 const Operand& rt) { 3071 BranchShortCheck(0, L, cond, rs, rt); 3072} 3073 3074void TurboAssembler::BranchAndLink(int32_t offset) { 3075 BranchAndLinkShort(offset); 3076} 3077 3078void TurboAssembler::BranchAndLink(int32_t offset, Condition cond, Register rs, 3079 const Operand& rt) { 3080 bool is_near = BranchAndLinkShortCheck(offset, nullptr, cond, rs, rt); 3081 DCHECK(is_near); 3082 USE(is_near); 3083} 3084 3085void TurboAssembler::BranchAndLink(Label* L) { 3086 if (L->is_bound()) { 3087 if (is_near(L)) { 3088 BranchAndLinkShort(L); 3089 } else { 3090 BranchAndLinkLong(L); 3091 } 3092 } else { 3093 if (is_trampoline_emitted()) { 3094 BranchAndLinkLong(L); 3095 } else { 3096 BranchAndLinkShort(L); 3097 } 3098 } 3099} 3100 3101void TurboAssembler::BranchAndLink(Label* L, Condition cond, Register rs, 3102 const Operand& rt) { 3103 if (L->is_bound()) { 3104 if (!BranchAndLinkShortCheck(0, L, cond, rs, rt)) { 3105 Label skip; 3106 Condition neg_cond = NegateCondition(cond); 3107 BranchShort(&skip, neg_cond, rs, rt); 3108 BranchAndLinkLong(L); 3109 bind(&skip); 3110 } 3111 } else { 3112 if (is_trampoline_emitted()) { 3113 Label skip; 3114 
Condition neg_cond = NegateCondition(cond); 3115 BranchShort(&skip, neg_cond, rs, rt); 3116 BranchAndLinkLong(L); 3117 bind(&skip); 3118 } else { 3119 BranchAndLinkShortCheck(0, L, cond, rs, rt); 3120 } 3121 } 3122} 3123 3124void TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L) { 3125 DCHECK(L == nullptr || offset == 0); 3126 offset = GetOffset(offset, L, OffsetSize::kOffset21); 3127 jal(offset); 3128} 3129 3130void TurboAssembler::BranchAndLinkShort(int32_t offset) { 3131 DCHECK(is_int21(offset)); 3132 BranchAndLinkShortHelper(offset, nullptr); 3133} 3134 3135void TurboAssembler::BranchAndLinkShort(Label* L) { 3136 BranchAndLinkShortHelper(0, L); 3137} 3138 3139// Pre r6 we need to use a bgezal or bltzal, but they can't be used directly 3140// with the slt instructions. We could use sub or add instead but we would miss 3141// overflow cases, so we keep slt and add an intermediate third instruction. 3142bool TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L, 3143 Condition cond, Register rs, 3144 const Operand& rt) { 3145 DCHECK(L == nullptr || offset == 0); 3146 if (!is_near(L, OffsetSize::kOffset21)) return false; 3147 3148 UseScratchRegisterScope temps(this); 3149 Register scratch = temps.Acquire(); 3150 BlockTrampolinePoolScope block_trampoline_pool(this); 3151 3152 if (cond == cc_always) { 3153 offset = GetOffset(offset, L, OffsetSize::kOffset21); 3154 jal(offset); 3155 } else { 3156 Branch(kInstrSize * 2, NegateCondition(cond), rs, 3157 Operand(GetRtAsRegisterHelper(rt, scratch))); 3158 offset = GetOffset(offset, L, OffsetSize::kOffset21); 3159 jal(offset); 3160 } 3161 3162 return true; 3163} 3164 3165bool TurboAssembler::BranchAndLinkShortCheck(int32_t offset, Label* L, 3166 Condition cond, Register rs, 3167 const Operand& rt) { 3168 BRANCH_ARGS_CHECK(cond, rs, rt); 3169 3170 if (!L) { 3171 DCHECK(is_int21(offset)); 3172 return BranchAndLinkShortHelper(offset, nullptr, cond, rs, rt); 3173 } else { 3174 DCHECK_EQ(offset, 0); 3175 return BranchAndLinkShortHelper(0, L, cond, rs, rt); 3176 } 3177} 3178 3179void TurboAssembler::LoadFromConstantsTable(Register destination, 3180 int constant_index) { 3181 DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable)); 3182 LoadRoot(destination, RootIndex::kBuiltinsConstantsTable); 3183 LoadTaggedPointerField( 3184 destination, FieldMemOperand(destination, FixedArray::OffsetOfElementAt( 3185 constant_index))); 3186} 3187 3188void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) { 3189 Ld(destination, MemOperand(kRootRegister, offset)); 3190} 3191 3192void TurboAssembler::LoadRootRegisterOffset(Register destination, 3193 intptr_t offset) { 3194 if (offset == 0) { 3195 Move(destination, kRootRegister); 3196 } else { 3197 Add64(destination, kRootRegister, Operand(offset)); 3198 } 3199} 3200 3201void TurboAssembler::Jump(Register target, Condition cond, Register rs, 3202 const Operand& rt) { 3203 BlockTrampolinePoolScope block_trampoline_pool(this); 3204 if (cond == cc_always) { 3205 jr(target); 3206 ForceConstantPoolEmissionWithoutJump(); 3207 } else { 3208 BRANCH_ARGS_CHECK(cond, rs, rt); 3209 Branch(kInstrSize * 2, NegateCondition(cond), rs, rt); 3210 jr(target); 3211 } 3212} 3213 3214void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode, 3215 Condition cond, Register rs, const Operand& rt) { 3216 Label skip; 3217 if (cond != cc_always) { 3218 Branch(&skip, NegateCondition(cond), rs, rt); 3219 } 3220 { 3221 BlockTrampolinePoolScope block_trampoline_pool(this); 
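// Materialize the target in t6 and jump through it; t6 is the scratch
// register this port uses for far jumps and calls.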
3222 li(t6, Operand(target, rmode)); 3223 Jump(t6, al, zero_reg, Operand(zero_reg)); 3224 EmitConstPoolWithJumpIfNeeded(); 3225 bind(&skip); 3226 } 3227} 3228 3229void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond, 3230 Register rs, const Operand& rt) { 3231 DCHECK(!RelocInfo::IsCodeTarget(rmode)); 3232 Jump(static_cast<intptr_t>(target), rmode, cond, rs, rt); 3233} 3234 3235void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode, 3236 Condition cond, Register rs, const Operand& rt) { 3237 DCHECK(RelocInfo::IsCodeTarget(rmode)); 3238 3239 BlockTrampolinePoolScope block_trampoline_pool(this); 3240 Builtin builtin = Builtin::kNoBuiltinId; 3241 bool target_is_isolate_independent_builtin = 3242 isolate()->builtins()->IsBuiltinHandle(code, &builtin) && 3243 Builtins::IsIsolateIndependent(builtin); 3244 if (target_is_isolate_independent_builtin && 3245 options().use_pc_relative_calls_and_jumps) { 3246 int32_t code_target_index = AddCodeTarget(code); 3247 Label skip; 3248 BlockTrampolinePoolScope block_trampoline_pool(this); 3249 if (cond != al) { 3250 Branch(&skip, NegateCondition(cond), rs, rt); 3251 } 3252 RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET); 3253 GenPCRelativeJump(t6, code_target_index); 3254 bind(&skip); 3255 return; 3256 } else if (root_array_available_ && options().isolate_independent_code && 3257 target_is_isolate_independent_builtin) { 3258 int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize + 3259 IsolateData::builtin_entry_table_offset(); 3260 Ld(t6, MemOperand(kRootRegister, offset)); 3261 Jump(t6, cond, rs, rt); 3262 return; 3263 } else if (options().inline_offheap_trampolines && 3264 target_is_isolate_independent_builtin) { 3265 // Inline the trampoline. 3266 RecordCommentForOffHeapTrampoline(builtin); 3267 li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET)); 3268 Jump(t6, cond, rs, rt); 3269 RecordComment("]"); 3270 return; 3271 } 3272 3273 int32_t target_index = AddCodeTarget(code); 3274 Jump(static_cast<intptr_t>(target_index), rmode, cond, rs, rt); 3275} 3276 3277void TurboAssembler::Jump(const ExternalReference& reference) { 3278 li(t6, reference); 3279 Jump(t6); 3280} 3281 3282// Note: To call gcc-compiled C code on riscv64, you must call through t6. 
3283void TurboAssembler::Call(Register target, Condition cond, Register rs, 3284 const Operand& rt) { 3285 BlockTrampolinePoolScope block_trampoline_pool(this); 3286 if (cond == cc_always) { 3287 jalr(ra, target, 0); 3288 } else { 3289 BRANCH_ARGS_CHECK(cond, rs, rt); 3290 Branch(kInstrSize * 2, NegateCondition(cond), rs, rt); 3291 jalr(ra, target, 0); 3292 } 3293} 3294 3295void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit, 3296 unsigned higher_limit, 3297 Label* on_in_range) { 3298 if (lower_limit != 0) { 3299 UseScratchRegisterScope temps(this); 3300 Register scratch = temps.Acquire(); 3301 Sub64(scratch, value, Operand(lower_limit)); 3302 Branch(on_in_range, Uless_equal, scratch, 3303 Operand(higher_limit - lower_limit)); 3304 } else { 3305 Branch(on_in_range, Uless_equal, value, 3306 Operand(higher_limit - lower_limit)); 3307 } 3308} 3309 3310void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond, 3311 Register rs, const Operand& rt) { 3312 li(t6, Operand(static_cast<int64_t>(target), rmode), ADDRESS_LOAD); 3313 Call(t6, cond, rs, rt); 3314} 3315 3316void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode, 3317 Condition cond, Register rs, const Operand& rt) { 3318 Builtin builtin = Builtin::kNoBuiltinId; 3319 bool target_is_isolate_independent_builtin = 3320 isolate()->builtins()->IsBuiltinHandle(code, &builtin) && 3321 Builtins::IsIsolateIndependent(builtin); 3322 if (target_is_isolate_independent_builtin && 3323 options().use_pc_relative_calls_and_jumps) { 3324 int32_t code_target_index = AddCodeTarget(code); 3325 Label skip; 3326 BlockTrampolinePoolScope block_trampoline_pool(this); 3327 RecordCommentForOffHeapTrampoline(builtin); 3328 if (cond != al) { 3329 Branch(&skip, NegateCondition(cond), rs, rt); 3330 } 3331 RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET); 3332 GenPCRelativeJumpAndLink(t6, code_target_index); 3333 bind(&skip); 3334 RecordComment("]"); 3335 return; 3336 } else if (root_array_available_ && options().isolate_independent_code && 3337 target_is_isolate_independent_builtin) { 3338 int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize + 3339 IsolateData::builtin_entry_table_offset(); 3340 LoadRootRelative(t6, offset); 3341 Call(t6, cond, rs, rt); 3342 return; 3343 } else if (options().inline_offheap_trampolines && 3344 target_is_isolate_independent_builtin) { 3345 // Inline the trampoline. 3346 RecordCommentForOffHeapTrampoline(builtin); 3347 li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET)); 3348 Call(t6, cond, rs, rt); 3349 RecordComment("]"); 3350 return; 3351 } 3352 3353 DCHECK(RelocInfo::IsCodeTarget(rmode)); 3354 DCHECK(code->IsExecutable()); 3355 int32_t target_index = AddCodeTarget(code); 3356 Call(static_cast<Address>(target_index), rmode, cond, rs, rt); 3357} 3358 3359void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin) { 3360 STATIC_ASSERT(kSystemPointerSize == 8); 3361 STATIC_ASSERT(kSmiTagSize == 1); 3362 STATIC_ASSERT(kSmiTag == 0); 3363 3364 // The builtin register contains the builtin index as a Smi. 
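// Untag it, scale it by the system pointer size, and load the entry address
// from the isolate's builtin entry table relative to kRootRegister.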
3365 SmiUntag(builtin, builtin); 3366 CalcScaledAddress(builtin, kRootRegister, builtin, kSystemPointerSizeLog2); 3367 Ld(builtin, MemOperand(builtin, IsolateData::builtin_entry_table_offset())); 3368} 3369 3370void TurboAssembler::CallBuiltinByIndex(Register builtin) { 3371 LoadEntryFromBuiltinIndex(builtin); 3372 Call(builtin); 3373} 3374 3375void TurboAssembler::CallBuiltin(Builtin builtin) { 3376 RecordCommentForOffHeapTrampoline(builtin); 3377 if (options().short_builtin_calls) { 3378 Call(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY); 3379 } else { 3380 Call(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET); 3381 } 3382 RecordComment("]"); 3383} 3384 3385void TurboAssembler::TailCallBuiltin(Builtin builtin) { 3386 RecordCommentForOffHeapTrampoline(builtin); 3387 if (options().short_builtin_calls) { 3388 Jump(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY); 3389 } else { 3390 Jump(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET); 3391 } 3392 RecordComment("]"); 3393} 3394 3395void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin, 3396 Register destination) { 3397 Ld(destination, EntryFromBuiltinAsOperand(builtin)); 3398} 3399 3400MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) { 3401 DCHECK(root_array_available()); 3402 return MemOperand(kRootRegister, 3403 IsolateData::BuiltinEntrySlotOffset(builtin)); 3404} 3405 3406void TurboAssembler::PatchAndJump(Address target) { 3407 UseScratchRegisterScope temps(this); 3408 Register scratch = temps.Acquire(); 3409 auipc(scratch, 0); // Load PC into scratch 3410 Ld(t6, MemOperand(scratch, kInstrSize * 4)); 3411 jr(t6); 3412 nop(); // For alignment 3413 DCHECK_EQ(reinterpret_cast<uint64_t>(pc_) % 8, 0); 3414 *reinterpret_cast<uint64_t*>(pc_) = target; // pc_ should be align. 3415 pc_ += sizeof(uint64_t); 3416} 3417 3418void TurboAssembler::StoreReturnAddressAndCall(Register target) { 3419 // This generates the final instruction sequence for calls to C functions 3420 // once an exit frame has been constructed. 3421 // 3422 // Note that this assumes the caller code (i.e. the Code object currently 3423 // being generated) is immovable or that the callee function cannot trigger 3424 // GC, since the callee function will return to it. 3425 // 3426 // Compute the return address in lr to return to after the jump below. The 3427 // pc is already at '+ 8' from the current instruction; but return is after 3428 // three instructions, so add another 4 to pc to get the return address. 3429 // 3430 Assembler::BlockTrampolinePoolScope block_trampoline_pool(this); 3431 int kNumInstructionsToJump = 5; 3432 if (FLAG_riscv_c_extension) kNumInstructionsToJump = 4; 3433 Label find_ra; 3434 // Adjust the value in ra to point to the correct return location, one 3435 // instruction past the real call into C code (the jalr(t6)), and push it. 3436 // This is the return address of the exit frame. 3437 auipc(ra, 0); // Set ra the current PC 3438 bind(&find_ra); 3439 addi(ra, ra, 3440 (kNumInstructionsToJump + 1) * 3441 kInstrSize); // Set ra to insn after the call 3442 3443 // This spot was reserved in EnterExitFrame. 3444 Sd(ra, MemOperand(sp)); 3445 addi(sp, sp, -kCArgsSlotsSize); 3446 // Stack is still aligned. 3447 3448 // Call the C routine. 3449 Mv(t6, 3450 target); // Function pointer to t6 to conform to ABI for PIC. 3451 jalr(t6); 3452 // Make sure the stored 'ra' points to this position. 
3453 DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra)); 3454} 3455 3456void TurboAssembler::Ret(Condition cond, Register rs, const Operand& rt) { 3457 Jump(ra, cond, rs, rt); 3458 if (cond == al) { 3459 ForceConstantPoolEmissionWithoutJump(); 3460 } 3461} 3462 3463 3464void TurboAssembler::BranchLong(Label* L) { 3465 // Generate position independent long branch. 3466 BlockTrampolinePoolScope block_trampoline_pool(this); 3467 int64_t imm64; 3468 imm64 = branch_long_offset(L); 3469 GenPCRelativeJump(t6, imm64); 3470 EmitConstPoolWithJumpIfNeeded(); 3471} 3472 3473void TurboAssembler::BranchAndLinkLong(Label* L) { 3474 // Generate position independent long branch and link. 3475 BlockTrampolinePoolScope block_trampoline_pool(this); 3476 int64_t imm64; 3477 imm64 = branch_long_offset(L); 3478 GenPCRelativeJumpAndLink(t6, imm64); 3479} 3480 3481void TurboAssembler::DropAndRet(int drop) { 3482 Add64(sp, sp, drop * kSystemPointerSize); 3483 Ret(); 3484} 3485 3486void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1, 3487 const Operand& r2) { 3488 // Both Drop and Ret need to be conditional. 3489 Label skip; 3490 if (cond != cc_always) { 3491 Branch(&skip, NegateCondition(cond), r1, r2); 3492 } 3493 3494 Drop(drop); 3495 Ret(); 3496 3497 if (cond != cc_always) { 3498 bind(&skip); 3499 } 3500} 3501 3502void TurboAssembler::Drop(int count, Condition cond, Register reg, 3503 const Operand& op) { 3504 if (count <= 0) { 3505 return; 3506 } 3507 3508 Label skip; 3509 3510 if (cond != al) { 3511 Branch(&skip, NegateCondition(cond), reg, op); 3512 } 3513 3514 Add64(sp, sp, Operand(count * kSystemPointerSize)); 3515 3516 if (cond != al) { 3517 bind(&skip); 3518 } 3519} 3520 3521void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) { 3522 if (scratch == no_reg) { 3523 Xor(reg1, reg1, Operand(reg2)); 3524 Xor(reg2, reg2, Operand(reg1)); 3525 Xor(reg1, reg1, Operand(reg2)); 3526 } else { 3527 Mv(scratch, reg1); 3528 Mv(reg1, reg2); 3529 Mv(reg2, scratch); 3530 } 3531} 3532 3533void TurboAssembler::Call(Label* target) { BranchAndLink(target); } 3534 3535void TurboAssembler::LoadAddress(Register dst, Label* target, 3536 RelocInfo::Mode rmode) { 3537 int32_t offset; 3538 if (CalculateOffset(target, &offset, OffsetSize::kOffset32)) { 3539 CHECK(is_int32(offset + 0x800)); 3540 int32_t Hi20 = (((int32_t)offset + 0x800) >> 12); 3541 int32_t Lo12 = (int32_t)offset << 20 >> 20; 3542 BlockTrampolinePoolScope block_trampoline_pool(this); 3543 auipc(dst, Hi20); 3544 addi(dst, dst, Lo12); 3545 } else { 3546 uint64_t address = jump_address(target); 3547 li(dst, Operand(address, rmode), ADDRESS_LOAD); 3548 } 3549} 3550 3551void TurboAssembler::Push(Smi smi) { 3552 UseScratchRegisterScope temps(this); 3553 Register scratch = temps.Acquire(); 3554 li(scratch, Operand(smi)); 3555 push(scratch); 3556} 3557 3558void TurboAssembler::PushArray(Register array, Register size, 3559 PushArrayOrder order) { 3560 UseScratchRegisterScope temps(this); 3561 Register scratch = temps.Acquire(); 3562 Register scratch2 = temps.Acquire(); 3563 Label loop, entry; 3564 if (order == PushArrayOrder::kReverse) { 3565 Mv(scratch, zero_reg); 3566 jmp(&entry); 3567 bind(&loop); 3568 CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2); 3569 Ld(scratch2, MemOperand(scratch2)); 3570 push(scratch2); 3571 Add64(scratch, scratch, Operand(1)); 3572 bind(&entry); 3573 Branch(&loop, less, scratch, Operand(size)); 3574 } else { 3575 Mv(scratch, size); 3576 jmp(&entry); 3577 bind(&loop); 
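// Default order: scratch counts down from size, so elements are pushed from
// array[size - 1] down to array[0].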
3578 CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2); 3579 Ld(scratch2, MemOperand(scratch2)); 3580 push(scratch2); 3581 bind(&entry); 3582 Add64(scratch, scratch, Operand(-1)); 3583 Branch(&loop, greater_equal, scratch, Operand(zero_reg)); 3584 } 3585} 3586 3587void TurboAssembler::Push(Handle<HeapObject> handle) { 3588 UseScratchRegisterScope temps(this); 3589 Register scratch = temps.Acquire(); 3590 li(scratch, Operand(handle)); 3591 push(scratch); 3592} 3593 3594// --------------------------------------------------------------------------- 3595// Exception handling. 3596 3597void MacroAssembler::PushStackHandler() { 3598 // Adjust this code if not the case. 3599 STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize); 3600 STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize); 3601 3602 Push(Smi::zero()); // Padding. 3603 3604 // Link the current handler as the next handler. 3605 UseScratchRegisterScope temps(this); 3606 Register handler_address = temps.Acquire(); 3607 li(handler_address, 3608 ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); 3609 Register handler = temps.Acquire(); 3610 Ld(handler, MemOperand(handler_address)); 3611 push(handler); 3612 3613 // Set this new handler as the current one. 3614 Sd(sp, MemOperand(handler_address)); 3615} 3616 3617void MacroAssembler::PopStackHandler() { 3618 STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0); 3619 pop(a1); 3620 Add64(sp, sp, 3621 Operand(static_cast<int64_t>(StackHandlerConstants::kSize - 3622 kSystemPointerSize))); 3623 UseScratchRegisterScope temps(this); 3624 Register scratch = temps.Acquire(); 3625 li(scratch, 3626 ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); 3627 Sd(a1, MemOperand(scratch)); 3628} 3629 3630void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst, 3631 const DoubleRegister src) { 3632 // Subtracting 0.0 preserves all inputs except for signalling NaNs, which 3633 // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0 3634 // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0. 3635 fsub_d(dst, src, kDoubleRegZero); 3636} 3637 3638void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) { 3639 Move(dst, fa0); // Reg fa0 is FP return value. 3640} 3641 3642void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) { 3643 Move(dst, fa0); // Reg fa0 is FP first argument value. 3644} 3645 3646void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(fa0, src); } 3647 3648void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(fa0, src); } 3649 3650void TurboAssembler::MovToFloatParameters(DoubleRegister src1, 3651 DoubleRegister src2) { 3652 const DoubleRegister fparg2 = fa1; 3653 if (src2 == fa0) { 3654 DCHECK(src1 != fparg2); 3655 Move(fparg2, src2); 3656 Move(fa0, src1); 3657 } else { 3658 Move(fa0, src1); 3659 Move(fparg2, src2); 3660 } 3661} 3662 3663// ----------------------------------------------------------------------------- 3664// JavaScript invokes. 3665 3666void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) { 3667 DCHECK(root_array_available()); 3668 Isolate* isolate = this->isolate(); 3669 ExternalReference limit = 3670 kind == StackLimitKind::kRealStackLimit 3671 ? 
ExternalReference::address_of_real_jslimit(isolate)
3672 : ExternalReference::address_of_jslimit(isolate);
3673 DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
3674
3675 intptr_t offset =
3676 TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
3677 CHECK(is_int32(offset));
3678 Ld(destination, MemOperand(kRootRegister, static_cast<int32_t>(offset)));
3679 }
3680
3681 void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
3682 Register scratch2,
3683 Label* stack_overflow, Label* done) {
3684 // Check the stack for overflow. We are not trying to catch
3685 // interruptions (e.g. debug break and preemption) here, so the "real stack
3686 // limit" is checked.
3687 DCHECK(stack_overflow != nullptr || done != nullptr);
3688 LoadStackLimit(scratch1, StackLimitKind::kRealStackLimit);
3689 // Make scratch1 the space we have left. The stack might already be overflowed
3690 // here which will cause scratch1 to become negative.
3691 Sub64(scratch1, sp, scratch1);
3692 // Check if the arguments will overflow the stack.
3693 Sll64(scratch2, num_args, kSystemPointerSizeLog2);
3694 // Signed comparison.
3695 if (stack_overflow != nullptr) {
3696 Branch(stack_overflow, le, scratch1, Operand(scratch2));
3697 } else if (done != nullptr) {
3698 Branch(done, gt, scratch1, Operand(scratch2));
3699 } else {
3700 UNREACHABLE();
3701 }
3702 }
3703
3704 void MacroAssembler::InvokePrologue(Register expected_parameter_count,
3705 Register actual_parameter_count,
3706 Label* done, InvokeType type) {
3707 Label regular_invoke;
3708
3709 // a0: actual arguments count
3710 // a1: function (passed through to callee)
3711 // a2: expected arguments count
3712
3713 DCHECK_EQ(actual_parameter_count, a0);
3714 DCHECK_EQ(expected_parameter_count, a2);
3715
3716 // If the expected parameter count is equal to the adaptor sentinel, no need
3717 // to push undefined value as arguments.
3718 if (kDontAdaptArgumentsSentinel != 0) {
3719 Branch(&regular_invoke, eq, expected_parameter_count,
3720 Operand(kDontAdaptArgumentsSentinel));
3721 }
3722 // If overapplication or if the actual argument count is equal to the
3723 // formal parameter count, no need to push extra undefined values.
3724 Sub64(expected_parameter_count, expected_parameter_count,
3725 actual_parameter_count);
3726 Branch(&regular_invoke, le, expected_parameter_count, Operand(zero_reg));
3727
3728 Label stack_overflow;
3729 {
3730 UseScratchRegisterScope temps(this);
3731 StackOverflowCheck(expected_parameter_count, temps.Acquire(),
3732 temps.Acquire(), &stack_overflow);
3733 }
3734 // Underapplication. Move the arguments already in the stack, including the
3735 // receiver and the return address.
3736 {
3737 Label copy;
3738 Register src = a6, dest = a7;
3739 Move(src, sp);
3740 Sll64(t0, expected_parameter_count, kSystemPointerSizeLog2);
3741 Sub64(sp, sp, Operand(t0));
3742 // Update stack pointer.
3743 Move(dest, sp);
3744 Move(t0, actual_parameter_count);
3745 bind(&copy);
3746 Ld(t1, MemOperand(src, 0));
3747 Sd(t1, MemOperand(dest, 0));
3748 Sub64(t0, t0, Operand(1));
3749 Add64(src, src, Operand(kSystemPointerSize));
3750 Add64(dest, dest, Operand(kSystemPointerSize));
3751 Branch(&copy, gt, t0, Operand(zero_reg));
3752 }
3753
3754 // Fill remaining expected arguments with undefined values.
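  // (After the copy loop above, a7 points just past the relocated entries, at
  // the first slot still to be filled; the loop below stores the undefined
  // root there until expected_parameter_count, which now holds the argument
  // deficit, reaches zero.)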
3755 LoadRoot(t0, RootIndex::kUndefinedValue);
3756 {
3757 Label loop;
3758 bind(&loop);
3759 Sd(t0, MemOperand(a7, 0));
3760 Sub64(expected_parameter_count, expected_parameter_count, Operand(1));
3761 Add64(a7, a7, Operand(kSystemPointerSize));
3762 Branch(&loop, gt, expected_parameter_count, Operand(zero_reg));
3763 }
3764 Branch(&regular_invoke);
3765
3766 bind(&stack_overflow);
3767 {
3768 FrameScope frame(
3769 this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3770 CallRuntime(Runtime::kThrowStackOverflow);
3771 break_(0xCC);
3772 }
3773 bind(&regular_invoke);
3774 }
3775
3776 void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
3777 Register expected_parameter_count,
3778 Register actual_parameter_count) {
3779 Label skip_hook;
3780 {
3781 UseScratchRegisterScope temps(this);
3782 Register scratch = temps.Acquire();
3783 li(scratch,
3784 ExternalReference::debug_hook_on_function_call_address(isolate()));
3785 Lb(scratch, MemOperand(scratch));
3786 Branch(&skip_hook, eq, scratch, Operand(zero_reg));
3787 }
3788 {
3789 // Load receiver to pass it later to DebugOnFunctionCall hook.
3790 UseScratchRegisterScope temps(this);
3791 Register receiver = temps.Acquire();
3792 LoadReceiver(receiver, actual_parameter_count);
3793
3794 FrameScope frame(
3795 this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3796 SmiTag(expected_parameter_count);
3797 Push(expected_parameter_count);
3798
3799 SmiTag(actual_parameter_count);
3800 Push(actual_parameter_count);
3801
3802 if (new_target.is_valid()) {
3803 Push(new_target);
3804 }
3805 Push(fun);
3806 Push(fun);
3807 Push(receiver);
3808 CallRuntime(Runtime::kDebugOnFunctionCall);
3809 Pop(fun);
3810 if (new_target.is_valid()) {
3811 Pop(new_target);
3812 }
3813
3814 Pop(actual_parameter_count);
3815 SmiUntag(actual_parameter_count);
3816
3817 Pop(expected_parameter_count);
3818 SmiUntag(expected_parameter_count);
3819 }
3820 bind(&skip_hook);
3821 }
3822
3823 void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
3824 Register expected_parameter_count,
3825 Register actual_parameter_count,
3826 InvokeType type) {
3827 // You can't call a function without a valid frame.
3828 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3829 DCHECK_EQ(function, a1);
3830 DCHECK_IMPLIES(new_target.is_valid(), new_target == a3);
3831
3832 // On function call, call into the debugger if necessary.
3833 CheckDebugHook(function, new_target, expected_parameter_count,
3834 actual_parameter_count);
3835
3836 // Clear the new.target register if not given.
3837 if (!new_target.is_valid()) {
3838 LoadRoot(a3, RootIndex::kUndefinedValue);
3839 }
3840
3841 Label done;
3842 InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
3843 // We call indirectly through the code field in the function to
3844 // allow recompilation to take effect without changing any of the
3845 // call sites.
3846 Register code = kJavaScriptCallCodeStartRegister;
3847 LoadTaggedPointerField(code,
3848 FieldMemOperand(function, JSFunction::kCodeOffset));
3849 switch (type) {
3850 case InvokeType::kCall:
3851 CallCodeObject(code);
3852 break;
3853 case InvokeType::kJump:
3854 JumpCodeObject(code);
3855 break;
3856 }
3857
3858 // Continue here if InvokePrologue does handle the invocation due to
3859 // mismatched parameter counts.
3860 bind(&done); 3861} 3862 3863void MacroAssembler::InvokeFunctionWithNewTarget( 3864 Register function, Register new_target, Register actual_parameter_count, 3865 InvokeType type) { 3866 // You can't call a function without a valid frame. 3867 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame()); 3868 3869 // Contract with called JS functions requires that function is passed in a1. 3870 DCHECK_EQ(function, a1); 3871 Register expected_parameter_count = a2; 3872 { 3873 UseScratchRegisterScope temps(this); 3874 Register temp_reg = temps.Acquire(); 3875 LoadTaggedPointerField( 3876 temp_reg, 3877 FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset)); 3878 LoadTaggedPointerField( 3879 cp, FieldMemOperand(function, JSFunction::kContextOffset)); 3880 // The argument count is stored as uint16_t 3881 Lhu(expected_parameter_count, 3882 FieldMemOperand(temp_reg, 3883 SharedFunctionInfo::kFormalParameterCountOffset)); 3884 } 3885 InvokeFunctionCode(function, new_target, expected_parameter_count, 3886 actual_parameter_count, type); 3887} 3888 3889void MacroAssembler::InvokeFunction(Register function, 3890 Register expected_parameter_count, 3891 Register actual_parameter_count, 3892 InvokeType type) { 3893 // You can't call a function without a valid frame. 3894 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame()); 3895 3896 // Contract with called JS functions requires that function is passed in a1. 3897 DCHECK_EQ(function, a1); 3898 3899 // Get the function and setup the context. 3900 LoadTaggedPointerField(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); 3901 3902 InvokeFunctionCode(a1, no_reg, expected_parameter_count, 3903 actual_parameter_count, type); 3904} 3905 3906// --------------------------------------------------------------------------- 3907// Support functions. 
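// GetObjectType leaves the object's map in |map| and the raw InstanceType in
// |type_reg|. GetInstanceTypeRange additionally rebases the type against
// |lower_limit| so that a contiguous range of instance types can be tested
// with a single unsigned comparison (see AssertFunction below for a typical
// use).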
3908 3909void MacroAssembler::GetObjectType(Register object, Register map, 3910 Register type_reg) { 3911 LoadMap(map, object); 3912 Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset)); 3913} 3914 3915void MacroAssembler::GetInstanceTypeRange(Register map, Register type_reg, 3916 InstanceType lower_limit, 3917 Register range) { 3918 Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset)); 3919 Sub64(range, type_reg, Operand(lower_limit)); 3920} 3921//------------------------------------------------------------------------------ 3922// Wasm 3923void TurboAssembler::WasmRvvEq(VRegister dst, VRegister lhs, VRegister rhs, 3924 VSew sew, Vlmul lmul) { 3925 VU.set(kScratchReg, sew, lmul); 3926 vmseq_vv(v0, lhs, rhs); 3927 li(kScratchReg, -1); 3928 vmv_vx(dst, zero_reg); 3929 vmerge_vx(dst, kScratchReg, dst); 3930} 3931 3932void TurboAssembler::WasmRvvNe(VRegister dst, VRegister lhs, VRegister rhs, 3933 VSew sew, Vlmul lmul) { 3934 VU.set(kScratchReg, sew, lmul); 3935 vmsne_vv(v0, lhs, rhs); 3936 li(kScratchReg, -1); 3937 vmv_vx(dst, zero_reg); 3938 vmerge_vx(dst, kScratchReg, dst); 3939} 3940 3941void TurboAssembler::WasmRvvGeS(VRegister dst, VRegister lhs, VRegister rhs, 3942 VSew sew, Vlmul lmul) { 3943 VU.set(kScratchReg, sew, lmul); 3944 vmsle_vv(v0, rhs, lhs); 3945 li(kScratchReg, -1); 3946 vmv_vx(dst, zero_reg); 3947 vmerge_vx(dst, kScratchReg, dst); 3948} 3949 3950void TurboAssembler::WasmRvvGeU(VRegister dst, VRegister lhs, VRegister rhs, 3951 VSew sew, Vlmul lmul) { 3952 VU.set(kScratchReg, sew, lmul); 3953 vmsleu_vv(v0, rhs, lhs); 3954 li(kScratchReg, -1); 3955 vmv_vx(dst, zero_reg); 3956 vmerge_vx(dst, kScratchReg, dst); 3957} 3958 3959void TurboAssembler::WasmRvvGtS(VRegister dst, VRegister lhs, VRegister rhs, 3960 VSew sew, Vlmul lmul) { 3961 VU.set(kScratchReg, sew, lmul); 3962 vmslt_vv(v0, rhs, lhs); 3963 li(kScratchReg, -1); 3964 vmv_vx(dst, zero_reg); 3965 vmerge_vx(dst, kScratchReg, dst); 3966} 3967 3968void TurboAssembler::WasmRvvGtU(VRegister dst, VRegister lhs, VRegister rhs, 3969 VSew sew, Vlmul lmul) { 3970 VU.set(kScratchReg, sew, lmul); 3971 vmsltu_vv(v0, rhs, lhs); 3972 li(kScratchReg, -1); 3973 vmv_vx(dst, zero_reg); 3974 vmerge_vx(dst, kScratchReg, dst); 3975} 3976 3977void TurboAssembler::WasmRvvS128const(VRegister dst, const uint8_t imms[16]) { 3978 uint64_t imm1 = *(reinterpret_cast<const uint64_t*>(imms)); 3979 uint64_t imm2 = *((reinterpret_cast<const uint64_t*>(imms)) + 1); 3980 VU.set(kScratchReg, VSew::E64, Vlmul::m1); 3981 li(kScratchReg, 1); 3982 vmv_vx(v0, kScratchReg); 3983 li(kScratchReg, imm1); 3984 vmerge_vx(dst, kScratchReg, dst); 3985 li(kScratchReg, imm2); 3986 vsll_vi(v0, v0, 1); 3987 vmerge_vx(dst, kScratchReg, dst); 3988} 3989 3990void TurboAssembler::LoadLane(int ts, VRegister dst, uint8_t laneidx, 3991 MemOperand src) { 3992 if (ts == 8) { 3993 Lbu(kScratchReg2, src); 3994 VU.set(kScratchReg, E64, m1); 3995 li(kScratchReg, 0x1 << laneidx); 3996 vmv_sx(v0, kScratchReg); 3997 VU.set(kScratchReg, E8, m1); 3998 vmerge_vx(dst, kScratchReg2, dst); 3999 } else if (ts == 16) { 4000 Lhu(kScratchReg2, src); 4001 VU.set(kScratchReg, E16, m1); 4002 li(kScratchReg, 0x1 << laneidx); 4003 vmv_sx(v0, kScratchReg); 4004 vmerge_vx(dst, kScratchReg2, dst); 4005 } else if (ts == 32) { 4006 Lwu(kScratchReg2, src); 4007 VU.set(kScratchReg, E32, m1); 4008 li(kScratchReg, 0x1 << laneidx); 4009 vmv_sx(v0, kScratchReg); 4010 vmerge_vx(dst, kScratchReg2, dst); 4011 } else if (ts == 64) { 4012 Ld(kScratchReg2, src); 4013 VU.set(kScratchReg, E64, m1); 4014 
li(kScratchReg, 0x1 << laneidx); 4015 vmv_sx(v0, kScratchReg); 4016 vmerge_vx(dst, kScratchReg2, dst); 4017 } else { 4018 UNREACHABLE(); 4019 } 4020} 4021 4022void TurboAssembler::StoreLane(int sz, VRegister src, uint8_t laneidx, 4023 MemOperand dst) { 4024 if (sz == 8) { 4025 VU.set(kScratchReg, E8, m1); 4026 vslidedown_vi(kSimd128ScratchReg, src, laneidx); 4027 vmv_xs(kScratchReg, kSimd128ScratchReg); 4028 Sb(kScratchReg, dst); 4029 } else if (sz == 16) { 4030 VU.set(kScratchReg, E16, m1); 4031 vslidedown_vi(kSimd128ScratchReg, src, laneidx); 4032 vmv_xs(kScratchReg, kSimd128ScratchReg); 4033 Sh(kScratchReg, dst); 4034 } else if (sz == 32) { 4035 VU.set(kScratchReg, E32, m1); 4036 vslidedown_vi(kSimd128ScratchReg, src, laneidx); 4037 vmv_xs(kScratchReg, kSimd128ScratchReg); 4038 Sw(kScratchReg, dst); 4039 } else { 4040 DCHECK_EQ(sz, 64); 4041 VU.set(kScratchReg, E64, m1); 4042 vslidedown_vi(kSimd128ScratchReg, src, laneidx); 4043 vmv_xs(kScratchReg, kSimd128ScratchReg); 4044 Sd(kScratchReg, dst); 4045 } 4046} 4047// ----------------------------------------------------------------------------- 4048// Runtime calls. 4049 4050void TurboAssembler::AddOverflow64(Register dst, Register left, 4051 const Operand& right, Register overflow) { 4052 UseScratchRegisterScope temps(this); 4053 BlockTrampolinePoolScope block_trampoline_pool(this); 4054 Register right_reg = no_reg; 4055 Register scratch = temps.Acquire(); 4056 Register scratch2 = temps.Acquire(); 4057 if (!right.is_reg()) { 4058 li(scratch, Operand(right)); 4059 right_reg = scratch; 4060 } else { 4061 right_reg = right.rm(); 4062 } 4063 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 && 4064 overflow != scratch2); 4065 DCHECK(overflow != left && overflow != right_reg); 4066 if (dst == left || dst == right_reg) { 4067 add(scratch2, left, right_reg); 4068 xor_(overflow, scratch2, left); 4069 xor_(scratch, scratch2, right_reg); 4070 and_(overflow, overflow, scratch); 4071 Mv(dst, scratch2); 4072 } else { 4073 add(dst, left, right_reg); 4074 xor_(overflow, dst, left); 4075 xor_(scratch, dst, right_reg); 4076 and_(overflow, overflow, scratch); 4077 } 4078} 4079 4080void TurboAssembler::SubOverflow64(Register dst, Register left, 4081 const Operand& right, Register overflow) { 4082 UseScratchRegisterScope temps(this); 4083 BlockTrampolinePoolScope block_trampoline_pool(this); 4084 Register right_reg = no_reg; 4085 Register scratch = temps.Acquire(); 4086 Register scratch2 = temps.Acquire(); 4087 if (!right.is_reg()) { 4088 li(scratch, Operand(right)); 4089 right_reg = scratch; 4090 } else { 4091 right_reg = right.rm(); 4092 } 4093 4094 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 && 4095 overflow != scratch2); 4096 DCHECK(overflow != left && overflow != right_reg); 4097 4098 if (dst == left || dst == right_reg) { 4099 sub(scratch2, left, right_reg); 4100 xor_(overflow, left, scratch2); 4101 xor_(scratch, left, right_reg); 4102 and_(overflow, overflow, scratch); 4103 Mv(dst, scratch2); 4104 } else { 4105 sub(dst, left, right_reg); 4106 xor_(overflow, left, dst); 4107 xor_(scratch, left, right_reg); 4108 and_(overflow, overflow, scratch); 4109 } 4110} 4111 4112void TurboAssembler::MulOverflow32(Register dst, Register left, 4113 const Operand& right, Register overflow) { 4114 ASM_CODE_COMMENT(this); 4115 UseScratchRegisterScope temps(this); 4116 BlockTrampolinePoolScope block_trampoline_pool(this); 4117 Register right_reg = no_reg; 4118 Register scratch = temps.Acquire(); 4119 Register scratch2 = 
temps.Acquire(); 4120 if (!right.is_reg()) { 4121 li(scratch, Operand(right)); 4122 right_reg = scratch; 4123 } else { 4124 right_reg = right.rm(); 4125 } 4126 4127 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 && 4128 overflow != scratch2); 4129 DCHECK(overflow != left && overflow != right_reg); 4130 sext_w(overflow, left); 4131 sext_w(scratch2, right_reg); 4132 4133 mul(overflow, overflow, scratch2); 4134 sext_w(dst, overflow); 4135 xor_(overflow, overflow, dst); 4136} 4137 4138void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments, 4139 SaveFPRegsMode save_doubles) { 4140 ASM_CODE_COMMENT(this); 4141 // All parameters are on the stack. a0 has the return value after call. 4142 4143 // If the expected number of arguments of the runtime function is 4144 // constant, we check that the actual number of arguments match the 4145 // expectation. 4146 CHECK(f->nargs < 0 || f->nargs == num_arguments); 4147 4148 // TODO(1236192): Most runtime routines don't need the number of 4149 // arguments passed in because it is constant. At some point we 4150 // should remove this need and make the runtime routine entry code 4151 // smarter. 4152 PrepareCEntryArgs(num_arguments); 4153 PrepareCEntryFunction(ExternalReference::Create(f)); 4154 Handle<Code> code = 4155 CodeFactory::CEntry(isolate(), f->result_size, save_doubles); 4156 Call(code, RelocInfo::CODE_TARGET); 4157} 4158 4159void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) { 4160 ASM_CODE_COMMENT(this); 4161 const Runtime::Function* function = Runtime::FunctionForId(fid); 4162 DCHECK_EQ(1, function->result_size); 4163 if (function->nargs >= 0) { 4164 PrepareCEntryArgs(function->nargs); 4165 } 4166 JumpToExternalReference(ExternalReference::Create(fid)); 4167} 4168 4169void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin, 4170 bool builtin_exit_frame) { 4171 ASM_CODE_COMMENT(this); 4172 PrepareCEntryFunction(builtin); 4173 Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore, 4174 ArgvMode::kStack, builtin_exit_frame); 4175 Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg)); 4176} 4177 4178void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) { 4179 // Ld a Address from a constant pool. 4180 // Record a value into constant pool. 4181 ASM_CODE_COMMENT(this); 4182 if (!FLAG_riscv_constant_pool) { 4183 li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); 4184 } else { 4185 RecordEntry(entry, RelocInfo::OFF_HEAP_TARGET); 4186 RecordRelocInfo(RelocInfo::OFF_HEAP_TARGET, entry); 4187 auipc(kOffHeapTrampolineRegister, 0); 4188 ld(kOffHeapTrampolineRegister, kOffHeapTrampolineRegister, 0); 4189 } 4190 Jump(kOffHeapTrampolineRegister); 4191} 4192 4193void MacroAssembler::LoadWeakValue(Register out, Register in, 4194 Label* target_if_cleared) { 4195 ASM_CODE_COMMENT(this); 4196 Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32)); 4197 And(out, in, Operand(~kWeakHeapObjectMask)); 4198} 4199 4200void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value, 4201 Register scratch1, 4202 Register scratch2) { 4203 DCHECK_GT(value, 0); 4204 if (FLAG_native_code_counters && counter->Enabled()) { 4205 ASM_CODE_COMMENT(this); 4206 // This operation has to be exactly 32-bit wide in case the external 4207 // reference table redirects the counter to a uint32_t 4208 // dummy_stats_counter_ field. 
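    // Hence the 32-bit Lw/Add32/Sw sequence below rather than 64-bit
    // loads and stores.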
4209 li(scratch2, ExternalReference::Create(counter)); 4210 Lw(scratch1, MemOperand(scratch2)); 4211 Add32(scratch1, scratch1, Operand(value)); 4212 Sw(scratch1, MemOperand(scratch2)); 4213 } 4214} 4215 4216void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value, 4217 Register scratch1, 4218 Register scratch2) { 4219 DCHECK_GT(value, 0); 4220 if (FLAG_native_code_counters && counter->Enabled()) { 4221 ASM_CODE_COMMENT(this); 4222 // This operation has to be exactly 32-bit wide in case the external 4223 // reference table redirects the counter to a uint32_t 4224 // dummy_stats_counter_ field. 4225 li(scratch2, ExternalReference::Create(counter)); 4226 Lw(scratch1, MemOperand(scratch2)); 4227 Sub32(scratch1, scratch1, Operand(value)); 4228 Sw(scratch1, MemOperand(scratch2)); 4229 } 4230} 4231 4232// ----------------------------------------------------------------------------- 4233// Debugging. 4234 4235void TurboAssembler::Trap() { stop(); } 4236void TurboAssembler::DebugBreak() { stop(); } 4237 4238void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs, 4239 Operand rt) { 4240 if (FLAG_debug_code) Check(cc, reason, rs, rt); 4241} 4242 4243void TurboAssembler::Check(Condition cc, AbortReason reason, Register rs, 4244 Operand rt) { 4245 Label L; 4246 BranchShort(&L, cc, rs, rt); 4247 Abort(reason); 4248 // Will not return here. 4249 bind(&L); 4250} 4251 4252void TurboAssembler::Abort(AbortReason reason) { 4253 Label abort_start; 4254 bind(&abort_start); 4255 if (FLAG_code_comments) { 4256 const char* msg = GetAbortReason(reason); 4257 RecordComment("Abort message: "); 4258 RecordComment(msg); 4259 } 4260 4261 // Avoid emitting call to builtin if requested. 4262 if (trap_on_abort()) { 4263 ebreak(); 4264 return; 4265 } 4266 4267 if (should_abort_hard()) { 4268 // We don't care if we constructed a frame. Just pretend we did. 4269 FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE); 4270 PrepareCallCFunction(0, a0); 4271 li(a0, Operand(static_cast<int64_t>(reason))); 4272 CallCFunction(ExternalReference::abort_with_reason(), 1); 4273 return; 4274 } 4275 4276 Move(a0, Smi::FromInt(static_cast<int>(reason))); 4277 4278 // Disable stub call restrictions to always allow calls to abort. 4279 if (!has_frame()) { 4280 // We don't actually want to generate a pile of code for this, so just 4281 // claim there is a stack frame, without generating one. 4282 FrameScope scope(this, StackFrame::NO_FRAME_TYPE); 4283 Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); 4284 } else { 4285 Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); 4286 } 4287 // Will not return here. 4288 if (is_trampoline_pool_blocked()) { 4289 // If the calling code cares about the exact number of 4290 // instructions generated, we insert padding here to keep the size 4291 // of the Abort macro constant. 4292 // Currently in debug mode with debug_code enabled the number of 4293 // generated instructions is 10, so we use this as a maximum value. 
4294 static const int kExpectedAbortInstructions = 10; 4295 int abort_instructions = InstructionsGeneratedSince(&abort_start); 4296 DCHECK_LE(abort_instructions, kExpectedAbortInstructions); 4297 while (abort_instructions++ < kExpectedAbortInstructions) { 4298 nop(); 4299 } 4300 } 4301} 4302 4303void TurboAssembler::LoadMap(Register destination, Register object) { 4304 ASM_CODE_COMMENT(this); 4305 LoadTaggedPointerField(destination, 4306 FieldMemOperand(object, HeapObject::kMapOffset)); 4307} 4308 4309void MacroAssembler::LoadNativeContextSlot(Register dst, int index) { 4310 ASM_CODE_COMMENT(this); 4311 LoadMap(dst, cp); 4312 LoadTaggedPointerField( 4313 dst, FieldMemOperand( 4314 dst, Map::kConstructorOrBackPointerOrNativeContextOffset)); 4315 LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index))); 4316} 4317 4318void TurboAssembler::StubPrologue(StackFrame::Type type) { 4319 ASM_CODE_COMMENT(this); 4320 UseScratchRegisterScope temps(this); 4321 Register scratch = temps.Acquire(); 4322 li(scratch, Operand(StackFrame::TypeToMarker(type))); 4323 PushCommonFrame(scratch); 4324} 4325 4326void TurboAssembler::Prologue() { PushStandardFrame(a1); } 4327 4328void TurboAssembler::EnterFrame(StackFrame::Type type) { 4329 ASM_CODE_COMMENT(this); 4330 UseScratchRegisterScope temps(this); 4331 Register scratch = temps.Acquire(); 4332 BlockTrampolinePoolScope block_trampoline_pool(this); 4333 Push(ra, fp); 4334 Move(fp, sp); 4335 if (!StackFrame::IsJavaScript(type)) { 4336 li(scratch, Operand(StackFrame::TypeToMarker(type))); 4337 Push(scratch); 4338 } 4339#if V8_ENABLE_WEBASSEMBLY 4340 if (type == StackFrame::WASM) Push(kWasmInstanceRegister); 4341#endif // V8_ENABLE_WEBASSEMBLY 4342} 4343 4344void TurboAssembler::LeaveFrame(StackFrame::Type type) { 4345 ASM_CODE_COMMENT(this); 4346 addi(sp, fp, 2 * kSystemPointerSize); 4347 Ld(ra, MemOperand(fp, 1 * kSystemPointerSize)); 4348 Ld(fp, MemOperand(fp, 0 * kSystemPointerSize)); 4349} 4350 4351void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space, 4352 StackFrame::Type frame_type) { 4353 ASM_CODE_COMMENT(this); 4354 DCHECK(frame_type == StackFrame::EXIT || 4355 frame_type == StackFrame::BUILTIN_EXIT); 4356 4357 // Set up the frame structure on the stack. 4358 STATIC_ASSERT(2 * kSystemPointerSize == 4359 ExitFrameConstants::kCallerSPDisplacement); 4360 STATIC_ASSERT(1 * kSystemPointerSize == ExitFrameConstants::kCallerPCOffset); 4361 STATIC_ASSERT(0 * kSystemPointerSize == ExitFrameConstants::kCallerFPOffset); 4362 4363 // This is how the stack will look: 4364 // fp + 2 (==kCallerSPDisplacement) - old stack's end 4365 // [fp + 1 (==kCallerPCOffset)] - saved old ra 4366 // [fp + 0 (==kCallerFPOffset)] - saved old fp 4367 // [fp - 1 StackFrame::EXIT Smi 4368 // [fp - 2 (==kSPOffset)] - sp of the called function 4369 // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the 4370 // new stack (will contain saved ra) 4371 4372 // Save registers and reserve room for saved entry sp. 4373 addi(sp, sp, 4374 -2 * kSystemPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp); 4375 Sd(ra, MemOperand(sp, 3 * kSystemPointerSize)); 4376 Sd(fp, MemOperand(sp, 2 * kSystemPointerSize)); 4377 { 4378 UseScratchRegisterScope temps(this); 4379 Register scratch = temps.Acquire(); 4380 li(scratch, Operand(StackFrame::TypeToMarker(frame_type))); 4381 Sd(scratch, MemOperand(sp, 1 * kSystemPointerSize)); 4382 } 4383 // Set up new frame pointer. 
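  // fp then sits kFixedFrameSizeFromFp bytes above sp, pointing at the slot
  // that holds the saved caller fp (kCallerFPOffset == 0 in the layout above).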
4384 addi(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp);
4385
4386 if (FLAG_debug_code) {
4387 Sd(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset));
4388 }
4389
4390 {
4391 UseScratchRegisterScope temps(this);
4392 Register scratch = temps.Acquire();
4393 BlockTrampolinePoolScope block_trampoline_pool(this);
4394 // Save the frame pointer and the context in top.
4395 li(scratch, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
4396 isolate()));
4397 Sd(fp, MemOperand(scratch));
4398 li(scratch,
4399 ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4400 Sd(cp, MemOperand(scratch));
4401 }
4402
4403 const int frame_alignment = MacroAssembler::ActivationFrameAlignment();
4404 if (save_doubles) {
4405 // The stack is already aligned to 0 modulo 8 for the double stores below.
4406 int space = kNumCallerSavedFPU * kDoubleSize;
4407 Sub64(sp, sp, Operand(space));
4408 int count = 0;
4409 for (int i = 0; i < kNumFPURegisters; i++) {
4410 if (kCallerSavedFPU.bits() & (1 << i)) {
4411 FPURegister reg = FPURegister::from_code(i);
4412 StoreDouble(reg, MemOperand(sp, count * kDoubleSize));
4413 count++;
4414 }
4415 }
4416 }
4417
4418 // Reserve place for the return address, stack space and an optional slot
4419 // (used by DirectCEntry to hold the return value if a struct is
4420 // returned) and align the frame preparing for calling the runtime function.
4421 DCHECK_GE(stack_space, 0);
4422 Sub64(sp, sp, Operand((stack_space + 2) * kSystemPointerSize));
4423 if (frame_alignment > 0) {
4424 DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4425 And(sp, sp, Operand(-frame_alignment)); // Align stack.
4426 }
4427
4428 // Set the exit frame sp value to point just before the return address
4429 // location.
4430 UseScratchRegisterScope temps(this);
4431 Register scratch = temps.Acquire();
4432 addi(scratch, sp, kSystemPointerSize);
4433 Sd(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
4434 }
4435
4436 void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
4437 bool do_return,
4438 bool argument_count_is_length) {
4439 ASM_CODE_COMMENT(this);
4440 UseScratchRegisterScope temps(this);
4441 Register scratch = temps.Acquire();
4442 BlockTrampolinePoolScope block_trampoline_pool(this);
4443 // Optionally restore all double registers.
4444 if (save_doubles) {
4445 // Remember: we only need to restore kCallerSavedFPU, the set saved in
4445 // EnterExitFrame above.
4446 Sub64(scratch, fp,
4447 Operand(ExitFrameConstants::kFixedFrameSizeFromFp +
4448 kNumCallerSavedFPU * kDoubleSize));
4449 int count = 0;
4450 for (int i = 0; i < kNumFPURegisters; i++) {
4451 if (kCallerSavedFPU.bits() & (1 << i)) {
4452 FPURegister reg = FPURegister::from_code(i);
4453 LoadDouble(reg, MemOperand(scratch, count * kDoubleSize));
4454 count++;
4455 }
4456 }
4457 }
4458
4459 // Clear top frame.
4460 li(scratch,
4461 ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
4462 Sd(zero_reg, MemOperand(scratch));
4463
4464 // Restore current context from top and clear it in debug mode.
4465 li(scratch,
4466 ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4467 Ld(cp, MemOperand(scratch));
4468
4469 if (FLAG_debug_code) {
4470 UseScratchRegisterScope temp(this);
4471 Register scratch2 = temp.Acquire();
4472 li(scratch2, Operand(Context::kInvalidContext));
4473 Sd(scratch2, MemOperand(scratch));
4474 }
4475
4476 // Pop the arguments, restore registers, and return.
4477 Mv(sp, fp); // Respect ABI stack constraint.
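  // sp now equals fp, so the caller fp and return address can be reloaded from
  // the fixed kCallerFPOffset/kCallerPCOffset slots before the two remaining
  // words are popped below.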
4478 Ld(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset)); 4479 Ld(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset)); 4480 4481 if (argument_count.is_valid()) { 4482 if (argument_count_is_length) { 4483 add(sp, sp, argument_count); 4484 } else { 4485 CalcScaledAddress(sp, sp, argument_count, kSystemPointerSizeLog2); 4486 } 4487 } 4488 4489 addi(sp, sp, 2 * kSystemPointerSize); 4490 4491 if (do_return) { 4492 Ret(); 4493 } 4494} 4495 4496int TurboAssembler::ActivationFrameAlignment() { 4497#if V8_HOST_ARCH_RISCV64 4498 // Running on the real platform. Use the alignment as mandated by the local 4499 // environment. 4500 // Note: This will break if we ever start generating snapshots on one RISC-V 4501 // platform for another RISC-V platform with a different alignment. 4502 return base::OS::ActivationFrameAlignment(); 4503#else // V8_HOST_ARCH_RISCV64 4504 // If we are using the simulator then we should always align to the expected 4505 // alignment. As the simulator is used to generate snapshots we do not know 4506 // if the target platform will need alignment, so this is controlled from a 4507 // flag. 4508 return FLAG_sim_stack_alignment; 4509#endif // V8_HOST_ARCH_RISCV64 4510} 4511 4512void MacroAssembler::AssertStackIsAligned() { 4513 if (FLAG_debug_code) { 4514 ASM_CODE_COMMENT(this); 4515 const int frame_alignment = ActivationFrameAlignment(); 4516 const int frame_alignment_mask = frame_alignment - 1; 4517 4518 if (frame_alignment > kSystemPointerSize) { 4519 Label alignment_as_expected; 4520 DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); 4521 { 4522 UseScratchRegisterScope temps(this); 4523 Register scratch = temps.Acquire(); 4524 andi(scratch, sp, frame_alignment_mask); 4525 BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg)); 4526 } 4527 // Don't use Check here, as it will call Runtime_Abort re-entering here. 
4528 ebreak(); 4529 bind(&alignment_as_expected); 4530 } 4531 } 4532} 4533 4534void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) { 4535 ASM_CODE_COMMENT(this); 4536 if (SmiValuesAre32Bits()) { 4537 Lw(dst, MemOperand(src.rm(), SmiWordOffset(src.offset()))); 4538 } else { 4539 DCHECK(SmiValuesAre31Bits()); 4540 if (COMPRESS_POINTERS_BOOL) { 4541 Lw(dst, src); 4542 } else { 4543 Ld(dst, src); 4544 } 4545 SmiUntag(dst); 4546 } 4547} 4548 4549void TurboAssembler::SmiToInt32(Register smi) { 4550 ASM_CODE_COMMENT(this); 4551 if (FLAG_enable_slow_asserts) { 4552 AssertSmi(smi); 4553 } 4554 DCHECK(SmiValuesAre32Bits() || SmiValuesAre31Bits()); 4555 SmiUntag(smi); 4556} 4557 4558void TurboAssembler::JumpIfSmi(Register value, Label* smi_label) { 4559 ASM_CODE_COMMENT(this); 4560 DCHECK_EQ(0, kSmiTag); 4561 UseScratchRegisterScope temps(this); 4562 Register scratch = temps.Acquire(); 4563 andi(scratch, value, kSmiTagMask); 4564 Branch(smi_label, eq, scratch, Operand(zero_reg)); 4565} 4566 4567void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label) { 4568 ASM_CODE_COMMENT(this); 4569 UseScratchRegisterScope temps(this); 4570 Register scratch = temps.Acquire(); 4571 DCHECK_EQ(0, kSmiTag); 4572 andi(scratch, value, kSmiTagMask); 4573 Branch(not_smi_label, ne, scratch, Operand(zero_reg)); 4574} 4575 4576void TurboAssembler::AssertNotSmi(Register object, AbortReason reason) { 4577 if (FLAG_debug_code) { 4578 ASM_CODE_COMMENT(this); 4579 STATIC_ASSERT(kSmiTag == 0); 4580 DCHECK(object != kScratchReg); 4581 andi(kScratchReg, object, kSmiTagMask); 4582 Check(ne, reason, kScratchReg, Operand(zero_reg)); 4583 } 4584} 4585 4586void TurboAssembler::AssertSmi(Register object, AbortReason reason) { 4587 if (FLAG_debug_code) { 4588 ASM_CODE_COMMENT(this); 4589 STATIC_ASSERT(kSmiTag == 0); 4590 DCHECK(object != kScratchReg); 4591 andi(kScratchReg, object, kSmiTagMask); 4592 Check(eq, reason, kScratchReg, Operand(zero_reg)); 4593 } 4594} 4595 4596void MacroAssembler::AssertConstructor(Register object) { 4597 if (FLAG_debug_code) { 4598 ASM_CODE_COMMENT(this); 4599 DCHECK(object != kScratchReg); 4600 BlockTrampolinePoolScope block_trampoline_pool(this); 4601 STATIC_ASSERT(kSmiTag == 0); 4602 SmiTst(object, kScratchReg); 4603 Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, kScratchReg, 4604 Operand(zero_reg)); 4605 4606 LoadMap(kScratchReg, object); 4607 Lbu(kScratchReg, FieldMemOperand(kScratchReg, Map::kBitFieldOffset)); 4608 And(kScratchReg, kScratchReg, Operand(Map::Bits1::IsConstructorBit::kMask)); 4609 Check(ne, AbortReason::kOperandIsNotAConstructor, kScratchReg, 4610 Operand(zero_reg)); 4611 } 4612} 4613 4614void MacroAssembler::AssertFunction(Register object) { 4615 if (FLAG_debug_code) { 4616 ASM_CODE_COMMENT(this); 4617 BlockTrampolinePoolScope block_trampoline_pool(this); 4618 STATIC_ASSERT(kSmiTag == 0); 4619 DCHECK(object != kScratchReg); 4620 SmiTst(object, kScratchReg); 4621 Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, kScratchReg, 4622 Operand(zero_reg)); 4623 push(object); 4624 LoadMap(object, object); 4625 UseScratchRegisterScope temps(this); 4626 Register range = temps.Acquire(); 4627 GetInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE, range); 4628 Check(Uless_equal, AbortReason::kOperandIsNotAFunction, range, 4629 Operand(LAST_JS_FUNCTION_TYPE - FIRST_JS_FUNCTION_TYPE)); 4630 pop(object); 4631 } 4632} 4633 4634void MacroAssembler::AssertCallableFunction(Register object) { 4635 if (!FLAG_debug_code) return; 4636 ASM_CODE_COMMENT(this); 4637 
STATIC_ASSERT(kSmiTag == 0); 4638 AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction); 4639 push(object); 4640 LoadMap(object, object); 4641 UseScratchRegisterScope temps(this); 4642 Register range = temps.Acquire(); 4643 GetInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE, range); 4644 Check(Uless_equal, AbortReason::kOperandIsNotACallableFunction, range, 4645 Operand(LAST_CALLABLE_JS_FUNCTION_TYPE - 4646 FIRST_CALLABLE_JS_FUNCTION_TYPE)); 4647 pop(object); 4648} 4649 4650void MacroAssembler::AssertBoundFunction(Register object) { 4651 if (FLAG_debug_code) { 4652 ASM_CODE_COMMENT(this); 4653 BlockTrampolinePoolScope block_trampoline_pool(this); 4654 STATIC_ASSERT(kSmiTag == 0); 4655 DCHECK(object != kScratchReg); 4656 SmiTst(object, kScratchReg); 4657 Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, kScratchReg, 4658 Operand(zero_reg)); 4659 GetObjectType(object, kScratchReg, kScratchReg); 4660 Check(eq, AbortReason::kOperandIsNotABoundFunction, kScratchReg, 4661 Operand(JS_BOUND_FUNCTION_TYPE)); 4662 } 4663} 4664 4665void MacroAssembler::AssertGeneratorObject(Register object) { 4666 if (!FLAG_debug_code) return; 4667 ASM_CODE_COMMENT(this); 4668 BlockTrampolinePoolScope block_trampoline_pool(this); 4669 STATIC_ASSERT(kSmiTag == 0); 4670 DCHECK(object != kScratchReg); 4671 SmiTst(object, kScratchReg); 4672 Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, kScratchReg, 4673 Operand(zero_reg)); 4674 4675 GetObjectType(object, kScratchReg, kScratchReg); 4676 4677 Label done; 4678 4679 // Check if JSGeneratorObject 4680 BranchShort(&done, eq, kScratchReg, Operand(JS_GENERATOR_OBJECT_TYPE)); 4681 4682 // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType) 4683 BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE)); 4684 4685 // Check if JSAsyncGeneratorObject 4686 BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE)); 4687 4688 Abort(AbortReason::kOperandIsNotAGeneratorObject); 4689 4690 bind(&done); 4691} 4692 4693void MacroAssembler::AssertUndefinedOrAllocationSite(Register object, 4694 Register scratch) { 4695 if (FLAG_debug_code) { 4696 ASM_CODE_COMMENT(this); 4697 Label done_checking; 4698 AssertNotSmi(object); 4699 LoadRoot(scratch, RootIndex::kUndefinedValue); 4700 BranchShort(&done_checking, eq, object, Operand(scratch)); 4701 GetObjectType(object, scratch, scratch); 4702 Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch, 4703 Operand(ALLOCATION_SITE_TYPE)); 4704 bind(&done_checking); 4705 } 4706} 4707 4708template <typename F_TYPE> 4709void TurboAssembler::FloatMinMaxHelper(FPURegister dst, FPURegister src1, 4710 FPURegister src2, MaxMinKind kind) { 4711 DCHECK((std::is_same<F_TYPE, float>::value) || 4712 (std::is_same<F_TYPE, double>::value)); 4713 4714 if (src1 == src2 && dst != src1) { 4715 if (std::is_same<float, F_TYPE>::value) { 4716 fmv_s(dst, src1); 4717 } else { 4718 fmv_d(dst, src1); 4719 } 4720 return; 4721 } 4722 4723 Label done, nan; 4724 4725 // For RISCV, fmin_s returns the other non-NaN operand as result if only one 4726 // operand is NaN; but for JS, if any operand is NaN, result is Nan. 
The 4727 // following handles the discrepency between handling of NaN between ISA and 4728 // JS semantics 4729 UseScratchRegisterScope temps(this); 4730 Register scratch = temps.Acquire(); 4731 if (std::is_same<float, F_TYPE>::value) { 4732 CompareIsNotNanF32(scratch, src1, src2); 4733 } else { 4734 CompareIsNotNanF64(scratch, src1, src2); 4735 } 4736 BranchFalseF(scratch, &nan); 4737 4738 if (kind == MaxMinKind::kMax) { 4739 if (std::is_same<float, F_TYPE>::value) { 4740 fmax_s(dst, src1, src2); 4741 } else { 4742 fmax_d(dst, src1, src2); 4743 } 4744 } else { 4745 if (std::is_same<float, F_TYPE>::value) { 4746 fmin_s(dst, src1, src2); 4747 } else { 4748 fmin_d(dst, src1, src2); 4749 } 4750 } 4751 j(&done); 4752 4753 bind(&nan); 4754 // if any operand is NaN, return NaN (fadd returns NaN if any operand is NaN) 4755 if (std::is_same<float, F_TYPE>::value) { 4756 fadd_s(dst, src1, src2); 4757 } else { 4758 fadd_d(dst, src1, src2); 4759 } 4760 4761 bind(&done); 4762} 4763 4764void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1, 4765 FPURegister src2) { 4766 ASM_CODE_COMMENT(this); 4767 FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMax); 4768} 4769 4770void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1, 4771 FPURegister src2) { 4772 ASM_CODE_COMMENT(this); 4773 FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMin); 4774} 4775 4776void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1, 4777 FPURegister src2) { 4778 ASM_CODE_COMMENT(this); 4779 FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMax); 4780} 4781 4782void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1, 4783 FPURegister src2) { 4784 ASM_CODE_COMMENT(this); 4785 FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMin); 4786} 4787 4788static const int kRegisterPassedArguments = 8; 4789 4790int TurboAssembler::CalculateStackPassedDWords(int num_gp_arguments, 4791 int num_fp_arguments) { 4792 int stack_passed_dwords = 0; 4793 4794 // Up to eight integer arguments are passed in registers a0..a7 and 4795 // up to eight floating point arguments are passed in registers fa0..fa7 4796 if (num_gp_arguments > kRegisterPassedArguments) { 4797 stack_passed_dwords += num_gp_arguments - kRegisterPassedArguments; 4798 } 4799 if (num_fp_arguments > kRegisterPassedArguments) { 4800 stack_passed_dwords += num_fp_arguments - kRegisterPassedArguments; 4801 } 4802 stack_passed_dwords += kCArgSlotCount; 4803 return stack_passed_dwords; 4804} 4805 4806void TurboAssembler::PrepareCallCFunction(int num_reg_arguments, 4807 int num_double_arguments, 4808 Register scratch) { 4809 ASM_CODE_COMMENT(this); 4810 int frame_alignment = ActivationFrameAlignment(); 4811 4812 // Up to eight simple arguments in a0..a7, fa0..fa7. 4813 // Remaining arguments are pushed on the stack (arg slot calculation handled 4814 // by CalculateStackPassedDWords()). 4815 int stack_passed_arguments = 4816 CalculateStackPassedDWords(num_reg_arguments, num_double_arguments); 4817 if (frame_alignment > kSystemPointerSize) { 4818 // Make stack end at alignment and make room for stack arguments and the 4819 // original value of sp. 
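    // e.g. with frame_alignment == 16 and three stack arguments, sp is lowered
    // by four slots (three arguments plus one for the saved sp), rounded down
    // to a 16-byte boundary, and the pre-call sp is stashed in the extra slot
    // so CallCFunctionHelper can restore it after the call.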
4820 Mv(scratch, sp); 4821 Sub64(sp, sp, Operand((stack_passed_arguments + 1) * kSystemPointerSize)); 4822 DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); 4823 And(sp, sp, Operand(-frame_alignment)); 4824 Sd(scratch, MemOperand(sp, stack_passed_arguments * kSystemPointerSize)); 4825 } else { 4826 Sub64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize)); 4827 } 4828} 4829 4830void TurboAssembler::PrepareCallCFunction(int num_reg_arguments, 4831 Register scratch) { 4832 PrepareCallCFunction(num_reg_arguments, 0, scratch); 4833} 4834 4835void TurboAssembler::CallCFunction(ExternalReference function, 4836 int num_reg_arguments, 4837 int num_double_arguments) { 4838 BlockTrampolinePoolScope block_trampoline_pool(this); 4839 li(t6, function); 4840 CallCFunctionHelper(t6, num_reg_arguments, num_double_arguments); 4841} 4842 4843void TurboAssembler::CallCFunction(Register function, int num_reg_arguments, 4844 int num_double_arguments) { 4845 CallCFunctionHelper(function, num_reg_arguments, num_double_arguments); 4846} 4847 4848void TurboAssembler::CallCFunction(ExternalReference function, 4849 int num_arguments) { 4850 CallCFunction(function, num_arguments, 0); 4851} 4852 4853void TurboAssembler::CallCFunction(Register function, int num_arguments) { 4854 CallCFunction(function, num_arguments, 0); 4855} 4856 4857void TurboAssembler::CallCFunctionHelper(Register function, 4858 int num_reg_arguments, 4859 int num_double_arguments) { 4860 DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters); 4861 DCHECK(has_frame()); 4862 ASM_CODE_COMMENT(this); 4863 // Make sure that the stack is aligned before calling a C function unless 4864 // running in the simulator. The simulator has its own alignment check which 4865 // provides more information. 4866 // The argument stots are presumed to have been set up by 4867 // PrepareCallCFunction. 4868 4869#if V8_HOST_ARCH_RISCV64 4870 if (FLAG_debug_code) { 4871 int frame_alignment = base::OS::ActivationFrameAlignment(); 4872 int frame_alignment_mask = frame_alignment - 1; 4873 if (frame_alignment > kSystemPointerSize) { 4874 DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); 4875 Label alignment_as_expected; 4876 { 4877 UseScratchRegisterScope temps(this); 4878 Register scratch = temps.Acquire(); 4879 And(scratch, sp, Operand(frame_alignment_mask)); 4880 BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg)); 4881 } 4882 // Don't use Check here, as it will call Runtime_Abort possibly 4883 // re-entering here. 4884 ebreak(); 4885 bind(&alignment_as_expected); 4886 } 4887 } 4888#endif // V8_HOST_ARCH_RISCV64 4889 4890 // Just call directly. The function called cannot cause a GC, or 4891 // allow preemption, so the return address in the link register 4892 // stays correct. 4893 { 4894 if (function != t6) { 4895 Mv(t6, function); 4896 function = t6; 4897 } 4898 4899 // Save the frame pointer and PC so that the stack layout remains 4900 // iterable, even without an ExitFrame which normally exists between JS 4901 // and C frames. 4902 // 't' registers are caller-saved so this is safe as a scratch register. 4903 Register pc_scratch = t1; 4904 Register scratch = t2; 4905 4906 auipc(pc_scratch, 0); 4907 // See x64 code for reasoning about how to address the isolate data fields. 
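  // pc_scratch now holds the address of the auipc itself; recording it and fp
  // in the isolate's fast_c_call_caller_{pc,fp} slots keeps the stack iterable
  // across the C call even though no ExitFrame is set up here.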
4908 if (root_array_available()) { 4909 Sd(pc_scratch, MemOperand(kRootRegister, 4910 IsolateData::fast_c_call_caller_pc_offset())); 4911 Sd(fp, MemOperand(kRootRegister, 4912 IsolateData::fast_c_call_caller_fp_offset())); 4913 } else { 4914 DCHECK_NOT_NULL(isolate()); 4915 li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate())); 4916 Sd(pc_scratch, MemOperand(scratch)); 4917 li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); 4918 Sd(fp, MemOperand(scratch)); 4919 } 4920 4921 Call(function); 4922 4923 if (isolate() != nullptr) { 4924 // We don't unset the PC; the FP is the source of truth. 4925 UseScratchRegisterScope temps(this); 4926 Register scratch = temps.Acquire(); 4927 li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); 4928 Sd(zero_reg, MemOperand(scratch)); 4929 } 4930 } 4931 4932 int stack_passed_arguments = 4933 CalculateStackPassedDWords(num_reg_arguments, num_double_arguments); 4934 4935 if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) { 4936 Ld(sp, MemOperand(sp, stack_passed_arguments * kSystemPointerSize)); 4937 } else { 4938 Add64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize)); 4939 } 4940} 4941 4942#undef BRANCH_ARGS_CHECK 4943 4944void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask, 4945 Condition cc, Label* condition_met) { 4946 And(scratch, object, Operand(~kPageAlignmentMask)); 4947 Ld(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset)); 4948 And(scratch, scratch, Operand(mask)); 4949 Branch(condition_met, cc, scratch, Operand(zero_reg)); 4950} 4951 4952Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, 4953 Register reg4, Register reg5, 4954 Register reg6) { 4955 RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6}; 4956 4957 const RegisterConfiguration* config = RegisterConfiguration::Default(); 4958 for (int i = 0; i < config->num_allocatable_general_registers(); ++i) { 4959 int code = config->GetAllocatableGeneralCode(i); 4960 Register candidate = Register::from_code(code); 4961 if (regs.has(candidate)) continue; 4962 return candidate; 4963 } 4964 UNREACHABLE(); 4965} 4966 4967void TurboAssembler::ComputeCodeStartAddress(Register dst) { 4968 // This push on ra and the pop below together ensure that we restore the 4969 // register ra, which is needed while computing the code start address. 4970 push(ra); 4971 4972 auipc(ra, 0); 4973 addi(ra, ra, kInstrSize * 2); // ra = address of li 4974 int pc = pc_offset(); 4975 li(dst, Operand(pc)); 4976 Sub64(dst, ra, dst); 4977 4978 pop(ra); // Restore ra 4979} 4980 4981void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit, 4982 DeoptimizeKind kind, Label* ret, 4983 Label*) { 4984 ASM_CODE_COMMENT(this); 4985 BlockTrampolinePoolScope block_trampoline_pool(this); 4986 Ld(t6, 4987 MemOperand(kRootRegister, IsolateData::BuiltinEntrySlotOffset(target))); 4988 Call(t6); 4989 DCHECK_EQ(SizeOfCodeGeneratedSince(exit), 4990 (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize 4991 : Deoptimizer::kEagerDeoptExitSize); 4992} 4993 4994void TurboAssembler::LoadCodeObjectEntry(Register destination, 4995 Register code_object) { 4996 // Code objects are called differently depending on whether we are generating 4997 // builtin code (which will later be embedded into the binary) or compiling 4998 // user JS code at runtime. 4999 // * Builtin code runs in --jitless mode and thus must not call into on-heap 5000 // Code targets. 
Instead, we dispatch through the builtins entry table. 5001 // * Codegen at runtime does not have this restriction and we can use the 5002 // shorter, branchless instruction sequence. The assumption here is that 5003 // targets are usually generated code and not builtin Code objects. 5004 ASM_CODE_COMMENT(this); 5005 if (options().isolate_independent_code) { 5006 DCHECK(root_array_available()); 5007 Label if_code_is_off_heap, out; 5008 5009 UseScratchRegisterScope temps(this); 5010 Register scratch = temps.Acquire(); 5011 5012 DCHECK(!AreAliased(destination, scratch)); 5013 DCHECK(!AreAliased(code_object, scratch)); 5014 5015 // Check whether the Code object is an off-heap trampoline. If so, call its 5016 // (off-heap) entry point directly without going through the (on-heap) 5017 // trampoline. Otherwise, just call the Code object as always. 5018 5019 Lw(scratch, FieldMemOperand(code_object, Code::kFlagsOffset)); 5020 And(scratch, scratch, Operand(Code::IsOffHeapTrampoline::kMask)); 5021 Branch(&if_code_is_off_heap, ne, scratch, Operand(zero_reg)); 5022 // Not an off-heap trampoline object, the entry point is at 5023 // Code::raw_instruction_start(). 5024 Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag); 5025 Branch(&out); 5026 5027 // An off-heap trampoline, the entry point is loaded from the builtin entry 5028 // table. 5029 bind(&if_code_is_off_heap); 5030 Lw(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset)); 5031 slli(destination, scratch, kSystemPointerSizeLog2); 5032 Add64(destination, destination, kRootRegister); 5033 Ld(destination, 5034 MemOperand(destination, IsolateData::builtin_entry_table_offset())); 5035 5036 bind(&out); 5037 } else { 5038 Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag); 5039 } 5040} 5041 5042void TurboAssembler::CallCodeObject(Register code_object) { 5043 ASM_CODE_COMMENT(this); 5044 LoadCodeObjectEntry(code_object, code_object); 5045 Call(code_object); 5046} 5047 5048void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) { 5049 ASM_CODE_COMMENT(this); 5050 DCHECK_EQ(JumpMode::kJump, jump_mode); 5051 LoadCodeObjectEntry(code_object, code_object); 5052 Jump(code_object); 5053} 5054 5055void TurboAssembler::LoadTaggedPointerField(const Register& destination, 5056 const MemOperand& field_operand) { 5057 if (COMPRESS_POINTERS_BOOL) { 5058 DecompressTaggedPointer(destination, field_operand); 5059 } else { 5060 Ld(destination, field_operand); 5061 } 5062} 5063 5064void TurboAssembler::LoadAnyTaggedField(const Register& destination, 5065 const MemOperand& field_operand) { 5066 if (COMPRESS_POINTERS_BOOL) { 5067 DecompressAnyTagged(destination, field_operand); 5068 } else { 5069 Ld(destination, field_operand); 5070 } 5071} 5072 5073void TurboAssembler::LoadTaggedSignedField(const Register& destination, 5074 const MemOperand& field_operand) { 5075 if (COMPRESS_POINTERS_BOOL) { 5076 DecompressTaggedSigned(destination, field_operand); 5077 } else { 5078 Ld(destination, field_operand); 5079 } 5080} 5081 5082void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) { 5083 SmiUntag(dst, src); 5084} 5085 5086void TurboAssembler::StoreTaggedField(const Register& value, 5087 const MemOperand& dst_field_operand) { 5088 if (COMPRESS_POINTERS_BOOL) { 5089 Sw(value, dst_field_operand); 5090 } else { 5091 Sd(value, dst_field_operand); 5092 } 5093} 5094 5095void TurboAssembler::DecompressTaggedSigned(const Register& destination, 5096 const MemOperand& field_operand) { 5097 
ASM_CODE_COMMENT(this); 5098 Lwu(destination, field_operand); 5099 if (FLAG_debug_code) { 5100 // Corrupt the top 32 bits. Made up of 16 fixed bits and 16 pc offset bits. 5101 Add64(destination, destination, 5102 Operand(((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32)); 5103 } 5104} 5105 5106void TurboAssembler::DecompressTaggedPointer(const Register& destination, 5107 const MemOperand& field_operand) { 5108 ASM_CODE_COMMENT(this); 5109 Lwu(destination, field_operand); 5110 Add64(destination, kPtrComprCageBaseRegister, destination); 5111} 5112 5113void TurboAssembler::DecompressTaggedPointer(const Register& destination, 5114 const Register& source) { 5115 ASM_CODE_COMMENT(this); 5116 And(destination, source, Operand(0xFFFFFFFF)); 5117 Add64(destination, kPtrComprCageBaseRegister, Operand(destination)); 5118} 5119 5120void TurboAssembler::DecompressAnyTagged(const Register& destination, 5121 const MemOperand& field_operand) { 5122 ASM_CODE_COMMENT(this); 5123 Lwu(destination, field_operand); 5124 Add64(destination, kPtrComprCageBaseRegister, destination); 5125} 5126 5127void MacroAssembler::DropArguments(Register count, ArgumentsCountType type, 5128 ArgumentsCountMode mode, Register scratch) { 5129 switch (type) { 5130 case kCountIsInteger: { 5131 CalcScaledAddress(sp, sp, count, kPointerSizeLog2); 5132 break; 5133 } 5134 case kCountIsSmi: { 5135 STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0); 5136 DCHECK_NE(scratch, no_reg); 5137 SmiScale(scratch, count, kPointerSizeLog2); 5138 Add64(sp, sp, scratch); 5139 break; 5140 } 5141 case kCountIsBytes: { 5142 Add64(sp, sp, count); 5143 break; 5144 } 5145 } 5146 if (mode == kCountExcludesReceiver) { 5147 Add64(sp, sp, kSystemPointerSize); 5148 } 5149} 5150 5151void MacroAssembler::DropArgumentsAndPushNewReceiver(Register argc, 5152 Register receiver, 5153 ArgumentsCountType type, 5154 ArgumentsCountMode mode, 5155 Register scratch) { 5156 DCHECK(!AreAliased(argc, receiver)); 5157 if (mode == kCountExcludesReceiver) { 5158 // Drop arguments without receiver and override old receiver. 5159 DropArguments(argc, type, kCountIncludesReceiver, scratch); 5160 Sd(receiver, MemOperand(sp)); 5161 } else { 5162 DropArguments(argc, type, mode, scratch); 5163 push(receiver); 5164 } 5165} 5166 5167} // namespace internal 5168} // namespace v8 5169 5170#endif // V8_TARGET_ARCH_RISCV64 5171