// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <limits.h>  // For LONG_MIN, LONG_MAX.

#if V8_TARGET_ARCH_RISCV64

#include "src/base/bits.h"
#include "src/base/division-by-constant.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/code-factory.h"
#include "src/codegen/external-reference-table.h"
#include "src/codegen/interface-descriptors-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/register-configuration.h"
#include "src/debug/debug.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/frames-inl.h"
#include "src/heap/memory-chunk.h"
#include "src/init/bootstrapper.h"
#include "src/logging/counters.h"
#include "src/objects/heap-number.h"
#include "src/runtime/runtime.h"
#include "src/snapshot/snapshot.h"
#include "src/wasm/wasm-code-manager.h"

// Satisfy cpplint check, but don't include platform-specific header. It is
// included recursively via macro-assembler.h.
#if 0
#include "src/codegen/riscv64/macro-assembler-riscv64.h"
#endif

namespace v8 {
namespace internal {

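// Returns true if operand |rt| is the zero register or a zero immediate.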
static inline bool IsZero(const Operand& rt) {
  if (rt.is_reg()) {
    return rt.rm() == zero_reg;
  } else {
    return rt.immediate() == 0;
  }
}

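// Computes the stack space that PushCallerSaved() would consume for the same
// arguments, without emitting any code.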
int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
                                                    Register exclusion1,
                                                    Register exclusion2,
                                                    Register exclusion3) const {
  int bytes = 0;

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  bytes += list.Count() * kSystemPointerSize;

  if (fp_mode == SaveFPRegsMode::kSave) {
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  return bytes;
}

int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
                                    Register exclusion2, Register exclusion3) {
  int bytes = 0;

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  MultiPush(list);
  bytes += list.Count() * kSystemPointerSize;

  if (fp_mode == SaveFPRegsMode::kSave) {
    MultiPushFPU(kCallerSavedFPU);
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  return bytes;
}

int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
                                   Register exclusion2, Register exclusion3) {
  int bytes = 0;
  if (fp_mode == SaveFPRegsMode::kSave) {
    MultiPopFPU(kCallerSavedFPU);
    bytes += kCallerSavedFPU.Count() * kDoubleSize;
  }

  RegList exclusions = {exclusion1, exclusion2, exclusion3};
  RegList list = kJSCallerSaved - exclusions;
  MultiPop(list);
  bytes += list.Count() * kSystemPointerSize;

  return bytes;
}

void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
  Ld(destination,
     MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
}

void TurboAssembler::LoadRoot(Register destination, RootIndex index,
                              Condition cond, Register src1,
                              const Operand& src2) {
  Label skip;
  BranchShort(&skip, NegateCondition(cond), src1, src2);
  Ld(destination,
     MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
  bind(&skip);
}

void TurboAssembler::PushCommonFrame(Register marker_reg) {
  if (marker_reg.is_valid()) {
    Push(ra, fp, marker_reg);
    Add64(fp, sp, Operand(kSystemPointerSize));
  } else {
    Push(ra, fp);
    Mv(fp, sp);
  }
}

void TurboAssembler::PushStandardFrame(Register function_reg) {
  int offset = -StandardFrameConstants::kContextOffset;
  if (function_reg.is_valid()) {
    Push(ra, fp, cp, function_reg, kJavaScriptCallArgCountRegister);
    offset += 2 * kSystemPointerSize;
  } else {
    Push(ra, fp, cp, kJavaScriptCallArgCountRegister);
    offset += kSystemPointerSize;
  }
  Add64(fp, sp, Operand(offset));
}

int MacroAssembler::SafepointRegisterStackIndex(int reg_code) {
  // The registers are pushed starting with the highest encoding,
  // which means that lowest encodings are closest to the stack pointer.
  return kSafepointRegisterStackIndexMap[reg_code];
}

// Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved)
// The register 'object' contains a heap object pointer.  The heap object
// tag is shifted away.
void MacroAssembler::RecordWriteField(Register object, int offset,
                                      Register value, RAStatus ra_status,
                                      SaveFPRegsMode save_fp,
                                      RememberedSetAction remembered_set_action,
                                      SmiCheck smi_check) {
  DCHECK(!AreAliased(object, value));
  // First, check if a write barrier is even needed. The tests below
  // catch stores of Smis.
  Label done;

  // Skip the barrier if writing a smi.
  if (smi_check == SmiCheck::kInline) {
    JumpIfSmi(value, &done);
  }

  // Although the object register is tagged, the offset is relative to the start
  // of the object, so offset must be a multiple of kTaggedSize.
  DCHECK(IsAligned(offset, kTaggedSize));

  if (FLAG_debug_code) {
    Label ok;
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(!AreAliased(object, value, scratch));
    Add64(scratch, object, offset - kHeapObjectTag);
    And(scratch, scratch, Operand(kTaggedSize - 1));
    BranchShort(&ok, eq, scratch, Operand(zero_reg));
    Abort(AbortReason::kUnalignedCellInWriteBarrier);
    bind(&ok);
  }

  RecordWrite(object, Operand(offset - kHeapObjectTag), value, ra_status,
              save_fp, remembered_set_action, SmiCheck::kOmit);

  bind(&done);
}

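// MaybeSaveRegisters/MaybeRestoreRegisters bracket a call with a bulk
// push/pop of |registers|; both are no-ops when the list is empty.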
void TurboAssembler::MaybeSaveRegisters(RegList registers) {
  if (registers.is_empty()) return;
  MultiPush(registers);
}

void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
  if (registers.is_empty()) return;
  MultiPop(registers);
}

void TurboAssembler::CallEphemeronKeyBarrier(Register object,
                                             Register slot_address,
                                             SaveFPRegsMode fp_mode) {
  DCHECK(!AreAliased(object, slot_address));
  RegList registers =
      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
  MaybeSaveRegisters(registers);

  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
  Register slot_address_parameter =
      WriteBarrierDescriptor::SlotAddressRegister();

  Push(object);
  Push(slot_address);
  Pop(slot_address_parameter);
  Pop(object_parameter);

  Call(isolate()->builtins()->code_handle(
           Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
       RelocInfo::CODE_TARGET);
  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStubSaveRegisters(
    Register object, Register slot_address,
    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
    StubCallMode mode) {
  DCHECK(!AreAliased(object, slot_address));
  RegList registers =
      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
  MaybeSaveRegisters(registers);

  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
  Register slot_address_parameter =
      WriteBarrierDescriptor::SlotAddressRegister();

  Push(object);
  Push(slot_address);
  Pop(slot_address_parameter);
  Pop(object_parameter);

  CallRecordWriteStub(object_parameter, slot_address_parameter,
                      remembered_set_action, fp_mode, mode);

  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStub(
    Register object, Register slot_address,
    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
    StubCallMode mode) {
  // Use CallRecordWriteStubSaveRegisters if the object and slot registers
  // need to be caller saved.
  DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
  DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
  if (mode == StubCallMode::kCallWasmRuntimeStub) {
    auto wasm_target =
        wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
    Call(wasm_target, RelocInfo::WASM_STUB_CALL);
  } else {
    auto builtin = Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
    if (options().inline_offheap_trampolines) {
      // Inline the trampoline.
      RecordCommentForOffHeapTrampoline(builtin);

      UseScratchRegisterScope temps(this);
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Register scratch = temps.Acquire();
      li(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
      Call(scratch);
      RecordComment("]");
    } else {
      Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
      Call(code_target, RelocInfo::CODE_TARGET);
    }
  }
}

// Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved)
// The register 'object' contains a heap object pointer.  The heap object
// tag is shifted away.
void MacroAssembler::RecordWrite(Register object, Operand offset,
                                 Register value, RAStatus ra_status,
                                 SaveFPRegsMode fp_mode,
                                 RememberedSetAction remembered_set_action,
                                 SmiCheck smi_check) {
  DCHECK(!AreAliased(object, value));

  if (FLAG_debug_code) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.Acquire();
    DCHECK(!AreAliased(object, value, temp));
    Add64(temp, object, offset);
    LoadTaggedPointerField(temp, MemOperand(temp));
    Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, temp,
           Operand(value));
  }

  if ((remembered_set_action == RememberedSetAction::kOmit &&
       !FLAG_incremental_marking) ||
      FLAG_disable_write_barriers) {
    return;
  }

  // First, check if a write barrier is even needed. The tests below
  // catch stores of smis and stores into the young generation.
  Label done;

  if (smi_check == SmiCheck::kInline) {
    DCHECK_EQ(0, kSmiTag);
    JumpIfSmi(value, &done);
  }

  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.Acquire();
    CheckPageFlag(value,
                  temp,  // Used as scratch.
                  MemoryChunk::kPointersToHereAreInterestingMask,
                  eq,  // CheckPageFlag compares the masked flags with zero,
                       // so with eq it branches to done when none of the
                       // mask bits are set.
                  &done);

    CheckPageFlag(object,
                  temp,  // Used as scratch.
                  MemoryChunk::kPointersFromHereAreInterestingMask,
                  eq,  // Same as above: with eq, branch to done when no mask
                       // bits are set.
                  &done);
  }
  // Record the actual write.
  if (ra_status == kRAHasNotBeenSaved) {
    push(ra);
  }
  Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
  DCHECK(!AreAliased(object, slot_address, value));
  // TODO(cbruni): Turn offset into int.
  DCHECK(offset.IsImmediate());
  Add64(slot_address, object, offset);
  CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
  if (ra_status == kRAHasNotBeenSaved) {
    pop(ra);
  }
  if (FLAG_debug_code) li(slot_address, Operand(kZapValue));

  bind(&done);
}

// ---------------------------------------------------------------------------
// Instruction macros.

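// The macros below take a register or an immediate Operand as the second
// source and pick the shortest legal encoding: a compressed (RVC) form when
// FLAG_riscv_c_extension permits, a single I-type instruction for int12
// immediates, a two-instruction split for immediates just outside that range,
// and otherwise a materializing Li into a scratch register.
//
// For illustration (hypothetical operands; actual encodings depend on flags):
//   Add32(a0, a0, Operand(1));       // may emit c.addiw a0, 1
//   Add32(a0, a1, Operand(100));     // emits addiw a0, a1, 100
//   Add32(a0, a1, Operand(0x12345)); // emits li scratch, 0x12345; addw ...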
void TurboAssembler::Add32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_addw(rd, rt.rm());
    } else {
      addw(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        (rd.code() == rs.code()) && (rd != zero_reg) &&
        !MustUseReg(rt.rmode())) {
      c_addiw(rd, static_cast<int8_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      addiw(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
               (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
      addiw(rd, rs, rt.immediate() / 2);
      addiw(rd, rd, rt.immediate() - (rt.immediate() / 2));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      addw(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Add64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rt.rm() != zero_reg) && (rs != zero_reg)) {
      c_add(rd, rt.rm());
    } else {
      add(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        (rd.code() == rs.code()) && (rd != zero_reg) && (rt.immediate() != 0) &&
        !MustUseReg(rt.rmode())) {
      c_addi(rd, static_cast<int8_t>(rt.immediate()));
    } else if (FLAG_riscv_c_extension && is_int10(rt.immediate()) &&
               (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
               (rd.code() == rs.code()) && (rd == sp) &&
               !MustUseReg(rt.rmode())) {
      c_addi16sp(static_cast<int16_t>(rt.immediate()));
    } else if (FLAG_riscv_c_extension && ((rd.code() & 0b11000) == 0b01000) &&
               (rs == sp) && is_uint10(rt.immediate()) &&
               (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
      c_addi4spn(rd, static_cast<uint16_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      addi(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
               (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
      addi(rd, rs, rt.immediate() / 2);
      addi(rd, rd, rt.immediate() - (rt.immediate() / 2));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      add(rd, rs, scratch);
    }
  }
}

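// There is no immediate-subtract instruction in RISC-V; subtraction by an
// immediate is emitted as an add of the negated immediate.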
void TurboAssembler::Sub32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_subw(rd, rt.rm());
    } else {
      subw(rd, rs, rt.rm());
    }
  } else {
    DCHECK(is_int32(rt.immediate()));
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rd != zero_reg) && is_int6(-rt.immediate()) &&
        !MustUseReg(rt.rmode())) {
      c_addiw(
          rd,
          static_cast<int8_t>(
              -rt.immediate()));  // No c_subiw instr, use c_addiw(x, y, -imm).
    } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
      addiw(rd, rs,
            static_cast<int32_t>(
                -rt.immediate()));  // No subiw instr, use addiw(x, y, -imm).
    } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
               (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
      addiw(rd, rs, -rt.immediate() / 2);
      addiw(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
    } else {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      if (-rt.immediate() >> 12 == 0 && !MustUseReg(rt.rmode())) {
        // Use load -imm and addw when loading -imm generates one instruction.
        Li(scratch, -rt.immediate());
        addw(rd, rs, scratch);
      } else {
        // li handles the relocation.
        Li(scratch, rt.immediate());
        subw(rd, rs, scratch);
      }
    }
  }
}

void TurboAssembler::Sub64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_sub(rd, rt.rm());
    } else {
      sub(rd, rs, rt.rm());
    }
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             (rd != zero_reg) && is_int6(-rt.immediate()) &&
             (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
    c_addi(rd,
           static_cast<int8_t>(
               -rt.immediate()));  // No c_subi instr, use c_addi(x, y, -imm).

  } else if (FLAG_riscv_c_extension && is_int10(-rt.immediate()) &&
             (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
             (rd.code() == rs.code()) && (rd == sp) &&
             !MustUseReg(rt.rmode())) {
    c_addi16sp(static_cast<int16_t>(-rt.immediate()));
  } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
    addi(rd, rs,
         static_cast<int32_t>(
             -rt.immediate()));  // No subi instr, use addi(x, y, -imm).
  } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
             (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
    addi(rd, rs, -rt.immediate() / 2);
    addi(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
  } else {
    int li_count = InstrCountForLi64Bit(rt.immediate());
    int li_neg_count = InstrCountForLi64Bit(-rt.immediate());
    if (li_neg_count < li_count && !MustUseReg(rt.rmode())) {
      // Use load -imm and add when loading -imm takes fewer instructions.
      DCHECK(rt.immediate() != std::numeric_limits<int32_t>::min());
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, -rt.immediate());
      add(rd, rs, scratch);
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      sub(rd, rs, scratch);
    }
  }
}

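// Multiply, divide, and remainder macros. The M-extension instructions take
// register operands only, so immediate operands are first materialized into a
// scratch register with Li.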
void TurboAssembler::Mul32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mulw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mulw(rd, rs, scratch);
  }
}

void TurboAssembler::Mulh32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mul(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mul(rd, rs, scratch);
  }
  srai(rd, rd, 32);
}

void TurboAssembler::Mulhu32(Register rd, Register rs, const Operand& rt,
                             Register rsz, Register rtz) {
  slli(rsz, rs, 32);
  if (rt.is_reg()) {
    slli(rtz, rt.rm(), 32);
  } else {
    Li(rtz, rt.immediate() << 32);
  }
  mulhu(rd, rsz, rtz);
  srai(rd, rd, 32);
}

void TurboAssembler::Mul64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mul(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mul(rd, rs, scratch);
  }
}

void TurboAssembler::Mulh64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mulh(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    mulh(rd, rs, scratch);
  }
}

void TurboAssembler::Div32(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divw(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divw(res, rs, scratch);
  }
}

void TurboAssembler::Mod32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remw(rd, rs, scratch);
  }
}

void TurboAssembler::Modu32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remuw(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remuw(rd, rs, scratch);
  }
}

void TurboAssembler::Div64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    div(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    div(rd, rs, scratch);
  }
}

void TurboAssembler::Divu32(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divuw(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divuw(res, rs, scratch);
  }
}

void TurboAssembler::Divu64(Register res, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    divu(res, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    divu(res, rs, scratch);
  }
}

void TurboAssembler::Mod64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    rem(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    rem(rd, rs, scratch);
  }
}

void TurboAssembler::Modu64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    remu(rd, rs, rt.rm());
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Li(scratch, rt.immediate());
    remu(rd, rs, scratch);
  }
}

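// Bitwise logical macros. And/Or/Xor follow the same operand-selection scheme
// as the arithmetic macros above; Nor is composed from or_ followed by not_,
// since RISC-V has no nor instruction.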
void TurboAssembler::And(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_and(rd, rt.rm());
    } else {
      and_(rd, rs, rt.rm());
    }
  } else {
    if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
        !MustUseReg(rt.rmode()) && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000)) {
      c_andi(rd, static_cast<int8_t>(rt.immediate()));
    } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      andi(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      and_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Or(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_or(rd, rt.rm());
    } else {
      or_(rd, rs, rt.rm());
    }
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      ori(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      or_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Xor(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        ((rd.code() & 0b11000) == 0b01000) &&
        ((rt.rm().code() & 0b11000) == 0b01000)) {
      c_xor(rd, rt.rm());
    } else {
      xor_(rd, rs, rt.rm());
    }
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      xori(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      Li(scratch, rt.immediate());
      xor_(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Nor(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    or_(rd, rs, rt.rm());
    not_(rd, rd);
  } else {
    Or(rd, rs, rt);
    not_(rd, rd);
  }
}

void TurboAssembler::Neg(Register rs, const Operand& rt) {
  DCHECK(rt.is_reg());
  neg(rs, rt.rm());
}

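// Comparison pseudo-instructions: these set rd to 0 or 1 instead of
// branching. When neither operand is zero, Seq/Sne subtract and test, e.g.
// (sketch) Seq(a0, a1, Operand(a2)) emits:
//   sub  a0, a1, a2
//   seqz a0, a0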
void TurboAssembler::Seqz(Register rd, const Operand& rt) {
  if (rt.is_reg()) {
    seqz(rd, rt.rm());
  } else {
    li(rd, rt.immediate() == 0);
  }
}

void TurboAssembler::Snez(Register rd, const Operand& rt) {
  if (rt.is_reg()) {
    snez(rd, rt.rm());
  } else {
    li(rd, rt.immediate() != 0);
  }
}

void TurboAssembler::Seq(Register rd, Register rs, const Operand& rt) {
  if (rs == zero_reg) {
    Seqz(rd, rt);
  } else if (IsZero(rt)) {
    seqz(rd, rs);
  } else {
    Sub64(rd, rs, rt);
    seqz(rd, rd);
  }
}

void TurboAssembler::Sne(Register rd, Register rs, const Operand& rt) {
  if (rs == zero_reg) {
    Snez(rd, rt);
  } else if (IsZero(rt)) {
    snez(rd, rs);
  } else {
    Sub64(rd, rs, rt);
    snez(rd, rd);
  }
}

void TurboAssembler::Slt(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rs, rt.rm());
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      slti(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      slt(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Sltu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rs, rt.rm());
  } else {
    if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
      sltiu(rd, rs, static_cast<int32_t>(rt.immediate()));
    } else {
      // li handles the relocation.
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      BlockTrampolinePoolScope block_trampoline_pool(this);
      Li(scratch, rt.immediate());
      sltu(rd, rs, scratch);
    }
  }
}

void TurboAssembler::Sle(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    slt(rd, scratch, rs);
  }
  xori(rd, rd, 1);
}

void TurboAssembler::Sleu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    sltu(rd, scratch, rs);
  }
  xori(rd, rd, 1);
}

void TurboAssembler::Sge(Register rd, Register rs, const Operand& rt) {
  Slt(rd, rs, rt);
  xori(rd, rd, 1);
}

void TurboAssembler::Sgeu(Register rd, Register rs, const Operand& rt) {
  Sltu(rd, rs, rt);
  xori(rd, rd, 1);
}

void TurboAssembler::Sgt(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    slt(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    slt(rd, scratch, rs);
  }
}

void TurboAssembler::Sgtu(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sltu(rd, rt.rm(), rs);
  } else {
    // li handles the relocation.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    Li(scratch, rt.immediate());
    sltu(rd, scratch, rs);
  }
}

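// Shift macros. The 32-bit variants use the *w instruction forms, which
// operate on and sign-extend the low 32 bits; the 64-bit immediate shifts
// pick a compressed encoding when FLAG_riscv_c_extension and the operands
// allow it.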
void TurboAssembler::Sll32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sllw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    slliw(rd, rs, shamt);
  }
}

void TurboAssembler::Sra32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sraw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    sraiw(rd, rs, shamt);
  }
}

void TurboAssembler::Srl32(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    srlw(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srliw(rd, rs, shamt);
  }
}

void TurboAssembler::Sra64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sra(rd, rs, rt.rm());
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    c_srai(rd, shamt);
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srai(rd, rs, shamt);
  }
}

void TurboAssembler::Srl64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    srl(rd, rs, rt.rm());
  } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
             ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    c_srli(rd, shamt);
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    srli(rd, rs, shamt);
  }
}

void TurboAssembler::Sll64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    sll(rd, rs, rt.rm());
  } else {
    uint8_t shamt = static_cast<uint8_t>(rt.immediate());
    if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
        (rd != zero_reg) && (shamt != 0) && is_uint6(shamt)) {
      c_slli(rd, shamt);
    } else {
      slli(rd, rs, shamt);
    }
  }
}

void TurboAssembler::Li(Register rd, int64_t imm) {
  if (FLAG_riscv_c_extension && (rd != zero_reg) && is_int6(imm)) {
    c_li(rd, imm);
  } else {
    RV_li(rd, imm);
  }
}

void TurboAssembler::Mv(Register rd, const Operand& rt) {
  if (FLAG_riscv_c_extension && (rd != zero_reg) && (rt.rm() != zero_reg)) {
    c_mv(rd, rt.rm());
  } else {
    mv(rd, rt.rm());
  }
}

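// The base RV64 ISA has no rotate instructions, so Ror/Dror are emulated with
// a pair of opposing shifts combined with an or.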
void TurboAssembler::Ror(Register rd, Register rs, const Operand& rt) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (rt.is_reg()) {
    negw(scratch, rt.rm());
    sllw(scratch, rs, scratch);
    srlw(rd, rs, rt.rm());
    or_(rd, scratch, rd);
    sext_w(rd, rd);
  } else {
    int64_t ror_value = rt.immediate() % 32;
    if (ror_value == 0) {
      Mv(rd, rs);
      return;
    } else if (ror_value < 0) {
      ror_value += 32;
    }
    srliw(scratch, rs, ror_value);
    slliw(rd, rs, 32 - ror_value);
    or_(rd, scratch, rd);
    sext_w(rd, rd);
  }
}

void TurboAssembler::Dror(Register rd, Register rs, const Operand& rt) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (rt.is_reg()) {
    negw(scratch, rt.rm());
    sll(scratch, rs, scratch);
    srl(rd, rs, rt.rm());
    or_(rd, scratch, rd);
  } else {
    int64_t dror_value = rt.immediate() % 64;
    if (dror_value == 0) {
      Mv(rd, rs);
      return;
    } else if (dror_value < 0) {
      dror_value += 64;
    }
    srli(scratch, rs, dror_value);
    slli(rd, rs, 64 - dror_value);
    or_(rd, scratch, rd);
  }
}

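// Computes rd = rt + (rs << sa). A scratch register holds the shifted value
// when rd aliases rt, so the addend is not clobbered early.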
void TurboAssembler::CalcScaledAddress(Register rd, Register rt, Register rs,
                                       uint8_t sa) {
  DCHECK(sa >= 1 && sa <= 31);
  UseScratchRegisterScope temps(this);
  Register tmp = rd == rt ? temps.Acquire() : rd;
  DCHECK(tmp != rt);
  slli(tmp, rs, sa);
  Add64(rd, rt, tmp);
}

// ------------Pseudo-instructions-------------
// Change endianness
void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size,
                              Register scratch) {
  DCHECK_NE(scratch, rs);
  DCHECK_NE(scratch, rd);
  DCHECK(operand_size == 4 || operand_size == 8);
  if (operand_size == 4) {
    // uint32_t x1 = 0x00FF00FF;
    // x0 = (x0 << 16 | x0 >> 16);
    // x0 = (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8));
    UseScratchRegisterScope temps(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    DCHECK((rd != t6) && (rs != t6));
    Register x0 = temps.Acquire();
    Register x1 = temps.Acquire();
    Register x2 = scratch;
    li(x1, 0x00FF00FF);
    slliw(x0, rs, 16);
    srliw(rd, rs, 16);
    or_(x0, rd, x0);   // x0 <- x0 << 16 | x0 >> 16
    and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF
    slliw(x2, x2, 8);  // x2 <- (x0 & x1) << 8
    slliw(x1, x1, 8);  // x1 <- 0xFF00FF00
    and_(rd, x0, x1);  // x0 & 0xFF00FF00
    srliw(rd, rd, 8);
    or_(rd, rd, x2);  // (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8))
  } else {
    // uint64_t x1 = 0x0000FFFF0000FFFFl;
    // uint64_t x1 = 0x00FF00FF00FF00FFl;  (x1 is reloaded with this below)
    // x0 = (x0 << 32 | x0 >> 32);
    // x0 = (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
    // x0 = (x0 & x1) << 8  | (x0 & (x1 << 8)) >> 8;
    UseScratchRegisterScope temps(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    DCHECK((rd != t6) && (rs != t6));
    Register x0 = temps.Acquire();
    Register x1 = temps.Acquire();
    Register x2 = scratch;
    li(x1, 0x0000FFFF0000FFFFl);
    slli(x0, rs, 32);
    srli(rd, rs, 32);
    or_(x0, rd, x0);   // x0 <- x0 << 32 | x0 >> 32
    and_(x2, x0, x1);  // x2 <- x0 & 0x0000FFFF0000FFFF
    slli(x2, x2, 16);  // x2 <- (x0 & 0x0000FFFF0000FFFF) << 16
    slli(x1, x1, 16);  // x1 <- 0xFFFF0000FFFF0000
    and_(rd, x0, x1);  // rd <- x0 & 0xFFFF0000FFFF0000
    srli(rd, rd, 16);  // rd <- x0 & (x1 << 16)) >> 16
    or_(x0, rd, x2);   // (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
    li(x1, 0x00FF00FF00FF00FFl);
    and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF00FF00FF
    slli(x2, x2, 8);   // x2 <- (x0 & x1) << 8
    slli(x1, x1, 8);   // x1 <- 0xFF00FF00FF00FF00
    and_(rd, x0, x1);
    srli(rd, rd, 8);  // rd <- (x0 & (x1 << 8)) >> 8
    or_(rd, rd, x2);  // (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8))
  }
}

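// Helpers for unaligned memory accesses. Scalar loads and stores on RISC-V
// may trap or be slow when misaligned, so wide values are assembled byte by
// byte. LoadNBytes reads the most significant byte first so that only that
// (sign- or zero-extending) load touches the upper bits of rd.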
template <int NBYTES, bool LOAD_SIGNED>
void TurboAssembler::LoadNBytes(Register rd, const MemOperand& rs,
                                Register scratch) {
  DCHECK(rd != rs.rm() && rd != scratch);
  DCHECK_LE(NBYTES, 8);

  // load the most significant byte
  if (LOAD_SIGNED) {
    lb(rd, rs.rm(), rs.offset() + (NBYTES - 1));
  } else {
    lbu(rd, rs.rm(), rs.offset() + (NBYTES - 1));
  }

  // load remaining (nbytes-1) bytes from higher to lower
  slli(rd, rd, 8 * (NBYTES - 1));
  for (int i = (NBYTES - 2); i >= 0; i--) {
    lbu(scratch, rs.rm(), rs.offset() + i);
    if (i) slli(scratch, scratch, i * 8);
    or_(rd, rd, scratch);
  }
}

template <int NBYTES, bool LOAD_SIGNED>
void TurboAssembler::LoadNBytesOverwritingBaseReg(const MemOperand& rs,
                                                  Register scratch0,
                                                  Register scratch1) {
  // This function loads nbytes from memory specified by rs and into rs.rm()
  DCHECK(rs.rm() != scratch0 && rs.rm() != scratch1 && scratch0 != scratch1);
  DCHECK_LE(NBYTES, 8);

  // load the most significant byte
  if (LOAD_SIGNED) {
    lb(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
  } else {
    lbu(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
  }

  // load remaining (nbytes-1) bytes from higher to lower
  slli(scratch0, scratch0, 8 * (NBYTES - 1));
  for (int i = (NBYTES - 2); i >= 0; i--) {
    lbu(scratch1, rs.rm(), rs.offset() + i);
    if (i) {
      slli(scratch1, scratch1, i * 8);
      or_(scratch0, scratch0, scratch1);
    } else {
      // write to rs.rm() when processing the last byte
      or_(rs.rm(), scratch0, scratch1);
    }
  }
}

template <int NBYTES, bool IS_SIGNED>
void TurboAssembler::UnalignedLoadHelper(Register rd, const MemOperand& rs) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  UseScratchRegisterScope temps(this);

  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    // Adjust the base and offset so that both offset and
    // offset + NBYTES - 1 fit into int12.
    MemOperand source = rs;
    Register scratch_base = temps.Acquire();
    DCHECK(scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);

    // Since source.rm() is scratch_base, assume rd != source.rm()
    DCHECK(rd != source.rm());
    Register scratch_other = temps.Acquire();
    LoadNBytes<NBYTES, IS_SIGNED>(rd, source, scratch_other);
  } else {
    // no need to adjust base-and-offset
    if (rd != rs.rm()) {
      Register scratch = temps.Acquire();
      LoadNBytes<NBYTES, IS_SIGNED>(rd, rs, scratch);
    } else {  // rd == rs.rm()
      Register scratch = temps.Acquire();
      Register scratch2 = temps.Acquire();
      LoadNBytesOverwritingBaseReg<NBYTES, IS_SIGNED>(rs, scratch, scratch2);
    }
  }
}

template <int NBYTES>
void TurboAssembler::UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
                                          Register scratch_base) {
  DCHECK(NBYTES == 4 || NBYTES == 8);
  DCHECK_NE(scratch_base, rs.rm());
  BlockTrampolinePoolScope block_trampoline_pool(this);
  MemOperand source = rs;
  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    // Adjust the base and offset so that both offset and
    // offset + NBYTES - 1 fit into int12.
    DCHECK(scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);
  }
  UseScratchRegisterScope temps(this);
  Register scratch_other = temps.Acquire();
  Register scratch = temps.Acquire();
  DCHECK(scratch != rs.rm() && scratch_other != scratch &&
         scratch_other != rs.rm());
  LoadNBytes<NBYTES, true>(scratch, source, scratch_other);
  if (NBYTES == 4)
    fmv_w_x(frd, scratch);
  else
    fmv_d_x(frd, scratch);
}

template <int NBYTES>
void TurboAssembler::UnalignedStoreHelper(Register rd, const MemOperand& rs,
                                          Register scratch_other) {
  DCHECK(scratch_other != rs.rm());
  DCHECK_LE(NBYTES, 8);
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  Register scratch_base = temps.Acquire();
  // Adjust the base and offset so that both offset and offset + NBYTES - 1
  // fit into int12.
  if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
    DCHECK(scratch_base != rd && scratch_base != rs.rm());
    AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
                        NBYTES - 1);
  }

  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (scratch_other == no_reg) {
    if (temps.hasAvailable()) {
      scratch_other = temps.Acquire();
    } else {
      push(t2);
      scratch_other = t2;
    }
  }

  DCHECK(scratch_other != rd && scratch_other != rs.rm() &&
         scratch_other != source.rm());

  sb(rd, source.rm(), source.offset());
  for (size_t i = 1; i <= (NBYTES - 1); i++) {
    srli(scratch_other, rd, i * 8);
    sb(scratch_other, source.rm(), source.offset() + i);
  }
  if (scratch_other == t2) {
    pop(t2);
  }
}

template <int NBYTES>
void TurboAssembler::UnalignedFStoreHelper(FPURegister frd,
                                           const MemOperand& rs,
                                           Register scratch) {
  DCHECK(NBYTES == 8 || NBYTES == 4);
  DCHECK_NE(scratch, rs.rm());
  if (NBYTES == 4) {
    fmv_x_w(scratch, frd);
  } else {
    fmv_x_d(scratch, frd);
  }
  UnalignedStoreHelper<NBYTES>(scratch, rs);
}

template <typename Reg_T, typename Func>
void TurboAssembler::AlignedLoadHelper(Reg_T target, const MemOperand& rs,
                                       Func generator) {
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (NeedAdjustBaseAndOffset(source)) {
    Register scratch = temps.Acquire();
    DCHECK(scratch != rs.rm());
    AdjustBaseAndOffset(&source, scratch);
  }
  generator(target, source);
}

template <typename Reg_T, typename Func>
void TurboAssembler::AlignedStoreHelper(Reg_T value, const MemOperand& rs,
                                        Func generator) {
  MemOperand source = rs;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (NeedAdjustBaseAndOffset(source)) {
    Register scratch = temps.Acquire();
    // make sure scratch does not overwrite value
    if (std::is_same<Reg_T, Register>::value)
      DCHECK(scratch.code() != value.code());
    DCHECK(scratch != rs.rm());
    AdjustBaseAndOffset(&source, scratch);
  }
  generator(value, source);
}

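// Public unaligned load/store accessors: Ulw/Ulwu/Ulh/Ulhu/Uld and
// Usw/Ush/Usd wrap the byte-wise helpers above; the U prefix marks the
// unaligned-safe variants.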
void TurboAssembler::Ulw(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<4, true>(rd, rs);
}

void TurboAssembler::Ulwu(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<4, false>(rd, rs);
}

void TurboAssembler::Usw(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<4>(rd, rs);
}

void TurboAssembler::Ulh(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<2, true>(rd, rs);
}

void TurboAssembler::Ulhu(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<2, false>(rd, rs);
}

void TurboAssembler::Ush(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<2>(rd, rs);
}

void TurboAssembler::Uld(Register rd, const MemOperand& rs) {
  UnalignedLoadHelper<8, true>(rd, rs);
}

// Load a consecutive 32-bit word pair into a 64-bit reg, putting the first
// word in the low bits and the second word in the high bits.
void MacroAssembler::LoadWordPair(Register rd, const MemOperand& rs) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Lwu(rd, rs);
  Lw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
  slli(scratch, scratch, 32);
  Add64(rd, rd, scratch);
}

void TurboAssembler::Usd(Register rd, const MemOperand& rs) {
  UnalignedStoreHelper<8>(rd, rs);
}

// Do a 64-bit store as two consecutive 32-bit stores to an unaligned address.
void MacroAssembler::StoreWordPair(Register rd, const MemOperand& rs) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Sw(rd, rs);
  srai(scratch, rd, 32);
  Sw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
}

void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs,
                                Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFLoadHelper<4>(fd, rs, scratch);
}

void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs,
                                 Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFStoreHelper<4>(fd, rs, scratch);
}

void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs,
                                 Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFLoadHelper<8>(fd, rs, scratch);
}

void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs,
                                  Register scratch) {
  DCHECK_NE(scratch, rs.rm());
  UnalignedFStoreHelper<8>(fd, rs, scratch);
}

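// Aligned load/store wrappers. Each wraps a raw assembler instruction (picked
// by a lambda) in AlignedLoadHelper/AlignedStoreHelper, which rewrites the
// MemOperand through a scratch base register when the offset is not encodable
// in int12; Lw/Sw/Ld/Sd additionally select compressed encodings when the
// C extension allows.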
void TurboAssembler::Lb(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lb(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lbu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lbu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sb(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    this->sb(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Lh(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lh(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lhu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lhu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sh(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    this->sh(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Lw(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
      this->c_lw(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
               is_uint8(source.offset()) && (source.rm() == sp) &&
               ((source.offset() & 0x3) == 0)) {
      this->c_lwsp(target, source.offset());
    } else {
      this->lw(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Lwu(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    this->lwu(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sw(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
      this->c_sw(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint8(source.offset()) && (((source.offset() & 0x3) == 0))) {
      this->c_swsp(value, source.offset());
    } else {
      this->sw(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::Ld(Register rd, const MemOperand& rs) {
  auto fn = [this](Register target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_ld(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
               is_uint9(source.offset()) && (source.rm() == sp) &&
               ((source.offset() & 0x7) == 0)) {
      this->c_ldsp(target, source.offset());
    } else {
      this->ld(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(rd, rs, fn);
}

void TurboAssembler::Sd(Register rd, const MemOperand& rs) {
  auto fn = [this](Register value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_sd(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_sdsp(value, source.offset());
    } else {
      this->sd(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(rd, rs, fn);
}

void TurboAssembler::LoadFloat(FPURegister fd, const MemOperand& src) {
  auto fn = [this](FPURegister target, const MemOperand& source) {
    this->flw(target, source.rm(), source.offset());
  };
  AlignedLoadHelper(fd, src, fn);
}

void TurboAssembler::StoreFloat(FPURegister fs, const MemOperand& src) {
  auto fn = [this](FPURegister value, const MemOperand& source) {
    this->fsw(value, source.rm(), source.offset());
  };
  AlignedStoreHelper(fs, src, fn);
}

void TurboAssembler::LoadDouble(FPURegister fd, const MemOperand& src) {
  auto fn = [this](FPURegister target, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fld(target, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fldsp(target, source.offset());
    } else {
      this->fld(target, source.rm(), source.offset());
    }
  };
  AlignedLoadHelper(fd, src, fn);
}

void TurboAssembler::StoreDouble(FPURegister fs, const MemOperand& src) {
  auto fn = [this](FPURegister value, const MemOperand& source) {
    if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
        ((source.rm().code() & 0b11000) == 0b01000) &&
        is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fsd(value, source.rm(), source.offset());
    } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
               is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
      this->c_fsdsp(value, source.offset());
    } else {
      this->fsd(value, source.rm(), source.offset());
    }
  };
  AlignedStoreHelper(fs, src, fn);
}

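// Load-reserved/store-conditional wrappers (A extension). Sc/Scd write the
// success flag back into rd, so the value register doubles as the result
// register: rd == 0 afterwards means the store succeeded.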
1505void TurboAssembler::Ll(Register rd, const MemOperand& rs) {
1506  bool is_one_instruction = rs.offset() == 0;
1507  if (is_one_instruction) {
1508    lr_w(false, false, rd, rs.rm());
1509  } else {
1510    UseScratchRegisterScope temps(this);
1511    Register scratch = temps.Acquire();
1512    Add64(scratch, rs.rm(), rs.offset());
1513    lr_w(false, false, rd, scratch);
1514  }
1515}
1516
1517void TurboAssembler::Lld(Register rd, const MemOperand& rs) {
1518  bool is_one_instruction = rs.offset() == 0;
1519  if (is_one_instruction) {
1520    lr_d(false, false, rd, rs.rm());
1521  } else {
1522    UseScratchRegisterScope temps(this);
1523    Register scratch = temps.Acquire();
1524    Add64(scratch, rs.rm(), rs.offset());
1525    lr_d(false, false, rd, scratch);
1526  }
1527}
1528
1529void TurboAssembler::Sc(Register rd, const MemOperand& rs) {
1530  bool is_one_instruction = rs.offset() == 0;
1531  if (is_one_instruction) {
1532    sc_w(false, false, rd, rs.rm(), rd);
1533  } else {
1534    UseScratchRegisterScope temps(this);
1535    Register scratch = temps.Acquire();
1536    Add64(scratch, rs.rm(), rs.offset());
1537    sc_w(false, false, rd, scratch, rd);
1538  }
1539}
1540
1541void TurboAssembler::Scd(Register rd, const MemOperand& rs) {
1542  bool is_one_instruction = rs.offset() == 0;
1543  if (is_one_instruction) {
1544    sc_d(false, false, rd, rs.rm(), rd);
1545  } else {
1546    UseScratchRegisterScope temps(this);
1547    Register scratch = temps.Acquire();
1548    Add64(scratch, rs.rm(), rs.offset());
1549    sc_d(false, false, rd, scratch, rd);
1550  }
1551}
1552
void TurboAssembler::li(Register dst, Handle<HeapObject> value,
                        RelocInfo::Mode rmode) {
  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
  // non-isolate-independent code. In many cases it might be cheaper than
  // embedding the relocatable value.
  if (root_array_available_ && options().isolate_independent_code) {
    IndirectLoadConstant(dst, value);
    return;
  } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
    EmbeddedObjectIndex index = AddEmbeddedObject(value);
    DCHECK(is_uint32(index));
    li(dst, Operand(index, rmode));
  } else {
    DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
    li(dst, Operand(value.address(), rmode));
  }
}

void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) {
  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
  // non-isolate-independent code. In many cases it might be cheaper than
  // embedding the relocatable value.
  if (root_array_available_ && options().isolate_independent_code) {
    IndirectLoadExternalReference(dst, value);
    return;
  }
  li(dst, Operand(value), mode);
}

void TurboAssembler::li(Register dst, const StringConstantBase* string,
                        LiFlags mode) {
  li(dst, Operand::EmbeddedStringConstant(string), mode);
}

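// Lo12 below is the sign-extended low 12 bits of the value, so Hi20 is
// biased by 0x800 to compensate for that sign extension. For example,
// materializing 0xFFF takes two instructions:
//   lui  rd, 1       // Hi20 = (0xFFF + 0x800) >> 12 = 1, rd = 0x1000
//   addi rd, rd, -1  // Lo12 = -1, rd = 0xFFF
// whereas 0x1000 (Lo12 == 0) or 0x7FF (Hi20 == 0) each take only one.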
static inline int InstrCountForLiLower32Bit(int64_t value) {
  int64_t Hi20 = ((value + 0x800) >> 12);
  int64_t Lo12 = value << 52 >> 52;
  if (Hi20 == 0 || Lo12 == 0) {
    return 1;
  }
  return 2;
}

int TurboAssembler::InstrCountForLi64Bit(int64_t value) {
  if (is_int32(value + 0x800)) {
    return InstrCountForLiLower32Bit(value);
  } else {
    return li_estimate(value);
  }
}

void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
  DCHECK(!j.is_reg());
  DCHECK(!MustUseReg(j.rmode()));
  DCHECK(mode == OPTIMIZE_SIZE);
  Li(rd, j.immediate());
}

void TurboAssembler::li(Register rd, Operand j, LiFlags mode) {
  DCHECK(!j.is_reg());
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) {
    UseScratchRegisterScope temps(this);
    int count = li_estimate(j.immediate(), temps.hasAvailable());
    int reverse_count = li_estimate(~j.immediate(), temps.hasAvailable());
    if (FLAG_riscv_constant_pool && count >= 4 && reverse_count >= 4) {
      // Record the value in the constant pool and load it with a
      // pc-relative auipc+ld pair (patched once the pool is emitted).
      RecordEntry(static_cast<uint64_t>(j.immediate()), j.rmode());
      auipc(rd, 0);
      ld(rd, rd, 0);
    } else {
      if ((count - reverse_count) > 1) {
        Li(rd, ~j.immediate());
        not_(rd, rd);
      } else {
        Li(rd, j.immediate());
      }
    }
  } else if (MustUseReg(j.rmode())) {
    int64_t immediate;
    if (j.IsHeapObjectRequest()) {
      RequestHeapObject(j.heap_object_request());
      immediate = 0;
    } else {
      immediate = j.immediate();
    }

    RecordRelocInfo(j.rmode(), immediate);
    li_ptr(rd, immediate);
  } else if (mode == ADDRESS_LOAD) {
    // We always need the same number of instructions as we may need to patch
    // this code to load another value which may need all 6 instructions.
    RecordRelocInfo(j.rmode());
    li_ptr(rd, j.immediate());
  } else {  // Always emit the same 48-bit instruction sequence.
    li_ptr(rd, j.immediate());
  }
}

static RegList t_regs = {t0, t1, t2, t3, t4, t5, t6};
static RegList a_regs = {a0, a1, a2, a3, a4, a5, a6, a7};
static RegList s_regs = {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11};

void TurboAssembler::MultiPush(RegList regs) {
  int16_t num_to_push = regs.Count();
  int16_t stack_offset = num_to_push * kSystemPointerSize;

#define TEST_AND_PUSH_REG(reg)             \
  if (regs.has(reg)) {                     \
    stack_offset -= kSystemPointerSize;    \
    Sd(reg, MemOperand(sp, stack_offset)); \
    regs.clear(reg);                       \
  }

#define T_REGS(V) V(t6) V(t5) V(t4) V(t3) V(t2) V(t1) V(t0)
#define A_REGS(V) V(a7) V(a6) V(a5) V(a4) V(a3) V(a2) V(a1) V(a0)
#define S_REGS(V) \
  V(s11) V(s10) V(s9) V(s8) V(s7) V(s6) V(s5) V(s4) V(s3) V(s2) V(s1)

  Sub64(sp, sp, Operand(stack_offset));

  // Certain usages of MultiPush require that registers are pushed onto the
  // stack in a particular order: ra, fp, sp, gp, tp, ... (essentially in
  // decreasing order of register number).
  TEST_AND_PUSH_REG(ra);
  TEST_AND_PUSH_REG(fp);
  TEST_AND_PUSH_REG(sp);
  TEST_AND_PUSH_REG(gp);
  TEST_AND_PUSH_REG(tp);
  if (!(regs & s_regs).is_empty()) {
    S_REGS(TEST_AND_PUSH_REG)
  }
  if (!(regs & a_regs).is_empty()) {
    A_REGS(TEST_AND_PUSH_REG)
  }
  if (!(regs & t_regs).is_empty()) {
    T_REGS(TEST_AND_PUSH_REG)
  }

  DCHECK(regs.is_empty());

#undef TEST_AND_PUSH_REG
#undef T_REGS
#undef A_REGS
#undef S_REGS
}

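// Example: MultiPush({ra, fp, a0}) reserves 24 bytes and stores ra at
// sp+16, fp at sp+8, and a0 at sp+0; MultiPop({ra, fp, a0}) below walks
// the registers in the opposite order and reloads each from the same slot.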
void TurboAssembler::MultiPop(RegList regs) {
  int16_t stack_offset = 0;

#define TEST_AND_POP_REG(reg)              \
  if (regs.has(reg)) {                     \
    Ld(reg, MemOperand(sp, stack_offset)); \
    stack_offset += kSystemPointerSize;    \
    regs.clear(reg);                       \
  }

#define T_REGS(V) V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6)
#define A_REGS(V) V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7)
#define S_REGS(V) \
  V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) V(s9) V(s10) V(s11)

  // MultiPop pops from the stack in the reverse order of MultiPush.
  if (!(regs & t_regs).is_empty()) {
    T_REGS(TEST_AND_POP_REG)
  }
  if (!(regs & a_regs).is_empty()) {
    A_REGS(TEST_AND_POP_REG)
  }
  if (!(regs & s_regs).is_empty()) {
    S_REGS(TEST_AND_POP_REG)
  }
  TEST_AND_POP_REG(tp);
  TEST_AND_POP_REG(gp);
  TEST_AND_POP_REG(sp);
  TEST_AND_POP_REG(fp);
  TEST_AND_POP_REG(ra);

  DCHECK(regs.is_empty());

  addi(sp, sp, stack_offset);

#undef TEST_AND_POP_REG
#undef T_REGS
#undef S_REGS
#undef A_REGS
}

void TurboAssembler::MultiPushFPU(DoubleRegList regs) {
  int16_t num_to_push = regs.Count();
  int16_t stack_offset = num_to_push * kDoubleSize;

  Sub64(sp, sp, Operand(stack_offset));
  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
    if ((regs.bits() & (1 << i)) != 0) {
      stack_offset -= kDoubleSize;
      StoreDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
    }
  }
}

void TurboAssembler::MultiPopFPU(DoubleRegList regs) {
  int16_t stack_offset = 0;

  for (int16_t i = 0; i < kNumRegisters; i++) {
    if ((regs.bits() & (1 << i)) != 0) {
      LoadDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
      stack_offset += kDoubleSize;
    }
  }
  addi(sp, sp, stack_offset);
}

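// ExtractBits places the field rs[pos + size - 1 : pos] into the low bits
// of rt, zero- or sign-extended. For example, ExtractBits(rt, rs, 8, 4)
// shifts bit 11 of rs up to bit 63 (slli by 52), then shifts right by 60,
// leaving the 4-bit field in rt[3:0].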
void TurboAssembler::ExtractBits(Register rt, Register rs, uint16_t pos,
                                 uint16_t size, bool sign_extend) {
  DCHECK(pos < 64 && 0 < size && size <= 64 && 0 < pos + size &&
         pos + size <= 64);
  slli(rt, rs, 64 - (pos + size));
  if (sign_extend) {
    srai(rt, rt, 64 - size);
  } else {
    srli(rt, rt, 64 - size);
  }
}

void TurboAssembler::InsertBits(Register dest, Register source, Register pos,
                                int size) {
  DCHECK_LT(size, 64);
  UseScratchRegisterScope temps(this);
  Register mask = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register source_ = temps.Acquire();
  // Create a mask of length `size`.
  li(mask, 1);
  slli(mask, mask, size);
  addi(mask, mask, -1);
  and_(source_, mask, source);
  sll(source_, source_, pos);
  // Make a mask whose 0-bits start at `pos` and run for `size` bits.
  sll(mask, mask, pos);
  not_(mask, mask);
  // Clear the area where source will be inserted.
  and_(dest, mask, dest);
  // Insert source.
  or_(dest, dest, source_);
}

void TurboAssembler::Neg_s(FPURegister fd, FPURegister fs) { fneg_s(fd, fs); }

void TurboAssembler::Neg_d(FPURegister fd, FPURegister fs) { fneg_d(fd, fs); }

void TurboAssembler::Cvt_d_uw(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_d_wu(fd, rs);
}

void TurboAssembler::Cvt_d_w(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_d_w(fd, rs);
}

void TurboAssembler::Cvt_d_ul(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_d_lu(fd, rs);
}

void TurboAssembler::Cvt_s_uw(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_s_wu(fd, rs);
}

void TurboAssembler::Cvt_s_w(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_s_w(fd, rs);
}

void TurboAssembler::Cvt_s_ul(FPURegister fd, Register rs) {
  // Convert rs to a FP value in fd.
  fcvt_s_lu(fd, rs);
}

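// RISC-V fcvt instructions do not trap on NaN or out-of-range inputs; they
// write the destination's min/max value and raise the invalid-operation
// (NV) accrued flag instead. The helper below therefore clears that flag,
// performs the conversion, and reads fflags back, so that `result` reports
// 1 for a cleanly converted input and 0 otherwise.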
template <typename CvtFunc>
void TurboAssembler::RoundFloatingPointToInteger(Register rd, FPURegister fs,
                                                 Register result,
                                                 CvtFunc fcvt_generator) {
  // Save csr_fflags to scratch & clear exception flags
  if (result.is_valid()) {
    BlockTrampolinePoolScope block_trampoline_pool(this);
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();

    int exception_flags = kInvalidOperation;
    csrrci(scratch, csr_fflags, exception_flags);

    // actual conversion instruction
    fcvt_generator(this, rd, fs);

    // check kInvalidOperation flag (out-of-range, NaN)
    // set result to 1 if normal, otherwise set result to 0 for abnormal
    frflags(result);
    andi(result, result, exception_flags);
    seqz(result, result);  // result <-- 1 (normal), result <-- 0 (abnormal)

    // restore csr_fflags
    csrw(csr_fflags, scratch);
  } else {
    // actual conversion instruction
    fcvt_generator(this, rd, fs);
  }
}

void TurboAssembler::Clear_if_nan_d(Register rd, FPURegister fs) {
  Label no_nan;
  feq_d(kScratchReg, fs, fs);
  bnez(kScratchReg, &no_nan);
  Move(rd, zero_reg);
  bind(&no_nan);
}

void TurboAssembler::Clear_if_nan_s(Register rd, FPURegister fs) {
  Label no_nan;
  feq_s(kScratchReg, fs, fs);
  bnez(kScratchReg, &no_nan);
  Move(rd, zero_reg);
  bind(&no_nan);
}

void TurboAssembler::Trunc_uw_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_wu_d(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_w_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_d(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_uw_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_wu_s(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_w_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_s(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_ul_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_lu_d(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_l_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_l_d(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_ul_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_lu_s(dst, src, RTZ);
      });
}

void TurboAssembler::Trunc_l_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_l_s(dst, src, RTZ);
      });
}

void TurboAssembler::Round_w_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_s(dst, src, RNE);
      });
}

void TurboAssembler::Round_w_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_d(dst, src, RNE);
      });
}

void TurboAssembler::Ceil_w_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_s(dst, src, RUP);
      });
}

void TurboAssembler::Ceil_w_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_d(dst, src, RUP);
      });
}

void TurboAssembler::Floor_w_s(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_s(dst, src, RDN);
      });
}

void TurboAssembler::Floor_w_d(Register rd, FPURegister fs, Register result) {
  RoundFloatingPointToInteger(
      rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
        tasm->fcvt_w_d(dst, src, RDN);
      });
}

// According to the JS ECMA specification, for floating-point round
// operations, if the input is NaN, +/-Infinity, or +/-0, the same input is
// returned as the rounded result; this differs from the behavior of the
// RISC-V fcvt instructions (which round out-of-range values to the nearest
// max or min value), so special handling is needed for NaN, +/-Infinity,
// and +/-0. For example, Math.ceil(-0.25) must return -0.0, but an fcvt
// round-trip alone would produce +0.0.
template <typename F>
void TurboAssembler::RoundHelper(FPURegister dst, FPURegister src,
                                 FPURegister fpu_scratch, RoundingMode frm) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  UseScratchRegisterScope temps(this);
  Register scratch2 = temps.Acquire();

  DCHECK((std::is_same<float, F>::value) || (std::is_same<double, F>::value));
  // Need at least two FPRs, so check against dst == src == fpu_scratch.
  DCHECK(!(dst == src && dst == fpu_scratch));

  const int kFloatMantissaBits =
      sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
  const int kFloatExponentBits =
      sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
  const int kFloatExponentBias =
      sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
  Label done;

  {
    UseScratchRegisterScope temps2(this);
    Register scratch = temps2.Acquire();
    // Extract the exponent of the source floating-point value into scratch2.
    if (std::is_same<F, double>::value) {
      fmv_x_d(scratch, src);
    } else {
      fmv_x_w(scratch, src);
    }
    ExtractBits(scratch2, scratch, kFloatMantissaBits, kFloatExponentBits);
  }

  // If src is NaN/+-Infinity/+-Zero or if the exponent is larger than the
  // number of bits in the mantissa, the result is the same as src, so move
  // src to dst (to avoid generating another branch).
  if (dst != src) {
    if (std::is_same<F, double>::value) {
      fmv_d(dst, src);
    } else {
      fmv_s(dst, src);
    }
  }
  {
    Label not_NaN;
    UseScratchRegisterScope temps2(this);
    Register scratch = temps2.Acquire();
    // According to the wasm spec
    // (https://webassembly.github.io/spec/core/exec/numerics.html#aux-nans)
    // if the input is a canonical NaN, the output is the canonical NaN, and
    // if the input is any other NaN, the output is some NaN whose most
    // significant payload bit is 1. In RISC-V, feq_d sets scratch to 0 if
    // src is a NaN. If src is not a NaN, branch to the label and do nothing;
    // if it is, fmin_d sets dst to the canonical NaN.
    if (std::is_same<F, double>::value) {
      feq_d(scratch, src, src);
      bnez(scratch, &not_NaN);
      fmin_d(dst, src, src);
    } else {
      feq_s(scratch, src, src);
      bnez(scratch, &not_NaN);
      fmin_s(dst, src, src);
    }
    bind(&not_NaN);
  }

  // If the real exponent (i.e., scratch2 - kFloatExponentBias) is greater
  // than or equal to kFloatMantissaBits, the floating-point value has no
  // fractional part, so the input is already rounded; jump to done. Note
  // that NaN and Infinity set the maximal exponent in floating-point
  // representation, so they also satisfy
  // (scratch2 - kFloatExponentBias >= kFloatMantissaBits), and JS round
  // semantics specify that rounding NaN (Infinity) returns NaN (Infinity),
  // so NaN and Infinity count as already rounded too.
  Branch(&done, greater_equal, scratch2,
         Operand(kFloatExponentBias + kFloatMantissaBits));

  // Actual rounding is needed along this path.

  // old_src holds the original input, needed for the case of src == dst.
  FPURegister old_src = src;
  if (src == dst) {
    DCHECK(fpu_scratch != dst);
    Move(fpu_scratch, src);
    old_src = fpu_scratch;
  }

  // Only inputs whose real exponent is less than kFloatMantissaBits (i.e.,
  // 23 or 52 bits) reach this path, so their values fit in 24-bit (float)
  // or 53-bit (double) integers. We therefore round the input to an integer
  // value and convert it back to floating-point.
  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    if (std::is_same<F, double>::value) {
      fcvt_l_d(scratch, src, frm);
      fcvt_d_l(dst, scratch, frm);
    } else {
      fcvt_w_s(scratch, src, frm);
      fcvt_s_w(dst, scratch, frm);
    }
  }
  // Special handling is needed if the input is a very small positive or
  // negative number that rounds to zero. JS semantics requires that the
  // rounded result retains the sign of the input, so a very small positive
  // (negative) floating-point number must round to positive (negative) 0.
  // We therefore use sign-bit injection to produce +/-0 correctly: instead
  // of testing for zero with a branch, we simply inject the sign bit on
  // this entire path (this is where old_src is needed).
  if (std::is_same<F, double>::value) {
    fsgnj_d(dst, dst, old_src);
  } else {
    fsgnj_s(dst, dst, old_src);
  }

  bind(&done);
}

// According to the JS ECMA specification, for floating-point round
// operations, if the input is NaN, +/-Infinity, or +/-0, the same input is
// returned as the rounded result; this differs from the behavior of the
// RISC-V fcvt instructions (which round out-of-range values to the nearest
// max or min value), so special handling is needed for NaN, +/-Infinity,
// and +/-0.
template <typename F>
void TurboAssembler::RoundHelper(VRegister dst, VRegister src, Register scratch,
                                 VRegister v_scratch, RoundingMode frm) {
  VU.set(scratch, std::is_same<F, float>::value ? E32 : E64, m1);
  // If a lane is NaN/+-Infinity/+-Zero or its exponent is larger than the
  // number of bits in the mantissa, the result is the same as the source,
  // so that lane is left untouched (to avoid generating another branch).

  // If the real exponent (i.e., the biased exponent minus
  // kFloatExponentBias) is greater than or equal to kFloatMantissaBits, the
  // floating-point value has no fractional part, so the input is already
  // rounded. Note that NaN and Infinity set the maximal exponent, so they
  // also satisfy this condition, and JS round semantics specify that
  // rounding NaN (Infinity) returns NaN (Infinity), so NaN and Infinity
  // count as already rounded too.
  const int kFloatMantissaBits =
      sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
  const int kFloatExponentBits =
      sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
  const int kFloatExponentBias =
      sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;

  // Extract the biased exponent of each lane and set v0 as the mask of
  // lanes that actually need rounding (exponent below the threshold).
  li(scratch, 64 - kFloatMantissaBits - kFloatExponentBits);
  vsll_vx(v_scratch, src, scratch);
  li(scratch, 64 - kFloatExponentBits);
  vsrl_vx(v_scratch, v_scratch, scratch);
  li(scratch, kFloatExponentBias + kFloatMantissaBits);
  vmslt_vx(v0, v_scratch, scratch);

  VU.set(frm);
  vmv_vv(dst, src);
  if (dst == src) {
    vmv_vv(v_scratch, src);
  }
  vfcvt_x_f_v(dst, src, MaskType::Mask);
  vfcvt_f_x_v(dst, dst, MaskType::Mask);

  // Special handling is needed if the input is a very small positive or
  // negative number that rounds to zero. JS semantics requires that the
  // rounded result retains the sign of the input, so a very small positive
  // (negative) floating-point number must round to positive (negative) 0.
  if (dst == src) {
    vfsngj_vv(dst, dst, v_scratch);
  } else {
    vfsngj_vv(dst, dst, src);
  }
}

void TurboAssembler::Ceil_f(VRegister vdst, VRegister vsrc, Register scratch,
                            VRegister v_scratch) {
  RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RUP);
}

void TurboAssembler::Ceil_d(VRegister vdst, VRegister vsrc, Register scratch,
                            VRegister v_scratch) {
  RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RUP);
}

void TurboAssembler::Floor_f(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RDN);
}

void TurboAssembler::Floor_d(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RDN);
}

void TurboAssembler::Trunc_d(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RTZ);
}

void TurboAssembler::Trunc_f(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RTZ);
}

void TurboAssembler::Round_f(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RNE);
}

void TurboAssembler::Round_d(VRegister vdst, VRegister vsrc, Register scratch,
                             VRegister v_scratch) {
  RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RNE);
}

void TurboAssembler::Floor_d_d(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<double>(dst, src, fpu_scratch, RDN);
}

void TurboAssembler::Ceil_d_d(FPURegister dst, FPURegister src,
                              FPURegister fpu_scratch) {
  RoundHelper<double>(dst, src, fpu_scratch, RUP);
}

void TurboAssembler::Trunc_d_d(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<double>(dst, src, fpu_scratch, RTZ);
}

void TurboAssembler::Round_d_d(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<double>(dst, src, fpu_scratch, RNE);
}

void TurboAssembler::Floor_s_s(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<float>(dst, src, fpu_scratch, RDN);
}

void TurboAssembler::Ceil_s_s(FPURegister dst, FPURegister src,
                              FPURegister fpu_scratch) {
  RoundHelper<float>(dst, src, fpu_scratch, RUP);
}

void TurboAssembler::Trunc_s_s(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<float>(dst, src, fpu_scratch, RTZ);
}

void TurboAssembler::Round_s_s(FPURegister dst, FPURegister src,
                               FPURegister fpu_scratch) {
  RoundHelper<float>(dst, src, fpu_scratch, RNE);
}

void MacroAssembler::Madd_s(FPURegister fd, FPURegister fr, FPURegister fs,
                            FPURegister ft) {
  fmadd_s(fd, fs, ft, fr);
}

void MacroAssembler::Madd_d(FPURegister fd, FPURegister fr, FPURegister fs,
                            FPURegister ft) {
  fmadd_d(fd, fs, ft, fr);
}

void MacroAssembler::Msub_s(FPURegister fd, FPURegister fr, FPURegister fs,
                            FPURegister ft) {
  fmsub_s(fd, fs, ft, fr);
}

void MacroAssembler::Msub_d(FPURegister fd, FPURegister fr, FPURegister fs,
                            FPURegister ft) {
  fmsub_d(fd, fs, ft, fr);
}

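// IEEE 754 comparison semantics: feq/flt/fle produce 0 whenever either
// operand is a NaN, so every predicate below is false on NaN inputs except
// NE, which is computed as the negation of EQ and is therefore true.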
void TurboAssembler::CompareF32(Register rd, FPUCondition cc, FPURegister cmp1,
                                FPURegister cmp2) {
  switch (cc) {
    case EQ:
      feq_s(rd, cmp1, cmp2);
      break;
    case NE:
      feq_s(rd, cmp1, cmp2);
      NegateBool(rd, rd);
      break;
    case LT:
      flt_s(rd, cmp1, cmp2);
      break;
    case GE:
      fle_s(rd, cmp2, cmp1);
      break;
    case LE:
      fle_s(rd, cmp1, cmp2);
      break;
    case GT:
      flt_s(rd, cmp2, cmp1);
      break;
    default:
      UNREACHABLE();
  }
}

void TurboAssembler::CompareF64(Register rd, FPUCondition cc, FPURegister cmp1,
                                FPURegister cmp2) {
  switch (cc) {
    case EQ:
      feq_d(rd, cmp1, cmp2);
      break;
    case NE:
      feq_d(rd, cmp1, cmp2);
      NegateBool(rd, rd);
      break;
    case LT:
      flt_d(rd, cmp1, cmp2);
      break;
    case GE:
      fle_d(rd, cmp2, cmp1);
      break;
    case LE:
      fle_d(rd, cmp1, cmp2);
      break;
    case GT:
      flt_d(rd, cmp2, cmp1);
      break;
    default:
      UNREACHABLE();
  }
}

void TurboAssembler::CompareIsNotNanF32(Register rd, FPURegister cmp1,
                                        FPURegister cmp2) {
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register scratch = temps.Acquire();

  feq_s(rd, cmp1, cmp1);       // rd <- !isNaN(cmp1)
  feq_s(scratch, cmp2, cmp2);  // scratch <- !isNaN(cmp2)
  And(rd, rd, scratch);        // rd <- !isNaN(cmp1) && !isNaN(cmp2)
}

void TurboAssembler::CompareIsNotNanF64(Register rd, FPURegister cmp1,
                                        FPURegister cmp2) {
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register scratch = temps.Acquire();

  feq_d(rd, cmp1, cmp1);       // rd <- !isNaN(cmp1)
  feq_d(scratch, cmp2, cmp2);  // scratch <- !isNaN(cmp2)
  And(rd, rd, scratch);        // rd <- !isNaN(cmp1) && !isNaN(cmp2)
}

void TurboAssembler::CompareIsNanF32(Register rd, FPURegister cmp1,
                                     FPURegister cmp2) {
  CompareIsNotNanF32(rd, cmp1, cmp2);  // rd <- !isNaN(cmp1) && !isNaN(cmp2)
  Xor(rd, rd, 1);                      // rd <- isNaN(cmp1) || isNaN(cmp2)
}

void TurboAssembler::CompareIsNanF64(Register rd, FPURegister cmp1,
                                     FPURegister cmp2) {
  CompareIsNotNanF64(rd, cmp1, cmp2);  // rd <- !isNaN(cmp1) && !isNaN(cmp2)
  Xor(rd, rd, 1);                      // rd <- isNaN(cmp1) || isNaN(cmp2)
}

void TurboAssembler::BranchTrueShortF(Register rs, Label* target) {
  Branch(target, not_equal, rs, Operand(zero_reg));
}

void TurboAssembler::BranchFalseShortF(Register rs, Label* target) {
  Branch(target, equal, rs, Operand(zero_reg));
}

void TurboAssembler::BranchTrueF(Register rs, Label* target) {
  bool long_branch =
      target->is_bound() ? !is_near(target) : is_trampoline_emitted();
  if (long_branch) {
    Label skip;
    BranchFalseShortF(rs, &skip);
    BranchLong(target);
    bind(&skip);
  } else {
    BranchTrueShortF(rs, target);
  }
}

void TurboAssembler::BranchFalseF(Register rs, Label* target) {
  bool long_branch =
      target->is_bound() ? !is_near(target) : is_trampoline_emitted();
  if (long_branch) {
    Label skip;
    BranchTrueShortF(rs, &skip);
    BranchLong(target);
    bind(&skip);
  } else {
    BranchFalseShortF(rs, target);
  }
}

void TurboAssembler::InsertHighWordF64(FPURegister dst, Register src_high) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);

  DCHECK(src_high != scratch2 && src_high != scratch);

  fmv_x_d(scratch, dst);
  slli(scratch2, src_high, 32);
  slli(scratch, scratch, 32);
  srli(scratch, scratch, 32);
  or_(scratch, scratch, scratch2);
  fmv_d_x(dst, scratch);
}

void TurboAssembler::InsertLowWordF64(FPURegister dst, Register src_low) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);

  DCHECK(src_low != scratch && src_low != scratch2);
  fmv_x_d(scratch, dst);
  slli(scratch2, src_low, 32);
  srli(scratch2, scratch2, 32);
  srli(scratch, scratch, 32);
  slli(scratch, scratch, 32);
  or_(scratch, scratch, scratch2);
  fmv_d_x(dst, scratch);
}

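// kDoubleRegZero caches a zero constant (0.0f or 0.0, tracked by
// has_single_zero_reg_set_ / has_double_zero_reg_set_), so reloading +0.0
// or -0.0 reduces to a register move or a sign negation instead of a full
// li + fmv sequence.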
void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint32_t src) {
  // Handle special values first.
  if (src == bit_cast<uint32_t>(0.0f) && has_single_zero_reg_set_) {
    if (dst != kDoubleRegZero) fmv_s(dst, kDoubleRegZero);
  } else if (src == bit_cast<uint32_t>(-0.0f) && has_single_zero_reg_set_) {
    Neg_s(dst, kDoubleRegZero);
  } else {
    if (dst == kDoubleRegZero) {
      DCHECK(src == bit_cast<uint32_t>(0.0f));
      fmv_w_x(dst, zero_reg);
      has_single_zero_reg_set_ = true;
      has_double_zero_reg_set_ = false;
    } else {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      li(scratch, Operand(static_cast<int32_t>(src)));
      fmv_w_x(dst, scratch);
    }
  }
}

void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint64_t src) {
  // Handle special values first.
  if (src == bit_cast<uint64_t>(0.0) && has_double_zero_reg_set_) {
    if (dst != kDoubleRegZero) fmv_d(dst, kDoubleRegZero);
  } else if (src == bit_cast<uint64_t>(-0.0) && has_double_zero_reg_set_) {
    Neg_d(dst, kDoubleRegZero);
  } else {
    if (dst == kDoubleRegZero) {
      DCHECK(src == bit_cast<uint64_t>(0.0));
      fmv_d_x(dst, zero_reg);
      has_double_zero_reg_set_ = true;
      has_single_zero_reg_set_ = false;
    } else {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      li(scratch, Operand(src));
      fmv_d_x(dst, scratch);
    }
  }
}

void TurboAssembler::CompareI(Register rd, Register rs, const Operand& rt,
                              Condition cond) {
  switch (cond) {
    case eq:
      Seq(rd, rs, rt);
      break;
    case ne:
      Sne(rd, rs, rt);
      break;

    // Signed comparison.
    case greater:
      Sgt(rd, rs, rt);
      break;
    case greater_equal:
      Sge(rd, rs, rt);  // rs >= rt
      break;
    case less:
      Slt(rd, rs, rt);  // rs < rt
      break;
    case less_equal:
      Sle(rd, rs, rt);  // rs <= rt
      break;

    // Unsigned comparison.
    case Ugreater:
      Sgtu(rd, rs, rt);  // rs > rt
      break;
    case Ugreater_equal:
      Sgeu(rd, rs, rt);  // rs >= rt
      break;
    case Uless:
      Sltu(rd, rs, rt);  // rs < rt
      break;
    case Uless_equal:
      Sleu(rd, rs, rt);  // rs <= rt
      break;
    case cc_always:
      UNREACHABLE();
    default:
      UNREACHABLE();
  }
}

// dest <- (condition != 0 ? zero : dest)
void TurboAssembler::LoadZeroIfConditionNotZero(Register dest,
                                                Register condition) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  seqz(scratch, condition);
  // neg + and may be more efficient than mul(dest, dest, scratch)
  neg(scratch, scratch);  // 0 is still 0, 1 becomes all 1s
  and_(dest, dest, scratch);
}

// dest <- (condition == 0 ? 0 : dest)
void TurboAssembler::LoadZeroIfConditionZero(Register dest,
                                             Register condition) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  snez(scratch, condition);
  // neg + and may be more efficient than mul(dest, dest, scratch)
  neg(scratch, scratch);  // 0 is still 0, 1 becomes all 1s
  and_(dest, dest, scratch);
}

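// Example trace of the binary search below for xx = 0xF0: the 16- and
// 8-bit probes are zero, the 4-bit probe gives y = 0xF (n = 28, x = 0xF),
// the 2-bit probe gives y = 3 (n = 26, x = 3), and the final 1-bit probe
// is nonzero, so rd = n - 2 = 24, which is indeed clz32(0xF0).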
void TurboAssembler::Clz32(Register rd, Register xx) {
  // 32 bit unsigned in lower word: count number of leading zeros.
  //  int n = 32;
  //  unsigned y;

  //  y = x >> 16; if (y != 0) { n = n - 16; x = y; }
  //  y = x >> 8;  if (y != 0) { n = n - 8;  x = y; }
  //  y = x >> 4;  if (y != 0) { n = n - 4;  x = y; }
  //  y = x >> 2;  if (y != 0) { n = n - 2;  x = y; }
  //  y = x >> 1;  if (y != 0) { rd = n - 2; return; }
  //  rd = n - x;

  Label L0, L1, L2, L3, L4;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register x = rd;
  Register y = temps.Acquire();
  Register n = temps.Acquire();
  DCHECK(xx != y && xx != n);
  Move(x, xx);
  li(n, Operand(32));
  srliw(y, x, 16);
  BranchShort(&L0, eq, y, Operand(zero_reg));
  Move(x, y);
  addiw(n, n, -16);
  bind(&L0);
  srliw(y, x, 8);
  BranchShort(&L1, eq, y, Operand(zero_reg));
  addiw(n, n, -8);
  Move(x, y);
  bind(&L1);
  srliw(y, x, 4);
  BranchShort(&L2, eq, y, Operand(zero_reg));
  addiw(n, n, -4);
  Move(x, y);
  bind(&L2);
  srliw(y, x, 2);
  BranchShort(&L3, eq, y, Operand(zero_reg));
  addiw(n, n, -2);
  Move(x, y);
  bind(&L3);
  srliw(y, x, 1);
  subw(rd, n, x);
  BranchShort(&L4, eq, y, Operand(zero_reg));
  addiw(rd, n, -2);
  bind(&L4);
}

void TurboAssembler::Clz64(Register rd, Register xx) {
  // 64 bit: count number of leading zeros.
  //  int n = 64;
  //  unsigned y;

  //  y = x >> 32; if (y != 0) { n = n - 32; x = y; }
  //  y = x >> 16; if (y != 0) { n = n - 16; x = y; }
  //  y = x >> 8;  if (y != 0) { n = n - 8;  x = y; }
  //  y = x >> 4;  if (y != 0) { n = n - 4;  x = y; }
  //  y = x >> 2;  if (y != 0) { n = n - 2;  x = y; }
  //  y = x >> 1;  if (y != 0) { rd = n - 2; return; }
  //  rd = n - x;

  Label L0, L1, L2, L3, L4, L5;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register x = rd;
  Register y = temps.Acquire();
  Register n = temps.Acquire();
  DCHECK(xx != y && xx != n);
  Move(x, xx);
  li(n, Operand(64));
  srli(y, x, 32);
  BranchShort(&L0, eq, y, Operand(zero_reg));
  addiw(n, n, -32);
  Move(x, y);
  bind(&L0);
  srli(y, x, 16);
  BranchShort(&L1, eq, y, Operand(zero_reg));
  addiw(n, n, -16);
  Move(x, y);
  bind(&L1);
  srli(y, x, 8);
  BranchShort(&L2, eq, y, Operand(zero_reg));
  addiw(n, n, -8);
  Move(x, y);
  bind(&L2);
  srli(y, x, 4);
  BranchShort(&L3, eq, y, Operand(zero_reg));
  addiw(n, n, -4);
  Move(x, y);
  bind(&L3);
  srli(y, x, 2);
  BranchShort(&L4, eq, y, Operand(zero_reg));
  addiw(n, n, -2);
  Move(x, y);
  bind(&L4);
  srli(y, x, 1);
  subw(rd, n, x);
  BranchShort(&L5, eq, y, Operand(zero_reg));
  addiw(rd, n, -2);
  bind(&L5);
}

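// The trick below: (x - 1) ^ x sets the lowest set bit of x and every bit
// below it; ANDing that with (x - 1) clears the lowest set bit, leaving a
// mask of just the former trailing zeroes. E.g. x = 0b01101000 yields
// 0b00000111, whose leading-zero count then converts to x's 3 trailing
// zeroes.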
void TurboAssembler::Ctz32(Register rd, Register rs) {
  // Convert trailing zeroes to trailing ones, and bits to their left
  // to zeroes.

  BlockTrampolinePoolScope block_trampoline_pool(this);
  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs, -1);
    Xor(rd, scratch, rs);
    And(rd, rd, scratch);
    // Count number of leading zeroes.
  }
  Clz32(rd, rd);
  {
    // Subtract number of leading zeroes from 32 to get number of trailing
    // ones. Remember that the trailing ones were formerly trailing zeroes.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    li(scratch, 32);
    Sub32(rd, scratch, rd);
  }
}

void TurboAssembler::Ctz64(Register rd, Register rs) {
  // Convert trailing zeroes to trailing ones, and bits to their left
  // to zeroes.

  BlockTrampolinePoolScope block_trampoline_pool(this);
  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Add64(scratch, rs, -1);
    Xor(rd, scratch, rs);
    And(rd, rd, scratch);
    // Count number of leading zeroes.
  }
  Clz64(rd, rd);
  {
    // Subtract number of leading zeroes from 64 to get number of trailing
    // ones. Remember that the trailing ones were formerly trailing zeroes.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    li(scratch, 64);
    Sub64(rd, scratch, rd);
  }
}

void TurboAssembler::Popcnt32(Register rd, Register rs, Register scratch) {
  DCHECK_NE(scratch, rs);
  DCHECK_NE(scratch, rd);
  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  //
  // A generalization of the best bit counting method to integers of
  // bit-widths up to 128 (parameterized by type T) is this:
  //
  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
  //
  // There are algorithms which are faster in the cases where very few
  // bits are set, but the algorithm here attempts to minimize the total
  // number of instructions executed even when a large number of bits
  // are set. It takes about 20 instructions.
  // uint32_t B0 = 0x55555555;     // (T)~(T)0/3
  // uint32_t B1 = 0x33333333;     // (T)~(T)0/15*3
  // uint32_t B2 = 0x0F0F0F0F;     // (T)~(T)0/255*15
  // uint32_t value = 0x01010101;  // (T)~(T)0/255

  uint32_t shift = 24;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register scratch2 = temps.Acquire();
  Register value = temps.Acquire();
  DCHECK((rd != value) && (rs != value));
  li(value, 0x01010101);     // value = 0x01010101;
  li(scratch2, 0x55555555);  // B0 = 0x55555555;
  Srl32(scratch, rs, 1);
  And(scratch, scratch, scratch2);
  Sub32(scratch, rs, scratch);
  li(scratch2, 0x33333333);  // B1 = 0x33333333;
  slli(rd, scratch2, 4);
  or_(scratch2, scratch2, rd);
  And(rd, scratch, scratch2);
  Srl32(scratch, scratch, 2);
  And(scratch, scratch, scratch2);
  Add32(scratch, rd, scratch);
  srliw(rd, scratch, 4);
  Add32(rd, rd, scratch);
  li(scratch2, 0xF);
  Mul32(scratch2, value, scratch2);  // B2 = 0x0F0F0F0F;
  And(rd, rd, scratch2);
  Mul32(rd, rd, value);
  Srl32(rd, rd, shift);
}

void TurboAssembler::Popcnt64(Register rd, Register rs, Register scratch) {
  DCHECK_NE(scratch, rs);
  DCHECK_NE(scratch, rd);
  // uint64_t B0 = 0x5555555555555555l;     // (T)~(T)0/3
  // uint64_t B1 = 0x3333333333333333l;     // (T)~(T)0/15*3
  // uint64_t B2 = 0x0F0F0F0F0F0F0F0Fl;     // (T)~(T)0/255*15
  // uint64_t value = 0x0101010101010101l;  // (T)~(T)0/255
  // The final shift is (sizeof(T) - 1) * BITS_PER_BYTE = 56, emitted below
  // as 32 + shift.

  uint64_t shift = 24;
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register scratch2 = temps.Acquire();
  Register value = temps.Acquire();
  DCHECK((rd != value) && (rs != value));
  li(value, 0x1111111111111111l);  // value = 0x1111111111111111l;
  li(scratch2, 5);
  Mul64(scratch2, value, scratch2);  // B0 = 0x5555555555555555l;
  Srl64(scratch, rs, 1);
  And(scratch, scratch, scratch2);
  Sub64(scratch, rs, scratch);
  li(scratch2, 3);
  Mul64(scratch2, value, scratch2);  // B1 = 0x3333333333333333l;
  And(rd, scratch, scratch2);
  Srl64(scratch, scratch, 2);
  And(scratch, scratch, scratch2);
  Add64(scratch, rd, scratch);
  Srl64(rd, scratch, 4);
  Add64(rd, rd, scratch);
  li(scratch2, 0xF);
  li(value, 0x0101010101010101l);    // value = 0x0101010101010101l;
  Mul64(scratch2, value, scratch2);  // B2 = 0x0F0F0F0F0F0F0F0Fl;
  And(rd, rd, scratch2);
  Mul64(rd, rd, value);
  srli(rd, rd, 32 + shift);
}

void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
                                                DoubleRegister double_input,
                                                Label* done) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  // Trunc_w_d sets scratch to 1 if the conversion raised no exception and
  // to 0 otherwise (see RoundFloatingPointToInteger).
  Trunc_w_d(result, double_input, scratch);
  // If we had no exceptions (i.e., scratch == 1) we are done.
  Branch(done, eq, scratch, Operand(1));
}

void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
                                       Register result,
                                       DoubleRegister double_input,
                                       StubCallMode stub_mode) {
  Label done;

  TryInlineTruncateDoubleToI(result, double_input, &done);

  // If we fell through, the inline version didn't succeed - call the stub
  // instead.
  push(ra);
  Sub64(sp, sp, Operand(kDoubleSize));  // Put input on stack.
  fsd(double_input, sp, 0);

  if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
    Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
  } else {
    Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
  }
  ld(result, sp, 0);

  Add64(sp, sp, Operand(kDoubleSize));
  pop(ra);

  bind(&done);
}

// BRANCH_ARGS_CHECK checks that conditional jump arguments are correct.
#define BRANCH_ARGS_CHECK(cond, rs, rt)                                  \
  DCHECK((cond == cc_always && rs == zero_reg && rt.rm() == zero_reg) || \
         (cond != cc_always && (rs != zero_reg || rt.rm() != zero_reg)))

void TurboAssembler::Branch(int32_t offset) {
  DCHECK(is_int21(offset));
  BranchShort(offset);
}

void TurboAssembler::Branch(int32_t offset, Condition cond, Register rs,
                            const Operand& rt, Label::Distance near_jump) {
  bool is_near = BranchShortCheck(offset, nullptr, cond, rs, rt);
  DCHECK(is_near);
  USE(is_near);
}

void TurboAssembler::Branch(Label* L) {
  if (L->is_bound()) {
    if (is_near(L)) {
      BranchShort(L);
    } else {
      BranchLong(L);
    }
  } else {
    if (is_trampoline_emitted()) {
      BranchLong(L);
    } else {
      BranchShort(L);
    }
  }
}

void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
                            const Operand& rt, Label::Distance near_jump) {
  if (L->is_bound()) {
    if (!BranchShortCheck(0, L, cond, rs, rt)) {
      if (cond != cc_always) {
        Label skip;
        Condition neg_cond = NegateCondition(cond);
        BranchShort(&skip, neg_cond, rs, rt);
        BranchLong(L);
        bind(&skip);
      } else {
        BranchLong(L);
        EmitConstPoolWithJumpIfNeeded();
      }
    }
  } else {
    if (is_trampoline_emitted() && near_jump == Label::Distance::kFar) {
      if (cond != cc_always) {
        Label skip;
        Condition neg_cond = NegateCondition(cond);
        BranchShort(&skip, neg_cond, rs, rt);
        BranchLong(L);
        bind(&skip);
      } else {
        BranchLong(L);
        EmitConstPoolWithJumpIfNeeded();
      }
    } else {
      BranchShort(L, cond, rs, rt);
    }
  }
}

void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
                            RootIndex index) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  LoadRoot(scratch, index);
  Branch(L, cond, rs, Operand(scratch));
}

void TurboAssembler::BranchShortHelper(int32_t offset, Label* L) {
  DCHECK(L == nullptr || offset == 0);
  offset = GetOffset(offset, L, OffsetSize::kOffset21);
  j(offset);
}

void TurboAssembler::BranchShort(int32_t offset) {
  DCHECK(is_int21(offset));
  BranchShortHelper(offset, nullptr);
}

void TurboAssembler::BranchShort(Label* L) { BranchShortHelper(0, L); }

int32_t TurboAssembler::GetOffset(int32_t offset, Label* L, OffsetSize bits) {
  if (L) {
    offset = branch_offset_helper(L, bits);
  } else {
    DCHECK(is_intn(offset, bits));
  }
  return offset;
}

Register TurboAssembler::GetRtAsRegisterHelper(const Operand& rt,
                                               Register scratch) {
  Register r2 = no_reg;
  if (rt.is_reg()) {
    r2 = rt.rm();
  } else {
    r2 = scratch;
    li(r2, rt);
  }

  return r2;
}

bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset,
                                     OffsetSize bits) {
  if (!is_near(L, bits)) return false;
  *offset = GetOffset(*offset, L, bits);
  return true;
}

bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset, OffsetSize bits,
                                     Register* scratch, const Operand& rt) {
  if (!is_near(L, bits)) return false;
  *scratch = GetRtAsRegisterHelper(rt, *scratch);
  *offset = GetOffset(*offset, L, bits);
  return true;
}

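// RISC-V conditional branches (B-type) have a 13-bit signed range of about
// +/-4 KiB (OffsetSize::kOffset13), while the unconditional j/jal (J-type)
// reaches about +/-1 MiB (OffsetSize::kOffset21). That is why the helper
// below strength-reduces always-taken comparisons (e.g. eq with rs ==
// rt.rm()) to a plain j and emits nothing at all for never-taken ones.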
bool TurboAssembler::BranchShortHelper(int32_t offset, Label* L, Condition cond,
                                       Register rs, const Operand& rt) {
  DCHECK(L == nullptr || offset == 0);
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register scratch = no_reg;
  if (!rt.is_reg()) {
    scratch = temps.Acquire();
    li(scratch, rt);
  } else {
    scratch = rt.rm();
  }
  {
    BlockTrampolinePoolScope block_trampoline_pool(this);
    switch (cond) {
      case cc_always:
        if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
        j(offset);
        EmitConstPoolWithJumpIfNeeded();
        break;
      case eq:
        // rs == rt
        if (rt.is_reg() && rs == rt.rm()) {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
          j(offset);
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          beq(rs, scratch, offset);
        }
        break;
      case ne:
        // rs != rt
        if (rt.is_reg() && rs == rt.rm()) {
          break;  // No code needs to be emitted.
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bne(rs, scratch, offset);
        }
        break;

      // Signed comparison.
      case greater:
        // rs > rt
        if (rt.is_reg() && rs == rt.rm()) {
          break;  // No code needs to be emitted.
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bgt(rs, scratch, offset);
        }
        break;
      case greater_equal:
        // rs >= rt
        if (rt.is_reg() && rs == rt.rm()) {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
          j(offset);
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bge(rs, scratch, offset);
        }
        break;
      case less:
        // rs < rt
        if (rt.is_reg() && rs == rt.rm()) {
          break;  // No code needs to be emitted.
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          blt(rs, scratch, offset);
        }
        break;
      case less_equal:
        // rs <= rt
        if (rt.is_reg() && rs == rt.rm()) {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
          j(offset);
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          ble(rs, scratch, offset);
        }
        break;

      // Unsigned comparison.
      case Ugreater:
        // rs > rt
        if (rt.is_reg() && rs == rt.rm()) {
          break;  // No code needs to be emitted.
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bgtu(rs, scratch, offset);
        }
        break;
      case Ugreater_equal:
        // rs >= rt
        if (rt.is_reg() && rs == rt.rm()) {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
          j(offset);
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bgeu(rs, scratch, offset);
        }
        break;
      case Uless:
        // rs < rt
        if (rt.is_reg() && rs == rt.rm()) {
          break;  // No code needs to be emitted.
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bltu(rs, scratch, offset);
        }
        break;
      case Uless_equal:
        // rs <= rt
        if (rt.is_reg() && rs == rt.rm()) {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
          j(offset);
        } else {
          if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
          bleu(rs, scratch, offset);
        }
        break;
      default:
        UNREACHABLE();
    }
  }

  CheckTrampolinePoolQuick(1);
  return true;
}

bool TurboAssembler::BranchShortCheck(int32_t offset, Label* L, Condition cond,
                                      Register rs, const Operand& rt) {
  BRANCH_ARGS_CHECK(cond, rs, rt);

  if (!L) {
    DCHECK(is_int13(offset));
    return BranchShortHelper(offset, nullptr, cond, rs, rt);
  } else {
    DCHECK_EQ(offset, 0);
    return BranchShortHelper(0, L, cond, rs, rt);
  }
}

void TurboAssembler::BranchShort(int32_t offset, Condition cond, Register rs,
                                 const Operand& rt) {
  BranchShortCheck(offset, nullptr, cond, rs, rt);
}

void TurboAssembler::BranchShort(Label* L, Condition cond, Register rs,
                                 const Operand& rt) {
  BranchShortCheck(0, L, cond, rs, rt);
}

void TurboAssembler::BranchAndLink(int32_t offset) {
  BranchAndLinkShort(offset);
}

void TurboAssembler::BranchAndLink(int32_t offset, Condition cond, Register rs,
                                   const Operand& rt) {
  bool is_near = BranchAndLinkShortCheck(offset, nullptr, cond, rs, rt);
  DCHECK(is_near);
  USE(is_near);
}

void TurboAssembler::BranchAndLink(Label* L) {
  if (L->is_bound()) {
    if (is_near(L)) {
      BranchAndLinkShort(L);
    } else {
      BranchAndLinkLong(L);
    }
  } else {
    if (is_trampoline_emitted()) {
      BranchAndLinkLong(L);
    } else {
      BranchAndLinkShort(L);
    }
  }
}

void TurboAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
                                   const Operand& rt) {
  if (L->is_bound()) {
    if (!BranchAndLinkShortCheck(0, L, cond, rs, rt)) {
      Label skip;
      Condition neg_cond = NegateCondition(cond);
      BranchShort(&skip, neg_cond, rs, rt);
      BranchAndLinkLong(L);
      bind(&skip);
    }
  } else {
    if (is_trampoline_emitted()) {
      Label skip;
      Condition neg_cond = NegateCondition(cond);
      BranchShort(&skip, neg_cond, rs, rt);
      BranchAndLinkLong(L);
      bind(&skip);
    } else {
      BranchAndLinkShortCheck(0, L, cond, rs, rt);
    }
  }
}

void TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L) {
  DCHECK(L == nullptr || offset == 0);
  offset = GetOffset(offset, L, OffsetSize::kOffset21);
  jal(offset);
}

void TurboAssembler::BranchAndLinkShort(int32_t offset) {
  DCHECK(is_int21(offset));
  BranchAndLinkShortHelper(offset, nullptr);
}

void TurboAssembler::BranchAndLinkShort(Label* L) {
  BranchAndLinkShortHelper(0, L);
}

bool TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L,
                                              Condition cond, Register rs,
                                              const Operand& rt) {
  DCHECK(L == nullptr || offset == 0);
  if (!is_near(L, OffsetSize::kOffset21)) return false;

  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);

  if (cond == cc_always) {
    offset = GetOffset(offset, L, OffsetSize::kOffset21);
    jal(offset);
  } else {
    Branch(kInstrSize * 2, NegateCondition(cond), rs,
           Operand(GetRtAsRegisterHelper(rt, scratch)));
    offset = GetOffset(offset, L, OffsetSize::kOffset21);
    jal(offset);
  }

  return true;
}

bool TurboAssembler::BranchAndLinkShortCheck(int32_t offset, Label* L,
                                             Condition cond, Register rs,
                                             const Operand& rt) {
  BRANCH_ARGS_CHECK(cond, rs, rt);

  if (!L) {
    DCHECK(is_int21(offset));
    return BranchAndLinkShortHelper(offset, nullptr, cond, rs, rt);
  } else {
    DCHECK_EQ(offset, 0);
    return BranchAndLinkShortHelper(0, L, cond, rs, rt);
  }
}

void TurboAssembler::LoadFromConstantsTable(Register destination,
                                            int constant_index) {
  DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
  LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
  LoadTaggedPointerField(
      destination, FieldMemOperand(destination, FixedArray::OffsetOfElementAt(
                                                    constant_index)));
}

void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
  Ld(destination, MemOperand(kRootRegister, offset));
}

void TurboAssembler::LoadRootRegisterOffset(Register destination,
                                            intptr_t offset) {
  if (offset == 0) {
    Move(destination, kRootRegister);
  } else {
    Add64(destination, kRootRegister, Operand(offset));
  }
}

void TurboAssembler::Jump(Register target, Condition cond, Register rs,
                          const Operand& rt) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (cond == cc_always) {
    jr(target);
    ForceConstantPoolEmissionWithoutJump();
  } else {
    BRANCH_ARGS_CHECK(cond, rs, rt);
    Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
    jr(target);
  }
}

void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
                          Condition cond, Register rs, const Operand& rt) {
  Label skip;
  if (cond != cc_always) {
    Branch(&skip, NegateCondition(cond), rs, rt);
  }
  {
    BlockTrampolinePoolScope block_trampoline_pool(this);
    li(t6, Operand(target, rmode));
    Jump(t6, al, zero_reg, Operand(zero_reg));
    EmitConstPoolWithJumpIfNeeded();
    bind(&skip);
  }
}

void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond,
                          Register rs, const Operand& rt) {
  DCHECK(!RelocInfo::IsCodeTarget(rmode));
  Jump(static_cast<intptr_t>(target), rmode, cond, rs, rt);
}

void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
                          Condition cond, Register rs, const Operand& rt) {
  DCHECK(RelocInfo::IsCodeTarget(rmode));

  BlockTrampolinePoolScope block_trampoline_pool(this);
  Builtin builtin = Builtin::kNoBuiltinId;
  bool target_is_isolate_independent_builtin =
      isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
      Builtins::IsIsolateIndependent(builtin);
  if (target_is_isolate_independent_builtin &&
      options().use_pc_relative_calls_and_jumps) {
    int32_t code_target_index = AddCodeTarget(code);
    Label skip;
    BlockTrampolinePoolScope block_trampoline_pool(this);
    if (cond != al) {
      Branch(&skip, NegateCondition(cond), rs, rt);
    }
    RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
    GenPCRelativeJump(t6, code_target_index);
    bind(&skip);
    return;
  } else if (root_array_available_ && options().isolate_independent_code &&
             target_is_isolate_independent_builtin) {
    int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
                 IsolateData::builtin_entry_table_offset();
    Ld(t6, MemOperand(kRootRegister, offset));
    Jump(t6, cond, rs, rt);
    return;
  } else if (options().inline_offheap_trampolines &&
             target_is_isolate_independent_builtin) {
    // Inline the trampoline.
    RecordCommentForOffHeapTrampoline(builtin);
    li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
    Jump(t6, cond, rs, rt);
    RecordComment("]");
    return;
  }

  int32_t target_index = AddCodeTarget(code);
  Jump(static_cast<intptr_t>(target_index), rmode, cond, rs, rt);
}

void TurboAssembler::Jump(const ExternalReference& reference) {
  li(t6, reference);
  Jump(t6);
}

// Note: To call gcc-compiled C code on riscv64, you must call through t6.
void TurboAssembler::Call(Register target, Condition cond, Register rs,
                          const Operand& rt) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  if (cond == cc_always) {
    jalr(ra, target, 0);
  } else {
    BRANCH_ARGS_CHECK(cond, rs, rt);
    Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
    jalr(ra, target, 0);
  }
}

void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
                                     unsigned higher_limit,
                                     Label* on_in_range) {
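  // Standard unsigned range-check trick: lower <= value <= higher iff
  // (value - lower) <= (higher - lower) as an unsigned comparison, since
  // values below lower_limit wrap around to large unsigned numbers. E.g.
  // for the range [10, 20], value 5 yields 5 - 10 = 0xFF...FB, which fails
  // the Uless_equal check against 10.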
  if (lower_limit != 0) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    Sub64(scratch, value, Operand(lower_limit));
    Branch(on_in_range, Uless_equal, scratch,
           Operand(higher_limit - lower_limit));
  } else {
    Branch(on_in_range, Uless_equal, value,
           Operand(higher_limit - lower_limit));
  }
}

void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond,
                          Register rs, const Operand& rt) {
  li(t6, Operand(static_cast<int64_t>(target), rmode), ADDRESS_LOAD);
  Call(t6, cond, rs, rt);
}

void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
                          Condition cond, Register rs, const Operand& rt) {
  Builtin builtin = Builtin::kNoBuiltinId;
  bool target_is_isolate_independent_builtin =
      isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
      Builtins::IsIsolateIndependent(builtin);
  if (target_is_isolate_independent_builtin &&
      options().use_pc_relative_calls_and_jumps) {
    int32_t code_target_index = AddCodeTarget(code);
    Label skip;
    BlockTrampolinePoolScope block_trampoline_pool(this);
    RecordCommentForOffHeapTrampoline(builtin);
    if (cond != al) {
      Branch(&skip, NegateCondition(cond), rs, rt);
    }
    RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
    GenPCRelativeJumpAndLink(t6, code_target_index);
    bind(&skip);
    RecordComment("]");
    return;
  } else if (root_array_available_ && options().isolate_independent_code &&
             target_is_isolate_independent_builtin) {
    int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
                 IsolateData::builtin_entry_table_offset();
    LoadRootRelative(t6, offset);
    Call(t6, cond, rs, rt);
    return;
  } else if (options().inline_offheap_trampolines &&
             target_is_isolate_independent_builtin) {
    // Inline the trampoline.
    RecordCommentForOffHeapTrampoline(builtin);
    li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
    Call(t6, cond, rs, rt);
    RecordComment("]");
    return;
  }

  DCHECK(RelocInfo::IsCodeTarget(rmode));
  DCHECK(code->IsExecutable());
  int32_t target_index = AddCodeTarget(code);
  Call(static_cast<Address>(target_index), rmode, cond, rs, rt);
}

void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin) {
  STATIC_ASSERT(kSystemPointerSize == 8);
  STATIC_ASSERT(kSmiTagSize == 1);
  STATIC_ASSERT(kSmiTag == 0);

  // The builtin register contains the builtin index as a Smi.
  SmiUntag(builtin, builtin);
  CalcScaledAddress(builtin, kRootRegister, builtin, kSystemPointerSizeLog2);
  Ld(builtin, MemOperand(builtin, IsolateData::builtin_entry_table_offset()));
}

void TurboAssembler::CallBuiltinByIndex(Register builtin) {
  LoadEntryFromBuiltinIndex(builtin);
  Call(builtin);
}

void TurboAssembler::CallBuiltin(Builtin builtin) {
  RecordCommentForOffHeapTrampoline(builtin);
  if (options().short_builtin_calls) {
    Call(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
  } else {
    Call(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
  }
  RecordComment("]");
}

void TurboAssembler::TailCallBuiltin(Builtin builtin) {
  RecordCommentForOffHeapTrampoline(builtin);
  if (options().short_builtin_calls) {
    Jump(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
  } else {
    Jump(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
  }
  RecordComment("]");
}

void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
                                          Register destination) {
  Ld(destination, EntryFromBuiltinAsOperand(builtin));
}

MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
  DCHECK(root_array_available());
  return MemOperand(kRootRegister,
                    IsolateData::BuiltinEntrySlotOffset(builtin));
}

void TurboAssembler::PatchAndJump(Address target) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
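  // The sequence below is auipc / ld / jr / nop (4 instructions, 16 bytes)
  // followed by the 8-byte target address embedded inline. The ld reads
  // that address pc-relatively (hence the kInstrSize * 4 offset) and jr
  // jumps to it; the nop keeps the embedded address 8-byte aligned.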
  auipc(scratch, 0);  // Load PC into scratch
  Ld(t6, MemOperand(scratch, kInstrSize * 4));
  jr(t6);
  nop();  // For alignment
  DCHECK_EQ(reinterpret_cast<uint64_t>(pc_) % 8, 0);
  *reinterpret_cast<uint64_t*>(pc_) = target;  // pc_ is 8-byte aligned here.
  pc_ += sizeof(uint64_t);
}

void TurboAssembler::StoreReturnAddressAndCall(Register target) {
  // This generates the final instruction sequence for calls to C functions
  // once an exit frame has been constructed.
  //
  // Note that this assumes the caller code (i.e. the Code object currently
  // being generated) is immovable or that the callee function cannot trigger
  // GC, since the callee function will return to it.
  //
  // Compute in ra the address to return to after the jalr below, i.e. the
  // address of the instruction that follows the call sequence: auipc loads
  // the current pc into ra, and the addi below advances it past the
  // remaining instructions of the sequence.
  //
  Assembler::BlockTrampolinePoolScope block_trampoline_pool(this);
  int kNumInstructionsToJump = 5;
  if (FLAG_riscv_c_extension) kNumInstructionsToJump = 4;
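  // Sketch of the sequence below (without the C extension):
  //   auipc ra, 0                   (ra = pc of this instruction)
  //   addi  ra, ra, 6 * kInstrSize  (ra = address just past the jalr)
  //   sd    ra, 0(sp)
  //   addi  sp, sp, -kCArgsSlotsSize
  //   mv    t6, target
  //   jalr  t6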
  Label find_ra;
  // Adjust the value in ra to point to the correct return location, one
  // instruction past the real call into C code (the jalr(t6)), and push it.
  // This is the return address of the exit frame.
  auipc(ra, 0);  // Set ra to the current PC
  bind(&find_ra);
  addi(ra, ra,
       (kNumInstructionsToJump + 1) * kInstrSize);  // ra = insn after call

  // This spot was reserved in EnterExitFrame.
  Sd(ra, MemOperand(sp));
  addi(sp, sp, -kCArgsSlotsSize);
  // Stack is still aligned.

  // Call the C routine.
  Mv(t6, target);  // Function pointer to t6 to conform to ABI for PIC.
  jalr(t6);
  // Make sure the stored 'ra' points to this position.
  DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra));
}

void TurboAssembler::Ret(Condition cond, Register rs, const Operand& rt) {
  Jump(ra, cond, rs, rt);
  if (cond == al) {
    ForceConstantPoolEmissionWithoutJump();
  }
}

void TurboAssembler::BranchLong(Label* L) {
  // Generate position independent long branch.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  int64_t imm64 = branch_long_offset(L);
  GenPCRelativeJump(t6, imm64);
  EmitConstPoolWithJumpIfNeeded();
}

void TurboAssembler::BranchAndLinkLong(Label* L) {
  // Generate position independent long branch and link.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  int64_t imm64 = branch_long_offset(L);
  GenPCRelativeJumpAndLink(t6, imm64);
}

void TurboAssembler::DropAndRet(int drop) {
  Add64(sp, sp, drop * kSystemPointerSize);
  Ret();
}

void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1,
                                const Operand& r2) {
  // Both Drop and Ret need to be conditional.
  Label skip;
  if (cond != cc_always) {
    Branch(&skip, NegateCondition(cond), r1, r2);
  }

  Drop(drop);
  Ret();

  if (cond != cc_always) {
    bind(&skip);
  }
}

void TurboAssembler::Drop(int count, Condition cond, Register reg,
                          const Operand& op) {
  if (count <= 0) {
    return;
  }

  Label skip;

  if (cond != al) {
    Branch(&skip, NegateCondition(cond), reg, op);
  }

  Add64(sp, sp, Operand(count * kSystemPointerSize));

  if (cond != al) {
    bind(&skip);
  }
}

void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) {
  if (scratch == no_reg) {
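    // Three-XOR swap: needs no scratch register, but relies on
    // reg1 != reg2 (if they aliased, the first XOR would zero both).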
    Xor(reg1, reg1, Operand(reg2));
    Xor(reg2, reg2, Operand(reg1));
    Xor(reg1, reg1, Operand(reg2));
  } else {
    Mv(scratch, reg1);
    Mv(reg1, reg2);
    Mv(reg2, scratch);
  }
}

void TurboAssembler::Call(Label* target) { BranchAndLink(target); }

void TurboAssembler::LoadAddress(Register dst, Label* target,
                                 RelocInfo::Mode rmode) {
  int32_t offset;
  if (CalculateOffset(target, &offset, OffsetSize::kOffset32)) {
    CHECK(is_int32(offset + 0x800));
    int32_t Hi20 = (offset + 0x800) >> 12;
    int32_t Lo12 = offset << 20 >> 20;
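    // auipc adds (Hi20 << 12) and addi adds the sign-extended Lo12; adding
    // 0x800 first rounds Hi20 up when Lo12 is negative. E.g. for offset
    // 0x1FFF: Lo12 = 0xFFF sign-extends to -1 and Hi20 = 2, giving
    // (2 << 12) - 1 = 0x1FFF as required.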
    BlockTrampolinePoolScope block_trampoline_pool(this);
    auipc(dst, Hi20);
    addi(dst, dst, Lo12);
  } else {
    uint64_t address = jump_address(target);
    li(dst, Operand(address, rmode), ADDRESS_LOAD);
  }
}

void TurboAssembler::Push(Smi smi) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  li(scratch, Operand(smi));
  push(scratch);
}

void TurboAssembler::PushArray(Register array, Register size,
                               PushArrayOrder order) {
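  // Pushes `size` elements of `array`. kReverse iterates from index 0
  // upwards, so array[0] ends up deepest on the stack; the default order
  // iterates from size - 1 down to 0, leaving array[0] on top.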
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  Label loop, entry;
  if (order == PushArrayOrder::kReverse) {
    Mv(scratch, zero_reg);
    jmp(&entry);
    bind(&loop);
    CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
    Ld(scratch2, MemOperand(scratch2));
    push(scratch2);
    Add64(scratch, scratch, Operand(1));
    bind(&entry);
    Branch(&loop, less, scratch, Operand(size));
  } else {
    Mv(scratch, size);
    jmp(&entry);
    bind(&loop);
    CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
    Ld(scratch2, MemOperand(scratch2));
    push(scratch2);
    bind(&entry);
    Add64(scratch, scratch, Operand(-1));
    Branch(&loop, greater_equal, scratch, Operand(zero_reg));
  }
}

void TurboAssembler::Push(Handle<HeapObject> handle) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  li(scratch, Operand(handle));
  push(scratch);
}

// ---------------------------------------------------------------------------
// Exception handling.

void MacroAssembler::PushStackHandler() {
  // Adjust this code if the stack handler layout below changes.
  STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize);

  Push(Smi::zero());  // Padding.

  // Link the current handler as the next handler.
  UseScratchRegisterScope temps(this);
  Register handler_address = temps.Acquire();
  li(handler_address,
     ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
  Register handler = temps.Acquire();
  Ld(handler, MemOperand(handler_address));
  push(handler);

  // Set this new handler as the current one.
  Sd(sp, MemOperand(handler_address));
}

void MacroAssembler::PopStackHandler() {
  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
  pop(a1);
  Add64(sp, sp,
        Operand(static_cast<int64_t>(StackHandlerConstants::kSize -
                                     kSystemPointerSize)));
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  li(scratch,
     ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
  Sd(a1, MemOperand(scratch));
}

void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst,
                                        const DoubleRegister src) {
  // Subtracting 0.0 preserves all inputs except for signalling NaNs, which
  // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0
  // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0.
  fsub_d(dst, src, kDoubleRegZero);
}

void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) {
  Move(dst, fa0);  // Reg fa0 is FP return value.
}

void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) {
  Move(dst, fa0);  // Reg fa0 is FP first argument value.
}

void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(fa0, src); }

void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(fa0, src); }

void TurboAssembler::MovToFloatParameters(DoubleRegister src1,
                                          DoubleRegister src2) {
  const DoubleRegister fparg2 = fa1;
  if (src2 == fa0) {
    DCHECK(src1 != fparg2);
    Move(fparg2, src2);
    Move(fa0, src1);
  } else {
    Move(fa0, src1);
    Move(fparg2, src2);
  }
}

// -----------------------------------------------------------------------------
// JavaScript invokes.

void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
  DCHECK(root_array_available());
  Isolate* isolate = this->isolate();
  ExternalReference limit =
      kind == StackLimitKind::kRealStackLimit
          ? ExternalReference::address_of_real_jslimit(isolate)
          : ExternalReference::address_of_jslimit(isolate);
  DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));

  intptr_t offset =
      TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
  CHECK(is_int32(offset));
  Ld(destination, MemOperand(kRootRegister, static_cast<int32_t>(offset)));
}

void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
                                        Register scratch2,
                                        Label* stack_overflow, Label* done) {
  // Check the stack for overflow. We are not trying to catch
  // interruptions (e.g. debug break and preemption) here, so the "real stack
  // limit" is checked.
  DCHECK(stack_overflow != nullptr || done != nullptr);
  LoadStackLimit(scratch1, StackLimitKind::kRealStackLimit);
  // Make scratch1 the space we have left. The stack might already have
  // overflowed here, in which case scratch1 becomes negative.
  Sub64(scratch1, sp, scratch1);
  // Check if the arguments will overflow the stack.
  Sll64(scratch2, num_args, kSystemPointerSizeLog2);
  // Signed comparison.
  if (stack_overflow != nullptr) {
    Branch(stack_overflow, le, scratch1, Operand(scratch2));
  } else if (done != nullptr) {
    Branch(done, gt, scratch1, Operand(scratch2));
  } else {
    UNREACHABLE();
  }
}

void MacroAssembler::InvokePrologue(Register expected_parameter_count,
                                    Register actual_parameter_count,
                                    Label* done, InvokeType type) {
  Label regular_invoke;

  //  a0: actual arguments count
  //  a1: function (passed through to callee)
  //  a2: expected arguments count

  DCHECK_EQ(actual_parameter_count, a0);
  DCHECK_EQ(expected_parameter_count, a2);

  // If the expected parameter count is equal to the adaptor sentinel, no need
  // to push undefined values as arguments.
  if (kDontAdaptArgumentsSentinel != 0) {
    Branch(&regular_invoke, eq, expected_parameter_count,
           Operand(kDontAdaptArgumentsSentinel));
  }
  // If overapplication or if the actual argument count is equal to the
  // formal parameter count, no need to push extra undefined values.
  Sub64(expected_parameter_count, expected_parameter_count,
        actual_parameter_count);
  Branch(&regular_invoke, le, expected_parameter_count, Operand(zero_reg));

  Label stack_overflow;
  {
    UseScratchRegisterScope temps(this);
    StackOverflowCheck(expected_parameter_count, temps.Acquire(),
                       temps.Acquire(), &stack_overflow);
  }
  // Underapplication. Move the arguments already in the stack, including the
  // receiver and the return address.
  {
    Label copy;
    Register src = a6, dest = a7;
    Move(src, sp);
    Sll64(t0, expected_parameter_count, kSystemPointerSizeLog2);
    Sub64(sp, sp, Operand(t0));
    // Update stack pointer.
    Move(dest, sp);
    Move(t0, actual_parameter_count);
    bind(&copy);
    Ld(t1, MemOperand(src, 0));
    Sd(t1, MemOperand(dest, 0));
    Sub64(t0, t0, Operand(1));
    Add64(src, src, Operand(kSystemPointerSize));
    Add64(dest, dest, Operand(kSystemPointerSize));
    Branch(&copy, gt, t0, Operand(zero_reg));
  }

  // Fill remaining expected arguments with undefined values.
  LoadRoot(t0, RootIndex::kUndefinedValue);
  {
    Label loop;
    bind(&loop);
    Sd(t0, MemOperand(a7, 0));
    Sub64(expected_parameter_count, expected_parameter_count, Operand(1));
    Add64(a7, a7, Operand(kSystemPointerSize));
    Branch(&loop, gt, expected_parameter_count, Operand(zero_reg));
  }
  Branch(&regular_invoke);

  bind(&stack_overflow);
  {
    FrameScope frame(
        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
    CallRuntime(Runtime::kThrowStackOverflow);
    break_(0xCC);
  }
  bind(&regular_invoke);
}

void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
                                    Register expected_parameter_count,
                                    Register actual_parameter_count) {
  Label skip_hook;
  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    li(scratch,
       ExternalReference::debug_hook_on_function_call_address(isolate()));
    Lb(scratch, MemOperand(scratch));
    Branch(&skip_hook, eq, scratch, Operand(zero_reg));
  }
  {
    // Load receiver to pass it later to DebugOnFunctionCall hook.
    UseScratchRegisterScope temps(this);
    Register receiver = temps.Acquire();
    LoadReceiver(receiver, actual_parameter_count);

    FrameScope frame(
        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
    SmiTag(expected_parameter_count);
    Push(expected_parameter_count);

    SmiTag(actual_parameter_count);
    Push(actual_parameter_count);

    if (new_target.is_valid()) {
      Push(new_target);
    }
    Push(fun);
    Push(fun);
    Push(receiver);
    CallRuntime(Runtime::kDebugOnFunctionCall);
    Pop(fun);
    if (new_target.is_valid()) {
      Pop(new_target);
    }

    Pop(actual_parameter_count);
    SmiUntag(actual_parameter_count);

    Pop(expected_parameter_count);
    SmiUntag(expected_parameter_count);
  }
  bind(&skip_hook);
}

void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
                                        Register expected_parameter_count,
                                        Register actual_parameter_count,
                                        InvokeType type) {
  // You can't call a function without a valid frame.
  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
  DCHECK_EQ(function, a1);
  DCHECK_IMPLIES(new_target.is_valid(), new_target == a3);

  // On function call, call into the debugger if necessary.
  CheckDebugHook(function, new_target, expected_parameter_count,
                 actual_parameter_count);

  // Clear the new.target register if not given.
  if (!new_target.is_valid()) {
    LoadRoot(a3, RootIndex::kUndefinedValue);
  }

  Label done;
  InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
  // We call indirectly through the code field in the function to
  // allow recompilation to take effect without changing any of the
  // call sites.
  Register code = kJavaScriptCallCodeStartRegister;
  LoadTaggedPointerField(code,
                         FieldMemOperand(function, JSFunction::kCodeOffset));
  switch (type) {
    case InvokeType::kCall:
      CallCodeObject(code);
      break;
    case InvokeType::kJump:
      JumpCodeObject(code);
      break;
  }

  // Continue here if InvokePrologue handled the invocation itself because of
  // mismatched parameter counts.
  bind(&done);
}

void MacroAssembler::InvokeFunctionWithNewTarget(
    Register function, Register new_target, Register actual_parameter_count,
    InvokeType type) {
  // You can't call a function without a valid frame.
  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());

  // Contract with called JS functions requires that function is passed in a1.
  DCHECK_EQ(function, a1);
  Register expected_parameter_count = a2;
  {
    UseScratchRegisterScope temps(this);
    Register temp_reg = temps.Acquire();
    LoadTaggedPointerField(
        temp_reg,
        FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
    LoadTaggedPointerField(
        cp, FieldMemOperand(function, JSFunction::kContextOffset));
    // The argument count is stored as a uint16_t.
    Lhu(expected_parameter_count,
        FieldMemOperand(temp_reg,
                        SharedFunctionInfo::kFormalParameterCountOffset));
  }
  InvokeFunctionCode(function, new_target, expected_parameter_count,
                     actual_parameter_count, type);
}

void MacroAssembler::InvokeFunction(Register function,
                                    Register expected_parameter_count,
                                    Register actual_parameter_count,
                                    InvokeType type) {
  // You can't call a function without a valid frame.
  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());

  // Contract with called JS functions requires that function is passed in a1.
  DCHECK_EQ(function, a1);

  // Get the function and setup the context.
  LoadTaggedPointerField(cp, FieldMemOperand(a1, JSFunction::kContextOffset));

  InvokeFunctionCode(a1, no_reg, expected_parameter_count,
                     actual_parameter_count, type);
}

// ---------------------------------------------------------------------------
// Support functions.

void MacroAssembler::GetObjectType(Register object, Register map,
                                   Register type_reg) {
  LoadMap(map, object);
  Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
}

void MacroAssembler::GetInstanceTypeRange(Register map, Register type_reg,
                                          InstanceType lower_limit,
                                          Register range) {
  Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
  Sub64(range, type_reg, Operand(lower_limit));
}

// -----------------------------------------------------------------------------
// Wasm
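// The RVV comparison helpers below share one pattern: the vector compare
// writes a lane mask into v0, vmv_vx zeroes dst, and vmerge_vx then writes
// all-ones (-1 in kScratchReg) into exactly the lanes selected by v0,
// producing the all-zeros/all-ones lane results Wasm SIMD comparisons
// require.
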
void TurboAssembler::WasmRvvEq(VRegister dst, VRegister lhs, VRegister rhs,
                               VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmseq_vv(v0, lhs, rhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvNe(VRegister dst, VRegister lhs, VRegister rhs,
                               VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmsne_vv(v0, lhs, rhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvGeS(VRegister dst, VRegister lhs, VRegister rhs,
                                VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmsle_vv(v0, rhs, lhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvGeU(VRegister dst, VRegister lhs, VRegister rhs,
                                VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmsleu_vv(v0, rhs, lhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvGtS(VRegister dst, VRegister lhs, VRegister rhs,
                                VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmslt_vv(v0, rhs, lhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvGtU(VRegister dst, VRegister lhs, VRegister rhs,
                                VSew sew, Vlmul lmul) {
  VU.set(kScratchReg, sew, lmul);
  vmsltu_vv(v0, rhs, lhs);
  li(kScratchReg, -1);
  vmv_vx(dst, zero_reg);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::WasmRvvS128const(VRegister dst, const uint8_t imms[16]) {
  uint64_t imm1 = *(reinterpret_cast<const uint64_t*>(imms));
  uint64_t imm2 = *((reinterpret_cast<const uint64_t*>(imms)) + 1);
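  // Materialize the 128-bit constant as two 64-bit lanes: broadcasting 1
  // into v0 sets mask bit 0, selecting lane 0, into which imm1 is merged;
  // shifting v0 left by one (vsll_vi) then sets mask bit 1, selecting
  // lane 1 for imm2.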
  VU.set(kScratchReg, VSew::E64, Vlmul::m1);
  li(kScratchReg, 1);
  vmv_vx(v0, kScratchReg);
  li(kScratchReg, imm1);
  vmerge_vx(dst, kScratchReg, dst);
  li(kScratchReg, imm2);
  vsll_vi(v0, v0, 1);
  vmerge_vx(dst, kScratchReg, dst);
}

void TurboAssembler::LoadLane(int ts, VRegister dst, uint8_t laneidx,
                              MemOperand src) {
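  // Load the scalar from memory, build a one-hot lane mask (1 << laneidx)
  // in v0, then merge the scalar into just that lane of dst, leaving the
  // remaining lanes unchanged.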
  if (ts == 8) {
    Lbu(kScratchReg2, src);
    VU.set(kScratchReg, E64, m1);
    li(kScratchReg, 0x1 << laneidx);
    vmv_sx(v0, kScratchReg);
    VU.set(kScratchReg, E8, m1);
    vmerge_vx(dst, kScratchReg2, dst);
  } else if (ts == 16) {
    Lhu(kScratchReg2, src);
    VU.set(kScratchReg, E16, m1);
    li(kScratchReg, 0x1 << laneidx);
    vmv_sx(v0, kScratchReg);
    vmerge_vx(dst, kScratchReg2, dst);
  } else if (ts == 32) {
    Lwu(kScratchReg2, src);
    VU.set(kScratchReg, E32, m1);
    li(kScratchReg, 0x1 << laneidx);
    vmv_sx(v0, kScratchReg);
    vmerge_vx(dst, kScratchReg2, dst);
  } else if (ts == 64) {
    Ld(kScratchReg2, src);
    VU.set(kScratchReg, E64, m1);
    li(kScratchReg, 0x1 << laneidx);
    vmv_sx(v0, kScratchReg);
    vmerge_vx(dst, kScratchReg2, dst);
  } else {
    UNREACHABLE();
  }
}

void TurboAssembler::StoreLane(int sz, VRegister src, uint8_t laneidx,
                               MemOperand dst) {
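  // Slide the requested lane down to element 0, move it into a scalar
  // register with vmv_xs, then store it with the store of matching width.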
  if (sz == 8) {
    VU.set(kScratchReg, E8, m1);
    vslidedown_vi(kSimd128ScratchReg, src, laneidx);
    vmv_xs(kScratchReg, kSimd128ScratchReg);
    Sb(kScratchReg, dst);
  } else if (sz == 16) {
    VU.set(kScratchReg, E16, m1);
    vslidedown_vi(kSimd128ScratchReg, src, laneidx);
    vmv_xs(kScratchReg, kSimd128ScratchReg);
    Sh(kScratchReg, dst);
  } else if (sz == 32) {
    VU.set(kScratchReg, E32, m1);
    vslidedown_vi(kSimd128ScratchReg, src, laneidx);
    vmv_xs(kScratchReg, kSimd128ScratchReg);
    Sw(kScratchReg, dst);
  } else {
    DCHECK_EQ(sz, 64);
    VU.set(kScratchReg, E64, m1);
    vslidedown_vi(kSimd128ScratchReg, src, laneidx);
    vmv_xs(kScratchReg, kSimd128ScratchReg);
    Sd(kScratchReg, dst);
  }
}

// -----------------------------------------------------------------------------
// Runtime calls.

void TurboAssembler::AddOverflow64(Register dst, Register left,
                                   const Operand& right, Register overflow) {
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register right_reg = no_reg;
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  if (!right.is_reg()) {
    li(scratch, Operand(right));
    right_reg = scratch;
  } else {
    right_reg = right.rm();
  }
  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
         overflow != scratch2);
  DCHECK(overflow != left && overflow != right_reg);
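  // Signed addition overflows iff both operands have the same sign and the
  // sum's sign differs; equivalently, the sign bit of
  // (sum ^ left) & (sum ^ right) is set, so `overflow` is negative exactly
  // when the addition overflowed.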
  if (dst == left || dst == right_reg) {
    add(scratch2, left, right_reg);
    xor_(overflow, scratch2, left);
    xor_(scratch, scratch2, right_reg);
    and_(overflow, overflow, scratch);
    Mv(dst, scratch2);
  } else {
    add(dst, left, right_reg);
    xor_(overflow, dst, left);
    xor_(scratch, dst, right_reg);
    and_(overflow, overflow, scratch);
  }
}

void TurboAssembler::SubOverflow64(Register dst, Register left,
                                   const Operand& right, Register overflow) {
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register right_reg = no_reg;
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  if (!right.is_reg()) {
    li(scratch, Operand(right));
    right_reg = scratch;
  } else {
    right_reg = right.rm();
  }

  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
         overflow != scratch2);
  DCHECK(overflow != left && overflow != right_reg);
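  // Signed subtraction overflows iff the operands have different signs and
  // the result's sign differs from left's; equivalently, the sign bit of
  // (left ^ result) & (left ^ right) is set, so `overflow` is negative
  // exactly when the subtraction overflowed.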
  if (dst == left || dst == right_reg) {
    sub(scratch2, left, right_reg);
    xor_(overflow, left, scratch2);
    xor_(scratch, left, right_reg);
    and_(overflow, overflow, scratch);
    Mv(dst, scratch2);
  } else {
    sub(dst, left, right_reg);
    xor_(overflow, left, dst);
    xor_(scratch, left, right_reg);
    and_(overflow, overflow, scratch);
  }
}

void TurboAssembler::MulOverflow32(Register dst, Register left,
                                   const Operand& right, Register overflow) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register right_reg = no_reg;
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  if (!right.is_reg()) {
    li(scratch, Operand(right));
    right_reg = scratch;
  } else {
    right_reg = right.rm();
  }

  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
         overflow != scratch2);
  DCHECK(overflow != left && overflow != right_reg);
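  // Multiply the sign-extended 32-bit inputs as 64-bit values; the product
  // fits in 32 bits iff it equals the sign-extension of its own low 32
  // bits, so `overflow` below is nonzero exactly on overflow.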
  sext_w(overflow, left);
  sext_w(scratch2, right_reg);

  mul(overflow, overflow, scratch2);
  sext_w(dst, overflow);
  xor_(overflow, overflow, dst);
}

void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
                                 SaveFPRegsMode save_doubles) {
  ASM_CODE_COMMENT(this);
  // All parameters are on the stack. a0 has the return value after call.

  // If the expected number of arguments of the runtime function is
  // constant, we check that the actual number of arguments matches the
  // expectation.
  CHECK(f->nargs < 0 || f->nargs == num_arguments);

  // TODO(1236192): Most runtime routines don't need the number of
  // arguments passed in because it is constant. At some point we
  // should remove this need and make the runtime routine entry code
  // smarter.
  PrepareCEntryArgs(num_arguments);
  PrepareCEntryFunction(ExternalReference::Create(f));
  Handle<Code> code =
      CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
  Call(code, RelocInfo::CODE_TARGET);
}

void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
  ASM_CODE_COMMENT(this);
  const Runtime::Function* function = Runtime::FunctionForId(fid);
  DCHECK_EQ(1, function->result_size);
  if (function->nargs >= 0) {
    PrepareCEntryArgs(function->nargs);
  }
  JumpToExternalReference(ExternalReference::Create(fid));
}

void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
                                             bool builtin_exit_frame) {
  ASM_CODE_COMMENT(this);
  PrepareCEntryFunction(builtin);
  Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
                                          ArgvMode::kStack, builtin_exit_frame);
  Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg));
}

void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
  // Load the target address either directly as an immediate with off-heap
  // relocation or via an entry recorded in the constant pool.
  ASM_CODE_COMMENT(this);
  if (!FLAG_riscv_constant_pool) {
    li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
  } else {
    RecordEntry(entry, RelocInfo::OFF_HEAP_TARGET);
    RecordRelocInfo(RelocInfo::OFF_HEAP_TARGET, entry);
    auipc(kOffHeapTrampolineRegister, 0);
    ld(kOffHeapTrampolineRegister, kOffHeapTrampolineRegister, 0);
  }
  Jump(kOffHeapTrampolineRegister);
}

void MacroAssembler::LoadWeakValue(Register out, Register in,
                                   Label* target_if_cleared) {
  ASM_CODE_COMMENT(this);
  Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32));
  And(out, in, Operand(~kWeakHeapObjectMask));
}

void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
                                          Register scratch1,
                                          Register scratch2) {
  DCHECK_GT(value, 0);
  if (FLAG_native_code_counters && counter->Enabled()) {
    ASM_CODE_COMMENT(this);
    // This operation has to be exactly 32-bit wide in case the external
    // reference table redirects the counter to a uint32_t
    // dummy_stats_counter_ field.
    li(scratch2, ExternalReference::Create(counter));
    Lw(scratch1, MemOperand(scratch2));
    Add32(scratch1, scratch1, Operand(value));
    Sw(scratch1, MemOperand(scratch2));
  }
}

void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
                                          Register scratch1,
                                          Register scratch2) {
  DCHECK_GT(value, 0);
  if (FLAG_native_code_counters && counter->Enabled()) {
    ASM_CODE_COMMENT(this);
    // This operation has to be exactly 32-bit wide in case the external
    // reference table redirects the counter to a uint32_t
    // dummy_stats_counter_ field.
    li(scratch2, ExternalReference::Create(counter));
    Lw(scratch1, MemOperand(scratch2));
    Sub32(scratch1, scratch1, Operand(value));
    Sw(scratch1, MemOperand(scratch2));
  }
}

// -----------------------------------------------------------------------------
// Debugging.

void TurboAssembler::Trap() { stop(); }
void TurboAssembler::DebugBreak() { stop(); }

void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs,
                            Operand rt) {
  if (FLAG_debug_code) Check(cc, reason, rs, rt);
}

void TurboAssembler::Check(Condition cc, AbortReason reason, Register rs,
                           Operand rt) {
  Label L;
  BranchShort(&L, cc, rs, rt);
  Abort(reason);
  // Will not return here.
  bind(&L);
}

void TurboAssembler::Abort(AbortReason reason) {
  Label abort_start;
  bind(&abort_start);
  if (FLAG_code_comments) {
    const char* msg = GetAbortReason(reason);
    RecordComment("Abort message: ");
    RecordComment(msg);
  }

  // Avoid emitting call to builtin if requested.
  if (trap_on_abort()) {
    ebreak();
    return;
  }

  if (should_abort_hard()) {
    // We don't care if we constructed a frame. Just pretend we did.
    FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
    PrepareCallCFunction(0, a0);
    li(a0, Operand(static_cast<int64_t>(reason)));
    CallCFunction(ExternalReference::abort_with_reason(), 1);
    return;
  }

  Move(a0, Smi::FromInt(static_cast<int>(reason)));

  // Disable stub call restrictions to always allow calls to abort.
  if (!has_frame()) {
    // We don't actually want to generate a pile of code for this, so just
    // claim there is a stack frame, without generating one.
    FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
  } else {
    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
  }
  // Will not return here.
  if (is_trampoline_pool_blocked()) {
    // If the calling code cares about the exact number of
    // instructions generated, we insert padding here to keep the size
    // of the Abort macro constant.
    // Currently in debug mode with debug_code enabled the number of
    // generated instructions is 10, so we use this as a maximum value.
    static const int kExpectedAbortInstructions = 10;
    int abort_instructions = InstructionsGeneratedSince(&abort_start);
    DCHECK_LE(abort_instructions, kExpectedAbortInstructions);
    while (abort_instructions++ < kExpectedAbortInstructions) {
      nop();
    }
  }
}

void TurboAssembler::LoadMap(Register destination, Register object) {
  ASM_CODE_COMMENT(this);
  LoadTaggedPointerField(destination,
                         FieldMemOperand(object, HeapObject::kMapOffset));
}

void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
  ASM_CODE_COMMENT(this);
  LoadMap(dst, cp);
  LoadTaggedPointerField(
      dst, FieldMemOperand(
               dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
  LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
}

void TurboAssembler::StubPrologue(StackFrame::Type type) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  li(scratch, Operand(StackFrame::TypeToMarker(type)));
  PushCommonFrame(scratch);
}

void TurboAssembler::Prologue() { PushStandardFrame(a1); }

void TurboAssembler::EnterFrame(StackFrame::Type type) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Push(ra, fp);
  Move(fp, sp);
  if (!StackFrame::IsJavaScript(type)) {
    li(scratch, Operand(StackFrame::TypeToMarker(type)));
    Push(scratch);
  }
#if V8_ENABLE_WEBASSEMBLY
  if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
#endif  // V8_ENABLE_WEBASSEMBLY
}

void TurboAssembler::LeaveFrame(StackFrame::Type type) {
  ASM_CODE_COMMENT(this);
  addi(sp, fp, 2 * kSystemPointerSize);
  Ld(ra, MemOperand(fp, 1 * kSystemPointerSize));
  Ld(fp, MemOperand(fp, 0 * kSystemPointerSize));
}

void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
                                    StackFrame::Type frame_type) {
  ASM_CODE_COMMENT(this);
  DCHECK(frame_type == StackFrame::EXIT ||
         frame_type == StackFrame::BUILTIN_EXIT);

  // Set up the frame structure on the stack.
  STATIC_ASSERT(2 * kSystemPointerSize ==
                ExitFrameConstants::kCallerSPDisplacement);
  STATIC_ASSERT(1 * kSystemPointerSize == ExitFrameConstants::kCallerPCOffset);
  STATIC_ASSERT(0 * kSystemPointerSize == ExitFrameConstants::kCallerFPOffset);

  // This is how the stack will look:
  // fp + 2 (==kCallerSPDisplacement) - old stack's end
  // [fp + 1 (==kCallerPCOffset)] - saved old ra
  // [fp + 0 (==kCallerFPOffset)] - saved old fp
  // [fp - 1] - StackFrame::EXIT Smi (frame type marker)
  // [fp - 2 (==kSPOffset)] - sp of the called function
  // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the
  //   new stack (will contain saved ra)

  // Save registers and reserve room for saved entry sp.
  addi(sp, sp,
       -2 * kSystemPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp);
  Sd(ra, MemOperand(sp, 3 * kSystemPointerSize));
  Sd(fp, MemOperand(sp, 2 * kSystemPointerSize));
  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    li(scratch, Operand(StackFrame::TypeToMarker(frame_type)));
    Sd(scratch, MemOperand(sp, 1 * kSystemPointerSize));
  }
  // Set up new frame pointer.
  addi(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp);

  if (FLAG_debug_code) {
    Sd(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset));
  }

  {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    BlockTrampolinePoolScope block_trampoline_pool(this);
    // Save the frame pointer and the context in top.
    li(scratch, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
                                          isolate()));
    Sd(fp, MemOperand(scratch));
    li(scratch,
       ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
    Sd(cp, MemOperand(scratch));
  }

  const int frame_alignment = MacroAssembler::ActivationFrameAlignment();
  if (save_doubles) {
    // The stack is already aligned to 0 modulo 8 for double stores (fsd).
    int space = kNumCallerSavedFPU * kDoubleSize;
    Sub64(sp, sp, Operand(space));
    int count = 0;
    for (int i = 0; i < kNumFPURegisters; i++) {
      if (kCallerSavedFPU.bits() & (1 << i)) {
        FPURegister reg = FPURegister::from_code(i);
        StoreDouble(reg, MemOperand(sp, count * kDoubleSize));
        count++;
      }
    }
  }

  // Reserve place for the return address, stack space and an optional slot
  // (used by DirectCEntry to hold the return value if a struct is
  // returned) and align the frame preparing for calling the runtime function.
  DCHECK_GE(stack_space, 0);
  Sub64(sp, sp, Operand((stack_space + 2) * kSystemPointerSize));
  if (frame_alignment > 0) {
    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
    And(sp, sp, Operand(-frame_alignment));  // Align stack.
  }

  // Set the exit frame sp value to point just before the return address
  // location.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  addi(scratch, sp, kSystemPointerSize);
  Sd(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
}

void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
                                    bool do_return,
                                    bool argument_count_is_length) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  BlockTrampolinePoolScope block_trampoline_pool(this);
  // Optionally restore all double registers.
  if (save_doubles) {
    // Remember: we only need to restore kCallerSavedFPU.
    Sub64(scratch, fp,
          Operand(ExitFrameConstants::kFixedFrameSizeFromFp +
                  kNumCallerSavedFPU * kDoubleSize));
    int count = 0;
    for (int i = 0; i < kNumFPURegisters; i++) {
      if (kCallerSavedFPU.bits() & (1 << i)) {
        FPURegister reg = FPURegister::from_code(i);
        LoadDouble(reg, MemOperand(scratch, count * kDoubleSize));
        count++;
      }
    }
  }

  // Clear top frame.
  li(scratch,
     ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
  Sd(zero_reg, MemOperand(scratch));

  // Restore current context from top and clear it in debug mode.
  li(scratch,
     ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
  Ld(cp, MemOperand(scratch));

  if (FLAG_debug_code) {
    UseScratchRegisterScope temp(this);
    Register scratch2 = temp.Acquire();
    li(scratch2, Operand(Context::kInvalidContext));
    Sd(scratch2, MemOperand(scratch));
  }

  // Pop the arguments, restore registers, and return.
  Mv(sp, fp);  // Respect ABI stack constraint.
  Ld(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset));
  Ld(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset));

  if (argument_count.is_valid()) {
    if (argument_count_is_length) {
      add(sp, sp, argument_count);
    } else {
      CalcScaledAddress(sp, sp, argument_count, kSystemPointerSizeLog2);
    }
  }

  addi(sp, sp, 2 * kSystemPointerSize);

  if (do_return) {
    Ret();
  }
}

int TurboAssembler::ActivationFrameAlignment() {
#if V8_HOST_ARCH_RISCV64
  // Running on the real platform. Use the alignment as mandated by the local
  // environment.
  // Note: This will break if we ever start generating snapshots on one RISC-V
  // platform for another RISC-V platform with a different alignment.
  return base::OS::ActivationFrameAlignment();
#else   // V8_HOST_ARCH_RISCV64
  // If we are using the simulator then we should always align to the expected
  // alignment. As the simulator is used to generate snapshots we do not know
  // if the target platform will need alignment, so this is controlled from a
  // flag.
  return FLAG_sim_stack_alignment;
#endif  // V8_HOST_ARCH_RISCV64
}

void MacroAssembler::AssertStackIsAligned() {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    const int frame_alignment = ActivationFrameAlignment();
    const int frame_alignment_mask = frame_alignment - 1;

    if (frame_alignment > kSystemPointerSize) {
      Label alignment_as_expected;
      DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
      {
        UseScratchRegisterScope temps(this);
        Register scratch = temps.Acquire();
        andi(scratch, sp, frame_alignment_mask);
        BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
      }
      // Don't use Check here, as it would call Runtime_Abort and re-enter here.
      ebreak();
      bind(&alignment_as_expected);
    }
  }
}

void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
  ASM_CODE_COMMENT(this);
  if (SmiValuesAre32Bits()) {
    Lw(dst, MemOperand(src.rm(), SmiWordOffset(src.offset())));
  } else {
    DCHECK(SmiValuesAre31Bits());
    if (COMPRESS_POINTERS_BOOL) {
      Lw(dst, src);
    } else {
      Ld(dst, src);
    }
    SmiUntag(dst);
  }
}

void TurboAssembler::SmiToInt32(Register smi) {
  ASM_CODE_COMMENT(this);
  if (FLAG_enable_slow_asserts) {
    AssertSmi(smi);
  }
  DCHECK(SmiValuesAre32Bits() || SmiValuesAre31Bits());
  SmiUntag(smi);
}

void TurboAssembler::JumpIfSmi(Register value, Label* smi_label) {
  ASM_CODE_COMMENT(this);
  DCHECK_EQ(0, kSmiTag);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  andi(scratch, value, kSmiTagMask);
  Branch(smi_label, eq, scratch, Operand(zero_reg));
}

void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  DCHECK_EQ(0, kSmiTag);
  andi(scratch, value, kSmiTagMask);
  Branch(not_smi_label, ne, scratch, Operand(zero_reg));
}

void TurboAssembler::AssertNotSmi(Register object, AbortReason reason) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    STATIC_ASSERT(kSmiTag == 0);
    DCHECK(object != kScratchReg);
    andi(kScratchReg, object, kSmiTagMask);
    Check(ne, reason, kScratchReg, Operand(zero_reg));
  }
}

void TurboAssembler::AssertSmi(Register object, AbortReason reason) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    STATIC_ASSERT(kSmiTag == 0);
    DCHECK(object != kScratchReg);
    andi(kScratchReg, object, kSmiTagMask);
    Check(eq, reason, kScratchReg, Operand(zero_reg));
  }
}

void MacroAssembler::AssertConstructor(Register object) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    DCHECK(object != kScratchReg);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    STATIC_ASSERT(kSmiTag == 0);
    SmiTst(object, kScratchReg);
    Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, kScratchReg,
          Operand(zero_reg));

    LoadMap(kScratchReg, object);
    Lbu(kScratchReg, FieldMemOperand(kScratchReg, Map::kBitFieldOffset));
    And(kScratchReg, kScratchReg, Operand(Map::Bits1::IsConstructorBit::kMask));
    Check(ne, AbortReason::kOperandIsNotAConstructor, kScratchReg,
          Operand(zero_reg));
  }
}

void MacroAssembler::AssertFunction(Register object) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    STATIC_ASSERT(kSmiTag == 0);
    DCHECK(object != kScratchReg);
    SmiTst(object, kScratchReg);
    Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, kScratchReg,
          Operand(zero_reg));
    push(object);
    LoadMap(object, object);
    UseScratchRegisterScope temps(this);
    Register range = temps.Acquire();
    GetInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE, range);
    Check(Uless_equal, AbortReason::kOperandIsNotAFunction, range,
          Operand(LAST_JS_FUNCTION_TYPE - FIRST_JS_FUNCTION_TYPE));
    pop(object);
  }
}

void MacroAssembler::AssertCallableFunction(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  STATIC_ASSERT(kSmiTag == 0);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction);
  push(object);
  LoadMap(object, object);
  UseScratchRegisterScope temps(this);
  Register range = temps.Acquire();
  GetInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE, range);
  Check(Uless_equal, AbortReason::kOperandIsNotACallableFunction, range,
        Operand(LAST_CALLABLE_JS_FUNCTION_TYPE -
                FIRST_CALLABLE_JS_FUNCTION_TYPE));
  pop(object);
}

void MacroAssembler::AssertBoundFunction(Register object) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    BlockTrampolinePoolScope block_trampoline_pool(this);
    STATIC_ASSERT(kSmiTag == 0);
    DCHECK(object != kScratchReg);
    SmiTst(object, kScratchReg);
    Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, kScratchReg,
          Operand(zero_reg));
    GetObjectType(object, kScratchReg, kScratchReg);
    Check(eq, AbortReason::kOperandIsNotABoundFunction, kScratchReg,
          Operand(JS_BOUND_FUNCTION_TYPE));
  }
}

void MacroAssembler::AssertGeneratorObject(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  STATIC_ASSERT(kSmiTag == 0);
  DCHECK(object != kScratchReg);
  SmiTst(object, kScratchReg);
  Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, kScratchReg,
        Operand(zero_reg));

  GetObjectType(object, kScratchReg, kScratchReg);

  Label done;

  // Check if JSGeneratorObject
  BranchShort(&done, eq, kScratchReg, Operand(JS_GENERATOR_OBJECT_TYPE));

  // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
  BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));

  // Check if JSAsyncGeneratorObject
  BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));

  Abort(AbortReason::kOperandIsNotAGeneratorObject);

  bind(&done);
}

void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
                                                     Register scratch) {
  if (FLAG_debug_code) {
    ASM_CODE_COMMENT(this);
    Label done_checking;
    AssertNotSmi(object);
    LoadRoot(scratch, RootIndex::kUndefinedValue);
    BranchShort(&done_checking, eq, object, Operand(scratch));
    GetObjectType(object, scratch, scratch);
    Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch,
           Operand(ALLOCATION_SITE_TYPE));
    bind(&done_checking);
  }
}

template <typename F_TYPE>
void TurboAssembler::FloatMinMaxHelper(FPURegister dst, FPURegister src1,
4710                                       FPURegister src2, MaxMinKind kind) {
4711  DCHECK((std::is_same<F_TYPE, float>::value) ||
4712         (std::is_same<F_TYPE, double>::value));
4713
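  // If both sources are the same register, min/max reduces to a plain move.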
4714  if (src1 == src2 && dst != src1) {
4715    if (std::is_same<float, F_TYPE>::value) {
4716      fmv_s(dst, src1);
4717    } else {
4718      fmv_d(dst, src1);
4719    }
4720    return;
4721  }
4722
4723  Label done, nan;
4724
  // For RISC-V, fmin_s returns the non-NaN operand as the result if exactly
  // one operand is NaN; but for JS, if any operand is NaN, the result is NaN.
  // The following handles this discrepancy between the ISA and JS semantics
  // for NaN.
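  // For example, fmin_s(1.0, NaN) yields 1.0 on RISC-V, whereas JS
  // Math.min(1.0, NaN) must yield NaN.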
4729  UseScratchRegisterScope temps(this);
4730  Register scratch = temps.Acquire();
4731  if (std::is_same<float, F_TYPE>::value) {
4732    CompareIsNotNanF32(scratch, src1, src2);
4733  } else {
4734    CompareIsNotNanF64(scratch, src1, src2);
4735  }
4736  BranchFalseF(scratch, &nan);
4737
4738  if (kind == MaxMinKind::kMax) {
4739    if (std::is_same<float, F_TYPE>::value) {
4740      fmax_s(dst, src1, src2);
4741    } else {
4742      fmax_d(dst, src1, src2);
4743    }
4744  } else {
4745    if (std::is_same<float, F_TYPE>::value) {
4746      fmin_s(dst, src1, src2);
4747    } else {
4748      fmin_d(dst, src1, src2);
4749    }
4750  }
4751  j(&done);
4752
4753  bind(&nan);
  // If any operand is NaN, return NaN (fadd produces NaN when either operand
  // is NaN).
4755  if (std::is_same<float, F_TYPE>::value) {
4756    fadd_s(dst, src1, src2);
4757  } else {
4758    fadd_d(dst, src1, src2);
4759  }
4760
4761  bind(&done);
4762}
4763
4764void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1,
4765                                FPURegister src2) {
4766  ASM_CODE_COMMENT(this);
4767  FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMax);
4768}
4769
4770void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1,
4771                                FPURegister src2) {
4772  ASM_CODE_COMMENT(this);
4773  FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMin);
4774}
4775
4776void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1,
4777                                FPURegister src2) {
4778  ASM_CODE_COMMENT(this);
4779  FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMax);
4780}
4781
4782void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1,
4783                                FPURegister src2) {
4784  ASM_CODE_COMMENT(this);
4785  FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMin);
4786}
4787
4788static const int kRegisterPassedArguments = 8;
4789
4790int TurboAssembler::CalculateStackPassedDWords(int num_gp_arguments,
4791                                               int num_fp_arguments) {
4792  int stack_passed_dwords = 0;
4793
  // Up to eight integer arguments are passed in registers a0..a7, and up to
  // eight floating-point arguments are passed in registers fa0..fa7.
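  // For example, 10 GP and 9 FP arguments need (10 - 8) + (9 - 8) = 3 stack
  // dwords, plus the kCArgSlotCount reserved slots.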
4796  if (num_gp_arguments > kRegisterPassedArguments) {
4797    stack_passed_dwords += num_gp_arguments - kRegisterPassedArguments;
4798  }
4799  if (num_fp_arguments > kRegisterPassedArguments) {
4800    stack_passed_dwords += num_fp_arguments - kRegisterPassedArguments;
4801  }
4802  stack_passed_dwords += kCArgSlotCount;
4803  return stack_passed_dwords;
4804}
4805
4806void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4807                                          int num_double_arguments,
4808                                          Register scratch) {
4809  ASM_CODE_COMMENT(this);
4810  int frame_alignment = ActivationFrameAlignment();
4811
4812  // Up to eight simple arguments in a0..a7, fa0..fa7.
4813  // Remaining arguments are pushed on the stack (arg slot calculation handled
4814  // by CalculateStackPassedDWords()).
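  // For example, with 16-byte frame alignment and 3 stack arguments, sp is
  // moved down by 4 slots (the arguments plus the saved sp) and then rounded
  // down to a 16-byte boundary.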
4815  int stack_passed_arguments =
4816      CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4817  if (frame_alignment > kSystemPointerSize) {
    // Align sp and make room for the stack arguments plus one slot to save
    // the original value of sp.
4820    Mv(scratch, sp);
4821    Sub64(sp, sp, Operand((stack_passed_arguments + 1) * kSystemPointerSize));
4822    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4823    And(sp, sp, Operand(-frame_alignment));
4824    Sd(scratch, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
4825  } else {
4826    Sub64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4827  }
4828}
4829
4830void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4831                                          Register scratch) {
4832  PrepareCallCFunction(num_reg_arguments, 0, scratch);
4833}
4834
4835void TurboAssembler::CallCFunction(ExternalReference function,
4836                                   int num_reg_arguments,
4837                                   int num_double_arguments) {
4838  BlockTrampolinePoolScope block_trampoline_pool(this);
4839  li(t6, function);
4840  CallCFunctionHelper(t6, num_reg_arguments, num_double_arguments);
4841}
4842
4843void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
4844                                   int num_double_arguments) {
4845  CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
4846}
4847
4848void TurboAssembler::CallCFunction(ExternalReference function,
4849                                   int num_arguments) {
4850  CallCFunction(function, num_arguments, 0);
4851}
4852
4853void TurboAssembler::CallCFunction(Register function, int num_arguments) {
4854  CallCFunction(function, num_arguments, 0);
4855}
4856
4857void TurboAssembler::CallCFunctionHelper(Register function,
4858                                         int num_reg_arguments,
4859                                         int num_double_arguments) {
4860  DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
4861  DCHECK(has_frame());
4862  ASM_CODE_COMMENT(this);
  // Make sure that the stack is aligned before calling a C function unless
  // running in the simulator. The simulator has its own alignment check,
  // which provides more information.
  // The argument slots are presumed to have been set up by
  // PrepareCallCFunction.
4868
4869#if V8_HOST_ARCH_RISCV64
4870  if (FLAG_debug_code) {
4871    int frame_alignment = base::OS::ActivationFrameAlignment();
4872    int frame_alignment_mask = frame_alignment - 1;
4873    if (frame_alignment > kSystemPointerSize) {
4874      DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4875      Label alignment_as_expected;
4876      {
4877        UseScratchRegisterScope temps(this);
4878        Register scratch = temps.Acquire();
4879        And(scratch, sp, Operand(frame_alignment_mask));
4880        BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
4881      }
      // Don't use Check here, as it will call Runtime_Abort, possibly
      // re-entering here.
4884      ebreak();
4885      bind(&alignment_as_expected);
4886    }
4887  }
4888#endif  // V8_HOST_ARCH_RISCV64
4889
  // Just call directly. The called function cannot cause a GC or allow
  // preemption, so the return address in the link register stays correct.
4893  {
4894    if (function != t6) {
4895      Mv(t6, function);
4896      function = t6;
4897    }
4898
    // Save the frame pointer and PC so that the stack layout remains
    // iterable, even without an ExitFrame, which normally exists between JS
    // and C frames.
    // 't' registers are caller-saved, so they are safe to use as scratch
    // registers here.
4903    Register pc_scratch = t1;
4904    Register scratch = t2;
4905
4906    auipc(pc_scratch, 0);
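    // auipc with a zero immediate yields the address of the auipc itself,
    // which serves as the saved pc.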
4907    // See x64 code for reasoning about how to address the isolate data fields.
4908    if (root_array_available()) {
4909      Sd(pc_scratch, MemOperand(kRootRegister,
4910                                IsolateData::fast_c_call_caller_pc_offset()));
4911      Sd(fp, MemOperand(kRootRegister,
4912                        IsolateData::fast_c_call_caller_fp_offset()));
4913    } else {
4914      DCHECK_NOT_NULL(isolate());
4915      li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate()));
4916      Sd(pc_scratch, MemOperand(scratch));
4917      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4918      Sd(fp, MemOperand(scratch));
4919    }
4920
4921    Call(function);
4922
4923    if (isolate() != nullptr) {
4924      // We don't unset the PC; the FP is the source of truth.
4925      UseScratchRegisterScope temps(this);
4926      Register scratch = temps.Acquire();
4927      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4928      Sd(zero_reg, MemOperand(scratch));
4929    }
4930  }
4931
4932  int stack_passed_arguments =
4933      CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4934
4935  if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
4936    Ld(sp, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
4937  } else {
4938    Add64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4939  }
4940}
4941
4942#undef BRANCH_ARGS_CHECK
4943
4944void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask,
4945                                   Condition cc, Label* condition_met) {
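  // Mask the object address down to the start of its page, then test the
  // page header's flags word against |mask|.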
4946  And(scratch, object, Operand(~kPageAlignmentMask));
4947  Ld(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
4948  And(scratch, scratch, Operand(mask));
4949  Branch(condition_met, cc, scratch, Operand(zero_reg));
4950}
4951
4952Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
4953                                   Register reg4, Register reg5,
4954                                   Register reg6) {
4955  RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
4956
4957  const RegisterConfiguration* config = RegisterConfiguration::Default();
4958  for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
4959    int code = config->GetAllocatableGeneralCode(i);
4960    Register candidate = Register::from_code(code);
4961    if (regs.has(candidate)) continue;
4962    return candidate;
4963  }
4964  UNREACHABLE();
4965}
4966
4967void TurboAssembler::ComputeCodeStartAddress(Register dst) {
  // This push of ra and the matching pop below preserve ra, which is
  // clobbered while computing the code start address.
4970  push(ra);
4971
4972  auipc(ra, 0);
4973  addi(ra, ra, kInstrSize * 2);  // ra = address of li
4974  int pc = pc_offset();
4975  li(dst, Operand(pc));
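  // dst = (address of the li) - (offset of the li from the code start),
  // i.e. the start of the generated code.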
4976  Sub64(dst, ra, dst);
4977
4978  pop(ra);  // Restore ra
4979}
4980
4981void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4982                                           DeoptimizeKind kind, Label* ret,
4983                                           Label*) {
4984  ASM_CODE_COMMENT(this);
4985  BlockTrampolinePoolScope block_trampoline_pool(this);
4986  Ld(t6,
4987     MemOperand(kRootRegister, IsolateData::BuiltinEntrySlotOffset(target)));
4988  Call(t6);
4989  DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4990            (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4991                                            : Deoptimizer::kEagerDeoptExitSize);
4992}
4993
4994void TurboAssembler::LoadCodeObjectEntry(Register destination,
4995                                         Register code_object) {
4996  // Code objects are called differently depending on whether we are generating
4997  // builtin code (which will later be embedded into the binary) or compiling
4998  // user JS code at runtime.
4999  // * Builtin code runs in --jitless mode and thus must not call into on-heap
5000  //   Code targets. Instead, we dispatch through the builtins entry table.
5001  // * Codegen at runtime does not have this restriction and we can use the
5002  //   shorter, branchless instruction sequence. The assumption here is that
5003  //   targets are usually generated code and not builtin Code objects.
5004  ASM_CODE_COMMENT(this);
5005  if (options().isolate_independent_code) {
5006    DCHECK(root_array_available());
5007    Label if_code_is_off_heap, out;
5008
5009    UseScratchRegisterScope temps(this);
5010    Register scratch = temps.Acquire();
5011
5012    DCHECK(!AreAliased(destination, scratch));
5013    DCHECK(!AreAliased(code_object, scratch));
5014
    // Check whether the Code object is an off-heap trampoline. If so, call
    // its (off-heap) entry point directly without going through the
    // (on-heap) trampoline. Otherwise, just call the Code object as always.
5018
5019    Lw(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
5020    And(scratch, scratch, Operand(Code::IsOffHeapTrampoline::kMask));
5021    Branch(&if_code_is_off_heap, ne, scratch, Operand(zero_reg));
    // Not an off-heap trampoline object; the entry point is at
    // Code::raw_instruction_start().
5024    Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5025    Branch(&out);
5026
    // An off-heap trampoline; the entry point is loaded from the builtin
    // entry table.
5029    bind(&if_code_is_off_heap);
5030    Lw(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
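    // The entry is at kRootRegister + builtin_index * kSystemPointerSize +
    // IsolateData::builtin_entry_table_offset().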
5031    slli(destination, scratch, kSystemPointerSizeLog2);
5032    Add64(destination, destination, kRootRegister);
5033    Ld(destination,
5034       MemOperand(destination, IsolateData::builtin_entry_table_offset()));
5035
5036    bind(&out);
5037  } else {
5038    Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5039  }
5040}
5041
5042void TurboAssembler::CallCodeObject(Register code_object) {
5043  ASM_CODE_COMMENT(this);
5044  LoadCodeObjectEntry(code_object, code_object);
5045  Call(code_object);
5046}
5047
5048void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
5049  ASM_CODE_COMMENT(this);
5050  DCHECK_EQ(JumpMode::kJump, jump_mode);
5051  LoadCodeObjectEntry(code_object, code_object);
5052  Jump(code_object);
5053}
5054
5055void TurboAssembler::LoadTaggedPointerField(const Register& destination,
5056                                            const MemOperand& field_operand) {
5057  if (COMPRESS_POINTERS_BOOL) {
5058    DecompressTaggedPointer(destination, field_operand);
5059  } else {
5060    Ld(destination, field_operand);
5061  }
5062}
5063
5064void TurboAssembler::LoadAnyTaggedField(const Register& destination,
5065                                        const MemOperand& field_operand) {
5066  if (COMPRESS_POINTERS_BOOL) {
5067    DecompressAnyTagged(destination, field_operand);
5068  } else {
5069    Ld(destination, field_operand);
5070  }
5071}
5072
5073void TurboAssembler::LoadTaggedSignedField(const Register& destination,
5074                                           const MemOperand& field_operand) {
5075  if (COMPRESS_POINTERS_BOOL) {
5076    DecompressTaggedSigned(destination, field_operand);
5077  } else {
5078    Ld(destination, field_operand);
5079  }
5080}
5081
5082void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
5083  SmiUntag(dst, src);
5084}
5085
5086void TurboAssembler::StoreTaggedField(const Register& value,
5087                                      const MemOperand& dst_field_operand) {
5088  if (COMPRESS_POINTERS_BOOL) {
5089    Sw(value, dst_field_operand);
5090  } else {
5091    Sd(value, dst_field_operand);
5092  }
5093}
5094
5095void TurboAssembler::DecompressTaggedSigned(const Register& destination,
5096                                            const MemOperand& field_operand) {
5097  ASM_CODE_COMMENT(this);
5098  Lwu(destination, field_operand);
5099  if (FLAG_debug_code) {
    // Corrupt the top 32 bits: 16 fixed bits and 16 bits of the current pc
    // offset.
5101    Add64(destination, destination,
5102          Operand(((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32));
5103  }
5104}
5105
5106void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5107                                             const MemOperand& field_operand) {
5108  ASM_CODE_COMMENT(this);
5109  Lwu(destination, field_operand);
5110  Add64(destination, kPtrComprCageBaseRegister, destination);
5111}
5112
5113void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5114                                             const Register& source) {
5115  ASM_CODE_COMMENT(this);
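  // Zero-extend the 32-bit compressed value, then rebase it on the
  // pointer-compression cage base.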
5116  And(destination, source, Operand(0xFFFFFFFF));
5117  Add64(destination, kPtrComprCageBaseRegister, Operand(destination));
5118}
5119
5120void TurboAssembler::DecompressAnyTagged(const Register& destination,
5121                                         const MemOperand& field_operand) {
5122  ASM_CODE_COMMENT(this);
5123  Lwu(destination, field_operand);
5124  Add64(destination, kPtrComprCageBaseRegister, destination);
5125}
5126
5127void MacroAssembler::DropArguments(Register count, ArgumentsCountType type,
5128                                   ArgumentsCountMode mode, Register scratch) {
5129  switch (type) {
5130    case kCountIsInteger: {
5131      CalcScaledAddress(sp, sp, count, kPointerSizeLog2);
5132      break;
5133    }
5134    case kCountIsSmi: {
5135      STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
5136      DCHECK_NE(scratch, no_reg);
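      // SmiScale untags the count and scales it by kPointerSizeLog2, yielding
      // the byte offset count * kSystemPointerSize.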
5137      SmiScale(scratch, count, kPointerSizeLog2);
5138      Add64(sp, sp, scratch);
5139      break;
5140    }
5141    case kCountIsBytes: {
5142      Add64(sp, sp, count);
5143      break;
5144    }
5145  }
5146  if (mode == kCountExcludesReceiver) {
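    // The count excludes the receiver, so pop one extra slot for it.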
5147    Add64(sp, sp, kSystemPointerSize);
5148  }
5149}
5150
5151void MacroAssembler::DropArgumentsAndPushNewReceiver(Register argc,
5152                                                     Register receiver,
5153                                                     ArgumentsCountType type,
5154                                                     ArgumentsCountMode mode,
5155                                                     Register scratch) {
5156  DCHECK(!AreAliased(argc, receiver));
5157  if (mode == kCountExcludesReceiver) {
    // Drop the arguments (the count excludes the receiver) and overwrite the
    // old receiver.
5159    DropArguments(argc, type, kCountIncludesReceiver, scratch);
5160    Sd(receiver, MemOperand(sp));
5161  } else {
5162    DropArguments(argc, type, mode, scratch);
5163    push(receiver);
5164  }
5165}
5166
5167}  // namespace internal
5168}  // namespace v8
5169
5170#endif  // V8_TARGET_ARCH_RISCV64
5171