1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <limits.h>  // For LONG_MIN, LONG_MAX.
6
7#if V8_TARGET_ARCH_LOONG64
8
9#include "src/base/bits.h"
10#include "src/base/division-by-constant.h"
11#include "src/codegen/assembler-inl.h"
12#include "src/codegen/callable.h"
13#include "src/codegen/code-factory.h"
14#include "src/codegen/external-reference-table.h"
15#include "src/codegen/interface-descriptors-inl.h"
16#include "src/codegen/macro-assembler.h"
17#include "src/codegen/register-configuration.h"
18#include "src/debug/debug.h"
19#include "src/deoptimizer/deoptimizer.h"
20#include "src/execution/frames-inl.h"
21#include "src/heap/memory-chunk.h"
22#include "src/init/bootstrapper.h"
23#include "src/logging/counters.h"
24#include "src/objects/heap-number.h"
25#include "src/runtime/runtime.h"
26#include "src/snapshot/snapshot.h"
27
28#if V8_ENABLE_WEBASSEMBLY
29#include "src/wasm/wasm-code-manager.h"
30#endif  // V8_ENABLE_WEBASSEMBLY
31
32// Satisfy cpplint check, but don't include platform-specific header. It is
33// included recursively via macro-assembler.h.
34#if 0
35#include "src/codegen/loong64/macro-assembler-loong64.h"
36#endif
37
38namespace v8 {
39namespace internal {
40
41static inline bool IsZero(const Operand& rk) {
42  if (rk.is_reg()) {
43    return rk.rm() == zero_reg;
44  } else {
45    return rk.immediate() == 0;
46  }
47}
48
49int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
50                                                    Register exclusion1,
51                                                    Register exclusion2,
52                                                    Register exclusion3) const {
53  int bytes = 0;
54
55  RegList exclusions = {exclusion1, exclusion2, exclusion3};
56  RegList list = kJSCallerSaved - exclusions;
57  bytes += list.Count() * kPointerSize;
58
59  if (fp_mode == SaveFPRegsMode::kSave) {
60    bytes += kCallerSavedFPU.Count() * kDoubleSize;
61  }
62
63  return bytes;
64}
65
66int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
67                                    Register exclusion2, Register exclusion3) {
68  ASM_CODE_COMMENT(this);
69  int bytes = 0;
70
71  RegList exclusions = {exclusion1, exclusion2, exclusion3};
72  RegList list = kJSCallerSaved - exclusions;
73  MultiPush(list);
74  bytes += list.Count() * kPointerSize;
75
76  if (fp_mode == SaveFPRegsMode::kSave) {
77    MultiPushFPU(kCallerSavedFPU);
78    bytes += kCallerSavedFPU.Count() * kDoubleSize;
79  }
80
81  return bytes;
82}
83
84int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
85                                   Register exclusion2, Register exclusion3) {
86  ASM_CODE_COMMENT(this);
87  int bytes = 0;
88  if (fp_mode == SaveFPRegsMode::kSave) {
89    MultiPopFPU(kCallerSavedFPU);
90    bytes += kCallerSavedFPU.Count() * kDoubleSize;
91  }
92
93  RegList exclusions = {exclusion1, exclusion2, exclusion3};
94  RegList list = kJSCallerSaved - exclusions;
95  MultiPop(list);
96  bytes += list.Count() * kPointerSize;
97
98  return bytes;
99}
100
101void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
102  Ld_d(destination, MemOperand(s6, RootRegisterOffsetForRootIndex(index)));
103}
104
105void TurboAssembler::PushCommonFrame(Register marker_reg) {
106  if (marker_reg.is_valid()) {
107    Push(ra, fp, marker_reg);
108    Add_d(fp, sp, Operand(kPointerSize));
109  } else {
110    Push(ra, fp);
111    mov(fp, sp);
112  }
113}
114
115void TurboAssembler::PushStandardFrame(Register function_reg) {
116  int offset = -StandardFrameConstants::kContextOffset;
117  if (function_reg.is_valid()) {
118    Push(ra, fp, cp, function_reg, kJavaScriptCallArgCountRegister);
119    offset += 2 * kPointerSize;
120  } else {
121    Push(ra, fp, cp, kJavaScriptCallArgCountRegister);
122    offset += kPointerSize;
123  }
124  Add_d(fp, sp, Operand(offset));
125}
126
127// Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved)
128// The register 'object' contains a heap object pointer.  The heap object
129// tag is shifted away.
130void MacroAssembler::RecordWriteField(Register object, int offset,
131                                      Register value, RAStatus ra_status,
132                                      SaveFPRegsMode save_fp,
133                                      RememberedSetAction remembered_set_action,
134                                      SmiCheck smi_check) {
135  ASM_CODE_COMMENT(this);
136  // First, check if a write barrier is even needed. The tests below
137  // catch stores of Smis.
138  Label done;
139
140  // Skip barrier if writing a smi.
141  if (smi_check == SmiCheck::kInline) {
142    JumpIfSmi(value, &done);
143  }
144
145  // Although the object register is tagged, the offset is relative to the start
146  // of the object, so offset must be a multiple of kPointerSize.
147  DCHECK(IsAligned(offset, kPointerSize));
148
149  if (FLAG_debug_code) {
150    Label ok;
151    BlockTrampolinePoolScope block_trampoline_pool(this);
152    UseScratchRegisterScope temps(this);
153    Register scratch = temps.Acquire();
154    Add_d(scratch, object, offset - kHeapObjectTag);
155    And(scratch, scratch, Operand(kPointerSize - 1));
156    Branch(&ok, eq, scratch, Operand(zero_reg));
157    Abort(AbortReason::kUnalignedCellInWriteBarrier);
158    bind(&ok);
159  }
160
161  RecordWrite(object, Operand(offset - kHeapObjectTag), value, ra_status,
162              save_fp, remembered_set_action, SmiCheck::kOmit);
163
164  bind(&done);
165}
166
167void TurboAssembler::MaybeSaveRegisters(RegList registers) {
168  if (registers.is_empty()) return;
169  MultiPush(registers);
170}
171
172void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
173  if (registers.is_empty()) return;
174  MultiPop(registers);
175}
176
177void TurboAssembler::CallEphemeronKeyBarrier(Register object, Operand offset,
178                                             SaveFPRegsMode fp_mode) {
179  ASM_CODE_COMMENT(this);
180  RegList registers = WriteBarrierDescriptor::ComputeSavedRegisters(object);
181  MaybeSaveRegisters(registers);
182
183  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
184  Register slot_address_parameter =
185      WriteBarrierDescriptor::SlotAddressRegister();
186
187  MoveObjectAndSlot(object_parameter, slot_address_parameter, object, offset);
188
189  Call(isolate()->builtins()->code_handle(
190           Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
191       RelocInfo::CODE_TARGET);
192  MaybeRestoreRegisters(registers);
193}
194
195void TurboAssembler::CallRecordWriteStubSaveRegisters(
196    Register object, Operand offset, RememberedSetAction remembered_set_action,
197    SaveFPRegsMode fp_mode, StubCallMode mode) {
198  ASM_CODE_COMMENT(this);
199  RegList registers = WriteBarrierDescriptor::ComputeSavedRegisters(object);
200  MaybeSaveRegisters(registers);
201
202  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
203  Register slot_address_parameter =
204      WriteBarrierDescriptor::SlotAddressRegister();
205
206  MoveObjectAndSlot(object_parameter, slot_address_parameter, object, offset);
207
208  CallRecordWriteStub(object_parameter, slot_address_parameter,
209                      remembered_set_action, fp_mode, mode);
210
211  MaybeRestoreRegisters(registers);
212}
213
214void TurboAssembler::CallRecordWriteStub(
215    Register object, Register slot_address,
216    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
217    StubCallMode mode) {
218  // Use CallRecordWriteStubSaveRegisters if the object and slot registers
219  // need to be caller saved.
220  DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
221  DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
222#if V8_ENABLE_WEBASSEMBLY
223  if (mode == StubCallMode::kCallWasmRuntimeStub) {
224    auto wasm_target =
225        wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
226    Call(wasm_target, RelocInfo::WASM_STUB_CALL);
227#else
228  if (false) {
229#endif
230  } else {
231    auto builtin = Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
232    if (options().inline_offheap_trampolines) {
233      // Inline the trampoline.
234      RecordCommentForOffHeapTrampoline(builtin);
235      UseScratchRegisterScope temps(this);
236      Register scratch = temps.Acquire();
237      li(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
238      Call(scratch);
239      RecordComment("]");
240    } else {
241      Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
242      Call(code_target, RelocInfo::CODE_TARGET);
243    }
244  }
245}
246
247void TurboAssembler::MoveObjectAndSlot(Register dst_object, Register dst_slot,
248                                       Register object, Operand offset) {
249  ASM_CODE_COMMENT(this);
250  DCHECK_NE(dst_object, dst_slot);
251  // If `offset` is a register, it cannot overlap with `object`.
252  DCHECK_IMPLIES(!offset.IsImmediate(), offset.rm() != object);
253
254  // If the slot register does not overlap with the object register, we can
255  // overwrite it.
256  if (dst_slot != object) {
257    Add_d(dst_slot, object, offset);
258    mov(dst_object, object);
259    return;
260  }
261
262  DCHECK_EQ(dst_slot, object);
263
264  // If the destination object register does not overlap with the offset
265  // register, we can overwrite it.
266  if (offset.IsImmediate() || (offset.rm() != dst_object)) {
267    mov(dst_object, dst_slot);
268    Add_d(dst_slot, dst_slot, offset);
269    return;
270  }
271
272  DCHECK_EQ(dst_object, offset.rm());
273
274  // We only have `dst_slot` and `dst_object` left as distinct registers so we
275  // have to swap them. We write this as a add+sub sequence to avoid using a
276  // scratch register.
277  Add_d(dst_slot, dst_slot, dst_object);
278  Sub_d(dst_object, dst_slot, dst_object);
279}
280
281// If lr_status is kLRHasBeenSaved, lr will be clobbered.
282// TODO(LOONG_dev): LOONG64 Check this comment
283// Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved)
284// The register 'object' contains a heap object pointer.  The heap object
285// tag is shifted away.
286void MacroAssembler::RecordWrite(Register object, Operand offset,
287                                 Register value, RAStatus ra_status,
288                                 SaveFPRegsMode fp_mode,
289                                 RememberedSetAction remembered_set_action,
290                                 SmiCheck smi_check) {
291  DCHECK(!AreAliased(object, value));
292
293  if (FLAG_debug_code) {
294    UseScratchRegisterScope temps(this);
295    Register scratch = temps.Acquire();
296    Add_d(scratch, object, offset);
297    Ld_d(scratch, MemOperand(scratch, 0));
298    Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, scratch,
299           Operand(value));
300  }
301
302  if ((remembered_set_action == RememberedSetAction::kOmit &&
303       !FLAG_incremental_marking) ||
304      FLAG_disable_write_barriers) {
305    return;
306  }
307
308  // First, check if a write barrier is even needed. The tests below
309  // catch stores of smis and stores into the young generation.
310  Label done;
311
312  if (smi_check == SmiCheck::kInline) {
313    DCHECK_EQ(0, kSmiTag);
314    JumpIfSmi(value, &done);
315  }
316
317  CheckPageFlag(value, MemoryChunk::kPointersToHereAreInterestingMask, eq,
318                &done);
319
320  CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask, eq,
321                &done);
322
323  // Record the actual write.
324  if (ra_status == kRAHasNotBeenSaved) {
325    Push(ra);
326  }
327
328  Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
329  DCHECK(!AreAliased(object, slot_address, value));
330  DCHECK(offset.IsImmediate());
331  Add_d(slot_address, object, offset);
332  CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
333  if (ra_status == kRAHasNotBeenSaved) {
334    Pop(ra);
335  }
336
337  bind(&done);
338}
339
340// ---------------------------------------------------------------------------
341// Instruction macros.
342
343void TurboAssembler::Add_w(Register rd, Register rj, const Operand& rk) {
344  if (rk.is_reg()) {
345    add_w(rd, rj, rk.rm());
346  } else {
347    if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) {
348      addi_w(rd, rj, static_cast<int32_t>(rk.immediate()));
349    } else {
350      // li handles the relocation.
351      UseScratchRegisterScope temps(this);
352      Register scratch = temps.Acquire();
353      DCHECK(rj != scratch);
354      li(scratch, rk);
355      add_w(rd, rj, scratch);
356    }
357  }
358}
359
360void TurboAssembler::Add_d(Register rd, Register rj, const Operand& rk) {
361  if (rk.is_reg()) {
362    add_d(rd, rj, rk.rm());
363  } else {
364    if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) {
365      addi_d(rd, rj, static_cast<int32_t>(rk.immediate()));
366    } else {
367      // li handles the relocation.
368      UseScratchRegisterScope temps(this);
369      Register scratch = temps.Acquire();
370      DCHECK(rj != scratch);
371      li(scratch, rk);
372      add_d(rd, rj, scratch);
373    }
374  }
375}
376
377void TurboAssembler::Sub_w(Register rd, Register rj, const Operand& rk) {
378  if (rk.is_reg()) {
379    sub_w(rd, rj, rk.rm());
380  } else {
381    DCHECK(is_int32(rk.immediate()));
382    if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) {
383      // No subi_w instr, use addi_w(x, y, -imm).
384      addi_w(rd, rj, static_cast<int32_t>(-rk.immediate()));
385    } else {
386      UseScratchRegisterScope temps(this);
387      Register scratch = temps.Acquire();
388      DCHECK(rj != scratch);
389      if (-rk.immediate() >> 12 == 0 && !MustUseReg(rk.rmode())) {
390        // Use load -imm and addu when loading -imm generates one instruction.
391        li(scratch, -rk.immediate());
392        add_w(rd, rj, scratch);
393      } else {
394        // li handles the relocation.
395        li(scratch, rk);
396        sub_w(rd, rj, scratch);
397      }
398    }
399  }
400}
401
402void TurboAssembler::Sub_d(Register rd, Register rj, const Operand& rk) {
403  if (rk.is_reg()) {
404    sub_d(rd, rj, rk.rm());
405  } else if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) {
406    // No subi_d instr, use addi_d(x, y, -imm).
407    addi_d(rd, rj, static_cast<int32_t>(-rk.immediate()));
408  } else {
409    DCHECK(rj != t7);
410    int li_count = InstrCountForLi64Bit(rk.immediate());
411    int li_neg_count = InstrCountForLi64Bit(-rk.immediate());
412    if (li_neg_count < li_count && !MustUseReg(rk.rmode())) {
413      // Use load -imm and add_d when loading -imm generates one instruction.
414      DCHECK(rk.immediate() != std::numeric_limits<int32_t>::min());
415      UseScratchRegisterScope temps(this);
416      Register scratch = temps.Acquire();
417      li(scratch, Operand(-rk.immediate()));
418      add_d(rd, rj, scratch);
419    } else {
420      // li handles the relocation.
421      UseScratchRegisterScope temps(this);
422      Register scratch = temps.Acquire();
423      li(scratch, rk);
424      sub_d(rd, rj, scratch);
425    }
426  }
427}
428
429void TurboAssembler::Mul_w(Register rd, Register rj, const Operand& rk) {
430  if (rk.is_reg()) {
431    mul_w(rd, rj, rk.rm());
432  } else {
433    // li handles the relocation.
434    UseScratchRegisterScope temps(this);
435    Register scratch = temps.Acquire();
436    DCHECK(rj != scratch);
437    li(scratch, rk);
438    mul_w(rd, rj, scratch);
439  }
440}
441
442void TurboAssembler::Mulh_w(Register rd, Register rj, const Operand& rk) {
443  if (rk.is_reg()) {
444    mulh_w(rd, rj, rk.rm());
445  } else {
446    // li handles the relocation.
447    UseScratchRegisterScope temps(this);
448    Register scratch = temps.Acquire();
449    DCHECK(rj != scratch);
450    li(scratch, rk);
451    mulh_w(rd, rj, scratch);
452  }
453}
454
455void TurboAssembler::Mulh_wu(Register rd, Register rj, const Operand& rk) {
456  if (rk.is_reg()) {
457    mulh_wu(rd, rj, rk.rm());
458  } else {
459    // li handles the relocation.
460    UseScratchRegisterScope temps(this);
461    Register scratch = temps.Acquire();
462    DCHECK(rj != scratch);
463    li(scratch, rk);
464    mulh_wu(rd, rj, scratch);
465  }
466}
467
468void TurboAssembler::Mul_d(Register rd, Register rj, const Operand& rk) {
469  if (rk.is_reg()) {
470    mul_d(rd, rj, rk.rm());
471  } else {
472    // li handles the relocation.
473    UseScratchRegisterScope temps(this);
474    Register scratch = temps.Acquire();
475    DCHECK(rj != scratch);
476    li(scratch, rk);
477    mul_d(rd, rj, scratch);
478  }
479}
480
481void TurboAssembler::Mulh_d(Register rd, Register rj, const Operand& rk) {
482  if (rk.is_reg()) {
483    mulh_d(rd, rj, rk.rm());
484  } else {
485    // li handles the relocation.
486    UseScratchRegisterScope temps(this);
487    Register scratch = temps.Acquire();
488    DCHECK(rj != scratch);
489    li(scratch, rk);
490    mulh_d(rd, rj, scratch);
491  }
492}
493
494void TurboAssembler::Div_w(Register rd, Register rj, const Operand& rk) {
495  if (rk.is_reg()) {
496    div_w(rd, rj, rk.rm());
497  } else {
498    // li handles the relocation.
499    UseScratchRegisterScope temps(this);
500    Register scratch = temps.Acquire();
501    DCHECK(rj != scratch);
502    li(scratch, rk);
503    div_w(rd, rj, scratch);
504  }
505}
506
507void TurboAssembler::Mod_w(Register rd, Register rj, const Operand& rk) {
508  if (rk.is_reg()) {
509    mod_w(rd, rj, rk.rm());
510  } else {
511    // li handles the relocation.
512    UseScratchRegisterScope temps(this);
513    Register scratch = temps.Acquire();
514    DCHECK(rj != scratch);
515    li(scratch, rk);
516    mod_w(rd, rj, scratch);
517  }
518}
519
520void TurboAssembler::Mod_wu(Register rd, Register rj, const Operand& rk) {
521  if (rk.is_reg()) {
522    mod_wu(rd, rj, rk.rm());
523  } else {
524    // li handles the relocation.
525    UseScratchRegisterScope temps(this);
526    Register scratch = temps.Acquire();
527    DCHECK(rj != scratch);
528    li(scratch, rk);
529    mod_wu(rd, rj, scratch);
530  }
531}
532
533void TurboAssembler::Div_d(Register rd, Register rj, const Operand& rk) {
534  if (rk.is_reg()) {
535    div_d(rd, rj, rk.rm());
536  } else {
537    // li handles the relocation.
538    UseScratchRegisterScope temps(this);
539    Register scratch = temps.Acquire();
540    DCHECK(rj != scratch);
541    li(scratch, rk);
542    div_d(rd, rj, scratch);
543  }
544}
545
546void TurboAssembler::Div_wu(Register rd, Register rj, const Operand& rk) {
547  if (rk.is_reg()) {
548    div_wu(rd, rj, rk.rm());
549  } else {
550    // li handles the relocation.
551    UseScratchRegisterScope temps(this);
552    Register scratch = temps.Acquire();
553    DCHECK(rj != scratch);
554    li(scratch, rk);
555    div_wu(rd, rj, scratch);
556  }
557}
558
559void TurboAssembler::Div_du(Register rd, Register rj, const Operand& rk) {
560  if (rk.is_reg()) {
561    div_du(rd, rj, rk.rm());
562  } else {
563    // li handles the relocation.
564    UseScratchRegisterScope temps(this);
565    Register scratch = temps.Acquire();
566    DCHECK(rj != scratch);
567    li(scratch, rk);
568    div_du(rd, rj, scratch);
569  }
570}
571
572void TurboAssembler::Mod_d(Register rd, Register rj, const Operand& rk) {
573  if (rk.is_reg()) {
574    mod_d(rd, rj, rk.rm());
575  } else {
576    // li handles the relocation.
577    UseScratchRegisterScope temps(this);
578    Register scratch = temps.Acquire();
579    DCHECK(rj != scratch);
580    li(scratch, rk);
581    mod_d(rd, rj, scratch);
582  }
583}
584
585void TurboAssembler::Mod_du(Register rd, Register rj, const Operand& rk) {
586  if (rk.is_reg()) {
587    mod_du(rd, rj, rk.rm());
588  } else {
589    // li handles the relocation.
590    UseScratchRegisterScope temps(this);
591    Register scratch = temps.Acquire();
592    DCHECK(rj != scratch);
593    li(scratch, rk);
594    mod_du(rd, rj, scratch);
595  }
596}
597
598void TurboAssembler::And(Register rd, Register rj, const Operand& rk) {
599  if (rk.is_reg()) {
600    and_(rd, rj, rk.rm());
601  } else {
602    if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) {
603      andi(rd, rj, static_cast<int32_t>(rk.immediate()));
604    } else {
605      // li handles the relocation.
606      UseScratchRegisterScope temps(this);
607      Register scratch = temps.Acquire();
608      DCHECK(rj != scratch);
609      li(scratch, rk);
610      and_(rd, rj, scratch);
611    }
612  }
613}
614
615void TurboAssembler::Or(Register rd, Register rj, const Operand& rk) {
616  if (rk.is_reg()) {
617    or_(rd, rj, rk.rm());
618  } else {
619    if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) {
620      ori(rd, rj, static_cast<int32_t>(rk.immediate()));
621    } else {
622      // li handles the relocation.
623      UseScratchRegisterScope temps(this);
624      Register scratch = temps.Acquire();
625      DCHECK(rj != scratch);
626      li(scratch, rk);
627      or_(rd, rj, scratch);
628    }
629  }
630}
631
632void TurboAssembler::Xor(Register rd, Register rj, const Operand& rk) {
633  if (rk.is_reg()) {
634    xor_(rd, rj, rk.rm());
635  } else {
636    if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) {
637      xori(rd, rj, static_cast<int32_t>(rk.immediate()));
638    } else {
639      // li handles the relocation.
640      UseScratchRegisterScope temps(this);
641      Register scratch = temps.Acquire();
642      DCHECK(rj != scratch);
643      li(scratch, rk);
644      xor_(rd, rj, scratch);
645    }
646  }
647}
648
649void TurboAssembler::Nor(Register rd, Register rj, const Operand& rk) {
650  if (rk.is_reg()) {
651    nor(rd, rj, rk.rm());
652  } else {
653    // li handles the relocation.
654    UseScratchRegisterScope temps(this);
655    Register scratch = temps.Acquire();
656    DCHECK(rj != scratch);
657    li(scratch, rk);
658    nor(rd, rj, scratch);
659  }
660}
661
662void TurboAssembler::Andn(Register rd, Register rj, const Operand& rk) {
663  if (rk.is_reg()) {
664    andn(rd, rj, rk.rm());
665  } else {
666    // li handles the relocation.
667    UseScratchRegisterScope temps(this);
668    Register scratch = temps.Acquire();
669    DCHECK(rj != scratch);
670    li(scratch, rk);
671    andn(rd, rj, scratch);
672  }
673}
674
675void TurboAssembler::Orn(Register rd, Register rj, const Operand& rk) {
676  if (rk.is_reg()) {
677    orn(rd, rj, rk.rm());
678  } else {
679    // li handles the relocation.
680    UseScratchRegisterScope temps(this);
681    Register scratch = temps.Acquire();
682    DCHECK(rj != scratch);
683    li(scratch, rk);
684    orn(rd, rj, scratch);
685  }
686}
687
688void TurboAssembler::Neg(Register rj, const Operand& rk) {
689  DCHECK(rk.is_reg());
690  sub_d(rj, zero_reg, rk.rm());
691}
692
693void TurboAssembler::Slt(Register rd, Register rj, const Operand& rk) {
694  if (rk.is_reg()) {
695    slt(rd, rj, rk.rm());
696  } else {
697    if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) {
698      slti(rd, rj, static_cast<int32_t>(rk.immediate()));
699    } else {
700      // li handles the relocation.
701      UseScratchRegisterScope temps(this);
702      BlockTrampolinePoolScope block_trampoline_pool(this);
703      Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
704      DCHECK(rj != scratch);
705      li(scratch, rk);
706      slt(rd, rj, scratch);
707    }
708  }
709}
710
711void TurboAssembler::Sltu(Register rd, Register rj, const Operand& rk) {
712  if (rk.is_reg()) {
713    sltu(rd, rj, rk.rm());
714  } else {
715    if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) {
716      sltui(rd, rj, static_cast<int32_t>(rk.immediate()));
717    } else {
718      // li handles the relocation.
719      UseScratchRegisterScope temps(this);
720      BlockTrampolinePoolScope block_trampoline_pool(this);
721      Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
722      DCHECK(rj != scratch);
723      li(scratch, rk);
724      sltu(rd, rj, scratch);
725    }
726  }
727}
728
729void TurboAssembler::Sle(Register rd, Register rj, const Operand& rk) {
730  if (rk.is_reg()) {
731    slt(rd, rk.rm(), rj);
732  } else {
733    // li handles the relocation.
734    UseScratchRegisterScope temps(this);
735    Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
736    BlockTrampolinePoolScope block_trampoline_pool(this);
737    DCHECK(rj != scratch);
738    li(scratch, rk);
739    slt(rd, scratch, rj);
740  }
741  xori(rd, rd, 1);
742}
743
744void TurboAssembler::Sleu(Register rd, Register rj, const Operand& rk) {
745  if (rk.is_reg()) {
746    sltu(rd, rk.rm(), rj);
747  } else {
748    // li handles the relocation.
749    UseScratchRegisterScope temps(this);
750    Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
751    BlockTrampolinePoolScope block_trampoline_pool(this);
752    DCHECK(rj != scratch);
753    li(scratch, rk);
754    sltu(rd, scratch, rj);
755  }
756  xori(rd, rd, 1);
757}
758
759void TurboAssembler::Sge(Register rd, Register rj, const Operand& rk) {
760  Slt(rd, rj, rk);
761  xori(rd, rd, 1);
762}
763
764void TurboAssembler::Sgeu(Register rd, Register rj, const Operand& rk) {
765  Sltu(rd, rj, rk);
766  xori(rd, rd, 1);
767}
768
769void TurboAssembler::Sgt(Register rd, Register rj, const Operand& rk) {
770  if (rk.is_reg()) {
771    slt(rd, rk.rm(), rj);
772  } else {
773    // li handles the relocation.
774    UseScratchRegisterScope temps(this);
775    Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
776    BlockTrampolinePoolScope block_trampoline_pool(this);
777    DCHECK(rj != scratch);
778    li(scratch, rk);
779    slt(rd, scratch, rj);
780  }
781}
782
783void TurboAssembler::Sgtu(Register rd, Register rj, const Operand& rk) {
784  if (rk.is_reg()) {
785    sltu(rd, rk.rm(), rj);
786  } else {
787    // li handles the relocation.
788    UseScratchRegisterScope temps(this);
789    Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
790    BlockTrampolinePoolScope block_trampoline_pool(this);
791    DCHECK(rj != scratch);
792    li(scratch, rk);
793    sltu(rd, scratch, rj);
794  }
795}
796
797void TurboAssembler::Rotr_w(Register rd, Register rj, const Operand& rk) {
798  if (rk.is_reg()) {
799    rotr_w(rd, rj, rk.rm());
800  } else {
801    int64_t ror_value = rk.immediate() % 32;
802    if (ror_value < 0) {
803      ror_value += 32;
804    }
805    rotri_w(rd, rj, ror_value);
806  }
807}
808
809void TurboAssembler::Rotr_d(Register rd, Register rj, const Operand& rk) {
810  if (rk.is_reg()) {
811    rotr_d(rd, rj, rk.rm());
812  } else {
813    int64_t dror_value = rk.immediate() % 64;
814    if (dror_value < 0) dror_value += 64;
815    rotri_d(rd, rj, dror_value);
816  }
817}
818
819void TurboAssembler::Alsl_w(Register rd, Register rj, Register rk, uint8_t sa,
820                            Register scratch) {
821  DCHECK(sa >= 1 && sa <= 31);
822  if (sa <= 4) {
823    alsl_w(rd, rj, rk, sa);
824  } else {
825    Register tmp = rd == rk ? scratch : rd;
826    DCHECK(tmp != rk);
827    slli_w(tmp, rj, sa);
828    add_w(rd, rk, tmp);
829  }
830}
831
832void TurboAssembler::Alsl_d(Register rd, Register rj, Register rk, uint8_t sa,
833                            Register scratch) {
834  DCHECK(sa >= 1 && sa <= 63);
835  if (sa <= 4) {
836    alsl_d(rd, rj, rk, sa);
837  } else {
838    Register tmp = rd == rk ? scratch : rd;
839    DCHECK(tmp != rk);
840    slli_d(tmp, rj, sa);
841    add_d(rd, rk, tmp);
842  }
843}
844
845// ------------Pseudo-instructions-------------
846
847// Change endianness
848void TurboAssembler::ByteSwapSigned(Register dest, Register src,
849                                    int operand_size) {
850  DCHECK(operand_size == 2 || operand_size == 4 || operand_size == 8);
851  if (operand_size == 2) {
852    revb_2h(dest, src);
853    ext_w_h(dest, dest);
854  } else if (operand_size == 4) {
855    revb_2w(dest, src);
856    slli_w(dest, dest, 0);
857  } else {
858    revb_d(dest, dest);
859  }
860}
861
862void TurboAssembler::ByteSwapUnsigned(Register dest, Register src,
863                                      int operand_size) {
864  DCHECK(operand_size == 2 || operand_size == 4);
865  if (operand_size == 2) {
866    revb_2h(dest, src);
867    bstrins_d(dest, zero_reg, 63, 16);
868  } else {
869    revb_2w(dest, src);
870    bstrins_d(dest, zero_reg, 63, 32);
871  }
872}
873
874void TurboAssembler::Ld_b(Register rd, const MemOperand& rj) {
875  MemOperand source = rj;
876  AdjustBaseAndOffset(&source);
877  if (source.hasIndexReg()) {
878    ldx_b(rd, source.base(), source.index());
879  } else {
880    ld_b(rd, source.base(), source.offset());
881  }
882}
883
884void TurboAssembler::Ld_bu(Register rd, const MemOperand& rj) {
885  MemOperand source = rj;
886  AdjustBaseAndOffset(&source);
887  if (source.hasIndexReg()) {
888    ldx_bu(rd, source.base(), source.index());
889  } else {
890    ld_bu(rd, source.base(), source.offset());
891  }
892}
893
894void TurboAssembler::St_b(Register rd, const MemOperand& rj) {
895  MemOperand source = rj;
896  AdjustBaseAndOffset(&source);
897  if (source.hasIndexReg()) {
898    stx_b(rd, source.base(), source.index());
899  } else {
900    st_b(rd, source.base(), source.offset());
901  }
902}
903
904void TurboAssembler::Ld_h(Register rd, const MemOperand& rj) {
905  MemOperand source = rj;
906  AdjustBaseAndOffset(&source);
907  if (source.hasIndexReg()) {
908    ldx_h(rd, source.base(), source.index());
909  } else {
910    ld_h(rd, source.base(), source.offset());
911  }
912}
913
914void TurboAssembler::Ld_hu(Register rd, const MemOperand& rj) {
915  MemOperand source = rj;
916  AdjustBaseAndOffset(&source);
917  if (source.hasIndexReg()) {
918    ldx_hu(rd, source.base(), source.index());
919  } else {
920    ld_hu(rd, source.base(), source.offset());
921  }
922}
923
924void TurboAssembler::St_h(Register rd, const MemOperand& rj) {
925  MemOperand source = rj;
926  AdjustBaseAndOffset(&source);
927  if (source.hasIndexReg()) {
928    stx_h(rd, source.base(), source.index());
929  } else {
930    st_h(rd, source.base(), source.offset());
931  }
932}
933
934void TurboAssembler::Ld_w(Register rd, const MemOperand& rj) {
935  MemOperand source = rj;
936
937  if (!(source.hasIndexReg()) && is_int16(source.offset()) &&
938      (source.offset() & 0b11) == 0) {
939    ldptr_w(rd, source.base(), source.offset());
940    return;
941  }
942
943  AdjustBaseAndOffset(&source);
944  if (source.hasIndexReg()) {
945    ldx_w(rd, source.base(), source.index());
946  } else {
947    ld_w(rd, source.base(), source.offset());
948  }
949}
950
951void TurboAssembler::Ld_wu(Register rd, const MemOperand& rj) {
952  MemOperand source = rj;
953  AdjustBaseAndOffset(&source);
954  if (source.hasIndexReg()) {
955    ldx_wu(rd, source.base(), source.index());
956  } else {
957    ld_wu(rd, source.base(), source.offset());
958  }
959}
960
961void TurboAssembler::St_w(Register rd, const MemOperand& rj) {
962  MemOperand source = rj;
963
964  if (!(source.hasIndexReg()) && is_int16(source.offset()) &&
965      (source.offset() & 0b11) == 0) {
966    stptr_w(rd, source.base(), source.offset());
967    return;
968  }
969
970  AdjustBaseAndOffset(&source);
971  if (source.hasIndexReg()) {
972    stx_w(rd, source.base(), source.index());
973  } else {
974    st_w(rd, source.base(), source.offset());
975  }
976}
977
978void TurboAssembler::Ld_d(Register rd, const MemOperand& rj) {
979  MemOperand source = rj;
980
981  if (!(source.hasIndexReg()) && is_int16(source.offset()) &&
982      (source.offset() & 0b11) == 0) {
983    ldptr_d(rd, source.base(), source.offset());
984    return;
985  }
986
987  AdjustBaseAndOffset(&source);
988  if (source.hasIndexReg()) {
989    ldx_d(rd, source.base(), source.index());
990  } else {
991    ld_d(rd, source.base(), source.offset());
992  }
993}
994
995void TurboAssembler::St_d(Register rd, const MemOperand& rj) {
996  MemOperand source = rj;
997
998  if (!(source.hasIndexReg()) && is_int16(source.offset()) &&
999      (source.offset() & 0b11) == 0) {
1000    stptr_d(rd, source.base(), source.offset());
1001    return;
1002  }
1003
1004  AdjustBaseAndOffset(&source);
1005  if (source.hasIndexReg()) {
1006    stx_d(rd, source.base(), source.index());
1007  } else {
1008    st_d(rd, source.base(), source.offset());
1009  }
1010}
1011
1012void TurboAssembler::Fld_s(FPURegister fd, const MemOperand& src) {
1013  MemOperand tmp = src;
1014  AdjustBaseAndOffset(&tmp);
1015  if (tmp.hasIndexReg()) {
1016    fldx_s(fd, tmp.base(), tmp.index());
1017  } else {
1018    fld_s(fd, tmp.base(), tmp.offset());
1019  }
1020}
1021
1022void TurboAssembler::Fst_s(FPURegister fs, const MemOperand& src) {
1023  MemOperand tmp = src;
1024  AdjustBaseAndOffset(&tmp);
1025  if (tmp.hasIndexReg()) {
1026    fstx_s(fs, tmp.base(), tmp.index());
1027  } else {
1028    fst_s(fs, tmp.base(), tmp.offset());
1029  }
1030}
1031
1032void TurboAssembler::Fld_d(FPURegister fd, const MemOperand& src) {
1033  MemOperand tmp = src;
1034  AdjustBaseAndOffset(&tmp);
1035  if (tmp.hasIndexReg()) {
1036    fldx_d(fd, tmp.base(), tmp.index());
1037  } else {
1038    fld_d(fd, tmp.base(), tmp.offset());
1039  }
1040}
1041
1042void TurboAssembler::Fst_d(FPURegister fs, const MemOperand& src) {
1043  MemOperand tmp = src;
1044  AdjustBaseAndOffset(&tmp);
1045  if (tmp.hasIndexReg()) {
1046    fstx_d(fs, tmp.base(), tmp.index());
1047  } else {
1048    fst_d(fs, tmp.base(), tmp.offset());
1049  }
1050}
1051
1052void TurboAssembler::Ll_w(Register rd, const MemOperand& rj) {
1053  DCHECK(!rj.hasIndexReg());
1054  bool is_one_instruction = is_int14(rj.offset());
1055  if (is_one_instruction) {
1056    ll_w(rd, rj.base(), rj.offset());
1057  } else {
1058    UseScratchRegisterScope temps(this);
1059    Register scratch = temps.Acquire();
1060    li(scratch, rj.offset());
1061    add_d(scratch, scratch, rj.base());
1062    ll_w(rd, scratch, 0);
1063  }
1064}
1065
1066void TurboAssembler::Ll_d(Register rd, const MemOperand& rj) {
1067  DCHECK(!rj.hasIndexReg());
1068  bool is_one_instruction = is_int14(rj.offset());
1069  if (is_one_instruction) {
1070    ll_d(rd, rj.base(), rj.offset());
1071  } else {
1072    UseScratchRegisterScope temps(this);
1073    Register scratch = temps.Acquire();
1074    li(scratch, rj.offset());
1075    add_d(scratch, scratch, rj.base());
1076    ll_d(rd, scratch, 0);
1077  }
1078}
1079
1080void TurboAssembler::Sc_w(Register rd, const MemOperand& rj) {
1081  DCHECK(!rj.hasIndexReg());
1082  bool is_one_instruction = is_int14(rj.offset());
1083  if (is_one_instruction) {
1084    sc_w(rd, rj.base(), rj.offset());
1085  } else {
1086    UseScratchRegisterScope temps(this);
1087    Register scratch = temps.Acquire();
1088    li(scratch, rj.offset());
1089    add_d(scratch, scratch, rj.base());
1090    sc_w(rd, scratch, 0);
1091  }
1092}
1093
1094void TurboAssembler::Sc_d(Register rd, const MemOperand& rj) {
1095  DCHECK(!rj.hasIndexReg());
1096  bool is_one_instruction = is_int14(rj.offset());
1097  if (is_one_instruction) {
1098    sc_d(rd, rj.base(), rj.offset());
1099  } else {
1100    UseScratchRegisterScope temps(this);
1101    Register scratch = temps.Acquire();
1102    li(scratch, rj.offset());
1103    add_d(scratch, scratch, rj.base());
1104    sc_d(rd, scratch, 0);
1105  }
1106}
1107
1108void TurboAssembler::li(Register dst, Handle<HeapObject> value, LiFlags mode) {
1109  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1110  // non-isolate-independent code. In many cases it might be cheaper than
1111  // embedding the relocatable value.
1112  if (root_array_available_ && options().isolate_independent_code) {
1113    IndirectLoadConstant(dst, value);
1114    return;
1115  }
1116  li(dst, Operand(value), mode);
1117}
1118
1119void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) {
1120  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1121  // non-isolate-independent code. In many cases it might be cheaper than
1122  // embedding the relocatable value.
1123  if (root_array_available_ && options().isolate_independent_code) {
1124    IndirectLoadExternalReference(dst, value);
1125    return;
1126  }
1127  li(dst, Operand(value), mode);
1128}
1129
1130void TurboAssembler::li(Register dst, const StringConstantBase* string,
1131                        LiFlags mode) {
1132  li(dst, Operand::EmbeddedStringConstant(string), mode);
1133}
1134
1135static inline int InstrCountForLiLower32Bit(int64_t value) {
1136  if (is_int12(static_cast<int32_t>(value)) ||
1137      is_uint12(static_cast<int32_t>(value)) || !(value & kImm12Mask)) {
1138    return 1;
1139  } else {
1140    return 2;
1141  }
1142}
1143
1144void TurboAssembler::LiLower32BitHelper(Register rd, Operand j) {
1145  if (is_int12(static_cast<int32_t>(j.immediate()))) {
1146    addi_d(rd, zero_reg, j.immediate());
1147  } else if (is_uint12(static_cast<int32_t>(j.immediate()))) {
1148    ori(rd, zero_reg, j.immediate() & kImm12Mask);
1149  } else {
1150    lu12i_w(rd, j.immediate() >> 12 & 0xfffff);
1151    if (j.immediate() & kImm12Mask) {
1152      ori(rd, rd, j.immediate() & kImm12Mask);
1153    }
1154  }
1155}
1156
1157int TurboAssembler::InstrCountForLi64Bit(int64_t value) {
1158  if (is_int32(value)) {
1159    return InstrCountForLiLower32Bit(value);
1160  } else if (is_int52(value)) {
1161    return InstrCountForLiLower32Bit(value) + 1;
1162  } else if ((value & 0xffffffffL) == 0) {
1163    // 32 LSBs (Least Significant Bits) all set to zero.
1164    uint8_t tzc = base::bits::CountTrailingZeros32(value >> 32);
1165    uint8_t lzc = base::bits::CountLeadingZeros32(value >> 32);
1166    if (tzc >= 20) {
1167      return 1;
1168    } else if (tzc + lzc > 12) {
1169      return 2;
1170    } else {
1171      return 3;
1172    }
1173  } else {
1174    int64_t imm21 = (value >> 31) & 0x1fffffL;
1175    if (imm21 != 0x1fffffL && imm21 != 0) {
1176      return InstrCountForLiLower32Bit(value) + 2;
1177    } else {
1178      return InstrCountForLiLower32Bit(value) + 1;
1179    }
1180  }
1181  UNREACHABLE();
1182  return INT_MAX;
1183}
1184
1185// All changes to if...else conditions here must be added to
1186// InstrCountForLi64Bit as well.
1187void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
1188  DCHECK(!j.is_reg());
1189  DCHECK(!MustUseReg(j.rmode()));
1190  DCHECK(mode == OPTIMIZE_SIZE);
1191  int64_t imm = j.immediate();
1192  BlockTrampolinePoolScope block_trampoline_pool(this);
1193  // Normal load of an immediate value which does not need Relocation Info.
1194  if (is_int32(imm)) {
1195    LiLower32BitHelper(rd, j);
1196  } else if (is_int52(imm)) {
1197    LiLower32BitHelper(rd, j);
1198    lu32i_d(rd, imm >> 32 & 0xfffff);
1199  } else if ((imm & 0xffffffffL) == 0) {
1200    // 32 LSBs (Least Significant Bits) all set to zero.
1201    uint8_t tzc = base::bits::CountTrailingZeros32(imm >> 32);
1202    uint8_t lzc = base::bits::CountLeadingZeros32(imm >> 32);
1203    if (tzc >= 20) {
1204      lu52i_d(rd, zero_reg, imm >> 52 & kImm12Mask);
1205    } else if (tzc + lzc > 12) {
1206      int32_t mask = (1 << (32 - tzc)) - 1;
1207      lu12i_w(rd, imm >> (tzc + 32) & mask);
1208      slli_d(rd, rd, tzc + 20);
1209    } else {
1210      xor_(rd, rd, rd);
1211      lu32i_d(rd, imm >> 32 & 0xfffff);
1212      lu52i_d(rd, rd, imm >> 52 & kImm12Mask);
1213    }
1214  } else {
1215    int64_t imm21 = (imm >> 31) & 0x1fffffL;
1216    LiLower32BitHelper(rd, j);
1217    if (imm21 != 0x1fffffL && imm21 != 0) lu32i_d(rd, imm >> 32 & 0xfffff);
1218    lu52i_d(rd, rd, imm >> 52 & kImm12Mask);
1219  }
1220}
1221
1222void TurboAssembler::li(Register rd, Operand j, LiFlags mode) {
1223  DCHECK(!j.is_reg());
1224  BlockTrampolinePoolScope block_trampoline_pool(this);
1225  if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) {
1226    li_optimized(rd, j, mode);
1227  } else if (MustUseReg(j.rmode())) {
1228    int64_t immediate;
1229    if (j.IsHeapObjectRequest()) {
1230      RequestHeapObject(j.heap_object_request());
1231      immediate = 0;
1232    } else {
1233      immediate = j.immediate();
1234    }
1235
1236    RecordRelocInfo(j.rmode(), immediate);
1237    lu12i_w(rd, immediate >> 12 & 0xfffff);
1238    ori(rd, rd, immediate & kImm12Mask);
1239    lu32i_d(rd, immediate >> 32 & 0xfffff);
1240  } else if (mode == ADDRESS_LOAD) {
1241    // We always need the same number of instructions as we may need to patch
1242    // this code to load another value which may need all 3 instructions.
1243    lu12i_w(rd, j.immediate() >> 12 & 0xfffff);
1244    ori(rd, rd, j.immediate() & kImm12Mask);
1245    lu32i_d(rd, j.immediate() >> 32 & 0xfffff);
1246  } else {  // mode == CONSTANT_SIZE - always emit the same instruction
1247            // sequence.
1248    lu12i_w(rd, j.immediate() >> 12 & 0xfffff);
1249    ori(rd, rd, j.immediate() & kImm12Mask);
1250    lu32i_d(rd, j.immediate() >> 32 & 0xfffff);
1251    lu52i_d(rd, rd, j.immediate() >> 52 & kImm12Mask);
1252  }
1253}
1254
1255void TurboAssembler::MultiPush(RegList regs) {
1256  int16_t stack_offset = 0;
1257
1258  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1259    if ((regs.bits() & (1 << i)) != 0) {
1260      stack_offset -= kPointerSize;
1261      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1262    }
1263  }
1264  addi_d(sp, sp, stack_offset);
1265}
1266
1267void TurboAssembler::MultiPush(RegList regs1, RegList regs2) {
1268  DCHECK((regs1 & regs2).is_empty());
1269  int16_t stack_offset = 0;
1270
1271  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1272    if ((regs1.bits() & (1 << i)) != 0) {
1273      stack_offset -= kPointerSize;
1274      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1275    }
1276  }
1277  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1278    if ((regs2.bits() & (1 << i)) != 0) {
1279      stack_offset -= kPointerSize;
1280      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1281    }
1282  }
1283  addi_d(sp, sp, stack_offset);
1284}
1285
1286void TurboAssembler::MultiPush(RegList regs1, RegList regs2, RegList regs3) {
1287  DCHECK((regs1 & regs2).is_empty());
1288  DCHECK((regs1 & regs3).is_empty());
1289  DCHECK((regs2 & regs3).is_empty());
1290  int16_t stack_offset = 0;
1291
1292  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1293    if ((regs1.bits() & (1 << i)) != 0) {
1294      stack_offset -= kPointerSize;
1295      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1296    }
1297  }
1298  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1299    if ((regs2.bits() & (1 << i)) != 0) {
1300      stack_offset -= kPointerSize;
1301      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1302    }
1303  }
1304  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1305    if ((regs3.bits() & (1 << i)) != 0) {
1306      stack_offset -= kPointerSize;
1307      St_d(ToRegister(i), MemOperand(sp, stack_offset));
1308    }
1309  }
1310  addi_d(sp, sp, stack_offset);
1311}
1312
1313void TurboAssembler::MultiPop(RegList regs) {
1314  int16_t stack_offset = 0;
1315
1316  for (int16_t i = 0; i < kNumRegisters; i++) {
1317    if ((regs.bits() & (1 << i)) != 0) {
1318      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1319      stack_offset += kPointerSize;
1320    }
1321  }
1322  addi_d(sp, sp, stack_offset);
1323}
1324
1325void TurboAssembler::MultiPop(RegList regs1, RegList regs2) {
1326  DCHECK((regs1 & regs2).is_empty());
1327  int16_t stack_offset = 0;
1328
1329  for (int16_t i = 0; i < kNumRegisters; i++) {
1330    if ((regs2.bits() & (1 << i)) != 0) {
1331      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1332      stack_offset += kPointerSize;
1333    }
1334  }
1335  for (int16_t i = 0; i < kNumRegisters; i++) {
1336    if ((regs1.bits() & (1 << i)) != 0) {
1337      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1338      stack_offset += kPointerSize;
1339    }
1340  }
1341  addi_d(sp, sp, stack_offset);
1342}
1343
1344void TurboAssembler::MultiPop(RegList regs1, RegList regs2, RegList regs3) {
1345  DCHECK((regs1 & regs2).is_empty());
1346  DCHECK((regs1 & regs3).is_empty());
1347  DCHECK((regs2 & regs3).is_empty());
1348  int16_t stack_offset = 0;
1349
1350  for (int16_t i = 0; i < kNumRegisters; i++) {
1351    if ((regs3.bits() & (1 << i)) != 0) {
1352      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1353      stack_offset += kPointerSize;
1354    }
1355  }
1356  for (int16_t i = 0; i < kNumRegisters; i++) {
1357    if ((regs2.bits() & (1 << i)) != 0) {
1358      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1359      stack_offset += kPointerSize;
1360    }
1361  }
1362  for (int16_t i = 0; i < kNumRegisters; i++) {
1363    if ((regs1.bits() & (1 << i)) != 0) {
1364      Ld_d(ToRegister(i), MemOperand(sp, stack_offset));
1365      stack_offset += kPointerSize;
1366    }
1367  }
1368  addi_d(sp, sp, stack_offset);
1369}
1370
1371void TurboAssembler::MultiPushFPU(DoubleRegList regs) {
1372  int16_t num_to_push = regs.Count();
1373  int16_t stack_offset = num_to_push * kDoubleSize;
1374
1375  Sub_d(sp, sp, Operand(stack_offset));
1376  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1377    if ((regs.bits() & (1 << i)) != 0) {
1378      stack_offset -= kDoubleSize;
1379      Fst_d(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1380    }
1381  }
1382}
1383
1384void TurboAssembler::MultiPopFPU(DoubleRegList regs) {
1385  int16_t stack_offset = 0;
1386
1387  for (int16_t i = 0; i < kNumRegisters; i++) {
1388    if ((regs.bits() & (1 << i)) != 0) {
1389      Fld_d(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1390      stack_offset += kDoubleSize;
1391    }
1392  }
1393  addi_d(sp, sp, stack_offset);
1394}
1395
1396void TurboAssembler::Bstrpick_w(Register rk, Register rj, uint16_t msbw,
1397                                uint16_t lsbw) {
1398  DCHECK_LT(lsbw, msbw);
1399  DCHECK_LT(lsbw, 32);
1400  DCHECK_LT(msbw, 32);
1401  bstrpick_w(rk, rj, msbw, lsbw);
1402}
1403
1404void TurboAssembler::Bstrpick_d(Register rk, Register rj, uint16_t msbw,
1405                                uint16_t lsbw) {
1406  DCHECK_LT(lsbw, msbw);
1407  DCHECK_LT(lsbw, 64);
1408  DCHECK_LT(msbw, 64);
1409  bstrpick_d(rk, rj, msbw, lsbw);
1410}
1411
1412void TurboAssembler::Neg_s(FPURegister fd, FPURegister fj) { fneg_s(fd, fj); }
1413
1414void TurboAssembler::Neg_d(FPURegister fd, FPURegister fj) { fneg_d(fd, fj); }
1415
1416void TurboAssembler::Ffint_d_uw(FPURegister fd, FPURegister fj) {
1417  BlockTrampolinePoolScope block_trampoline_pool(this);
1418  movfr2gr_s(t8, fj);
1419  Ffint_d_uw(fd, t8);
1420}
1421
1422void TurboAssembler::Ffint_d_uw(FPURegister fd, Register rj) {
1423  BlockTrampolinePoolScope block_trampoline_pool(this);
1424  DCHECK(rj != t7);
1425
1426  Bstrpick_d(t7, rj, 31, 0);
1427  movgr2fr_d(fd, t7);
1428  ffint_d_l(fd, fd);
1429}
1430
1431void TurboAssembler::Ffint_d_ul(FPURegister fd, FPURegister fj) {
1432  BlockTrampolinePoolScope block_trampoline_pool(this);
1433  movfr2gr_d(t8, fj);
1434  Ffint_d_ul(fd, t8);
1435}
1436
1437void TurboAssembler::Ffint_d_ul(FPURegister fd, Register rj) {
1438  BlockTrampolinePoolScope block_trampoline_pool(this);
1439  DCHECK(rj != t7);
1440
1441  Label msb_clear, conversion_done;
1442
1443  Branch(&msb_clear, ge, rj, Operand(zero_reg));
1444
1445  // Rj >= 2^63
1446  andi(t7, rj, 1);
1447  srli_d(rj, rj, 1);
1448  or_(t7, t7, rj);
1449  movgr2fr_d(fd, t7);
1450  ffint_d_l(fd, fd);
1451  fadd_d(fd, fd, fd);
1452  Branch(&conversion_done);
1453
1454  bind(&msb_clear);
1455  // Rs < 2^63, we can do simple conversion.
1456  movgr2fr_d(fd, rj);
1457  ffint_d_l(fd, fd);
1458
1459  bind(&conversion_done);
1460}
1461
1462void TurboAssembler::Ffint_s_uw(FPURegister fd, FPURegister fj) {
1463  BlockTrampolinePoolScope block_trampoline_pool(this);
1464  movfr2gr_d(t8, fj);
1465  Ffint_s_uw(fd, t8);
1466}
1467
1468void TurboAssembler::Ffint_s_uw(FPURegister fd, Register rj) {
1469  BlockTrampolinePoolScope block_trampoline_pool(this);
1470  DCHECK(rj != t7);
1471
1472  bstrpick_d(t7, rj, 31, 0);
1473  movgr2fr_d(fd, t7);
1474  ffint_s_l(fd, fd);
1475}
1476
1477void TurboAssembler::Ffint_s_ul(FPURegister fd, FPURegister fj) {
1478  BlockTrampolinePoolScope block_trampoline_pool(this);
1479  movfr2gr_d(t8, fj);
1480  Ffint_s_ul(fd, t8);
1481}
1482
1483void TurboAssembler::Ffint_s_ul(FPURegister fd, Register rj) {
1484  BlockTrampolinePoolScope block_trampoline_pool(this);
1485  DCHECK(rj != t7);
1486
1487  Label positive, conversion_done;
1488
1489  Branch(&positive, ge, rj, Operand(zero_reg));
1490
1491  // Rs >= 2^31.
1492  andi(t7, rj, 1);
1493  srli_d(rj, rj, 1);
1494  or_(t7, t7, rj);
1495  movgr2fr_d(fd, t7);
1496  ffint_s_l(fd, fd);
1497  fadd_s(fd, fd, fd);
1498  Branch(&conversion_done);
1499
1500  bind(&positive);
1501  // Rs < 2^31, we can do simple conversion.
1502  movgr2fr_d(fd, rj);
1503  ffint_s_l(fd, fd);
1504
1505  bind(&conversion_done);
1506}
1507
1508void MacroAssembler::Ftintrne_l_d(FPURegister fd, FPURegister fj) {
1509  ftintrne_l_d(fd, fj);
1510}
1511
1512void MacroAssembler::Ftintrm_l_d(FPURegister fd, FPURegister fj) {
1513  ftintrm_l_d(fd, fj);
1514}
1515
1516void MacroAssembler::Ftintrp_l_d(FPURegister fd, FPURegister fj) {
1517  ftintrp_l_d(fd, fj);
1518}
1519
1520void MacroAssembler::Ftintrz_l_d(FPURegister fd, FPURegister fj) {
1521  ftintrz_l_d(fd, fj);
1522}
1523
1524void MacroAssembler::Ftintrz_l_ud(FPURegister fd, FPURegister fj,
1525                                  FPURegister scratch) {
1526  BlockTrampolinePoolScope block_trampoline_pool(this);
1527  // Load to GPR.
1528  movfr2gr_d(t8, fj);
1529  // Reset sign bit.
1530  {
1531    UseScratchRegisterScope temps(this);
1532    Register scratch1 = temps.Acquire();
1533    li(scratch1, 0x7FFFFFFFFFFFFFFFl);
1534    and_(t8, t8, scratch1);
1535  }
1536  movgr2fr_d(scratch, t8);
1537  Ftintrz_l_d(fd, scratch);
1538}
1539
1540void TurboAssembler::Ftintrz_uw_d(FPURegister fd, FPURegister fj,
1541                                  FPURegister scratch) {
1542  BlockTrampolinePoolScope block_trampoline_pool(this);
1543  Ftintrz_uw_d(t8, fj, scratch);
1544  movgr2fr_w(fd, t8);
1545}
1546
1547void TurboAssembler::Ftintrz_uw_s(FPURegister fd, FPURegister fj,
1548                                  FPURegister scratch) {
1549  BlockTrampolinePoolScope block_trampoline_pool(this);
1550  Ftintrz_uw_s(t8, fj, scratch);
1551  movgr2fr_w(fd, t8);
1552}
1553
1554void TurboAssembler::Ftintrz_ul_d(FPURegister fd, FPURegister fj,
1555                                  FPURegister scratch, Register result) {
1556  BlockTrampolinePoolScope block_trampoline_pool(this);
1557  Ftintrz_ul_d(t8, fj, scratch, result);
1558  movgr2fr_d(fd, t8);
1559}
1560
1561void TurboAssembler::Ftintrz_ul_s(FPURegister fd, FPURegister fj,
1562                                  FPURegister scratch, Register result) {
1563  BlockTrampolinePoolScope block_trampoline_pool(this);
1564  Ftintrz_ul_s(t8, fj, scratch, result);
1565  movgr2fr_d(fd, t8);
1566}
1567
1568void MacroAssembler::Ftintrz_w_d(FPURegister fd, FPURegister fj) {
1569  ftintrz_w_d(fd, fj);
1570}
1571
1572void MacroAssembler::Ftintrne_w_d(FPURegister fd, FPURegister fj) {
1573  ftintrne_w_d(fd, fj);
1574}
1575
1576void MacroAssembler::Ftintrm_w_d(FPURegister fd, FPURegister fj) {
1577  ftintrm_w_d(fd, fj);
1578}
1579
1580void MacroAssembler::Ftintrp_w_d(FPURegister fd, FPURegister fj) {
1581  ftintrp_w_d(fd, fj);
1582}
1583
1584void TurboAssembler::Ftintrz_uw_d(Register rd, FPURegister fj,
1585                                  FPURegister scratch) {
1586  DCHECK(fj != scratch);
1587  DCHECK(rd != t7);
1588
1589  {
1590    // Load 2^31 into scratch as its float representation.
1591    UseScratchRegisterScope temps(this);
1592    Register scratch1 = temps.Acquire();
1593    li(scratch1, 0x41E00000);
1594    movgr2fr_w(scratch, zero_reg);
1595    movgr2frh_w(scratch, scratch1);
1596  }
1597  // Test if scratch > fd.
1598  // If fd < 2^31 we can convert it normally.
1599  Label simple_convert;
1600  CompareF64(fj, scratch, CLT);
1601  BranchTrueShortF(&simple_convert);
1602
1603  // First we subtract 2^31 from fd, then trunc it to rs
1604  // and add 2^31 to rj.
1605  fsub_d(scratch, fj, scratch);
1606  ftintrz_w_d(scratch, scratch);
1607  movfr2gr_s(rd, scratch);
1608  Or(rd, rd, 1 << 31);
1609
1610  Label done;
1611  Branch(&done);
1612  // Simple conversion.
1613  bind(&simple_convert);
1614  ftintrz_w_d(scratch, fj);
1615  movfr2gr_s(rd, scratch);
1616
1617  bind(&done);
1618}
1619
1620void TurboAssembler::Ftintrz_uw_s(Register rd, FPURegister fj,
1621                                  FPURegister scratch) {
1622  DCHECK(fj != scratch);
1623  DCHECK(rd != t7);
1624  {
1625    // Load 2^31 into scratch as its float representation.
1626    UseScratchRegisterScope temps(this);
1627    Register scratch1 = temps.Acquire();
1628    li(scratch1, 0x4F000000);
1629    movgr2fr_w(scratch, scratch1);
1630  }
1631  // Test if scratch > fs.
1632  // If fs < 2^31 we can convert it normally.
1633  Label simple_convert;
1634  CompareF32(fj, scratch, CLT);
1635  BranchTrueShortF(&simple_convert);
1636
1637  // First we subtract 2^31 from fs, then trunc it to rd
1638  // and add 2^31 to rd.
1639  fsub_s(scratch, fj, scratch);
1640  ftintrz_w_s(scratch, scratch);
1641  movfr2gr_s(rd, scratch);
1642  Or(rd, rd, 1 << 31);
1643
1644  Label done;
1645  Branch(&done);
1646  // Simple conversion.
1647  bind(&simple_convert);
1648  ftintrz_w_s(scratch, fj);
1649  movfr2gr_s(rd, scratch);
1650
1651  bind(&done);
1652}
1653
1654void TurboAssembler::Ftintrz_ul_d(Register rd, FPURegister fj,
1655                                  FPURegister scratch, Register result) {
1656  DCHECK(fj != scratch);
1657  DCHECK(result.is_valid() ? !AreAliased(rd, result, t7) : !AreAliased(rd, t7));
1658
1659  Label simple_convert, done, fail;
1660  if (result.is_valid()) {
1661    mov(result, zero_reg);
1662    Move(scratch, -1.0);
1663    // If fd =< -1 or unordered, then the conversion fails.
1664    CompareF64(fj, scratch, CLE);
1665    BranchTrueShortF(&fail);
1666    CompareIsNanF64(fj, scratch);
1667    BranchTrueShortF(&fail);
1668  }
1669
1670  // Load 2^63 into scratch as its double representation.
1671  li(t7, 0x43E0000000000000);
1672  movgr2fr_d(scratch, t7);
1673
1674  // Test if scratch > fs.
1675  // If fs < 2^63 we can convert it normally.
1676  CompareF64(fj, scratch, CLT);
1677  BranchTrueShortF(&simple_convert);
1678
1679  // First we subtract 2^63 from fs, then trunc it to rd
1680  // and add 2^63 to rd.
1681  fsub_d(scratch, fj, scratch);
1682  ftintrz_l_d(scratch, scratch);
1683  movfr2gr_d(rd, scratch);
1684  Or(rd, rd, Operand(1UL << 63));
1685  Branch(&done);
1686
1687  // Simple conversion.
1688  bind(&simple_convert);
1689  ftintrz_l_d(scratch, fj);
1690  movfr2gr_d(rd, scratch);
1691
1692  bind(&done);
1693  if (result.is_valid()) {
1694    // Conversion is failed if the result is negative.
1695    {
1696      UseScratchRegisterScope temps(this);
1697      Register scratch1 = temps.Acquire();
1698      addi_d(scratch1, zero_reg, -1);
1699      srli_d(scratch1, scratch1, 1);  // Load 2^62.
1700      movfr2gr_d(result, scratch);
1701      xor_(result, result, scratch1);
1702    }
1703    Slt(result, zero_reg, result);
1704  }
1705
1706  bind(&fail);
1707}
1708
1709void TurboAssembler::Ftintrz_ul_s(Register rd, FPURegister fj,
1710                                  FPURegister scratch, Register result) {
1711  DCHECK(fj != scratch);
1712  DCHECK(result.is_valid() ? !AreAliased(rd, result, t7) : !AreAliased(rd, t7));
1713
1714  Label simple_convert, done, fail;
1715  if (result.is_valid()) {
1716    mov(result, zero_reg);
1717    Move(scratch, -1.0f);
1718    // If fd =< -1 or unordered, then the conversion fails.
1719    CompareF32(fj, scratch, CLE);
1720    BranchTrueShortF(&fail);
1721    CompareIsNanF32(fj, scratch);
1722    BranchTrueShortF(&fail);
1723  }
1724
1725  {
1726    // Load 2^63 into scratch as its float representation.
1727    UseScratchRegisterScope temps(this);
1728    Register scratch1 = temps.Acquire();
1729    li(scratch1, 0x5F000000);
1730    movgr2fr_w(scratch, scratch1);
1731  }
1732
1733  // Test if scratch > fs.
1734  // If fs < 2^63 we can convert it normally.
1735  CompareF32(fj, scratch, CLT);
1736  BranchTrueShortF(&simple_convert);
1737
1738  // First we subtract 2^63 from fs, then trunc it to rd
1739  // and add 2^63 to rd.
1740  fsub_s(scratch, fj, scratch);
1741  ftintrz_l_s(scratch, scratch);
1742  movfr2gr_d(rd, scratch);
1743  Or(rd, rd, Operand(1UL << 63));
1744  Branch(&done);
1745
1746  // Simple conversion.
1747  bind(&simple_convert);
1748  ftintrz_l_s(scratch, fj);
1749  movfr2gr_d(rd, scratch);
1750
1751  bind(&done);
1752  if (result.is_valid()) {
1753    // Conversion is failed if the result is negative or unordered.
1754    {
1755      UseScratchRegisterScope temps(this);
1756      Register scratch1 = temps.Acquire();
1757      addi_d(scratch1, zero_reg, -1);
1758      srli_d(scratch1, scratch1, 1);  // Load 2^62.
1759      movfr2gr_d(result, scratch);
1760      xor_(result, result, scratch1);
1761    }
1762    Slt(result, zero_reg, result);
1763  }
1764
1765  bind(&fail);
1766}
1767
1768void TurboAssembler::RoundDouble(FPURegister dst, FPURegister src,
1769                                 FPURoundingMode mode) {
1770  BlockTrampolinePoolScope block_trampoline_pool(this);
1771  Register scratch = t8;
1772  movfcsr2gr(scratch);
1773  li(t7, Operand(mode));
1774  movgr2fcsr(t7);
1775  frint_d(dst, src);
1776  movgr2fcsr(scratch);
1777}
1778
1779void TurboAssembler::Floor_d(FPURegister dst, FPURegister src) {
1780  RoundDouble(dst, src, mode_floor);
1781}
1782
1783void TurboAssembler::Ceil_d(FPURegister dst, FPURegister src) {
1784  RoundDouble(dst, src, mode_ceil);
1785}
1786
1787void TurboAssembler::Trunc_d(FPURegister dst, FPURegister src) {
1788  RoundDouble(dst, src, mode_trunc);
1789}
1790
1791void TurboAssembler::Round_d(FPURegister dst, FPURegister src) {
1792  RoundDouble(dst, src, mode_round);
1793}
1794
1795void TurboAssembler::RoundFloat(FPURegister dst, FPURegister src,
1796                                FPURoundingMode mode) {
1797  BlockTrampolinePoolScope block_trampoline_pool(this);
1798  Register scratch = t8;
1799  movfcsr2gr(scratch);
1800  li(t7, Operand(mode));
1801  movgr2fcsr(t7);
1802  frint_s(dst, src);
1803  movgr2fcsr(scratch);
1804}
1805
1806void TurboAssembler::Floor_s(FPURegister dst, FPURegister src) {
1807  RoundFloat(dst, src, mode_floor);
1808}
1809
1810void TurboAssembler::Ceil_s(FPURegister dst, FPURegister src) {
1811  RoundFloat(dst, src, mode_ceil);
1812}
1813
1814void TurboAssembler::Trunc_s(FPURegister dst, FPURegister src) {
1815  RoundFloat(dst, src, mode_trunc);
1816}
1817
1818void TurboAssembler::Round_s(FPURegister dst, FPURegister src) {
1819  RoundFloat(dst, src, mode_round);
1820}
1821
1822void TurboAssembler::CompareF(FPURegister cmp1, FPURegister cmp2,
1823                              FPUCondition cc, CFRegister cd, bool f32) {
1824  if (f32) {
1825    fcmp_cond_s(cc, cmp1, cmp2, cd);
1826  } else {
1827    fcmp_cond_d(cc, cmp1, cmp2, cd);
1828  }
1829}
1830
1831void TurboAssembler::CompareIsNanF(FPURegister cmp1, FPURegister cmp2,
1832                                   CFRegister cd, bool f32) {
1833  CompareF(cmp1, cmp2, CUN, cd, f32);
1834}
1835
1836void TurboAssembler::BranchTrueShortF(Label* target, CFRegister cj) {
1837  bcnez(cj, target);
1838}
1839
1840void TurboAssembler::BranchFalseShortF(Label* target, CFRegister cj) {
1841  bceqz(cj, target);
1842}
1843
1844void TurboAssembler::BranchTrueF(Label* target, CFRegister cj) {
1845  // TODO(yuyin): can be optimzed
1846  bool long_branch = target->is_bound()
1847                         ? !is_near(target, OffsetSize::kOffset21)
1848                         : is_trampoline_emitted();
1849  if (long_branch) {
1850    Label skip;
1851    BranchFalseShortF(&skip, cj);
1852    Branch(target);
1853    bind(&skip);
1854  } else {
1855    BranchTrueShortF(target, cj);
1856  }
1857}
1858
1859void TurboAssembler::BranchFalseF(Label* target, CFRegister cj) {
1860  bool long_branch = target->is_bound()
1861                         ? !is_near(target, OffsetSize::kOffset21)
1862                         : is_trampoline_emitted();
1863  if (long_branch) {
1864    Label skip;
1865    BranchTrueShortF(&skip, cj);
1866    Branch(target);
1867    bind(&skip);
1868  } else {
1869    BranchFalseShortF(target, cj);
1870  }
1871}
1872
1873void TurboAssembler::FmoveLow(FPURegister dst, Register src_low) {
1874  UseScratchRegisterScope temps(this);
1875  Register scratch = temps.Acquire();
1876  DCHECK(src_low != scratch);
1877  movfrh2gr_s(scratch, dst);
1878  movgr2fr_w(dst, src_low);
1879  movgr2frh_w(dst, scratch);
1880}
1881
1882void TurboAssembler::Move(FPURegister dst, uint32_t src) {
1883  UseScratchRegisterScope temps(this);
1884  Register scratch = temps.Acquire();
1885  li(scratch, Operand(static_cast<int32_t>(src)));
1886  movgr2fr_w(dst, scratch);
1887}
1888
1889void TurboAssembler::Move(FPURegister dst, uint64_t src) {
1890  // Handle special values first.
1891  if (src == bit_cast<uint64_t>(0.0) && has_double_zero_reg_set_) {
1892    fmov_d(dst, kDoubleRegZero);
1893  } else if (src == bit_cast<uint64_t>(-0.0) && has_double_zero_reg_set_) {
1894    Neg_d(dst, kDoubleRegZero);
1895  } else {
1896    UseScratchRegisterScope temps(this);
1897    Register scratch = temps.Acquire();
1898    li(scratch, Operand(static_cast<int64_t>(src)));
1899    movgr2fr_d(dst, scratch);
1900    if (dst == kDoubleRegZero) has_double_zero_reg_set_ = true;
1901  }
1902}
1903
1904void TurboAssembler::Movz(Register rd, Register rj, Register rk) {
1905  UseScratchRegisterScope temps(this);
1906  Register scratch = temps.Acquire();
1907  masknez(scratch, rj, rk);
1908  maskeqz(rd, rd, rk);
1909  or_(rd, rd, scratch);
1910}
1911
1912void TurboAssembler::Movn(Register rd, Register rj, Register rk) {
1913  UseScratchRegisterScope temps(this);
1914  Register scratch = temps.Acquire();
1915  maskeqz(scratch, rj, rk);
1916  masknez(rd, rd, rk);
1917  or_(rd, rd, scratch);
1918}
1919
1920void TurboAssembler::LoadZeroOnCondition(Register rd, Register rj,
1921                                         const Operand& rk, Condition cond) {
1922  BlockTrampolinePoolScope block_trampoline_pool(this);
1923  switch (cond) {
1924    case cc_always:
1925      mov(rd, zero_reg);
1926      break;
1927    case eq:
1928      if (rj == zero_reg) {
1929        if (rk.is_reg()) {
1930          LoadZeroIfConditionZero(rd, rk.rm());
1931        } else if (rk.immediate() == 0) {
1932          mov(rd, zero_reg);
1933        }
1934      } else if (IsZero(rk)) {
1935        LoadZeroIfConditionZero(rd, rj);
1936      } else {
1937        Sub_d(t7, rj, rk);
1938        LoadZeroIfConditionZero(rd, t7);
1939      }
1940      break;
1941    case ne:
1942      if (rj == zero_reg) {
1943        if (rk.is_reg()) {
1944          LoadZeroIfConditionNotZero(rd, rk.rm());
1945        } else if (rk.immediate() != 0) {
1946          mov(rd, zero_reg);
1947        }
1948      } else if (IsZero(rk)) {
1949        LoadZeroIfConditionNotZero(rd, rj);
1950      } else {
1951        Sub_d(t7, rj, rk);
1952        LoadZeroIfConditionNotZero(rd, t7);
1953      }
1954      break;
1955
1956    // Signed comparison.
1957    case greater:
1958      Sgt(t7, rj, rk);
1959      LoadZeroIfConditionNotZero(rd, t7);
1960      break;
1961    case greater_equal:
1962      Sge(t7, rj, rk);
1963      LoadZeroIfConditionNotZero(rd, t7);
1964      // rj >= rk
1965      break;
1966    case less:
1967      Slt(t7, rj, rk);
1968      LoadZeroIfConditionNotZero(rd, t7);
1969      // rj < rk
1970      break;
1971    case less_equal:
1972      Sle(t7, rj, rk);
1973      LoadZeroIfConditionNotZero(rd, t7);
1974      // rj <= rk
1975      break;
1976
1977    // Unsigned comparison.
1978    case Ugreater:
1979      Sgtu(t7, rj, rk);
1980      LoadZeroIfConditionNotZero(rd, t7);
1981      // rj > rk
1982      break;
1983
1984    case Ugreater_equal:
1985      Sgeu(t7, rj, rk);
1986      LoadZeroIfConditionNotZero(rd, t7);
1987      // rj >= rk
1988      break;
1989    case Uless:
1990      Sltu(t7, rj, rk);
1991      LoadZeroIfConditionNotZero(rd, t7);
1992      // rj < rk
1993      break;
1994    case Uless_equal:
1995      Sleu(t7, rj, rk);
1996      LoadZeroIfConditionNotZero(rd, t7);
1997      // rj <= rk
1998      break;
1999    default:
2000      UNREACHABLE();
2001  }  // namespace internal
2002}  // namespace internal
2003
2004void TurboAssembler::LoadZeroIfConditionNotZero(Register dest,
2005                                                Register condition) {
2006  masknez(dest, dest, condition);
2007}
2008
2009void TurboAssembler::LoadZeroIfConditionZero(Register dest,
2010                                             Register condition) {
2011  maskeqz(dest, dest, condition);
2012}
2013
2014void TurboAssembler::LoadZeroIfFPUCondition(Register dest, CFRegister cc) {
2015  UseScratchRegisterScope temps(this);
2016  Register scratch = temps.Acquire();
2017  movcf2gr(scratch, cc);
2018  LoadZeroIfConditionNotZero(dest, scratch);
2019}
2020
2021void TurboAssembler::LoadZeroIfNotFPUCondition(Register dest, CFRegister cc) {
2022  UseScratchRegisterScope temps(this);
2023  Register scratch = temps.Acquire();
2024  movcf2gr(scratch, cc);
2025  LoadZeroIfConditionZero(dest, scratch);
2026}
2027
2028void TurboAssembler::Clz_w(Register rd, Register rj) { clz_w(rd, rj); }
2029
2030void TurboAssembler::Clz_d(Register rd, Register rj) { clz_d(rd, rj); }
2031
2032void TurboAssembler::Ctz_w(Register rd, Register rj) { ctz_w(rd, rj); }
2033
2034void TurboAssembler::Ctz_d(Register rd, Register rj) { ctz_d(rd, rj); }
2035
2036// TODO(LOONG_dev): Optimize like arm64, use simd instruction
2037void TurboAssembler::Popcnt_w(Register rd, Register rj) {
2038  ASM_CODE_COMMENT(this);
2039  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
2040  //
2041  // A generalization of the best bit counting method to integers of
2042  // bit-widths up to 128 (parameterized by type T) is this:
2043  //
2044  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
2045  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
2046  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
2047  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count
2048  //
2049  // There are algorithms which are faster in the cases where very few
2050  // bits are set but the algorithm here attempts to minimize the total
2051  // number of instructions executed even when a large number of bits
2052  // are set.
2053  int32_t B0 = 0x55555555;     // (T)~(T)0/3
2054  int32_t B1 = 0x33333333;     // (T)~(T)0/15*3
2055  int32_t B2 = 0x0F0F0F0F;     // (T)~(T)0/255*15
2056  int32_t value = 0x01010101;  // (T)~(T)0/255
2057  uint32_t shift = 24;         // (sizeof(T) - 1) * BITS_PER_BYTE
2058
2059  UseScratchRegisterScope temps(this);
2060  BlockTrampolinePoolScope block_trampoline_pool(this);
2061  Register scratch = temps.Acquire();
2062  Register scratch2 = t8;
2063  srli_w(scratch, rj, 1);
2064  li(scratch2, B0);
2065  And(scratch, scratch, scratch2);
2066  Sub_w(scratch, rj, scratch);
2067  li(scratch2, B1);
2068  And(rd, scratch, scratch2);
2069  srli_w(scratch, scratch, 2);
2070  And(scratch, scratch, scratch2);
2071  Add_w(scratch, rd, scratch);
2072  srli_w(rd, scratch, 4);
2073  Add_w(rd, rd, scratch);
2074  li(scratch2, B2);
2075  And(rd, rd, scratch2);
2076  li(scratch, value);
2077  Mul_w(rd, rd, scratch);
2078  srli_w(rd, rd, shift);
2079}
2080
2081void TurboAssembler::Popcnt_d(Register rd, Register rj) {
2082  ASM_CODE_COMMENT(this);
2083  int64_t B0 = 0x5555555555555555l;     // (T)~(T)0/3
2084  int64_t B1 = 0x3333333333333333l;     // (T)~(T)0/15*3
2085  int64_t B2 = 0x0F0F0F0F0F0F0F0Fl;     // (T)~(T)0/255*15
2086  int64_t value = 0x0101010101010101l;  // (T)~(T)0/255
2087  uint32_t shift = 56;                  // (sizeof(T) - 1) * BITS_PER_BYTE
2088
2089  UseScratchRegisterScope temps(this);
2090  BlockTrampolinePoolScope block_trampoline_pool(this);
2091  Register scratch = temps.Acquire();
2092  Register scratch2 = t8;
2093  srli_d(scratch, rj, 1);
2094  li(scratch2, B0);
2095  And(scratch, scratch, scratch2);
2096  Sub_d(scratch, rj, scratch);
2097  li(scratch2, B1);
2098  And(rd, scratch, scratch2);
2099  srli_d(scratch, scratch, 2);
2100  And(scratch, scratch, scratch2);
2101  Add_d(scratch, rd, scratch);
2102  srli_d(rd, scratch, 4);
2103  Add_d(rd, rd, scratch);
2104  li(scratch2, B2);
2105  And(rd, rd, scratch2);
2106  li(scratch, value);
2107  Mul_d(rd, rd, scratch);
2108  srli_d(rd, rd, shift);
2109}
2110
2111void TurboAssembler::ExtractBits(Register dest, Register source, Register pos,
2112                                 int size, bool sign_extend) {
2113  sra_d(dest, source, pos);
2114  bstrpick_d(dest, dest, size - 1, 0);
2115  if (sign_extend) {
2116    switch (size) {
2117      case 8:
2118        ext_w_b(dest, dest);
2119        break;
2120      case 16:
2121        ext_w_h(dest, dest);
2122        break;
2123      case 32:
2124        // sign-extend word
2125        slli_w(dest, dest, 0);
2126        break;
2127      default:
2128        UNREACHABLE();
2129    }
2130  }
2131}
2132
2133void TurboAssembler::InsertBits(Register dest, Register source, Register pos,
2134                                int size) {
2135  Rotr_d(dest, dest, pos);
2136  bstrins_d(dest, source, size - 1, 0);
2137  {
2138    UseScratchRegisterScope temps(this);
2139    Register scratch = temps.Acquire();
2140    Sub_d(scratch, zero_reg, pos);
2141    Rotr_d(dest, dest, scratch);
2142  }
2143}
2144
2145void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
2146                                                DoubleRegister double_input,
2147                                                Label* done) {
2148  DoubleRegister single_scratch = kScratchDoubleReg.low();
2149  BlockTrampolinePoolScope block_trampoline_pool(this);
2150  UseScratchRegisterScope temps(this);
2151  Register scratch = temps.Acquire();
2152  Register scratch2 = temps.Acquire();
2153
2154  ftintrz_l_d(single_scratch, double_input);
2155  movfr2gr_d(scratch2, single_scratch);
2156  li(scratch, 1L << 63);
2157  Xor(scratch, scratch, scratch2);
2158  rotri_d(scratch2, scratch, 1);
2159  movfr2gr_s(result, single_scratch);
2160  Branch(done, ne, scratch, Operand(scratch2));
2161
2162  // Truncate NaN to zero.
2163  CompareIsNanF64(double_input, double_input);
2164  Move(result, zero_reg);
2165  bcnez(FCC0, done);
2166}
2167
2168void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
2169                                       Register result,
2170                                       DoubleRegister double_input,
2171                                       StubCallMode stub_mode) {
2172  Label done;
2173
2174  TryInlineTruncateDoubleToI(result, double_input, &done);
2175
2176  // If we fell through then inline version didn't succeed - call stub instead.
2177  Sub_d(sp, sp,
2178        Operand(kDoubleSize + kSystemPointerSize));  // Put input on stack.
2179  St_d(ra, MemOperand(sp, kSystemPointerSize));
2180  Fst_d(double_input, MemOperand(sp, 0));
2181
2182#if V8_ENABLE_WEBASSEMBLY
2183  if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
2184    Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
2185#else
2186  // For balance.
2187  if (false) {
2188#endif  // V8_ENABLE_WEBASSEMBLY
2189  } else {
2190    Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
2191  }
2192
2193  Pop(ra, result);
2194  bind(&done);
2195}
2196
2197// BRANCH_ARGS_CHECK checks that conditional jump arguments are correct.
2198#define BRANCH_ARGS_CHECK(cond, rj, rk)                                  \
2199  DCHECK((cond == cc_always && rj == zero_reg && rk.rm() == zero_reg) || \
2200         (cond != cc_always && (rj != zero_reg || rk.rm() != zero_reg)))
2201
2202void TurboAssembler::Branch(Label* L, bool need_link) {
2203  int offset = GetOffset(L, OffsetSize::kOffset26);
2204  if (need_link) {
2205    bl(offset);
2206  } else {
2207    b(offset);
2208  }
2209}
2210
2211void TurboAssembler::Branch(Label* L, Condition cond, Register rj,
2212                            const Operand& rk, bool need_link) {
2213  if (L->is_bound()) {
2214    BRANCH_ARGS_CHECK(cond, rj, rk);
2215    if (!BranchShortOrFallback(L, cond, rj, rk, need_link)) {
2216      if (cond != cc_always) {
2217        Label skip;
2218        Condition neg_cond = NegateCondition(cond);
2219        BranchShort(&skip, neg_cond, rj, rk, need_link);
2220        Branch(L, need_link);
2221        bind(&skip);
2222      } else {
2223        Branch(L);
2224      }
2225    }
2226  } else {
2227    if (is_trampoline_emitted()) {
2228      if (cond != cc_always) {
2229        Label skip;
2230        Condition neg_cond = NegateCondition(cond);
2231        BranchShort(&skip, neg_cond, rj, rk, need_link);
2232        Branch(L, need_link);
2233        bind(&skip);
2234      } else {
2235        Branch(L);
2236      }
2237    } else {
2238      BranchShort(L, cond, rj, rk, need_link);
2239    }
2240  }
2241}
2242
2243void TurboAssembler::Branch(Label* L, Condition cond, Register rj,
2244                            RootIndex index) {
2245  UseScratchRegisterScope temps(this);
2246  Register scratch = temps.Acquire();
2247  LoadRoot(scratch, index);
2248  Branch(L, cond, rj, Operand(scratch));
2249}
2250
2251int32_t TurboAssembler::GetOffset(Label* L, OffsetSize bits) {
2252  return branch_offset_helper(L, bits) >> 2;
2253}
2254
2255Register TurboAssembler::GetRkAsRegisterHelper(const Operand& rk,
2256                                               Register scratch) {
2257  Register r2 = no_reg;
2258  if (rk.is_reg()) {
2259    r2 = rk.rm();
2260  } else {
2261    r2 = scratch;
2262    li(r2, rk);
2263  }
2264
2265  return r2;
2266}
2267
2268bool TurboAssembler::BranchShortOrFallback(Label* L, Condition cond,
2269                                           Register rj, const Operand& rk,
2270                                           bool need_link) {
2271  UseScratchRegisterScope temps(this);
2272  BlockTrampolinePoolScope block_trampoline_pool(this);
2273  Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
2274  DCHECK_NE(rj, zero_reg);
2275
2276  // Be careful to always use shifted_branch_offset only just before the
2277  // branch instruction, as the location will be remember for patching the
2278  // target.
2279  {
2280    BlockTrampolinePoolScope block_trampoline_pool(this);
2281    int offset = 0;
2282    switch (cond) {
2283      case cc_always:
2284        if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2285        offset = GetOffset(L, OffsetSize::kOffset26);
2286        if (need_link) {
2287          bl(offset);
2288        } else {
2289          b(offset);
2290        }
2291        break;
2292      case eq:
2293        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2294          // beq is used here to make the code patchable. Otherwise b should
2295          // be used which has no condition field so is not patchable.
2296          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2297          if (need_link) pcaddi(ra, 2);
2298          offset = GetOffset(L, OffsetSize::kOffset16);
2299          beq(rj, rj, offset);
2300        } else if (IsZero(rk)) {
2301          if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false;
2302          if (need_link) pcaddi(ra, 2);
2303          offset = GetOffset(L, OffsetSize::kOffset21);
2304          beqz(rj, offset);
2305        } else {
2306          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2307          if (need_link) pcaddi(ra, 2);
2308          // We don't want any other register but scratch clobbered.
2309          Register sc = GetRkAsRegisterHelper(rk, scratch);
2310          offset = GetOffset(L, OffsetSize::kOffset16);
2311          beq(rj, sc, offset);
2312        }
2313        break;
2314      case ne:
2315        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2316          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2317          if (need_link) pcaddi(ra, 2);
2318          // bne is used here to make the code patchable. Otherwise we
2319          // should not generate any instruction.
2320          offset = GetOffset(L, OffsetSize::kOffset16);
2321          bne(rj, rj, offset);
2322        } else if (IsZero(rk)) {
2323          if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false;
2324          if (need_link) pcaddi(ra, 2);
2325          offset = GetOffset(L, OffsetSize::kOffset21);
2326          bnez(rj, offset);
2327        } else {
2328          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2329          if (need_link) pcaddi(ra, 2);
2330          // We don't want any other register but scratch clobbered.
2331          Register sc = GetRkAsRegisterHelper(rk, scratch);
2332          offset = GetOffset(L, OffsetSize::kOffset16);
2333          bne(rj, sc, offset);
2334        }
2335        break;
2336
2337      // Signed comparison.
2338      case greater:
2339        // rj > rk
2340        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2341          // No code needs to be emitted.
2342        } else if (IsZero(rk)) {
2343          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2344          if (need_link) pcaddi(ra, 2);
2345          offset = GetOffset(L, OffsetSize::kOffset16);
2346          blt(zero_reg, rj, offset);
2347        } else {
2348          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2349          if (need_link) pcaddi(ra, 2);
2350          Register sc = GetRkAsRegisterHelper(rk, scratch);
2351          DCHECK(rj != sc);
2352          offset = GetOffset(L, OffsetSize::kOffset16);
2353          blt(sc, rj, offset);
2354        }
2355        break;
2356      case greater_equal:
2357        // rj >= rk
2358        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2359          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2360          if (need_link) pcaddi(ra, 2);
2361          offset = GetOffset(L, OffsetSize::kOffset26);
2362          b(offset);
2363        } else if (IsZero(rk)) {
2364          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2365          if (need_link) pcaddi(ra, 2);
2366          offset = GetOffset(L, OffsetSize::kOffset16);
2367          bge(rj, zero_reg, offset);
2368        } else {
2369          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2370          if (need_link) pcaddi(ra, 2);
2371          Register sc = GetRkAsRegisterHelper(rk, scratch);
2372          DCHECK(rj != sc);
2373          offset = GetOffset(L, OffsetSize::kOffset16);
2374          bge(rj, sc, offset);
2375        }
2376        break;
2377      case less:
2378        // rj < rk
2379        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2380          // No code needs to be emitted.
2381        } else if (IsZero(rk)) {
2382          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2383          if (need_link) pcaddi(ra, 2);
2384          offset = GetOffset(L, OffsetSize::kOffset16);
2385          blt(rj, zero_reg, offset);
2386        } else {
2387          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2388          if (need_link) pcaddi(ra, 2);
2389          Register sc = GetRkAsRegisterHelper(rk, scratch);
2390          DCHECK(rj != sc);
2391          offset = GetOffset(L, OffsetSize::kOffset16);
2392          blt(rj, sc, offset);
2393        }
2394        break;
2395      case less_equal:
2396        // rj <= rk
2397        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2398          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2399          if (need_link) pcaddi(ra, 2);
2400          offset = GetOffset(L, OffsetSize::kOffset26);
2401          b(offset);
2402        } else if (IsZero(rk)) {
2403          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2404          if (need_link) pcaddi(ra, 2);
2405          offset = GetOffset(L, OffsetSize::kOffset16);
2406          bge(zero_reg, rj, offset);
2407        } else {
2408          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2409          if (need_link) pcaddi(ra, 2);
2410          Register sc = GetRkAsRegisterHelper(rk, scratch);
2411          DCHECK(rj != sc);
2412          offset = GetOffset(L, OffsetSize::kOffset16);
2413          bge(sc, rj, offset);
2414        }
2415        break;
2416
2417      // Unsigned comparison.
2418      case Ugreater:
2419        // rj > rk
2420        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2421          // No code needs to be emitted.
2422        } else if (IsZero(rk)) {
2423          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2424          if (need_link) pcaddi(ra, 2);
2425          offset = GetOffset(L, OffsetSize::kOffset26);
2426          bnez(rj, offset);
2427        } else {
2428          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2429          if (need_link) pcaddi(ra, 2);
2430          Register sc = GetRkAsRegisterHelper(rk, scratch);
2431          DCHECK(rj != sc);
2432          offset = GetOffset(L, OffsetSize::kOffset16);
2433          bltu(sc, rj, offset);
2434        }
2435        break;
2436      case Ugreater_equal:
2437        // rj >= rk
2438        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2439          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2440          if (need_link) pcaddi(ra, 2);
2441          offset = GetOffset(L, OffsetSize::kOffset26);
2442          b(offset);
2443        } else if (IsZero(rk)) {
2444          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2445          if (need_link) pcaddi(ra, 2);
2446          offset = GetOffset(L, OffsetSize::kOffset26);
2447          b(offset);
2448        } else {
2449          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2450          if (need_link) pcaddi(ra, 2);
2451          Register sc = GetRkAsRegisterHelper(rk, scratch);
2452          DCHECK(rj != sc);
2453          offset = GetOffset(L, OffsetSize::kOffset16);
2454          bgeu(rj, sc, offset);
2455        }
2456        break;
2457      case Uless:
2458        // rj < rk
2459        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2460          // No code needs to be emitted.
2461        } else if (IsZero(rk)) {
2462          // No code needs to be emitted.
2463        } else {
2464          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2465          if (need_link) pcaddi(ra, 2);
2466          Register sc = GetRkAsRegisterHelper(rk, scratch);
2467          DCHECK(rj != sc);
2468          offset = GetOffset(L, OffsetSize::kOffset16);
2469          bltu(rj, sc, offset);
2470        }
2471        break;
2472      case Uless_equal:
2473        // rj <= rk
2474        if (rk.is_reg() && rj.code() == rk.rm().code()) {
2475          if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false;
2476          if (need_link) pcaddi(ra, 2);
2477          offset = GetOffset(L, OffsetSize::kOffset26);
2478          b(offset);
2479        } else if (IsZero(rk)) {
2480          if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false;
2481          if (need_link) pcaddi(ra, 2);
2482          beqz(rj, L);
2483        } else {
2484          if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false;
2485          if (need_link) pcaddi(ra, 2);
2486          Register sc = GetRkAsRegisterHelper(rk, scratch);
2487          DCHECK(rj != sc);
2488          offset = GetOffset(L, OffsetSize::kOffset16);
2489          bgeu(sc, rj, offset);
2490        }
2491        break;
2492      default:
2493        UNREACHABLE();
2494    }
2495  }
2496  return true;
2497}
2498
2499void TurboAssembler::BranchShort(Label* L, Condition cond, Register rj,
2500                                 const Operand& rk, bool need_link) {
2501  BRANCH_ARGS_CHECK(cond, rj, rk);
2502  bool result = BranchShortOrFallback(L, cond, rj, rk, need_link);
2503  DCHECK(result);
2504  USE(result);
2505}
2506
2507void TurboAssembler::LoadFromConstantsTable(Register destination,
2508                                            int constant_index) {
2509  ASM_CODE_COMMENT(this);
2510  DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
2511  LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
2512  Ld_d(destination,
2513       FieldMemOperand(destination, FixedArray::kHeaderSize +
2514                                        constant_index * kPointerSize));
2515}
2516
2517void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
2518  Ld_d(destination, MemOperand(kRootRegister, offset));
2519}
2520
2521void TurboAssembler::LoadRootRegisterOffset(Register destination,
2522                                            intptr_t offset) {
2523  if (offset == 0) {
2524    Move(destination, kRootRegister);
2525  } else {
2526    Add_d(destination, kRootRegister, Operand(offset));
2527  }
2528}
2529
2530void TurboAssembler::Jump(Register target, Condition cond, Register rj,
2531                          const Operand& rk) {
2532  BlockTrampolinePoolScope block_trampoline_pool(this);
2533  if (cond == cc_always) {
2534    jirl(zero_reg, target, 0);
2535  } else {
2536    BRANCH_ARGS_CHECK(cond, rj, rk);
2537    Label skip;
2538    Branch(&skip, NegateCondition(cond), rj, rk);
2539    jirl(zero_reg, target, 0);
2540    bind(&skip);
2541  }
2542}
2543
2544void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
2545                          Condition cond, Register rj, const Operand& rk) {
2546  Label skip;
2547  if (cond != cc_always) {
2548    Branch(&skip, NegateCondition(cond), rj, rk);
2549  }
2550  {
2551    BlockTrampolinePoolScope block_trampoline_pool(this);
2552    li(t7, Operand(target, rmode));
2553    jirl(zero_reg, t7, 0);
2554    bind(&skip);
2555  }
2556}
2557
2558void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond,
2559                          Register rj, const Operand& rk) {
2560  DCHECK(!RelocInfo::IsCodeTarget(rmode));
2561  Jump(static_cast<intptr_t>(target), rmode, cond, rj, rk);
2562}
2563
2564void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
2565                          Condition cond, Register rj, const Operand& rk) {
2566  DCHECK(RelocInfo::IsCodeTarget(rmode));
2567
2568  BlockTrampolinePoolScope block_trampoline_pool(this);
2569  Label skip;
2570  if (cond != cc_always) {
2571    BranchShort(&skip, NegateCondition(cond), rj, rk);
2572  }
2573
2574  Builtin builtin = Builtin::kNoBuiltinId;
2575  bool target_is_isolate_independent_builtin =
2576      isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
2577      Builtins::IsIsolateIndependent(builtin);
2578  if (target_is_isolate_independent_builtin &&
2579      options().use_pc_relative_calls_and_jumps) {
2580    int32_t code_target_index = AddCodeTarget(code);
2581    RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
2582    b(code_target_index);
2583    bind(&skip);
2584    return;
2585  } else if (root_array_available_ && options().isolate_independent_code) {
2586    UNREACHABLE();
2587    /*int offset = code->builtin_index() * kSystemPointerSize +
2588                 IsolateData::builtin_entry_table_offset();
2589    Ld_d(t7, MemOperand(kRootRegister, offset));
2590    Jump(t7, cc_always, rj, rk);
2591    bind(&skip);
2592    return;*/
2593  } else if (options().inline_offheap_trampolines &&
2594             target_is_isolate_independent_builtin) {
2595    // Inline the trampoline.
2596    RecordCommentForOffHeapTrampoline(builtin);
2597    li(t7, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
2598    Jump(t7, cc_always, rj, rk);
2599    bind(&skip);
2600    RecordComment("]");
2601    return;
2602  }
2603
2604  Jump(static_cast<intptr_t>(code.address()), rmode, cc_always, rj, rk);
2605  bind(&skip);
2606}
2607
2608void TurboAssembler::Jump(const ExternalReference& reference) {
2609  li(t7, reference);
2610  Jump(t7);
2611}
2612
2613// Note: To call gcc-compiled C code on loonarch, you must call through t[0-8].
2614void TurboAssembler::Call(Register target, Condition cond, Register rj,
2615                          const Operand& rk) {
2616  BlockTrampolinePoolScope block_trampoline_pool(this);
2617  if (cond == cc_always) {
2618    jirl(ra, target, 0);
2619  } else {
2620    BRANCH_ARGS_CHECK(cond, rj, rk);
2621    Label skip;
2622    Branch(&skip, NegateCondition(cond), rj, rk);
2623    jirl(ra, target, 0);
2624    bind(&skip);
2625  }
2626  set_pc_for_safepoint();
2627}
2628
2629void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
2630                                     unsigned higher_limit,
2631                                     Label* on_in_range) {
2632  ASM_CODE_COMMENT(this);
2633  if (lower_limit != 0) {
2634    UseScratchRegisterScope temps(this);
2635    Register scratch = temps.Acquire();
2636    Sub_d(scratch, value, Operand(lower_limit));
2637    Branch(on_in_range, ls, scratch, Operand(higher_limit - lower_limit));
2638  } else {
2639    Branch(on_in_range, ls, value, Operand(higher_limit - lower_limit));
2640  }
2641}
2642
2643void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond,
2644                          Register rj, const Operand& rk) {
2645  BlockTrampolinePoolScope block_trampoline_pool(this);
2646  Label skip;
2647  if (cond != cc_always) {
2648    BranchShort(&skip, NegateCondition(cond), rj, rk);
2649  }
2650  intptr_t offset_diff = target - pc_offset();
2651  if (RelocInfo::IsNoInfo(rmode) && is_int28(offset_diff)) {
2652    bl(offset_diff >> 2);
2653  } else if (RelocInfo::IsNoInfo(rmode) && is_int38(offset_diff)) {
2654    pcaddu18i(t7, static_cast<int32_t>(offset_diff) >> 18);
2655    jirl(ra, t7, (offset_diff & 0x3ffff) >> 2);
2656  } else {
2657    li(t7, Operand(static_cast<int64_t>(target), rmode), ADDRESS_LOAD);
2658    Call(t7, cc_always, rj, rk);
2659  }
2660  bind(&skip);
2661}
2662
2663void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
2664                          Condition cond, Register rj, const Operand& rk) {
2665  BlockTrampolinePoolScope block_trampoline_pool(this);
2666  Label skip;
2667  if (cond != cc_always) {
2668    BranchShort(&skip, NegateCondition(cond), rj, rk);
2669  }
2670
2671  Builtin builtin = Builtin::kNoBuiltinId;
2672  bool target_is_isolate_independent_builtin =
2673      isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
2674      Builtins::IsIsolateIndependent(builtin);
2675
2676  if (target_is_isolate_independent_builtin &&
2677      options().use_pc_relative_calls_and_jumps) {
2678    int32_t code_target_index = AddCodeTarget(code);
2679    RecordCommentForOffHeapTrampoline(builtin);
2680    RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
2681    bl(code_target_index);
2682    set_pc_for_safepoint();
2683    bind(&skip);
2684    RecordComment("]");
2685    return;
2686  } else if (root_array_available_ && options().isolate_independent_code) {
2687    UNREACHABLE();
2688    /*int offset = code->builtin_index() * kSystemPointerSize +
2689                 IsolateData::builtin_entry_table_offset();
2690    LoadRootRelative(t7, offset);
2691    Call(t7, cond, rj, rk);
2692    bind(&skip);
2693    return;*/
2694  } else if (options().inline_offheap_trampolines &&
2695             target_is_isolate_independent_builtin) {
2696    // Inline the trampoline.
2697    RecordCommentForOffHeapTrampoline(builtin);
2698    li(t7, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
2699    Call(t7, cond, rj, rk);
2700    bind(&skip);
2701    RecordComment("]");
2702    return;
2703  }
2704
2705  DCHECK(RelocInfo::IsCodeTarget(rmode));
2706  DCHECK(code->IsExecutable());
2707  Call(code.address(), rmode, cc_always, rj, rk);
2708  bind(&skip);
2709}
2710
2711void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) {
2712  ASM_CODE_COMMENT(this);
2713  STATIC_ASSERT(kSystemPointerSize == 8);
2714  STATIC_ASSERT(kSmiTagSize == 1);
2715  STATIC_ASSERT(kSmiTag == 0);
2716
2717  // The builtin_index register contains the builtin index as a Smi.
2718  SmiUntag(builtin_index, builtin_index);
2719  Alsl_d(builtin_index, builtin_index, kRootRegister, kSystemPointerSizeLog2,
2720         t7);
2721  Ld_d(builtin_index,
2722       MemOperand(builtin_index, IsolateData::builtin_entry_table_offset()));
2723}
2724
2725void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
2726                                          Register destination) {
2727  Ld_d(destination, EntryFromBuiltinAsOperand(builtin));
2728}
2729MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
2730  DCHECK(root_array_available());
2731  return MemOperand(kRootRegister,
2732                    IsolateData::BuiltinEntrySlotOffset(builtin));
2733}
2734
2735void TurboAssembler::CallBuiltinByIndex(Register builtin_index) {
2736  ASM_CODE_COMMENT(this);
2737  LoadEntryFromBuiltinIndex(builtin_index);
2738  Call(builtin_index);
2739}
2740void TurboAssembler::CallBuiltin(Builtin builtin) {
2741  RecordCommentForOffHeapTrampoline(builtin);
2742  Call(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
2743  RecordComment("]");
2744}
2745
2746void TurboAssembler::PatchAndJump(Address target) {
2747  ASM_CODE_COMMENT(this);
2748  UseScratchRegisterScope temps(this);
2749  Register scratch = temps.Acquire();
2750  pcaddi(scratch, 4);
2751  Ld_d(t7, MemOperand(scratch, 0));
2752  jirl(zero_reg, t7, 0);
2753  nop();
2754  DCHECK_EQ(reinterpret_cast<uint64_t>(pc_) % 8, 0);
2755  *reinterpret_cast<uint64_t*>(pc_) = target;  // pc_ should be align.
2756  pc_ += sizeof(uint64_t);
2757}
2758
2759void TurboAssembler::StoreReturnAddressAndCall(Register target) {
2760  ASM_CODE_COMMENT(this);
2761  // This generates the final instruction sequence for calls to C functions
2762  // once an exit frame has been constructed.
2763  //
2764  // Note that this assumes the caller code (i.e. the Code object currently
2765  // being generated) is immovable or that the callee function cannot trigger
2766  // GC, since the callee function will return to it.
2767
2768  Assembler::BlockTrampolinePoolScope block_trampoline_pool(this);
2769  static constexpr int kNumInstructionsToJump = 2;
2770  Label find_ra;
2771  // Adjust the value in ra to point to the correct return location, 2nd
2772  // instruction past the real call into C code (the jirl)), and push it.
2773  // This is the return address of the exit frame.
2774  pcaddi(ra, kNumInstructionsToJump + 1);
2775  bind(&find_ra);
2776
2777  // This spot was reserved in EnterExitFrame.
2778  St_d(ra, MemOperand(sp, 0));
2779  // Stack is still aligned.
2780
2781  // TODO(LOONG_dev): can be jirl target? a0 -- a7?
2782  jirl(zero_reg, target, 0);
2783  // Make sure the stored 'ra' points to this position.
2784  DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra));
2785}
2786
2787void TurboAssembler::DropArguments(Register count, ArgumentsCountType type,
2788                                   ArgumentsCountMode mode, Register scratch) {
2789  switch (type) {
2790    case kCountIsInteger: {
2791      Alsl_d(sp, count, sp, kPointerSizeLog2);
2792      break;
2793    }
2794    case kCountIsSmi: {
2795      STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
2796      DCHECK_NE(scratch, no_reg);
2797      SmiScale(scratch, count, kPointerSizeLog2);
2798      Add_d(sp, sp, scratch);
2799      break;
2800    }
2801    case kCountIsBytes: {
2802      Add_d(sp, sp, count);
2803      break;
2804    }
2805  }
2806  if (mode == kCountExcludesReceiver) {
2807    Add_d(sp, sp, kSystemPointerSize);
2808  }
2809}
2810
2811void TurboAssembler::DropArgumentsAndPushNewReceiver(Register argc,
2812                                                     Register receiver,
2813                                                     ArgumentsCountType type,
2814                                                     ArgumentsCountMode mode,
2815                                                     Register scratch) {
2816  DCHECK(!AreAliased(argc, receiver));
2817  if (mode == kCountExcludesReceiver) {
2818    // Drop arguments without receiver and override old receiver.
2819    DropArguments(argc, type, kCountIncludesReceiver, scratch);
2820    St_d(receiver, MemOperand(sp, 0));
2821  } else {
2822    DropArguments(argc, type, mode, scratch);
2823    Push(receiver);
2824  }
2825}
2826
2827void TurboAssembler::Ret(Condition cond, Register rj, const Operand& rk) {
2828  Jump(ra, cond, rj, rk);
2829}
2830
2831void TurboAssembler::Drop(int count, Condition cond, Register reg,
2832                          const Operand& op) {
2833  if (count <= 0) {
2834    return;
2835  }
2836
2837  Label skip;
2838
2839  if (cond != al) {
2840    Branch(&skip, NegateCondition(cond), reg, op);
2841  }
2842
2843  Add_d(sp, sp, Operand(count * kPointerSize));
2844
2845  if (cond != al) {
2846    bind(&skip);
2847  }
2848}
2849
2850void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) {
2851  if (scratch == no_reg) {
2852    Xor(reg1, reg1, Operand(reg2));
2853    Xor(reg2, reg2, Operand(reg1));
2854    Xor(reg1, reg1, Operand(reg2));
2855  } else {
2856    mov(scratch, reg1);
2857    mov(reg1, reg2);
2858    mov(reg2, scratch);
2859  }
2860}
2861
2862void TurboAssembler::Call(Label* target) { Branch(target, true); }
2863
2864void TurboAssembler::Push(Smi smi) {
2865  UseScratchRegisterScope temps(this);
2866  Register scratch = temps.Acquire();
2867  li(scratch, Operand(smi));
2868  Push(scratch);
2869}
2870
2871void TurboAssembler::Push(Handle<HeapObject> handle) {
2872  UseScratchRegisterScope temps(this);
2873  Register scratch = temps.Acquire();
2874  li(scratch, Operand(handle));
2875  Push(scratch);
2876}
2877
2878void TurboAssembler::PushArray(Register array, Register size, Register scratch,
2879                               Register scratch2, PushArrayOrder order) {
2880  DCHECK(!AreAliased(array, size, scratch, scratch2));
2881  Label loop, entry;
2882  if (order == PushArrayOrder::kReverse) {
2883    mov(scratch, zero_reg);
2884    jmp(&entry);
2885    bind(&loop);
2886    Alsl_d(scratch2, scratch, array, kPointerSizeLog2, t7);
2887    Ld_d(scratch2, MemOperand(scratch2, 0));
2888    Push(scratch2);
2889    Add_d(scratch, scratch, Operand(1));
2890    bind(&entry);
2891    Branch(&loop, less, scratch, Operand(size));
2892  } else {
2893    mov(scratch, size);
2894    jmp(&entry);
2895    bind(&loop);
2896    Alsl_d(scratch2, scratch, array, kPointerSizeLog2, t7);
2897    Ld_d(scratch2, MemOperand(scratch2, 0));
2898    Push(scratch2);
2899    bind(&entry);
2900    Add_d(scratch, scratch, Operand(-1));
2901    Branch(&loop, greater_equal, scratch, Operand(zero_reg));
2902  }
2903}
2904
2905// ---------------------------------------------------------------------------
2906// Exception handling.
2907
2908void MacroAssembler::PushStackHandler() {
2909  // Adjust this code if not the case.
2910  STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kPointerSize);
2911  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kPointerSize);
2912
2913  Push(Smi::zero());  // Padding.
2914
2915  // Link the current handler as the next handler.
2916  li(t2,
2917     ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
2918  Ld_d(t1, MemOperand(t2, 0));
2919  Push(t1);
2920
2921  // Set this new handler as the current one.
2922  St_d(sp, MemOperand(t2, 0));
2923}
2924
2925void MacroAssembler::PopStackHandler() {
2926  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
2927  Pop(a1);
2928  Add_d(sp, sp,
2929        Operand(
2930            static_cast<int64_t>(StackHandlerConstants::kSize - kPointerSize)));
2931  UseScratchRegisterScope temps(this);
2932  Register scratch = temps.Acquire();
2933  li(scratch,
2934     ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
2935  St_d(a1, MemOperand(scratch, 0));
2936}
2937
2938void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst,
2939                                        const DoubleRegister src) {
2940  fsub_d(dst, src, kDoubleRegZero);
2941}
2942
2943// -----------------------------------------------------------------------------
2944// JavaScript invokes.
2945
2946void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
2947  ASM_CODE_COMMENT(this);
2948  DCHECK(root_array_available());
2949  Isolate* isolate = this->isolate();
2950  ExternalReference limit =
2951      kind == StackLimitKind::kRealStackLimit
2952          ? ExternalReference::address_of_real_jslimit(isolate)
2953          : ExternalReference::address_of_jslimit(isolate);
2954  DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
2955
2956  intptr_t offset =
2957      TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
2958  CHECK(is_int32(offset));
2959  Ld_d(destination, MemOperand(kRootRegister, static_cast<int32_t>(offset)));
2960}
2961
2962void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
2963                                        Register scratch2,
2964                                        Label* stack_overflow) {
2965  ASM_CODE_COMMENT(this);
2966  // Check the stack for overflow. We are not trying to catch
2967  // interruptions (e.g. debug break and preemption) here, so the "real stack
2968  // limit" is checked.
2969
2970  LoadStackLimit(scratch1, StackLimitKind::kRealStackLimit);
2971  // Make scratch1 the space we have left. The stack might already be overflowed
2972  // here which will cause scratch1 to become negative.
2973  sub_d(scratch1, sp, scratch1);
2974  // Check if the arguments will overflow the stack.
2975  slli_d(scratch2, num_args, kPointerSizeLog2);
2976  // Signed comparison.
2977  Branch(stack_overflow, le, scratch1, Operand(scratch2));
2978}
2979
2980void MacroAssembler::InvokePrologue(Register expected_parameter_count,
2981                                    Register actual_parameter_count,
2982                                    Label* done, InvokeType type) {
2983  ASM_CODE_COMMENT(this);
2984  Label regular_invoke;
2985
2986  //  a0: actual arguments count
2987  //  a1: function (passed through to callee)
2988  //  a2: expected arguments count
2989
2990  DCHECK_EQ(actual_parameter_count, a0);
2991  DCHECK_EQ(expected_parameter_count, a2);
2992
2993  // If the expected parameter count is equal to the adaptor sentinel, no need
2994  // to push undefined value as arguments.
2995  if (kDontAdaptArgumentsSentinel != 0) {
2996    Branch(&regular_invoke, eq, expected_parameter_count,
2997           Operand(kDontAdaptArgumentsSentinel));
2998  }
2999
3000  // If overapplication or if the actual argument count is equal to the
3001  // formal parameter count, no need to push extra undefined values.
3002  sub_d(expected_parameter_count, expected_parameter_count,
3003        actual_parameter_count);
3004  Branch(&regular_invoke, le, expected_parameter_count, Operand(zero_reg));
3005
3006  Label stack_overflow;
3007  StackOverflowCheck(expected_parameter_count, t0, t1, &stack_overflow);
3008  // Underapplication. Move the arguments already in the stack, including the
3009  // receiver and the return address.
3010  {
3011    Label copy;
3012    Register src = a6, dest = a7;
3013    mov(src, sp);
3014    slli_d(t0, expected_parameter_count, kSystemPointerSizeLog2);
3015    Sub_d(sp, sp, Operand(t0));
3016    // Update stack pointer.
3017    mov(dest, sp);
3018    mov(t0, actual_parameter_count);
3019    bind(&copy);
3020    Ld_d(t1, MemOperand(src, 0));
3021    St_d(t1, MemOperand(dest, 0));
3022    Sub_d(t0, t0, Operand(1));
3023    Add_d(src, src, Operand(kSystemPointerSize));
3024    Add_d(dest, dest, Operand(kSystemPointerSize));
3025    Branch(&copy, gt, t0, Operand(zero_reg));
3026  }
3027
3028  // Fill remaining expected arguments with undefined values.
3029  LoadRoot(t0, RootIndex::kUndefinedValue);
3030  {
3031    Label loop;
3032    bind(&loop);
3033    St_d(t0, MemOperand(a7, 0));
3034    Sub_d(expected_parameter_count, expected_parameter_count, Operand(1));
3035    Add_d(a7, a7, Operand(kSystemPointerSize));
3036    Branch(&loop, gt, expected_parameter_count, Operand(zero_reg));
3037  }
3038  b(&regular_invoke);
3039
3040  bind(&stack_overflow);
3041  {
3042    FrameScope frame(
3043        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3044    CallRuntime(Runtime::kThrowStackOverflow);
3045    break_(0xCC);
3046  }
3047
3048  bind(&regular_invoke);
3049}
3050
3051void MacroAssembler::CallDebugOnFunctionCall(Register fun, Register new_target,
3052                                             Register expected_parameter_count,
3053                                             Register actual_parameter_count) {
3054  // Load receiver to pass it later to DebugOnFunctionCall hook.
3055  LoadReceiver(t0, actual_parameter_count);
3056  FrameScope frame(
3057      this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3058
3059  SmiTag(expected_parameter_count);
3060  Push(expected_parameter_count);
3061
3062  SmiTag(actual_parameter_count);
3063  Push(actual_parameter_count);
3064
3065  if (new_target.is_valid()) {
3066    Push(new_target);
3067  }
3068  // TODO(LOONG_dev): MultiPush/Pop
3069  Push(fun);
3070  Push(fun);
3071  Push(t0);
3072  CallRuntime(Runtime::kDebugOnFunctionCall);
3073  Pop(fun);
3074  if (new_target.is_valid()) {
3075    Pop(new_target);
3076  }
3077
3078  Pop(actual_parameter_count);
3079  SmiUntag(actual_parameter_count);
3080
3081  Pop(expected_parameter_count);
3082  SmiUntag(expected_parameter_count);
3083}
3084
3085void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
3086                                        Register expected_parameter_count,
3087                                        Register actual_parameter_count,
3088                                        InvokeType type) {
3089  // You can't call a function without a valid frame.
3090  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3091  DCHECK_EQ(function, a1);
3092  DCHECK_IMPLIES(new_target.is_valid(), new_target == a3);
3093
3094  // On function call, call into the debugger if necessary.
3095  Label debug_hook, continue_after_hook;
3096  {
3097    li(t0, ExternalReference::debug_hook_on_function_call_address(isolate()));
3098    Ld_b(t0, MemOperand(t0, 0));
3099    BranchShort(&debug_hook, ne, t0, Operand(zero_reg));
3100  }
3101  bind(&continue_after_hook);
3102
3103  // Clear the new.target register if not given.
3104  if (!new_target.is_valid()) {
3105    LoadRoot(a3, RootIndex::kUndefinedValue);
3106  }
3107
3108  Label done;
3109  InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
3110  // We call indirectly through the code field in the function to
3111  // allow recompilation to take effect without changing any of the
3112  // call sites.
3113  Register code = kJavaScriptCallCodeStartRegister;
3114  Ld_d(code, FieldMemOperand(function, JSFunction::kCodeOffset));
3115  switch (type) {
3116    case InvokeType::kCall:
3117      CallCodeObject(code);
3118      break;
3119    case InvokeType::kJump:
3120      JumpCodeObject(code);
3121      break;
3122  }
3123
3124  Branch(&done);
3125
3126  // Deferred debug hook.
3127  bind(&debug_hook);
3128  CallDebugOnFunctionCall(function, new_target, expected_parameter_count,
3129                          actual_parameter_count);
3130  Branch(&continue_after_hook);
3131
3132  // Continue here if InvokePrologue does handle the invocation due to
3133  // mismatched parameter counts.
3134  bind(&done);
3135}
3136
3137void MacroAssembler::InvokeFunctionWithNewTarget(
3138    Register function, Register new_target, Register actual_parameter_count,
3139    InvokeType type) {
3140  ASM_CODE_COMMENT(this);
3141  // You can't call a function without a valid frame.
3142  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3143
3144  // Contract with called JS functions requires that function is passed in a1.
3145  DCHECK_EQ(function, a1);
3146  Register expected_parameter_count = a2;
3147  Register temp_reg = t0;
3148  Ld_d(temp_reg, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset));
3149  Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset));
3150  // The argument count is stored as uint16_t
3151  Ld_hu(expected_parameter_count,
3152        FieldMemOperand(temp_reg,
3153                        SharedFunctionInfo::kFormalParameterCountOffset));
3154
3155  InvokeFunctionCode(a1, new_target, expected_parameter_count,
3156                     actual_parameter_count, type);
3157}
3158
3159void MacroAssembler::InvokeFunction(Register function,
3160                                    Register expected_parameter_count,
3161                                    Register actual_parameter_count,
3162                                    InvokeType type) {
3163  ASM_CODE_COMMENT(this);
3164  // You can't call a function without a valid frame.
3165  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3166
3167  // Contract with called JS functions requires that function is passed in a1.
3168  DCHECK_EQ(function, a1);
3169
3170  // Get the function and setup the context.
3171  Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset));
3172
3173  InvokeFunctionCode(a1, no_reg, expected_parameter_count,
3174                     actual_parameter_count, type);
3175}
3176
3177// ---------------------------------------------------------------------------
3178// Support functions.
3179
3180void MacroAssembler::GetObjectType(Register object, Register map,
3181                                   Register type_reg) {
3182  LoadMap(map, object);
3183  Ld_hu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3184}
3185
3186void MacroAssembler::GetInstanceTypeRange(Register map, Register type_reg,
3187                                          InstanceType lower_limit,
3188                                          Register range) {
3189  Ld_hu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3190  Sub_d(range, type_reg, Operand(lower_limit));
3191}
3192
3193// -----------------------------------------------------------------------------
3194// Runtime calls.
3195
3196void TurboAssembler::AddOverflow_d(Register dst, Register left,
3197                                   const Operand& right, Register overflow) {
3198  ASM_CODE_COMMENT(this);
3199  BlockTrampolinePoolScope block_trampoline_pool(this);
3200  UseScratchRegisterScope temps(this);
3201  Register scratch = temps.Acquire();
3202  Register scratch2 = temps.Acquire();
3203  Register right_reg = no_reg;
3204  if (!right.is_reg()) {
3205    li(scratch, Operand(right));
3206    right_reg = scratch;
3207  } else {
3208    right_reg = right.rm();
3209  }
3210
3211  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
3212         overflow != scratch2);
3213  DCHECK(overflow != left && overflow != right_reg);
3214
3215  if (dst == left || dst == right_reg) {
3216    add_d(scratch2, left, right_reg);
3217    xor_(overflow, scratch2, left);
3218    xor_(scratch, scratch2, right_reg);
3219    and_(overflow, overflow, scratch);
3220    mov(dst, scratch2);
3221  } else {
3222    add_d(dst, left, right_reg);
3223    xor_(overflow, dst, left);
3224    xor_(scratch, dst, right_reg);
3225    and_(overflow, overflow, scratch);
3226  }
3227}
3228
3229void TurboAssembler::SubOverflow_d(Register dst, Register left,
3230                                   const Operand& right, Register overflow) {
3231  ASM_CODE_COMMENT(this);
3232  BlockTrampolinePoolScope block_trampoline_pool(this);
3233  UseScratchRegisterScope temps(this);
3234  Register scratch = temps.Acquire();
3235  Register scratch2 = temps.Acquire();
3236  Register right_reg = no_reg;
3237  if (!right.is_reg()) {
3238    li(scratch, Operand(right));
3239    right_reg = scratch;
3240  } else {
3241    right_reg = right.rm();
3242  }
3243
3244  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
3245         overflow != scratch2);
3246  DCHECK(overflow != left && overflow != right_reg);
3247
3248  if (dst == left || dst == right_reg) {
3249    Sub_d(scratch2, left, right_reg);
3250    xor_(overflow, left, scratch2);
3251    xor_(scratch, left, right_reg);
3252    and_(overflow, overflow, scratch);
3253    mov(dst, scratch2);
3254  } else {
3255    sub_d(dst, left, right_reg);
3256    xor_(overflow, left, dst);
3257    xor_(scratch, left, right_reg);
3258    and_(overflow, overflow, scratch);
3259  }
3260}
3261
3262void TurboAssembler::MulOverflow_w(Register dst, Register left,
3263                                   const Operand& right, Register overflow) {
3264  ASM_CODE_COMMENT(this);
3265  BlockTrampolinePoolScope block_trampoline_pool(this);
3266  UseScratchRegisterScope temps(this);
3267  Register scratch = temps.Acquire();
3268  Register scratch2 = temps.Acquire();
3269  Register right_reg = no_reg;
3270  if (!right.is_reg()) {
3271    li(scratch, Operand(right));
3272    right_reg = scratch;
3273  } else {
3274    right_reg = right.rm();
3275  }
3276
3277  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
3278         overflow != scratch2);
3279  DCHECK(overflow != left && overflow != right_reg);
3280
3281  if (dst == left || dst == right_reg) {
3282    Mul_w(scratch2, left, right_reg);
3283    Mulh_w(overflow, left, right_reg);
3284    mov(dst, scratch2);
3285  } else {
3286    Mul_w(dst, left, right_reg);
3287    Mulh_w(overflow, left, right_reg);
3288  }
3289
3290  srai_d(scratch2, dst, 32);
3291  xor_(overflow, overflow, scratch2);
3292}
3293
3294void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
3295                                 SaveFPRegsMode save_doubles) {
3296  ASM_CODE_COMMENT(this);
3297  // All parameters are on the stack. v0 has the return value after call.
3298
3299  // If the expected number of arguments of the runtime function is
3300  // constant, we check that the actual number of arguments match the
3301  // expectation.
3302  CHECK(f->nargs < 0 || f->nargs == num_arguments);
3303
3304  // TODO(1236192): Most runtime routines don't need the number of
3305  // arguments passed in because it is constant. At some point we
3306  // should remove this need and make the runtime routine entry code
3307  // smarter.
3308  PrepareCEntryArgs(num_arguments);
3309  PrepareCEntryFunction(ExternalReference::Create(f));
3310  Handle<Code> code =
3311      CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
3312  Call(code, RelocInfo::CODE_TARGET);
3313}
3314
3315void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
3316  ASM_CODE_COMMENT(this);
3317  const Runtime::Function* function = Runtime::FunctionForId(fid);
3318  DCHECK_EQ(1, function->result_size);
3319  if (function->nargs >= 0) {
3320    PrepareCEntryArgs(function->nargs);
3321  }
3322  JumpToExternalReference(ExternalReference::Create(fid));
3323}
3324
3325void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
3326                                             bool builtin_exit_frame) {
3327  PrepareCEntryFunction(builtin);
3328  Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
3329                                          ArgvMode::kStack, builtin_exit_frame);
3330  Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg));
3331}
3332
3333void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
3334  li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
3335  Jump(kOffHeapTrampolineRegister);
3336}
3337
3338void MacroAssembler::LoadWeakValue(Register out, Register in,
3339                                   Label* target_if_cleared) {
3340  Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32));
3341  And(out, in, Operand(~kWeakHeapObjectMask));
3342}
3343
3344void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
3345                                          Register scratch1,
3346                                          Register scratch2) {
3347  DCHECK_GT(value, 0);
3348  if (FLAG_native_code_counters && counter->Enabled()) {
3349    ASM_CODE_COMMENT(this);
3350    // This operation has to be exactly 32-bit wide in case the external
3351    // reference table redirects the counter to a uint32_t dummy_stats_counter_
3352    // field.
3353    li(scratch2, ExternalReference::Create(counter));
3354    Ld_w(scratch1, MemOperand(scratch2, 0));
3355    Add_w(scratch1, scratch1, Operand(value));
3356    St_w(scratch1, MemOperand(scratch2, 0));
3357  }
3358}
3359
3360void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
3361                                          Register scratch1,
3362                                          Register scratch2) {
3363  DCHECK_GT(value, 0);
3364  if (FLAG_native_code_counters && counter->Enabled()) {
3365    ASM_CODE_COMMENT(this);
3366    // This operation has to be exactly 32-bit wide in case the external
3367    // reference table redirects the counter to a uint32_t dummy_stats_counter_
3368    // field.
3369    li(scratch2, ExternalReference::Create(counter));
3370    Ld_w(scratch1, MemOperand(scratch2, 0));
3371    Sub_w(scratch1, scratch1, Operand(value));
3372    St_w(scratch1, MemOperand(scratch2, 0));
3373  }
3374}
3375
3376// -----------------------------------------------------------------------------
3377// Debugging.
3378
3379void TurboAssembler::Trap() { stop(); }
3380void TurboAssembler::DebugBreak() { stop(); }
3381
3382void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs,
3383                            Operand rk) {
3384  if (FLAG_debug_code) Check(cc, reason, rs, rk);
3385}
3386
3387void TurboAssembler::Check(Condition cc, AbortReason reason, Register rj,
3388                           Operand rk) {
3389  Label L;
3390  Branch(&L, cc, rj, rk);
3391  Abort(reason);
3392  // Will not return here.
3393  bind(&L);
3394}
3395
3396void TurboAssembler::Abort(AbortReason reason) {
3397  Label abort_start;
3398  bind(&abort_start);
3399  if (FLAG_code_comments) {
3400    const char* msg = GetAbortReason(reason);
3401    RecordComment("Abort message: ");
3402    RecordComment(msg);
3403  }
3404
3405  // Avoid emitting call to builtin if requested.
3406  if (trap_on_abort()) {
3407    stop();
3408    return;
3409  }
3410
3411  if (should_abort_hard()) {
3412    // We don't care if we constructed a frame. Just pretend we did.
3413    FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
3414    PrepareCallCFunction(0, a0);
3415    li(a0, Operand(static_cast<int>(reason)));
3416    CallCFunction(ExternalReference::abort_with_reason(), 1);
3417    return;
3418  }
3419
3420  Move(a0, Smi::FromInt(static_cast<int>(reason)));
3421
3422  // Disable stub call restrictions to always allow calls to abort.
3423  if (!has_frame()) {
3424    // We don't actually want to generate a pile of code for this, so just
3425    // claim there is a stack frame, without generating one.
3426    FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
3427    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
3428  } else {
3429    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
3430  }
3431  // Will not return here.
3432  if (is_trampoline_pool_blocked()) {
3433    // If the calling code cares about the exact number of
3434    // instructions generated, we insert padding here to keep the size
3435    // of the Abort macro constant.
3436    // Currently in debug mode with debug_code enabled the number of
3437    // generated instructions is 10, so we use this as a maximum value.
3438    static const int kExpectedAbortInstructions = 10;
3439    int abort_instructions = InstructionsGeneratedSince(&abort_start);
3440    DCHECK_LE(abort_instructions, kExpectedAbortInstructions);
3441    while (abort_instructions++ < kExpectedAbortInstructions) {
3442      nop();
3443    }
3444  }
3445}
3446
3447void TurboAssembler::LoadMap(Register destination, Register object) {
3448  Ld_d(destination, FieldMemOperand(object, HeapObject::kMapOffset));
3449}
3450
3451void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
3452  LoadMap(dst, cp);
3453  Ld_d(dst, FieldMemOperand(
3454                dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
3455  Ld_d(dst, MemOperand(dst, Context::SlotOffset(index)));
3456}
3457
3458void TurboAssembler::StubPrologue(StackFrame::Type type) {
3459  UseScratchRegisterScope temps(this);
3460  Register scratch = temps.Acquire();
3461  li(scratch, Operand(StackFrame::TypeToMarker(type)));
3462  PushCommonFrame(scratch);
3463}
3464
3465void TurboAssembler::Prologue() { PushStandardFrame(a1); }
3466
3467void TurboAssembler::EnterFrame(StackFrame::Type type) {
3468  ASM_CODE_COMMENT(this);
3469  BlockTrampolinePoolScope block_trampoline_pool(this);
3470  Push(ra, fp);
3471  Move(fp, sp);
3472  if (!StackFrame::IsJavaScript(type)) {
3473    li(kScratchReg, Operand(StackFrame::TypeToMarker(type)));
3474    Push(kScratchReg);
3475  }
3476#if V8_ENABLE_WEBASSEMBLY
3477  if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
3478#endif  // V8_ENABLE_WEBASSEMBLY
3479}
3480
3481void TurboAssembler::LeaveFrame(StackFrame::Type type) {
3482  ASM_CODE_COMMENT(this);
3483  addi_d(sp, fp, 2 * kPointerSize);
3484  Ld_d(ra, MemOperand(fp, 1 * kPointerSize));
3485  Ld_d(fp, MemOperand(fp, 0 * kPointerSize));
3486}
3487
3488void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
3489                                    StackFrame::Type frame_type) {
3490  ASM_CODE_COMMENT(this);
3491  DCHECK(frame_type == StackFrame::EXIT ||
3492         frame_type == StackFrame::BUILTIN_EXIT);
3493
3494  // Set up the frame structure on the stack.
3495  STATIC_ASSERT(2 * kPointerSize == ExitFrameConstants::kCallerSPDisplacement);
3496  STATIC_ASSERT(1 * kPointerSize == ExitFrameConstants::kCallerPCOffset);
3497  STATIC_ASSERT(0 * kPointerSize == ExitFrameConstants::kCallerFPOffset);
3498
3499  // This is how the stack will look:
3500  // fp + 2 (==kCallerSPDisplacement) - old stack's end
3501  // [fp + 1 (==kCallerPCOffset)] - saved old ra
3502  // [fp + 0 (==kCallerFPOffset)] - saved old fp
3503  // [fp - 1 StackFrame::EXIT Smi
3504  // [fp - 2 (==kSPOffset)] - sp of the called function
3505  // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the
3506  //   new stack (will contain saved ra)
3507
3508  // Save registers and reserve room for saved entry sp.
3509  addi_d(sp, sp, -2 * kPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp);
3510  St_d(ra, MemOperand(sp, 3 * kPointerSize));
3511  St_d(fp, MemOperand(sp, 2 * kPointerSize));
3512  {
3513    UseScratchRegisterScope temps(this);
3514    Register scratch = temps.Acquire();
3515    li(scratch, Operand(StackFrame::TypeToMarker(frame_type)));
3516    St_d(scratch, MemOperand(sp, 1 * kPointerSize));
3517  }
3518  // Set up new frame pointer.
3519  addi_d(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp);
3520
3521  if (FLAG_debug_code) {
3522    St_d(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset));
3523  }
3524
3525  {
3526    BlockTrampolinePoolScope block_trampoline_pool(this);
3527    // Save the frame pointer and the context in top.
3528    li(t8, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
3529                                     isolate()));
3530    St_d(fp, MemOperand(t8, 0));
3531    li(t8,
3532       ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
3533    St_d(cp, MemOperand(t8, 0));
3534  }
3535
3536  const int frame_alignment = MacroAssembler::ActivationFrameAlignment();
3537  if (save_doubles) {
3538    // The stack is already aligned to 0 modulo 8 for stores with sdc1.
3539    int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2;
3540    int space = kNumOfSavedRegisters * kDoubleSize;
3541    Sub_d(sp, sp, Operand(space));
3542    // Remember: we only need to save every 2nd double FPU value.
3543    for (int i = 0; i < kNumOfSavedRegisters; i++) {
3544      FPURegister reg = FPURegister::from_code(2 * i);
3545      Fst_d(reg, MemOperand(sp, i * kDoubleSize));
3546    }
3547  }
3548
3549  // Reserve place for the return address, stack space and an optional slot
3550  // (used by DirectCEntry to hold the return value if a struct is
3551  // returned) and align the frame preparing for calling the runtime function.
3552  DCHECK_GE(stack_space, 0);
3553  Sub_d(sp, sp, Operand((stack_space + 2) * kPointerSize));
3554  if (frame_alignment > 0) {
3555    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
3556    And(sp, sp, Operand(-frame_alignment));  // Align stack.
3557  }
3558
3559  // Set the exit frame sp value to point just before the return address
3560  // location.
3561  UseScratchRegisterScope temps(this);
3562  Register scratch = temps.Acquire();
3563  addi_d(scratch, sp, kPointerSize);
3564  St_d(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
3565}
3566
3567void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
3568                                    bool do_return,
3569                                    bool argument_count_is_length) {
3570  ASM_CODE_COMMENT(this);
3571  BlockTrampolinePoolScope block_trampoline_pool(this);
3572  // Optionally restore all double registers.
3573  if (save_doubles) {
3574    // Remember: we only need to restore every 2nd double FPU value.
3575    int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2;
3576    Sub_d(t8, fp,
3577          Operand(ExitFrameConstants::kFixedFrameSizeFromFp +
3578                  kNumOfSavedRegisters * kDoubleSize));
3579    for (int i = 0; i < kNumOfSavedRegisters; i++) {
3580      FPURegister reg = FPURegister::from_code(2 * i);
3581      Fld_d(reg, MemOperand(t8, i * kDoubleSize));
3582    }
3583  }
3584
3585  // Clear top frame.
3586  li(t8,
3587     ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
3588  St_d(zero_reg, MemOperand(t8, 0));
3589
3590  // Restore current context from top and clear it in debug mode.
3591  li(t8,
3592     ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
3593  Ld_d(cp, MemOperand(t8, 0));
3594
3595  if (FLAG_debug_code) {
3596    UseScratchRegisterScope temp(this);
3597    Register scratch = temp.Acquire();
3598    li(scratch, Operand(Context::kInvalidContext));
3599    St_d(scratch, MemOperand(t8, 0));
3600  }
3601
3602  // Pop the arguments, restore registers, and return.
3603  mov(sp, fp);  // Respect ABI stack constraint.
3604  Ld_d(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset));
3605  Ld_d(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset));
3606
3607  if (argument_count.is_valid()) {
3608    if (argument_count_is_length) {
3609      add_d(sp, sp, argument_count);
3610    } else {
3611      Alsl_d(sp, argument_count, sp, kPointerSizeLog2, t8);
3612    }
3613  }
3614
3615  addi_d(sp, sp, 2 * kPointerSize);
3616  if (do_return) {
3617    Ret();
3618  }
3619}
3620
3621int TurboAssembler::ActivationFrameAlignment() {
3622#if V8_HOST_ARCH_LOONG64
3623  // Running on the real platform. Use the alignment as mandated by the local
3624  // environment.
3625  // Note: This will break if we ever start generating snapshots on one LOONG64
3626  // platform for another LOONG64 platform with a different alignment.
3627  return base::OS::ActivationFrameAlignment();
3628#else   // V8_HOST_ARCH_LOONG64
3629  // If we are using the simulator then we should always align to the expected
3630  // alignment. As the simulator is used to generate snapshots we do not know
3631  // if the target platform will need alignment, so this is controlled from a
3632  // flag.
3633  return FLAG_sim_stack_alignment;
3634#endif  // V8_HOST_ARCH_LOONG64
3635}
3636
3637void MacroAssembler::AssertStackIsAligned() {
3638  if (FLAG_debug_code) {
3639    ASM_CODE_COMMENT(this);
3640    const int frame_alignment = ActivationFrameAlignment();
3641    const int frame_alignment_mask = frame_alignment - 1;
3642
3643    if (frame_alignment > kPointerSize) {
3644      Label alignment_as_expected;
3645      DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
3646      {
3647        UseScratchRegisterScope temps(this);
3648        Register scratch = temps.Acquire();
3649        andi(scratch, sp, frame_alignment_mask);
3650        Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg));
3651      }
3652      // Don't use Check here, as it will call Runtime_Abort re-entering here.
3653      stop();
3654      bind(&alignment_as_expected);
3655    }
3656  }
3657}
3658
3659void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
3660  if (SmiValuesAre32Bits()) {
3661    Ld_w(dst, MemOperand(src.base(), SmiWordOffset(src.offset())));
3662  } else {
3663    DCHECK(SmiValuesAre31Bits());
3664    Ld_w(dst, src);
3665    SmiUntag(dst);
3666  }
3667}
3668
3669void TurboAssembler::JumpIfSmi(Register value, Label* smi_label) {
3670  DCHECK_EQ(0, kSmiTag);
3671  UseScratchRegisterScope temps(this);
3672  Register scratch = temps.Acquire();
3673  andi(scratch, value, kSmiTagMask);
3674  Branch(smi_label, eq, scratch, Operand(zero_reg));
3675}
3676
3677void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label) {
3678  DCHECK_EQ(0, kSmiTag);
3679  UseScratchRegisterScope temps(this);
3680  Register scratch = temps.Acquire();
3681  andi(scratch, value, kSmiTagMask);
3682  Branch(not_smi_label, ne, scratch, Operand(zero_reg));
3683}
3684
3685void TurboAssembler::AssertNotSmi(Register object) {
3686  if (FLAG_debug_code) {
3687    ASM_CODE_COMMENT(this);
3688    STATIC_ASSERT(kSmiTag == 0);
3689    UseScratchRegisterScope temps(this);
3690    Register scratch = temps.Acquire();
3691    andi(scratch, object, kSmiTagMask);
3692    Check(ne, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg));
3693  }
3694}
3695
3696void TurboAssembler::AssertSmi(Register object) {
3697  if (FLAG_debug_code) {
3698    ASM_CODE_COMMENT(this);
3699    STATIC_ASSERT(kSmiTag == 0);
3700    UseScratchRegisterScope temps(this);
3701    Register scratch = temps.Acquire();
3702    andi(scratch, object, kSmiTagMask);
3703    Check(eq, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg));
3704  }
3705}
3706
3707void MacroAssembler::AssertConstructor(Register object) {
3708  if (FLAG_debug_code) {
3709    ASM_CODE_COMMENT(this);
3710    BlockTrampolinePoolScope block_trampoline_pool(this);
3711    STATIC_ASSERT(kSmiTag == 0);
3712    SmiTst(object, t8);
3713    Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, t8,
3714          Operand(zero_reg));
3715
3716    LoadMap(t8, object);
3717    Ld_bu(t8, FieldMemOperand(t8, Map::kBitFieldOffset));
3718    And(t8, t8, Operand(Map::Bits1::IsConstructorBit::kMask));
3719    Check(ne, AbortReason::kOperandIsNotAConstructor, t8, Operand(zero_reg));
3720  }
3721}
3722
3723void MacroAssembler::AssertFunction(Register object) {
3724  if (FLAG_debug_code) {
3725    ASM_CODE_COMMENT(this);
3726    BlockTrampolinePoolScope block_trampoline_pool(this);
3727    STATIC_ASSERT(kSmiTag == 0);
3728    SmiTst(object, t8);
3729    Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, t8,
3730          Operand(zero_reg));
3731    Push(object);
3732    LoadMap(object, object);
3733    GetInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE, t8);
3734    Check(ls, AbortReason::kOperandIsNotAFunction, t8,
3735          Operand(LAST_JS_FUNCTION_TYPE - FIRST_JS_FUNCTION_TYPE));
3736    Pop(object);
3737  }
3738}
3739
3740void MacroAssembler::AssertCallableFunction(Register object) {
3741  if (FLAG_debug_code) {
3742    ASM_CODE_COMMENT(this);
3743    BlockTrampolinePoolScope block_trampoline_pool(this);
3744    STATIC_ASSERT(kSmiTag == 0);
3745    SmiTst(object, t8);
3746    Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, t8,
3747          Operand(zero_reg));
3748    Push(object);
3749    LoadMap(object, object);
3750    GetInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE, t8);
3751    Check(ls, AbortReason::kOperandIsNotACallableFunction, t8,
3752          Operand(LAST_CALLABLE_JS_FUNCTION_TYPE -
3753                  FIRST_CALLABLE_JS_FUNCTION_TYPE));
3754    Pop(object);
3755  }
3756}
3757
3758void MacroAssembler::AssertBoundFunction(Register object) {
3759  if (FLAG_debug_code) {
3760    ASM_CODE_COMMENT(this);
3761    BlockTrampolinePoolScope block_trampoline_pool(this);
3762    STATIC_ASSERT(kSmiTag == 0);
3763    SmiTst(object, t8);
3764    Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, t8,
3765          Operand(zero_reg));
3766    GetObjectType(object, t8, t8);
3767    Check(eq, AbortReason::kOperandIsNotABoundFunction, t8,
3768          Operand(JS_BOUND_FUNCTION_TYPE));
3769  }
3770}
3771
3772void MacroAssembler::AssertGeneratorObject(Register object) {
3773  if (!FLAG_debug_code) return;
3774  ASM_CODE_COMMENT(this);
3775  BlockTrampolinePoolScope block_trampoline_pool(this);
3776  STATIC_ASSERT(kSmiTag == 0);
3777  SmiTst(object, t8);
3778  Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, t8,
3779        Operand(zero_reg));
3780
3781  GetObjectType(object, t8, t8);
3782
3783  Label done;
3784
3785  // Check if JSGeneratorObject
3786  Branch(&done, eq, t8, Operand(JS_GENERATOR_OBJECT_TYPE));
3787
3788  // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
3789  Branch(&done, eq, t8, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));
3790
3791  // Check if JSAsyncGeneratorObject
3792  Branch(&done, eq, t8, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));
3793
3794  Abort(AbortReason::kOperandIsNotAGeneratorObject);
3795
3796  bind(&done);
3797}
3798
3799void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
3800                                                     Register scratch) {
3801  if (FLAG_debug_code) {
3802    ASM_CODE_COMMENT(this);
3803    Label done_checking;
3804    AssertNotSmi(object);
3805    LoadRoot(scratch, RootIndex::kUndefinedValue);
3806    Branch(&done_checking, eq, object, Operand(scratch));
3807    GetObjectType(object, scratch, scratch);
3808    Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch,
3809           Operand(ALLOCATION_SITE_TYPE));
3810    bind(&done_checking);
3811  }
3812}
3813
3814void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1,
3815                                FPURegister src2, Label* out_of_line) {
3816  ASM_CODE_COMMENT(this);
3817  if (src1 == src2) {
3818    Move_s(dst, src1);
3819    return;
3820  }
3821
3822  // Check if one of operands is NaN.
3823  CompareIsNanF32(src1, src2);
3824  BranchTrueF(out_of_line);
3825
3826  fmax_s(dst, src1, src2);
3827}
3828
3829void TurboAssembler::Float32MaxOutOfLine(FPURegister dst, FPURegister src1,
3830                                         FPURegister src2) {
3831  fadd_s(dst, src1, src2);
3832}
3833
3834void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1,
3835                                FPURegister src2, Label* out_of_line) {
3836  ASM_CODE_COMMENT(this);
3837  if (src1 == src2) {
3838    Move_s(dst, src1);
3839    return;
3840  }
3841
3842  // Check if one of operands is NaN.
3843  CompareIsNanF32(src1, src2);
3844  BranchTrueF(out_of_line);
3845
3846  fmin_s(dst, src1, src2);
3847}
3848
3849void TurboAssembler::Float32MinOutOfLine(FPURegister dst, FPURegister src1,
3850                                         FPURegister src2) {
3851  fadd_s(dst, src1, src2);
3852}
3853
3854void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1,
3855                                FPURegister src2, Label* out_of_line) {
3856  ASM_CODE_COMMENT(this);
3857  if (src1 == src2) {
3858    Move_d(dst, src1);
3859    return;
3860  }
3861
3862  // Check if one of operands is NaN.
3863  CompareIsNanF64(src1, src2);
3864  BranchTrueF(out_of_line);
3865
3866  fmax_d(dst, src1, src2);
3867}
3868
3869void TurboAssembler::Float64MaxOutOfLine(FPURegister dst, FPURegister src1,
3870                                         FPURegister src2) {
3871  fadd_d(dst, src1, src2);
3872}
3873
3874void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1,
3875                                FPURegister src2, Label* out_of_line) {
3876  ASM_CODE_COMMENT(this);
3877  if (src1 == src2) {
3878    Move_d(dst, src1);
3879    return;
3880  }
3881
3882  // Check if one of operands is NaN.
3883  CompareIsNanF64(src1, src2);
3884  BranchTrueF(out_of_line);
3885
3886  fmin_d(dst, src1, src2);
3887}
3888
3889void TurboAssembler::Float64MinOutOfLine(FPURegister dst, FPURegister src1,
3890                                         FPURegister src2) {
3891  fadd_d(dst, src1, src2);
3892}
3893
3894static const int kRegisterPassedArguments = 8;
3895
3896int TurboAssembler::CalculateStackPassedWords(int num_reg_arguments,
3897                                              int num_double_arguments) {
3898  int stack_passed_words = 0;
3899  num_reg_arguments += 2 * num_double_arguments;
3900
3901  // Up to eight simple arguments are passed in registers a0..a7.
3902  if (num_reg_arguments > kRegisterPassedArguments) {
3903    stack_passed_words += num_reg_arguments - kRegisterPassedArguments;
3904  }
3905  return stack_passed_words;
3906}
3907
3908void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
3909                                          int num_double_arguments,
3910                                          Register scratch) {
3911  ASM_CODE_COMMENT(this);
3912  int frame_alignment = ActivationFrameAlignment();
3913
3914  // Up to eight simple arguments in a0..a3, a4..a7, No argument slots.
3915  // Remaining arguments are pushed on the stack.
3916  int stack_passed_arguments =
3917      CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
3918  if (frame_alignment > kPointerSize) {
3919    // Make stack end at alignment and make room for num_arguments - 4 words
3920    // and the original value of sp.
3921    mov(scratch, sp);
3922    Sub_d(sp, sp, Operand((stack_passed_arguments + 1) * kPointerSize));
3923    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
3924    bstrins_d(sp, zero_reg, std::log2(frame_alignment) - 1, 0);
3925    St_d(scratch, MemOperand(sp, stack_passed_arguments * kPointerSize));
3926  } else {
3927    Sub_d(sp, sp, Operand(stack_passed_arguments * kPointerSize));
3928  }
3929}
3930
3931void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
3932                                          Register scratch) {
3933  PrepareCallCFunction(num_reg_arguments, 0, scratch);
3934}
3935
3936void TurboAssembler::CallCFunction(ExternalReference function,
3937                                   int num_reg_arguments,
3938                                   int num_double_arguments) {
3939  ASM_CODE_COMMENT(this);
3940  BlockTrampolinePoolScope block_trampoline_pool(this);
3941  li(t7, function);
3942  CallCFunctionHelper(t7, num_reg_arguments, num_double_arguments);
3943}
3944
3945void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
3946                                   int num_double_arguments) {
3947  ASM_CODE_COMMENT(this);
3948  CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
3949}
3950
3951void TurboAssembler::CallCFunction(ExternalReference function,
3952                                   int num_arguments) {
3953  CallCFunction(function, num_arguments, 0);
3954}
3955
3956void TurboAssembler::CallCFunction(Register function, int num_arguments) {
3957  CallCFunction(function, num_arguments, 0);
3958}
3959
3960void TurboAssembler::CallCFunctionHelper(Register function,
3961                                         int num_reg_arguments,
3962                                         int num_double_arguments) {
3963  DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
3964  DCHECK(has_frame());
3965  // Make sure that the stack is aligned before calling a C function unless
3966  // running in the simulator. The simulator has its own alignment check which
3967  // provides more information.
3968
3969#if V8_HOST_ARCH_LOONG64
3970  if (FLAG_debug_code) {
3971    int frame_alignment = base::OS::ActivationFrameAlignment();
3972    int frame_alignment_mask = frame_alignment - 1;
3973    if (frame_alignment > kPointerSize) {
3974      DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
3975      Label alignment_as_expected;
3976      {
3977        Register scratch = t8;
3978        And(scratch, sp, Operand(frame_alignment_mask));
3979        Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg));
3980      }
3981      // Don't use Check here, as it will call Runtime_Abort possibly
3982      // re-entering here.
3983      stop();
3984      bind(&alignment_as_expected);
3985    }
3986  }
3987#endif  // V8_HOST_ARCH_LOONG64
3988
3989  // Just call directly. The function called cannot cause a GC, or
3990  // allow preemption, so the return address in the link register
3991  // stays correct.
3992  {
3993    BlockTrampolinePoolScope block_trampoline_pool(this);
3994    if (function != t7) {
3995      mov(t7, function);
3996      function = t7;
3997    }
3998
3999    // Save the frame pointer and PC so that the stack layout remains iterable,
4000    // even without an ExitFrame which normally exists between JS and C frames.
4001    // 't' registers are caller-saved so this is safe as a scratch register.
4002    Register pc_scratch = t1;
4003    Register scratch = t2;
4004    DCHECK(!AreAliased(pc_scratch, scratch, function));
4005
4006    pcaddi(pc_scratch, 1);
4007
4008    // See x64 code for reasoning about how to address the isolate data fields.
4009    if (root_array_available()) {
4010      St_d(pc_scratch, MemOperand(kRootRegister,
4011                                  IsolateData::fast_c_call_caller_pc_offset()));
4012      St_d(fp, MemOperand(kRootRegister,
4013                          IsolateData::fast_c_call_caller_fp_offset()));
4014    } else {
4015      DCHECK_NOT_NULL(isolate());
4016      li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate()));
4017      St_d(pc_scratch, MemOperand(scratch, 0));
4018      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4019      St_d(fp, MemOperand(scratch, 0));
4020    }
4021
4022    Call(function);
4023
4024    // We don't unset the PC; the FP is the source of truth.
4025    if (root_array_available()) {
4026      St_d(zero_reg, MemOperand(kRootRegister,
4027                                IsolateData::fast_c_call_caller_fp_offset()));
4028    } else {
4029      DCHECK_NOT_NULL(isolate());
4030      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4031      St_d(zero_reg, MemOperand(scratch, 0));
4032    }
4033
4034    int stack_passed_arguments =
4035        CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
4036
4037    if (base::OS::ActivationFrameAlignment() > kPointerSize) {
4038      Ld_d(sp, MemOperand(sp, stack_passed_arguments * kPointerSize));
4039    } else {
4040      Add_d(sp, sp, Operand(stack_passed_arguments * kPointerSize));
4041    }
4042
4043    set_pc_for_safepoint();
4044  }
4045}
4046
4047#undef BRANCH_ARGS_CHECK
4048
4049void TurboAssembler::CheckPageFlag(const Register& object, int mask,
4050                                   Condition cc, Label* condition_met) {
4051  ASM_CODE_COMMENT(this);
4052  UseScratchRegisterScope temps(this);
4053  temps.Include(t8);
4054  Register scratch = temps.Acquire();
4055  And(scratch, object, Operand(~kPageAlignmentMask));
4056  Ld_d(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
4057  And(scratch, scratch, Operand(mask));
4058  Branch(condition_met, cc, scratch, Operand(zero_reg));
4059}
4060
4061Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
4062                                   Register reg4, Register reg5,
4063                                   Register reg6) {
4064  RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
4065
4066  const RegisterConfiguration* config = RegisterConfiguration::Default();
4067  for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
4068    int code = config->GetAllocatableGeneralCode(i);
4069    Register candidate = Register::from_code(code);
4070    if (regs.has(candidate)) continue;
4071    return candidate;
4072  }
4073  UNREACHABLE();
4074}
4075
4076void TurboAssembler::ComputeCodeStartAddress(Register dst) {
4077  // TODO(LOONG_dev): range check, add Pcadd macro function?
4078  pcaddi(dst, -pc_offset() >> 2);
4079}
4080
4081void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4082                                           DeoptimizeKind kind, Label* ret,
4083                                           Label*) {
4084  ASM_CODE_COMMENT(this);
4085  BlockTrampolinePoolScope block_trampoline_pool(this);
4086  Ld_d(t7,
4087       MemOperand(kRootRegister, IsolateData::BuiltinEntrySlotOffset(target)));
4088  Call(t7);
4089  DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4090            (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4091                                            : Deoptimizer::kEagerDeoptExitSize);
4092}
4093
4094void TurboAssembler::LoadCodeObjectEntry(Register destination,
4095                                         Register code_object) {
4096  ASM_CODE_COMMENT(this);
4097  // Code objects are called differently depending on whether we are generating
4098  // builtin code (which will later be embedded into the binary) or compiling
4099  // user JS code at runtime.
4100  // * Builtin code runs in --jitless mode and thus must not call into on-heap
4101  //   Code targets. Instead, we dispatch through the builtins entry table.
4102  // * Codegen at runtime does not have this restriction and we can use the
4103  //   shorter, branchless instruction sequence. The assumption here is that
4104  //   targets are usually generated code and not builtin Code objects.
4105  if (options().isolate_independent_code) {
4106    DCHECK(root_array_available());
4107    Label if_code_is_off_heap, out;
4108    Register scratch = t8;
4109
4110    DCHECK(!AreAliased(destination, scratch));
4111    DCHECK(!AreAliased(code_object, scratch));
4112
4113    // Check whether the Code object is an off-heap trampoline. If so, call its
4114    // (off-heap) entry point directly without going through the (on-heap)
4115    // trampoline.  Otherwise, just call the Code object as always.
4116    Ld_w(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
4117    And(scratch, scratch, Operand(Code::IsOffHeapTrampoline::kMask));
4118    BranchShort(&if_code_is_off_heap, ne, scratch, Operand(zero_reg));
4119    // Not an off-heap trampoline object, the entry point is at
4120    // Code::raw_instruction_start().
4121    Add_d(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
4122    Branch(&out);
4123
4124    // An off-heap trampoline, the entry point is loaded from the builtin entry
4125    // table.
4126    bind(&if_code_is_off_heap);
4127    Ld_w(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
4128    // TODO(liuyu): don't use scratch_reg in Alsl_d;
4129    Alsl_d(destination, scratch, kRootRegister, kSystemPointerSizeLog2,
4130           zero_reg);
4131    Ld_d(destination,
4132         MemOperand(destination, IsolateData::builtin_entry_table_offset()));
4133
4134    bind(&out);
4135  } else {
4136    Add_d(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
4137  }
4138}
4139
4140void TurboAssembler::CallCodeObject(Register code_object) {
4141  ASM_CODE_COMMENT(this);
4142  LoadCodeObjectEntry(code_object, code_object);
4143  Call(code_object);
4144}
4145
4146void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
4147  ASM_CODE_COMMENT(this);
4148  DCHECK_EQ(JumpMode::kJump, jump_mode);
4149  LoadCodeObjectEntry(code_object, code_object);
4150  Jump(code_object);
4151}
4152
4153}  // namespace internal
4154}  // namespace v8
4155
4156#endif  // V8_TARGET_ARCH_LOONG64
4157