// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
#define V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_

#include "src/base/platform/wrappers.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return eq;
    case kUnequal:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedLessEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kSignedGreaterEqual:
      return ge;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedLessEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kUnsignedGreaterEqual:
      return hs;
  }
}

// Liftoff Frames.
//
//  slot      Frame
//       +--------------------+---------------------------
//  n+4  | optional padding slot to keep the stack 16 byte aligned.
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | StackFrame::WASM   |
//  -2   |     instance       |
//  -3   |     feedback vector|
//  -4   |     tiering budget |
//  -----+--------------------+---------------------------
//  -5   |     slot 0         |   ^
//  -6   |     slot 1         |   |
//       |                    | Frame slots
//       |                    |   |
//       |                    |   v
//       | optional padding slot to keep the stack 16 byte aligned.
//  -----+--------------------+  <-- stack ptr (sp)
//

constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline CPURegister GetRegFromType(const LiftoffRegister& reg, ValueKind kind) {
  switch (kind) {
    case kI32:
      return reg.gp().W();
    case kI64:
    case kRef:
    case kOptRef:
    case kRtt:
      return reg.gp().X();
    case kF32:
      return reg.fp().S();
    case kF64:
      return reg.fp().D();
    case kS128:
      return reg.fp().Q();
    default:
      UNREACHABLE();
  }
}

inline CPURegList PadRegList(RegList list) {
  if ((list.Count() & 1) != 0) list.set(padreg);
  return CPURegList(kXRegSizeInBits, list);
}

inline CPURegList PadVRegList(DoubleRegList list) {
  if ((list.Count() & 1) != 0) list.set(fp_scratch);
  return CPURegList(kQRegSizeInBits, list);
}

inline CPURegister AcquireByType(UseScratchRegisterScope* temps,
                                 ValueKind kind) {
  switch (kind) {
    case kI32:
      return temps->AcquireW();
    case kI64:
    case kRef:
    case kOptRef:
      return temps->AcquireX();
    case kF32:
      return temps->AcquireS();
    case kF64:
      return temps->AcquireD();
    case kS128:
      return temps->AcquireQ();
    default:
      UNREACHABLE();
  }
}

template <typename T>
inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, T offset_imm,
                           bool i64_offset = false) {
  if (!offset.is_valid()) return MemOperand(addr.X(), offset_imm);
  Register effective_addr = addr.X();
  if (offset_imm) {
    effective_addr = temps->AcquireX();
    assm->Add(effective_addr, addr.X(), offset_imm);
  }
  return i64_offset ? MemOperand(effective_addr, offset.X())
                    : MemOperand(effective_addr, offset.W(), UXTW);
}
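
// GetMemOp (above) produces one of three addressing forms:
//   no offset register:            [addr, #offset_imm]
//   offset register, imm == 0:     [addr, offset]  (offset extended with UXTW
//                                  unless i64_offset)
//   offset register, imm != 0:     scratch = addr + offset_imm;
//                                  [scratch, offset]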

// Compute the effective address (sum of |addr|, |offset| (if given) and
// |offset_imm|) into a temporary register. This is needed for certain load
// instructions that do not support an offset (register or immediate).
// Returns |addr| if both |offset| and |offset_imm| are zero.
inline Register GetEffectiveAddress(LiftoffAssembler* assm,
                                    UseScratchRegisterScope* temps,
                                    Register addr, Register offset,
                                    uintptr_t offset_imm) {
  if (!offset.is_valid() && offset_imm == 0) return addr;
  Register tmp = temps->AcquireX();
  if (offset.is_valid()) {
    // TODO(clemensb): This needs adaption for memory64.
    assm->Add(tmp, addr, Operand(offset, UXTW));
    addr = tmp;
  }
  if (offset_imm != 0) assm->Add(tmp, addr, offset_imm);
  return tmp;
}

enum class ShiftDirection : bool { kLeft, kRight };

enum class ShiftSign : bool { kSigned, kUnsigned };

template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned>
inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs,
                          Register rhs, VectorFormat format) {
  DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned);
  DCHECK(dst.IsSameFormat(lhs));
  DCHECK_EQ(dst.LaneCount(), LaneCountFromFormat(format));

  UseScratchRegisterScope temps(assm);
  VRegister tmp = temps.AcquireV(format);
  Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW();
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  assm->And(shift, rhs, mask);
  assm->Dup(tmp, shift);

  if (dir == ShiftDirection::kRight) {
    assm->Neg(tmp, tmp);
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshl(dst, lhs, tmp);
  } else {
    assm->Ushl(dst, lhs, tmp);
  }
}
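
// Note on EmitSimdShift: NEON only shifts left by a register value, so a
// right shift is implemented by negating the (masked) shift amount and using
// Sshl/Ushl with that negative count.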

template <VectorFormat format, ShiftSign sign>
inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst,
                                        VRegister lhs, int32_t rhs) {
  // Sshr and Ushr do not allow a shift amount of 0, so check for that here.
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  int32_t shift = rhs & mask;
  if (!shift) {
    if (dst != lhs) {
      assm->Mov(dst, lhs);
    }
    return;
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshr(dst, lhs, rhs & mask);
  } else {
    assm->Ushr(dst, lhs, rhs & mask);
  }
}

inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  // AnyTrue does not depend on the number of lanes, so we can use V4S for all.
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(kFormatS);
  assm->Umaxv(temp, src.fp().V4S());
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

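// AllTrue reduces with an unsigned minimum (Uminv) across the lanes of the
// requested format: the result is 1 iff no lane is zero.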
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src, VectorFormat format) {
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format));
  assm->Uminv(temp, VRegister::Create(src.fp().code(), format));
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  InstructionAccurateScope scope(this, 1);
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub(sp, sp, 0);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);

  // This is the previous stack pointer value (before we push the lr and the
  // fp). We need to keep it to authenticate the lr and adjust the new stack
  // pointer afterwards.
  Add(x16, fp, 16);

  // Load the fp and lr of the old frame, they will be pushed in the new frame
  // during the actual call.
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  Ldp(fp, x17, MemOperand(fp));
  Autib1716();
  Mov(lr, x17);
#else
  Ldp(fp, lr, MemOperand(fp));
#endif

  temps.Include(x17);

  Register scratch = temps.AcquireX();

  // Shift the whole frame upwards, except for fp and lr.
  int slot_count = num_callee_stack_params;
  for (int i = slot_count - 1; i >= 0; --i) {
    ldr(scratch, MemOperand(sp, i * 8));
    str(scratch, MemOperand(x16, (i - stack_param_delta) * 8));
  }

  // Set the new stack pointer.
  Sub(sp, x16, stack_param_delta * 8);
}

void LiftoffAssembler::AlignFrameSize() {
  // The frame_size includes the frame marker. The frame marker has already been
  // pushed on the stack though, so we don't need to allocate memory for it
  // anymore.
  int initial_frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  int frame_size = initial_frame_size;

  static_assert(kStackSlotSize == kXRegSize,
                "kStackSlotSize must equal kXRegSize");
  // The stack pointer is required to be quadword aligned.
  // Misalignment will cause a stack alignment fault.
  frame_size = RoundUp(frame_size, kQuadWordSizeInBytes);
  if (!IsImmAddSub(frame_size)) {
    // Round the stack to a page to try to fit an add/sub immediate.
    frame_size = RoundUp(frame_size, 0x1000);
    if (!IsImmAddSub(frame_size)) {
      // Stack greater than 4M! Because this is quite an improbable case, we
      // just fall back to TurboFan.
      bailout(kOtherReason, "Stack too big");
      return;
    }
  }
  if (frame_size > initial_frame_size) {
    // Record the padding, as it is needed for GC offsets later.
    max_used_spill_offset_ += (frame_size - initial_frame_size);
  }
}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;

  // The stack pointer is required to be quadword aligned.
  // Misalignment will cause a stack alignment fault.
  DCHECK_EQ(frame_size, RoundUp(frame_size, kQuadWordSizeInBytes));
  DCHECK(IsImmAddSub(frame_size));

  PatchingAssembler patching_assembler(AssemblerOptions{},
                                       buffer_start_ + offset, 1);

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.PatchSubSp(frame_size);
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, sp, framesize)} with a jump to OOL code that does
  // this "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.b((pc_offset() - offset) >> kInstrSizeLog2);

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    UseScratchRegisterScope temps(this);
    Register stack_limit = temps.AcquireX();
    Ldr(stack_limit,
        FieldMemOperand(kWasmInstanceRegister,
                        WasmInstanceObject::kRealStackLimitAddressOffset));
    Ldr(stack_limit, MemOperand(stack_limit));
    Add(stack_limit, stack_limit, Operand(frame_size));
    Cmp(sp, stack_limit);
    B(hs /* higher or same */, &continuation);
  }

  Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  if (FLAG_debug_code) Brk(0);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::Claim}.
  Claim(frame_size, 1);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + kInstrSize;
  b((func_start_offset - pc_offset()) >> kInstrSizeLog2);
}
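
// In summary, the patched prologue ends up as either
//   sub sp, sp, #frame_size                 (frames below 4KB)
// or, for large frames,
//   b <ool>                                 (patched placeholder)
//   ...
//   ool: stack limit check (possibly calling kWasmStackOverflow),
//        Claim(frame_size, 1),
//        b <back into the function body>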

void LiftoffAssembler::FinishCode() { ForceConstantPoolEmissionWithoutJump(); }

void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kTierupBudgetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  // TODO(zhin): Unaligned accesses typically take additional cycles; we should
  // do some performance testing to see how big an effect this has.
  switch (kind) {
    case kS128:
      return value_kind_size(kind);
    default:
      return kStackSlotSize;
  }
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return kind == kS128 || is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      Mov(reg.gp().W(), Immediate(value.to_i32(), rmode));
      break;
    case kI64:
      Mov(reg.gp().X(), Immediate(value.to_i64(), rmode));
      break;
    case kF32:
      Fmov(reg.fp().S(), value.to_f32_boxed().get_scalar());
      break;
    case kF64:
      Fmov(reg.fp().D(), value.to_f64_boxed().get_scalar());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  Ldr(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  MemOperand src{instance, offset};
  switch (size) {
    case 1:
      Ldrb(dst.W(), src);
      break;
    case 4:
      Ldr(dst.W(), src);
      break;
    case 8:
      Ldr(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, MemOperand{instance, offset});
}

void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldMemOperand(instance, offset), tag,
                           isolate_root);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  Str(instance, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::ResetOSRTarget() {}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  LoadTaggedPointerField(dst, src_op);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
  Ldr(dst.X(), src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  UseScratchRegisterScope temps(this);
  Operand offset_op = offset_reg.is_valid() ? Operand(offset_reg.W(), UXTW)
                                            : Operand(offset_imm);
  // For the write barrier (below), we cannot have both an offset register and
  // an immediate offset. Add them to a 32-bit offset initially, but in a 64-bit
  // register, because that's needed in the MemOperand below.
  if (offset_reg.is_valid() && offset_imm) {
    Register effective_offset = temps.AcquireX();
    Add(effective_offset.W(), offset_reg.W(), offset_imm);
    offset_op = effective_offset;
  }
  StoreTaggedField(src.gp(), MemOperand(dst_addr.X(), offset_op));

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  // The write barrier.
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, eq,
                &write_barrier);
  b(&exit);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit);
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, ne,
                &exit);
  CallRecordWriteStubSaveRegisters(
      dst_addr, offset_op, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}
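
// The record-write stub above is only reached when all of the following hold:
// the destination page is marked as having interesting pointers from it, the
// stored value is not a Smi, and the value's page is marked as having
// interesting pointers to it; otherwise the barrier is skipped via {exit}.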

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op = liftoff::GetMemOp(this, &temps, src_addr, offset_reg,
                                        offset_imm, i64_offset);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldrb(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load8S:
      Ldrsb(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load8S:
      Ldrsb(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldrh(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load16S:
      Ldrsh(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load16S:
      Ldrsh(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldr(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load32S:
      Ldrsw(dst.gp().X(), src_op);
      break;
    case LoadType::kI64Load:
      Ldr(dst.gp().X(), src_op);
      break;
    case LoadType::kF32Load:
      Ldr(dst.fp().S(), src_op);
      break;
    case LoadType::kF64Load:
      Ldr(dst.fp().D(), src_op);
      break;
    case LoadType::kS128Load:
      Ldr(dst.fp().Q(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand dst_op =
      liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      Strb(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      Strh(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      Str(src.gp().W(), dst_op);
      break;
    case StoreType::kI64Store:
      Str(src.gp().X(), dst_op);
      break;
    case StoreType::kF32Store:
      Str(src.fp().S(), dst_op);
      break;
    case StoreType::kF64Store:
      Str(src.fp().D(), dst_op);
      break;
    case StoreType::kS128Store:
      Str(src.fp().Q(), dst_op);
      break;
  }
}

namespace liftoff {
#define __ lasm->

inline Register CalculateActualAddress(LiftoffAssembler* lasm,
                                       Register addr_reg, Register offset_reg,
                                       uintptr_t offset_imm,
                                       Register result_reg) {
  DCHECK_NE(offset_reg, no_reg);
  DCHECK_NE(addr_reg, no_reg);
  __ Add(result_reg, addr_reg, Operand(offset_reg));
  if (offset_imm != 0) {
    __ Add(result_reg, result_reg, Operand(offset_imm));
  }
  return result_reg;
}

enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };

inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr,
                        Register offset_reg, uintptr_t offset_imm,
                        LiftoffRegister value, LiftoffRegister result,
                        StoreType type, Binop op) {
  LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
  DCHECK(result.gp() != value.gp() && result.gp() != dst_addr &&
         result.gp() != offset_reg);

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to be
  // the same register.
  Register temp = temps.AcquireX();

  Label retry;
  __ Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ ldaxrb(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ ldaxrh(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ ldaxr(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ ldaxr(result.gp().X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  switch (op) {
    case Binop::kAdd:
      __ add(temp, result.gp(), value.gp());
      break;
    case Binop::kSub:
      __ sub(temp, result.gp(), value.gp());
      break;
    case Binop::kAnd:
      __ and_(temp, result.gp(), value.gp());
      break;
    case Binop::kOr:
      __ orr(temp, result.gp(), value.gp());
      break;
    case Binop::kXor:
      __ eor(temp, result.gp(), value.gp());
      break;
    case Binop::kExchange:
      __ mov(temp, value.gp());
      break;
  }

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ stlxrb(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ stlxrh(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ stlxr(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ stlxr(store_result.W(), temp.X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  __ Cbnz(store_result.W(), &retry);
}
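
// The sequence above is a standard load-linked/store-conditional loop:
// ldaxr* load-acquires the old value exclusively, the binop is applied into
// {temp}, and stlxr* only succeeds (writes 0 into {store_result}) if the
// exclusive monitor is still held; otherwise we branch back and retry.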

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register src_reg = liftoff::CalculateActualAddress(
      this, src_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldarb(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldarh(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldar(dst.gp().W(), src_reg);
      return;
    case LoadType::kI64Load:
      Ldar(dst.gp().X(), src_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register dst_reg = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      Stlrb(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      Stlrh(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      Stlr(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store:
      Stlr(src.gp().X(), dst_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAdd);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kSub);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAnd);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kOr);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kXor);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kExchange);
}

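// Compare-and-exchange, also via a load-exclusive/store-exclusive loop: the
// value at the address is load-acquired, compared against {expected}, and
// {new_value} is only store-released if they match. A failed exclusive store
// retries; a failed comparison exits with the observed value.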
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  LiftoffRegList pinned = {dst_addr, offset_reg, expected, new_value};

  Register result_reg = result.gp();
  if (pinned.has(result)) {
    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
  }

  UseScratchRegisterScope temps(this);

  Register actual_addr = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  Register store_result = temps.AcquireW();

  Label retry;
  Label done;
  Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      ldaxrb(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTB));
      B(ne, &done);
      stlxrb(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      ldaxrh(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTH));
      B(ne, &done);
      stlxrh(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      ldaxr(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTW));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store:
      ldaxr(result_reg.X(), actual_addr);
      Cmp(result.gp().X(), Operand(expected.gp().X(), UXTX));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  Cbnz(store_result.W(), &retry);
  Bind(&done);

  if (result_reg != result.gp()) {
    mov(result.gp(), result_reg);
  }
}

void LiftoffAssembler::AtomicFence() { Dmb(InnerShareable, BarrierAll); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Ldr(liftoff::GetRegFromType(dst, kind), MemOperand(fp, offset));
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Str(liftoff::GetRegFromType(src, kind), MemOperand(fp, offset));
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueKind kind) {
  Ldr(liftoff::GetRegFromType(dst, kind), MemOperand(sp, offset));
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  UseScratchRegisterScope temps(this);
  CPURegister scratch = liftoff::AcquireByType(&temps, kind);
  Ldr(scratch, liftoff::GetStackSlot(src_offset));
  Str(scratch, liftoff::GetStackSlot(dst_offset));
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  if (kind == kI32) {
    Mov(dst.W(), src.W());
  } else {
    DCHECK(kI64 == kind || is_reference(kind));
    Mov(dst.X(), src.X());
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  if (kind == kF32) {
    Fmov(dst.S(), src.S());
  } else if (kind == kF64) {
    Fmov(dst.D(), src.D());
  } else {
    DCHECK_EQ(kS128, kind);
    Mov(dst.Q(), src.Q());
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  Str(liftoff::GetRegFromType(reg, kind), dst);
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  UseScratchRegisterScope temps(this);
  CPURegister src = CPURegister::no_reg();
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0) {
        src = wzr;
      } else {
        src = temps.AcquireW();
        Mov(src.W(), value.to_i32());
      }
      break;
    case kI64:
      if (value.to_i64() == 0) {
        src = xzr;
      } else {
        src = temps.AcquireX();
        Mov(src.X(), value.to_i64());
      }
      break;
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
  Str(src, dst);
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  MemOperand src = liftoff::GetStackSlot(offset);
  Ldr(liftoff::GetRegFromType(reg, kind), src);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  // Zero 'size' bytes *below* start, byte at offset 'start' is untouched.
  DCHECK_LE(0, start);
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  int max_stp_offset = -start - size;
  // We check IsImmLSUnscaled(-start-12) because str only allows for unscaled
  // 9-bit immediate offsets [-256,256]. If start is large enough, which can
  // happen when a function has many params (>=32 i64), str cannot be encoded
  // properly. We could use Str, which would generate more instructions, so we
  // fall back to the general case below instead.
  if (size <= 12 * kStackSlotSize &&
      IsImmLSPair(max_stp_offset, kXRegSizeLog2) &&
      IsImmLSUnscaled(-start - 12)) {
    // Special straight-line code for up to 12 slots. Generates one
    // instruction per two slots (<= 7 instructions total).
    STATIC_ASSERT(kStackSlotSize == kSystemPointerSize);
    uint32_t remainder = size;
    for (; remainder >= 2 * kStackSlotSize; remainder -= 2 * kStackSlotSize) {
      stp(xzr, xzr, liftoff::GetStackSlot(start + remainder));
    }

    DCHECK_GE(12, remainder);
    switch (remainder) {
      case 12:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        str(wzr, liftoff::GetStackSlot(start + remainder - 8));
        break;
      case 8:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 4:
        str(wzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 0:
        break;
      default:
        UNREACHABLE();
    }
  } else {
    // General case for bigger counts (5-8 instructions).
    UseScratchRegisterScope temps(this);
    Register address_reg = temps.AcquireX();
    // This {Sub} might use another temp register if the offset is too large.
    Sub(address_reg, fp, start + size);
    Register count_reg = temps.AcquireX();
    Mov(count_reg, size / 4);

    Label loop;
    bind(&loop);
    sub(count_reg, count_reg, 1);
    str(wzr, MemOperand(address_reg, kSystemPointerSize / 2, PostIndex));
    cbnz(count_reg, &loop);
  }
}
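
// Example for the straight-line path above: with start == 16 and size == 24,
// the loop emits stp(xzr, xzr, [fp, #-40]) (zeroing bytes [-40, -24) relative
// to fp) and the switch emits str(xzr, [fp, #-24]), zeroing the remaining
// 8 bytes up to (but not including) offset -16.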

#define I32_BINOP(name, instruction)                             \
  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
                                     Register rhs) {             \
    instruction(dst.W(), lhs.W(), rhs.W());                      \
  }
#define I32_BINOP_I(name, instruction)                              \
  I32_BINOP(name, instruction)                                      \
  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
                                        int32_t imm) {              \
    instruction(dst.W(), lhs.W(), Immediate(imm));                  \
  }
#define I64_BINOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
                                     LiftoffRegister rhs) {                    \
    instruction(dst.gp().X(), lhs.gp().X(), rhs.gp().X());                     \
  }
#define I64_BINOP_I(name, instruction)                                      \
  I64_BINOP(name, instruction)                                              \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                \
                                        LiftoffRegister lhs, int32_t imm) { \
    instruction(dst.gp().X(), lhs.gp().X(), imm);                           \
  }
#define FP32_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.S(), lhs.S(), rhs.S());                                  \
  }
#define FP32_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
  }
#define FP32_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
    return true;                                                               \
  }
#define FP64_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.D(), lhs.D(), rhs.D());                                  \
  }
#define FP64_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
  }
#define FP64_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
    return true;                                                               \
  }
#define I32_SHIFTOP(name, instruction)                              \
  void LiftoffAssembler::emit_##name(Register dst, Register src,    \
                                     Register amount) {             \
    instruction(dst.W(), src.W(), amount.W());                      \
  }                                                                 \
  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
                                        int32_t amount) {           \
    instruction(dst.W(), src.W(), amount & 31);                     \
  }
#define I64_SHIFTOP(name, instruction)                                         \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
                                     Register amount) {                        \
    instruction(dst.gp().X(), src.gp().X(), amount.X());                       \
  }                                                                            \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                   \
                                        LiftoffRegister src, int32_t amount) { \
    instruction(dst.gp().X(), src.gp().X(), amount & 63);                      \
  }

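// For instance, I32_BINOP_I(i32_add, Add) below expands to
//   void LiftoffAssembler::emit_i32_add(Register dst, Register lhs,
//                                       Register rhs) {
//     Add(dst.W(), lhs.W(), rhs.W());
//   }
//   void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs,
//                                        int32_t imm) {
//     Add(dst.W(), lhs.W(), Immediate(imm));
//   }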
I32_BINOP_I(i32_add, Add)
I32_BINOP_I(i32_sub, Sub)
I32_BINOP(i32_mul, Mul)
I32_BINOP_I(i32_and, And)
I32_BINOP_I(i32_or, Orr)
I32_BINOP_I(i32_xor, Eor)
I32_SHIFTOP(i32_shl, Lsl)
I32_SHIFTOP(i32_sar, Asr)
I32_SHIFTOP(i32_shr, Lsr)
I64_BINOP(i64_add, Add)
I64_BINOP(i64_sub, Sub)
I64_BINOP(i64_mul, Mul)
I64_BINOP_I(i64_and, And)
I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul)
FP32_BINOP(f32_div, Fdiv)
FP32_BINOP(f32_min, Fmin)
FP32_BINOP(f32_max, Fmax)
FP32_UNOP(f32_abs, Fabs)
FP32_UNOP(f32_neg, Fneg)
FP32_UNOP_RETURN_TRUE(f32_ceil, Frintp)
FP32_UNOP_RETURN_TRUE(f32_floor, Frintm)
FP32_UNOP_RETURN_TRUE(f32_trunc, Frintz)
FP32_UNOP_RETURN_TRUE(f32_nearest_int, Frintn)
FP32_UNOP(f32_sqrt, Fsqrt)
FP64_BINOP(f64_add, Fadd)
FP64_BINOP(f64_sub, Fsub)
FP64_BINOP(f64_mul, Fmul)
FP64_BINOP(f64_div, Fdiv)
FP64_BINOP(f64_min, Fmin)
FP64_BINOP(f64_max, Fmax)
FP64_UNOP(f64_abs, Fabs)
FP64_UNOP(f64_neg, Fneg)
FP64_UNOP_RETURN_TRUE(f64_ceil, Frintp)
FP64_UNOP_RETURN_TRUE(f64_floor, Frintm)
FP64_UNOP_RETURN_TRUE(f64_trunc, Frintz)
FP64_UNOP_RETURN_TRUE(f64_nearest_int, Frintn)
FP64_UNOP(f64_sqrt, Fsqrt)

#undef I32_BINOP
#undef I64_BINOP
#undef FP32_BINOP
#undef FP32_UNOP
#undef FP64_BINOP
#undef FP64_UNOP
#undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP
#undef I64_SHIFTOP

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
  Add(dst.gp().X(), lhs.gp().X(), imm);
}

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Clz(dst.W(), src.W());
}

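// ARM64 has no count-trailing-zeros instruction, so ctz is computed by
// bit-reversing the input (Rbit) and counting leading zeros (Clz).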
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Rbit(dst.W(), src.W());
  Clz(dst.W(), dst.W());
}

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  PopcntHelper(dst.W(), src.W());
  return true;
}

void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Clz(dst.gp().X(), src.gp().X());
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Rbit(dst.gp().X(), src.gp().X());
  Clz(dst.gp().X(), dst.gp().X());
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  PopcntHelper(dst.gp().X(), src.gp().X());
  return true;
}

void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
  UseScratchRegisterScope temps(this);
  if (COMPRESS_POINTERS_BOOL) {
    DCHECK(SmiValuesAre31Bits());
    Register scratch = temps.AcquireW();
    Ldr(scratch, MemOperand(dst.gp(), offset));
    Add(scratch, scratch, Operand(Smi::FromInt(1)));
    Str(scratch, MemOperand(dst.gp(), offset));
  } else {
    Register scratch = temps.AcquireX();
    SmiUntag(scratch, MemOperand(dst.gp(), offset));
    Add(scratch, scratch, Operand(1));
    SmiTag(scratch);
    Str(scratch, MemOperand(dst.gp(), offset));
  }
}

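// In the signed divisions below, the kMinInt / -1 case is detected with a
// Cmp/Ccmp pair: if the divisor equals -1, Ccmp compares the dividend against
// 1, which sets the overflow flag exactly when the dividend is kMinInt, so
// B(..., vs) branches to the "unrepresentable" trap.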
emit_i32_divs(Register dst, Register lhs, Register rhs, Label* trap_div_by_zero, Label* trap_div_unrepresentable)1209 void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1210                                      Label* trap_div_by_zero,
1211                                      Label* trap_div_unrepresentable) {
1212   Register dst_w = dst.W();
1213   Register lhs_w = lhs.W();
1214   Register rhs_w = rhs.W();
1215   bool can_use_dst = !dst_w.Aliases(lhs_w) && !dst_w.Aliases(rhs_w);
1216   if (can_use_dst) {
1217     // Do div early.
1218     Sdiv(dst_w, lhs_w, rhs_w);
1219   }
1220   // Check for division by zero.
1221   Cbz(rhs_w, trap_div_by_zero);
1222   // Check for kMinInt / -1. This is unrepresentable.
1223   Cmp(rhs_w, -1);
1224   Ccmp(lhs_w, 1, NoFlag, eq);
1225   B(trap_div_unrepresentable, vs);
1226   if (!can_use_dst) {
1227     // Do div.
1228     Sdiv(dst_w, lhs_w, rhs_w);
1229   }
1230 }
1231 
emit_i32_divu(Register dst, Register lhs, Register rhs, Label* trap_div_by_zero)1232 void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1233                                      Label* trap_div_by_zero) {
1234   // Check for division by zero.
1235   Cbz(rhs.W(), trap_div_by_zero);
1236   // Do div.
1237   Udiv(dst.W(), lhs.W(), rhs.W());
1238 }
1239 
emit_i32_rems(Register dst, Register lhs, Register rhs, Label* trap_div_by_zero)1240 void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1241                                      Label* trap_div_by_zero) {
1242   Register dst_w = dst.W();
1243   Register lhs_w = lhs.W();
1244   Register rhs_w = rhs.W();
1245   // Do early div.
1246   // No need to check kMinInt / -1 because the result is kMinInt and then
1247   // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
1248   UseScratchRegisterScope temps(this);
1249   Register scratch = temps.AcquireW();
1250   Sdiv(scratch, lhs_w, rhs_w);
1251   // Check for division by zero.
1252   Cbz(rhs_w, trap_div_by_zero);
1253   // Compute remainder.
1254   Msub(dst_w, scratch, rhs_w, lhs_w);
1255 }
1256 
emit_i32_remu(Register dst, Register lhs, Register rhs, Label* trap_div_by_zero)1257 void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1258                                      Label* trap_div_by_zero) {
1259   Register dst_w = dst.W();
1260   Register lhs_w = lhs.W();
1261   Register rhs_w = rhs.W();
1262   // Do early div.
1263   UseScratchRegisterScope temps(this);
1264   Register scratch = temps.AcquireW();
1265   Udiv(scratch, lhs_w, rhs_w);
1266   // Check for division by zero.
1267   Cbz(rhs_w, trap_div_by_zero);
1268   // Compute remainder.
1269   Msub(dst_w, scratch, rhs_w, lhs_w);
1270 }
1271 
emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, Label* trap_div_by_zero, Label* trap_div_unrepresentable)1272 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1273                                      LiftoffRegister rhs,
1274                                      Label* trap_div_by_zero,
1275                                      Label* trap_div_unrepresentable) {
1276   Register dst_x = dst.gp().X();
1277   Register lhs_x = lhs.gp().X();
1278   Register rhs_x = rhs.gp().X();
1279   bool can_use_dst = !dst_x.Aliases(lhs_x) && !dst_x.Aliases(rhs_x);
1280   if (can_use_dst) {
1281     // Do div early.
1282     Sdiv(dst_x, lhs_x, rhs_x);
1283   }
1284   // Check for division by zero.
1285   Cbz(rhs_x, trap_div_by_zero);
1286   // Check for kMinInt / -1. This is unrepresentable.
1287   Cmp(rhs_x, -1);
1288   Ccmp(lhs_x, 1, NoFlag, eq);
1289   B(trap_div_unrepresentable, vs);
1290   if (!can_use_dst) {
1291     // Do div.
1292     Sdiv(dst_x, lhs_x, rhs_x);
1293   }
1294   return true;
1295 }
1296 
emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, Label* trap_div_by_zero)1297 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1298                                      LiftoffRegister rhs,
1299                                      Label* trap_div_by_zero) {
1300   // Check for division by zero.
1301   Cbz(rhs.gp().X(), trap_div_by_zero);
1302   // Do div.
1303   Udiv(dst.gp().X(), lhs.gp().X(), rhs.gp().X());
1304   return true;
1305 }
1306 
emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, Label* trap_div_by_zero)1307 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1308                                      LiftoffRegister rhs,
1309                                      Label* trap_div_by_zero) {
1310   Register dst_x = dst.gp().X();
1311   Register lhs_x = lhs.gp().X();
1312   Register rhs_x = rhs.gp().X();
1313   // Do early div.
1314   // No need to check kMinInt / -1: the Sdiv below yields kMinInt for that
1315   // input, and kMinInt * -1 wraps back to kMinInt, so the Msub result is 0.
1316   UseScratchRegisterScope temps(this);
1317   Register scratch = temps.AcquireX();
1318   Sdiv(scratch, lhs_x, rhs_x);
1319   // Check for division by zero.
1320   Cbz(rhs_x, trap_div_by_zero);
1321   // Compute remainder.
1322   Msub(dst_x, scratch, rhs_x, lhs_x);
1323   return true;
1324 }
1325 
emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, Label* trap_div_by_zero)1326 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1327                                      LiftoffRegister rhs,
1328                                      Label* trap_div_by_zero) {
1329   Register dst_x = dst.gp().X();
1330   Register lhs_x = lhs.gp().X();
1331   Register rhs_x = rhs.gp().X();
1332   // Do early div.
1333   UseScratchRegisterScope temps(this);
1334   Register scratch = temps.AcquireX();
1335   Udiv(scratch, lhs_x, rhs_x);
1336   // Check for division by zero.
1337   Cbz(rhs_x, trap_div_by_zero);
1338   // Compute remainder.
1339   Msub(dst_x, scratch, rhs_x, lhs_x);
1340   return true;
1341 }
1342 
emit_u32_to_uintptr(Register dst, Register src)1343 void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
1344   Uxtw(dst, src);
1345 }
1346 
emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs)1347 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1348                                          DoubleRegister rhs) {
1349   UseScratchRegisterScope temps(this);
1350   DoubleRegister scratch = temps.AcquireD();
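  // Shift the sign bit of rhs down to bit 0; Sli then re-inserts it at bit 31
  // of dst while keeping the magnitude bits taken from lhs.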
1351   Ushr(scratch.V2S(), rhs.V2S(), 31);
1352   if (dst != lhs) {
1353     Fmov(dst.S(), lhs.S());
1354   }
1355   Sli(dst.V2S(), scratch.V2S(), 31);
1356 }
1357 
emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs)1358 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1359                                          DoubleRegister rhs) {
1360   UseScratchRegisterScope temps(this);
1361   DoubleRegister scratch = temps.AcquireD();
1362   Ushr(scratch.V1D(), rhs.V1D(), 63);
1363   if (dst != lhs) {
1364     Fmov(dst.D(), lhs.D());
1365   }
1366   Sli(dst.V1D(), scratch.V1D(), 63);
1367 }
1368 
emit_type_conversion(WasmOpcode opcode, LiftoffRegister dst, LiftoffRegister src, Label* trap)1369 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
1370                                             LiftoffRegister dst,
1371                                             LiftoffRegister src, Label* trap) {
1372   switch (opcode) {
1373     case kExprI32ConvertI64:
1374       Mov(dst.gp().W(), src.gp().W());
1375       return true;
1376     case kExprI32SConvertF32:
1377       Fcvtzs(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
1378       // Check underflow and NaN.
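      // If src is NaN or below INT32_MIN, ge is false and Ccmp sets the V flag
      // directly. Otherwise Ccmp compares dst with -1; dst + 1 overflows only
      // for dst == INT32_MAX, which an f32 input can only produce when Fcvtzs
      // saturates (INT32_MAX is not exactly representable as an f32). The
      // unsigned cases use the analogous check against -1.0.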
1379       Fcmp(src.fp().S(), static_cast<float>(INT32_MIN));
1380       // Check overflow.
1381       Ccmp(dst.gp().W(), -1, VFlag, ge);
1382       B(trap, vs);
1383       return true;
1384     case kExprI32UConvertF32:
1385       Fcvtzu(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
1386       // Check underflow and NaN.
1387       Fcmp(src.fp().S(), -1.0);
1388       // Check overflow.
1389       Ccmp(dst.gp().W(), -1, ZFlag, gt);
1390       B(trap, eq);
1391       return true;
1392     case kExprI32SConvertF64: {
1393       // INT32_MIN and INT32_MAX are valid results, so we cannot test the
1394       // result to detect overflow. Two immediate floating-point comparisons
1395       // would work, but they would generate two conditional branches.
1396       UseScratchRegisterScope temps(this);
1397       VRegister fp_ref = temps.AcquireD();
1398       VRegister fp_cmp = temps.AcquireD();
1399       Fcvtzs(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
1400       Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
1401       Scvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
1402       // If comparison fails, we have an overflow or a NaN.
1403       Fcmp(fp_cmp, fp_ref);
1404       B(trap, ne);
1405       return true;
1406     }
1407     case kExprI32UConvertF64: {
1408       // INT32_MAX is a valid result, so we cannot test the result to detect
1409       // overflow. Two immediate floating-point comparisons would work, but
1410       // they would generate two conditional branches.
1411       UseScratchRegisterScope temps(this);
1412       VRegister fp_ref = temps.AcquireD();
1413       VRegister fp_cmp = temps.AcquireD();
1414       Fcvtzu(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
1415       Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
1416       Ucvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
1417       // If comparison fails, we have an overflow or a NaN.
1418       Fcmp(fp_cmp, fp_ref);
1419       B(trap, ne);
1420       return true;
1421     }
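    // The saturating conversions below map directly onto Fcvtzs/Fcvtzu, which
    // already clamp out-of-range inputs to the integer limits and convert NaN
    // to 0, matching the wasm trunc_sat semantics.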
1422     case kExprI32SConvertSatF32:
1423       Fcvtzs(dst.gp().W(), src.fp().S());
1424       return true;
1425     case kExprI32UConvertSatF32:
1426       Fcvtzu(dst.gp().W(), src.fp().S());
1427       return true;
1428     case kExprI32SConvertSatF64:
1429       Fcvtzs(dst.gp().W(), src.fp().D());
1430       return true;
1431     case kExprI32UConvertSatF64:
1432       Fcvtzu(dst.gp().W(), src.fp().D());
1433       return true;
1434     case kExprI64SConvertSatF32:
1435       Fcvtzs(dst.gp().X(), src.fp().S());
1436       return true;
1437     case kExprI64UConvertSatF32:
1438       Fcvtzu(dst.gp().X(), src.fp().S());
1439       return true;
1440     case kExprI64SConvertSatF64:
1441       Fcvtzs(dst.gp().X(), src.fp().D());
1442       return true;
1443     case kExprI64UConvertSatF64:
1444       Fcvtzu(dst.gp().X(), src.fp().D());
1445       return true;
1446     case kExprI32ReinterpretF32:
1447       Fmov(dst.gp().W(), src.fp().S());
1448       return true;
1449     case kExprI64SConvertI32:
1450       Sxtw(dst.gp().X(), src.gp().W());
1451       return true;
1452     case kExprI64SConvertF32:
1453       Fcvtzs(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
1454       // Check underflow and NaN.
1455       Fcmp(src.fp().S(), static_cast<float>(INT64_MIN));
1456       // Check overflow.
1457       Ccmp(dst.gp().X(), -1, VFlag, ge);
1458       B(trap, vs);
1459       return true;
1460     case kExprI64UConvertF32:
1461       Fcvtzu(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
1462       // Check underflow and NaN.
1463       Fcmp(src.fp().S(), -1.0);
1464       // Check overflow.
1465       Ccmp(dst.gp().X(), -1, ZFlag, gt);
1466       B(trap, eq);
1467       return true;
1468     case kExprI64SConvertF64:
1469       Fcvtzs(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
1470       // Check underflow and NaN.
1471       Fcmp(src.fp().D(), static_cast<double>(INT64_MIN));
1472       // Check overflow.
1473       Ccmp(dst.gp().X(), -1, VFlag, ge);
1474       B(trap, vs);
1475       return true;
1476     case kExprI64UConvertF64:
1477       Fcvtzu(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
1478       // Check underflow and NaN.
1479       Fcmp(src.fp().D(), -1.0);
1480       // Check overflow.
1481       Ccmp(dst.gp().X(), -1, ZFlag, gt);
1482       B(trap, eq);
1483       return true;
1484     case kExprI64UConvertI32:
1485       Mov(dst.gp().W(), src.gp().W());
1486       return true;
1487     case kExprI64ReinterpretF64:
1488       Fmov(dst.gp().X(), src.fp().D());
1489       return true;
1490     case kExprF32SConvertI32:
1491       Scvtf(dst.fp().S(), src.gp().W());
1492       return true;
1493     case kExprF32UConvertI32:
1494       Ucvtf(dst.fp().S(), src.gp().W());
1495       return true;
1496     case kExprF32SConvertI64:
1497       Scvtf(dst.fp().S(), src.gp().X());
1498       return true;
1499     case kExprF32UConvertI64:
1500       Ucvtf(dst.fp().S(), src.gp().X());
1501       return true;
1502     case kExprF32ConvertF64:
1503       Fcvt(dst.fp().S(), src.fp().D());
1504       return true;
1505     case kExprF32ReinterpretI32:
1506       Fmov(dst.fp().S(), src.gp().W());
1507       return true;
1508     case kExprF64SConvertI32:
1509       Scvtf(dst.fp().D(), src.gp().W());
1510       return true;
1511     case kExprF64UConvertI32:
1512       Ucvtf(dst.fp().D(), src.gp().W());
1513       return true;
1514     case kExprF64SConvertI64:
1515       Scvtf(dst.fp().D(), src.gp().X());
1516       return true;
1517     case kExprF64UConvertI64:
1518       Ucvtf(dst.fp().D(), src.gp().X());
1519       return true;
1520     case kExprF64ConvertF32:
1521       Fcvt(dst.fp().D(), src.fp().S());
1522       return true;
1523     case kExprF64ReinterpretI64:
1524       Fmov(dst.fp().D(), src.gp().X());
1525       return true;
1526     default:
1527       UNREACHABLE();
1528   }
1529 }
1530 
emit_i32_signextend_i8(Register dst, Register src)1531 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
1532   sxtb(dst.W(), src.W());
1533 }
1534 
emit_i32_signextend_i16(Register dst, Register src)1535 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
1536   sxth(dst.W(), src.W());
1537 }
1538 
emit_i64_signextend_i8(LiftoffRegister dst, LiftoffRegister src)1539 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
1540                                               LiftoffRegister src) {
1541   sxtb(dst.gp(), src.gp());
1542 }
1543 
emit_i64_signextend_i16(LiftoffRegister dst, LiftoffRegister src)1544 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
1545                                                LiftoffRegister src) {
1546   sxth(dst.gp(), src.gp());
1547 }
1548 
emit_i64_signextend_i32(LiftoffRegister dst, LiftoffRegister src)1549 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
1550                                                LiftoffRegister src) {
1551   sxtw(dst.gp(), src.gp());
1552 }
1553 
emit_jump(Label* label)1554 void LiftoffAssembler::emit_jump(Label* label) { B(label); }
1555 
emit_jump(Register target)1556 void LiftoffAssembler::emit_jump(Register target) { Br(target); }
1557 
emit_cond_jump(LiftoffCondition liftoff_cond, Label* label, ValueKind kind, Register lhs, Register rhs)1558 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
1559                                       Label* label, ValueKind kind,
1560                                       Register lhs, Register rhs) {
1561   Condition cond = liftoff::ToCondition(liftoff_cond);
1562   switch (kind) {
1563     case kI32:
1564       if (rhs.is_valid()) {
1565         Cmp(lhs.W(), rhs.W());
1566       } else {
1567         Cmp(lhs.W(), wzr);
1568       }
1569       break;
1570     case kRef:
1571     case kOptRef:
1572     case kRtt:
1573       DCHECK(rhs.is_valid());
1574       DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
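      // Reference values are compared as full 64-bit pointers, so fall through
      // to the kI64 path.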
1575       V8_FALLTHROUGH;
1576     case kI64:
1577       if (rhs.is_valid()) {
1578         Cmp(lhs.X(), rhs.X());
1579       } else {
1580         Cmp(lhs.X(), xzr);
1581       }
1582       break;
1583     default:
1584       UNREACHABLE();
1585   }
1586   B(label, cond);
1587 }
1588 
emit_i32_cond_jumpi(LiftoffCondition liftoff_cond, Label* label, Register lhs, int32_t imm)1589 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
1590                                            Label* label, Register lhs,
1591                                            int32_t imm) {
1592   Condition cond = liftoff::ToCondition(liftoff_cond);
1593   Cmp(lhs.W(), Operand(imm));
1594   B(label, cond);
1595 }
1596 
emit_i32_subi_jump_negative(Register value, int subtrahend, Label* result_negative)1597 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
1598                                                    int subtrahend,
1599                                                    Label* result_negative) {
1600   Subs(value.W(), value.W(), Immediate(subtrahend));
1601   B(result_negative, mi);
1602 }
1603 
emit_i32_eqz(Register dst, Register src)1604 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
1605   Cmp(src.W(), wzr);
1606   Cset(dst.W(), eq);
1607 }
1608 
emit_i32_set_cond(LiftoffCondition liftoff_cond, Register dst, Register lhs, Register rhs)1609 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
1610                                          Register dst, Register lhs,
1611                                          Register rhs) {
1612   Condition cond = liftoff::ToCondition(liftoff_cond);
1613   Cmp(lhs.W(), rhs.W());
1614   Cset(dst.W(), cond);
1615 }
1616 
emit_i64_eqz(Register dst, LiftoffRegister src)1617 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
1618   Cmp(src.gp().X(), xzr);
1619   Cset(dst.W(), eq);
1620 }
1621 
emit_i64_set_cond(LiftoffCondition liftoff_cond, Register dst, LiftoffRegister lhs, LiftoffRegister rhs)1622 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
1623                                          Register dst, LiftoffRegister lhs,
1624                                          LiftoffRegister rhs) {
1625   Condition cond = liftoff::ToCondition(liftoff_cond);
1626   Cmp(lhs.gp().X(), rhs.gp().X());
1627   Cset(dst.W(), cond);
1628 }
1629 
emit_f32_set_cond(LiftoffCondition liftoff_cond, Register dst, DoubleRegister lhs, DoubleRegister rhs)1630 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
1631                                          Register dst, DoubleRegister lhs,
1632                                          DoubleRegister rhs) {
1633   Condition cond = liftoff::ToCondition(liftoff_cond);
1634   Fcmp(lhs.S(), rhs.S());
1635   Cset(dst.W(), cond);
1636   if (cond != ne) {
1637     // If the V flag is set, at least one of the arguments was a NaN -> false.
1638     Csel(dst.W(), wzr, dst.W(), vs);
1639   }
1640 }
1641 
emit_f64_set_cond(LiftoffCondition liftoff_cond, Register dst, DoubleRegister lhs, DoubleRegister rhs)1642 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
1643                                          Register dst, DoubleRegister lhs,
1644                                          DoubleRegister rhs) {
1645   Condition cond = liftoff::ToCondition(liftoff_cond);
1646   Fcmp(lhs.D(), rhs.D());
1647   Cset(dst.W(), cond);
1648   if (cond != ne) {
1649     // If the V flag is set, at least one of the arguments was a NaN -> false.
1650     Csel(dst.W(), wzr, dst.W(), vs);
1651   }
1652 }
1653 
emit_select(LiftoffRegister dst, Register condition, LiftoffRegister true_value, LiftoffRegister false_value, ValueKind kind)1654 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
1655                                    LiftoffRegister true_value,
1656                                    LiftoffRegister false_value,
1657                                    ValueKind kind) {
1658   return false;
1659 }
1660 
emit_smi_check(Register obj, Label* target, SmiCheckMode mode)1661 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
1662                                       SmiCheckMode mode) {
1663   Label* smi_label = mode == kJumpOnSmi ? target : nullptr;
1664   Label* not_smi_label = mode == kJumpOnNotSmi ? target : nullptr;
1665   JumpIfSmi(obj, smi_label, not_smi_label);
1666 }
1667 
LoadTransform(LiftoffRegister dst, Register src_addr, Register offset_reg, uintptr_t offset_imm, LoadType type, LoadTransformationKind transform, uint32_t* protected_load_pc)1668 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
1669                                      Register offset_reg, uintptr_t offset_imm,
1670                                      LoadType type,
1671                                      LoadTransformationKind transform,
1672                                      uint32_t* protected_load_pc) {
1673   UseScratchRegisterScope temps(this);
1674   MemOperand src_op =
1675       transform == LoadTransformationKind::kSplat
1676           ? MemOperand{liftoff::GetEffectiveAddress(this, &temps, src_addr,
1677                                                     offset_reg, offset_imm)}
1678           : liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
1679   *protected_load_pc = pc_offset();
1680   MachineType memtype = type.mem_type();
1681 
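  // kExtend loads 64 bits and sign- or zero-extends each lane to twice its
  // width; kZeroExtend loads a 32- or 64-bit scalar into the low lanes (the Ldr
  // to an S/D register clears the rest of the Q register); kSplat broadcasts
  // the loaded element to every lane via ld1r.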
1682   if (transform == LoadTransformationKind::kExtend) {
1683     if (memtype == MachineType::Int8()) {
1684       Ldr(dst.fp().D(), src_op);
1685       Sxtl(dst.fp().V8H(), dst.fp().V8B());
1686     } else if (memtype == MachineType::Uint8()) {
1687       Ldr(dst.fp().D(), src_op);
1688       Uxtl(dst.fp().V8H(), dst.fp().V8B());
1689     } else if (memtype == MachineType::Int16()) {
1690       Ldr(dst.fp().D(), src_op);
1691       Sxtl(dst.fp().V4S(), dst.fp().V4H());
1692     } else if (memtype == MachineType::Uint16()) {
1693       Ldr(dst.fp().D(), src_op);
1694       Uxtl(dst.fp().V4S(), dst.fp().V4H());
1695     } else if (memtype == MachineType::Int32()) {
1696       Ldr(dst.fp().D(), src_op);
1697       Sxtl(dst.fp().V2D(), dst.fp().V2S());
1698     } else if (memtype == MachineType::Uint32()) {
1699       Ldr(dst.fp().D(), src_op);
1700       Uxtl(dst.fp().V2D(), dst.fp().V2S());
1701     }
1702   } else if (transform == LoadTransformationKind::kZeroExtend) {
1703     if (memtype == MachineType::Int32()) {
1704       Ldr(dst.fp().S(), src_op);
1705     } else {
1706       DCHECK_EQ(MachineType::Int64(), memtype);
1707       Ldr(dst.fp().D(), src_op);
1708     }
1709   } else {
1710     DCHECK_EQ(LoadTransformationKind::kSplat, transform);
1711     if (memtype == MachineType::Int8()) {
1712       ld1r(dst.fp().V16B(), src_op);
1713     } else if (memtype == MachineType::Int16()) {
1714       ld1r(dst.fp().V8H(), src_op);
1715     } else if (memtype == MachineType::Int32()) {
1716       ld1r(dst.fp().V4S(), src_op);
1717     } else if (memtype == MachineType::Int64()) {
1718       ld1r(dst.fp().V2D(), src_op);
1719     }
1720   }
1721 }
1722 
LoadLane(LiftoffRegister dst, LiftoffRegister src, Register addr, Register offset_reg, uintptr_t offset_imm, LoadType type, uint8_t laneidx, uint32_t* protected_load_pc)1723 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
1724                                 Register addr, Register offset_reg,
1725                                 uintptr_t offset_imm, LoadType type,
1726                                 uint8_t laneidx, uint32_t* protected_load_pc) {
1727   UseScratchRegisterScope temps(this);
1728   MemOperand src_op{
1729       liftoff::GetEffectiveAddress(this, &temps, addr, offset_reg, offset_imm)};
1730 
1731   MachineType mem_type = type.mem_type();
1732   if (dst != src) {
1733     Mov(dst.fp().Q(), src.fp().Q());
1734   }
1735 
1736   *protected_load_pc = pc_offset();
1737   if (mem_type == MachineType::Int8()) {
1738     ld1(dst.fp().B(), laneidx, src_op);
1739   } else if (mem_type == MachineType::Int16()) {
1740     ld1(dst.fp().H(), laneidx, src_op);
1741   } else if (mem_type == MachineType::Int32()) {
1742     ld1(dst.fp().S(), laneidx, src_op);
1743   } else if (mem_type == MachineType::Int64()) {
1744     ld1(dst.fp().D(), laneidx, src_op);
1745   } else {
1746     UNREACHABLE();
1747   }
1748 }
1749 
StoreLane(Register dst, Register offset, uintptr_t offset_imm, LiftoffRegister src, StoreType type, uint8_t lane, uint32_t* protected_store_pc)1750 void LiftoffAssembler::StoreLane(Register dst, Register offset,
1751                                  uintptr_t offset_imm, LiftoffRegister src,
1752                                  StoreType type, uint8_t lane,
1753                                  uint32_t* protected_store_pc) {
1754   UseScratchRegisterScope temps(this);
1755   MemOperand dst_op{
1756       liftoff::GetEffectiveAddress(this, &temps, dst, offset, offset_imm)};
1757   if (protected_store_pc) *protected_store_pc = pc_offset();
1758 
1759   MachineRepresentation rep = type.mem_rep();
1760   if (rep == MachineRepresentation::kWord8) {
1761     st1(src.fp().B(), lane, dst_op);
1762   } else if (rep == MachineRepresentation::kWord16) {
1763     st1(src.fp().H(), lane, dst_op);
1764   } else if (rep == MachineRepresentation::kWord32) {
1765     st1(src.fp().S(), lane, dst_op);
1766   } else {
1767     DCHECK_EQ(MachineRepresentation::kWord64, rep);
1768     st1(src.fp().D(), lane, dst_op);
1769   }
1770 }
1771 
emit_i8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1772 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
1773                                           LiftoffRegister lhs,
1774                                           LiftoffRegister rhs) {
1775   Tbl(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
1776 }
1777 
emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src)1778 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
1779                                         LiftoffRegister src) {
1780   Dup(dst.fp().V2D(), src.fp().D(), 0);
1781 }
1782 
emit_f64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)1783 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
1784                                                LiftoffRegister lhs,
1785                                                uint8_t imm_lane_idx) {
1786   Mov(dst.fp().D(), lhs.fp().V2D(), imm_lane_idx);
1787 }
1788 
emit_f64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)1789 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
1790                                                LiftoffRegister src1,
1791                                                LiftoffRegister src2,
1792                                                uint8_t imm_lane_idx) {
1793   if (dst != src1) {
1794     Mov(dst.fp().V2D(), src1.fp().V2D());
1795   }
1796   Mov(dst.fp().V2D(), imm_lane_idx, src2.fp().V2D(), 0);
1797 }
1798 
emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src)1799 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
1800                                       LiftoffRegister src) {
1801   Fabs(dst.fp().V2D(), src.fp().V2D());
1802 }
1803 
emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src)1804 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
1805                                       LiftoffRegister src) {
1806   Fneg(dst.fp().V2D(), src.fp().V2D());
1807 }
1808 
emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src)1809 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
1810                                        LiftoffRegister src) {
1811   Fsqrt(dst.fp().V2D(), src.fp().V2D());
1812 }
1813 
emit_f64x2_ceil(LiftoffRegister dst, LiftoffRegister src)1814 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
1815                                        LiftoffRegister src) {
1816   Frintp(dst.fp().V2D(), src.fp().V2D());
1817   return true;
1818 }
1819 
emit_f64x2_floor(LiftoffRegister dst, LiftoffRegister src)1820 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
1821                                         LiftoffRegister src) {
1822   Frintm(dst.fp().V2D(), src.fp().V2D());
1823   return true;
1824 }
1825 
emit_f64x2_trunc(LiftoffRegister dst, LiftoffRegister src)1826 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
1827                                         LiftoffRegister src) {
1828   Frintz(dst.fp().V2D(), src.fp().V2D());
1829   return true;
1830 }
1831 
emit_f64x2_nearest_int(LiftoffRegister dst, LiftoffRegister src)1832 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
1833                                               LiftoffRegister src) {
1834   Frintn(dst.fp().V2D(), src.fp().V2D());
1835   return true;
1836 }
1837 
emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1838 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
1839                                       LiftoffRegister rhs) {
1840   Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1841 }
1842 
emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1843 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
1844                                       LiftoffRegister rhs) {
1845   Fsub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1846 }
1847 
emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1848 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
1849                                       LiftoffRegister rhs) {
1850   Fmul(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1851 }
1852 
emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1853 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
1854                                       LiftoffRegister rhs) {
1855   Fdiv(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1856 }
1857 
emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1858 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
1859                                       LiftoffRegister rhs) {
1860   Fmin(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1861 }
1862 
emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1863 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
1864                                       LiftoffRegister rhs) {
1865   Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1866 }
1867 
emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1868 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
1869                                        LiftoffRegister rhs) {
1870   UseScratchRegisterScope temps(this);
1871 
1872   VRegister tmp = dst.fp();
1873   if (dst == lhs || dst == rhs) {
1874     tmp = temps.AcquireV(kFormat2D);
1875   }
1876 
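  // Pseudo-minimum: Fcmgt builds a per-lane all-ones mask where lhs > rhs; Bsl
  // then selects rhs in those lanes and keeps lhs elsewhere.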
1877   Fcmgt(tmp.V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1878   Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
1879 
1880   if (dst == lhs || dst == rhs) {
1881     Mov(dst.fp().V2D(), tmp);
1882   }
1883 }
1884 
emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1885 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
1886                                        LiftoffRegister rhs) {
1887   UseScratchRegisterScope temps(this);
1888 
1889   VRegister tmp = dst.fp();
1890   if (dst == lhs || dst == rhs) {
1891     tmp = temps.AcquireV(kFormat2D);
1892   }
1893 
1894   Fcmgt(tmp.V2D(), rhs.fp().V2D(), lhs.fp().V2D());
1895   Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
1896 
1897   if (dst == lhs || dst == rhs) {
1898     Mov(dst.fp().V2D(), tmp);
1899   }
1900 }
1901 
emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst, LiftoffRegister src)1902 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
1903                                                       LiftoffRegister src) {
1904   Sxtl(dst.fp().V2D(), src.fp().V2S());
1905   Scvtf(dst.fp().V2D(), dst.fp().V2D());
1906 }
1907 
emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst, LiftoffRegister src)1908 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
1909                                                       LiftoffRegister src) {
1910   Uxtl(dst.fp().V2D(), src.fp().V2S());
1911   Ucvtf(dst.fp().V2D(), dst.fp().V2D());
1912 }
1913 
emit_f64x2_promote_low_f32x4(LiftoffRegister dst, LiftoffRegister src)1914 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
1915                                                     LiftoffRegister src) {
1916   Fcvtl(dst.fp().V2D(), src.fp().V2S());
1917 }
1918 
emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src)1919 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
1920                                         LiftoffRegister src) {
1921   Dup(dst.fp().V4S(), src.fp().S(), 0);
1922 }
1923 
emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)1924 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
1925                                                LiftoffRegister lhs,
1926                                                uint8_t imm_lane_idx) {
1927   Mov(dst.fp().S(), lhs.fp().V4S(), imm_lane_idx);
1928 }
1929 
emit_f32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)1930 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
1931                                                LiftoffRegister src1,
1932                                                LiftoffRegister src2,
1933                                                uint8_t imm_lane_idx) {
1934   if (dst != src1) {
1935     Mov(dst.fp().V4S(), src1.fp().V4S());
1936   }
1937   Mov(dst.fp().V4S(), imm_lane_idx, src2.fp().V4S(), 0);
1938 }
1939 
emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src)1940 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
1941                                       LiftoffRegister src) {
1942   Fabs(dst.fp().V4S(), src.fp().V4S());
1943 }
1944 
emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src)1945 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
1946                                       LiftoffRegister src) {
1947   Fneg(dst.fp().V4S(), src.fp().V4S());
1948 }
1949 
emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src)1950 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
1951                                        LiftoffRegister src) {
1952   Fsqrt(dst.fp().V4S(), src.fp().V4S());
1953 }
1954 
emit_f32x4_ceil(LiftoffRegister dst, LiftoffRegister src)1955 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
1956                                        LiftoffRegister src) {
1957   Frintp(dst.fp().V4S(), src.fp().V4S());
1958   return true;
1959 }
1960 
emit_f32x4_floor(LiftoffRegister dst, LiftoffRegister src)1961 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
1962                                         LiftoffRegister src) {
1963   Frintm(dst.fp().V4S(), src.fp().V4S());
1964   return true;
1965 }
1966 
emit_f32x4_trunc(LiftoffRegister dst, LiftoffRegister src)1967 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
1968                                         LiftoffRegister src) {
1969   Frintz(dst.fp().V4S(), src.fp().V4S());
1970   return true;
1971 }
1972 
emit_f32x4_nearest_int(LiftoffRegister dst, LiftoffRegister src)1973 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
1974                                               LiftoffRegister src) {
1975   Frintn(dst.fp().V4S(), src.fp().V4S());
1976   return true;
1977 }
1978 
emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1979 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
1980                                       LiftoffRegister rhs) {
1981   Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1982 }
1983 
emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1984 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
1985                                       LiftoffRegister rhs) {
1986   Fsub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1987 }
1988 
emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1989 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
1990                                       LiftoffRegister rhs) {
1991   Fmul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1992 }
1993 
emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1994 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
1995                                       LiftoffRegister rhs) {
1996   Fdiv(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1997 }
1998 
emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1999 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
2000                                       LiftoffRegister rhs) {
2001   Fmin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2002 }
2003 
emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2004 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
2005                                       LiftoffRegister rhs) {
2006   Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2007 }
2008 
emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2009 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2010                                        LiftoffRegister rhs) {
2011   UseScratchRegisterScope temps(this);
2012 
2013   VRegister tmp = dst.fp();
2014   if (dst == lhs || dst == rhs) {
2015     tmp = temps.AcquireV(kFormat4S);
2016   }
2017 
2018   Fcmgt(tmp.V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2019   Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
2020 
2021   if (dst == lhs || dst == rhs) {
2022     Mov(dst.fp().V4S(), tmp);
2023   }
2024 }
2025 
emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2026 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2027                                        LiftoffRegister rhs) {
2028   UseScratchRegisterScope temps(this);
2029 
2030   VRegister tmp = dst.fp();
2031   if (dst == lhs || dst == rhs) {
2032     tmp = temps.AcquireV(kFormat4S);
2033   }
2034 
2035   Fcmgt(tmp.V4S(), rhs.fp().V4S(), lhs.fp().V4S());
2036   Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
2037 
2038   if (dst == lhs || dst == rhs) {
2039     Mov(dst.fp().V4S(), tmp);
2040   }
2041 }
2042 
emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src)2043 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2044                                         LiftoffRegister src) {
2045   Dup(dst.fp().V2D(), src.gp().X());
2046 }
2047 
emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2048 void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
2049                                                LiftoffRegister lhs,
2050                                                uint8_t imm_lane_idx) {
2051   Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
2052 }
2053 
emit_i64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2054 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
2055                                                LiftoffRegister src1,
2056                                                LiftoffRegister src2,
2057                                                uint8_t imm_lane_idx) {
2058   if (dst != src1) {
2059     Mov(dst.fp().V2D(), src1.fp().V2D());
2060   }
2061   Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
2062 }
2063 
emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src)2064 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
2065                                       LiftoffRegister src) {
2066   Neg(dst.fp().V2D(), src.fp().V2D());
2067 }
2068 
emit_i64x2_alltrue(LiftoffRegister dst, LiftoffRegister src)2069 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
2070                                           LiftoffRegister src) {
2071   I64x2AllTrue(dst.gp(), src.fp());
2072 }
2073 
emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2074 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
2075                                       LiftoffRegister rhs) {
2076   liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2077       this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2078 }
2079 
emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2080 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
2081                                        int32_t rhs) {
2082   Shl(dst.fp().V2D(), lhs.fp().V2D(), rhs & 63);
2083 }
2084 
emit_i64x2_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2085 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
2086                                         LiftoffRegister lhs,
2087                                         LiftoffRegister rhs) {
2088   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2089                          liftoff::ShiftSign::kSigned>(
2090       this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2091 }
2092 
emit_i64x2_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2093 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
2094                                          LiftoffRegister lhs, int32_t rhs) {
2095   liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>(
2096       this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
2097 }
2098 
emit_i64x2_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2099 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
2100                                         LiftoffRegister lhs,
2101                                         LiftoffRegister rhs) {
2102   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2103                          liftoff::ShiftSign::kUnsigned>(
2104       this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2105 }
2106 
emit_i64x2_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2107 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
2108                                          LiftoffRegister lhs, int32_t rhs) {
2109   liftoff::EmitSimdShiftRightImmediate<kFormat2D,
2110                                        liftoff::ShiftSign::kUnsigned>(
2111       this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
2112 }
2113 
emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2114 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2115                                       LiftoffRegister rhs) {
2116   Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2117 }
2118 
emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2119 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2120                                       LiftoffRegister rhs) {
2121   Sub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2122 }
2123 
emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2124 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2125                                       LiftoffRegister rhs) {
2126   UseScratchRegisterScope temps(this);
2127   VRegister tmp1 = temps.AcquireV(kFormat2D);
2128   VRegister tmp2 = temps.AcquireV(kFormat2D);
2129 
2130   // Algorithm copied from code-generator-arm64.cc with minor modifications:
2131   // - 2 (max number of scratch registers in Liftoff) temporaries instead of 3
2132   // - 1 more Umull instruction to calculate | cg | ae |,
2133   // - so, we can no longer use Umlal in the last step, and use Add instead.
2134   // Refer to comments there for details.
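  // Sketch of the decomposition, writing each 64-bit lane as hi:lo halves:
  //   a * b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
  // Umull forms a_lo*b_lo, Rev64 + Mul + Addp collects the two cross products,
  // Shll shifts their sum left by 32, and the final Add combines the pieces.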
2135   Xtn(tmp1.V2S(), lhs.fp().V2D());
2136   Xtn(tmp2.V2S(), rhs.fp().V2D());
2137   Umull(tmp1.V2D(), tmp1.V2S(), tmp2.V2S());
2138   Rev64(tmp2.V4S(), rhs.fp().V4S());
2139   Mul(tmp2.V4S(), tmp2.V4S(), lhs.fp().V4S());
2140   Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2141   Shll(dst.fp().V2D(), tmp2.V2S(), 32);
2142   Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
2143 }
2144 
emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2145 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
2146                                                      LiftoffRegister src1,
2147                                                      LiftoffRegister src2) {
2148   Smull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
2149 }
2150 
emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2151 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
2152                                                      LiftoffRegister src1,
2153                                                      LiftoffRegister src2) {
2154   Umull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
2155 }
2156 
emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2157 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
2158                                                       LiftoffRegister src1,
2159                                                       LiftoffRegister src2) {
2160   Smull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
2161 }
2162 
emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2163 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
2164                                                       LiftoffRegister src1,
2165                                                       LiftoffRegister src2) {
2166   Umull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
2167 }
2168 
emit_i64x2_bitmask(LiftoffRegister dst, LiftoffRegister src)2169 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
2170                                           LiftoffRegister src) {
2171   I64x2BitMask(dst.gp(), src.fp());
2172 }
2173 
emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst, LiftoffRegister src)2174 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
2175                                                      LiftoffRegister src) {
2176   Sxtl(dst.fp().V2D(), src.fp().V2S());
2177 }
2178 
emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst, LiftoffRegister src)2179 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
2180                                                       LiftoffRegister src) {
2181   Sxtl2(dst.fp().V2D(), src.fp().V4S());
2182 }
2183 
emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst, LiftoffRegister src)2184 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
2185                                                      LiftoffRegister src) {
2186   Uxtl(dst.fp().V2D(), src.fp().V2S());
2187 }
2188 
emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst, LiftoffRegister src)2189 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
2190                                                       LiftoffRegister src) {
2191   Uxtl2(dst.fp().V2D(), src.fp().V4S());
2192 }
2193 
emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src)2194 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2195                                         LiftoffRegister src) {
2196   Dup(dst.fp().V4S(), src.gp().W());
2197 }
2198 
emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2199 void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
2200                                                LiftoffRegister lhs,
2201                                                uint8_t imm_lane_idx) {
2202   Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
2203 }
2204 
emit_i32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2205 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
2206                                                LiftoffRegister src1,
2207                                                LiftoffRegister src2,
2208                                                uint8_t imm_lane_idx) {
2209   if (dst != src1) {
2210     Mov(dst.fp().V4S(), src1.fp().V4S());
2211   }
2212   Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
2213 }
2214 
emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src)2215 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
2216                                       LiftoffRegister src) {
2217   Neg(dst.fp().V4S(), src.fp().V4S());
2218 }
2219 
emit_i32x4_alltrue(LiftoffRegister dst, LiftoffRegister src)2220 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
2221                                           LiftoffRegister src) {
2222   liftoff::EmitAllTrue(this, dst, src, kFormat4S);
2223 }
2224 
emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src)2225 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
2226                                           LiftoffRegister src) {
2227   UseScratchRegisterScope temps(this);
2228   VRegister tmp = temps.AcquireQ();
2229   VRegister mask = temps.AcquireQ();
2230 
2231   Sshr(tmp.V4S(), src.fp().V4S(), 31);
2232   // Put bit i into lane i of the mask. After the And, a lane keeps bit i
2233   // only if its sign bit was set; Addv then sums the lanes into the 4-bit mask.
2234   Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
2235   And(tmp.V16B(), mask.V16B(), tmp.V16B());
2236   Addv(tmp.S(), tmp.V4S());
2237   Mov(dst.gp().W(), tmp.V4S(), 0);
2238 }
2239 
emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2240 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
2241                                       LiftoffRegister rhs) {
2242   liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2243       this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2244 }
2245 
emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2246 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
2247                                        int32_t rhs) {
2248   Shl(dst.fp().V4S(), lhs.fp().V4S(), rhs & 31);
2249 }
2250 
emit_i32x4_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2251 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
2252                                         LiftoffRegister lhs,
2253                                         LiftoffRegister rhs) {
2254   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2255                          liftoff::ShiftSign::kSigned>(
2256       this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2257 }
2258 
emit_i32x4_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2259 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
2260                                          LiftoffRegister lhs, int32_t rhs) {
2261   liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>(
2262       this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
2263 }
2264 
emit_i32x4_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2265 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
2266                                         LiftoffRegister lhs,
2267                                         LiftoffRegister rhs) {
2268   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2269                          liftoff::ShiftSign::kUnsigned>(
2270       this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2271 }
2272 
emit_i32x4_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2273 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
2274                                          LiftoffRegister lhs, int32_t rhs) {
2275   liftoff::EmitSimdShiftRightImmediate<kFormat4S,
2276                                        liftoff::ShiftSign::kUnsigned>(
2277       this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
2278 }
2279 
emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2280 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2281                                       LiftoffRegister rhs) {
2282   Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2283 }
2284 
emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2285 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2286                                       LiftoffRegister rhs) {
2287   Sub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2288 }
2289 
emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2290 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2291                                       LiftoffRegister rhs) {
2292   Mul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2293 }
2294 
emit_i32x4_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2295 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
2296                                         LiftoffRegister lhs,
2297                                         LiftoffRegister rhs) {
2298   Smin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2299 }
2300 
emit_i32x4_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2301 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
2302                                         LiftoffRegister lhs,
2303                                         LiftoffRegister rhs) {
2304   Umin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2305 }
2306 
emit_i32x4_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2307 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
2308                                         LiftoffRegister lhs,
2309                                         LiftoffRegister rhs) {
2310   Smax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2311 }
2312 
emit_i32x4_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2313 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
2314                                         LiftoffRegister lhs,
2315                                         LiftoffRegister rhs) {
2316   Umax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2317 }
2318 
emit_i32x4_dot_i16x8_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2319 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
2320                                               LiftoffRegister lhs,
2321                                               LiftoffRegister rhs) {
2322   UseScratchRegisterScope scope(this);
2323   VRegister tmp1 = scope.AcquireV(kFormat4S);
2324   VRegister tmp2 = scope.AcquireV(kFormat4S);
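  // Widen-multiply the low and high halves separately, then pairwise-add the
  // adjacent products so each i32 lane holds l[2i]*r[2i] + l[2i+1]*r[2i+1].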
2325   Smull(tmp1, lhs.fp().V4H(), rhs.fp().V4H());
2326   Smull2(tmp2, lhs.fp().V8H(), rhs.fp().V8H());
2327   Addp(dst.fp().V4S(), tmp1, tmp2);
2328 }
2329 
emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst, LiftoffRegister src)2330 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
2331                                                           LiftoffRegister src) {
2332   Saddlp(dst.fp().V4S(), src.fp().V8H());
2333 }
2334 
emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst, LiftoffRegister src)2335 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
2336                                                           LiftoffRegister src) {
2337   Uaddlp(dst.fp().V4S(), src.fp().V8H());
2338 }
2339 
emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2340 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
2341                                                      LiftoffRegister src1,
2342                                                      LiftoffRegister src2) {
2343   Smull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
2344 }
2345 
emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2346 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
2347                                                      LiftoffRegister src1,
2348                                                      LiftoffRegister src2) {
2349   Umull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
2350 }
2351 
emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2352 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
2353                                                       LiftoffRegister src1,
2354                                                       LiftoffRegister src2) {
2355   Smull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
2356 }
2357 
emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2358 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
2359                                                       LiftoffRegister src1,
2360                                                       LiftoffRegister src2) {
2361   Umull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
2362 }
2363 
emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src)2364 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2365                                         LiftoffRegister src) {
2366   Dup(dst.fp().V8H(), src.gp().W());
2367 }
2368 
emit_i16x8_extract_lane_u(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2369 void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
2370                                                  LiftoffRegister lhs,
2371                                                  uint8_t imm_lane_idx) {
2372   Umov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
2373 }
2374 
emit_i16x8_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2375 void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
2376                                                  LiftoffRegister lhs,
2377                                                  uint8_t imm_lane_idx) {
2378   Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
2379 }
2380 
emit_i16x8_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2381 void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
2382                                                LiftoffRegister src1,
2383                                                LiftoffRegister src2,
2384                                                uint8_t imm_lane_idx) {
2385   if (dst != src1) {
2386     Mov(dst.fp().V8H(), src1.fp().V8H());
2387   }
2388   Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
2389 }
2390 
emit_i16x8_neg(LiftoffRegister dst, LiftoffRegister src)2391 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
2392                                       LiftoffRegister src) {
2393   Neg(dst.fp().V8H(), src.fp().V8H());
2394 }
2395 
emit_i16x8_alltrue(LiftoffRegister dst, LiftoffRegister src)2396 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
2397                                           LiftoffRegister src) {
2398   liftoff::EmitAllTrue(this, dst, src, kFormat8H);
2399 }
2400 
emit_i16x8_bitmask(LiftoffRegister dst, LiftoffRegister src)2401 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
2402                                           LiftoffRegister src) {
2403   UseScratchRegisterScope temps(this);
2404   VRegister tmp = temps.AcquireQ();
2405   VRegister mask = temps.AcquireQ();
2406 
2407   Sshr(tmp.V8H(), src.fp().V8H(), 15);
2408   // Put bit i into lane i of the mask. After the And, a lane keeps bit i
2409   // only if its sign bit was set; Addv then sums the lanes into the 8-bit mask.
2410   Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
2411   And(tmp.V16B(), mask.V16B(), tmp.V16B());
2412   Addv(tmp.H(), tmp.V8H());
2413   Mov(dst.gp().W(), tmp.V8H(), 0);
2414 }
2415 
emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2416 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
2417                                       LiftoffRegister rhs) {
2418   liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2419       this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2420 }
2421 
emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2422 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
2423                                        int32_t rhs) {
2424   Shl(dst.fp().V8H(), lhs.fp().V8H(), rhs & 15);
2425 }
2426 
emit_i16x8_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2427 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
2428                                         LiftoffRegister lhs,
2429                                         LiftoffRegister rhs) {
2430   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2431                          liftoff::ShiftSign::kSigned>(
2432       this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2433 }
2434 
2435 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
2436                                          LiftoffRegister lhs, int32_t rhs) {
2437   liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>(
2438       this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
2439 }
2440 
2441 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
2442                                         LiftoffRegister lhs,
2443                                         LiftoffRegister rhs) {
2444   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2445                          liftoff::ShiftSign::kUnsigned>(
2446       this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2447 }
2448 
2449 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
2450                                          LiftoffRegister lhs, int32_t rhs) {
2451   liftoff::EmitSimdShiftRightImmediate<kFormat8H,
2452                                        liftoff::ShiftSign::kUnsigned>(
2453       this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
2454 }
2455 
2456 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
2457                                       LiftoffRegister rhs) {
2458   Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2459 }
2460 
2461 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
2462                                             LiftoffRegister lhs,
2463                                             LiftoffRegister rhs) {
2464   Sqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2465 }
2466 
2467 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
2468                                       LiftoffRegister rhs) {
2469   Sub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2470 }
2471 
2472 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
2473                                             LiftoffRegister lhs,
2474                                             LiftoffRegister rhs) {
2475   Sqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2476 }
2477 
2478 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
2479                                             LiftoffRegister lhs,
2480                                             LiftoffRegister rhs) {
2481   Uqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2482 }
2483 
2484 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
2485                                       LiftoffRegister rhs) {
2486   Mul(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2487 }
2488 
2489 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
2490                                             LiftoffRegister lhs,
2491                                             LiftoffRegister rhs) {
2492   Uqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2493 }
2494 
2495 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
2496                                         LiftoffRegister lhs,
2497                                         LiftoffRegister rhs) {
2498   Smin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2499 }
2500 
2501 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
2502                                         LiftoffRegister lhs,
2503                                         LiftoffRegister rhs) {
2504   Umin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2505 }
2506 
2507 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
2508                                         LiftoffRegister lhs,
2509                                         LiftoffRegister rhs) {
2510   Smax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2511 }
2512 
2513 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
2514                                         LiftoffRegister lhs,
2515                                         LiftoffRegister rhs) {
2516   Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2517 }
2518 
2519 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2520                                           LiftoffRegister lhs,
2521                                           LiftoffRegister rhs,
2522                                           const uint8_t shuffle[16],
2523                                           bool is_swizzle) {
2524   VRegister src1 = lhs.fp();
2525   VRegister src2 = rhs.fp();
2526   VRegister temp = dst.fp();
2527   if (dst == lhs || dst == rhs) {
2528     // dst overlaps with lhs or rhs, so we need a temporary.
2529     temp = GetUnusedRegister(kFpReg, LiftoffRegList{lhs, rhs}).fp();
2530   }
2531 
2532   UseScratchRegisterScope scope(this);
2533 
2534   if (src1 != src2 && !AreConsecutive(src1, src2)) {
2535     // Tbl needs consecutive registers, which our scratch registers are.
2536     src1 = scope.AcquireV(kFormat16B);
2537     src2 = scope.AcquireV(kFormat16B);
2538     DCHECK(AreConsecutive(src1, src2));
2539     Mov(src1.Q(), lhs.fp().Q());
2540     Mov(src2.Q(), rhs.fp().Q());
2541   }
2542 
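  // Pack the 16 shuffle byte indices, little-endian, into two 64-bit
  // immediates; materialized into `temp` below, they form the byte-index
  // table operand for Tbl.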
2543   int64_t imms[2] = {0, 0};
2544   for (int i = 7; i >= 0; i--) {
2545     imms[0] = (imms[0] << 8) | (shuffle[i]);
2546     imms[1] = (imms[1] << 8) | (shuffle[i + 8]);
2547   }
2548   DCHECK_EQ(0, (imms[0] | imms[1]) &
2549                    (lhs == rhs ? 0xF0F0F0F0F0F0F0F0 : 0xE0E0E0E0E0E0E0E0));
2550 
2551   Movi(temp.V16B(), imms[1], imms[0]);
2552 
2553   if (src1 == src2) {
2554     Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B());
2555   } else {
2556     Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B());
2557   }
2558 }
2559 
2560 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2561                                          LiftoffRegister src) {
2562   Cnt(dst.fp().V16B(), src.fp().V16B());
2563 }
2564 
2565 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2566                                         LiftoffRegister src) {
2567   Dup(dst.fp().V16B(), src.gp().W());
2568 }
2569 
2570 void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
2571                                                  LiftoffRegister lhs,
2572                                                  uint8_t imm_lane_idx) {
2573   Umov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
2574 }
2575 
2576 void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
2577                                                  LiftoffRegister lhs,
2578                                                  uint8_t imm_lane_idx) {
2579   Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
2580 }
2581 
2582 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
2583                                                LiftoffRegister src1,
2584                                                LiftoffRegister src2,
2585                                                uint8_t imm_lane_idx) {
2586   if (dst != src1) {
2587     Mov(dst.fp().V16B(), src1.fp().V16B());
2588   }
2589   Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
2590 }
2591 
2592 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
2593                                       LiftoffRegister src) {
2594   Neg(dst.fp().V16B(), src.fp().V16B());
2595 }
2596 
2597 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
2598                                          LiftoffRegister src) {
2599   liftoff::EmitAnyTrue(this, dst, src);
2600 }
2601 
2602 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
2603                                           LiftoffRegister src) {
2604   liftoff::EmitAllTrue(this, dst, src, kFormat16B);
2605 }
2606 
2607 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
2608                                           LiftoffRegister src) {
2609   UseScratchRegisterScope temps(this);
2610   VRegister tmp = temps.AcquireQ();
2611   VRegister mask = temps.AcquireQ();
2612 
2613   // Set bit i in lane i of each 64-bit half. After ANDing with tmp, lanes
2614   // whose input was negative keep their bit; all other lanes become 0.
2615   Sshr(tmp.V16B(), src.fp().V16B(), 7);
2616   Movi(mask.V2D(), 0x8040'2010'0804'0201);
2617   And(tmp.V16B(), mask.V16B(), tmp.V16B());
2618   Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
2619   Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
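  // Ext/Zip1 pair each low-half byte with the corresponding high-half byte,
  // so after the halfword-wise Addv the low half contributes bits 0-7 and the
  // high half bits 8-15 of the 16-bit mask.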
2620   Addv(tmp.H(), tmp.V8H());
2621   Mov(dst.gp().W(), tmp.V8H(), 0);
2622 }
2623 
2624 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
2625                                       LiftoffRegister rhs) {
2626   liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2627       this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2628 }
2629 
2630 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
2631                                        int32_t rhs) {
2632   Shl(dst.fp().V16B(), lhs.fp().V16B(), rhs & 7);
2633 }
2634 
2635 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
2636                                         LiftoffRegister lhs,
2637                                         LiftoffRegister rhs) {
2638   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2639                          liftoff::ShiftSign::kSigned>(
2640       this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2641 }
2642 
2643 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
2644                                          LiftoffRegister lhs, int32_t rhs) {
2645   liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>(
2646       this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
2647 }
2648 
2649 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
2650                                         LiftoffRegister lhs,
2651                                         LiftoffRegister rhs) {
2652   liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2653                          liftoff::ShiftSign::kUnsigned>(
2654       this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2655 }
2656 
2657 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
2658                                          LiftoffRegister lhs, int32_t rhs) {
2659   liftoff::EmitSimdShiftRightImmediate<kFormat16B,
2660                                        liftoff::ShiftSign::kUnsigned>(
2661       this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
2662 }
2663 
2664 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
2665                                       LiftoffRegister rhs) {
2666   Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2667 }
2668 
2669 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
2670                                             LiftoffRegister lhs,
2671                                             LiftoffRegister rhs) {
2672   Sqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2673 }
2674 
2675 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
2676                                       LiftoffRegister rhs) {
2677   Sub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2678 }
2679 
2680 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
2681                                             LiftoffRegister lhs,
2682                                             LiftoffRegister rhs) {
2683   Sqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2684 }
2685 
2686 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
2687                                             LiftoffRegister lhs,
2688                                             LiftoffRegister rhs) {
2689   Uqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2690 }
2691 
2692 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
2693                                             LiftoffRegister lhs,
2694                                             LiftoffRegister rhs) {
2695   Uqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2696 }
2697 
2698 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
2699                                         LiftoffRegister lhs,
2700                                         LiftoffRegister rhs) {
2701   Smin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2702 }
2703 
2704 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
2705                                         LiftoffRegister lhs,
2706                                         LiftoffRegister rhs) {
2707   Umin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2708 }
2709 
2710 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
2711                                         LiftoffRegister lhs,
2712                                         LiftoffRegister rhs) {
2713   Smax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2714 }
2715 
2716 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
2717                                         LiftoffRegister lhs,
2718                                         LiftoffRegister rhs) {
2719   Umax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2720 }
2721 
2722 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2723                                      LiftoffRegister rhs) {
2724   Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2725 }
2726 
2727 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2728                                      LiftoffRegister rhs) {
2729   Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2730   Mvn(dst.fp().V16B(), dst.fp().V16B());
2731 }
2732 
2733 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2734                                        LiftoffRegister rhs) {
2735   Cmgt(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2736 }
2737 
2738 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2739                                        LiftoffRegister rhs) {
2740   Cmhi(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2741 }
2742 
2743 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2744                                        LiftoffRegister rhs) {
2745   Cmge(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2746 }
2747 
2748 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2749                                        LiftoffRegister rhs) {
2750   Cmhs(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2751 }
2752 
2753 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2754                                      LiftoffRegister rhs) {
2755   Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2756 }
2757 
2758 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2759                                      LiftoffRegister rhs) {
2760   Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2761   Mvn(dst.fp().V8H(), dst.fp().V8H());
2762 }
2763 
2764 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2765                                        LiftoffRegister rhs) {
2766   Cmgt(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2767 }
2768 
2769 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2770                                        LiftoffRegister rhs) {
2771   Cmhi(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2772 }
2773 
2774 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2775                                        LiftoffRegister rhs) {
2776   Cmge(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2777 }
2778 
2779 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2780                                        LiftoffRegister rhs) {
2781   Cmhs(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2782 }
2783 
2784 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2785                                      LiftoffRegister rhs) {
2786   Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2787 }
2788 
2789 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2790                                      LiftoffRegister rhs) {
2791   Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2792   Mvn(dst.fp().V4S(), dst.fp().V4S());
2793 }
2794 
2795 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2796                                        LiftoffRegister rhs) {
2797   Cmgt(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2798 }
2799 
2800 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2801                                        LiftoffRegister rhs) {
2802   Cmhi(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2803 }
2804 
2805 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2806                                        LiftoffRegister rhs) {
2807   Cmge(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2808 }
2809 
2810 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2811                                        LiftoffRegister rhs) {
2812   Cmhs(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2813 }
2814 
2815 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2816                                      LiftoffRegister rhs) {
2817   Cmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2818 }
2819 
2820 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2821                                      LiftoffRegister rhs) {
2822   Cmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2823   Mvn(dst.fp().V2D(), dst.fp().V2D());
2824 }
2825 
2826 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2827                                        LiftoffRegister rhs) {
2828   Cmgt(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2829 }
2830 
2831 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2832                                        LiftoffRegister rhs) {
2833   Cmge(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2834 }
2835 
2836 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2837                                      LiftoffRegister rhs) {
2838   Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2839 }
2840 
2841 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2842                                      LiftoffRegister rhs) {
2843   Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2844   Mvn(dst.fp().V4S(), dst.fp().V4S());
2845 }
2846 
2847 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
2848                                      LiftoffRegister rhs) {
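  // There is no register-register "less than" compare; lhs < rhs is computed
  // as rhs > lhs by swapping the operands of Fcmgt. The same swap is used for
  // "le" and for the f64x2 variants below.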
2849   Fcmgt(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
2850 }
2851 
2852 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
2853                                      LiftoffRegister rhs) {
2854   Fcmge(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
2855 }
2856 
2857 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2858                                      LiftoffRegister rhs) {
2859   Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2860 }
2861 
2862 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2863                                      LiftoffRegister rhs) {
2864   Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2865   Mvn(dst.fp().V2D(), dst.fp().V2D());
2866 }
2867 
2868 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
2869                                      LiftoffRegister rhs) {
2870   Fcmgt(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
2871 }
2872 
2873 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
2874                                      LiftoffRegister rhs) {
2875   Fcmge(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
2876 }
2877 
2878 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
2879                                        const uint8_t imms[16]) {
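  // Copy the 16 literal bytes into two little-endian 64-bit halves and
  // materialize them with a single Movi.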
2880   uint64_t vals[2];
2881   memcpy(vals, imms, sizeof(vals));
2882   Movi(dst.fp().V16B(), vals[1], vals[0]);
2883 }
2884 
2885 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
2886   Mvn(dst.fp().V16B(), src.fp().V16B());
2887 }
2888 
2889 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
2890                                      LiftoffRegister rhs) {
2891   And(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2892 }
2893 
2894 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
2895                                     LiftoffRegister rhs) {
2896   Orr(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2897 }
2898 
2899 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
2900                                      LiftoffRegister rhs) {
2901   Eor(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2902 }
2903 
2904 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
2905                                         LiftoffRegister src1,
2906                                         LiftoffRegister src2,
2907                                         LiftoffRegister mask) {
2908   if (dst != mask) {
2909     Mov(dst.fp().V16B(), mask.fp().V16B());
2910   }
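  // Bsl uses dst as the bit mask: mask bits that are 1 select from src1 and
  // bits that are 0 select from src2, which is why the mask is copied into
  // dst first.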
2911   Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B());
2912 }
2913 
2914 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
2915                                                  LiftoffRegister src) {
2916   Fcvtzs(dst.fp().V4S(), src.fp().V4S());
2917 }
2918 
2919 void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
2920                                                  LiftoffRegister src) {
2921   Fcvtzu(dst.fp().V4S(), src.fp().V4S());
2922 }
2923 
2924 void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
2925                                                  LiftoffRegister src) {
2926   Scvtf(dst.fp().V4S(), src.fp().V4S());
2927 }
2928 
2929 void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
2930                                                  LiftoffRegister src) {
2931   Ucvtf(dst.fp().V4S(), src.fp().V4S());
2932 }
2933 
2934 void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
2935                                                     LiftoffRegister src) {
2936   Fcvtn(dst.fp().V2S(), src.fp().V2D());
2937 }
2938 
2939 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
2940                                                  LiftoffRegister lhs,
2941                                                  LiftoffRegister rhs) {
2942   UseScratchRegisterScope temps(this);
2943   VRegister tmp = temps.AcquireV(kFormat8H);
2944   VRegister right = rhs.fp().V8H();
2945   if (dst == rhs) {
2946     Mov(tmp, right);
2947     right = tmp;
2948   }
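  // The first Sqxtn overwrites the low half of dst, so if dst aliases rhs the
  // original rhs was saved into a scratch register above.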
2949   Sqxtn(dst.fp().V8B(), lhs.fp().V8H());
2950   Sqxtn2(dst.fp().V16B(), right);
2951 }
2952 
2953 void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
2954                                                  LiftoffRegister lhs,
2955                                                  LiftoffRegister rhs) {
2956   UseScratchRegisterScope temps(this);
2957   VRegister tmp = temps.AcquireV(kFormat8H);
2958   VRegister right = rhs.fp().V8H();
2959   if (dst == rhs) {
2960     Mov(tmp, right);
2961     right = tmp;
2962   }
2963   Sqxtun(dst.fp().V8B(), lhs.fp().V8H());
2964   Sqxtun2(dst.fp().V16B(), right);
2965 }
2966 
2967 void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
2968                                                  LiftoffRegister lhs,
2969                                                  LiftoffRegister rhs) {
2970   UseScratchRegisterScope temps(this);
2971   VRegister tmp = temps.AcquireV(kFormat4S);
2972   VRegister right = rhs.fp().V4S();
2973   if (dst == rhs) {
2974     Mov(tmp, right);
2975     right = tmp;
2976   }
2977   Sqxtn(dst.fp().V4H(), lhs.fp().V4S());
2978   Sqxtn2(dst.fp().V8H(), right);
2979 }
2980 
2981 void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
2982                                                  LiftoffRegister lhs,
2983                                                  LiftoffRegister rhs) {
2984   UseScratchRegisterScope temps(this);
2985   VRegister tmp = temps.AcquireV(kFormat4S);
2986   VRegister right = rhs.fp().V4S();
2987   if (dst == rhs) {
2988     Mov(tmp, right);
2989     right = tmp;
2990   }
2991   Sqxtun(dst.fp().V4H(), lhs.fp().V4S());
2992   Sqxtun2(dst.fp().V8H(), right);
2993 }
2994 
2995 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
2996                                                      LiftoffRegister src) {
2997   Sxtl(dst.fp().V8H(), src.fp().V8B());
2998 }
2999 
3000 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
3001                                                       LiftoffRegister src) {
3002   Sxtl2(dst.fp().V8H(), src.fp().V16B());
3003 }
3004 
3005 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
3006                                                      LiftoffRegister src) {
3007   Uxtl(dst.fp().V8H(), src.fp().V8B());
3008 }
3009 
3010 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
3011                                                       LiftoffRegister src) {
3012   Uxtl2(dst.fp().V8H(), src.fp().V16B());
3013 }
3014 
3015 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
3016                                                      LiftoffRegister src) {
3017   Sxtl(dst.fp().V4S(), src.fp().V4H());
3018 }
3019 
3020 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
3021                                                       LiftoffRegister src) {
3022   Sxtl2(dst.fp().V4S(), src.fp().V8H());
3023 }
3024 
3025 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
3026                                                      LiftoffRegister src) {
3027   Uxtl(dst.fp().V4S(), src.fp().V4H());
3028 }
3029 
3030 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
3031                                                       LiftoffRegister src) {
3032   Uxtl2(dst.fp().V4S(), src.fp().V8H());
3033 }
3034 
3035 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
3036                                                          LiftoffRegister src) {
3037   Fcvtzs(dst.fp().V2D(), src.fp().V2D());
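  // Narrow the two saturated 64-bit results to 32 bits; writing the 64-bit
  // destination clears the upper two lanes, as the "zero" variant requires.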
3038   Sqxtn(dst.fp().V2S(), dst.fp().V2D());
3039 }
3040 
3041 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
3042                                                          LiftoffRegister src) {
3043   Fcvtzu(dst.fp().V2D(), src.fp().V2D());
3044   Uqxtn(dst.fp().V2S(), dst.fp().V2D());
3045 }
3046 
3047 void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
3048                                          LiftoffRegister lhs,
3049                                          LiftoffRegister rhs) {
3050   Bic(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
3051 }
3052 
3053 void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
3054                                                      LiftoffRegister lhs,
3055                                                      LiftoffRegister rhs) {
3056   Urhadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
3057 }
3058 
3059 void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
3060                                                      LiftoffRegister lhs,
3061                                                      LiftoffRegister rhs) {
3062   Urhadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
3063 }
3064 
3065 void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
3066                                       LiftoffRegister src) {
3067   Abs(dst.fp().V16B(), src.fp().V16B());
3068 }
3069 
3070 void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
3071                                       LiftoffRegister src) {
3072   Abs(dst.fp().V8H(), src.fp().V8H());
3073 }
3074 
3075 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3076                                                           LiftoffRegister src) {
3077   Saddlp(dst.fp().V8H(), src.fp().V16B());
3078 }
3079 
3080 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3081                                                           LiftoffRegister src) {
3082   Uaddlp(dst.fp().V8H(), src.fp().V16B());
3083 }
3084 
3085 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3086                                                      LiftoffRegister src1,
3087                                                      LiftoffRegister src2) {
3088   Smull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
3089 }
3090 
3091 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3092                                                      LiftoffRegister src1,
3093                                                      LiftoffRegister src2) {
3094   Umull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
3095 }
3096 
3097 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3098                                                       LiftoffRegister src1,
3099                                                       LiftoffRegister src2) {
3100   Smull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
3101 }
3102 
3103 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3104                                                       LiftoffRegister src1,
3105                                                       LiftoffRegister src2) {
3106   Umull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
3107 }
3108 
3109 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3110                                                 LiftoffRegister src1,
3111                                                 LiftoffRegister src2) {
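  // Sqrdmulh (saturating, rounding, doubling multiply returning the high
  // half) matches the Q15 rounding multiply semantics of i16x8.q15mulr_sat_s.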
3112   Sqrdmulh(dst.fp().V8H(), src1.fp().V8H(), src2.fp().V8H());
3113 }
3114 
3115 void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
3116                                       LiftoffRegister src) {
3117   Abs(dst.fp().V4S(), src.fp().V4S());
3118 }
3119 
3120 void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
3121                                       LiftoffRegister src) {
3122   Abs(dst.fp().V2D(), src.fp().V2D());
3123 }
3124 
3125 void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
3126   Ldr(limit_address, MemOperand(limit_address));
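  // limit_address held the address of the stack limit; it now holds the limit
  // itself. Take the out-of-line path when sp is at or below it ("ls" is the
  // unsigned lower-or-same condition).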
3127   Cmp(sp, limit_address);
3128   B(ool_code, ls);
3129 }
3130 
3131 void LiftoffAssembler::CallTrapCallbackForTesting() {
3132   CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3133 }
3134 
3135 void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
3136   TurboAssembler::AssertUnreachable(reason);
3137 }
3138 
3139 void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
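  // GP and FP registers are pushed as two separate lists, each padded to an
  // even register count so that sp stays 16-byte aligned; PopRegisters below
  // restores them in the reverse order.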
3140   PushCPURegList(liftoff::PadRegList(regs.GetGpList()));
3141   PushCPURegList(liftoff::PadVRegList(regs.GetFpList()));
3142 }
3143 
3144 void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
3145   PopCPURegList(liftoff::PadVRegList(regs.GetFpList()));
3146   PopCPURegList(liftoff::PadRegList(regs.GetGpList()));
3147 }
3148 
3149 void LiftoffAssembler::RecordSpillsInSafepoint(
3150     SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
3151     LiftoffRegList ref_spills, int spill_offset) {
3152   int spill_space_size = 0;
3153   bool needs_padding = (all_spills.GetGpList().Count() & 1) != 0;
3154   if (needs_padding) {
3155     spill_space_size += kSystemPointerSize;
3156     ++spill_offset;
3157   }
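  // An odd number of GP registers is pushed with one extra padding slot to
  // keep the stack 16-byte aligned, so account for that slot before walking
  // the spilled registers.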
3158   while (!all_spills.is_empty()) {
3159     LiftoffRegister reg = all_spills.GetLastRegSet();
3160     if (ref_spills.has(reg)) {
3161       safepoint.DefineTaggedStackSlot(spill_offset);
3162     }
3163     all_spills.clear(reg);
3164     ++spill_offset;
3165     spill_space_size += kSystemPointerSize;
3166   }
3167   // Record the number of additional spill slots.
3168   RecordOolSpillSpaceSize(spill_space_size);
3169 }
3170 
3171 void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
3172   DropSlots(num_stack_slots);
3173   Ret();
3174 }
3175 
3176 void LiftoffAssembler::CallC(const ValueKindSig* sig,
3177                              const LiftoffRegister* args,
3178                              const LiftoffRegister* rets,
3179                              ValueKind out_argument_kind, int stack_bytes,
3180                              ExternalReference ext_ref) {
3181   // The stack pointer is required to be quadword aligned.
3182   int total_size = RoundUp(stack_bytes, kQuadWordSizeInBytes);
3183   // Reserve space in the stack.
3184   Claim(total_size, 1);
3185 
3186   int arg_bytes = 0;
3187   for (ValueKind param_kind : sig->parameters()) {
3188     Poke(liftoff::GetRegFromType(*args++, param_kind), arg_bytes);
3189     arg_bytes += value_kind_size(param_kind);
3190   }
3191   DCHECK_LE(arg_bytes, stack_bytes);
3192 
3193   // Pass a pointer to the buffer with the arguments to the C function.
3194   Mov(x0, sp);
3195 
3196   // Now call the C function.
3197   constexpr int kNumCCallArgs = 1;
3198   CallCFunction(ext_ref, kNumCCallArgs);
3199 
3200   // Move return value to the right register.
3201   const LiftoffRegister* next_result_reg = rets;
3202   if (sig->return_count() > 0) {
3203     DCHECK_EQ(1, sig->return_count());
3204     constexpr Register kReturnReg = x0;
3205     if (kReturnReg != next_result_reg->gp()) {
3206       Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
3207     }
3208     ++next_result_reg;
3209   }
3210 
3211   // Load potential output value from the buffer on the stack.
3212   if (out_argument_kind != kVoid) {
3213     Peek(liftoff::GetRegFromType(*next_result_reg, out_argument_kind), 0);
3214   }
3215 
3216   Drop(total_size, 1);
3217 }
3218 
3219 void LiftoffAssembler::CallNativeWasmCode(Address addr) {
3220   Call(addr, RelocInfo::WASM_CALL);
3221 }
3222 
3223 void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
3224   Jump(addr, RelocInfo::WASM_CALL);
3225 }
3226 
3227 void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
3228                                     compiler::CallDescriptor* call_descriptor,
3229                                     Register target) {
3230   // For Arm64, we have more cache registers than wasm parameters. That means
3231   // that target will always be in a register.
3232   DCHECK(target.is_valid());
3233   Call(target);
3234 }
3235 
3236 void LiftoffAssembler::TailCallIndirect(Register target) {
3237   DCHECK(target.is_valid());
3238   // When control flow integrity is enabled, the jump target begins with a
3239   // "bti c" instruction, which only permits entry via a "blr", or via a
3240   // "br" whose target register is x16 or x17.
3241   UseScratchRegisterScope temps(this);
3242   temps.Exclude(x17);
3243   Mov(x17, target);
3244   Jump(x17);
3245 }
3246 
3247 void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
3248   // A direct call to a wasm runtime stub defined in this module.
3249   // Just encode the stub index. This will be patched at relocation.
3250   Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
3251 }
3252 
3253 void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
3254   // The stack pointer is required to be quadword aligned.
3255   size = RoundUp(size, kQuadWordSizeInBytes);
3256   Claim(size, 1);
3257   Mov(addr, sp);
3258 }
3259 
3260 void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
3261   // The stack pointer is required to be quadword aligned.
3262   size = RoundUp(size, kQuadWordSizeInBytes);
3263   Drop(size, 1);
3264 }
3265 
3266 void LiftoffAssembler::MaybeOSR() {}
3267 
3268 void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
3269                                        ValueKind kind) {
3270   Label not_nan;
3271   if (kind == kF32) {
3272     Fcmp(src.S(), src.S());
3273     B(eq, &not_nan);  // x != x iff isnan(x)
3274     // If it's a NaN, it must be non-zero, so store that as the set value.
3275     Str(src.S(), MemOperand(dst));
3276   } else {
3277     DCHECK_EQ(kind, kF64);
3278     Fcmp(src.D(), src.D());
3279     B(eq, &not_nan);  // x != x iff isnan(x)
3280     // Double-precision NaNs must be non-zero in the most-significant 32
3281     // bits, so store that.
3282     St1(src.V4S(), 1, MemOperand(dst));
3283   }
3284   Bind(&not_nan);
3285 }
3286 
3287 void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
3288                                             Register tmp_gp,
3289                                             LiftoffRegister tmp_s128,
3290                                             ValueKind lane_kind) {
3291   DoubleRegister tmp_fp = tmp_s128.fp();
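  // Reduce the vector to a single scalar first: the maximum is NaN iff any
  // lane is NaN, so the scalar NaN check below covers all lanes.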
3292   if (lane_kind == kF32) {
3293     Fmaxv(tmp_fp.S(), src.fp().V4S());
3294   } else {
3295     DCHECK_EQ(lane_kind, kF64);
3296     Fmaxp(tmp_fp.D(), src.fp().V2D());
3297   }
3298   emit_set_if_nan(dst, tmp_fp, lane_kind);
3299 }
3300 
3301 void LiftoffStackSlots::Construct(int param_slots) {
3302   DCHECK_LT(0, slots_.size());
3303   // The stack pointer is required to be quadword aligned.
3304   asm_->Claim(RoundUp(param_slots, 2));
3305   for (auto& slot : slots_) {
3306     int poke_offset = slot.dst_slot_ * kSystemPointerSize;
3307     switch (slot.src_.loc()) {
3308       case LiftoffAssembler::VarState::kStack: {
3309         UseScratchRegisterScope temps(asm_);
3310         CPURegister scratch = liftoff::AcquireByType(&temps, slot.src_.kind());
3311         asm_->Ldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
3312         asm_->Poke(scratch, poke_offset);
3313         break;
3314       }
3315       case LiftoffAssembler::VarState::kRegister:
3316         asm_->Poke(liftoff::GetRegFromType(slot.src_.reg(), slot.src_.kind()),
3317                    poke_offset);
3318         break;
3319       case LiftoffAssembler::VarState::kIntConst:
3320         DCHECK(slot.src_.kind() == kI32 || slot.src_.kind() == kI64);
3321         if (slot.src_.i32_const() == 0) {
3322           Register zero_reg = slot.src_.kind() == kI32 ? wzr : xzr;
3323           asm_->Poke(zero_reg, poke_offset);
3324         } else {
3325           UseScratchRegisterScope temps(asm_);
3326           Register scratch =
3327               slot.src_.kind() == kI32 ? temps.AcquireW() : temps.AcquireX();
3328           asm_->Mov(scratch, int64_t{slot.src_.i32_const()});
3329           asm_->Poke(scratch, poke_offset);
3330         }
3331         break;
3332     }
3333   }
3334 }
3335 
3336 }  // namespace wasm
3337 }  // namespace internal
3338 }  // namespace v8
3339 
3340 #endif  // V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
3341