1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
6#define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
7
8#include "src/base/platform/wrappers.h"
9#include "src/codegen/arm/register-arm.h"
10#include "src/heap/memory-chunk.h"
11#include "src/wasm/baseline/liftoff-assembler.h"
12#include "src/wasm/baseline/liftoff-register.h"
13#include "src/wasm/wasm-objects.h"
14
15namespace v8 {
16namespace internal {
17namespace wasm {
18
19namespace liftoff {
20
21inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
22  switch (liftoff_cond) {
23    case kEqual:
24      return eq;
25    case kUnequal:
26      return ne;
27    case kSignedLessThan:
28      return lt;
29    case kSignedLessEqual:
30      return le;
31    case kSignedGreaterThan:
32      return gt;
33    case kSignedGreaterEqual:
34      return ge;
35    case kUnsignedLessThan:
36      return lo;
37    case kUnsignedLessEqual:
38      return ls;
39    case kUnsignedGreaterThan:
40      return hi;
41    case kUnsignedGreaterEqual:
42      return hs;
43  }
44}
45
46//  half
47//  slot        Frame
48//  -----+--------------------+---------------------------
49//  n+3  |   parameter n      |
50//  ...  |       ...          |
51//   4   |   parameter 1      | or parameter 2
52//   3   |   parameter 0      | or parameter 1
53//   2   |  (result address)  | or parameter 0
54//  -----+--------------------+---------------------------
55//   1   | return addr (lr)   |
56//   0   | previous frame (fp)|
57//  -----+--------------------+  <-- frame ptr (fp)
58//  -1   | StackFrame::WASM   |
59//  -2   |    instance        |
60//  -3   |    feedback vector |
61//  -4   |    tiering budget  |
62//  -----+--------------------+---------------------------
63//  -5   |    slot 0 (high)   |   ^
64//  -6   |    slot 0 (low)    |   |
65//  -7   |    slot 1 (high)   | Frame slots
66//  -8   |    slot 1 (low)    |   |
67//       |                    |   v
68//  -----+--------------------+  <-- stack ptr (sp)
69//
70static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
71              "Slot size should be twice the size of the 32 bit pointer.");
72constexpr int kInstanceOffset = 2 * kSystemPointerSize;
73constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
74constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
75// kPatchInstructionsRequired sets a maximum limit of how many instructions that
76// PatchPrepareStackFrame will use in order to increase the stack appropriately.
77// Three instructions are required to sub a large constant, movw + movt + sub.
78constexpr int32_t kPatchInstructionsRequired = 3;
79constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;
80
81inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
82
83inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
84  int32_t half_offset =
85      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
86  return MemOperand(offset > 0 ? fp : sp, -offset + half_offset);
87}
88
89inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }
90
91inline MemOperand GetMemOp(LiftoffAssembler* assm,
92                           UseScratchRegisterScope* temps, Register addr,
93                           Register offset, int32_t offset_imm) {
94  if (offset != no_reg) {
95    if (offset_imm == 0) return MemOperand(addr, offset);
96    Register tmp = temps->Acquire();
97    assm->add(tmp, offset, Operand(offset_imm));
98    return MemOperand(addr, tmp);
99  }
100  return MemOperand(addr, offset_imm);
101}
102
103inline Register CalculateActualAddress(LiftoffAssembler* assm,
104                                       UseScratchRegisterScope* temps,
105                                       Register addr_reg, Register offset_reg,
106                                       uintptr_t offset_imm,
107                                       Register result_reg = no_reg) {
108  if (offset_reg == no_reg && offset_imm == 0) {
109    if (result_reg == no_reg) {
110      return addr_reg;
111    } else {
112      assm->mov(result_reg, addr_reg);
113      return result_reg;
114    }
115  }
116  Register actual_addr_reg =
117      result_reg != no_reg ? result_reg : temps->Acquire();
118  if (offset_reg == no_reg) {
119    assm->add(actual_addr_reg, addr_reg, Operand(offset_imm));
120  } else {
121    assm->add(actual_addr_reg, addr_reg, Operand(offset_reg));
122    if (offset_imm != 0) {
123      assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm));
124    }
125  }
126  return actual_addr_reg;
127}
128
129inline LiftoffCondition MakeUnsigned(LiftoffCondition cond) {
130  switch (cond) {
131    case kSignedLessThan:
132      return kUnsignedLessThan;
133    case kSignedLessEqual:
134      return kUnsignedLessEqual;
135    case kSignedGreaterThan:
136      return kUnsignedGreaterThan;
137    case kSignedGreaterEqual:
138      return kUnsignedGreaterEqual;
139    case kEqual:
140    case kUnequal:
141    case kUnsignedLessThan:
142    case kUnsignedLessEqual:
143    case kUnsignedGreaterThan:
144    case kUnsignedGreaterEqual:
145      return cond;
146    default:
147      UNREACHABLE();
148  }
149}
150
151template <void (Assembler::*op)(Register, Register, Register, SBit, Condition),
152          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
153                                           SBit, Condition)>
154inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst,
155                     LiftoffRegister lhs, LiftoffRegister rhs) {
156  Register dst_low = dst.low_gp();
157  if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) {
158    dst_low =
159        assm->GetUnusedRegister(kGpReg, LiftoffRegList{lhs, rhs, dst.high_gp()})
160            .gp();
161  }
162  (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al);
163  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()),
164                         LeaveCC, al);
165  if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low);
166}
167
168template <void (Assembler::*op)(Register, Register, const Operand&, SBit,
169                                Condition),
170          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
171                                           SBit, Condition)>
172inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst,
173                      LiftoffRegister lhs, int64_t imm) {
174  // The compiler allocated registers such that either {dst == lhs} or there is
175  // no overlap between the two.
176  DCHECK_NE(dst.low_gp(), lhs.high_gp());
177  int32_t imm_low_word = static_cast<int32_t>(imm);
178  int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
179  (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm_low_word), SetCC, al);
180  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(imm_high_word),
181                         LeaveCC, al);
182}
183
184template <void (TurboAssembler::*op)(Register, Register, Register, Register,
185                                     Register),
186          bool is_left_shift>
187inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst,
188                       LiftoffRegister src, Register amount) {
189  Register src_low = src.low_gp();
190  Register src_high = src.high_gp();
191  Register dst_low = dst.low_gp();
192  Register dst_high = dst.high_gp();
193  // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low}
194  // then {dst_high}.
195  Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low;
196  LiftoffRegList pinned = {clobbered_dst_reg, src};
197  Register amount_capped =
198      pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp();
199  assm->and_(amount_capped, amount, Operand(0x3F));
200
201  // Ensure that writing the first half of {dst} does not overwrite the still
202  // needed half of {src}.
203  Register* later_src_reg = is_left_shift ? &src_low : &src_high;
204  if (*later_src_reg == clobbered_dst_reg) {
205    *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
206    assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg);
207  }
208
209  (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped);
210}
211
212inline FloatRegister GetFloatRegister(DoubleRegister reg) {
213  DCHECK_LT(reg.code(), kDoubleCode_d16);
214  return LowDwVfpRegister::from_code(reg.code()).low();
215}
216
217inline Simd128Register GetSimd128Register(DoubleRegister reg) {
218  return QwNeonRegister::from_code(reg.code() / 2);
219}
220
221inline Simd128Register GetSimd128Register(LiftoffRegister reg) {
222  return liftoff::GetSimd128Register(reg.low_fp());
223}
224
225enum class MinOrMax : uint8_t { kMin, kMax };
226template <typename RegisterType>
227inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst,
228                              RegisterType lhs, RegisterType rhs,
229                              MinOrMax min_or_max) {
230  DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8);
231  if (lhs == rhs) {
232    assm->TurboAssembler::Move(dst, lhs);
233    return;
234  }
235  Label done, is_nan;
236  if (min_or_max == MinOrMax::kMin) {
237    assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan);
238  } else {
239    assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan);
240  }
241  assm->b(&done);
242  assm->bind(&is_nan);
243  // Create a NaN output.
244  assm->vadd(dst, lhs, rhs);
245  assm->bind(&done);
246}
247
248inline Register EnsureNoAlias(Assembler* assm, Register reg,
249                              Register must_not_alias,
250                              UseScratchRegisterScope* temps) {
251  if (reg != must_not_alias) return reg;
252  Register tmp = temps->Acquire();
253  DCHECK_NE(reg, tmp);
254  assm->mov(tmp, reg);
255  return tmp;
256}
257
258inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt,
259                         NeonDataType sdt, LiftoffRegister dst,
260                         LiftoffRegister lhs, LiftoffRegister rhs) {
261  if (dst == lhs) {
262    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
263    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
264  } else {
265    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
266    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
267  }
268}
269
270inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst,
271                         LiftoffRegister lhs, LiftoffRegister rhs,
272                         Condition cond) {
273  DCHECK(cond == eq || cond == ne || cond == lt || cond == le);
274
275  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
276  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
277  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
278  UseScratchRegisterScope temps(assm);
279  Register scratch = temps.Acquire();
280
281  assm->mov(scratch, Operand(0));
282  assm->VFPCompareAndSetFlags(left.low(), right.low());
283  assm->mov(scratch, Operand(-1), LeaveCC, cond);
284  if (cond == lt || cond == le) {
285    // Check for NaN.
286    assm->mov(scratch, Operand(0), LeaveCC, vs);
287  }
288  assm->vmov(dest.low(), scratch, scratch);
289
290  assm->mov(scratch, Operand(0));
291  assm->VFPCompareAndSetFlags(left.high(), right.high());
292  assm->mov(scratch, Operand(-1), LeaveCC, cond);
293  if (cond == lt || cond == le) {
294    // Check for NaN.
295    assm->mov(scratch, Operand(0), LeaveCC, vs);
296  }
297  assm->vmov(dest.high(), scratch, scratch);
298}
299
300inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst,
301                  ValueKind kind) {
302#ifdef DEBUG
303  // The {str} instruction needs a temp register when the immediate in the
304  // provided MemOperand does not fit into 12 bits. This happens for large stack
305  // frames. This DCHECK checks that the temp register is available when needed.
306  DCHECK(UseScratchRegisterScope{assm}.CanAcquire());
307#endif
308  switch (kind) {
309    case kI32:
310    case kOptRef:
311    case kRef:
312    case kRtt:
313      assm->str(src.gp(), dst);
314      break;
315    case kI64:
316      // Positive offsets should be lowered to kI32.
317      assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset()));
318      assm->str(
319          src.high_gp(),
320          MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize));
321      break;
322    case kF32:
323      assm->vstr(liftoff::GetFloatRegister(src.fp()), dst);
324      break;
325    case kF64:
326      assm->vstr(src.fp(), dst);
327      break;
328    case kS128: {
329      UseScratchRegisterScope temps(assm);
330      Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(),
331                                                      no_reg, dst.offset());
332      assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr));
333      break;
334    }
335    default:
336      UNREACHABLE();
337  }
338}
339
340inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
341                 ValueKind kind) {
342  switch (kind) {
343    case kI32:
344    case kOptRef:
345    case kRef:
346    case kRtt:
347      assm->ldr(dst.gp(), src);
348      break;
349    case kI64:
350      assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset()));
351      assm->ldr(
352          dst.high_gp(),
353          MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize));
354      break;
355    case kF32:
356      assm->vldr(liftoff::GetFloatRegister(dst.fp()), src);
357      break;
358    case kF64:
359      assm->vldr(dst.fp(), src);
360      break;
361    case kS128: {
362      // Get memory address of slot to fill from.
363      UseScratchRegisterScope temps(assm);
364      Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(),
365                                                      no_reg, src.offset());
366      assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
367      break;
368    }
369    default:
370      UNREACHABLE();
371  }
372}
373
374constexpr int MaskFromNeonDataType(NeonDataType dt) {
375  switch (dt) {
376    case NeonS8:
377    case NeonU8:
378      return 7;
379    case NeonS16:
380    case NeonU16:
381      return 15;
382    case NeonS32:
383    case NeonU32:
384      return 31;
385    case NeonS64:
386    case NeonU64:
387      return 63;
388  }
389}
390
391enum ShiftDirection { kLeft, kRight };
392
393template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz>
394inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
395                          LiftoffRegister lhs, LiftoffRegister rhs) {
396  constexpr int mask = MaskFromNeonDataType(dt);
397  UseScratchRegisterScope temps(assm);
398  QwNeonRegister tmp = temps.AcquireQ();
399  Register shift = temps.Acquire();
400  assm->and_(shift, rhs.gp(), Operand(mask));
401  assm->vdup(sz, tmp, shift);
402  if (dir == kRight) {
403    assm->vneg(sz, tmp, tmp);
404  }
405  assm->vshl(dt, liftoff::GetSimd128Register(dst),
406             liftoff::GetSimd128Register(lhs), tmp);
407}
408
409template <ShiftDirection dir, NeonDataType dt>
410inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
411                                   LiftoffRegister lhs, int32_t rhs) {
412  // vshr by 0 is not allowed, so check for it, and only move if dst != lhs.
413  int32_t shift = rhs & MaskFromNeonDataType(dt);
414  if (shift) {
415    if (dir == kLeft) {
416      assm->vshl(dt, liftoff::GetSimd128Register(dst),
417                 liftoff::GetSimd128Register(lhs), shift);
418    } else {
419      assm->vshr(dt, liftoff::GetSimd128Register(dst),
420                 liftoff::GetSimd128Register(lhs), shift);
421    }
422  } else if (dst != lhs) {
423    assm->vmov(liftoff::GetSimd128Register(dst),
424               liftoff::GetSimd128Register(lhs));
425  }
426}
427
428inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
429                        LiftoffRegister src) {
430  UseScratchRegisterScope temps(assm);
431  DwVfpRegister scratch = temps.AcquireD();
432  assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
433  assm->vpmax(NeonU32, scratch, scratch, scratch);
434  assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
435  assm->cmp(dst.gp(), Operand(0));
436  assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
437}
438
439}  // namespace liftoff
440
441int LiftoffAssembler::PrepareStackFrame() {
442  if (!CpuFeatures::IsSupported(ARMv7)) {
443    bailout(kUnsupportedArchitecture, "Liftoff needs ARMv7");
444    return 0;
445  }
446  uint32_t offset = static_cast<uint32_t>(pc_offset());
447  // PatchPrepareStackFrame will patch this in order to increase the stack
448  // appropriately. Additional nops are required as the bytes operand might
449  // require extra moves to encode.
450  for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) {
451    nop();
452  }
453  DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize,
454            pc_offset());
455  return offset;
456}
457
458void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
459                                       int stack_param_delta) {
460  UseScratchRegisterScope temps(this);
461  Register scratch = temps.Acquire();
462
463  // Push the return address and frame pointer to complete the stack frame.
464  sub(sp, sp, Operand(8));
465  ldr(scratch, MemOperand(fp, 4));
466  str(scratch, MemOperand(sp, 4));
467  ldr(scratch, MemOperand(fp, 0));
468  str(scratch, MemOperand(sp, 0));
469
470  // Shift the whole frame upwards.
471  int slot_count = num_callee_stack_params + 2;
472  for (int i = slot_count - 1; i >= 0; --i) {
473    ldr(scratch, MemOperand(sp, i * 4));
474    str(scratch, MemOperand(fp, (i - stack_param_delta) * 4));
475  }
476
477  // Set the new stack and frame pointer.
478  sub(sp, fp, Operand(stack_param_delta * 4));
479  Pop(lr, fp);
480}
481
482void LiftoffAssembler::AlignFrameSize() {}
483
484void LiftoffAssembler::PatchPrepareStackFrame(
485    int offset, SafepointTableBuilder* safepoint_table_builder) {
486  // The frame_size includes the frame marker and the instance slot. Both are
487  // pushed as part of frame construction, so we don't need to allocate memory
488  // for them anymore.
489  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
490
491  PatchingAssembler patching_assembler(AssemblerOptions{},
492                                       buffer_start_ + offset,
493                                       liftoff::kPatchInstructionsRequired);
494  if (V8_LIKELY(frame_size < 4 * KB)) {
495    // This is the standard case for small frames: just subtract from SP and be
496    // done with it.
497    patching_assembler.sub(sp, sp, Operand(frame_size));
498    patching_assembler.PadWithNops();
499    return;
500  }
501
502  // The frame size is bigger than 4KB, so we might overflow the available stack
503  // space if we first allocate the frame and then do the stack check (we will
504  // need some remaining stack space for throwing the exception). That's why we
505  // check the available stack space before we allocate the frame. To do this we
506  // replace the {__ sub(sp, sp, framesize)} with a jump to OOL code that does
507  // this "extended stack check".
508  //
509  // The OOL code can simply be generated here with the normal assembler,
510  // because all other code generation, including OOL code, has already finished
511  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
512  // to the current {pc_offset()} to execute the OOL code for allocating the
513  // large frame.
514
515  // Emit the unconditional branch in the function prologue (from {offset} to
516  // {pc_offset()}).
517  patching_assembler.b(pc_offset() - offset - Instruction::kPcLoadDelta);
518  patching_assembler.PadWithNops();
519
520  // If the frame is bigger than the stack, we throw the stack overflow
521  // exception unconditionally. Thereby we can avoid the integer overflow
522  // check in the condition code.
523  RecordComment("OOL: stack check for large frame");
524  Label continuation;
525  if (frame_size < FLAG_stack_size * 1024) {
526    UseScratchRegisterScope temps(this);
527    Register stack_limit = temps.Acquire();
528    ldr(stack_limit,
529        FieldMemOperand(kWasmInstanceRegister,
530                        WasmInstanceObject::kRealStackLimitAddressOffset));
531    ldr(stack_limit, MemOperand(stack_limit));
532    add(stack_limit, stack_limit, Operand(frame_size));
533    cmp(sp, stack_limit);
534    b(cs /* higher or same */, &continuation);
535  }
536
537  Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
538  // The call will not return; just define an empty safepoint.
539  safepoint_table_builder->DefineSafepoint(this);
540  if (FLAG_debug_code) stop();
541
542  bind(&continuation);
543
544  // Now allocate the stack space. Note that this might do more than just
545  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
546  AllocateStackSpace(frame_size);
547
548  // Jump back to the start of the function, from {pc_offset()} to
549  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
550  // is a branch now).
551  int func_start_offset =
552      offset + liftoff::kPatchInstructionsRequired * kInstrSize;
553  b(func_start_offset - pc_offset() - Instruction::kPcLoadDelta);
554}
555
556void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); }
557
558void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
559
560// static
561constexpr int LiftoffAssembler::StaticStackFrameSize() {
562  return liftoff::kTierupBudgetOffset;
563}
564
565int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
566  switch (kind) {
567    case kS128:
568      return value_kind_size(kind);
569    default:
570      return kStackSlotSize;
571  }
572}
573
574bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
575  return kind == kS128 || is_reference(kind);
576}
577
578void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
579                                    RelocInfo::Mode rmode) {
580  switch (value.type().kind()) {
581    case kI32:
582      TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode));
583      break;
584    case kI64: {
585      DCHECK(RelocInfo::IsNoInfo(rmode));
586      int32_t low_word = value.to_i64();
587      int32_t high_word = value.to_i64() >> 32;
588      TurboAssembler::Move(reg.low_gp(), Operand(low_word));
589      TurboAssembler::Move(reg.high_gp(), Operand(high_word));
590      break;
591    }
592    case kF32:
593      vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
594      break;
595    case kF64: {
596      Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
597      vmov(reg.fp(), base::Double(value.to_f64_boxed().get_bits()),
598           extra_scratch);
599      break;
600    }
601    default:
602      UNREACHABLE();
603  }
604}
605
606void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
607  ldr(dst, liftoff::GetInstanceOperand());
608}
609
610void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
611                                        int offset, int size) {
612  DCHECK_LE(0, offset);
613  MemOperand src{instance, offset};
614  switch (size) {
615    case 1:
616      ldrb(dst, src);
617      break;
618    case 4:
619      ldr(dst, src);
620      break;
621    default:
622      UNIMPLEMENTED();
623  }
624}
625
626void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
627                                                     Register instance,
628                                                     int offset) {
629  STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
630  ldr(dst, MemOperand{instance, offset});
631}
632
633void LiftoffAssembler::SpillInstance(Register instance) {
634  str(instance, liftoff::GetInstanceOperand());
635}
636
637void LiftoffAssembler::ResetOSRTarget() {}
638
639namespace liftoff {
640#define __ lasm->
641inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst,
642                         Register src_addr, Register offset_reg,
643                         int32_t offset_imm, LoadType type,
644                         LiftoffRegList pinned,
645                         uint32_t* protected_load_pc = nullptr,
646                         bool is_load_mem = false) {
647  DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair());
648  UseScratchRegisterScope temps(lasm);
649  if (type.value() == LoadType::kF64Load ||
650      type.value() == LoadType::kF32Load ||
651      type.value() == LoadType::kS128Load) {
652    Register actual_src_addr = liftoff::CalculateActualAddress(
653        lasm, &temps, src_addr, offset_reg, offset_imm);
654    if (type.value() == LoadType::kF64Load) {
655      // Armv6 is not supported so Neon can be used to avoid alignment issues.
656      CpuFeatureScope scope(lasm, NEON);
657      __ vld1(Neon64, NeonListOperand(dst.fp()),
658              NeonMemOperand(actual_src_addr));
659    } else if (type.value() == LoadType::kF32Load) {
660      // TODO(arm): Use vld1 for f32 when implemented in simulator as used for
661      // f64. It supports unaligned access.
662      Register scratch =
663          (actual_src_addr == src_addr) ? temps.Acquire() : actual_src_addr;
664      __ ldr(scratch, MemOperand(actual_src_addr));
665      __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch);
666    } else {
667      // Armv6 is not supported so Neon can be used to avoid alignment issues.
668      CpuFeatureScope scope(lasm, NEON);
669      __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2),
670              NeonMemOperand(actual_src_addr));
671    }
672  } else {
673    MemOperand src_op =
674        liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm);
675    if (protected_load_pc) *protected_load_pc = __ pc_offset();
676    switch (type.value()) {
677      case LoadType::kI32Load8U:
678        __ ldrb(dst.gp(), src_op);
679        break;
680      case LoadType::kI64Load8U:
681        __ ldrb(dst.low_gp(), src_op);
682        __ mov(dst.high_gp(), Operand(0));
683        break;
684      case LoadType::kI32Load8S:
685        __ ldrsb(dst.gp(), src_op);
686        break;
687      case LoadType::kI64Load8S:
688        __ ldrsb(dst.low_gp(), src_op);
689        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
690        break;
691      case LoadType::kI32Load16U:
692        __ ldrh(dst.gp(), src_op);
693        break;
694      case LoadType::kI64Load16U:
695        __ ldrh(dst.low_gp(), src_op);
696        __ mov(dst.high_gp(), Operand(0));
697        break;
698      case LoadType::kI32Load16S:
699        __ ldrsh(dst.gp(), src_op);
700        break;
701      case LoadType::kI32Load:
702        __ ldr(dst.gp(), src_op);
703        break;
704      case LoadType::kI64Load16S:
705        __ ldrsh(dst.low_gp(), src_op);
706        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
707        break;
708      case LoadType::kI64Load32U:
709        __ ldr(dst.low_gp(), src_op);
710        __ mov(dst.high_gp(), Operand(0));
711        break;
712      case LoadType::kI64Load32S:
713        __ ldr(dst.low_gp(), src_op);
714        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
715        break;
716      case LoadType::kI64Load:
717        __ ldr(dst.low_gp(), src_op);
718        // GetMemOp may use a scratch register as the offset register, in which
719        // case, calling GetMemOp again will fail due to the assembler having
720        // ran out of scratch registers.
721        if (temps.CanAcquire()) {
722          src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg,
723                                     offset_imm + kSystemPointerSize);
724        } else {
725          __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize));
726        }
727        __ ldr(dst.high_gp(), src_op);
728        break;
729      default:
730        UNREACHABLE();
731    }
732  }
733}
734#undef __
735}  // namespace liftoff
736
737void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
738                                         Register offset_reg,
739                                         int32_t offset_imm,
740                                         LiftoffRegList pinned) {
741  STATIC_ASSERT(kTaggedSize == kInt32Size);
742  liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg,
743                        offset_imm, LoadType::kI32Load, pinned);
744}
745
746void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
747                                       int32_t offset_imm) {
748  UseScratchRegisterScope temps(this);
749  MemOperand src_op =
750      liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
751  ldr(dst, src_op);
752}
753
754void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
755                                          Register offset_reg,
756                                          int32_t offset_imm,
757                                          LiftoffRegister src,
758                                          LiftoffRegList pinned,
759                                          SkipWriteBarrier skip_write_barrier) {
760  STATIC_ASSERT(kTaggedSize == kInt32Size);
761  Register actual_offset_reg = offset_reg;
762  if (offset_reg != no_reg && offset_imm != 0) {
763    if (cache_state()->is_used(LiftoffRegister(offset_reg))) {
764      actual_offset_reg = GetUnusedRegister(kGpReg, pinned).gp();
765    }
766    add(actual_offset_reg, offset_reg, Operand(offset_imm));
767  }
768  MemOperand dst_op = actual_offset_reg == no_reg
769                          ? MemOperand(dst_addr, offset_imm)
770                          : MemOperand(dst_addr, actual_offset_reg);
771  str(src.gp(), dst_op);
772
773  if (skip_write_barrier || FLAG_disable_write_barriers) return;
774
775  // The write barrier.
776  Label write_barrier;
777  Label exit;
778  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne,
779                &write_barrier);
780  b(&exit);
781  bind(&write_barrier);
782  JumpIfSmi(src.gp(), &exit);
783  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq,
784                &exit);
785  CallRecordWriteStubSaveRegisters(
786      dst_addr,
787      actual_offset_reg == no_reg ? Operand(offset_imm)
788                                  : Operand(actual_offset_reg),
789      RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
790      StubCallMode::kCallWasmRuntimeStub);
791  bind(&exit);
792}
793
794void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
795                            Register offset_reg, uint32_t offset_imm,
796                            LoadType type, LiftoffRegList pinned,
797                            uint32_t* protected_load_pc, bool is_load_mem,
798                            bool i64_offset) {
799  // Offsets >=2GB are statically OOB on 32-bit systems.
800  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
801  liftoff::LoadInternal(this, dst, src_addr, offset_reg,
802                        static_cast<int32_t>(offset_imm), type, pinned,
803                        protected_load_pc, is_load_mem);
804}
805
806void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
807                             uint32_t offset_imm, LiftoffRegister src,
808                             StoreType type, LiftoffRegList pinned,
809                             uint32_t* protected_store_pc, bool is_store_mem) {
810  // Offsets >=2GB are statically OOB on 32-bit systems.
811  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
812  UseScratchRegisterScope temps(this);
813  if (type.value() == StoreType::kF64Store) {
814    Register actual_dst_addr = liftoff::CalculateActualAddress(
815        this, &temps, dst_addr, offset_reg, offset_imm);
816    // Armv6 is not supported so Neon can be used to avoid alignment issues.
817    CpuFeatureScope scope(this, NEON);
818    vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr));
819  } else if (type.value() == StoreType::kS128Store) {
820    Register actual_dst_addr = liftoff::CalculateActualAddress(
821        this, &temps, dst_addr, offset_reg, offset_imm);
822    // Armv6 is not supported so Neon can be used to avoid alignment issues.
823    CpuFeatureScope scope(this, NEON);
824    vst1(Neon8, NeonListOperand(src.low_fp(), 2),
825         NeonMemOperand(actual_dst_addr));
826  } else if (type.value() == StoreType::kF32Store) {
827    // TODO(arm): Use vst1 for f32 when implemented in simulator as used for
828    // f64. It supports unaligned access.
829    // CalculateActualAddress will only not use a scratch register if the
830    // following condition holds, otherwise another register must be
831    // retrieved.
832    Register scratch = (offset_reg == no_reg && offset_imm == 0)
833                           ? temps.Acquire()
834                           : GetUnusedRegister(kGpReg, pinned).gp();
835    Register actual_dst_addr = liftoff::CalculateActualAddress(
836        this, &temps, dst_addr, offset_reg, offset_imm);
837    vmov(scratch, liftoff::GetFloatRegister(src.fp()));
838    str(scratch, MemOperand(actual_dst_addr));
839  } else {
840    MemOperand dst_op =
841        liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
842    if (protected_store_pc) *protected_store_pc = pc_offset();
843    switch (type.value()) {
844      case StoreType::kI64Store8:
845        src = src.low();
846        V8_FALLTHROUGH;
847      case StoreType::kI32Store8:
848        strb(src.gp(), dst_op);
849        break;
850      case StoreType::kI64Store16:
851        src = src.low();
852        V8_FALLTHROUGH;
853      case StoreType::kI32Store16:
854        strh(src.gp(), dst_op);
855        break;
856      case StoreType::kI64Store32:
857        src = src.low();
858        V8_FALLTHROUGH;
859      case StoreType::kI32Store:
860        str(src.gp(), dst_op);
861        break;
862      case StoreType::kI64Store:
863        str(src.low_gp(), dst_op);
864        // GetMemOp may use a scratch register as the offset register, in which
865        // case, calling GetMemOp again will fail due to the assembler having
866        // ran out of scratch registers.
867        if (temps.CanAcquire()) {
868          dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg,
869                                     offset_imm + kSystemPointerSize);
870        } else {
871          add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize));
872        }
873        str(src.high_gp(), dst_op);
874        break;
875      default:
876        UNREACHABLE();
877    }
878  }
879}
880
881namespace liftoff {
882#define __ lasm->
883
884inline void AtomicOp32(
885    LiftoffAssembler* lasm, Register dst_addr, Register offset_reg,
886    uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result,
887    LiftoffRegList pinned,
888    void (Assembler::*load)(Register, Register, Condition),
889    void (Assembler::*store)(Register, Register, Register, Condition),
890    void (*op)(LiftoffAssembler*, Register, Register, Register)) {
891  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
892
893  // Allocate an additional {temp} register to hold the result that should be
894  // stored to memory. Note that {temp} and {store_result} are not allowed to be
895  // the same register.
896  Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
897
898  // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
899  DCHECK(result.gp() != value.gp() && result.gp() != dst_addr &&
900         result.gp() != offset_reg);
901
902  UseScratchRegisterScope temps(lasm);
903  Register actual_addr = liftoff::CalculateActualAddress(
904      lasm, &temps, dst_addr, offset_reg, offset_imm);
905
906  __ dmb(ISH);
907  Label retry;
908  __ bind(&retry);
909  (lasm->*load)(result.gp(), actual_addr, al);
910  op(lasm, temp, result.gp(), value.gp());
911  (lasm->*store)(store_result, temp, actual_addr, al);
912  __ cmp(store_result, Operand(0));
913  __ b(ne, &retry);
914  __ dmb(ISH);
915}
916
917inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs,
918                Register rhs) {
919  __ add(dst, lhs, rhs);
920}
921
922inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs,
923                Register rhs) {
924  __ sub(dst, lhs, rhs);
925}
926
927inline void And(LiftoffAssembler* lasm, Register dst, Register lhs,
928                Register rhs) {
929  __ and_(dst, lhs, rhs);
930}
931
932inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs,
933               Register rhs) {
934  __ orr(dst, lhs, rhs);
935}
936
937inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs,
938                Register rhs) {
939  __ eor(dst, lhs, rhs);
940}
941
942inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs,
943                     Register rhs) {
944  __ mov(dst, rhs);
945}
946
947inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr,
948                          Register offset_reg, uint32_t offset_imm,
949                          LiftoffRegister value, LiftoffRegister result,
950                          StoreType type,
951                          void (*op)(LiftoffAssembler*, Register, Register,
952                                     Register)) {
953  LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
954  switch (type.value()) {
955    case StoreType::kI64Store8:
956      __ LoadConstant(result.high(), WasmValue(0));
957      result = result.low();
958      value = value.low();
959      V8_FALLTHROUGH;
960    case StoreType::kI32Store8:
961      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
962                          pinned, &Assembler::ldrexb, &Assembler::strexb, op);
963      return;
964    case StoreType::kI64Store16:
965      __ LoadConstant(result.high(), WasmValue(0));
966      result = result.low();
967      value = value.low();
968      V8_FALLTHROUGH;
969    case StoreType::kI32Store16:
970      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
971                          pinned, &Assembler::ldrexh, &Assembler::strexh, op);
972      return;
973    case StoreType::kI64Store32:
974      __ LoadConstant(result.high(), WasmValue(0));
975      result = result.low();
976      value = value.low();
977      V8_FALLTHROUGH;
978    case StoreType::kI32Store:
979      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
980                          pinned, &Assembler::ldrex, &Assembler::strex, op);
981      return;
982    default:
983      UNREACHABLE();
984  }
985}
986
987inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr,
988                       Register offset_reg, uint32_t offset_imm,
989                       LiftoffRegister value,
990                       base::Optional<LiftoffRegister> result,
991                       void (*op)(LiftoffAssembler*, LiftoffRegister,
992                                  LiftoffRegister, LiftoffRegister)) {
993  // strexd loads a 64 bit word into two registers. The first register needs
994  // to have an even index, e.g. r8, the second register needs to be the one
995  // with the next higher index, e.g. r9 if the first register is r8. In the
996  // following code we use the fixed register pair r8/r9 to make the code here
997  // simpler, even though other register pairs would also be possible.
998  constexpr Register dst_low = r8;
999  constexpr Register dst_high = r9;
1000
1001  // Make sure {dst_low} and {dst_high} are not occupied by any other value.
1002  Register value_low = value.low_gp();
1003  Register value_high = value.high_gp();
1004  LiftoffRegList pinned = {dst_addr,   offset_reg, value_low,
1005                           value_high, dst_low,    dst_high};
1006  __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high},
1007                   pinned);
1008  pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};
1009  __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high},
1010                   pinned);
1011  pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};
1012
1013  // Make sure that {result}, if it exists, also does not overlap with
1014  // {dst_low} and {dst_high}. We don't have to transfer the value stored in
1015  // {result}.
1016  Register result_low = no_reg;
1017  Register result_high = no_reg;
1018  if (result.has_value()) {
1019    result_low = result.value().low_gp();
1020    if (pinned.has(result_low)) {
1021      result_low = __ GetUnusedRegister(kGpReg, pinned).gp();
1022    }
1023    pinned.set(result_low);
1024
1025    result_high = result.value().high_gp();
1026    if (pinned.has(result_high)) {
1027      result_high = __ GetUnusedRegister(kGpReg, pinned).gp();
1028    }
1029    pinned.set(result_high);
1030  }
1031
1032  Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();
1033
1034  UseScratchRegisterScope temps(lasm);
1035  Register actual_addr = liftoff::CalculateActualAddress(
1036      lasm, &temps, dst_addr, offset_reg, offset_imm);
1037
1038  __ dmb(ISH);
1039  Label retry;
1040  __ bind(&retry);
1041  // {ldrexd} is needed here so that the {strexd} instruction below can
1042  // succeed. We don't need the value we are reading. We use {dst_low} and
1043  // {dst_high} as the destination registers because {ldrexd} has the same
1044  // restrictions on registers as {strexd}, see the comment above.
1045  __ ldrexd(dst_low, dst_high, actual_addr);
1046  if (result.has_value()) {
1047    __ mov(result_low, dst_low);
1048    __ mov(result_high, dst_high);
1049  }
1050  op(lasm, LiftoffRegister::ForPair(dst_low, dst_high),
1051     LiftoffRegister::ForPair(dst_low, dst_high),
1052     LiftoffRegister::ForPair(value_low, value_high));
1053  __ strexd(store_result, dst_low, dst_high, actual_addr);
1054  __ cmp(store_result, Operand(0));
1055  __ b(ne, &retry);
1056  __ dmb(ISH);
1057
1058  if (result.has_value()) {
1059    if (result_low != result.value().low_gp()) {
1060      __ mov(result.value().low_gp(), result_low);
1061    }
1062    if (result_high != result.value().high_gp()) {
1063      __ mov(result.value().high_gp(), result_high);
1064    }
1065  }
1066}
1067
1068inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst,
1069                     LiftoffRegister, LiftoffRegister src) {
1070  __ mov(dst.low_gp(), src.low_gp());
1071  __ mov(dst.high_gp(), src.high_gp());
1072}
1073
1074#undef __
1075}  // namespace liftoff
1076
1077void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
1078                                  Register offset_reg, uint32_t offset_imm,
1079                                  LoadType type, LiftoffRegList pinned) {
1080  if (type.value() != LoadType::kI64Load) {
1081    Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
1082    dmb(ISH);
1083    return;
1084  }
1085  // ldrexd loads a 64 bit word into two registers. The first register needs to
1086  // have an even index, e.g. r8, the second register needs to be the one with
1087  // the next higher index, e.g. r9 if the first register is r8. In the
1088  // following code we use the fixed register pair r8/r9 to make the code here
1089  // simpler, even though other register pairs would also be possible.
1090  constexpr Register dst_low = r8;
1091  constexpr Register dst_high = r9;
1092  SpillRegisters(dst_low, dst_high);
1093  {
1094    UseScratchRegisterScope temps(this);
1095    Register actual_addr = liftoff::CalculateActualAddress(
1096        this, &temps, src_addr, offset_reg, offset_imm);
1097    ldrexd(dst_low, dst_high, actual_addr);
1098    dmb(ISH);
1099  }
1100
1101  ParallelRegisterMove(
1102      {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kI64}});
1103}
1104
1105void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
1106                                   uint32_t offset_imm, LiftoffRegister src,
1107                                   StoreType type, LiftoffRegList pinned) {
1108  if (type.value() == StoreType::kI64Store) {
1109    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {},
1110                        liftoff::I64Store);
1111    return;
1112  }
1113
1114  dmb(ISH);
1115  Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true);
1116  dmb(ISH);
1117  return;
1118}
1119
1120void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
1121                                 uint32_t offset_imm, LiftoffRegister value,
1122                                 LiftoffRegister result, StoreType type) {
1123  if (type.value() == StoreType::kI64Store) {
1124    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1125                        liftoff::I64Binop<&Assembler::add, &Assembler::adc>);
1126    return;
1127  }
1128  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1129                         type, &liftoff::Add);
1130}
1131
1132void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
1133                                 uint32_t offset_imm, LiftoffRegister value,
1134                                 LiftoffRegister result, StoreType type) {
1135  if (type.value() == StoreType::kI64Store) {
1136    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1137                        liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>);
1138    return;
1139  }
1140  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1141                         type, &liftoff::Sub);
1142}
1143
1144void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
1145                                 uint32_t offset_imm, LiftoffRegister value,
1146                                 LiftoffRegister result, StoreType type) {
1147  if (type.value() == StoreType::kI64Store) {
1148    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1149                        liftoff::I64Binop<&Assembler::and_, &Assembler::and_>);
1150    return;
1151  }
1152  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1153                         type, &liftoff::And);
1154}
1155
1156void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
1157                                uint32_t offset_imm, LiftoffRegister value,
1158                                LiftoffRegister result, StoreType type) {
1159  if (type.value() == StoreType::kI64Store) {
1160    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1161                        liftoff::I64Binop<&Assembler::orr, &Assembler::orr>);
1162    return;
1163  }
1164  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1165                         type, &liftoff::Or);
1166}
1167
1168void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
1169                                 uint32_t offset_imm, LiftoffRegister value,
1170                                 LiftoffRegister result, StoreType type) {
1171  if (type.value() == StoreType::kI64Store) {
1172    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1173                        liftoff::I64Binop<&Assembler::eor, &Assembler::eor>);
1174    return;
1175  }
1176  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1177                         type, &liftoff::Xor);
1178}
1179
1180void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
1181                                      uint32_t offset_imm,
1182                                      LiftoffRegister value,
1183                                      LiftoffRegister result, StoreType type) {
1184  if (type.value() == StoreType::kI64Store) {
1185    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1186                        liftoff::I64Store);
1187    return;
1188  }
1189  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1190                         type, &liftoff::Exchange);
1191}
1192
1193namespace liftoff {
1194#define __ lasm->
1195
1196inline void AtomicI64CompareExchange(LiftoffAssembler* lasm,
1197                                     Register dst_addr_reg, Register offset_reg,
1198                                     uint32_t offset_imm,
1199                                     LiftoffRegister expected,
1200                                     LiftoffRegister new_value,
1201                                     LiftoffRegister result) {
1202  // To implement I64AtomicCompareExchange, we nearly need all registers, with
1203  // some registers having special constraints, e.g. like for {new_value} and
1204  // {result} the low-word register has to have an even register code, and the
1205  // high-word has to be in the next higher register. To avoid complicated
1206  // register allocation code here, we just assign fixed registers to all
1207  // values here, and then move all values into the correct register.
1208  Register dst_addr = r0;
1209  Register offset = r1;
1210  Register result_low = r4;
1211  Register result_high = r5;
1212  Register new_value_low = r2;
1213  Register new_value_high = r3;
1214  Register store_result = r6;
1215  Register expected_low = r8;
1216  Register expected_high = r9;
1217
1218  // We spill all registers, so that we can re-assign them afterwards.
1219  __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low,
1220                    new_value_high, store_result, expected_low, expected_high);
1221
1222  __ ParallelRegisterMove(
1223      {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value,
1224        kI64},
1225       {LiftoffRegister::ForPair(expected_low, expected_high), expected, kI64},
1226       {dst_addr, dst_addr_reg, kI32},
1227       {offset, offset_reg != no_reg ? offset_reg : offset, kI32}});
1228
1229  {
1230    UseScratchRegisterScope temps(lasm);
1231    Register temp = liftoff::CalculateActualAddress(
1232        lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset,
1233        offset_imm, dst_addr);
1234    // Make sure the actual address is stored in the right register.
1235    DCHECK_EQ(dst_addr, temp);
1236    USE(temp);
1237  }
1238
1239  Label retry;
1240  Label done;
1241  __ dmb(ISH);
1242  __ bind(&retry);
1243  __ ldrexd(result_low, result_high, dst_addr);
1244  __ cmp(result_low, expected_low);
1245  __ b(ne, &done);
1246  __ cmp(result_high, expected_high);
1247  __ b(ne, &done);
1248  __ strexd(store_result, new_value_low, new_value_high, dst_addr);
1249  __ cmp(store_result, Operand(0));
1250  __ b(ne, &retry);
1251  __ dmb(ISH);
1252  __ bind(&done);
1253
1254  __ ParallelRegisterMove(
1255      {{result, LiftoffRegister::ForPair(result_low, result_high), kI64}});
1256}
1257#undef __
1258}  // namespace liftoff
1259
1260void LiftoffAssembler::AtomicCompareExchange(
1261    Register dst_addr, Register offset_reg, uint32_t offset_imm,
1262    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
1263    StoreType type) {
1264  if (type.value() == StoreType::kI64Store) {
1265    liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm,
1266                                      expected, new_value, result);
1267    return;
1268  }
1269
1270  // The other versions of CompareExchange can share code, but need special load
1271  // and store instructions.
1272  void (Assembler::*load)(Register, Register, Condition) = nullptr;
1273  void (Assembler::*store)(Register, Register, Register, Condition) = nullptr;
1274
1275  LiftoffRegList pinned = {dst_addr, offset_reg};
1276  // We need to remember the high word of {result}, so we can set it to zero in
1277  // the end if necessary.
1278  Register result_high = no_reg;
1279  switch (type.value()) {
1280    case StoreType::kI64Store8:
1281      result_high = result.high_gp();
1282      result = result.low();
1283      new_value = new_value.low();
1284      expected = expected.low();
1285      V8_FALLTHROUGH;
1286    case StoreType::kI32Store8:
1287      load = &Assembler::ldrexb;
1288      store = &Assembler::strexb;
1289      // We have to clear the high bits of {expected}, as we can only do a
1290      // 32-bit comparison. If the {expected} register is used, we spill it
1291      // first.
1292      if (cache_state()->is_used(expected)) {
1293        SpillRegister(expected);
1294      }
1295      uxtb(expected.gp(), expected.gp());
1296      break;
1297    case StoreType::kI64Store16:
1298      result_high = result.high_gp();
1299      result = result.low();
1300      new_value = new_value.low();
1301      expected = expected.low();
1302      V8_FALLTHROUGH;
1303    case StoreType::kI32Store16:
1304      load = &Assembler::ldrexh;
1305      store = &Assembler::strexh;
1306      // We have to clear the high bits of {expected}, as we can only do a
1307      // 32-bit comparison. If the {expected} register is used, we spill it
1308      // first.
1309      if (cache_state()->is_used(expected)) {
1310        SpillRegister(expected);
1311      }
1312      uxth(expected.gp(), expected.gp());
1313      break;
1314    case StoreType::kI64Store32:
1315      result_high = result.high_gp();
1316      result = result.low();
1317      new_value = new_value.low();
1318      expected = expected.low();
1319      V8_FALLTHROUGH;
1320    case StoreType::kI32Store:
1321      load = &Assembler::ldrex;
1322      store = &Assembler::strex;
1323      break;
1324    default:
1325      UNREACHABLE();
1326  }
1327  pinned.set(new_value);
1328  pinned.set(expected);
1329
1330  Register result_reg = result.gp();
1331  if (pinned.has(result)) {
1332    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
1333  }
1334  pinned.set(LiftoffRegister(result));
1335  Register store_result = GetUnusedRegister(kGpReg, pinned).gp();
1336
1337  UseScratchRegisterScope temps(this);
1338  Register actual_addr = liftoff::CalculateActualAddress(
1339      this, &temps, dst_addr, offset_reg, offset_imm);
1340
1341  Label retry;
1342  Label done;
1343  dmb(ISH);
1344  bind(&retry);
1345  (this->*load)(result.gp(), actual_addr, al);
1346  cmp(result.gp(), expected.gp());
1347  b(ne, &done);
1348  (this->*store)(store_result, new_value.gp(), actual_addr, al);
1349  cmp(store_result, Operand(0));
1350  b(ne, &retry);
1351  dmb(ISH);
1352  bind(&done);
1353
1354  if (result.gp() != result_reg) {
1355    mov(result.gp(), result_reg);
1356  }
1357  if (result_high != no_reg) {
1358    LoadConstant(LiftoffRegister(result_high), WasmValue(0));
1359  }
1360}
1361
1362void LiftoffAssembler::AtomicFence() { dmb(ISH); }
1363
1364void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
1365                                           uint32_t caller_slot_idx,
1366                                           ValueKind kind) {
1367  MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize);
1368  liftoff::Load(this, dst, src, kind);
1369}
1370
1371void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
1372                                            uint32_t caller_slot_idx,
1373                                            ValueKind kind) {
1374  MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize);
1375  liftoff::Store(this, src, dst, kind);
1376}
1377
1378void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
1379                                           ValueKind kind) {
1380  MemOperand src(sp, offset);
1381  liftoff::Load(this, dst, src, kind);
1382}
1383
1384void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
1385                                      ValueKind kind) {
1386  DCHECK_NE(dst_offset, src_offset);
1387  LiftoffRegister reg = GetUnusedRegister(reg_class_for(kind), {});
1388  Fill(reg, src_offset, kind);
1389  Spill(dst_offset, reg, kind);
1390}
1391
1392void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
1393  DCHECK_NE(dst, src);
1394  DCHECK(kind == kI32 || is_reference(kind));
1395  TurboAssembler::Move(dst, src);
1396}
1397
1398void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
1399                            ValueKind kind) {
1400  DCHECK_NE(dst, src);
1401  if (kind == kF32) {
1402    vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1403  } else if (kind == kF64) {
1404    vmov(dst, src);
1405  } else {
1406    DCHECK_EQ(kS128, kind);
1407    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
1408  }
1409}
1410
1411void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
1412  // The {str} instruction needs a temp register when the immediate in the
1413  // provided MemOperand does not fit into 12 bits. This happens for large stack
1414  // frames. This DCHECK checks that the temp register is available when needed.
1415  DCHECK(UseScratchRegisterScope{this}.CanAcquire());
1416  DCHECK_LT(0, offset);
1417  RecordUsedSpillOffset(offset);
1418  MemOperand dst(fp, -offset);
1419  liftoff::Store(this, reg, dst, kind);
1420}
1421
1422void LiftoffAssembler::Spill(int offset, WasmValue value) {
1423  RecordUsedSpillOffset(offset);
1424  MemOperand dst = liftoff::GetStackSlot(offset);
1425  UseScratchRegisterScope temps(this);
1426  Register src = no_reg;
1427  // The scratch register will be required by str if multiple instructions
1428  // are required to encode the offset, and so we cannot use it in that case.
1429  if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
1430    src = GetUnusedRegister(kGpReg, {}).gp();
1431  } else {
1432    src = temps.Acquire();
1433  }
1434  switch (value.type().kind()) {
1435    case kI32:
1436      mov(src, Operand(value.to_i32()));
1437      str(src, dst);
1438      break;
1439    case kI64: {
1440      int32_t low_word = value.to_i64();
1441      mov(src, Operand(low_word));
1442      str(src, liftoff::GetHalfStackSlot(offset, kLowWord));
1443      int32_t high_word = value.to_i64() >> 32;
1444      mov(src, Operand(high_word));
1445      str(src, liftoff::GetHalfStackSlot(offset, kHighWord));
1446      break;
1447    }
1448    default:
1449      // We do not track f32 and f64 constants, hence they are unreachable.
1450      UNREACHABLE();
1451  }
1452}
1453
1454void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
1455  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
1456}
1457
1458void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
1459  ldr(reg, liftoff::GetHalfStackSlot(offset, half));
1460}
1461
1462void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
1463  DCHECK_LT(0, size);
1464  DCHECK_EQ(0, size % 4);
1465  RecordUsedSpillOffset(start + size);
1466
1467  // We need a zero reg. Always use r0 for that, and push it before to restore
1468  // its value afterwards.
1469  push(r0);
1470  mov(r0, Operand(0));
1471
1472  if (size <= 36) {
1473    // Special straight-line code for up to 9 words. Generates one
1474    // instruction per word.
1475    for (int offset = 4; offset <= size; offset += 4) {
1476      str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord));
1477    }
1478  } else {
1479    // General case for bigger counts (9 instructions).
1480    // Use r1 for start address (inclusive), r2 for end address (exclusive).
1481    push(r1);
1482    push(r2);
1483    sub(r1, fp, Operand(start + size));
1484    sub(r2, fp, Operand(start));
1485
1486    Label loop;
1487    bind(&loop);
1488    str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex));
1489    cmp(r1, r2);
1490    b(&loop, ne);
1491
1492    pop(r2);
1493    pop(r1);
1494  }
1495
1496  pop(r0);
1497}
1498
1499#define I32_BINOP(name, instruction)                             \
1500  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
1501                                     Register rhs) {             \
1502    instruction(dst, lhs, rhs);                                  \
1503  }
1504#define I32_BINOP_I(name, instruction)                              \
1505  I32_BINOP(name, instruction)                                      \
1506  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
1507                                        int32_t imm) {              \
1508    instruction(dst, lhs, Operand(imm));                            \
1509  }
1510#define I32_SHIFTOP(name, instruction)                              \
1511  void LiftoffAssembler::emit_##name(Register dst, Register src,    \
1512                                     Register amount) {             \
1513    UseScratchRegisterScope temps(this);                            \
1514    Register scratch = temps.Acquire();                             \
1515    and_(scratch, amount, Operand(0x1f));                           \
1516    instruction(dst, src, Operand(scratch));                        \
1517  }                                                                 \
1518  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
1519                                        int32_t amount) {           \
1520    if (V8_LIKELY((amount & 31) != 0)) {                            \
1521      instruction(dst, src, Operand(amount & 31));                  \
1522    } else if (dst != src) {                                        \
1523      mov(dst, src);                                                \
1524    }                                                               \
1525  }
1526#define FP32_UNOP(name, instruction)                                           \
1527  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1528    instruction(liftoff::GetFloatRegister(dst),                                \
1529                liftoff::GetFloatRegister(src));                               \
1530  }
1531#define FP32_BINOP(name, instruction)                                        \
1532  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1533                                     DoubleRegister rhs) {                   \
1534    instruction(liftoff::GetFloatRegister(dst),                              \
1535                liftoff::GetFloatRegister(lhs),                              \
1536                liftoff::GetFloatRegister(rhs));                             \
1537  }
1538#define FP64_UNOP(name, instruction)                                           \
1539  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1540    instruction(dst, src);                                                     \
1541  }
1542#define FP64_BINOP(name, instruction)                                        \
1543  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1544                                     DoubleRegister rhs) {                   \
1545    instruction(dst, lhs, rhs);                                              \
1546  }
1547
1548I32_BINOP_I(i32_add, add)
1549I32_BINOP_I(i32_sub, sub)
1550I32_BINOP(i32_mul, mul)
1551I32_BINOP_I(i32_and, and_)
1552I32_BINOP_I(i32_or, orr)
1553I32_BINOP_I(i32_xor, eor)
1554I32_SHIFTOP(i32_shl, lsl)
1555I32_SHIFTOP(i32_sar, asr)
1556I32_SHIFTOP(i32_shr, lsr)
1557FP32_BINOP(f32_add, vadd)
1558FP32_BINOP(f32_sub, vsub)
1559FP32_BINOP(f32_mul, vmul)
1560FP32_BINOP(f32_div, vdiv)
1561FP32_UNOP(f32_abs, vabs)
1562FP32_UNOP(f32_neg, vneg)
1563FP32_UNOP(f32_sqrt, vsqrt)
1564FP64_BINOP(f64_add, vadd)
1565FP64_BINOP(f64_sub, vsub)
1566FP64_BINOP(f64_mul, vmul)
1567FP64_BINOP(f64_div, vdiv)
1568FP64_UNOP(f64_abs, vabs)
1569FP64_UNOP(f64_neg, vneg)
1570FP64_UNOP(f64_sqrt, vsqrt)
1571
1572#undef I32_BINOP
1573#undef I32_SHIFTOP
1574#undef FP32_UNOP
1575#undef FP32_BINOP
1576#undef FP64_UNOP
1577#undef FP64_BINOP
1578
1579void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1580  clz(dst, src);
1581}
1582
1583void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1584  rbit(dst, src);
1585  clz(dst, dst);
1586}
1587
1588namespace liftoff {
1589inline void GeneratePopCnt(Assembler* assm, Register dst, Register src,
1590                           Register scratch1, Register scratch2) {
1591  DCHECK(!AreAliased(dst, scratch1, scratch2));
1592  if (src == scratch1) std::swap(scratch1, scratch2);
1593  // x = x - ((x & (0x55555555 << 1)) >> 1)
1594  assm->and_(scratch1, src, Operand(0xaaaaaaaa));
1595  assm->sub(dst, src, Operand(scratch1, LSR, 1));
1596  // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
1597  assm->mov(scratch1, Operand(0x33333333));
1598  assm->and_(scratch2, dst, Operand(scratch1, LSL, 2));
1599  assm->and_(scratch1, dst, scratch1);
1600  assm->add(dst, scratch1, Operand(scratch2, LSR, 2));
1601  // x = (x + (x >> 4)) & 0x0F0F0F0F
1602  assm->add(dst, dst, Operand(dst, LSR, 4));
1603  assm->and_(dst, dst, Operand(0x0f0f0f0f));
1604  // x = x + (x >> 8)
1605  assm->add(dst, dst, Operand(dst, LSR, 8));
1606  // x = x + (x >> 16)
1607  assm->add(dst, dst, Operand(dst, LSR, 16));
1608  // x = x & 0x3F
1609  assm->and_(dst, dst, Operand(0x3f));
1610}
1611}  // namespace liftoff
1612
1613bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1614  LiftoffRegList pinned = {dst};
1615  Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1616  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1617  liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2);
1618  return true;
1619}
1620
1621void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1622                                     Label* trap_div_by_zero,
1623                                     Label* trap_div_unrepresentable) {
1624  if (!CpuFeatures::IsSupported(SUDIV)) {
1625    bailout(kMissingCPUFeature, "i32_divs");
1626    return;
1627  }
1628  CpuFeatureScope scope(this, SUDIV);
1629  // Issue division early so we can perform the trapping checks whilst it
1630  // completes.
1631  bool speculative_sdiv = dst != lhs && dst != rhs;
1632  if (speculative_sdiv) {
1633    sdiv(dst, lhs, rhs);
1634  }
1635  Label noTrap;
1636  // Check for division by zero.
1637  cmp(rhs, Operand(0));
1638  b(trap_div_by_zero, eq);
1639  // Check for kMinInt / -1. This is unrepresentable.
1640  cmp(rhs, Operand(-1));
1641  b(&noTrap, ne);
1642  cmp(lhs, Operand(kMinInt));
1643  b(trap_div_unrepresentable, eq);
1644  bind(&noTrap);
1645  if (!speculative_sdiv) {
1646    sdiv(dst, lhs, rhs);
1647  }
1648}
1649
1650void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1651                                     Label* trap_div_by_zero) {
1652  if (!CpuFeatures::IsSupported(SUDIV)) {
1653    bailout(kMissingCPUFeature, "i32_divu");
1654    return;
1655  }
1656  CpuFeatureScope scope(this, SUDIV);
1657  // Check for division by zero.
1658  cmp(rhs, Operand(0));
1659  b(trap_div_by_zero, eq);
1660  udiv(dst, lhs, rhs);
1661}
1662
1663void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1664                                     Label* trap_div_by_zero) {
1665  if (!CpuFeatures::IsSupported(SUDIV)) {
1666    // When this case is handled, a check for ARMv7 is required to use mls.
1667    // Mls support is implied with SUDIV support.
1668    bailout(kMissingCPUFeature, "i32_rems");
1669    return;
1670  }
1671  CpuFeatureScope scope(this, SUDIV);
1672  // No need to check kMinInt / -1 because the result is kMinInt and then
1673  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
1674  UseScratchRegisterScope temps(this);
1675  Register scratch = temps.Acquire();
1676  sdiv(scratch, lhs, rhs);
1677  // Check for division by zero.
1678  cmp(rhs, Operand(0));
1679  b(trap_div_by_zero, eq);
1680  // Compute remainder.
1681  mls(dst, scratch, rhs, lhs);
1682}
1683
1684void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1685                                     Label* trap_div_by_zero) {
1686  if (!CpuFeatures::IsSupported(SUDIV)) {
1687    // When this case is handled, a check for ARMv7 is required to use mls.
1688    // Mls support is implied with SUDIV support.
1689    bailout(kMissingCPUFeature, "i32_remu");
1690    return;
1691  }
1692  CpuFeatureScope scope(this, SUDIV);
1693  // No need to check kMinInt / -1 because the result is kMinInt and then
1694  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
1695  UseScratchRegisterScope temps(this);
1696  Register scratch = temps.Acquire();
1697  udiv(scratch, lhs, rhs);
1698  // Check for division by zero.
1699  cmp(rhs, Operand(0));
1700  b(trap_div_by_zero, eq);
1701  // Compute remainder.
1702  mls(dst, scratch, rhs, lhs);
1703}
1704
1705void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1706                                    LiftoffRegister rhs) {
1707  liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1708}
1709
1710void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1711                                     int64_t imm) {
1712  liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1713}
1714
1715void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1716                                    LiftoffRegister rhs) {
1717  liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs);
1718}
1719
1720void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1721                                    LiftoffRegister rhs) {
1722  // Idea:
1723  //        [           lhs_hi  |           lhs_lo  ] * [  rhs_hi  |  rhs_lo  ]
1724  //    =   [  lhs_hi * rhs_lo  |                   ]  (32 bit mul, shift 32)
1725  //      + [  lhs_lo * rhs_hi  |                   ]  (32 bit mul, shift 32)
1726  //      + [             lhs_lo * rhs_lo           ]  (32x32->64 mul, shift 0)
1727  UseScratchRegisterScope temps(this);
1728  Register scratch = temps.Acquire();
1729  // scratch = lhs_hi * rhs_lo
1730  mul(scratch, lhs.high_gp(), rhs.low_gp());
1731  // scratch += lhs_lo * rhs_hi
1732  mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch);
1733  // TODO(arm): use umlal once implemented correctly in the simulator.
1734  // [dst_hi|dst_lo] = lhs_lo * rhs_lo
1735  umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp());
1736  // dst_hi += scratch
1737  add(dst.high_gp(), dst.high_gp(), scratch);
1738}
1739
1740bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1741                                     LiftoffRegister rhs,
1742                                     Label* trap_div_by_zero,
1743                                     Label* trap_div_unrepresentable) {
1744  return false;
1745}
1746
1747bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1748                                     LiftoffRegister rhs,
1749                                     Label* trap_div_by_zero) {
1750  return false;
1751}
1752
1753bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1754                                     LiftoffRegister rhs,
1755                                     Label* trap_div_by_zero) {
1756  return false;
1757}
1758
1759bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1760                                     LiftoffRegister rhs,
1761                                     Label* trap_div_by_zero) {
1762  return false;
1763}
1764
1765void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1766                                    Register amount) {
1767  liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount);
1768}
1769
1770void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1771                                     int32_t amount) {
1772  UseScratchRegisterScope temps(this);
1773  // {src.low_gp()} will still be needed after writing {dst.high_gp()}.
1774  Register src_low =
1775      liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps);
1776
1777  LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63);
1778}
1779
1780void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1781                                    Register amount) {
1782  liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount);
1783}
1784
1785void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1786                                     int32_t amount) {
1787  UseScratchRegisterScope temps(this);
1788  // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1789  Register src_high =
1790      liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1791
1792  AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1793}
1794
1795void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1796                                    Register amount) {
1797  liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount);
1798}
1799
1800void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1801                                     int32_t amount) {
1802  UseScratchRegisterScope temps(this);
1803  // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1804  Register src_high =
1805      liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1806
1807  LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1808}
1809
1810void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1811  // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
1812  Label done;
1813  Label high_is_zero;
1814  cmp(src.high_gp(), Operand(0));
1815  b(&high_is_zero, eq);
1816
1817  clz(dst.low_gp(), src.high_gp());
1818  jmp(&done);
1819
1820  bind(&high_is_zero);
1821  clz(dst.low_gp(), src.low_gp());
1822  add(dst.low_gp(), dst.low_gp(), Operand(32));
1823
1824  bind(&done);
1825  mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
1826}
1827
1828void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1829  // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
1830  // CTZ32(x) = CLZ(RBIT(x))
1831  Label done;
1832  Label low_is_zero;
1833  cmp(src.low_gp(), Operand(0));
1834  b(&low_is_zero, eq);
1835
1836  rbit(dst.low_gp(), src.low_gp());
1837  clz(dst.low_gp(), dst.low_gp());
1838  jmp(&done);
1839
1840  bind(&low_is_zero);
1841  rbit(dst.low_gp(), src.high_gp());
1842  clz(dst.low_gp(), dst.low_gp());
1843  add(dst.low_gp(), dst.low_gp(), Operand(32));
1844
1845  bind(&done);
1846  mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
1847}
1848
1849bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1850                                       LiftoffRegister src) {
1851  // Produce partial popcnts in the two dst registers, making sure not to
1852  // overwrite the second src register before using it.
1853  Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
1854  Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
1855  LiftoffRegList pinned = {dst, src2};
1856  Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1857  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1858  liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2);
1859  liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2);
1860  // Now add the two into the lower dst reg and clear the higher dst reg.
1861  add(dst.low_gp(), dst.low_gp(), dst.high_gp());
1862  mov(dst.high_gp(), Operand(0));
1863  return true;
1864}
1865
1866void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1867  UseScratchRegisterScope temps(this);
1868  Register scratch = temps.Acquire();
1869  ldr(scratch, MemOperand(dst.gp(), offset));
1870  add(scratch, scratch, Operand(Smi::FromInt(1)));
1871  str(scratch, MemOperand(dst.gp(), offset));
1872}
1873
1874bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
1875  if (CpuFeatures::IsSupported(ARMv8)) {
1876    CpuFeatureScope scope(this, ARMv8);
1877    vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1878    return true;
1879  }
1880  return false;
1881}
1882
1883bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
1884  if (CpuFeatures::IsSupported(ARMv8)) {
1885    CpuFeatureScope scope(this, ARMv8);
1886    vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1887    return true;
1888  }
1889  return false;
1890}
1891
1892bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
1893  if (CpuFeatures::IsSupported(ARMv8)) {
1894    CpuFeatureScope scope(this, ARMv8);
1895    vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1896    return true;
1897  }
1898  return false;
1899}
1900
1901bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
1902                                            DoubleRegister src) {
1903  if (CpuFeatures::IsSupported(ARMv8)) {
1904    CpuFeatureScope scope(this, ARMv8);
1905    vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1906    return true;
1907  }
1908  return false;
1909}
1910
1911void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1912                                    DoubleRegister rhs) {
1913  liftoff::EmitFloatMinOrMax(
1914      this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1915      liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin);
1916}
1917
1918void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
1919                                    DoubleRegister rhs) {
1920  liftoff::EmitFloatMinOrMax(
1921      this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1922      liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax);
1923}
1924
1925bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
1926  if (CpuFeatures::IsSupported(ARMv8)) {
1927    CpuFeatureScope scope(this, ARMv8);
1928    vrintp(dst, src);
1929    return true;
1930  }
1931  return false;
1932}
1933
1934bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
1935  if (CpuFeatures::IsSupported(ARMv8)) {
1936    CpuFeatureScope scope(this, ARMv8);
1937    vrintm(dst, src);
1938    return true;
1939  }
1940  return false;
1941}
1942
1943bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
1944  if (CpuFeatures::IsSupported(ARMv8)) {
1945    CpuFeatureScope scope(this, ARMv8);
1946    vrintz(dst, src);
1947    return true;
1948  }
1949  return false;
1950}
1951
1952bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
1953                                            DoubleRegister src) {
1954  if (CpuFeatures::IsSupported(ARMv8)) {
1955    CpuFeatureScope scope(this, ARMv8);
1956    vrintn(dst, src);
1957    return true;
1958  }
1959  return false;
1960}
1961
1962void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
1963                                    DoubleRegister rhs) {
1964  liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin);
1965}
1966
1967void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
1968                                    DoubleRegister rhs) {
1969  liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax);
1970}
1971
1972void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1973                                         DoubleRegister rhs) {
1974  constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
1975  UseScratchRegisterScope temps(this);
1976  Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1977  Register scratch2 = temps.Acquire();
1978  VmovLow(scratch, lhs);
1979  // Clear sign bit in {scratch}.
1980  bic(scratch, scratch, Operand(kF32SignBit));
1981  VmovLow(scratch2, rhs);
1982  // Isolate sign bit in {scratch2}.
1983  and_(scratch2, scratch2, Operand(kF32SignBit));
1984  // Combine {scratch2} into {scratch}.
1985  orr(scratch, scratch, scratch2);
1986  VmovLow(dst, scratch);
1987}
1988
1989void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1990                                         DoubleRegister rhs) {
1991  constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31;
1992  // On arm, we cannot hold the whole f64 value in a gp register, so we just
1993  // operate on the upper half (UH).
1994  UseScratchRegisterScope temps(this);
1995  Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1996  Register scratch2 = temps.Acquire();
1997  VmovHigh(scratch, lhs);
1998  // Clear sign bit in {scratch}.
1999  bic(scratch, scratch, Operand(kF64SignBitHighWord));
2000  VmovHigh(scratch2, rhs);
2001  // Isolate sign bit in {scratch2}.
2002  and_(scratch2, scratch2, Operand(kF64SignBitHighWord));
2003  // Combine {scratch2} into {scratch}.
2004  orr(scratch, scratch, scratch2);
2005  vmov(dst, lhs);
2006  VmovHigh(dst, scratch);
2007}
2008
2009bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2010                                            LiftoffRegister dst,
2011                                            LiftoffRegister src, Label* trap) {
2012  switch (opcode) {
2013    case kExprI32ConvertI64:
2014      TurboAssembler::Move(dst.gp(), src.low_gp());
2015      return true;
2016    case kExprI32SConvertF32: {
2017      UseScratchRegisterScope temps(this);
2018      SwVfpRegister scratch_f = temps.AcquireS();
2019      vcvt_s32_f32(
2020          scratch_f,
2021          liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2022      vmov(dst.gp(), scratch_f);
2023      // Check underflow and NaN.
2024      vmov(scratch_f, Float32(static_cast<float>(INT32_MIN)));
2025      VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2026      b(trap, lt);
2027      // Check overflow.
2028      cmp(dst.gp(), Operand(-1));
2029      b(trap, vs);
2030      return true;
2031    }
2032    case kExprI32UConvertF32: {
2033      UseScratchRegisterScope temps(this);
2034      SwVfpRegister scratch_f = temps.AcquireS();
2035      vcvt_u32_f32(
2036          scratch_f,
2037          liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2038      vmov(dst.gp(), scratch_f);
2039      // Check underflow and NaN.
2040      vmov(scratch_f, Float32(-1.0f));
2041      VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2042      b(trap, le);
2043      // Check overflow.
2044      cmp(dst.gp(), Operand(-1));
2045      b(trap, eq);
2046      return true;
2047    }
2048    case kExprI32SConvertF64: {
2049      UseScratchRegisterScope temps(this);
2050      SwVfpRegister scratch_f = temps.AcquireS();
2051      vcvt_s32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2052      vmov(dst.gp(), scratch_f);
2053      // Check underflow and NaN.
2054      DwVfpRegister scratch_d = temps.AcquireD();
2055      vmov(scratch_d, base::Double(static_cast<double>(INT32_MIN - 1.0)));
2056      VFPCompareAndSetFlags(src.fp(), scratch_d);
2057      b(trap, le);
2058      // Check overflow.
2059      vmov(scratch_d, base::Double(static_cast<double>(INT32_MAX + 1.0)));
2060      VFPCompareAndSetFlags(src.fp(), scratch_d);
2061      b(trap, ge);
2062      return true;
2063    }
2064    case kExprI32UConvertF64: {
2065      UseScratchRegisterScope temps(this);
2066      SwVfpRegister scratch_f = temps.AcquireS();
2067      vcvt_u32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2068      vmov(dst.gp(), scratch_f);
2069      // Check underflow and NaN.
2070      DwVfpRegister scratch_d = temps.AcquireD();
2071      vmov(scratch_d, base::Double(static_cast<double>(-1.0)));
2072      VFPCompareAndSetFlags(src.fp(), scratch_d);
2073      b(trap, le);
2074      // Check overflow.
2075      vmov(scratch_d, base::Double(static_cast<double>(UINT32_MAX + 1.0)));
2076      VFPCompareAndSetFlags(src.fp(), scratch_d);
2077      b(trap, ge);
2078      return true;
2079    }
2080    case kExprI32SConvertSatF32: {
2081      UseScratchRegisterScope temps(this);
2082      SwVfpRegister scratch_f = temps.AcquireS();
2083      vcvt_s32_f32(
2084          scratch_f,
2085          liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2086      vmov(dst.gp(), scratch_f);
2087      return true;
2088    }
2089    case kExprI32UConvertSatF32: {
2090      UseScratchRegisterScope temps(this);
2091      SwVfpRegister scratch_f = temps.AcquireS();
2092      vcvt_u32_f32(
2093          scratch_f,
2094          liftoff::GetFloatRegister(src.fp()));  // f32 -> u32 round to zero.
2095      vmov(dst.gp(), scratch_f);
2096      return true;
2097    }
2098    case kExprI32SConvertSatF64: {
2099      UseScratchRegisterScope temps(this);
2100      SwVfpRegister scratch_f = temps.AcquireS();
2101      vcvt_s32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2102      vmov(dst.gp(), scratch_f);
2103      return true;
2104    }
2105    case kExprI32UConvertSatF64: {
2106      UseScratchRegisterScope temps(this);
2107      SwVfpRegister scratch_f = temps.AcquireS();
2108      vcvt_u32_f64(scratch_f, src.fp());  // f64 -> u32 round to zero.
2109      vmov(dst.gp(), scratch_f);
2110      return true;
2111    }
2112    case kExprI32ReinterpretF32:
2113      vmov(dst.gp(), liftoff::GetFloatRegister(src.fp()));
2114      return true;
2115    case kExprI64SConvertI32:
2116      if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2117      mov(dst.high_gp(), Operand(src.gp(), ASR, 31));
2118      return true;
2119    case kExprI64UConvertI32:
2120      if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2121      mov(dst.high_gp(), Operand(0));
2122      return true;
2123    case kExprI64ReinterpretF64:
2124      vmov(dst.low_gp(), dst.high_gp(), src.fp());
2125      return true;
2126    case kExprF32SConvertI32: {
2127      SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2128      vmov(dst_float, src.gp());
2129      vcvt_f32_s32(dst_float, dst_float);
2130      return true;
2131    }
2132    case kExprF32UConvertI32: {
2133      SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2134      vmov(dst_float, src.gp());
2135      vcvt_f32_u32(dst_float, dst_float);
2136      return true;
2137    }
2138    case kExprF32ConvertF64:
2139      vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp());
2140      return true;
2141    case kExprF32ReinterpretI32:
2142      vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2143      return true;
2144    case kExprF64SConvertI32: {
2145      vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2146      vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2147      return true;
2148    }
2149    case kExprF64UConvertI32: {
2150      vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2151      vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2152      return true;
2153    }
2154    case kExprF64ConvertF32:
2155      vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp()));
2156      return true;
2157    case kExprF64ReinterpretI64:
2158      vmov(dst.fp(), src.low_gp(), src.high_gp());
2159      return true;
2160    case kExprF64SConvertI64:
2161    case kExprF64UConvertI64:
2162    case kExprI64SConvertF32:
2163    case kExprI64UConvertF32:
2164    case kExprI64SConvertSatF32:
2165    case kExprI64UConvertSatF32:
2166    case kExprF32SConvertI64:
2167    case kExprF32UConvertI64:
2168    case kExprI64SConvertF64:
2169    case kExprI64UConvertF64:
2170    case kExprI64SConvertSatF64:
2171    case kExprI64UConvertSatF64:
2172      // These cases can be handled by the C fallback function.
2173      return false;
2174    default:
2175      UNREACHABLE();
2176  }
2177}
2178
2179void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2180  sxtb(dst, src);
2181}
2182
2183void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2184  sxth(dst, src);
2185}
2186
2187void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2188                                              LiftoffRegister src) {
2189  emit_i32_signextend_i8(dst.low_gp(), src.low_gp());
2190  mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2191}
2192
2193void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2194                                               LiftoffRegister src) {
2195  emit_i32_signextend_i16(dst.low_gp(), src.low_gp());
2196  mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2197}
2198
2199void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2200                                               LiftoffRegister src) {
2201  TurboAssembler::Move(dst.low_gp(), src.low_gp());
2202  mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31));
2203}
2204
2205void LiftoffAssembler::emit_jump(Label* label) { b(label); }
2206
2207void LiftoffAssembler::emit_jump(Register target) { bx(target); }
2208
2209void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2210                                      Label* label, ValueKind kind,
2211                                      Register lhs, Register rhs) {
2212  Condition cond = liftoff::ToCondition(liftoff_cond);
2213
2214  if (rhs == no_reg) {
2215    DCHECK_EQ(kind, kI32);
2216    cmp(lhs, Operand(0));
2217  } else {
2218    DCHECK(kind == kI32 || (is_reference(kind) && (liftoff_cond == kEqual ||
2219                                                   liftoff_cond == kUnequal)));
2220    cmp(lhs, rhs);
2221  }
2222  b(label, cond);
2223}
2224
2225void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2226                                           Label* label, Register lhs,
2227                                           int32_t imm) {
2228  Condition cond = liftoff::ToCondition(liftoff_cond);
2229  cmp(lhs, Operand(imm));
2230  b(label, cond);
2231}
2232
2233void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2234                                                   int subtrahend,
2235                                                   Label* result_negative) {
2236  sub(value, value, Operand(subtrahend), SetCC);
2237  b(result_negative, mi);
2238}
2239
2240void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2241  clz(dst, src);
2242  mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2));
2243}
2244
2245void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2246                                         Register dst, Register lhs,
2247                                         Register rhs) {
2248  Condition cond = liftoff::ToCondition(liftoff_cond);
2249  cmp(lhs, rhs);
2250  mov(dst, Operand(0), LeaveCC);
2251  mov(dst, Operand(1), LeaveCC, cond);
2252}
2253
2254void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2255  orr(dst, src.low_gp(), src.high_gp());
2256  clz(dst, dst);
2257  mov(dst, Operand(dst, LSR, 5));
2258}
2259
2260void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2261                                         Register dst, LiftoffRegister lhs,
2262                                         LiftoffRegister rhs) {
2263  // For signed i64 comparisons, we still need to use unsigned comparison for
2264  // the low word (the only bit carrying signedness information is the MSB in
2265  // the high word).
2266  Condition cond = liftoff::ToCondition(liftoff_cond);
2267  Condition unsigned_cond =
2268      liftoff::ToCondition(liftoff::MakeUnsigned(liftoff_cond));
2269  Label set_cond;
2270  Label cont;
2271  LiftoffRegister dest = LiftoffRegister(dst);
2272  bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs);
2273  if (speculative_move) {
2274    mov(dst, Operand(0));
2275  }
2276  // Compare high word first. If it differs, use it for the set_cond. If it's
2277  // equal, compare the low word and use that for set_cond.
2278  cmp(lhs.high_gp(), rhs.high_gp());
2279  if (unsigned_cond == cond) {
2280    cmp(lhs.low_gp(), rhs.low_gp(), eq);
2281    if (!speculative_move) {
2282      mov(dst, Operand(0));
2283    }
2284    mov(dst, Operand(1), LeaveCC, cond);
2285  } else {
2286    // If the condition predicate for the low differs from that for the high
2287    // word, the conditional move instructions must be separated.
2288    b(ne, &set_cond);
2289    cmp(lhs.low_gp(), rhs.low_gp());
2290    if (!speculative_move) {
2291      mov(dst, Operand(0));
2292    }
2293    mov(dst, Operand(1), LeaveCC, unsigned_cond);
2294    b(&cont);
2295    bind(&set_cond);
2296    if (!speculative_move) {
2297      mov(dst, Operand(0));
2298    }
2299    mov(dst, Operand(1), LeaveCC, cond);
2300    bind(&cont);
2301  }
2302}
2303
2304void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2305                                         Register dst, DoubleRegister lhs,
2306                                         DoubleRegister rhs) {
2307  Condition cond = liftoff::ToCondition(liftoff_cond);
2308  VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs),
2309                        liftoff::GetFloatRegister(rhs));
2310  mov(dst, Operand(0), LeaveCC);
2311  mov(dst, Operand(1), LeaveCC, cond);
2312  if (cond != ne) {
2313    // If V flag set, at least one of the arguments was a Nan -> false.
2314    mov(dst, Operand(0), LeaveCC, vs);
2315  }
2316}
2317
2318void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2319                                         Register dst, DoubleRegister lhs,
2320                                         DoubleRegister rhs) {
2321  Condition cond = liftoff::ToCondition(liftoff_cond);
2322  VFPCompareAndSetFlags(lhs, rhs);
2323  mov(dst, Operand(0), LeaveCC);
2324  mov(dst, Operand(1), LeaveCC, cond);
2325  if (cond != ne) {
2326    // If V flag set, at least one of the arguments was a Nan -> false.
2327    mov(dst, Operand(0), LeaveCC, vs);
2328  }
2329}
2330
2331bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2332                                   LiftoffRegister true_value,
2333                                   LiftoffRegister false_value,
2334                                   ValueKind kind) {
2335  return false;
2336}
2337
2338void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2339                                      SmiCheckMode mode) {
2340  tst(obj, Operand(kSmiTagMask));
2341  Condition condition = mode == kJumpOnSmi ? eq : ne;
2342  b(condition, target);
2343}
2344
2345void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2346                                     Register offset_reg, uintptr_t offset_imm,
2347                                     LoadType type,
2348                                     LoadTransformationKind transform,
2349                                     uint32_t* protected_load_pc) {
2350  UseScratchRegisterScope temps(this);
2351  Register actual_src_addr = liftoff::CalculateActualAddress(
2352      this, &temps, src_addr, offset_reg, offset_imm);
2353  *protected_load_pc = pc_offset();
2354  MachineType memtype = type.mem_type();
2355
2356  if (transform == LoadTransformationKind::kExtend) {
2357    if (memtype == MachineType::Int8()) {
2358      vld1(Neon8, NeonListOperand(dst.low_fp()),
2359           NeonMemOperand(actual_src_addr));
2360      vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
2361    } else if (memtype == MachineType::Uint8()) {
2362      vld1(Neon8, NeonListOperand(dst.low_fp()),
2363           NeonMemOperand(actual_src_addr));
2364      vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
2365    } else if (memtype == MachineType::Int16()) {
2366      vld1(Neon16, NeonListOperand(dst.low_fp()),
2367           NeonMemOperand(actual_src_addr));
2368      vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
2369    } else if (memtype == MachineType::Uint16()) {
2370      vld1(Neon16, NeonListOperand(dst.low_fp()),
2371           NeonMemOperand(actual_src_addr));
2372      vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
2373    } else if (memtype == MachineType::Int32()) {
2374      vld1(Neon32, NeonListOperand(dst.low_fp()),
2375           NeonMemOperand(actual_src_addr));
2376      vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
2377    } else if (memtype == MachineType::Uint32()) {
2378      vld1(Neon32, NeonListOperand(dst.low_fp()),
2379           NeonMemOperand(actual_src_addr));
2380      vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
2381    }
2382  } else if (transform == LoadTransformationKind::kZeroExtend) {
2383    Simd128Register dest = liftoff::GetSimd128Register(dst);
2384    if (memtype == MachineType::Int32()) {
2385      vmov(dest, 0);
2386      vld1s(Neon32, NeonListOperand(dst.low_fp()), 0,
2387            NeonMemOperand(actual_src_addr));
2388    } else {
2389      DCHECK_EQ(MachineType::Int64(), memtype);
2390      vmov(dest.high(), 0);
2391      vld1(Neon64, NeonListOperand(dest.low()),
2392           NeonMemOperand(actual_src_addr));
2393    }
2394  } else {
2395    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2396    if (memtype == MachineType::Int8()) {
2397      vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
2398            NeonMemOperand(actual_src_addr));
2399    } else if (memtype == MachineType::Int16()) {
2400      vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
2401            NeonMemOperand(actual_src_addr));
2402    } else if (memtype == MachineType::Int32()) {
2403      vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
2404            NeonMemOperand(actual_src_addr));
2405    } else if (memtype == MachineType::Int64()) {
2406      vld1(Neon32, NeonListOperand(dst.low_fp()),
2407           NeonMemOperand(actual_src_addr));
2408      TurboAssembler::Move(dst.high_fp(), dst.low_fp());
2409    }
2410  }
2411}
2412
2413void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2414                                Register addr, Register offset_reg,
2415                                uintptr_t offset_imm, LoadType type,
2416                                uint8_t laneidx, uint32_t* protected_load_pc) {
2417  UseScratchRegisterScope temps(this);
2418  Register actual_src_addr = liftoff::CalculateActualAddress(
2419      this, &temps, addr, offset_reg, offset_imm);
2420  TurboAssembler::Move(liftoff::GetSimd128Register(dst),
2421                       liftoff::GetSimd128Register(src));
2422  *protected_load_pc = pc_offset();
2423  LoadStoreLaneParams load_params(type.mem_type().representation(), laneidx);
2424  NeonListOperand dst_op =
2425      NeonListOperand(load_params.low_op ? dst.low_fp() : dst.high_fp());
2426  TurboAssembler::LoadLane(load_params.sz, dst_op, load_params.laneidx,
2427                           NeonMemOperand(actual_src_addr));
2428}
2429
2430void LiftoffAssembler::StoreLane(Register dst, Register offset,
2431                                 uintptr_t offset_imm, LiftoffRegister src,
2432                                 StoreType type, uint8_t laneidx,
2433                                 uint32_t* protected_store_pc) {
2434  UseScratchRegisterScope temps(this);
2435  Register actual_dst_addr =
2436      liftoff::CalculateActualAddress(this, &temps, dst, offset, offset_imm);
2437  *protected_store_pc = pc_offset();
2438
2439  LoadStoreLaneParams store_params(type.mem_rep(), laneidx);
2440  NeonListOperand src_op =
2441      NeonListOperand(store_params.low_op ? src.low_fp() : src.high_fp());
2442  TurboAssembler::StoreLane(store_params.sz, src_op, store_params.laneidx,
2443                            NeonMemOperand(actual_dst_addr));
2444}
2445
2446void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2447                                          LiftoffRegister lhs,
2448                                          LiftoffRegister rhs) {
2449  UseScratchRegisterScope temps(this);
2450
2451  NeonListOperand table(liftoff::GetSimd128Register(lhs));
2452  if (dst == lhs) {
2453    // dst will be overwritten, so keep the table somewhere else.
2454    QwNeonRegister tbl = temps.AcquireQ();
2455    TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs));
2456    table = NeonListOperand(tbl);
2457  }
2458
2459  vtbl(dst.low_fp(), table, rhs.low_fp());
2460  vtbl(dst.high_fp(), table, rhs.high_fp());
2461}
2462
2463void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2464                                        LiftoffRegister src) {
2465  TurboAssembler::Move(dst.low_fp(), src.fp());
2466  TurboAssembler::Move(dst.high_fp(), src.fp());
2467}
2468
2469void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
2470                                               LiftoffRegister lhs,
2471                                               uint8_t imm_lane_idx) {
2472  ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx);
2473}
2474
2475void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
2476                                               LiftoffRegister src1,
2477                                               LiftoffRegister src2,
2478                                               uint8_t imm_lane_idx) {
2479  ReplaceLane(liftoff::GetSimd128Register(dst),
2480              liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx);
2481}
2482
2483void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
2484                                      LiftoffRegister src) {
2485  vabs(dst.low_fp(), src.low_fp());
2486  vabs(dst.high_fp(), src.high_fp());
2487}
2488
2489void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
2490                                      LiftoffRegister src) {
2491  vneg(dst.low_fp(), src.low_fp());
2492  vneg(dst.high_fp(), src.high_fp());
2493}
2494
2495void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
2496                                       LiftoffRegister src) {
2497  vsqrt(dst.low_fp(), src.low_fp());
2498  vsqrt(dst.high_fp(), src.high_fp());
2499}
2500
2501bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
2502                                       LiftoffRegister src) {
2503  if (!CpuFeatures::IsSupported(ARMv8)) {
2504    return false;
2505  }
2506
2507  CpuFeatureScope scope(this, ARMv8);
2508  vrintp(dst.low_fp(), src.low_fp());
2509  vrintp(dst.high_fp(), src.high_fp());
2510  return true;
2511}
2512
2513bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
2514                                        LiftoffRegister src) {
2515  if (!CpuFeatures::IsSupported(ARMv8)) {
2516    return false;
2517  }
2518
2519  CpuFeatureScope scope(this, ARMv8);
2520  vrintm(dst.low_fp(), src.low_fp());
2521  vrintm(dst.high_fp(), src.high_fp());
2522  return true;
2523}
2524
2525bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
2526                                        LiftoffRegister src) {
2527  if (!CpuFeatures::IsSupported(ARMv8)) {
2528    return false;
2529  }
2530
2531  CpuFeatureScope scope(this, ARMv8);
2532  vrintz(dst.low_fp(), src.low_fp());
2533  vrintz(dst.high_fp(), src.high_fp());
2534  return true;
2535}
2536
2537bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
2538                                              LiftoffRegister src) {
2539  if (!CpuFeatures::IsSupported(ARMv8)) {
2540    return false;
2541  }
2542
2543  CpuFeatureScope scope(this, ARMv8);
2544  vrintn(dst.low_fp(), src.low_fp());
2545  vrintn(dst.high_fp(), src.high_fp());
2546  return true;
2547}
2548
2549void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2550                                      LiftoffRegister rhs) {
2551  vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2552  vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2553}
2554
2555void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2556                                      LiftoffRegister rhs) {
2557  vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2558  vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2559}
2560
2561void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2562                                      LiftoffRegister rhs) {
2563  vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2564  vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2565}
2566
2567void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
2568                                      LiftoffRegister rhs) {
2569  vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2570  vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2571}
2572
2573void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
2574                                      LiftoffRegister rhs) {
2575  Simd128Register dest = liftoff::GetSimd128Register(dst);
2576  Simd128Register left = liftoff::GetSimd128Register(lhs);
2577  Simd128Register right = liftoff::GetSimd128Register(rhs);
2578
2579  liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2580                             liftoff::MinOrMax::kMin);
2581  liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2582                             liftoff::MinOrMax::kMin);
2583}
2584
2585void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
2586                                      LiftoffRegister rhs) {
2587  Simd128Register dest = liftoff::GetSimd128Register(dst);
2588  Simd128Register left = liftoff::GetSimd128Register(lhs);
2589  Simd128Register right = liftoff::GetSimd128Register(rhs);
2590
2591  liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2592                             liftoff::MinOrMax::kMax);
2593  liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2594                             liftoff::MinOrMax::kMax);
2595}
2596
2597void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2598                                       LiftoffRegister rhs) {
2599  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2600  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2601  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2602
2603  if (dst != rhs) {
2604    vmov(dest, left);
2605  }
2606
2607  VFPCompareAndSetFlags(right.low(), left.low());
2608  vmov(dest.low(), right.low(), mi);
2609  VFPCompareAndSetFlags(right.high(), left.high());
2610  vmov(dest.high(), right.high(), mi);
2611}
2612
2613void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2614                                       LiftoffRegister rhs) {
2615  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2616  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2617  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2618
2619  if (dst != rhs) {
2620    vmov(dest, left);
2621  }
2622
2623  VFPCompareAndSetFlags(right.low(), left.low());
2624  vmov(dest.low(), right.low(), gt);
2625  VFPCompareAndSetFlags(right.high(), left.high());
2626  vmov(dest.high(), right.high(), gt);
2627}
2628
2629void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
2630                                                      LiftoffRegister src) {
2631  F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst),
2632                        liftoff::GetSimd128Register(src));
2633}
2634
2635void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
2636                                                      LiftoffRegister src) {
2637  F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst),
2638                        liftoff::GetSimd128Register(src));
2639}
2640
2641void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
2642                                                    LiftoffRegister src) {
2643  F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst),
2644                       liftoff::GetSimd128Register(src));
2645}
2646
2647void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2648                                        LiftoffRegister src) {
2649  vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
2650}
2651
2652void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
2653                                               LiftoffRegister lhs,
2654                                               uint8_t imm_lane_idx) {
2655  ExtractLane(liftoff::GetFloatRegister(dst.fp()),
2656              liftoff::GetSimd128Register(lhs), imm_lane_idx);
2657}
2658
2659void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
2660                                               LiftoffRegister src1,
2661                                               LiftoffRegister src2,
2662                                               uint8_t imm_lane_idx) {
2663  ReplaceLane(liftoff::GetSimd128Register(dst),
2664              liftoff::GetSimd128Register(src1),
2665              liftoff::GetFloatRegister(src2.fp()), imm_lane_idx);
2666}
2667
2668void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
2669                                      LiftoffRegister src) {
2670  vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2671}
2672
2673void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
2674                                      LiftoffRegister src) {
2675  vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2676}
2677
2678void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
2679                                       LiftoffRegister src) {
2680  // The list of d registers available to us is from d0 to d15, which always
2681  // maps to 2 s registers.
2682  LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2683  LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code());
2684
2685  LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2686  LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code());
2687
2688  vsqrt(dst_low.low(), src_low.low());
2689  vsqrt(dst_low.high(), src_low.high());
2690  vsqrt(dst_high.low(), src_high.low());
2691  vsqrt(dst_high.high(), src_high.high());
2692}
2693
2694bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
2695                                       LiftoffRegister src) {
2696  if (!CpuFeatures::IsSupported(ARMv8)) {
2697    return false;
2698  }
2699
2700  CpuFeatureScope scope(this, ARMv8);
2701  vrintp(NeonS32, liftoff::GetSimd128Register(dst),
2702         liftoff::GetSimd128Register(src));
2703  return true;
2704}
2705
2706bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
2707                                        LiftoffRegister src) {
2708  if (!CpuFeatures::IsSupported(ARMv8)) {
2709    return false;
2710  }
2711
2712  CpuFeatureScope scope(this, ARMv8);
2713  vrintm(NeonS32, liftoff::GetSimd128Register(dst),
2714         liftoff::GetSimd128Register(src));
2715  return true;
2716}
2717
2718bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
2719                                        LiftoffRegister src) {
2720  if (!CpuFeatures::IsSupported(ARMv8)) {
2721    return false;
2722  }
2723
2724  CpuFeatureScope scope(this, ARMv8);
2725  vrintz(NeonS32, liftoff::GetSimd128Register(dst),
2726         liftoff::GetSimd128Register(src));
2727  return true;
2728}
2729
2730bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
2731                                              LiftoffRegister src) {
2732  if (!CpuFeatures::IsSupported(ARMv8)) {
2733    return false;
2734  }
2735
2736  CpuFeatureScope scope(this, ARMv8);
2737  vrintn(NeonS32, liftoff::GetSimd128Register(dst),
2738         liftoff::GetSimd128Register(src));
2739  return true;
2740}
2741
2742void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2743                                      LiftoffRegister rhs) {
2744  vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2745       liftoff::GetSimd128Register(rhs));
2746}
2747
2748void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2749                                      LiftoffRegister rhs) {
2750  vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2751       liftoff::GetSimd128Register(rhs));
2752}
2753
2754void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2755                                      LiftoffRegister rhs) {
2756  vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2757       liftoff::GetSimd128Register(rhs));
2758}
2759
2760void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
2761                                      LiftoffRegister rhs) {
2762  // The list of d registers available to us is from d0 to d15, which always
2763  // maps to 2 s registers.
2764  LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2765  LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code());
2766  LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code());
2767
2768  LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2769  LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code());
2770  LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code());
2771
2772  vdiv(dst_low.low(), lhs_low.low(), rhs_low.low());
2773  vdiv(dst_low.high(), lhs_low.high(), rhs_low.high());
2774  vdiv(dst_high.low(), lhs_high.low(), rhs_high.low());
2775  vdiv(dst_high.high(), lhs_high.high(), rhs_high.high());
2776}
2777
2778void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
2779                                      LiftoffRegister rhs) {
2780  vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2781       liftoff::GetSimd128Register(rhs));
2782}
2783
2784void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
2785                                      LiftoffRegister rhs) {
2786  vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2787       liftoff::GetSimd128Register(rhs));
2788}
2789
2790void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2791                                       LiftoffRegister rhs) {
2792  UseScratchRegisterScope temps(this);
2793
2794  QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
2795  if (dst == lhs || dst == rhs) {
2796    tmp = temps.AcquireQ();
2797  }
2798
2799  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2800  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2801  vcgt(tmp, left, right);
2802  vbsl(tmp, right, left);
2803
2804  if (dst == lhs || dst == rhs) {
2805    vmov(liftoff::GetSimd128Register(dst), tmp);
2806  }
2807}
2808
2809void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2810                                       LiftoffRegister rhs) {
2811  UseScratchRegisterScope temps(this);
2812
2813  QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
2814  if (dst == lhs || dst == rhs) {
2815    tmp = temps.AcquireQ();
2816  }
2817
2818  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2819  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2820  vcgt(tmp, right, left);
2821  vbsl(tmp, right, left);
2822
2823  if (dst == lhs || dst == rhs) {
2824    vmov(liftoff::GetSimd128Register(dst), tmp);
2825  }
2826}
2827
2828void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2829                                        LiftoffRegister src) {
2830  Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
2831  vdup(Neon32, dst_simd, src.low_gp());
2832  ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1);
2833  ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3);
2834}
2835
2836void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
2837                                               LiftoffRegister lhs,
2838                                               uint8_t imm_lane_idx) {
2839  ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
2840              imm_lane_idx * 2);
2841  ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
2842              imm_lane_idx * 2 + 1);
2843}
2844
2845void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
2846                                               LiftoffRegister src1,
2847                                               LiftoffRegister src2,
2848                                               uint8_t imm_lane_idx) {
2849  Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
2850  Simd128Register src1_simd = liftoff::GetSimd128Register(src1);
2851  ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2);
2852  ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32,
2853              imm_lane_idx * 2 + 1);
2854}
2855
2856void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
2857                                      LiftoffRegister src) {
2858  UseScratchRegisterScope temps(this);
2859  QwNeonRegister zero =
2860      dst == src ? temps.AcquireQ() : liftoff::GetSimd128Register(dst);
2861  vmov(zero, uint64_t{0});
2862  vsub(Neon64, liftoff::GetSimd128Register(dst), zero,
2863       liftoff::GetSimd128Register(src));
2864}
2865
2866void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
2867                                          LiftoffRegister src) {
2868  I64x2AllTrue(dst.gp(), liftoff::GetSimd128Register(src));
2869}
2870
2871void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
2872                                      LiftoffRegister rhs) {
2873  liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs);
2874}
2875
2876void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
2877                                       int32_t rhs) {
2878  vshl(NeonS64, liftoff::GetSimd128Register(dst),
2879       liftoff::GetSimd128Register(lhs), rhs & 63);
2880}
2881
2882void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
2883                                        LiftoffRegister lhs,
2884                                        LiftoffRegister rhs) {
2885  liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs);
2886}
2887
2888void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
2889                                         LiftoffRegister lhs, int32_t rhs) {
2890  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs,
2891                                                            rhs);
2892}
2893
2894void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
2895                                        LiftoffRegister lhs,
2896                                        LiftoffRegister rhs) {
2897  liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs);
2898}
2899
2900void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
2901                                         LiftoffRegister lhs, int32_t rhs) {
2902  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs,
2903                                                            rhs);
2904}
2905
2906void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2907                                      LiftoffRegister rhs) {
2908  vadd(Neon64, liftoff::GetSimd128Register(dst),
2909       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
2910}
2911
2912void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2913                                      LiftoffRegister rhs) {
2914  vsub(Neon64, liftoff::GetSimd128Register(dst),
2915       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
2916}
2917
2918void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2919                                      LiftoffRegister rhs) {
2920  UseScratchRegisterScope temps(this);
2921
2922  QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst);
2923  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2924  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2925
2926  // These temporary registers will be modified. We can directly modify lhs and
2927  // rhs if they are not uesd, saving on temporaries.
2928  QwNeonRegister tmp1 = left;
2929  QwNeonRegister tmp2 = right;
2930
2931  LiftoffRegList used_plus_dst =
2932      cache_state()->used_registers | LiftoffRegList{dst};
2933
2934  if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
2935    tmp1 = temps.AcquireQ();
2936    // We only have 1 scratch Q register, so acquire another ourselves.
2937    LiftoffRegList pinned = {dst};
2938    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
2939    tmp2 = liftoff::GetSimd128Register(unused_pair);
2940  } else if (used_plus_dst.has(lhs)) {
2941    tmp1 = temps.AcquireQ();
2942  } else if (used_plus_dst.has(rhs)) {
2943    tmp2 = temps.AcquireQ();
2944  }
2945
2946  // Algorithm from code-generator-arm.cc, refer to comments there for details.
2947  if (tmp1 != left) {
2948    vmov(tmp1, left);
2949  }
2950  if (tmp2 != right) {
2951    vmov(tmp2, right);
2952  }
2953
2954  vtrn(Neon32, tmp1.low(), tmp1.high());
2955  vtrn(Neon32, tmp2.low(), tmp2.high());
2956
2957  vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high());
2958  vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low());
2959  vshl(NeonU64, dst_neon, dst_neon, 32);
2960
2961  vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low());
2962}
2963
2964void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
2965                                                     LiftoffRegister src1,
2966                                                     LiftoffRegister src2) {
2967  vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.low_fp(),
2968        src2.low_fp());
2969}
2970
2971void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
2972                                                     LiftoffRegister src1,
2973                                                     LiftoffRegister src2) {
2974  vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.low_fp(),
2975        src2.low_fp());
2976}
2977
2978void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
2979                                                      LiftoffRegister src1,
2980                                                      LiftoffRegister src2) {
2981  vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.high_fp(),
2982        src2.high_fp());
2983}
2984
2985void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
2986                                                      LiftoffRegister src1,
2987                                                      LiftoffRegister src2) {
2988  vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.high_fp(),
2989        src2.high_fp());
2990}
2991
2992void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
2993                                          LiftoffRegister src) {
2994  I64x2BitMask(dst.gp(), liftoff::GetSimd128Register(src));
2995}
2996
2997void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
2998                                                     LiftoffRegister src) {
2999  vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.low_fp());
3000}
3001
3002void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
3003                                                      LiftoffRegister src) {
3004  vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.high_fp());
3005}
3006
3007void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
3008                                                     LiftoffRegister src) {
3009  vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.low_fp());
3010}
3011
3012void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
3013                                                      LiftoffRegister src) {
3014  vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.high_fp());
3015}
3016
3017void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
3018                                        LiftoffRegister src) {
3019  vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
3020}
3021
3022void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
3023                                               LiftoffRegister lhs,
3024                                               uint8_t imm_lane_idx) {
3025  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32,
3026              imm_lane_idx);
3027}
3028
3029void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
3030                                               LiftoffRegister src1,
3031                                               LiftoffRegister src2,
3032                                               uint8_t imm_lane_idx) {
3033  ReplaceLane(liftoff::GetSimd128Register(dst),
3034              liftoff::GetSimd128Register(src1), src2.gp(), NeonS32,
3035              imm_lane_idx);
3036}
3037
3038void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3039                                      LiftoffRegister src) {
3040  vneg(Neon32, liftoff::GetSimd128Register(dst),
3041       liftoff::GetSimd128Register(src));
3042}
3043
3044void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3045                                          LiftoffRegister src) {
3046  UseScratchRegisterScope temps(this);
3047  DwVfpRegister scratch = temps.AcquireD();
3048  vpmin(NeonU32, scratch, src.low_fp(), src.high_fp());
3049  vpmin(NeonU32, scratch, scratch, scratch);
3050  ExtractLane(dst.gp(), scratch, NeonS32, 0);
3051  cmp(dst.gp(), Operand(0));
3052  mov(dst.gp(), Operand(1), LeaveCC, ne);
3053}
3054
3055void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3056                                          LiftoffRegister src) {
3057  UseScratchRegisterScope temps(this);
3058  Simd128Register tmp = liftoff::GetSimd128Register(src);
3059  Simd128Register mask = temps.AcquireQ();
3060
3061  if (cache_state()->is_used(src)) {
3062    // We only have 1 scratch Q register, so try and reuse src.
3063    LiftoffRegList pinned = {src};
3064    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3065    mask = liftoff::GetSimd128Register(unused_pair);
3066  }
3067
3068  vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
3069  // Set i-th bit of each lane i. When AND with tmp, the lanes that
3070  // are signed will have i-th bit set, unsigned will be 0.
3071  vmov(mask.low(), base::Double((uint64_t)0x0000'0002'0000'0001));
3072  vmov(mask.high(), base::Double((uint64_t)0x0000'0008'0000'0004));
3073  vand(tmp, mask, tmp);
3074  vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
3075  vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
3076  VmovLow(dst.gp(), tmp.low());
3077}
3078
3079void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3080                                      LiftoffRegister rhs) {
3081  liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs);
3082}
3083
3084void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3085                                       int32_t rhs) {
3086  vshl(NeonS32, liftoff::GetSimd128Register(dst),
3087       liftoff::GetSimd128Register(lhs), rhs & 31);
3088}
3089
3090void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3091                                        LiftoffRegister lhs,
3092                                        LiftoffRegister rhs) {
3093  liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs);
3094}
3095
3096void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3097                                         LiftoffRegister lhs, int32_t rhs) {
3098  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs,
3099                                                            rhs);
3100}
3101
3102void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3103                                        LiftoffRegister lhs,
3104                                        LiftoffRegister rhs) {
3105  liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs);
3106}
3107
3108void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3109                                         LiftoffRegister lhs, int32_t rhs) {
3110  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs,
3111                                                            rhs);
3112}
3113
3114void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3115                                      LiftoffRegister rhs) {
3116  vadd(Neon32, liftoff::GetSimd128Register(dst),
3117       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3118}
3119
3120void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3121                                      LiftoffRegister rhs) {
3122  vsub(Neon32, liftoff::GetSimd128Register(dst),
3123       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3124}
3125
3126void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3127                                      LiftoffRegister rhs) {
3128  vmul(Neon32, liftoff::GetSimd128Register(dst),
3129       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3130}
3131
3132void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3133                                        LiftoffRegister lhs,
3134                                        LiftoffRegister rhs) {
3135  vmin(NeonS32, liftoff::GetSimd128Register(dst),
3136       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3137}
3138
3139void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3140                                        LiftoffRegister lhs,
3141                                        LiftoffRegister rhs) {
3142  vmin(NeonU32, liftoff::GetSimd128Register(dst),
3143       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3144}
3145
3146void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3147                                        LiftoffRegister lhs,
3148                                        LiftoffRegister rhs) {
3149  vmax(NeonS32, liftoff::GetSimd128Register(dst),
3150       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3151}
3152
3153void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3154                                        LiftoffRegister lhs,
3155                                        LiftoffRegister rhs) {
3156  vmax(NeonU32, liftoff::GetSimd128Register(dst),
3157       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3158}
3159
3160void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3161                                              LiftoffRegister lhs,
3162                                              LiftoffRegister rhs) {
3163  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
3164  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
3165  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
3166
3167  UseScratchRegisterScope temps(this);
3168  Simd128Register scratch = temps.AcquireQ();
3169
3170  vmull(NeonS16, scratch, left.low(), right.low());
3171  vpadd(Neon32, dest.low(), scratch.low(), scratch.high());
3172
3173  vmull(NeonS16, scratch, left.high(), right.high());
3174  vpadd(Neon32, dest.high(), scratch.low(), scratch.high());
3175}
3176
3177void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3178                                                          LiftoffRegister src) {
3179  vpaddl(NeonS16, liftoff::GetSimd128Register(dst),
3180         liftoff::GetSimd128Register(src));
3181}
3182
3183void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3184                                                          LiftoffRegister src) {
3185  vpaddl(NeonU16, liftoff::GetSimd128Register(dst),
3186         liftoff::GetSimd128Register(src));
3187}
3188
3189void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
3190                                                     LiftoffRegister src1,
3191                                                     LiftoffRegister src2) {
3192  vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.low_fp(),
3193        src2.low_fp());
3194}
3195
3196void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
3197                                                     LiftoffRegister src1,
3198                                                     LiftoffRegister src2) {
3199  vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.low_fp(),
3200        src2.low_fp());
3201}
3202
3203void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
3204                                                      LiftoffRegister src1,
3205                                                      LiftoffRegister src2) {
3206  vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.high_fp(),
3207        src2.high_fp());
3208}
3209
3210void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
3211                                                      LiftoffRegister src1,
3212                                                      LiftoffRegister src2) {
3213  vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.high_fp(),
3214        src2.high_fp());
3215}
3216
3217void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
3218                                        LiftoffRegister src) {
3219  vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp());
3220}
3221
3222void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3223                                      LiftoffRegister src) {
3224  vneg(Neon16, liftoff::GetSimd128Register(dst),
3225       liftoff::GetSimd128Register(src));
3226}
3227
3228void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3229                                          LiftoffRegister src) {
3230  UseScratchRegisterScope temps(this);
3231  DwVfpRegister scratch = temps.AcquireD();
3232  vpmin(NeonU16, scratch, src.low_fp(), src.high_fp());
3233  vpmin(NeonU16, scratch, scratch, scratch);
3234  vpmin(NeonU16, scratch, scratch, scratch);
3235  ExtractLane(dst.gp(), scratch, NeonS16, 0);
3236  cmp(dst.gp(), Operand(0));
3237  mov(dst.gp(), Operand(1), LeaveCC, ne);
3238}
3239
3240void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3241                                          LiftoffRegister src) {
3242  UseScratchRegisterScope temps(this);
3243  Simd128Register tmp = liftoff::GetSimd128Register(src);
3244  Simd128Register mask = temps.AcquireQ();
3245
3246  if (cache_state()->is_used(src)) {
3247    // We only have 1 scratch Q register, so try and reuse src.
3248    LiftoffRegList pinned = {src};
3249    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3250    mask = liftoff::GetSimd128Register(unused_pair);
3251  }
3252
3253  vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
3254  // Set i-th bit of each lane i. When AND with tmp, the lanes that
3255  // are signed will have i-th bit set, unsigned will be 0.
3256  vmov(mask.low(), base::Double((uint64_t)0x0008'0004'0002'0001));
3257  vmov(mask.high(), base::Double((uint64_t)0x0080'0040'0020'0010));
3258  vand(tmp, mask, tmp);
3259  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
3260  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3261  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3262  vmov(NeonU16, dst.gp(), tmp.low(), 0);
3263}
3264
3265void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3266                                      LiftoffRegister rhs) {
3267  liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs);
3268}
3269
3270void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3271                                       int32_t rhs) {
3272  vshl(NeonS16, liftoff::GetSimd128Register(dst),
3273       liftoff::GetSimd128Register(lhs), rhs & 15);
3274}
3275
3276void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3277                                        LiftoffRegister lhs,
3278                                        LiftoffRegister rhs) {
3279  liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs);
3280}
3281
3282void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3283                                         LiftoffRegister lhs, int32_t rhs) {
3284  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs,
3285                                                            rhs);
3286}
3287
3288void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3289                                        LiftoffRegister lhs,
3290                                        LiftoffRegister rhs) {
3291  liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs);
3292}
3293
3294void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3295                                         LiftoffRegister lhs, int32_t rhs) {
3296  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs,
3297                                                            rhs);
3298}
3299
3300void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3301                                      LiftoffRegister rhs) {
3302  vadd(Neon16, liftoff::GetSimd128Register(dst),
3303       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3304}
3305
3306void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3307                                            LiftoffRegister lhs,
3308                                            LiftoffRegister rhs) {
3309  vqadd(NeonS16, liftoff::GetSimd128Register(dst),
3310        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3311}
3312
3313void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3314                                      LiftoffRegister rhs) {
3315  vsub(Neon16, liftoff::GetSimd128Register(dst),
3316       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3317}
3318
3319void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3320                                            LiftoffRegister lhs,
3321                                            LiftoffRegister rhs) {
3322  vqsub(NeonS16, liftoff::GetSimd128Register(dst),
3323        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3324}
3325
3326void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3327                                            LiftoffRegister lhs,
3328                                            LiftoffRegister rhs) {
3329  vqsub(NeonU16, liftoff::GetSimd128Register(dst),
3330        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3331}
3332
3333void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3334                                      LiftoffRegister rhs) {
3335  vmul(Neon16, liftoff::GetSimd128Register(dst),
3336       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3337}
3338
3339void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3340                                            LiftoffRegister lhs,
3341                                            LiftoffRegister rhs) {
3342  vqadd(NeonU16, liftoff::GetSimd128Register(dst),
3343        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3344}
3345
3346void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3347                                        LiftoffRegister lhs,
3348                                        LiftoffRegister rhs) {
3349  vmin(NeonS16, liftoff::GetSimd128Register(dst),
3350       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3351}
3352
3353void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3354                                        LiftoffRegister lhs,
3355                                        LiftoffRegister rhs) {
3356  vmin(NeonU16, liftoff::GetSimd128Register(dst),
3357       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3358}
3359
3360void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3361                                        LiftoffRegister lhs,
3362                                        LiftoffRegister rhs) {
3363  vmax(NeonS16, liftoff::GetSimd128Register(dst),
3364       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3365}
3366
3367void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3368                                        LiftoffRegister lhs,
3369                                        LiftoffRegister rhs) {
3370  vmax(NeonU16, liftoff::GetSimd128Register(dst),
3371       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3372}
3373
3374void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
3375                                                 LiftoffRegister lhs,
3376                                                 uint8_t imm_lane_idx) {
3377  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16,
3378              imm_lane_idx);
3379}
3380
3381void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
3382                                                 LiftoffRegister lhs,
3383                                                 uint8_t imm_lane_idx) {
3384  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16,
3385              imm_lane_idx);
3386}
3387
3388void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
3389                                               LiftoffRegister src1,
3390                                               LiftoffRegister src2,
3391                                               uint8_t imm_lane_idx) {
3392  ReplaceLane(liftoff::GetSimd128Register(dst),
3393              liftoff::GetSimd128Register(src1), src2.gp(), NeonS16,
3394              imm_lane_idx);
3395}
3396
3397void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3398                                                          LiftoffRegister src) {
3399  vpaddl(NeonS8, liftoff::GetSimd128Register(dst),
3400         liftoff::GetSimd128Register(src));
3401}
3402
3403void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3404                                                          LiftoffRegister src) {
3405  vpaddl(NeonU8, liftoff::GetSimd128Register(dst),
3406         liftoff::GetSimd128Register(src));
3407}
3408
3409void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3410                                                     LiftoffRegister src1,
3411                                                     LiftoffRegister src2) {
3412  vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
3413}
3414
3415void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3416                                                     LiftoffRegister src1,
3417                                                     LiftoffRegister src2) {
3418  vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
3419}
3420
3421void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3422                                                      LiftoffRegister src1,
3423                                                      LiftoffRegister src2) {
3424  vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.high_fp(),
3425        src2.high_fp());
3426}
3427
3428void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3429                                                      LiftoffRegister src1,
3430                                                      LiftoffRegister src2) {
3431  vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.high_fp(),
3432        src2.high_fp());
3433}
3434
3435void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3436                                                LiftoffRegister src1,
3437                                                LiftoffRegister src2) {
3438  vqrdmulh(NeonS16, liftoff::GetSimd128Register(dst),
3439           liftoff::GetSimd128Register(src1),
3440           liftoff::GetSimd128Register(src2));
3441}
3442
3443void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
3444                                          LiftoffRegister lhs,
3445                                          LiftoffRegister rhs,
3446                                          const uint8_t shuffle[16],
3447                                          bool is_swizzle) {
3448  Simd128Register dest = liftoff::GetSimd128Register(dst);
3449  Simd128Register src1 = liftoff::GetSimd128Register(lhs);
3450  Simd128Register src2 = liftoff::GetSimd128Register(rhs);
3451  UseScratchRegisterScope temps(this);
3452  Simd128Register scratch = temps.AcquireQ();
3453  if ((src1 != src2) && src1.code() + 1 != src2.code()) {
3454    // vtbl requires the operands to be consecutive or the same.
3455    // If they are the same, we build a smaller list operand (table_size = 2).
3456    // If they are not the same, and not consecutive, we move the src1 and src2
3457    // to q14 and q15, which will be unused since they are not allocatable in
3458    // Liftoff. If the operands are the same, then we build a smaller list
3459    // operand below.
3460    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d28),
3461                  "This only works if q14-q15 (d28-d31) are not used.");
3462    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d29),
3463                  "This only works if q14-q15 (d28-d31) are not used.");
3464    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d30),
3465                  "This only works if q14-q15 (d28-d31) are not used.");
3466    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d31),
3467                  "This only works if q14-q15 (d28-d31) are not used.");
3468    vmov(q14, src1);
3469    src1 = q14;
3470    vmov(q15, src2);
3471    src2 = q15;
3472  }
3473
3474  int table_size = src1 == src2 ? 2 : 4;
3475
3476  int scratch_s_base = scratch.code() * 4;
3477  for (int j = 0; j < 4; j++) {
3478    uint32_t imm = 0;
3479    for (int i = 3; i >= 0; i--) {
3480      imm = (imm << 8) | shuffle[j * 4 + i];
3481    }
3482    DCHECK_EQ(0, imm & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
3483    // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
3484    vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm));
3485  }
3486
3487  DwVfpRegister table_base = src1.low();
3488  NeonListOperand table(table_base, table_size);
3489
3490  if (dest != src1 && dest != src2) {
3491    vtbl(dest.low(), table, scratch.low());
3492    vtbl(dest.high(), table, scratch.high());
3493  } else {
3494    vtbl(scratch.low(), table, scratch.low());
3495    vtbl(scratch.high(), table, scratch.high());
3496    vmov(dest, scratch);
3497  }
3498}
3499
3500void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
3501                                         LiftoffRegister src) {
3502  vcnt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
3503}
3504
3505void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
3506                                        LiftoffRegister src) {
3507  vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
3508}
3509
3510void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
3511                                                 LiftoffRegister lhs,
3512                                                 uint8_t imm_lane_idx) {
3513  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx);
3514}
3515
3516void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
3517                                                 LiftoffRegister lhs,
3518                                                 uint8_t imm_lane_idx) {
3519  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx);
3520}
3521
3522void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
3523                                               LiftoffRegister src1,
3524                                               LiftoffRegister src2,
3525                                               uint8_t imm_lane_idx) {
3526  ReplaceLane(liftoff::GetSimd128Register(dst),
3527              liftoff::GetSimd128Register(src1), src2.gp(), NeonS8,
3528              imm_lane_idx);
3529}
3530
3531void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
3532                                      LiftoffRegister src) {
3533  vneg(Neon8, liftoff::GetSimd128Register(dst),
3534       liftoff::GetSimd128Register(src));
3535}
3536
3537void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
3538                                         LiftoffRegister src) {
3539  liftoff::EmitAnyTrue(this, dst, src);
3540}
3541
3542void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
3543                                          LiftoffRegister src) {
3544  UseScratchRegisterScope temps(this);
3545  DwVfpRegister scratch = temps.AcquireD();
3546  vpmin(NeonU8, scratch, src.low_fp(), src.high_fp());
3547  vpmin(NeonU8, scratch, scratch, scratch);
3548  vpmin(NeonU8, scratch, scratch, scratch);
3549  vpmin(NeonU8, scratch, scratch, scratch);
3550  ExtractLane(dst.gp(), scratch, NeonS8, 0);
3551  cmp(dst.gp(), Operand(0));
3552  mov(dst.gp(), Operand(1), LeaveCC, ne);
3553}
3554
3555void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
3556                                          LiftoffRegister src) {
3557  UseScratchRegisterScope temps(this);
3558  Simd128Register tmp = liftoff::GetSimd128Register(src);
3559  Simd128Register mask = temps.AcquireQ();
3560
3561  if (cache_state()->is_used(src)) {
3562    // We only have 1 scratch Q register, so try and reuse src.
3563    LiftoffRegList pinned = {src};
3564    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3565    mask = liftoff::GetSimd128Register(unused_pair);
3566  }
3567
3568  vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
3569  // Set i-th bit of each lane i. When AND with tmp, the lanes that
3570  // are signed will have i-th bit set, unsigned will be 0.
3571  vmov(mask.low(), base::Double((uint64_t)0x8040'2010'0804'0201));
3572  vmov(mask.high(), base::Double((uint64_t)0x8040'2010'0804'0201));
3573  vand(tmp, mask, tmp);
3574  vext(mask, tmp, tmp, 8);
3575  vzip(Neon8, mask, tmp);
3576  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
3577  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3578  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3579  vmov(NeonU16, dst.gp(), tmp.low(), 0);
3580}
3581
3582void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
3583                                      LiftoffRegister rhs) {
3584  liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs);
3585}
3586
3587void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
3588                                       int32_t rhs) {
3589  vshl(NeonS8, liftoff::GetSimd128Register(dst),
3590       liftoff::GetSimd128Register(lhs), rhs & 7);
3591}
3592
3593void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
3594                                        LiftoffRegister lhs,
3595                                        LiftoffRegister rhs) {
3596  liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs);
3597}
3598
3599void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
3600                                         LiftoffRegister lhs, int32_t rhs) {
3601  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs);
3602}
3603
3604void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
3605                                        LiftoffRegister lhs,
3606                                        LiftoffRegister rhs) {
3607  liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs);
3608}
3609
3610void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
3611                                         LiftoffRegister lhs, int32_t rhs) {
3612  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs);
3613}
3614
3615void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
3616                                      LiftoffRegister rhs) {
3617  vadd(Neon8, liftoff::GetSimd128Register(dst),
3618       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3619}
3620
3621void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
3622                                            LiftoffRegister lhs,
3623                                            LiftoffRegister rhs) {
3624  vqadd(NeonS8, liftoff::GetSimd128Register(dst),
3625        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3626}
3627
3628void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
3629                                      LiftoffRegister rhs) {
3630  vsub(Neon8, liftoff::GetSimd128Register(dst),
3631       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3632}
3633
3634void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
3635                                            LiftoffRegister lhs,
3636                                            LiftoffRegister rhs) {
3637  vqsub(NeonS8, liftoff::GetSimd128Register(dst),
3638        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3639}
3640
3641void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
3642                                            LiftoffRegister lhs,
3643                                            LiftoffRegister rhs) {
3644  vqsub(NeonU8, liftoff::GetSimd128Register(dst),
3645        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3646}
3647
3648void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
3649                                            LiftoffRegister lhs,
3650                                            LiftoffRegister rhs) {
3651  vqadd(NeonU8, liftoff::GetSimd128Register(dst),
3652        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3653}
3654
3655void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
3656                                        LiftoffRegister lhs,
3657                                        LiftoffRegister rhs) {
3658  vmin(NeonS8, liftoff::GetSimd128Register(dst),
3659       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3660}
3661
3662void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
3663                                        LiftoffRegister lhs,
3664                                        LiftoffRegister rhs) {
3665  vmin(NeonU8, liftoff::GetSimd128Register(dst),
3666       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3667}
3668
3669void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3670                                        LiftoffRegister lhs,
3671                                        LiftoffRegister rhs) {
3672  vmax(NeonS8, liftoff::GetSimd128Register(dst),
3673       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3674}
3675
3676void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3677                                        LiftoffRegister lhs,
3678                                        LiftoffRegister rhs) {
3679  vmax(NeonU8, liftoff::GetSimd128Register(dst),
3680       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3681}
3682
3683void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
3684                                     LiftoffRegister rhs) {
3685  vceq(Neon8, liftoff::GetSimd128Register(dst),
3686       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3687}
3688
3689void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
3690                                     LiftoffRegister rhs) {
3691  vceq(Neon8, liftoff::GetSimd128Register(dst),
3692       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3693  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3694}
3695
3696void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3697                                       LiftoffRegister rhs) {
3698  vcgt(NeonS8, liftoff::GetSimd128Register(dst),
3699       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3700}
3701
3702void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3703                                       LiftoffRegister rhs) {
3704  vcgt(NeonU8, liftoff::GetSimd128Register(dst),
3705       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3706}
3707
3708void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3709                                       LiftoffRegister rhs) {
3710  vcge(NeonS8, liftoff::GetSimd128Register(dst),
3711       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3712}
3713
3714void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3715                                       LiftoffRegister rhs) {
3716  vcge(NeonU8, liftoff::GetSimd128Register(dst),
3717       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3718}
3719
3720void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
3721                                     LiftoffRegister rhs) {
3722  vceq(Neon16, liftoff::GetSimd128Register(dst),
3723       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3724}
3725
3726void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
3727                                     LiftoffRegister rhs) {
3728  vceq(Neon16, liftoff::GetSimd128Register(dst),
3729       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3730  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3731}
3732
3733void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3734                                       LiftoffRegister rhs) {
3735  vcgt(NeonS16, liftoff::GetSimd128Register(dst),
3736       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3737}
3738
3739void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3740                                       LiftoffRegister rhs) {
3741  vcgt(NeonU16, liftoff::GetSimd128Register(dst),
3742       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3743}
3744
3745void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3746                                       LiftoffRegister rhs) {
3747  vcge(NeonS16, liftoff::GetSimd128Register(dst),
3748       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3749}
3750
3751void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3752                                       LiftoffRegister rhs) {
3753  vcge(NeonU16, liftoff::GetSimd128Register(dst),
3754       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3755}
3756
3757void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3758                                     LiftoffRegister rhs) {
3759  vceq(Neon32, liftoff::GetSimd128Register(dst),
3760       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3761}
3762
3763void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3764                                     LiftoffRegister rhs) {
3765  vceq(Neon32, liftoff::GetSimd128Register(dst),
3766       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3767  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3768}
3769
3770void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3771                                       LiftoffRegister rhs) {
3772  vcgt(NeonS32, liftoff::GetSimd128Register(dst),
3773       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3774}
3775
3776void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3777                                       LiftoffRegister rhs) {
3778  vcgt(NeonU32, liftoff::GetSimd128Register(dst),
3779       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3780}
3781
3782void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3783                                       LiftoffRegister rhs) {
3784  vcge(NeonS32, liftoff::GetSimd128Register(dst),
3785       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3786}
3787
3788void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3789                                       LiftoffRegister rhs) {
3790  vcge(NeonU32, liftoff::GetSimd128Register(dst),
3791       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3792}
3793
3794void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3795                                     LiftoffRegister rhs) {
3796  I64x2Eq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3797          liftoff::GetSimd128Register(rhs));
3798}
3799
3800void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3801                                     LiftoffRegister rhs) {
3802  I64x2Ne(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3803          liftoff::GetSimd128Register(rhs));
3804}
3805
3806void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3807                                       LiftoffRegister rhs) {
3808  I64x2GtS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3809           liftoff::GetSimd128Register(rhs));
3810}
3811
3812void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3813                                       LiftoffRegister rhs) {
3814  I64x2GeS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3815           liftoff::GetSimd128Register(rhs));
3816}
3817
3818void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3819                                     LiftoffRegister rhs) {
3820  vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3821       liftoff::GetSimd128Register(rhs));
3822}
3823
3824void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3825                                     LiftoffRegister rhs) {
3826  vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3827       liftoff::GetSimd128Register(rhs));
3828  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3829}
3830
3831void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
3832                                     LiftoffRegister rhs) {
3833  vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
3834       liftoff::GetSimd128Register(lhs));
3835}
3836
3837void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
3838                                     LiftoffRegister rhs) {
3839  vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
3840       liftoff::GetSimd128Register(lhs));
3841}
3842
3843void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3844                                     LiftoffRegister rhs) {
3845  liftoff::F64x2Compare(this, dst, lhs, rhs, eq);
3846}
3847
3848void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3849                                     LiftoffRegister rhs) {
3850  liftoff::F64x2Compare(this, dst, lhs, rhs, ne);
3851}
3852
3853void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
3854                                     LiftoffRegister rhs) {
3855  liftoff::F64x2Compare(this, dst, lhs, rhs, lt);
3856}
3857
3858void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
3859                                     LiftoffRegister rhs) {
3860  liftoff::F64x2Compare(this, dst, lhs, rhs, le);
3861}
3862
3863void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
3864                                       const uint8_t imms[16]) {
3865  uint64_t vals[2];
3866  memcpy(vals, imms, sizeof(vals));
3867  vmov(dst.low_fp(), base::Double(vals[0]));
3868  vmov(dst.high_fp(), base::Double(vals[1]));
3869}
3870
3871void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
3872  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
3873}
3874
3875void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
3876                                     LiftoffRegister rhs) {
3877  vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3878       liftoff::GetSimd128Register(rhs));
3879}
3880
3881void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
3882                                    LiftoffRegister rhs) {
3883  vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3884       liftoff::GetSimd128Register(rhs));
3885}
3886
3887void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
3888                                     LiftoffRegister rhs) {
3889  veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3890       liftoff::GetSimd128Register(rhs));
3891}
3892
3893void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
3894                                        LiftoffRegister src1,
3895                                        LiftoffRegister src2,
3896                                        LiftoffRegister mask) {
3897  if (dst != mask) {
3898    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask));
3899  }
3900  vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1),
3901       liftoff::GetSimd128Register(src2));
3902}
3903
3904void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
3905                                                 LiftoffRegister src) {
3906  vcvt_s32_f32(liftoff::GetSimd128Register(dst),
3907               liftoff::GetSimd128Register(src));
3908}
3909
3910void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
3911                                                 LiftoffRegister src) {
3912  vcvt_u32_f32(liftoff::GetSimd128Register(dst),
3913               liftoff::GetSimd128Register(src));
3914}
3915
3916void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
3917                                                 LiftoffRegister src) {
3918  vcvt_f32_s32(liftoff::GetSimd128Register(dst),
3919               liftoff::GetSimd128Register(src));
3920}
3921
3922void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
3923                                                 LiftoffRegister src) {
3924  vcvt_f32_u32(liftoff::GetSimd128Register(dst),
3925               liftoff::GetSimd128Register(src));
3926}
3927
3928void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
3929                                                    LiftoffRegister src) {
3930  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
3931  vcvt_f32_f64(dst_d.low(), src.low_fp());
3932  vcvt_f32_f64(dst_d.high(), src.high_fp());
3933  vmov(dst.high_fp(), 0);
3934}
3935
3936void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
3937                                                 LiftoffRegister lhs,
3938                                                 LiftoffRegister rhs) {
3939  liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs);
3940}
3941
3942void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
3943                                                 LiftoffRegister lhs,
3944                                                 LiftoffRegister rhs) {
3945  liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs);
3946}
3947
3948void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
3949                                                 LiftoffRegister lhs,
3950                                                 LiftoffRegister rhs) {
3951  liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs);
3952}
3953
3954void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
3955                                                 LiftoffRegister lhs,
3956                                                 LiftoffRegister rhs) {
3957  liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs);
3958}
3959
3960void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
3961                                                     LiftoffRegister src) {
3962  vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp());
3963}
3964
3965void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
3966                                                      LiftoffRegister src) {
3967  vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp());
3968}
3969
3970void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
3971                                                     LiftoffRegister src) {
3972  vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp());
3973}
3974
3975void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
3976                                                      LiftoffRegister src) {
3977  vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp());
3978}
3979
3980void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
3981                                                     LiftoffRegister src) {
3982  vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp());
3983}
3984
3985void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
3986                                                      LiftoffRegister src) {
3987  vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp());
3988}
3989
3990void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
3991                                                     LiftoffRegister src) {
3992  vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp());
3993}
3994
3995void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
3996                                                      LiftoffRegister src) {
3997  vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp());
3998}
3999
4000void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
4001                                                         LiftoffRegister src) {
4002  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
4003  vcvt_s32_f64(dst_d.low(), src.low_fp());
4004  vcvt_s32_f64(dst_d.high(), src.high_fp());
4005  vmov(dst.high_fp(), 0);
4006}
4007
4008void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
4009                                                         LiftoffRegister src) {
4010  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
4011  vcvt_u32_f64(dst_d.low(), src.low_fp());
4012  vcvt_u32_f64(dst_d.high(), src.high_fp());
4013  vmov(dst.high_fp(), 0);
4014}
4015
4016void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
4017                                         LiftoffRegister lhs,
4018                                         LiftoffRegister rhs) {
4019  vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
4020       liftoff::GetSimd128Register(rhs));
4021}
4022
4023void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
4024                                                     LiftoffRegister lhs,
4025                                                     LiftoffRegister rhs) {
4026  vrhadd(NeonU8, liftoff::GetSimd128Register(dst),
4027         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
4028}
4029
4030void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
4031                                                     LiftoffRegister lhs,
4032                                                     LiftoffRegister rhs) {
4033  vrhadd(NeonU16, liftoff::GetSimd128Register(dst),
4034         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
4035}
4036
4037void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
4038                                      LiftoffRegister src) {
4039  vabs(Neon8, liftoff::GetSimd128Register(dst),
4040       liftoff::GetSimd128Register(src));
4041}
4042
4043void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
4044                                      LiftoffRegister src) {
4045  vabs(Neon16, liftoff::GetSimd128Register(dst),
4046       liftoff::GetSimd128Register(src));
4047}
4048
4049void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
4050                                      LiftoffRegister src) {
4051  vabs(Neon32, liftoff::GetSimd128Register(dst),
4052       liftoff::GetSimd128Register(src));
4053}
4054
4055void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
4056                                      LiftoffRegister src) {
4057  I64x2Abs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
4058}
4059
4060void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
4061  ldr(limit_address, MemOperand(limit_address));
4062  cmp(sp, limit_address);
4063  b(ool_code, ls);
4064}
4065
4066void LiftoffAssembler::CallTrapCallbackForTesting() {
4067  PrepareCallCFunction(0, 0);
4068  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
4069}
4070
4071void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
4072  // Asserts unreachable within the wasm code.
4073  TurboAssembler::AssertUnreachable(reason);
4074}
4075
4076void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
4077  RegList core_regs = regs.GetGpList();
4078  if (!core_regs.is_empty()) {
4079    stm(db_w, sp, core_regs);
4080  }
4081  LiftoffRegList fp_regs = regs & kFpCacheRegList;
4082  while (!fp_regs.is_empty()) {
4083    LiftoffRegister reg = fp_regs.GetFirstRegSet();
4084    DoubleRegister first = reg.fp();
4085    DoubleRegister last = first;
4086    fp_regs.clear(reg);
4087    while (!fp_regs.is_empty()) {
4088      LiftoffRegister reg = fp_regs.GetFirstRegSet();
4089      int code = reg.fp().code();
4090      // vstm can not push more than 16 registers. We have to make sure the
4091      // condition is met.
4092      if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break;
4093      last = reg.fp();
4094      fp_regs.clear(reg);
4095    }
4096    vstm(db_w, sp, first, last);
4097  }
4098}
4099
4100void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
4101  LiftoffRegList fp_regs = regs & kFpCacheRegList;
4102  while (!fp_regs.is_empty()) {
4103    LiftoffRegister reg = fp_regs.GetLastRegSet();
4104    DoubleRegister last = reg.fp();
4105    DoubleRegister first = last;
4106    fp_regs.clear(reg);
4107    while (!fp_regs.is_empty()) {
4108      LiftoffRegister reg = fp_regs.GetLastRegSet();
4109      int code = reg.fp().code();
4110      if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break;
4111      first = reg.fp();
4112      fp_regs.clear(reg);
4113    }
4114    vldm(ia_w, sp, first, last);
4115  }
4116  RegList core_regs = regs.GetGpList();
4117  if (!core_regs.is_empty()) {
4118    ldm(ia_w, sp, core_regs);
4119  }
4120}
4121
4122void LiftoffAssembler::RecordSpillsInSafepoint(
4123    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
4124    LiftoffRegList ref_spills, int spill_offset) {
4125  int spill_space_size = 0;
4126  while (!all_spills.is_empty()) {
4127    LiftoffRegister reg = all_spills.GetLastRegSet();
4128    if (ref_spills.has(reg)) {
4129      safepoint.DefineTaggedStackSlot(spill_offset);
4130    }
4131    all_spills.clear(reg);
4132    ++spill_offset;
4133    spill_space_size += kSystemPointerSize;
4134  }
4135  // Record the number of additional spill slots.
4136  RecordOolSpillSpaceSize(spill_space_size);
4137}
4138
4139void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
4140  Drop(num_stack_slots);
4141  Ret();
4142}
4143
4144void LiftoffAssembler::CallC(const ValueKindSig* sig,
4145                             const LiftoffRegister* args,
4146                             const LiftoffRegister* rets,
4147                             ValueKind out_argument_kind, int stack_bytes,
4148                             ExternalReference ext_ref) {
4149  // Arguments are passed by pushing them all to the stack and then passing
4150  // a pointer to them.
4151  DCHECK(IsAligned(stack_bytes, kSystemPointerSize));
4152  // Reserve space in the stack.
4153  AllocateStackSpace(stack_bytes);
4154
4155  int arg_bytes = 0;
4156  for (ValueKind param_kind : sig->parameters()) {
4157    switch (param_kind) {
4158      case kI32:
4159        str(args->gp(), MemOperand(sp, arg_bytes));
4160        break;
4161      case kI64:
4162        str(args->low_gp(), MemOperand(sp, arg_bytes));
4163        str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize));
4164        break;
4165      case kF32:
4166        vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes));
4167        break;
4168      case kF64:
4169        vstr(args->fp(), MemOperand(sp, arg_bytes));
4170        break;
4171      case kS128:
4172        vstr(args->low_fp(), MemOperand(sp, arg_bytes));
4173        vstr(args->high_fp(),
4174             MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
4175        break;
4176      default:
4177        UNREACHABLE();
4178    }
4179    args++;
4180    arg_bytes += value_kind_size(param_kind);
4181  }
4182  DCHECK_LE(arg_bytes, stack_bytes);
4183
4184  // Pass a pointer to the buffer with the arguments to the C function.
4185  mov(r0, sp);
4186
4187  // Now call the C function.
4188  constexpr int kNumCCallArgs = 1;
4189  PrepareCallCFunction(kNumCCallArgs);
4190  CallCFunction(ext_ref, kNumCCallArgs);
4191
4192  // Move return value to the right register.
4193  const LiftoffRegister* result_reg = rets;
4194  if (sig->return_count() > 0) {
4195    DCHECK_EQ(1, sig->return_count());
4196    constexpr Register kReturnReg = r0;
4197    if (kReturnReg != rets->gp()) {
4198      Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0));
4199    }
4200    result_reg++;
4201  }
4202
4203  // Load potential output value from the buffer on the stack.
4204  if (out_argument_kind != kVoid) {
4205    switch (out_argument_kind) {
4206      case kI32:
4207        ldr(result_reg->gp(), MemOperand(sp));
4208        break;
4209      case kI64:
4210        ldr(result_reg->low_gp(), MemOperand(sp));
4211        ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize));
4212        break;
4213      case kF32:
4214        vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp));
4215        break;
4216      case kF64:
4217        vldr(result_reg->fp(), MemOperand(sp));
4218        break;
4219      case kS128:
4220        vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
4221             NeonMemOperand(sp));
4222        break;
4223      default:
4224        UNREACHABLE();
4225    }
4226  }
4227  add(sp, sp, Operand(stack_bytes));
4228}
4229
4230void LiftoffAssembler::CallNativeWasmCode(Address addr) {
4231  Call(addr, RelocInfo::WASM_CALL);
4232}
4233
4234void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
4235  Jump(addr, RelocInfo::WASM_CALL);
4236}
4237
4238void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
4239                                    compiler::CallDescriptor* call_descriptor,
4240                                    Register target) {
4241  DCHECK(target != no_reg);
4242  Call(target);
4243}
4244
4245void LiftoffAssembler::TailCallIndirect(Register target) {
4246  DCHECK(target != no_reg);
4247  Jump(target);
4248}
4249
4250void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
4251  // A direct call to a wasm runtime stub defined in this module.
4252  // Just encode the stub index. This will be patched at relocation.
4253  Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
4254}
4255
4256void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
4257  AllocateStackSpace(size);
4258  mov(addr, sp);
4259}
4260
4261void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
4262  add(sp, sp, Operand(size));
4263}
4264
4265void LiftoffAssembler::MaybeOSR() {}
4266
4267void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
4268                                       ValueKind kind) {
4269  if (kind == kF32) {
4270    FloatRegister src_f = liftoff::GetFloatRegister(src);
4271    VFPCompareAndSetFlags(src_f, src_f);
4272  } else {
4273    DCHECK_EQ(kind, kF64);
4274    VFPCompareAndSetFlags(src, src);
4275  }
4276
4277  // Store a non-zero value if src is NaN.
4278  str(dst, MemOperand(dst), ne);  // x != x iff isnan(x)
4279}
4280
4281void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
4282                                            Register tmp_gp,
4283                                            LiftoffRegister tmp_s128,
4284                                            ValueKind lane_kind) {
4285  QwNeonRegister src_q = liftoff::GetSimd128Register(src);
4286  QwNeonRegister tmp_q = liftoff::GetSimd128Register(tmp_s128);
4287  if (lane_kind == kF32) {
4288    vpadd(tmp_q.low(), src_q.low(), src_q.high());
4289    LowDwVfpRegister tmp_d =
4290        LowDwVfpRegister::from_code(tmp_s128.low_fp().code());
4291    vadd(tmp_d.low(), tmp_d.low(), tmp_d.high());
4292  } else {
4293    DCHECK_EQ(lane_kind, kF64);
4294    vadd(tmp_q.low(), src_q.low(), src_q.high());
4295  }
4296  emit_set_if_nan(dst, tmp_q.low(), lane_kind);
4297}
4298
4299void LiftoffStackSlots::Construct(int param_slots) {
4300  DCHECK_LT(0, slots_.size());
4301  SortInPushOrder();
4302  int last_stack_slot = param_slots;
4303  for (auto& slot : slots_) {
4304    const int stack_slot = slot.dst_slot_;
4305    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
4306    DCHECK_LT(0, stack_decrement);
4307    last_stack_slot = stack_slot;
4308    const LiftoffAssembler::VarState& src = slot.src_;
4309    switch (src.loc()) {
4310      case LiftoffAssembler::VarState::kStack: {
4311        switch (src.kind()) {
4312          // i32 and i64 can be treated as similar cases, i64 being previously
4313          // split into two i32 registers
4314          case kI32:
4315          case kI64:
4316          case kF32:
4317          case kRef:
4318          case kOptRef: {
4319            asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
4320            UseScratchRegisterScope temps(asm_);
4321            Register scratch = temps.Acquire();
4322            asm_->ldr(scratch,
4323                      liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
4324            asm_->Push(scratch);
4325          } break;
4326          case kF64: {
4327            asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
4328            UseScratchRegisterScope temps(asm_);
4329            DwVfpRegister scratch = temps.AcquireD();
4330            asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
4331            asm_->vpush(scratch);
4332          } break;
4333          case kS128: {
4334            asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
4335            MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
4336            UseScratchRegisterScope temps(asm_);
4337            Register addr = liftoff::CalculateActualAddress(
4338                asm_, &temps, mem_op.rn(), no_reg, mem_op.offset());
4339            QwNeonRegister scratch = temps.AcquireQ();
4340            asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr));
4341            asm_->vpush(scratch);
4342            break;
4343          }
4344          default:
4345            UNREACHABLE();
4346        }
4347        break;
4348      }
4349      case LiftoffAssembler::VarState::kRegister: {
4350        int pushed_bytes = SlotSizeInBytes(slot);
4351        asm_->AllocateStackSpace(stack_decrement - pushed_bytes);
4352        switch (src.kind()) {
4353          case kI64: {
4354            LiftoffRegister reg =
4355                slot.half_ == kLowWord ? src.reg().low() : src.reg().high();
4356            asm_->push(reg.gp());
4357          } break;
4358          case kI32:
4359          case kRef:
4360          case kOptRef:
4361            asm_->push(src.reg().gp());
4362            break;
4363          case kF32:
4364            asm_->vpush(liftoff::GetFloatRegister(src.reg().fp()));
4365            break;
4366          case kF64:
4367            asm_->vpush(src.reg().fp());
4368            break;
4369          case kS128:
4370            asm_->vpush(liftoff::GetSimd128Register(src.reg()));
4371            break;
4372          default:
4373            UNREACHABLE();
4374        }
4375        break;
4376      }
4377      case LiftoffAssembler::VarState::kIntConst: {
4378        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
4379        DCHECK(src.kind() == kI32 || src.kind() == kI64);
4380        UseScratchRegisterScope temps(asm_);
4381        Register scratch = temps.Acquire();
4382        // The high word is the sign extension of the low word.
4383        asm_->mov(scratch,
4384                  Operand(slot.half_ == kLowWord ? src.i32_const()
4385                                                 : src.i32_const() >> 31));
4386        asm_->push(scratch);
4387        break;
4388      }
4389    }
4390  }
4391}
4392
4393}  // namespace wasm
4394}  // namespace internal
4395}  // namespace v8
4396
4397#endif  // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
4398