// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/base/numbers/double.h"
#include "src/codegen/arm/constants-arm.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/memory-chunk.h"
#include "src/utils/boxed-float.h"

#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
#endif  // V8_ENABLE_WEBASSEMBLY

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

// Adds Arm-specific methods to convert InstructionOperands.
class ArmOperandConverter final : public InstructionOperandConverter {
 public:
  ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

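  // Maps the instruction's flags mode to the S bit: modes whose condition
  // flags are consumed afterwards (branch, deoptimize, set, trap, select)
  // need the instruction to update the flags (SetCC); kFlags_none leaves
  // them untouched (LeaveCC).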
  SBit OutputSBit() const {
    switch (instr_->flags_mode()) {
      case kFlags_branch:
      case kFlags_deoptimize:
      case kFlags_set:
      case kFlags_trap:
      case kFlags_select:
        return SetCC;
      case kFlags_none:
        return LeaveCC;
    }
    UNREACHABLE();
  }

  Operand InputImmediate(size_t index) const {
    return ToImmediate(instr_->InputAt(index));
  }

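  // Decodes the instruction's addressing mode and materializes input
  // |first_index| (plus, for the shifted forms, the following input) as an
  // ARM Operand2 shifter operand: an immediate, a plain register, or a
  // register shifted by an immediate or by another register.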
  Operand InputOperand2(size_t first_index) {
    const size_t index = first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Offset_RI:
      case kMode_Offset_RR:
      case kMode_Root:
        break;
      case kMode_Operand2_I:
        return InputImmediate(index + 0);
      case kMode_Operand2_R:
        return Operand(InputRegister(index + 0));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
      case kMode_Operand2_R_ASR_R:
        return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
      case kMode_Operand2_R_LSL_R:
        return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
      case kMode_Operand2_R_LSR_R:
        return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
      case kMode_Operand2_R_ROR_R:
        return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
    }
    UNREACHABLE();
  }

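  // Decodes the addressing mode into a MemOperand starting at input
  // |*first_index| and advances *first_index past the inputs it consumed, so
  // callers can keep converting any remaining inputs.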
  MemOperand InputOffset(size_t* first_index) {
    const size_t index = *first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Operand2_I:
      case kMode_Operand2_R:
      case kMode_Operand2_R_ASR_I:
      case kMode_Operand2_R_ASR_R:
      case kMode_Operand2_R_LSL_R:
      case kMode_Operand2_R_LSR_I:
      case kMode_Operand2_R_LSR_R:
      case kMode_Operand2_R_ROR_I:
      case kMode_Operand2_R_ROR_R:
        break;
      case kMode_Operand2_R_LSL_I:
        *first_index += 3;
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
                          LSL, InputInt32(index + 2));
      case kMode_Offset_RI:
        *first_index += 2;
        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
      case kMode_Offset_RR:
        *first_index += 2;
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
      case kMode_Root:
        *first_index += 1;
        return MemOperand(kRootRegister, InputInt32(index));
    }
    UNREACHABLE();
  }

  MemOperand InputOffset(size_t first_index = 0) {
    return InputOffset(&first_index);
  }

  Operand ToImmediate(InstructionOperand* operand) const {
    Constant constant = ToConstant(operand);
    switch (constant.type()) {
      case Constant::kInt32:
#if V8_ENABLE_WEBASSEMBLY
        if (RelocInfo::IsWasmReference(constant.rmode())) {
          return Operand(constant.ToInt32(), constant.rmode());
        }
#endif  // V8_ENABLE_WEBASSEMBLY
        return Operand(constant.ToInt32());
      case Constant::kFloat32:
        return Operand::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Operand::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Operand(constant.ToExternalReference());
      case Constant::kDelayedStringConstant:
        return Operand::EmbeddedStringConstant(
            constant.ToDelayedStringConstant());
      case Constant::kInt64:
      case Constant::kCompressedHeapObject:
      case Constant::kHeapObject:
      // TODO(dcarney): loading RPO constants on arm.
      case Constant::kRpoNumber:
        break;
    }
    UNREACHABLE();
  }

  MemOperand ToMemOperand(InstructionOperand* op) const {
    DCHECK_NOT_NULL(op);
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
  }

  MemOperand SlotToMemOperand(int slot) const {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
  }

  NeonMemOperand NeonInputOperand(size_t first_index) {
    const size_t index = first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_Operand2_R:
        return NeonMemOperand(InputRegister(index + 0));
      default:
        break;
    }
    UNREACHABLE();
  }
};

namespace {

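// Out-of-line portion of the write barrier: reached only when the store's
// fast-path page-flag checks fail, it calls the record-write stub (or the
// ephemeron key barrier), saving and restoring lr itself when the frame was
// elided.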
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
                       Register value, RecordWriteMode mode,
                       StubCallMode stub_mode,
                       UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        object_(object),
        offset_(offset),
        value_(value),
        mode_(mode),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        must_save_lr_(!gen->frame_access_state()->has_frame()),
        unwinding_info_writer_(unwinding_info_writer),
        zone_(gen->zone()) {
  }

  void Generate() final {
    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
                     exit());
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ||
                FLAG_use_full_record_write_builtin
            ? RememberedSetAction::kEmit
            : RememberedSetAction::kOmit;
    SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    if (must_save_lr_) {
      // We need to save and restore lr if the frame was elided.
      __ Push(lr);
      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
    }
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
#if V8_ENABLE_WEBASSEMBLY
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      __ CallRecordWriteStubSaveRegisters(object_, offset_,
                                          remembered_set_action, save_fp_mode,
                                          StubCallMode::kCallWasmRuntimeStub);
#endif  // V8_ENABLE_WEBASSEMBLY
    } else {
      __ CallRecordWriteStubSaveRegisters(object_, offset_,
                                          remembered_set_action, save_fp_mode);
    }
    if (must_save_lr_) {
      __ Pop(lr);
      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
    }
  }

 private:
  Register const object_;
  Operand const offset_;
  Register const value_;
  RecordWriteMode const mode_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  bool must_save_lr_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Zone* zone_;
};

template <typename T>
class OutOfLineFloatMin final : public OutOfLineCode {
 public:
  OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}

  void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }

 private:
  T const result_;
  T const left_;
  T const right_;
};
using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>;
using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>;

template <typename T>
class OutOfLineFloatMax final : public OutOfLineCode {
 public:
  OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}

  void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }

 private:
  T const result_;
  T const left_;
  T const right_;
};
using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>;
using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>;

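// Note on the kFloat* mappings below: VFP comparisons copy the FPSCR NZCV
// flags to the APSR, and an unordered (NaN) result sets C and V. Hence
// kFloatLessThan uses lo (C clear), which is false for NaN operands, while
// kFloatLessThanOrUnordered uses lt (N != V), which is true for them.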
Condition FlagsConditionToCondition(FlagsCondition condition) {
  switch (condition) {
    case kEqual:
      return eq;
    case kNotEqual:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedGreaterThanOrEqual:
      return ge;
    case kSignedLessThanOrEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedGreaterThanOrEqual:
      return hs;
    case kUnsignedLessThanOrEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kFloatLessThanOrUnordered:
      return lt;
    case kFloatGreaterThanOrEqual:
      return ge;
    case kFloatLessThanOrEqual:
      return ls;
    case kFloatGreaterThanOrUnordered:
      return hi;
    case kFloatLessThan:
      return lo;
    case kFloatGreaterThanOrEqualOrUnordered:
      return hs;
    case kFloatLessThanOrEqualOrUnordered:
      return le;
    case kFloatGreaterThan:
      return gt;
    case kOverflow:
      return vs;
    case kNotOverflow:
      return vc;
    case kPositiveOrZero:
      return pl;
    case kNegative:
      return mi;
    default:
      break;
  }
  UNREACHABLE();
}

}  // namespace

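// The atomic helpers below implement sequentially consistent accesses with
// dmb(ISH) barriers; the read-modify-write operations use ldrex/strex loops
// that retry until the store-exclusive reports success (status register
// equal to 0).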
#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr)                       \
  do {                                                                \
    __ asm_instr(i.OutputRegister(),                                  \
                 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
    __ dmb(ISH);                                                      \
  } while (0)

#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, order)   \
  do {                                                    \
    __ dmb(ISH);                                          \
    __ asm_instr(i.InputRegister(0), i.InputOffset(1));   \
    if (order == AtomicMemoryOrder::kSeqCst) __ dmb(ISH); \
  } while (0)

#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr)             \
  do {                                                                        \
    Label exchange;                                                           \
    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));        \
    __ dmb(ISH);                                                              \
    __ bind(&exchange);                                                       \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
    __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
    __ teq(i.TempRegister(0), Operand(0));                                    \
    __ b(ne, &exchange);                                                      \
    __ dmb(ISH);                                                              \
  } while (0)

#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr,     \
                                                 cmp_reg)                     \
  do {                                                                        \
    Label compareExchange;                                                    \
    Label exit;                                                               \
    __ dmb(ISH);                                                              \
    __ bind(&compareExchange);                                                \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
    __ teq(cmp_reg, Operand(i.OutputRegister(0)));                            \
    __ b(ne, &exit);                                                          \
    __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
    __ teq(i.TempRegister(0), Operand(0));                                    \
    __ b(ne, &compareExchange);                                               \
    __ bind(&exit);                                                           \
    __ dmb(ISH);                                                              \
  } while (0)

#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr)            \
  do {                                                                       \
    Label binop;                                                             \
    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));       \
    __ dmb(ISH);                                                             \
    __ bind(&binop);                                                         \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                   \
    __ bin_instr(i.TempRegister(0), i.OutputRegister(0),                     \
                 Operand(i.InputRegister(2)));                               \
    __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
    __ teq(i.TempRegister(2), Operand(0));                                   \
    __ b(ne, &binop);                                                        \
    __ dmb(ISH);                                                             \
  } while (0)

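// The 64-bit variants use ldrexd/strexd, which require an even/odd register
// pair (r2/r3 here) for the loaded value; strexd writes 0 to its status
// register on success, so the teq/b(ne) pair retries the loop otherwise.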
#define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2)                  \
  do {                                                                 \
    Label binop;                                                       \
    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
    __ dmb(ISH);                                                       \
    __ bind(&binop);                                                   \
    __ ldrexd(r2, r3, i.TempRegister(0));                              \
    __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \
    __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1)));     \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                \
    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
              i.TempRegister(0));                                      \
    __ teq(i.TempRegister(3), Operand(0));                             \
    __ b(ne, &binop);                                                  \
    __ dmb(ISH);                                                       \
  } while (0)

#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr)                           \
  do {                                                                 \
    Label binop;                                                       \
    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
    __ dmb(ISH);                                                       \
    __ bind(&binop);                                                   \
    __ ldrexd(r2, r3, i.TempRegister(0));                              \
    __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0)));      \
    __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1)));      \
    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
              i.TempRegister(0));                                      \
    __ teq(i.TempRegister(3), Operand(0));                             \
    __ b(ne, &binop);                                                  \
    __ dmb(ISH);                                                       \
  } while (0)

#define ASSEMBLE_IEEE754_BINOP(name)                                           \
  do {                                                                         \
    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
    /* and generate a CallAddress instruction instead. */                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
    __ PrepareCallCFunction(0, 2);                                             \
    __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
                            i.InputDoubleRegister(1));                         \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2);    \
    /* Move the result into the double result register. */                     \
    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
  } while (0)

#define ASSEMBLE_IEEE754_UNOP(name)                                            \
  do {                                                                         \
    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
    /* and generate a CallAddress instruction instead. */                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
    __ PrepareCallCFunction(0, 1);                                             \
    __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
    /* Move the result into the double result register. */                     \
    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
  } while (0)

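// Each vqmovn narrows a Q register into a D register with saturation; the
// three cases below order the two narrowing operations so that neither
// source is clobbered before it has been read when dst aliases src0 and/or
// src1.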
#define ASSEMBLE_NEON_NARROWING_OP(dt, sdt)           \
  do {                                                \
    Simd128Register dst = i.OutputSimd128Register(),  \
                    src0 = i.InputSimd128Register(0), \
                    src1 = i.InputSimd128Register(1); \
    if (dst == src0 && dst == src1) {                 \
      __ vqmovn(dt, sdt, dst.low(), src0);            \
      __ vmov(dst.high(), dst.low());                 \
    } else if (dst == src0) {                         \
      __ vqmovn(dt, sdt, dst.low(), src0);            \
      __ vqmovn(dt, sdt, dst.high(), src1);           \
    } else {                                          \
      __ vqmovn(dt, sdt, dst.high(), src1);           \
      __ vqmovn(dt, sdt, dst.low(), src0);            \
    }                                                 \
  } while (0)

#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op)                                   \
  do {                                                                        \
    __ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(),   \
          i.InputSimd128Register(1).low());                                   \
    __ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \
          i.InputSimd128Register(1).high());                                  \
  } while (0)

// If the shift value is an immediate, we can call asm_imm, taking the shift
// value modulo 2^width. Otherwise, emit code to perform the modulus operation,
// and call vshl.
#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \
  do {                                                   \
    QwNeonRegister dst = i.OutputSimd128Register();      \
    QwNeonRegister src = i.InputSimd128Register(0);      \
    if (instr->InputAt(1)->IsImmediate()) {              \
      __ asm_imm(dt, dst, src, i.InputInt##width(1));    \
    } else {                                             \
      UseScratchRegisterScope temps(tasm());             \
      Simd128Register tmp = temps.AcquireQ();            \
      Register shift = temps.Acquire();                  \
      constexpr int mask = (1 << width) - 1;             \
      __ and_(shift, i.InputRegister(1), Operand(mask)); \
      __ vdup(sz, tmp, shift);                           \
      __ vshl(dt, dst, src, tmp);                        \
    }                                                    \
  } while (0)

// If the shift value is an immediate, we can call asm_imm, taking the shift
// value modulo 2^width. Otherwise, emit code to perform the modulus operation,
// and call vshl, passing in the negated shift value (treated as a right
// shift).
#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \
  do {                                                    \
    QwNeonRegister dst = i.OutputSimd128Register();       \
    QwNeonRegister src = i.InputSimd128Register(0);       \
    if (instr->InputAt(1)->IsImmediate()) {               \
      __ asm_imm(dt, dst, src, i.InputInt##width(1));     \
    } else {                                              \
      UseScratchRegisterScope temps(tasm());              \
      Simd128Register tmp = temps.AcquireQ();             \
      Register shift = temps.Acquire();                   \
      constexpr int mask = (1 << width) - 1;              \
      __ and_(shift, i.InputRegister(1), Operand(mask));  \
      __ vdup(sz, tmp, shift);                            \
      __ vneg(sz, tmp, tmp);                              \
      __ vshl(dt, dst, src, tmp);                         \
    }                                                     \
  } while (0)

void CodeGenerator::AssembleDeconstructFrame() {
  __ LeaveFrame(StackFrame::MANUAL);
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
}

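// Restores the caller's fp and lr from the current frame (the saved fp lives
// at [fp] and the return address at [fp + 4], which is what the ldm below
// loads) and switches frame accesses to be sp-relative for the tail call.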
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ ldm(ia, fp, {lr, fp});
  }
  frame_access_state()->SetFrameAccessToSP();
}

namespace {

void FlushPendingPushRegisters(TurboAssembler* tasm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  switch (pending_pushes->size()) {
    case 0:
      break;
    case 1:
      tasm->push((*pending_pushes)[0]);
      break;
    case 2:
      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
      break;
    case 3:
      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
                 (*pending_pushes)[2]);
      break;
    default:
      UNREACHABLE();
  }
  frame_access_state->IncreaseSPDelta(pending_pushes->size());
  pending_pushes->clear();
}

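// Grows (or, when allowed, shrinks) the stack so that the slot at
// new_slot_above_sp ends up directly above sp; any pending tail-call pushes
// are flushed first so that their destination slots do not move underneath
// them.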
void AdjustStackPointerForTailCall(
    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    if (pending_pushes != nullptr) {
      FlushPendingPushRegisters(tasm, state, pending_pushes);
    }
    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    if (pending_pushes != nullptr) {
      FlushPendingPushRegisters(tasm, state, pending_pushes);
    }
    tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

#if DEBUG
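// Debug-only check that the low and high words produced by an atomic pair
// instruction land in the expected registers, regardless of whether the
// register allocator materialized them as outputs, as temps, or as one of
// each.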
bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
                                   const Instruction* instr, Register low,
                                   Register high) {
  DCHECK_GE(instr->OutputCount() + instr->TempCount(), 2);
  if (instr->OutputCount() == 2) {
    return (converter->OutputRegister(0) == low &&
            converter->OutputRegister(1) == high);
  }
  if (instr->OutputCount() == 1) {
    return (converter->OutputRegister(0) == low &&
            converter->TempRegister(instr->TempCount() - 1) == high) ||
           (converter->OutputRegister(0) == high &&
            converter->TempRegister(instr->TempCount() - 1) == low);
  }
  DCHECK_EQ(instr->OutputCount(), 0);
  return (converter->TempRegister(instr->TempCount() - 2) == low &&
          converter->TempRegister(instr->TempCount() - 1) == high);
}
#endif

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_slot_offset) {
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_slot_offset)) {
    ArmOperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(
          tasm(), frame_access_state(),
          destination_location.index() - pending_pushes.size(),
          &pending_pushes);
      // Pushes of non-register data types are not supported.
      DCHECK(source.IsRegister());
      LocationOperand source_location(LocationOperand::cast(source));
      pending_pushes.push_back(source_location.GetRegister());
      // TODO(arm): We can push more than 3 registers at once. Add support in
      // the macro-assembler for pushing a list of registers.
      if (pending_pushes.size() == 3) {
        FlushPendingPushRegisters(tasm(), frame_access_state(),
                                  &pending_pushes);
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset, nullptr, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_slot_offset) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.Acquire();
  __ ComputeCodeStartAddress(scratch);
  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.Acquire();
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
  __ ldr(scratch,
         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
  __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, ne);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  ArmOperandConverter i(this, instr);

  __ MaybeCheckConstPool();
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ CallCodeObject(reg);
      }
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!instr->InputAt(0)->IsImmediate());
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ Call(wasm_code, constant.rmode());
      } else {
        __ Call(i.InputRegister(0));
      }
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallWasm: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ Jump(wasm_code, constant.rmode());
      } else {
        __ Jump(i.InputRegister(0));
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchTailCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ JumpCodeObject(reg);
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!instr->InputAt(0)->IsImmediate());
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      __ Jump(reg);
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.Acquire();
        // Check the function's context matches the context argument.
        __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
        __ cmp(cp, scratch);
        __ Assert(eq, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
      __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(r2);
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction: {
      int const num_gp_parameters = ParamField::decode(instr->opcode());
      int const num_fp_parameters = FPParamField::decode(instr->opcode());
      __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters);
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the current address in a stack slot, and record a safepoint on
        // the same address. On most architectures, we record the address after
        // the function call, but this works too as long as the address in the
        // frame and safepoint table match.
        __ str(pc, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
        // On Arm, the pc reads as the address two instructions past the one
        // currently executing: see https://bit.ly/3CD80OA. To line up the
        // safepoint address with the stored pc, we add a nop here.
        __ nop();
        RecordSafepoint(instr->reference_map());
      }
#endif  // V8_ENABLE_WEBASSEMBLY
      if (instr->InputAt(0)->IsImmediate()) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement the SP delta to match the change of the
      // stack pointer in CallCFunction. However, on certain architectures
      // (e.g. ARM) there may be stricter alignment requirements that cause
      // the old SP to be saved on the stack, and in those cases we cannot
      // calculate the SP delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchAbortCSADcheck:
      DCHECK(i.InputRegister(0) == r1);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
        __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
                RelocInfo::CODE_TARGET);
      }
      __ stop();
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
      break;
    case kArchThrowTerminator:
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
      __ b(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), fp);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
      } else {
        __ mov(i.OutputRegister(), fp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.

      Register lhs_register = sp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = i.TempRegister(0);
        __ sub(lhs_register, sp, Operand(offset));
      }

      constexpr size_t kValueIndex = 0;
      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
      __ cmp(lhs_register, i.InputRegister(kValueIndex));
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI:
      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
                           i.InputDoubleRegister(0), DetermineStubCallMode());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchStoreWithWriteBarrier:  // Fall through.
    case kArchAtomicStoreWithWriteBarrier: {
      RecordWriteMode mode;
      if (arch_opcode == kArchStoreWithWriteBarrier) {
        mode = static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      } else {
        mode = AtomicStoreRecordWriteModeField::decode(instr->opcode());
      }
      Register object = i.InputRegister(0);
      Register value = i.InputRegister(2);

      if (FLAG_debug_code) {
        // Checking that |value| is not a cleared weakref: our write barrier
        // does not support that for now.
        __ cmp(value, Operand(kClearedWeakHeapObjectLower32));
        __ Check(ne, AbortReason::kOperandIsCleared);
      }

      AddressingMode addressing_mode =
          AddressingModeField::decode(instr->opcode());
      Operand offset(0);

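      // For an atomic store, a dmb before the str orders it after all
      // earlier accesses; a seq-cst store additionally needs a dmb after the
      // str, emitted below.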
      if (arch_opcode == kArchAtomicStoreWithWriteBarrier) {
        __ dmb(ISH);
      }
      if (addressing_mode == kMode_Offset_RI) {
        int32_t immediate = i.InputInt32(1);
        offset = Operand(immediate);
        __ str(value, MemOperand(object, immediate));
      } else {
        DCHECK_EQ(kMode_Offset_RR, addressing_mode);
        Register reg = i.InputRegister(1);
        offset = Operand(reg);
        __ str(value, MemOperand(object, reg));
      }
      if (arch_opcode == kArchAtomicStoreWithWriteBarrier &&
          AtomicMemoryOrderField::decode(instr->opcode()) ==
              AtomicMemoryOrder::kSeqCst) {
        __ dmb(ISH);
      }

      auto ool = zone()->New<OutOfLineRecordWrite>(
          this, object, offset, value, mode, DetermineStubCallMode(),
          &unwinding_info_writer_);
      if (mode > RecordWriteMode::kValueIsPointer) {
        __ JumpIfSmi(value, ool->exit());
      }
      __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
                       ne, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? sp : fp;
      __ add(i.OutputRegister(0), base, Operand(offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kArmAdd:
      __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmAnd:
      __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
              i.OutputSBit());
      break;
    case kArmBic:
      __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmMul:
      __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.OutputSBit());
      break;
    case kArmMla:
      __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2), i.OutputSBit());
      break;
    case kArmMls: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSmull:
      __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1));
      break;
    case kArmSmmul:
      __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSmmla:
      __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUmull:
      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1), i.OutputSBit());
      break;
    case kArmSdiv: {
      CpuFeatureScope scope(tasm(), SUDIV);
      __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmUdiv: {
      CpuFeatureScope scope(tasm(), SUDIV);
      __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmMov:
      __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      break;
    case kArmMvn:
      __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      break;
    case kArmOrr:
      __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmEor:
      __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmSub:
      __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmRsb:
      __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmBfc: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmUbfx: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
              i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSbfx: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
              i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSxtb:
      __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxth:
      __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxtab:
      __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxtah:
      __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtb:
      __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxth:
      __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtab:
      __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtah:
      __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmRbit: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ rbit(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmRev:
      __ rev(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmClz:
      __ clz(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmCmp:
      __ cmp(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmCmn:
      __ cmn(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmTst:
      __ tst(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmTeq:
      __ teq(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmAddPair:
      // i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
             SBit::SetCC);
      __ adc(i.OutputRegister(1), i.InputRegister(1),
             Operand(i.InputRegister(3)));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSubPair:
      // i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
             SBit::SetCC);
      __ sbc(i.OutputRegister(1), i.InputRegister(1),
             Operand(i.InputRegister(3)));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmMulPair:
      // i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
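      // 64 x 64 -> 64 multiply: umull computes the full 64-bit product of
      // the two low words; the cross products left_low * right_high and
      // right_low * left_high only affect the high word, so they are
      // accumulated with mla. The left_high * right_high term would only
      // contribute at bit 64 and above, so it is dropped.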
1248       __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1249                i.InputRegister(2));
1250       __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1251              i.OutputRegister(1));
1252       __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1253              i.OutputRegister(1));
1254       break;
1255     case kArmLslPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmLsrPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmAsrPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmVcmpF32:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
                                 i.InputFloatRegister(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by vcmp instructions.
        DCHECK_EQ(0.0f, i.InputFloat32(1));
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
      }
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmVaddF32:
      __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsubF32:
      __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmulF32:
      __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlaF32:
      __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlsF32:
      __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVdivF32:
      __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsqrtF32:
      __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVabsF32:
      __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVnegF32:
      __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVcmpF64:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
                                 i.InputDoubleRegister(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by vcmp instructions.
        DCHECK_EQ(0.0, i.InputDouble(1));
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
      }
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmVaddF64:
      __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsubF64:
      __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmulF64:
      __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlaF64:
      __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlsF64:
      __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVdivF64:
      __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmodF64: {
      // TODO(bmeurer): We should really get rid of this special instruction,
      // and generate a CallAddress instruction instead.
      FrameScope scope(tasm(), StackFrame::MANUAL);
      __ PrepareCallCFunction(0, 2);
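      // No general-purpose arguments, two double arguments.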
      __ MovToFloatParameters(i.InputDoubleRegister(0),
                              i.InputDoubleRegister(1));
      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
      // Move the result into the double result register.
      __ MovFromFloatResult(i.OutputDoubleRegister());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVsqrtF64:
      __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVabsF64:
      __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVnegF64:
      __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVrintmF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintm(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
      } else {
        __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
      }
      break;
    }
    case kArmVrintmF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintpF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintp(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
      } else {
        __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
      }
      break;
    }
    case kArmVrintpF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintzF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintz(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
      } else {
        __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
      }
      break;
    }
    case kArmVrintzF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintaF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintnF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintn(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
      } else {
        __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
      }
      break;
    }
    case kArmVrintnF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVcvtF32F64: {
      __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64F32: {
      __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF32S32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
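      // vcvt only operates on VFP registers, so move the integer input
      // through an S-register scratch first.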
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF32U32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64S32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64U32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtS32F32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
      if (set_overflow_to_min_i32) {
        // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
        // because INT32_MIN allows easier out-of-bounds detection.
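        // cmn rx, #1 sets the overflow flag (vs) exactly when the saturated
        // conversion result is INT32_MAX, which the conditional mov replaces.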
        __ cmn(i.OutputRegister(), Operand(1));
        __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtU32F32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
      if (set_overflow_to_min_u32) {
        // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
        // because 0 allows easier out-of-bounds detection.
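        // cmn rx, #1 sets the carry flag exactly when the saturated result
        // is UINT32_MAX; adding the carry back then wraps it to 0.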
        __ cmn(i.OutputRegister(), Operand(1));
        __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtS32F64: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtU32F64: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVmovU32F32:
      __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovF32U32:
      __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovLowU32F64:
      __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovLowF64U32:
      __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovHighU32F64:
      __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovHighF64U32:
      __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovF64U32U32:
      __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovU32U32F64:
      __ vmov(i.OutputRegister(0), i.OutputRegister(1),
              i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVcnt: {
      __ vcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmLdrb:
      __ ldrb(i.OutputRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdrsb:
      __ ldrsb(i.OutputRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmStrb:
      __ strb(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdrh:
      __ ldrh(i.OutputRegister(), i.InputOffset());
      break;
    case kArmLdrsh:
      __ ldrsh(i.OutputRegister(), i.InputOffset());
      break;
    case kArmStrh:
      __ strh(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdr:
      __ ldr(i.OutputRegister(), i.InputOffset());
      break;
    case kArmStr:
      __ str(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVldrF32: {
      __ vldr(i.OutputFloatRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVstrF32:
      __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVld1F64: {
      __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
              i.NeonInputOperand(0));
      break;
    }
    case kArmVst1F64: {
      __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
              i.NeonInputOperand(1));
      break;
    }
    case kArmVld1S128: {
      __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
              i.NeonInputOperand(0));
      break;
    }
    case kArmVst1S128: {
      __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
              i.NeonInputOperand(1));
      break;
    }
    case kArmVldrF64: {
      __ vldr(i.OutputDoubleRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVstrF64:
      __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmFloat32Max: {
      SwVfpRegister result = i.OutputFloatRegister();
      SwVfpRegister left = i.InputFloatRegister(0);
      SwVfpRegister right = i.InputFloatRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
        __ FloatMax(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64Max: {
      DwVfpRegister result = i.OutputDoubleRegister();
      DwVfpRegister left = i.InputDoubleRegister(0);
      DwVfpRegister right = i.InputDoubleRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
        __ FloatMax(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat32Min: {
      SwVfpRegister result = i.OutputFloatRegister();
      SwVfpRegister left = i.InputFloatRegister(0);
      SwVfpRegister right = i.InputFloatRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
        __ FloatMin(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64Min: {
      DwVfpRegister result = i.OutputDoubleRegister();
      DwVfpRegister left = i.InputDoubleRegister(0);
      DwVfpRegister right = i.InputDoubleRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
        __ FloatMin(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64SilenceNaN: {
      DwVfpRegister value = i.InputDoubleRegister(0);
      DwVfpRegister result = i.OutputDoubleRegister();
      __ VFPCanonicalizeNaN(result, value);
      break;
    }
    case kArmPush: {
      int stack_decrement = i.InputInt32(0);
      int slots = stack_decrement / kSystemPointerSize;
      LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
      MachineRepresentation rep = op->representation();
      int pushed_slots = ElementSizeInPointers(rep);
      // Slot-sized arguments are never padded but there may be a gap if
      // the slot allocator reclaimed other padding slots. Adjust the stack
      // here to skip any gap.
      __ AllocateStackSpace((slots - pushed_slots) * kSystemPointerSize);
      switch (rep) {
        case MachineRepresentation::kFloat32:
          __ vpush(i.InputFloatRegister(1));
          break;
        case MachineRepresentation::kFloat64:
          __ vpush(i.InputDoubleRegister(1));
          break;
        case MachineRepresentation::kSimd128:
          __ vpush(i.InputSimd128Register(1));
          break;
        default:
          __ push(i.InputRegister(1));
          break;
      }
      frame_access_state()->IncreaseSPDelta(slots);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmPoke: {
      int const slot = MiscField::decode(instr->opcode());
      __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmPeek: {
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
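      // The reverse slot index is converted into an fp-relative byte offset
      // using the frame's total slot count.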
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
        } else if (op->representation() == MachineRepresentation::kFloat32) {
          __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
          UseScratchRegisterScope temps(tasm());
          Register scratch = temps.Acquire();
          __ add(scratch, fp, Operand(offset));
          __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
                  NeonMemOperand(scratch));
        }
      } else {
        __ ldr(i.OutputRegister(), MemOperand(fp, offset));
      }
      break;
    }
    case kArmDmbIsh: {
      __ dmb(ISH);
      break;
    }
    case kArmDsbIsb: {
      __ dsb(SY);
      __ isb(SY);
      break;
    }
    case kArmVmullLow: {
      auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
      __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).low(),
               i.InputSimd128Register(1).low());
      break;
    }
    case kArmVmullHigh: {
      auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
      __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).high(),
               i.InputSimd128Register(1).high());
      break;
    }
    case kArmVpadal: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
      __ vpadal(dt, i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kArmVpaddl: {
      auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
      __ vpaddl(dt, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF64x2Splat: {
      Simd128Register dst = i.OutputSimd128Register();
      DoubleRegister src = i.InputDoubleRegister(0);
      __ Move(dst.low(), src);
      __ Move(dst.high(), src);
      break;
    }
    case kArmF64x2ExtractLane: {
      __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
                     i.InputInt8(1));
      break;
    }
    case kArmF64x2ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputDoubleRegister(2), i.InputInt8(1));
      break;
    }
    case kArmF64x2Abs: {
      __ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
      __ vabs(i.OutputSimd128Register().high(),
              i.InputSimd128Register(0).high());
      break;
    }
    case kArmF64x2Neg: {
      __ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
      __ vneg(i.OutputSimd128Register().high(),
              i.InputSimd128Register(0).high());
      break;
    }
    case kArmF64x2Sqrt: {
      __ vsqrt(i.OutputSimd128Register().low(),
               i.InputSimd128Register(0).low());
      __ vsqrt(i.OutputSimd128Register().high(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmF64x2Add: {
      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd);
      break;
    }
    case kArmF64x2Sub: {
      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub);
      break;
    }
    case kArmF64x2Mul: {
      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul);
      break;
    }
    case kArmF64x2Div: {
      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv);
      break;
    }
    case kArmF64x2Min: {
      Simd128Register result = i.OutputSimd128Register();
      Simd128Register left = i.InputSimd128Register(0);
      Simd128Register right = i.InputSimd128Register(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool_low = zone()->New<OutOfLineFloat64Min>(
            this, result.low(), left.low(), right.low());
        auto ool_high = zone()->New<OutOfLineFloat64Min>(
            this, result.high(), left.high(), right.high());
        __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
        __ bind(ool_low->exit());
        __ FloatMin(result.high(), left.high(), right.high(),
                    ool_high->entry());
        __ bind(ool_high->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmF64x2Max: {
      Simd128Register result = i.OutputSimd128Register();
      Simd128Register left = i.InputSimd128Register(0);
      Simd128Register right = i.InputSimd128Register(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool_low = zone()->New<OutOfLineFloat64Max>(
            this, result.low(), left.low(), right.low());
        auto ool_high = zone()->New<OutOfLineFloat64Max>(
            this, result.high(), left.high(), right.high());
        __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
        __ bind(ool_low->exit());
        __ FloatMax(result.high(), left.high(), right.high(),
                    ool_high->entry());
        __ bind(ool_high->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
    case kArmF64x2Eq: {
      UseScratchRegisterScope temps(tasm());
      Register scratch = temps.Acquire();
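      // NEON has no f64x2 compare, so compare each half with scalar VFP and
      // materialize an all-ones or all-zeros mask per 64-bit lane.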
      __ mov(scratch, Operand(0));
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ mov(scratch, Operand(-1), LeaveCC, eq);
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);

      __ mov(scratch, Operand(0));
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ mov(scratch, Operand(-1), LeaveCC, eq);
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      break;
    }
    case kArmF64x2Ne: {
      UseScratchRegisterScope temps(tasm());
      Register scratch = temps.Acquire();
      __ mov(scratch, Operand(0));
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ mov(scratch, Operand(-1), LeaveCC, ne);
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);

      __ mov(scratch, Operand(0));
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ mov(scratch, Operand(-1), LeaveCC, ne);
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      break;
    }
    case kArmF64x2Lt: {
      UseScratchRegisterScope temps(tasm());
      Register scratch = temps.Acquire();
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ mov(scratch, Operand(0), LeaveCC, cs);
      __ mov(scratch, Operand(-1), LeaveCC, mi);
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);

      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ mov(scratch, Operand(0), LeaveCC, cs);
      __ mov(scratch, Operand(-1), LeaveCC, mi);
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      break;
    }
    case kArmF64x2Le: {
      UseScratchRegisterScope temps(tasm());
      Register scratch = temps.Acquire();
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ mov(scratch, Operand(0), LeaveCC, hi);
      __ mov(scratch, Operand(-1), LeaveCC, ls);
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);

      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ mov(scratch, Operand(0), LeaveCC, hi);
      __ mov(scratch, Operand(-1), LeaveCC, ls);
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      break;
    }
    case kArmF64x2Pmin: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register lhs = i.InputSimd128Register(0);
      Simd128Register rhs = i.InputSimd128Register(1);
      DCHECK_EQ(dst, lhs);

      // Move rhs only when it is strictly less than lhs (mi).
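      // If either operand is NaN the compare is unordered and mi is false,
      // so dst keeps lhs, matching pmin semantics.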
      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
      __ vmov(dst.low(), rhs.low(), mi);
      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
      __ vmov(dst.high(), rhs.high(), mi);
      break;
    }
    case kArmF64x2Pmax: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register lhs = i.InputSimd128Register(0);
      Simd128Register rhs = i.InputSimd128Register(1);
      DCHECK_EQ(dst, lhs);

      // Move rhs only when it is strictly greater than lhs (gt).
      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
      __ vmov(dst.low(), rhs.low(), gt);
      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
      __ vmov(dst.high(), rhs.high(), gt);
      break;
    }
    case kArmF64x2Ceil: {
      CpuFeatureScope scope(tasm(), ARMv8);
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vrintp(dst.low(), src.low());
      __ vrintp(dst.high(), src.high());
      break;
    }
    case kArmF64x2Floor: {
      CpuFeatureScope scope(tasm(), ARMv8);
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vrintm(dst.low(), src.low());
      __ vrintm(dst.high(), src.high());
      break;
    }
    case kArmF64x2Trunc: {
      CpuFeatureScope scope(tasm(), ARMv8);
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vrintz(dst.low(), src.low());
      __ vrintz(dst.high(), src.high());
      break;
    }
    case kArmF64x2NearestInt: {
      CpuFeatureScope scope(tasm(), ARMv8);
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vrintn(dst.low(), src.low());
      __ vrintn(dst.high(), src.high());
      break;
    }
    case kArmF64x2ConvertLowI32x4S: {
      __ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
                               i.InputSimd128Register(0));
      break;
    }
    case kArmF64x2ConvertLowI32x4U: {
      __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
                               i.InputSimd128Register(0));
      break;
    }
    case kArmF64x2PromoteLowF32x4: {
      __ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
                              i.InputSimd128Register(0));
      break;
    }
    case kArmI64x2SplatI32Pair: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vdup(Neon32, dst, i.InputRegister(0));
      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
      break;
    }
    case kArmI64x2ReplaceLaneI32Pair: {
      Simd128Register dst = i.OutputSimd128Register();
      int8_t lane = i.InputInt8(1);
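      // Each 64-bit lane is written as two 32-bit lanes: the low word goes
      // to lane 2 * lane, the high word to lane 2 * lane + 1.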
      __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
      __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
      break;
    }
    case kArmI64x2Add: {
      __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2Sub: {
      __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2Mul: {
      UseScratchRegisterScope temps(tasm());
      QwNeonRegister dst = i.OutputSimd128Register();
      QwNeonRegister left = i.InputSimd128Register(0);
      QwNeonRegister right = i.InputSimd128Register(1);
      QwNeonRegister tmp1 = i.TempSimd128Register(0);
      QwNeonRegister tmp2 = temps.AcquireQ();

      // This algorithm uses vector operations to perform 64-bit integer
      // multiplication by splitting each operand into high and low 32-bit
      // halves. The tricky part is getting the halves into the correct
      // places inside the NEON registers, so that we need as few vmull and
      // vmlal instructions as possible.

      // Move left and right into temporaries; they will be modified by vtrn.
      __ vmov(tmp1, left);
      __ vmov(tmp2, right);

      // This diagram shows how the 64-bit integers fit into NEON registers.
      //
      //             [q.high()| q.low()]
      // left/tmp1:  [ a3, a2 | a1, a0 ]
      // right/tmp2: [ b3, b2 | b1, b0 ]
      //
      // We want to multiply the low 32 bits of left with the high 32 bits of
      // right for each lane, i.e. a2 * b3, a0 * b1. However, vmull takes two
      // input d registers and multiplies the corresponding 32-bit lanes,
      // yielding 64-bit integers: a1 * b1, a0 * b0. To make this work we
      // transpose the vectors, so that the low 32 bits of each 64-bit integer
      // end up in the same lane, and similarly for the high 32 bits.
      __ vtrn(Neon32, tmp1.low(), tmp1.high());
      // tmp1: [ a3, a1 | a2, a0 ]
      __ vtrn(Neon32, tmp2.low(), tmp2.high());
      // tmp2: [ b3, b1 | b2, b0 ]

      __ vmull(NeonU32, dst, tmp1.low(), tmp2.high());
      // dst: [ a2*b3 | a0*b1 ]
      __ vmlal(NeonU32, dst, tmp1.high(), tmp2.low());
      // dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ]
      __ vshl(NeonU64, dst, dst, 32);
      // dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ]

      __ vmlal(NeonU32, dst, tmp1.low(), tmp2.low());
      // dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ]
      break;
    }
    case kArmI64x2Abs: {
      __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI64x2Neg: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vmov(dst, uint64_t{0});
      __ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
      break;
    }
    case kArmI64x2Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64);
      break;
    }
    case kArmI64x2ShrS: {
      // Only the least significant byte of each lane is used, so we can use
      // Neon32 as the size.
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64);
      break;
    }
    case kArmI64x2ShrU: {
      // Only the least significant byte of each lane is used, so we can use
      // Neon32 as the size.
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
      break;
    }
    case kArmI64x2BitMask: {
      __ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kArmI64x2SConvertI32x4Low: {
      __ vmovl(NeonS32, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI64x2SConvertI32x4High: {
      __ vmovl(NeonS32, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI64x2UConvertI32x4Low: {
      __ vmovl(NeonU32, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI64x2UConvertI32x4High: {
      __ vmovl(NeonU32, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmF32x4Splat: {
      int src_code = i.InputFloatRegister(0).code();
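      // An S register aliases half of a D register, so d(src_code / 2) holds
      // the input, with src_code % 2 selecting the lane vdup replicates.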
      __ vdup(Neon32, i.OutputSimd128Register(),
              DwVfpRegister::from_code(src_code / 2), src_code % 2);
      break;
    }
    case kArmF32x4ExtractLane: {
      __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
                     i.InputInt8(1));
      break;
    }
    case kArmF32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputFloatRegister(2), i.InputInt8(1));
      break;
    }
    case kArmF32x4SConvertI32x4: {
      __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4UConvertI32x4: {
      __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Abs: {
      __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Neg: {
      __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Sqrt: {
      QwNeonRegister dst = i.OutputSimd128Register();
      QwNeonRegister src1 = i.InputSimd128Register(0);
      DCHECK_EQ(dst, q0);
      DCHECK_EQ(src1, q0);
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
      __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
      __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
      __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
      __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
#undef S_FROM_Q
      break;
    }
    case kArmF32x4RecipApprox: {
      __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4RecipSqrtApprox: {
      __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Add: {
      __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Sub: {
      __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Mul: {
      __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Div: {
      QwNeonRegister dst = i.OutputSimd128Register();
      QwNeonRegister src1 = i.InputSimd128Register(0);
      QwNeonRegister src2 = i.InputSimd128Register(1);
      DCHECK_EQ(dst, q0);
      DCHECK_EQ(src1, q0);
      DCHECK_EQ(src2, q1);
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
      __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0));
      __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1));
      __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2));
      __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3));
#undef S_FROM_Q
      break;
    }
    case kArmF32x4Min: {
      __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Max: {
      __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Eq: {
      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmF32x4Lt: {
      __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Le: {
      __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Pmin: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register lhs = i.InputSimd128Register(0);
      Simd128Register rhs = i.InputSimd128Register(1);
      DCHECK_NE(dst, lhs);
      DCHECK_NE(dst, rhs);

      // f32x4.pmin(lhs, rhs)
      // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
      // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
      __ vcgt(dst, lhs, rhs);
      __ vbsl(dst, rhs, lhs);
      break;
    }
    case kArmF32x4Pmax: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register lhs = i.InputSimd128Register(0);
      Simd128Register rhs = i.InputSimd128Register(1);
      DCHECK_NE(dst, lhs);
      DCHECK_NE(dst, rhs);

      // f32x4.pmax(lhs, rhs)
      // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
      __ vcgt(dst, rhs, lhs);
      __ vbsl(dst, rhs, lhs);
      break;
    }
    case kArmF32x4DemoteF64x2Zero: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
      __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
      __ vmov(dst.high(), 0);
      break;
    }
    case kArmI32x4Splat: {
      __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI32x4ExtractLane: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
                     i.InputInt8(1));
      break;
    }
    case kArmI32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS32, i.InputInt8(1));
      break;
    }
    case kArmI32x4SConvertF32x4: {
      __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4SConvertI16x8Low: {
      __ vmovl(NeonS16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI32x4SConvertI16x8High: {
      __ vmovl(NeonS16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI32x4Neg: {
      __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32);
      break;
    }
    case kArmI32x4ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32);
      break;
    }
    case kArmI32x4Add: {
      __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Sub: {
      __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Mul: {
      __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MinS: {
      __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MaxS: {
      __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2Eq: {
      __ I64x2Eq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2Ne: {
      __ I64x2Ne(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2GtS: {
      __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kArmI64x2GeS: {
      __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Eq: {
      __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon32, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI32x4GtS: {
      __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GeS: {
      __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4UConvertF32x4: {
      __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4UConvertI16x8Low: {
      __ vmovl(NeonU16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI32x4UConvertI16x8High: {
      __ vmovl(NeonU16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI32x4ShrU: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonU32);
      break;
    }
    case kArmI32x4MinU: {
      __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MaxU: {
      __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GtU: {
      __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GeU: {
      __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Abs: {
      __ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4BitMask: {
      Register dst = i.OutputRegister();
      UseScratchRegisterScope temps(tasm());
      Simd128Register src = i.InputSimd128Register(0);
      Simd128Register tmp = temps.AcquireQ();
      Simd128Register mask = i.TempSimd128Register(0);

      __ vshr(NeonS32, tmp, src, 31);
      // Set the i-th bit of lane i. After ANDing with tmp, lanes whose sign
      // bit was set keep their i-th bit; the others become 0.
      __ vmov(mask.low(), base::Double(uint64_t{0x0000'0002'0000'0001}));
      __ vmov(mask.high(), base::Double(uint64_t{0x0000'0008'0000'0004}));
      __ vand(tmp, mask, tmp);
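      // Two pairwise additions fold the four per-lane bits into the low
      // word, which is then moved to the result register.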
      __ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
      __ vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
      __ VmovLow(dst, tmp.low());
      break;
    }
    case kArmI32x4DotI16x8S: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register lhs = i.InputSimd128Register(0);
      Simd128Register rhs = i.InputSimd128Register(1);
      Simd128Register tmp1 = i.TempSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
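      // Widening-multiply the low and high halves, then pairwise-add
      // adjacent 32-bit products to form the four dot products.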
      __ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
      __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
      __ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
      __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
      break;
    }
    case kArmI32x4TruncSatF64x2SZero: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
      __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
      __ vmov(dst.high(), 0);
      break;
    }
    case kArmI32x4TruncSatF64x2UZero: {
      Simd128Register dst = i.OutputSimd128Register();
      Simd128Register src = i.InputSimd128Register(0);
      __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
      __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
      __ vmov(dst.high(), 0);
      break;
    }
    case kArmI16x8Splat: {
      __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI16x8ExtractLaneU: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
                     i.InputInt8(1));
      break;
    }
    case kArmI16x8ExtractLaneS: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
                     i.InputInt8(1));
      break;
    }
    case kArmI16x8ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS16, i.InputInt8(1));
      break;
    }
    case kArmI16x8SConvertI8x16Low: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI16x8SConvertI8x16High: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI16x8Neg: {
      __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI16x8Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 4, Neon16, NeonS16);
      break;
    }
    case kArmI16x8ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonS16);
      break;
    }
    case kArmI16x8SConvertI32x4:
      ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
      break;
    case kArmI16x8Add: {
      __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8AddSatS: {
      __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Sub: {
      __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8SubSatS: {
      __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Mul: {
      __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MinS: {
      __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MaxS: {
      __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Eq: {
      __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon16, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI16x8GtS: {
      __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GeS: {
      __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8UConvertI8x16Low: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI16x8UConvertI8x16High: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI16x8ShrU: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonU16);
      break;
    }
    case kArmI16x8UConvertI32x4:
      ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
      break;
    case kArmI16x8AddSatU: {
      __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8SubSatU: {
      __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MinU: {
      __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MaxU: {
      __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GtU: {
      __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GeU: {
      __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8RoundingAverageU: {
      __ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Abs: {
      __ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI16x8BitMask: {
      UseScratchRegisterScope temps(tasm());
      Register dst = i.OutputRegister();
      Simd128Register src = i.InputSimd128Register(0);
      Simd128Register tmp = temps.AcquireQ();
      Simd128Register mask = i.TempSimd128Register(0);

      __ vshr(NeonS16, tmp, src, 15);
      // Set the i-th bit of lane i in the mask. After ANDing with tmp, the
      // lanes whose sign bit was set keep their i-th bit; all others are 0.
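      // E.g. if only lanes 0 and 3 of src are negative, tmp holds
      // {-1, 0, 0, -1, 0, 0, 0, 0}; after the AND below the lanes hold
      // {1, 0, 0, 8, 0, 0, 0, 0}, and the three pairwise adds reduce them to
      // the bitmask 0b1001 in lane 0.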
      __ vmov(mask.low(), base::Double(uint64_t{0x0008'0004'0002'0001}));
      __ vmov(mask.high(), base::Double(uint64_t{0x0080'0040'0020'0010}));
      __ vand(tmp, mask, tmp);
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vmov(NeonU16, dst, tmp.low(), 0);
      break;
    }
    case kArmI16x8Q15MulRSatS: {
      __ vqrdmulh(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Splat: {
      __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI8x16ExtractLaneU: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
                     i.InputInt8(1));
      break;
    }
    case kArmI8x16ExtractLaneS: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
                     i.InputInt8(1));
      break;
    }
    case kArmI8x16ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS8, i.InputInt8(1));
      break;
    }
    case kArmI8x16Neg: {
      __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI8x16Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 3, Neon8, NeonS8);
      break;
    }
    case kArmI8x16ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonS8);
      break;
    }
    case kArmI8x16SConvertI16x8:
      ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
      break;
    case kArmI8x16Add: {
      __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16AddSatS: {
      __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Sub: {
      __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16SubSatS: {
      __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MinS: {
      __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MaxS: {
      __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Eq: {
      __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI8x16GtS: {
      __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GeS: {
      __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16ShrU: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonU8);
      break;
    }
    case kArmI8x16UConvertI16x8:
      ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
      break;
    case kArmI8x16AddSatU: {
      __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16SubSatU: {
      __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MinU: {
      __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MaxU: {
      __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GtU: {
      __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GeU: {
      __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16RoundingAverageU: {
      __ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Abs: {
      __ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI8x16BitMask: {
      UseScratchRegisterScope temps(tasm());
      Register dst = i.OutputRegister();
      Simd128Register src = i.InputSimd128Register(0);
      Simd128Register tmp = temps.AcquireQ();
      Simd128Register mask = i.TempSimd128Register(0);

      __ vshr(NeonS8, tmp, src, 7);
      // Set the (i mod 8)-th bit of lane i in the mask. After ANDing with
      // tmp, the lanes whose sign bit was set keep that bit; all others are 0.
      __ vmov(mask.low(), base::Double(uint64_t{0x8040'2010'0804'0201}));
      __ vmov(mask.high(), base::Double(uint64_t{0x8040'2010'0804'0201}));
      __ vand(tmp, mask, tmp);
      __ vext(mask, tmp, tmp, 8);
      __ vzip(Neon8, mask, tmp);
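      // After the vext/vzip above, each 16-bit lane k of tmp holds
      // byte k | (byte (k + 8) << 8), i.e. bits k and k + 8 of the final
      // mask; the three 16-bit pairwise adds below sum the eight lanes into
      // lane 0.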
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vmov(NeonU16, dst, tmp.low(), 0);
      break;
    }
    case kArmS128Const: {
      QwNeonRegister dst = i.OutputSimd128Register();
      uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
      uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
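      // Pack the four 32-bit immediates into two 64-bit halves and
      // materialize each half as a double-precision bit pattern.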
      __ vmov(dst.low(), base::Double(imm1));
      __ vmov(dst.high(), base::Double(imm2));
      break;
    }
    case kArmS128Zero: {
      __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
              i.OutputSimd128Register());
      break;
    }
    case kArmS128AllOnes: {
      __ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
      break;
    }
    case kArmS128Dup: {
      NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
      int lanes = kSimd128Size >> size;
      int index = i.InputInt32(2);
      DCHECK(index < lanes);
      int d_lanes = lanes / 2;
      int src_d_index = index & (d_lanes - 1);
      int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
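      // E.g. for Neon16 (8 lanes) and index 5: d_lanes = 4, so the source is
      // lane 1 of the high d-register of the input.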
      __ vdup(size, i.OutputSimd128Register(),
              DwVfpRegister::from_code(src_d_code), src_d_index);
      break;
    }
    case kArmS128And: {
      __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Or: {
      __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Xor: {
      __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Not: {
      __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS128Select: {
      Simd128Register dst = i.OutputSimd128Register();
      DCHECK(dst == i.InputSimd128Register(0));
      __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
      break;
    }
    case kArmS128AndNot: {
      __ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS32x4ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 4, 5]
      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [0, 4, 1, 5]
      break;
    }
    case kArmS32x4ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
      __ vmov(dst.low(), src1.high());         // dst = [2, 3, 6, 7]
      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [2, 6, 3, 7]
      break;
    }
    case kArmS32x4UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(scratch, src1);
      __ vuzp(Neon32, dst, scratch);  // dst = [0, 2, 4, 6]
      break;
    }
    case kArmS32x4UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
      __ vmov(scratch, src1);
      __ vuzp(Neon32, scratch, dst);  // dst = [1, 3, 5, 7]
      break;
    }
    case kArmS32x4TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(scratch, src1);
      __ vtrn(Neon32, dst, scratch);  // dst = [0, 4, 2, 6]
      break;
    }
    case kArmS32x4Shuffle: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src0 = i.InputSimd128Register(0),
                      src1 = i.InputSimd128Register(1);
      DCHECK_NE(dst, src0);
      DCHECK_NE(dst, src1);
      // Perform shuffle as a vmov per lane.
      int dst_code = dst.code() * 4;
      int src0_code = src0.code() * 4;
      int src1_code = src1.code() * 4;
      int32_t shuffle = i.InputInt32(2);
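      // Each byte of {shuffle} selects a source lane, e.g. 0x07050301 picks
      // lanes 1 and 3 of src0, then lanes 1 and 3 of src1 (byte values >= 4
      // select from src1).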
      for (int i = 0; i < 4; i++) {
        int lane = shuffle & 0x7;
        int src_code = src0_code;
        if (lane >= 4) {
          src_code = src1_code;
          lane &= 0x3;
        }
        __ VmovExtended(dst_code + i, src_code + lane);
        shuffle >>= 8;
      }
      break;
    }
    case kArmS32x4TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
      __ vmov(scratch, src1);
      __ vtrn(Neon32, scratch, dst);  // dst = [1, 5, 3, 7]
      break;
    }
    case kArmS16x8ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 2, 3, 8, ... 11]
      __ vzip(Neon16, dst.low(), dst.high());  // dst = [0, 8, 1, 9, ... 11]
      break;
    }
    case kArmS16x8ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon16, dst.low(), dst.high());  // dst = [4, 12, 5, 13, ... 15]
      break;
    }
    case kArmS16x8UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      __ vmov(scratch, src1);
      __ vuzp(Neon16, dst, scratch);  // dst = [0, 2, 4, 6, ... 14]
      break;
    }
    case kArmS16x8UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(scratch, src1);
      __ vuzp(Neon16, scratch, dst);  // dst = [1, 3, 5, 7, ... 15]
      break;
    }
    case kArmS16x8TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      __ vmov(scratch, src1);
      __ vtrn(Neon16, dst, scratch);  // dst = [0, 8, 2, 10, ... 14]
      break;
    }
    case kArmS16x8TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(scratch, src1);
      __ vtrn(Neon16, scratch, dst);  // dst = [1, 9, 3, 11, ... 15]
      break;
    }
    case kArmS8x16ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(dst.high(), src1.low());
      __ vzip(Neon8, dst.low(), dst.high());  // dst = [0, 16, 1, 17, ... 23]
      break;
    }
    case kArmS8x16ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon8, dst.low(), dst.high());  // dst = [8, 24, 9, 25, ... 31]
      break;
    }
    case kArmS8x16UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(scratch, src1);
      __ vuzp(Neon8, dst, scratch);  // dst = [0, 2, 4, 6, ... 30]
      break;
    }
    case kArmS8x16UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(scratch, src1);
      __ vuzp(Neon8, scratch, dst);  // dst = [1, 3, 5, 7, ... 31]
      break;
    }
    case kArmS8x16TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(scratch, src1);
      __ vtrn(Neon8, dst, scratch);  // dst = [0, 16, 2, 18, ... 30]
      break;
    }
    case kArmS8x16TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(scratch, src1);
      __ vtrn(Neon8, scratch, dst);  // dst = [1, 17, 3, 19, ... 31]
      break;
    }
    case kArmS8x16Concat: {
      __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1), i.InputInt4(2));
      break;
    }
    case kArmI8x16Swizzle: {
      Simd128Register dst = i.OutputSimd128Register(),
                      tbl = i.InputSimd128Register(0),
                      src = i.InputSimd128Register(1);
      NeonListOperand table(tbl);
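      // vtbl reads at most one d-register of indices per instruction, so the
      // lookup is done in two halves. Out-of-range indices yield 0, which
      // matches Wasm swizzle semantics.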
      __ vtbl(dst.low(), table, src.low());
      __ vtbl(dst.high(), table, src.high());
      break;
    }
    case kArmI8x16Shuffle: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src0 = i.InputSimd128Register(0),
                      src1 = i.InputSimd128Register(1);
      DwVfpRegister table_base = src0.low();
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // For a unary shuffle the table is src0 (two d-registers); otherwise it
      // is src0 followed by src1, which must be consecutive registers.
      int table_size = src0 == src1 ? 2 : 4;
      DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
      // The shuffle lane mask is a byte mask; materialize it in scratch.
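      // Each of the four 32-bit inputs supplies four byte indices; writing
      // them to consecutive s-registers assembles the full 16-byte index
      // vector for vtbl.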
      int scratch_s_base = scratch.code() * 4;
      for (int j = 0; j < 4; j++) {
        uint32_t four_lanes = i.InputUint32(2 + j);
        DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
        __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
                Float32::FromBits(four_lanes));
      }
      NeonListOperand table(table_base, table_size);
      if (dst != src0 && dst != src1) {
        __ vtbl(dst.low(), table, scratch.low());
        __ vtbl(dst.high(), table, scratch.high());
      } else {
        __ vtbl(scratch.low(), table, scratch.low());
        __ vtbl(scratch.high(), table, scratch.high());
        __ vmov(dst, scratch);
      }
      break;
    }
    case kArmS32x2Reverse: {
      __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS16x4Reverse: {
      __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS16x2Reverse: {
      __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x8Reverse: {
      __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x4Reverse: {
      __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x2Reverse: {
      __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmV128AnyTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
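      // Fold the four words with unsigned pairwise max: the result is
      // non-zero iff any input lane is non-zero. The cmp/mov pair below then
      // materializes the boolean.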
      __ vpmax(NeonU32, scratch, src.low(), src.high());
      __ vpmax(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
      __ cmp(i.OutputRegister(), Operand(0));
      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
      break;
    }
    case kArmI64x2AllTrue: {
      __ I64x2AllTrue(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
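      // Fold with unsigned pairwise min instead: the result is non-zero iff
      // every input lane is non-zero.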
      __ vpmin(NeonU32, scratch, src.low(), src.high());
      __ vpmin(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
      __ cmp(i.OutputRegister(), Operand(0));
      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
      break;
    }
    case kArmI16x8AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
      __ vpmin(NeonU16, scratch, src.low(), src.high());
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
      __ cmp(i.OutputRegister(), Operand(0));
      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
      break;
    }
    case kArmI8x16AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
      __ vpmin(NeonU8, scratch, src.low(), src.high());
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
      __ cmp(i.OutputRegister(), Operand(0));
      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
      break;
    }
    case kArmS128Load8Splat: {
      __ vld1r(Neon8, NeonListOperand(i.OutputSimd128Register()),
               i.NeonInputOperand(0));
      break;
    }
    case kArmS128Load16Splat: {
      __ vld1r(Neon16, NeonListOperand(i.OutputSimd128Register()),
               i.NeonInputOperand(0));
      break;
    }
    case kArmS128Load32Splat: {
      __ vld1r(Neon32, NeonListOperand(i.OutputSimd128Register()),
               i.NeonInputOperand(0));
      break;
    }
    case kArmS128Load64Splat: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ Move(dst.high(), dst.low());
      break;
    }
    case kArmS128Load8x8S: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonS8, dst, dst.low());
      break;
    }
    case kArmS128Load8x8U: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonU8, dst, dst.low());
      break;
    }
    case kArmS128Load16x4S: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonS16, dst, dst.low());
      break;
    }
    case kArmS128Load16x4U: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonU16, dst, dst.low());
      break;
    }
    case kArmS128Load32x2S: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonS32, dst, dst.low());
      break;
    }
    case kArmS128Load32x2U: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      __ vmovl(NeonU32, dst, dst.low());
      break;
    }
    case kArmS128Load32Zero: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vmov(dst, 0);
      __ vld1s(Neon32, NeonListOperand(dst.low()), 0, i.NeonInputOperand(0));
      break;
    }
    case kArmS128Load64Zero: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vmov(dst.high(), 0);
      __ vld1(Neon64, NeonListOperand(dst.low()), i.NeonInputOperand(0));
      break;
    }
    case kArmS128LoadLaneLow: {
      Simd128Register dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
      NeonListOperand dst_list = NeonListOperand(dst.low());
      __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
      break;
    }
    case kArmS128LoadLaneHigh: {
      Simd128Register dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
      NeonListOperand dst_list = NeonListOperand(dst.high());
      __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
      break;
    }
    case kArmS128StoreLaneLow: {
      Simd128Register src = i.InputSimd128Register(0);
      NeonListOperand src_list = NeonListOperand(src.low());
      auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
      __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
      break;
    }
    case kArmS128StoreLaneHigh: {
      Simd128Register src = i.InputSimd128Register(0);
      NeonListOperand src_list = NeonListOperand(src.high());
      auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
      __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
      break;
    }
    case kAtomicLoadInt8:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
      break;
    case kAtomicLoadUint8:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
      break;
    case kAtomicLoadInt16:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
      break;
    case kAtomicLoadUint16:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
      break;
    case kAtomicLoadWord32:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
      break;
    case kAtomicStoreWord8:
      ASSEMBLE_ATOMIC_STORE_INTEGER(strb,
                                    AtomicMemoryOrderField::decode(opcode));
      break;
    case kAtomicStoreWord16:
      ASSEMBLE_ATOMIC_STORE_INTEGER(strh,
                                    AtomicMemoryOrderField::decode(opcode));
      break;
    case kAtomicStoreWord32:
      ASSEMBLE_ATOMIC_STORE_INTEGER(str,
                                    AtomicMemoryOrderField::decode(opcode));
      break;
    case kAtomicExchangeInt8:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kAtomicExchangeUint8:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
      break;
    case kAtomicExchangeInt16:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kAtomicExchangeUint16:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
      break;
    case kAtomicExchangeWord32:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
      break;
    case kAtomicCompareExchangeInt8:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
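      // {ldrexb} zero-extends the loaded byte, so zero-extend the expected
      // value as well before comparing.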
      __ uxtb(i.TempRegister(2), i.InputRegister(2));
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
                                               i.TempRegister(2));
      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kAtomicCompareExchangeUint8:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxtb(i.TempRegister(2), i.InputRegister(2));
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
                                               i.TempRegister(2));
      break;
    case kAtomicCompareExchangeInt16:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxth(i.TempRegister(2), i.InputRegister(2));
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
                                               i.TempRegister(2));
      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kAtomicCompareExchangeUint16:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxth(i.TempRegister(2), i.InputRegister(2));
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
                                               i.TempRegister(2));
      break;
    case kAtomicCompareExchangeWord32:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
                                               i.InputRegister(2));
      break;
#define ATOMIC_BINOP_CASE(op, inst)                    \
  case kAtomic##op##Int8:                              \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
    __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
    break;                                             \
  case kAtomic##op##Uint8:                             \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
    break;                                             \
  case kAtomic##op##Int16:                             \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
    __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
    break;                                             \
  case kAtomic##op##Uint16:                            \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
    break;                                             \
  case kAtomic##op##Word32:                            \
    ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst);         \
    break;
      ATOMIC_BINOP_CASE(Add, add)
      ATOMIC_BINOP_CASE(Sub, sub)
      ATOMIC_BINOP_CASE(And, and_)
      ATOMIC_BINOP_CASE(Or, orr)
      ATOMIC_BINOP_CASE(Xor, eor)
#undef ATOMIC_BINOP_CASE
    case kArmWord32AtomicPairLoad: {
      if (instr->OutputCount() == 2) {
        DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
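        // {ldrexd} requires a consecutive even/odd register pair, hence the
        // fixed r0/r1 outputs.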
        __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
        __ ldrexd(r0, r1, i.TempRegister(0));
        __ dmb(ISH);
      } else {
        // A special case of this instruction: even though this is a pair load,
        // we only need one of the two words. We emit a normal atomic load.
        DCHECK_EQ(instr->OutputCount(), 1);
        Register base = i.InputRegister(0);
        Register offset = i.InputRegister(1);
        DCHECK(instr->InputAt(2)->IsImmediate());
        int32_t offset_imm = i.InputInt32(2);
        if (offset_imm != 0) {
          Register temp = i.TempRegister(0);
          __ add(temp, offset, Operand(offset_imm));
          offset = temp;
        }
        __ ldr(i.OutputRegister(), MemOperand(base, offset));
        __ dmb(ISH);
      }
      break;
    }
    case kArmWord32AtomicPairStore: {
      Label store;
      Register base = i.InputRegister(0);
      Register offset = i.InputRegister(1);
      Register value_low = i.InputRegister(2);
      Register value_high = i.InputRegister(3);
      Register actual_addr = i.TempRegister(0);
      // The {ldrexd} instruction needs two temp registers. We do not need the
      // result of {ldrexd}, but {strexd} can only succeed after a matching
      // {ldrexd} has set up the exclusive monitor for the address.
      Register tmp1 = i.TempRegister(1);
      Register tmp2 = i.TempRegister(2);
      // Reuse one of the temp registers for the result of {strexd}.
      Register store_result = tmp1;
      __ add(actual_addr, base, offset);
      __ dmb(ISH);
      __ bind(&store);
      // Add this {ldrexd} instruction here so that {strexd} below can succeed.
      // We don't need the result of {ldrexd} itself.
      __ ldrexd(tmp1, tmp2, actual_addr);
      __ strexd(store_result, value_low, value_high, actual_addr);
      __ cmp(store_result, Operand(0));
      __ b(ne, &store);
      __ dmb(ISH);
      break;
    }
#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2)           \
  case kArmWord32AtomicPair##op: {                            \
    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
    ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2);            \
    break;                                                    \
  }
      ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
      ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
#undef ATOMIC_ARITH_BINOP_CASE
#define ATOMIC_LOGIC_BINOP_CASE(op, instr1)                   \
  case kArmWord32AtomicPair##op: {                            \
    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
    ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1);                    \
    break;                                                    \
  }
      ATOMIC_LOGIC_BINOP_CASE(And, and_)
      ATOMIC_LOGIC_BINOP_CASE(Or, orr)
      ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
#undef ATOMIC_LOGIC_BINOP_CASE
    case kArmWord32AtomicPairExchange: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
      Label exchange;
      __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
      __ dmb(ISH);
      __ bind(&exchange);
      __ ldrexd(r6, r7, i.TempRegister(0));
      __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
                i.TempRegister(0));
      __ teq(i.TempRegister(1), Operand(0));
      __ b(ne, &exchange);
      __ dmb(ISH);
      break;
    }
    case kArmWord32AtomicPairCompareExchange: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
      __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
      Label compareExchange;
      Label exit;
      __ dmb(ISH);
      __ bind(&compareExchange);
      __ ldrexd(r2, r3, i.TempRegister(0));
      __ teq(i.InputRegister(0), Operand(r2));
      __ b(ne, &exit);
      __ teq(i.InputRegister(1), Operand(r3));
      __ b(ne, &exit);
      __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
                i.TempRegister(0));
      __ teq(i.TempRegister(1), Operand(0));
      __ b(ne, &compareExchange);
      __ bind(&exit);
      __ dmb(ISH);
      break;
    }
#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
#undef ASSEMBLE_ATOMIC_STORE_INTEGER
#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_NEON_NARROWING_OP
#undef ASSEMBLE_SIMD_SHIFT_LEFT
#undef ASSEMBLE_SIMD_SHIFT_RIGHT
  }
  return kSuccess;
}

// Assembles branches after an instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
  ArmOperandConverter i(this, instr);
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  Condition cc = FlagsConditionToCondition(branch->condition);
  __ b(cc, tlabel);
  if (!branch->fallthru) __ b(flabel);  // no fallthru to flabel.
}

void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}

void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
    RpoNumber target) {
  __ b(GetLabel(target));
}

#if V8_ENABLE_WEBASSEMBLY
void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      ArmOperandConverter i(gen_, instr_);
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        // We use the context register as the scratch register, because we do
        // not have a context here.
        __ PrepareCallCFunction(0, 0);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        int pop_count = static_cast<int>(call_descriptor->ParameterSlotCount());
        __ Drop(pop_count);
        __ Ret();
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched when the code
        // is added to the native module and copied into wasm code space.
        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        ReferenceMap* reference_map =
            gen_->zone()->New<ReferenceMap>(gen_->zone());
        gen_->RecordSafepoint(reference_map);
        if (FLAG_debug_code) {
          __ stop();
        }
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = zone()->New<OutOfLineTrap>(this, instr);
  Label* tlabel = ool->entry();
  Condition cc = FlagsConditionToCondition(condition);
  __ b(cc, tlabel);
}
#endif  // V8_ENABLE_WEBASSEMBLY

// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  ArmOperandConverter i(this, instr);

  // Materialize a full 32-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  Condition cc = FlagsConditionToCondition(condition);
  __ mov(reg, Operand(0));
  __ mov(reg, Operand(1), LeaveCC, cc);
}

void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
  ArmOperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  std::vector<std::pair<int32_t, Label*>> cases;
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
  }
  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
                                      cases.data() + cases.size());
}

void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  ArmOperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  size_t const case_count = instr->InputCount() - 2;
  // This {cmp} might still emit a constant pool entry.
  __ cmp(input, Operand(case_count));
  // Ensure to emit the constant pool first if necessary.
  __ CheckConstPool(true, true);
  __ BlockConstPoolFor(case_count + 2);
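  // Reading pc yields the address of this add instruction plus 8, which is
  // exactly the first entry of the branch table emitted below; adding
  // input * 4 (one 4-byte branch per case) thus dispatches to the matching
  // case. The add is only executed when {input} is in range (lo); otherwise
  // execution falls through to the default branch.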
  __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
  __ b(GetLabel(i.InputRpo(1)));
  for (size_t index = 0; index < case_count; ++index) {
    __ b(GetLabel(i.InputRpo(index + 2)));
  }
}

void CodeGenerator::AssembleArchSelect(Instruction* instr,
                                       FlagsCondition condition) {
  UNIMPLEMENTED();
}

void CodeGenerator::FinishFrame(Frame* frame) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (!saves_fp.is_empty()) {
    frame->AlignSavedCalleeRegisterSlots();
  }

  if (!saves_fp.is_empty()) {
    // Save callee-saved FP registers.
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    uint32_t last = base::bits::CountLeadingZeros32(saves_fp.bits()) - 1;
    uint32_t first = base::bits::CountTrailingZeros32(saves_fp.bits());
    DCHECK_EQ((last - first + 1), saves_fp.Count());
    frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
                                            (kDoubleSize / kSystemPointerSize));
  }
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (!saves.is_empty()) {
    // Save callee-saved registers.
    frame->AllocateSavedCalleeRegisterSlots(saves.Count());
  }
}

void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
#if V8_ENABLE_WEBASSEMBLY
      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
        __ StubPrologue(StackFrame::C_WASM_ENTRY);
        // Reserve stack space for saving the c_entry_fp later.
        __ AllocateStackSpace(kSystemPointerSize);
#else
      // For balance.
      if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
      } else {
        __ Push(lr, fp);
        __ mov(fp, sp);
      }
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
#if V8_ENABLE_WEBASSEMBLY
      if (call_descriptor->IsWasmFunctionCall() ||
          call_descriptor->IsWasmImportWrapper() ||
          call_descriptor->IsWasmCapiFunction()) {
        __ Push(kWasmInstanceRegister);
      }
      if (call_descriptor->IsWasmCapiFunction()) {
        // Reserve space for saving the PC later.
        __ AllocateStackSpace(kSystemPointerSize);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
    }

    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
  }

  int required_slots =
      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    required_slots -= osr_helper()->UnoptimizedFrameSlots();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();

  if (required_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
#if V8_ENABLE_WEBASSEMBLY
    if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.Acquire();
        __ ldr(scratch, FieldMemOperand(
                            kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ ldr(scratch, MemOperand(scratch));
        __ add(scratch, scratch, Operand(required_slots * kSystemPointerSize));
        __ cmp(sp, scratch);
        __ b(cs, &done);
      }

      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
      // The call does not return, hence we can ignore any references and just
      // define an empty safepoint.
      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
      RecordSafepoint(reference_map);
      if (FLAG_debug_code) __ stop();

      __ bind(&done);
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    // Skip callee-saved and return slots, which are pushed below.
    required_slots -= saves.Count();
    required_slots -= frame()->GetReturnSlotCount();
    required_slots -= 2 * saves_fp.Count();
    if (required_slots > 0) {
      __ AllocateStackSpace(required_slots * kSystemPointerSize);
    }
  }

  if (!saves_fp.is_empty()) {
    // Save callee-saved FP registers.
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    __ vstm(db_w, sp, saves_fp.first(), saves_fp.last());
  }

  if (!saves.is_empty()) {
    // Save callee-saved registers.
    __ stm(db_w, sp, saves);
  }

  const int returns = frame()->GetReturnSlotCount();
  // Create space for returns.
  __ AllocateStackSpace(returns * kSystemPointerSize);
}

void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const int returns = frame()->GetReturnSlotCount();
  if (returns != 0) {
    // Free space of returns.
    __ add(sp, sp, Operand(returns * kSystemPointerSize));
  }

  // Restore registers.
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (!saves.is_empty()) {
    __ ldm(ia_w, sp, saves);
  }

  // Restore FP registers.
  const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (!saves_fp.is_empty()) {
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    __ vldm(ia_w, sp, saves_fp.first(), saves_fp.last());
  }

  unwinding_info_writer_.MarkBlockWillExit();

  ArmOperandConverter g(this, nullptr);
  const int parameter_slots =
      static_cast<int>(call_descriptor->ParameterSlotCount());

  // {additional_pop_count} is only greater than zero if {parameter_slots ==
  // 0}. Check RawMachineAssembler::PopAndReturn.
  if (parameter_slots != 0) {
    if (additional_pop_count->IsImmediate()) {
      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
    } else if (FLAG_debug_code) {
      __ cmp(g.ToRegister(additional_pop_count), Operand(0));
      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
    }
  }

  Register argc_reg = r3;
  // Functions with JS linkage have at least one parameter (the receiver).
  // If {parameter_slots} == 0, it means it is a builtin with
  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
  // itself.
  const bool drop_jsargs = parameter_slots != 0 &&
                           frame_access_state()->has_frame() &&
                           call_descriptor->IsJSFunctionCall();
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now unless they have a
    // variable number of stack slot pops.
3859     if (additional_pop_count->IsImmediate() &&
3860         g.ToConstant(additional_pop_count).ToInt32() == 0) {
3861       if (return_label_.is_bound()) {
3862         __ b(&return_label_);
3863         return;
3864       } else {
3865         __ bind(&return_label_);
3866       }
3867     }
3868     if (drop_jsargs) {
3869       // Get the actual argument count.
3870       __ ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
3871       DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
3872     }
3873     AssembleDeconstructFrame();
3874   }
3875 
3876   if (drop_jsargs) {
3877     // We must pop all arguments from the stack (including the receiver).
3878     // The number of arguments without the receiver is
3879     // max(argc_reg, parameter_slots-1), and the receiver is added in
3880     // DropArguments().
3881     DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
3882     if (parameter_slots > 1) {
3883       __ cmp(argc_reg, Operand(parameter_slots));
3884       __ mov(argc_reg, Operand(parameter_slots), LeaveCC, lt);
3885     }
3886     __ DropArguments(argc_reg, TurboAssembler::kCountIsInteger,
3887                      TurboAssembler::kCountIncludesReceiver);
3888   } else if (additional_pop_count->IsImmediate()) {
3889     DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
3890     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
3891     __ Drop(parameter_slots + additional_count);
3892   } else if (parameter_slots == 0) {
3893     __ Drop(g.ToRegister(additional_pop_count));
3894   } else {
3895     // {additional_pop_count} is guaranteed to be zero if {parameter_slots !=
3896     // 0}. Check RawMachineAssembler::PopAndReturn.
3897     __ Drop(parameter_slots);
3898   }
3899   __ Ret();
3900 }
3901 
3902 void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
3903 
void CodeGenerator::PrepareForDeoptimizationExits(
    ZoneDeque<DeoptimizationExit*>* exits) {
  __ CheckConstPool(true, false);
}

void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  ArmOperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    if (src.type() == Constant::kHeapObject) {
      Handle<HeapObject> src_object = src.ToHeapObject();
      RootIndex index;
      if (IsMaterializableFromRoot(src_object, &index)) {
        __ LoadRoot(dst, index);
      } else {
        __ Move(dst, src_object);
      }
    } else if (src.type() == Constant::kExternalReference) {
      __ Move(dst, src.ToExternalReference());
    } else {
      __ mov(dst, g.ToImmediate(source));
    }
  };
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else if (source->IsFloatRegister()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int src_code = LocationOperand::cast(source)->register_code();
        int dst_code = LocationOperand::cast(destination)->register_code();
        __ VmovExtended(dst_code, src_code);
      } else if (source->IsDoubleRegister()) {
        __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      } else {
        __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsRegister()) {
        __ str(g.ToRegister(source), dst);
      } else if (source->IsFloatRegister()) {
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int src_code = LocationOperand::cast(source)->register_code();
        __ VmovExtended(dst, src_code);
      } else if (source->IsDoubleRegister()) {
        __ vstr(g.ToDoubleRegister(source), dst);
      } else {
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister src = g.ToSimd128Register(source);
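        // vst1 only takes a plain register-based address (no immediate
        // offset), so materialize the slot address in a scratch register.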
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToRegister: {
      MemOperand src = g.ToMemOperand(source);
      if (source->IsStackSlot()) {
        __ ldr(g.ToRegister(destination), src);
      } else if (source->IsFloatStackSlot()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int dst_code = LocationOperand::cast(destination)->register_code();
        __ VmovExtended(dst_code, src);
      } else if (source->IsDoubleStackSlot()) {
        __ vldr(g.ToDoubleRegister(destination), src);
      } else {
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister dst = g.ToSimd128Register(destination);
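        // As for vst1 above, vld1 only takes a register-based address, so
        // compute the slot address into a scratch register first.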
        __ add(temp, src.rn(), Operand(src.offset()));
        __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source);
      MemOperand dst = g.ToMemOperand(destination);
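      // Stack-to-stack moves go through scratch VFP (or NEON) registers; only
      // the Simd128 case needs a general purpose scratch, for addressing.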
      UseScratchRegisterScope temps(tasm());
      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
        SwVfpRegister temp = temps.AcquireS();
        __ vldr(temp, src);
        __ vstr(temp, dst);
      } else if (source->IsDoubleStackSlot()) {
        DwVfpRegister temp = temps.AcquireD();
        __ vldr(temp, src);
        __ vstr(temp, dst);
      } else {
        DCHECK(source->IsSimd128StackSlot());
        Register temp = temps.Acquire();
        QwNeonRegister temp_q = temps.AcquireQ();
        __ add(temp, src.rn(), Operand(src.offset()));
        __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else if (destination->IsFloatRegister()) {
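        // Transfer the raw bit pattern rather than the value, so the constant
        // is reproduced exactly (including any NaN payload).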
        __ vmov(g.ToFloatRegister(destination),
                Float32::FromBits(src.ToFloat32AsInt()));
      } else {
        // TODO(arm): Look into optimizing this further if possible. Supporting
        // the NEON version of VMOV may help.
        __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      MemOperand dst = g.ToMemOperand(destination);
      if (destination->IsStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        // Acquire an S register instead of a general purpose register in case
        // `vstr` needs one to compute the address of `dst`.
        SwVfpRegister s_temp = temps.AcquireS();
        {
          // TODO(arm): This sequence could be optimized further if necessary by
          // writing the constant directly into `s_temp`.
          UseScratchRegisterScope temps(tasm());
          Register temp = temps.Acquire();
          MoveConstantToRegister(temp, src);
          __ vmov(s_temp, temp);
        }
        __ vstr(s_temp, dst);
      } else if (destination->IsFloatStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp = temps.AcquireS();
        __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
        __ vstr(temp, dst);
      } else {
        DCHECK(destination->IsDoubleStackSlot());
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp = temps.AcquireD();
        // TODO(arm): Look into optimizing this further if possible. Supporting
        // the NEON version of VMOV may help.
        __ vmov(temp, src.ToFloat64());
        __ vstr(temp, dst);
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  ArmOperandConverter g(this, nullptr);
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Swap(g.ToRegister(source), g.ToRegister(destination));
      } else if (source->IsFloatRegister()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        int src_code = LocationOperand::cast(source)->register_code();
        int dst_code = LocationOperand::cast(destination)->register_code();
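        // Three-move swap through one half of the scratch D register:
        // temp = src; src = dst; dst = temp.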
        __ VmovExtended(temp.low().code(), src_code);
        __ VmovExtended(src_code, dst_code);
        __ VmovExtended(dst_code, temp.low().code());
      } else if (source->IsDoubleRegister()) {
        __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
      } else {
        __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp = temps.AcquireS();
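        // temp = src; src = [dst]; [dst] = temp. Using an S register as the
        // temporary avoids tying up another general purpose register.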
        __ vmov(temp, src);
        __ ldr(src, dst);
        __ vstr(temp, dst);
      } else if (source->IsFloatRegister()) {
        int src_code = LocationOperand::cast(source)->register_code();
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        __ VmovExtended(temp.low().code(), src_code);
        __ VmovExtended(src_code, dst);
        __ vstr(temp.low(), dst);
      } else if (source->IsDoubleRegister()) {
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp = temps.AcquireD();
        DwVfpRegister src = g.ToDoubleRegister(source);
        __ Move(temp, src);
        __ vldr(src, dst);
        __ vstr(temp, dst);
      } else {
        QwNeonRegister src = g.ToSimd128Register(source);
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister temp_q = temps.AcquireQ();
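        // Save src in temp_q, load the stack slot into src, then store the
        // saved value back to the slot. vld1/vst1 need the slot address in a
        // plain register, hence the add below.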
        __ Move(temp_q, src);
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source);
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
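        // 32-bit slots: swap the two values through a pair of scratch
        // S registers.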
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp_0 = temps.AcquireS();
        SwVfpRegister temp_1 = temps.AcquireS();
        __ vldr(temp_0, dst);
        __ vldr(temp_1, src);
        __ vstr(temp_0, src);
        __ vstr(temp_1, dst);
      } else if (source->IsDoubleStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        if (temps.CanAcquireD()) {
          DwVfpRegister temp_0 = temp;
          DwVfpRegister temp_1 = temps.AcquireD();
          __ vldr(temp_0, dst);
          __ vldr(temp_1, src);
          __ vstr(temp_0, src);
          __ vstr(temp_1, dst);
        } else {
          // We only have a single D register available. However, we can split
          // it into 2 S registers and swap the slots 32 bits at a time.
          MemOperand src0 = src;
          MemOperand dst0 = dst;
          MemOperand src1(src.rn(), src.offset() + kFloatSize);
          MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
          SwVfpRegister temp_0 = temp.low();
          SwVfpRegister temp_1 = temp.high();
          __ vldr(temp_0, dst0);
          __ vldr(temp_1, src0);
          __ vstr(temp_0, src0);
          __ vstr(temp_1, dst0);
          __ vldr(temp_0, dst1);
          __ vldr(temp_1, src1);
          __ vstr(temp_0, src1);
          __ vstr(temp_1, dst1);
        }
      } else {
        DCHECK(source->IsSimd128StackSlot());
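        // Swap the two 128-bit slots as two 64-bit halves each, using a pair
        // of scratch D registers.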
        MemOperand src0 = src;
        MemOperand dst0 = dst;
        MemOperand src1(src.rn(), src.offset() + kDoubleSize);
        MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp_0 = temps.AcquireD();
        DwVfpRegister temp_1 = temps.AcquireD();
        __ vldr(temp_0, dst0);
        __ vldr(temp_1, src0);
        __ vstr(temp_0, src0);
        __ vstr(temp_1, dst0);
        __ vldr(temp_0, dst1);
        __ vldr(temp_1, src1);
        __ vstr(temp_0, src1);
        __ vstr(temp_1, dst1);
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  // On 32-bit ARM we emit the jump tables inline.
  UNREACHABLE();
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8