1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/base/numbers/double.h"
6 #include "src/codegen/arm/constants-arm.h"
7 #include "src/codegen/assembler-inl.h"
8 #include "src/codegen/macro-assembler.h"
9 #include "src/codegen/optimized-compilation-info.h"
10 #include "src/compiler/backend/code-generator-impl.h"
11 #include "src/compiler/backend/code-generator.h"
12 #include "src/compiler/backend/gap-resolver.h"
13 #include "src/compiler/backend/instruction-codes.h"
14 #include "src/compiler/node-matchers.h"
15 #include "src/compiler/osr.h"
16 #include "src/heap/memory-chunk.h"
17 #include "src/utils/boxed-float.h"
18
19 #if V8_ENABLE_WEBASSEMBLY
20 #include "src/wasm/wasm-code-manager.h"
21 #include "src/wasm/wasm-objects.h"
22 #endif // V8_ENABLE_WEBASSEMBLY
23
24 namespace v8 {
25 namespace internal {
26 namespace compiler {
27
28 #define __ tasm()->
29
30 // Adds Arm-specific methods to convert InstructionOperands.
31 class ArmOperandConverter final : public InstructionOperandConverter {
32 public:
33 ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
34 : InstructionOperandConverter(gen, instr) {}
35
36 SBit OutputSBit() const {
37 switch (instr_->flags_mode()) {
38 case kFlags_branch:
39 case kFlags_deoptimize:
40 case kFlags_set:
41 case kFlags_trap:
42 case kFlags_select:
43 return SetCC;
44 case kFlags_none:
45 return LeaveCC;
46 }
47 UNREACHABLE();
48 }
49
50 Operand InputImmediate(size_t index) const {
51 return ToImmediate(instr_->InputAt(index));
52 }
53
54 Operand InputOperand2(size_t first_index) {
55 const size_t index = first_index;
56 switch (AddressingModeField::decode(instr_->opcode())) {
57 case kMode_None:
58 case kMode_Offset_RI:
59 case kMode_Offset_RR:
60 case kMode_Root:
61 break;
62 case kMode_Operand2_I:
63 return InputImmediate(index + 0);
64 case kMode_Operand2_R:
65 return Operand(InputRegister(index + 0));
66 case kMode_Operand2_R_ASR_I:
67 return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
68 case kMode_Operand2_R_ASR_R:
69 return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
70 case kMode_Operand2_R_LSL_I:
71 return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
72 case kMode_Operand2_R_LSL_R:
73 return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
74 case kMode_Operand2_R_LSR_I:
75 return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
76 case kMode_Operand2_R_LSR_R:
77 return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
78 case kMode_Operand2_R_ROR_I:
79 return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
80 case kMode_Operand2_R_ROR_R:
81 return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
82 }
83 UNREACHABLE();
84 }
85
86 MemOperand InputOffset(size_t* first_index) {
87 const size_t index = *first_index;
88 switch (AddressingModeField::decode(instr_->opcode())) {
89 case kMode_None:
90 case kMode_Operand2_I:
91 case kMode_Operand2_R:
92 case kMode_Operand2_R_ASR_I:
93 case kMode_Operand2_R_ASR_R:
94 case kMode_Operand2_R_LSL_R:
95 case kMode_Operand2_R_LSR_I:
96 case kMode_Operand2_R_LSR_R:
97 case kMode_Operand2_R_ROR_I:
98 case kMode_Operand2_R_ROR_R:
99 break;
100 case kMode_Operand2_R_LSL_I:
101 *first_index += 3;
102 return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
103 LSL, InputInt32(index + 2));
104 case kMode_Offset_RI:
105 *first_index += 2;
106 return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
107 case kMode_Offset_RR:
108 *first_index += 2;
109 return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
110 case kMode_Root:
111 *first_index += 1;
112 return MemOperand(kRootRegister, InputInt32(index));
113 }
114 UNREACHABLE();
115 }
116
117 MemOperand InputOffset(size_t first_index = 0) {
118 return InputOffset(&first_index);
119 }
120
121 Operand ToImmediate(InstructionOperand* operand) const {
122 Constant constant = ToConstant(operand);
123 switch (constant.type()) {
124 case Constant::kInt32:
125 #if V8_ENABLE_WEBASSEMBLY
126 if (RelocInfo::IsWasmReference(constant.rmode())) {
127 return Operand(constant.ToInt32(), constant.rmode());
128 }
129 #endif // V8_ENABLE_WEBASSEMBLY
130 return Operand(constant.ToInt32());
131 case Constant::kFloat32:
132 return Operand::EmbeddedNumber(constant.ToFloat32());
133 case Constant::kFloat64:
134 return Operand::EmbeddedNumber(constant.ToFloat64().value());
135 case Constant::kExternalReference:
136 return Operand(constant.ToExternalReference());
137 case Constant::kDelayedStringConstant:
138 return Operand::EmbeddedStringConstant(
139 constant.ToDelayedStringConstant());
140 case Constant::kInt64:
141 case Constant::kCompressedHeapObject:
142 case Constant::kHeapObject:
143 // TODO(dcarney): loading RPO constants on arm.
144 case Constant::kRpoNumber:
145 break;
146 }
147 UNREACHABLE();
148 }
149
150 MemOperand ToMemOperand(InstructionOperand* op) const {
151 DCHECK_NOT_NULL(op);
152 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
153 return SlotToMemOperand(AllocatedOperand::cast(op)->index());
154 }
155
156 MemOperand SlotToMemOperand(int slot) const {
157 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
158 return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
159 }
160
161 NeonMemOperand NeonInputOperand(size_t first_index) {
162 const size_t index = first_index;
163 switch (AddressingModeField::decode(instr_->opcode())) {
164 case kMode_Operand2_R:
165 return NeonMemOperand(InputRegister(index + 0));
166 default:
167 break;
168 }
169 UNREACHABLE();
170 }
171 };
172
173 namespace {
174
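// Out-of-line portion of the write barrier: checks the stored value's page
// flags and, when a record is actually needed, calls the record-write stub
// (or the ephemeron key barrier), saving lr when no frame was built for this
// code object.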
175 class OutOfLineRecordWrite final : public OutOfLineCode {
176 public:
177 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
178 Register value, RecordWriteMode mode,
179 StubCallMode stub_mode,
180 UnwindingInfoWriter* unwinding_info_writer)
181 : OutOfLineCode(gen),
182 object_(object),
183 offset_(offset),
184 value_(value),
185 mode_(mode),
186 #if V8_ENABLE_WEBASSEMBLY
187 stub_mode_(stub_mode),
188 #endif // V8_ENABLE_WEBASSEMBLY
189 must_save_lr_(!gen->frame_access_state()->has_frame()),
190 unwinding_info_writer_(unwinding_info_writer),
191 zone_(gen->zone()) {
192 }
193
194 void Generate() final {
195 __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
196 exit());
197 RememberedSetAction const remembered_set_action =
198 mode_ > RecordWriteMode::kValueIsMap ||
199 FLAG_use_full_record_write_builtin
200 ? RememberedSetAction::kEmit
201 : RememberedSetAction::kOmit;
202 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
203 ? SaveFPRegsMode::kSave
204 : SaveFPRegsMode::kIgnore;
205 if (must_save_lr_) {
206 // We need to save and restore lr if the frame was elided.
207 __ Push(lr);
208 unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
209 }
210 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
211 __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
212 #if V8_ENABLE_WEBASSEMBLY
213 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
214 __ CallRecordWriteStubSaveRegisters(object_, offset_,
215 remembered_set_action, save_fp_mode,
216 StubCallMode::kCallWasmRuntimeStub);
217 #endif // V8_ENABLE_WEBASSEMBLY
218 } else {
219 __ CallRecordWriteStubSaveRegisters(object_, offset_,
220 remembered_set_action, save_fp_mode);
221 }
222 if (must_save_lr_) {
223 __ Pop(lr);
224 unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
225 }
226 }
227
228 private:
229 Register const object_;
230 Operand const offset_;
231 Register const value_;
232 RecordWriteMode const mode_;
233 #if V8_ENABLE_WEBASSEMBLY
234 StubCallMode stub_mode_;
235 #endif // V8_ENABLE_WEBASSEMBLY
236 bool must_save_lr_;
237 UnwindingInfoWriter* const unwinding_info_writer_;
238 Zone* zone_;
239 };
240
241 template <typename T>
242 class OutOfLineFloatMin final : public OutOfLineCode {
243 public:
244 OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
245 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
246
247 void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
248
249 private:
250 T const result_;
251 T const left_;
252 T const right_;
253 };
254 using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>;
255 using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>;
256
257 template <typename T>
258 class OutOfLineFloatMax final : public OutOfLineCode {
259 public:
260 OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
261 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
262
263 void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
264
265 private:
266 T const result_;
267 T const left_;
268 T const right_;
269 };
270 using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>;
271 using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>;
272
273 Condition FlagsConditionToCondition(FlagsCondition condition) {
274 switch (condition) {
275 case kEqual:
276 return eq;
277 case kNotEqual:
278 return ne;
279 case kSignedLessThan:
280 return lt;
281 case kSignedGreaterThanOrEqual:
282 return ge;
283 case kSignedLessThanOrEqual:
284 return le;
285 case kSignedGreaterThan:
286 return gt;
287 case kUnsignedLessThan:
288 return lo;
289 case kUnsignedGreaterThanOrEqual:
290 return hs;
291 case kUnsignedLessThanOrEqual:
292 return ls;
293 case kUnsignedGreaterThan:
294 return hi;
295 case kFloatLessThanOrUnordered:
296 return lt;
297 case kFloatGreaterThanOrEqual:
298 return ge;
299 case kFloatLessThanOrEqual:
300 return ls;
301 case kFloatGreaterThanOrUnordered:
302 return hi;
303 case kFloatLessThan:
304 return lo;
305 case kFloatGreaterThanOrEqualOrUnordered:
306 return hs;
307 case kFloatLessThanOrEqualOrUnordered:
308 return le;
309 case kFloatGreaterThan:
310 return gt;
311 case kOverflow:
312 return vs;
313 case kNotOverflow:
314 return vc;
315 case kPositiveOrZero:
316 return pl;
317 case kNegative:
318 return mi;
319 default:
320 break;
321 }
322 UNREACHABLE();
323 }
324
325 } // namespace
326
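// The atomic load/store macros below implement the required memory-order
// semantics with explicit dmb(ISH) barriers: loads are followed by a barrier,
// and sequentially consistent stores are bracketed by barriers on both sides.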
327 #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
328 do { \
329 __ asm_instr(i.OutputRegister(), \
330 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
331 __ dmb(ISH); \
332 } while (0)
333
334 #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, order) \
335 do { \
336 __ dmb(ISH); \
337 __ asm_instr(i.InputRegister(0), i.InputOffset(1)); \
338 if (order == AtomicMemoryOrder::kSeqCst) __ dmb(ISH); \
339 } while (0)
340
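// Atomic exchange via a load-exclusive/store-exclusive retry loop: the strex
// status ends up in TempRegister(0) and is zero only on success, so the loop
// repeats until the exclusive store goes through.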
341 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \
342 do { \
343 Label exchange; \
344 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
345 __ dmb(ISH); \
346 __ bind(&exchange); \
347 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
348 __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
349 __ teq(i.TempRegister(0), Operand(0)); \
350 __ b(ne, &exchange); \
351 __ dmb(ISH); \
352 } while (0)
353
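// Compare-and-exchange: load-exclusive the current value, bail out to |exit|
// if it differs from the expected value in |cmp_reg|, otherwise attempt the
// store-exclusive and retry on contention.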
354 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \
355 cmp_reg) \
356 do { \
357 Label compareExchange; \
358 Label exit; \
359 __ dmb(ISH); \
360 __ bind(&compareExchange); \
361 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
362 __ teq(cmp_reg, Operand(i.OutputRegister(0))); \
363 __ b(ne, &exit); \
364 __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
365 __ teq(i.TempRegister(0), Operand(0)); \
366 __ b(ne, &compareExchange); \
367 __ bind(&exit); \
368 __ dmb(ISH); \
369 } while (0)
370
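// Atomic read-modify-write ops follow the same ldrex/strex pattern; the old
// value is returned in OutputRegister(0) and the updated value is kept in a
// temporary register.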
371 #define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \
372 do { \
373 Label binop; \
374 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
375 __ dmb(ISH); \
376 __ bind(&binop); \
377 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
378 __ bin_instr(i.TempRegister(0), i.OutputRegister(0), \
379 Operand(i.InputRegister(2))); \
380 __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
381 __ teq(i.TempRegister(2), Operand(0)); \
382 __ b(ne, &binop); \
383 __ dmb(ISH); \
384 } while (0)
385
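// 64-bit atomics use ldrexd/strexd on the fixed register pair r2/r3 and
// combine the two 32-bit halves either with a carry-propagating instruction
// pair (e.g. add/adc, sub/sbc) or with a pair of logical instructions.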
386 #define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \
387 do { \
388 Label binop; \
389 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
390 __ dmb(ISH); \
391 __ bind(&binop); \
392 __ ldrexd(r2, r3, i.TempRegister(0)); \
393 __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \
394 __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
395 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
396 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
397 i.TempRegister(0)); \
398 __ teq(i.TempRegister(3), Operand(0)); \
399 __ b(ne, &binop); \
400 __ dmb(ISH); \
401 } while (0)
402
403 #define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \
404 do { \
405 Label binop; \
406 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
407 __ dmb(ISH); \
408 __ bind(&binop); \
409 __ ldrexd(r2, r3, i.TempRegister(0)); \
410 __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0))); \
411 __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
412 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
413 i.TempRegister(0)); \
414 __ teq(i.TempRegister(3), Operand(0)); \
415 __ b(ne, &binop); \
416 __ dmb(ISH); \
417 } while (0)
418
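// The IEEE754 helpers call out to C implementations of the math functions;
// arguments and results are passed through the FP parameter and result
// registers per the C calling convention.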
419 #define ASSEMBLE_IEEE754_BINOP(name) \
420 do { \
421 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
422 /* and generate a CallAddress instruction instead. */ \
423 FrameScope scope(tasm(), StackFrame::MANUAL); \
424 __ PrepareCallCFunction(0, 2); \
425 __ MovToFloatParameters(i.InputDoubleRegister(0), \
426 i.InputDoubleRegister(1)); \
427 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
428 /* Move the result in the double result register. */ \
429 __ MovFromFloatResult(i.OutputDoubleRegister()); \
430 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
431 } while (0)
432
433 #define ASSEMBLE_IEEE754_UNOP(name) \
434 do { \
435 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
436 /* and generate a CallAddress instruction instead. */ \
437 FrameScope scope(tasm(), StackFrame::MANUAL); \
438 __ PrepareCallCFunction(0, 1); \
439 __ MovToFloatParameter(i.InputDoubleRegister(0)); \
440 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
441 /* Move the result in the double result register. */ \
442 __ MovFromFloatResult(i.OutputDoubleRegister()); \
443 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
444 } while (0)
445
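// Narrowing packs the two 128-bit sources into the low and high D halves of
// the destination with vqmovn; the emission order below avoids clobbering a
// source register that aliases the destination.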
446 #define ASSEMBLE_NEON_NARROWING_OP(dt, sdt) \
447 do { \
448 Simd128Register dst = i.OutputSimd128Register(), \
449 src0 = i.InputSimd128Register(0), \
450 src1 = i.InputSimd128Register(1); \
451 if (dst == src0 && dst == src1) { \
452 __ vqmovn(dt, sdt, dst.low(), src0); \
453 __ vmov(dst.high(), dst.low()); \
454 } else if (dst == src0) { \
455 __ vqmovn(dt, sdt, dst.low(), src0); \
456 __ vqmovn(dt, sdt, dst.high(), src1); \
457 } else { \
458 __ vqmovn(dt, sdt, dst.high(), src1); \
459 __ vqmovn(dt, sdt, dst.low(), src0); \
460 } \
461 } while (0)
462
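// Arm32 NEON has no double-precision vector instructions, so f64x2 arithmetic
// is emulated lane-by-lane on the low and high D registers of the Q register.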
463 #define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \
464 do { \
465 __ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(), \
466 i.InputSimd128Register(1).low()); \
467 __ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \
468 i.InputSimd128Register(1).high()); \
469 } while (0)
470
471 // If shift value is an immediate, we can call asm_imm, taking the shift value
472 // modulo 2^width. Otherwise, emit code to perform the modulus operation, and
473 // call vshl.
474 #define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \
475 do { \
476 QwNeonRegister dst = i.OutputSimd128Register(); \
477 QwNeonRegister src = i.InputSimd128Register(0); \
478 if (instr->InputAt(1)->IsImmediate()) { \
479 __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
480 } else { \
481 UseScratchRegisterScope temps(tasm()); \
482 Simd128Register tmp = temps.AcquireQ(); \
483 Register shift = temps.Acquire(); \
484 constexpr int mask = (1 << width) - 1; \
485 __ and_(shift, i.InputRegister(1), Operand(mask)); \
486 __ vdup(sz, tmp, shift); \
487 __ vshl(dt, dst, src, tmp); \
488 } \
489 } while (0)
490
491 // If shift value is an immediate, we can call asm_imm, taking the shift value
492 // modulo 2^width. Otherwise, emit code to perform the modulus operation, and
493 // call vshl, passing in the negative shift value (treated as a right shift).
494 #define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \
495 do { \
496 QwNeonRegister dst = i.OutputSimd128Register(); \
497 QwNeonRegister src = i.InputSimd128Register(0); \
498 if (instr->InputAt(1)->IsImmediate()) { \
499 __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
500 } else { \
501 UseScratchRegisterScope temps(tasm()); \
502 Simd128Register tmp = temps.AcquireQ(); \
503 Register shift = temps.Acquire(); \
504 constexpr int mask = (1 << width) - 1; \
505 __ and_(shift, i.InputRegister(1), Operand(mask)); \
506 __ vdup(sz, tmp, shift); \
507 __ vneg(sz, tmp, tmp); \
508 __ vshl(dt, dst, src, tmp); \
509 } \
510 } while (0)
511
512 void CodeGenerator::AssembleDeconstructFrame() {
513 __ LeaveFrame(StackFrame::MANUAL);
514 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
515 }
516
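// Reloads lr and fp from the current frame (without popping it) so the
// tail-called code observes the caller's frame state.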
517 void CodeGenerator::AssemblePrepareTailCall() {
518 if (frame_access_state()->has_frame()) {
519 __ ldm(ia, fp, {lr, fp});
520 }
521 frame_access_state()->SetFrameAccessToSP();
522 }
523
524 namespace {
525
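// Emits the registers queued for pushing (at most three at a time) and
// updates the SP delta bookkeeping accordingly.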
526 void FlushPendingPushRegisters(TurboAssembler* tasm,
527 FrameAccessState* frame_access_state,
528 ZoneVector<Register>* pending_pushes) {
529 switch (pending_pushes->size()) {
530 case 0:
531 break;
532 case 1:
533 tasm->push((*pending_pushes)[0]);
534 break;
535 case 2:
536 tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
537 break;
538 case 3:
539 tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
540 (*pending_pushes)[2]);
541 break;
542 default:
543 UNREACHABLE();
544 }
545 frame_access_state->IncreaseSPDelta(pending_pushes->size());
546 pending_pushes->clear();
547 }
548
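// Adjusts sp towards the slot count expected by the tail call, growing the
// stack (or shrinking it when |allow_shrinkage| is set) and flushing any
// pending pushes first so their target offsets stay correct.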
549 void AdjustStackPointerForTailCall(
550 TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
551 ZoneVector<Register>* pending_pushes = nullptr,
552 bool allow_shrinkage = true) {
553 int current_sp_offset = state->GetSPToFPSlotCount() +
554 StandardFrameConstants::kFixedSlotCountAboveFp;
555 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
556 if (stack_slot_delta > 0) {
557 if (pending_pushes != nullptr) {
558 FlushPendingPushRegisters(tasm, state, pending_pushes);
559 }
560 tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
561 state->IncreaseSPDelta(stack_slot_delta);
562 } else if (allow_shrinkage && stack_slot_delta < 0) {
563 if (pending_pushes != nullptr) {
564 FlushPendingPushRegisters(tasm, state, pending_pushes);
565 }
566 tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
567 state->IncreaseSPDelta(stack_slot_delta);
568 }
569 }
570
571 #if DEBUG
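// Atomic pair instructions deliver their 64-bit result in two outputs, one
// output plus a temp, or two temps, depending on how many halves are used;
// this verifies the register assignment matches that convention.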
572 bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
573 const Instruction* instr, Register low,
574 Register high) {
575 DCHECK_GE(instr->OutputCount() + instr->TempCount(), 2);
576 if (instr->OutputCount() == 2) {
577 return (converter->OutputRegister(0) == low &&
578 converter->OutputRegister(1) == high);
579 }
580 if (instr->OutputCount() == 1) {
581 return (converter->OutputRegister(0) == low &&
582 converter->TempRegister(instr->TempCount() - 1) == high) ||
583 (converter->OutputRegister(0) == high &&
584 converter->TempRegister(instr->TempCount() - 1) == low);
585 }
586 DCHECK_EQ(instr->OutputCount(), 0);
587 return (converter->TempRegister(instr->TempCount() - 2) == low &&
588 converter->TempRegister(instr->TempCount() - 1) == high);
589 }
590 #endif
591
592 } // namespace
593
594 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
595 int first_unused_slot_offset) {
596 ZoneVector<MoveOperands*> pushes(zone());
597 GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
598
599 if (!pushes.empty() &&
600 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
601 first_unused_slot_offset)) {
602 ArmOperandConverter g(this, instr);
603 ZoneVector<Register> pending_pushes(zone());
604 for (auto move : pushes) {
605 LocationOperand destination_location(
606 LocationOperand::cast(move->destination()));
607 InstructionOperand source(move->source());
608 AdjustStackPointerForTailCall(
609 tasm(), frame_access_state(),
610 destination_location.index() - pending_pushes.size(),
611 &pending_pushes);
612 // Pushes of non-register data types are not supported.
613 DCHECK(source.IsRegister());
614 LocationOperand source_location(LocationOperand::cast(source));
615 pending_pushes.push_back(source_location.GetRegister());
616 // TODO(arm): We can push more than 3 registers at once. Add support in
617 // the macro-assembler for pushing a list of registers.
618 if (pending_pushes.size() == 3) {
619 FlushPendingPushRegisters(tasm(), frame_access_state(),
620 &pending_pushes);
621 }
622 move->Eliminate();
623 }
624 FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
625 }
626 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
627 first_unused_slot_offset, nullptr, false);
628 }
629
630 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
631 int first_unused_slot_offset) {
632 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
633 first_unused_slot_offset);
634 }
635
636 // Check that {kJavaScriptCallCodeStartRegister} is correct.
637 void CodeGenerator::AssembleCodeStartRegisterCheck() {
638 UseScratchRegisterScope temps(tasm());
639 Register scratch = temps.Acquire();
640 __ ComputeCodeStartAddress(scratch);
641 __ cmp(scratch, kJavaScriptCallCodeStartRegister);
642 __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
643 }
644
645 // Check if the code object is marked for deoptimization. If it is, then it
646 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
647 // to:
648 // 1. read from memory the word that contains that bit, which can be found in
649 // the flags in the referenced {CodeDataContainer} object;
650 // 2. test kMarkedForDeoptimizationBit in those flags; and
651 // 3. if it is not zero then it jumps to the builtin.
652 void CodeGenerator::BailoutIfDeoptimized() {
653 UseScratchRegisterScope temps(tasm());
654 Register scratch = temps.Acquire();
655 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
656 __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
657 __ ldr(scratch,
658 FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
659 __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
660 __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
661 RelocInfo::CODE_TARGET, ne);
662 }
663
664 // Assembles an instruction after register allocation, producing machine code.
665 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
666 Instruction* instr) {
667 ArmOperandConverter i(this, instr);
668
669 __ MaybeCheckConstPool();
670 InstructionCode opcode = instr->opcode();
671 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
672 switch (arch_opcode) {
673 case kArchCallCodeObject: {
674 if (instr->InputAt(0)->IsImmediate()) {
675 __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
676 } else {
677 Register reg = i.InputRegister(0);
678 DCHECK_IMPLIES(
679 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
680 reg == kJavaScriptCallCodeStartRegister);
681 __ CallCodeObject(reg);
682 }
683 RecordCallPosition(instr);
684 DCHECK_EQ(LeaveCC, i.OutputSBit());
685 frame_access_state()->ClearSPDelta();
686 break;
687 }
688 case kArchCallBuiltinPointer: {
689 DCHECK(!instr->InputAt(0)->IsImmediate());
690 Register builtin_index = i.InputRegister(0);
691 __ CallBuiltinByIndex(builtin_index);
692 RecordCallPosition(instr);
693 frame_access_state()->ClearSPDelta();
694 break;
695 }
696 #if V8_ENABLE_WEBASSEMBLY
697 case kArchCallWasmFunction: {
698 if (instr->InputAt(0)->IsImmediate()) {
699 Constant constant = i.ToConstant(instr->InputAt(0));
700 Address wasm_code = static_cast<Address>(constant.ToInt32());
701 __ Call(wasm_code, constant.rmode());
702 } else {
703 __ Call(i.InputRegister(0));
704 }
705 RecordCallPosition(instr);
706 DCHECK_EQ(LeaveCC, i.OutputSBit());
707 frame_access_state()->ClearSPDelta();
708 break;
709 }
710 case kArchTailCallWasm: {
711 if (instr->InputAt(0)->IsImmediate()) {
712 Constant constant = i.ToConstant(instr->InputAt(0));
713 Address wasm_code = static_cast<Address>(constant.ToInt32());
714 __ Jump(wasm_code, constant.rmode());
715 } else {
716 __ Jump(i.InputRegister(0));
717 }
718 DCHECK_EQ(LeaveCC, i.OutputSBit());
719 unwinding_info_writer_.MarkBlockWillExit();
720 frame_access_state()->ClearSPDelta();
721 frame_access_state()->SetFrameAccessToDefault();
722 break;
723 }
724 #endif // V8_ENABLE_WEBASSEMBLY
725 case kArchTailCallCodeObject: {
726 if (instr->InputAt(0)->IsImmediate()) {
727 __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
728 } else {
729 Register reg = i.InputRegister(0);
730 DCHECK_IMPLIES(
731 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
732 reg == kJavaScriptCallCodeStartRegister);
733 __ JumpCodeObject(reg);
734 }
735 DCHECK_EQ(LeaveCC, i.OutputSBit());
736 unwinding_info_writer_.MarkBlockWillExit();
737 frame_access_state()->ClearSPDelta();
738 frame_access_state()->SetFrameAccessToDefault();
739 break;
740 }
741 case kArchTailCallAddress: {
742 CHECK(!instr->InputAt(0)->IsImmediate());
743 Register reg = i.InputRegister(0);
744 DCHECK_IMPLIES(
745 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
746 reg == kJavaScriptCallCodeStartRegister);
747 __ Jump(reg);
748 unwinding_info_writer_.MarkBlockWillExit();
749 frame_access_state()->ClearSPDelta();
750 frame_access_state()->SetFrameAccessToDefault();
751 break;
752 }
753 case kArchCallJSFunction: {
754 Register func = i.InputRegister(0);
755 if (FLAG_debug_code) {
756 UseScratchRegisterScope temps(tasm());
757 Register scratch = temps.Acquire();
758 // Check the function's context matches the context argument.
759 __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
760 __ cmp(cp, scratch);
761 __ Assert(eq, AbortReason::kWrongFunctionContext);
762 }
763 static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
764 __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
765 __ CallCodeObject(r2);
766 RecordCallPosition(instr);
767 DCHECK_EQ(LeaveCC, i.OutputSBit());
768 frame_access_state()->ClearSPDelta();
769 break;
770 }
771 case kArchPrepareCallCFunction: {
772 int const num_gp_parameters = ParamField::decode(instr->opcode());
773 int const num_fp_parameters = FPParamField::decode(instr->opcode());
774 __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters);
775 // Frame alignment requires using FP-relative frame addressing.
776 frame_access_state()->SetFrameAccessToFP();
777 break;
778 }
779 case kArchSaveCallerRegisters: {
780 fp_mode_ =
781 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
782 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
783 fp_mode_ == SaveFPRegsMode::kSave);
784 // kReturnRegister0 should have been saved before entering the stub.
785 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
786 DCHECK(IsAligned(bytes, kSystemPointerSize));
787 DCHECK_EQ(0, frame_access_state()->sp_delta());
788 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
789 DCHECK(!caller_registers_saved_);
790 caller_registers_saved_ = true;
791 break;
792 }
793 case kArchRestoreCallerRegisters: {
794 DCHECK(fp_mode_ ==
795 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
796 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
797 fp_mode_ == SaveFPRegsMode::kSave);
798 // Don't overwrite the returned value.
799 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
800 frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
801 DCHECK_EQ(0, frame_access_state()->sp_delta());
802 DCHECK(caller_registers_saved_);
803 caller_registers_saved_ = false;
804 break;
805 }
806 case kArchPrepareTailCall:
807 AssemblePrepareTailCall();
808 break;
809 case kArchCallCFunction: {
810 int const num_parameters = MiscField::decode(instr->opcode());
811 #if V8_ENABLE_WEBASSEMBLY
812 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
813 // Put the current address in a stack slot, and record a safepoint on
814 // the same address. In most architectures, we record the address after
815 // the function call, but this works too as long as the address in the
816 // frame and safepoint table match.
817 __ str(pc, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
818 // In Arm, the pc points two instructions after the currently executing
819 // instruction: see https://bit.ly/3CD80OA. To line up the safepoint
820 // address with the stored pc, we add a nop here.
821 __ nop();
822 RecordSafepoint(instr->reference_map());
823 }
824 #endif // V8_ENABLE_WEBASSEMBLY
825 if (instr->InputAt(0)->IsImmediate()) {
826 ExternalReference ref = i.InputExternalReference(0);
827 __ CallCFunction(ref, num_parameters);
828 } else {
829 Register func = i.InputRegister(0);
830 __ CallCFunction(func, num_parameters);
831 }
832 frame_access_state()->SetFrameAccessToDefault();
833 // Ideally, we should decrement SP delta to match the change of stack
834 // pointer in CallCFunction. However, for certain architectures (e.g.
835 // ARM), there may be more strict alignment requirement, causing old SP
836 // to be saved on the stack. In those cases, we can not calculate the SP
837 // delta statically.
838 frame_access_state()->ClearSPDelta();
839 if (caller_registers_saved_) {
840 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
841 // Here, we assume the sequence to be:
842 // kArchSaveCallerRegisters;
843 // kArchCallCFunction;
844 // kArchRestoreCallerRegisters;
845 int bytes =
846 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
847 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
848 }
849 break;
850 }
851 case kArchJmp:
852 AssembleArchJump(i.InputRpo(0));
853 DCHECK_EQ(LeaveCC, i.OutputSBit());
854 break;
855 case kArchBinarySearchSwitch:
856 AssembleArchBinarySearchSwitch(instr);
857 break;
858 case kArchTableSwitch:
859 AssembleArchTableSwitch(instr);
860 DCHECK_EQ(LeaveCC, i.OutputSBit());
861 break;
862 case kArchAbortCSADcheck:
863 DCHECK(i.InputRegister(0) == r1);
864 {
865 // We don't actually want to generate a pile of code for this, so just
866 // claim there is a stack frame, without generating one.
867 FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
868 __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
869 RelocInfo::CODE_TARGET);
870 }
871 __ stop();
872 unwinding_info_writer_.MarkBlockWillExit();
873 break;
874 case kArchDebugBreak:
875 __ DebugBreak();
876 break;
877 case kArchComment:
878 __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
879 break;
880 case kArchThrowTerminator:
881 DCHECK_EQ(LeaveCC, i.OutputSBit());
882 unwinding_info_writer_.MarkBlockWillExit();
883 break;
884 case kArchNop:
885 // don't emit code for nops.
886 DCHECK_EQ(LeaveCC, i.OutputSBit());
887 break;
888 case kArchDeoptimize: {
889 DeoptimizationExit* exit =
890 BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
891 __ b(exit->label());
892 break;
893 }
894 case kArchRet:
895 AssembleReturn(instr->InputAt(0));
896 DCHECK_EQ(LeaveCC, i.OutputSBit());
897 break;
898 case kArchFramePointer:
899 __ mov(i.OutputRegister(), fp);
900 DCHECK_EQ(LeaveCC, i.OutputSBit());
901 break;
902 case kArchParentFramePointer:
903 if (frame_access_state()->has_frame()) {
904 __ ldr(i.OutputRegister(), MemOperand(fp, 0));
905 } else {
906 __ mov(i.OutputRegister(), fp);
907 }
908 break;
909 case kArchStackPointerGreaterThan: {
910 // Potentially apply an offset to the current stack pointer before the
911 // comparison to consider the size difference of an optimized frame versus
912 // the contained unoptimized frames.
913
914 Register lhs_register = sp;
915 uint32_t offset;
916
917 if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
918 lhs_register = i.TempRegister(0);
919 __ sub(lhs_register, sp, Operand(offset));
920 }
921
922 constexpr size_t kValueIndex = 0;
923 DCHECK(instr->InputAt(kValueIndex)->IsRegister());
924 __ cmp(lhs_register, i.InputRegister(kValueIndex));
925 break;
926 }
927 case kArchStackCheckOffset:
928 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
929 break;
930 case kArchTruncateDoubleToI:
931 __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
932 i.InputDoubleRegister(0), DetermineStubCallMode());
933 DCHECK_EQ(LeaveCC, i.OutputSBit());
934 break;
935 case kArchStoreWithWriteBarrier: // Fall through.
936 case kArchAtomicStoreWithWriteBarrier: {
937 RecordWriteMode mode;
938 if (arch_opcode == kArchStoreWithWriteBarrier) {
939 mode = static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
940 } else {
941 mode = AtomicStoreRecordWriteModeField::decode(instr->opcode());
942 }
943 Register object = i.InputRegister(0);
944 Register value = i.InputRegister(2);
945
946 if (FLAG_debug_code) {
947 // Checking that |value| is not a cleared weakref: our write barrier
948 // does not support that for now.
949 __ cmp(value, Operand(kClearedWeakHeapObjectLower32));
950 __ Check(ne, AbortReason::kOperandIsCleared);
951 }
952
953 AddressingMode addressing_mode =
954 AddressingModeField::decode(instr->opcode());
955 Operand offset(0);
956
957 if (arch_opcode == kArchAtomicStoreWithWriteBarrier) {
958 __ dmb(ISH);
959 }
960 if (addressing_mode == kMode_Offset_RI) {
961 int32_t immediate = i.InputInt32(1);
962 offset = Operand(immediate);
963 __ str(value, MemOperand(object, immediate));
964 } else {
965 DCHECK_EQ(kMode_Offset_RR, addressing_mode);
966 Register reg = i.InputRegister(1);
967 offset = Operand(reg);
968 __ str(value, MemOperand(object, reg));
969 }
970 if (arch_opcode == kArchAtomicStoreWithWriteBarrier &&
971 AtomicMemoryOrderField::decode(instr->opcode()) ==
972 AtomicMemoryOrder::kSeqCst) {
973 __ dmb(ISH);
974 }
975
976 auto ool = zone()->New<OutOfLineRecordWrite>(
977 this, object, offset, value, mode, DetermineStubCallMode(),
978 &unwinding_info_writer_);
979 if (mode > RecordWriteMode::kValueIsPointer) {
980 __ JumpIfSmi(value, ool->exit());
981 }
982 __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
983 ne, ool->entry());
984 __ bind(ool->exit());
985 break;
986 }
987 case kArchStackSlot: {
988 FrameOffset offset =
989 frame_access_state()->GetFrameOffset(i.InputInt32(0));
990 Register base = offset.from_stack_pointer() ? sp : fp;
991 __ add(i.OutputRegister(0), base, Operand(offset.offset()));
992 break;
993 }
994 case kIeee754Float64Acos:
995 ASSEMBLE_IEEE754_UNOP(acos);
996 break;
997 case kIeee754Float64Acosh:
998 ASSEMBLE_IEEE754_UNOP(acosh);
999 break;
1000 case kIeee754Float64Asin:
1001 ASSEMBLE_IEEE754_UNOP(asin);
1002 break;
1003 case kIeee754Float64Asinh:
1004 ASSEMBLE_IEEE754_UNOP(asinh);
1005 break;
1006 case kIeee754Float64Atan:
1007 ASSEMBLE_IEEE754_UNOP(atan);
1008 break;
1009 case kIeee754Float64Atanh:
1010 ASSEMBLE_IEEE754_UNOP(atanh);
1011 break;
1012 case kIeee754Float64Atan2:
1013 ASSEMBLE_IEEE754_BINOP(atan2);
1014 break;
1015 case kIeee754Float64Cbrt:
1016 ASSEMBLE_IEEE754_UNOP(cbrt);
1017 break;
1018 case kIeee754Float64Cos:
1019 ASSEMBLE_IEEE754_UNOP(cos);
1020 break;
1021 case kIeee754Float64Cosh:
1022 ASSEMBLE_IEEE754_UNOP(cosh);
1023 break;
1024 case kIeee754Float64Exp:
1025 ASSEMBLE_IEEE754_UNOP(exp);
1026 break;
1027 case kIeee754Float64Expm1:
1028 ASSEMBLE_IEEE754_UNOP(expm1);
1029 break;
1030 case kIeee754Float64Log:
1031 ASSEMBLE_IEEE754_UNOP(log);
1032 break;
1033 case kIeee754Float64Log1p:
1034 ASSEMBLE_IEEE754_UNOP(log1p);
1035 break;
1036 case kIeee754Float64Log2:
1037 ASSEMBLE_IEEE754_UNOP(log2);
1038 break;
1039 case kIeee754Float64Log10:
1040 ASSEMBLE_IEEE754_UNOP(log10);
1041 break;
1042 case kIeee754Float64Pow:
1043 ASSEMBLE_IEEE754_BINOP(pow);
1044 break;
1045 case kIeee754Float64Sin:
1046 ASSEMBLE_IEEE754_UNOP(sin);
1047 break;
1048 case kIeee754Float64Sinh:
1049 ASSEMBLE_IEEE754_UNOP(sinh);
1050 break;
1051 case kIeee754Float64Tan:
1052 ASSEMBLE_IEEE754_UNOP(tan);
1053 break;
1054 case kIeee754Float64Tanh:
1055 ASSEMBLE_IEEE754_UNOP(tanh);
1056 break;
1057 case kArmAdd:
1058 __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1059 i.OutputSBit());
1060 break;
1061 case kArmAnd:
1062 __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1063 i.OutputSBit());
1064 break;
1065 case kArmBic:
1066 __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1067 i.OutputSBit());
1068 break;
1069 case kArmMul:
1070 __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1071 i.OutputSBit());
1072 break;
1073 case kArmMla:
1074 __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1075 i.InputRegister(2), i.OutputSBit());
1076 break;
1077 case kArmMls: {
1078 CpuFeatureScope scope(tasm(), ARMv7);
1079 __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1080 i.InputRegister(2));
1081 DCHECK_EQ(LeaveCC, i.OutputSBit());
1082 break;
1083 }
1084 case kArmSmull:
1085 __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1086 i.InputRegister(1));
1087 break;
1088 case kArmSmmul:
1089 __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1090 DCHECK_EQ(LeaveCC, i.OutputSBit());
1091 break;
1092 case kArmSmmla:
1093 __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1094 i.InputRegister(2));
1095 DCHECK_EQ(LeaveCC, i.OutputSBit());
1096 break;
1097 case kArmUmull:
1098 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1099 i.InputRegister(1), i.OutputSBit());
1100 break;
1101 case kArmSdiv: {
1102 CpuFeatureScope scope(tasm(), SUDIV);
1103 __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1104 DCHECK_EQ(LeaveCC, i.OutputSBit());
1105 break;
1106 }
1107 case kArmUdiv: {
1108 CpuFeatureScope scope(tasm(), SUDIV);
1109 __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1110 DCHECK_EQ(LeaveCC, i.OutputSBit());
1111 break;
1112 }
1113 case kArmMov:
1114 __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1115 break;
1116 case kArmMvn:
1117 __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1118 break;
1119 case kArmOrr:
1120 __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1121 i.OutputSBit());
1122 break;
1123 case kArmEor:
1124 __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1125 i.OutputSBit());
1126 break;
1127 case kArmSub:
1128 __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1129 i.OutputSBit());
1130 break;
1131 case kArmRsb:
1132 __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1133 i.OutputSBit());
1134 break;
1135 case kArmBfc: {
1136 CpuFeatureScope scope(tasm(), ARMv7);
1137 __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
1138 DCHECK_EQ(LeaveCC, i.OutputSBit());
1139 break;
1140 }
1141 case kArmUbfx: {
1142 CpuFeatureScope scope(tasm(), ARMv7);
1143 __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1144 i.InputInt8(2));
1145 DCHECK_EQ(LeaveCC, i.OutputSBit());
1146 break;
1147 }
1148 case kArmSbfx: {
1149 CpuFeatureScope scope(tasm(), ARMv7);
1150 __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1151 i.InputInt8(2));
1152 DCHECK_EQ(LeaveCC, i.OutputSBit());
1153 break;
1154 }
1155 case kArmSxtb:
1156 __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1157 DCHECK_EQ(LeaveCC, i.OutputSBit());
1158 break;
1159 case kArmSxth:
1160 __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1161 DCHECK_EQ(LeaveCC, i.OutputSBit());
1162 break;
1163 case kArmSxtab:
1164 __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1165 i.InputInt32(2));
1166 DCHECK_EQ(LeaveCC, i.OutputSBit());
1167 break;
1168 case kArmSxtah:
1169 __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1170 i.InputInt32(2));
1171 DCHECK_EQ(LeaveCC, i.OutputSBit());
1172 break;
1173 case kArmUxtb:
1174 __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1175 DCHECK_EQ(LeaveCC, i.OutputSBit());
1176 break;
1177 case kArmUxth:
1178 __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1179 DCHECK_EQ(LeaveCC, i.OutputSBit());
1180 break;
1181 case kArmUxtab:
1182 __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1183 i.InputInt32(2));
1184 DCHECK_EQ(LeaveCC, i.OutputSBit());
1185 break;
1186 case kArmUxtah:
1187 __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1188 i.InputInt32(2));
1189 DCHECK_EQ(LeaveCC, i.OutputSBit());
1190 break;
1191 case kArmRbit: {
1192 CpuFeatureScope scope(tasm(), ARMv7);
1193 __ rbit(i.OutputRegister(), i.InputRegister(0));
1194 DCHECK_EQ(LeaveCC, i.OutputSBit());
1195 break;
1196 }
1197 case kArmRev:
1198 __ rev(i.OutputRegister(), i.InputRegister(0));
1199 DCHECK_EQ(LeaveCC, i.OutputSBit());
1200 break;
1201 case kArmClz:
1202 __ clz(i.OutputRegister(), i.InputRegister(0));
1203 DCHECK_EQ(LeaveCC, i.OutputSBit());
1204 break;
1205 case kArmCmp:
1206 __ cmp(i.InputRegister(0), i.InputOperand2(1));
1207 DCHECK_EQ(SetCC, i.OutputSBit());
1208 break;
1209 case kArmCmn:
1210 __ cmn(i.InputRegister(0), i.InputOperand2(1));
1211 DCHECK_EQ(SetCC, i.OutputSBit());
1212 break;
1213 case kArmTst:
1214 __ tst(i.InputRegister(0), i.InputOperand2(1));
1215 DCHECK_EQ(SetCC, i.OutputSBit());
1216 break;
1217 case kArmTeq:
1218 __ teq(i.InputRegister(0), i.InputOperand2(1));
1219 DCHECK_EQ(SetCC, i.OutputSBit());
1220 break;
1221 case kArmAddPair:
1222 // i.InputRegister(0) ... left low word.
1223 // i.InputRegister(1) ... left high word.
1224 // i.InputRegister(2) ... right low word.
1225 // i.InputRegister(3) ... right high word.
1226 __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1227 SBit::SetCC);
1228 __ adc(i.OutputRegister(1), i.InputRegister(1),
1229 Operand(i.InputRegister(3)));
1230 DCHECK_EQ(LeaveCC, i.OutputSBit());
1231 break;
1232 case kArmSubPair:
1233 // i.InputRegister(0) ... left low word.
1234 // i.InputRegister(1) ... left high word.
1235 // i.InputRegister(2) ... right low word.
1236 // i.InputRegister(3) ... right high word.
1237 __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1238 SBit::SetCC);
1239 __ sbc(i.OutputRegister(1), i.InputRegister(1),
1240 Operand(i.InputRegister(3)));
1241 DCHECK_EQ(LeaveCC, i.OutputSBit());
1242 break;
1243 case kArmMulPair:
1244 // i.InputRegister(0) ... left low word.
1245 // i.InputRegister(1) ... left high word.
1246 // i.InputRegister(2) ... right low word.
1247 // i.InputRegister(3) ... right high word.
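// low:high = left_low * right_low, then accumulate the low 32 bits of the
// cross products left_low * right_high and right_low * left_high into the
// high word; their upper halves would land beyond bit 63 and are dropped.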
1248 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1249 i.InputRegister(2));
1250 __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1251 i.OutputRegister(1));
1252 __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1253 i.OutputRegister(1));
1254 break;
1255 case kArmLslPair: {
1256 Register second_output =
1257 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1258 if (instr->InputAt(2)->IsImmediate()) {
1259 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1260 i.InputRegister(1), i.InputInt32(2));
1261 } else {
1262 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1263 i.InputRegister(1), i.InputRegister(2));
1264 }
1265 break;
1266 }
1267 case kArmLsrPair: {
1268 Register second_output =
1269 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1270 if (instr->InputAt(2)->IsImmediate()) {
1271 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1272 i.InputRegister(1), i.InputInt32(2));
1273 } else {
1274 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1275 i.InputRegister(1), i.InputRegister(2));
1276 }
1277 break;
1278 }
1279 case kArmAsrPair: {
1280 Register second_output =
1281 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1282 if (instr->InputAt(2)->IsImmediate()) {
1283 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1284 i.InputRegister(1), i.InputInt32(2));
1285 } else {
1286 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1287 i.InputRegister(1), i.InputRegister(2));
1288 }
1289 break;
1290 }
1291 case kArmVcmpF32:
1292 if (instr->InputAt(1)->IsFPRegister()) {
1293 __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
1294 i.InputFloatRegister(1));
1295 } else {
1296 DCHECK(instr->InputAt(1)->IsImmediate());
1297 // 0.0 is the only immediate supported by vcmp instructions.
1298 DCHECK_EQ(0.0f, i.InputFloat32(1));
1299 __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
1300 }
1301 DCHECK_EQ(SetCC, i.OutputSBit());
1302 break;
1303 case kArmVaddF32:
1304 __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
1305 i.InputFloatRegister(1));
1306 DCHECK_EQ(LeaveCC, i.OutputSBit());
1307 break;
1308 case kArmVsubF32:
1309 __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
1310 i.InputFloatRegister(1));
1311 DCHECK_EQ(LeaveCC, i.OutputSBit());
1312 break;
1313 case kArmVmulF32:
1314 __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
1315 i.InputFloatRegister(1));
1316 DCHECK_EQ(LeaveCC, i.OutputSBit());
1317 break;
1318 case kArmVmlaF32:
1319 __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
1320 i.InputFloatRegister(2));
1321 DCHECK_EQ(LeaveCC, i.OutputSBit());
1322 break;
1323 case kArmVmlsF32:
1324 __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
1325 i.InputFloatRegister(2));
1326 DCHECK_EQ(LeaveCC, i.OutputSBit());
1327 break;
1328 case kArmVdivF32:
1329 __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
1330 i.InputFloatRegister(1));
1331 DCHECK_EQ(LeaveCC, i.OutputSBit());
1332 break;
1333 case kArmVsqrtF32:
1334 __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
1335 break;
1336 case kArmVabsF32:
1337 __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
1338 break;
1339 case kArmVnegF32:
1340 __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
1341 break;
1342 case kArmVcmpF64:
1343 if (instr->InputAt(1)->IsFPRegister()) {
1344 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
1345 i.InputDoubleRegister(1));
1346 } else {
1347 DCHECK(instr->InputAt(1)->IsImmediate());
1348 // 0.0 is the only immediate supported by vcmp instructions.
1349 DCHECK_EQ(0.0, i.InputDouble(1));
1350 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
1351 }
1352 DCHECK_EQ(SetCC, i.OutputSBit());
1353 break;
1354 case kArmVaddF64:
1355 __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1356 i.InputDoubleRegister(1));
1357 DCHECK_EQ(LeaveCC, i.OutputSBit());
1358 break;
1359 case kArmVsubF64:
1360 __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1361 i.InputDoubleRegister(1));
1362 DCHECK_EQ(LeaveCC, i.OutputSBit());
1363 break;
1364 case kArmVmulF64:
1365 __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1366 i.InputDoubleRegister(1));
1367 DCHECK_EQ(LeaveCC, i.OutputSBit());
1368 break;
1369 case kArmVmlaF64:
1370 __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1371 i.InputDoubleRegister(2));
1372 DCHECK_EQ(LeaveCC, i.OutputSBit());
1373 break;
1374 case kArmVmlsF64:
1375 __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1376 i.InputDoubleRegister(2));
1377 DCHECK_EQ(LeaveCC, i.OutputSBit());
1378 break;
1379 case kArmVdivF64:
1380 __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1381 i.InputDoubleRegister(1));
1382 DCHECK_EQ(LeaveCC, i.OutputSBit());
1383 break;
1384 case kArmVmodF64: {
1385 // TODO(bmeurer): We should really get rid of this special instruction,
1386 // and generate a CallAddress instruction instead.
1387 FrameScope scope(tasm(), StackFrame::MANUAL);
1388 __ PrepareCallCFunction(0, 2);
1389 __ MovToFloatParameters(i.InputDoubleRegister(0),
1390 i.InputDoubleRegister(1));
1391 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1392 // Move the result in the double result register.
1393 __ MovFromFloatResult(i.OutputDoubleRegister());
1394 DCHECK_EQ(LeaveCC, i.OutputSBit());
1395 break;
1396 }
1397 case kArmVsqrtF64:
1398 __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1399 break;
1400 case kArmVabsF64:
1401 __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1402 break;
1403 case kArmVnegF64:
1404 __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1405 break;
1406 case kArmVrintmF32: {
1407 CpuFeatureScope scope(tasm(), ARMv8);
1408 if (instr->InputAt(0)->IsSimd128Register()) {
1409 __ vrintm(NeonS32, i.OutputSimd128Register(),
1410 i.InputSimd128Register(0));
1411 } else {
1412 __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
1413 }
1414 break;
1415 }
1416 case kArmVrintmF64: {
1417 CpuFeatureScope scope(tasm(), ARMv8);
1418 __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1419 break;
1420 }
1421 case kArmVrintpF32: {
1422 CpuFeatureScope scope(tasm(), ARMv8);
1423 if (instr->InputAt(0)->IsSimd128Register()) {
1424 __ vrintp(NeonS32, i.OutputSimd128Register(),
1425 i.InputSimd128Register(0));
1426 } else {
1427 __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
1428 }
1429 break;
1430 }
1431 case kArmVrintpF64: {
1432 CpuFeatureScope scope(tasm(), ARMv8);
1433 __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1434 break;
1435 }
1436 case kArmVrintzF32: {
1437 CpuFeatureScope scope(tasm(), ARMv8);
1438 if (instr->InputAt(0)->IsSimd128Register()) {
1439 __ vrintz(NeonS32, i.OutputSimd128Register(),
1440 i.InputSimd128Register(0));
1441 } else {
1442 __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
1443 }
1444 break;
1445 }
1446 case kArmVrintzF64: {
1447 CpuFeatureScope scope(tasm(), ARMv8);
1448 __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1449 break;
1450 }
1451 case kArmVrintaF64: {
1452 CpuFeatureScope scope(tasm(), ARMv8);
1453 __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1454 break;
1455 }
1456 case kArmVrintnF32: {
1457 CpuFeatureScope scope(tasm(), ARMv8);
1458 if (instr->InputAt(0)->IsSimd128Register()) {
1459 __ vrintn(NeonS32, i.OutputSimd128Register(),
1460 i.InputSimd128Register(0));
1461 } else {
1462 __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
1463 }
1464 break;
1465 }
1466 case kArmVrintnF64: {
1467 CpuFeatureScope scope(tasm(), ARMv8);
1468 __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1469 break;
1470 }
1471 case kArmVcvtF32F64: {
1472 __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
1473 DCHECK_EQ(LeaveCC, i.OutputSBit());
1474 break;
1475 }
1476 case kArmVcvtF64F32: {
1477 __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
1478 DCHECK_EQ(LeaveCC, i.OutputSBit());
1479 break;
1480 }
1481 case kArmVcvtF32S32: {
1482 UseScratchRegisterScope temps(tasm());
1483 SwVfpRegister scratch = temps.AcquireS();
1484 __ vmov(scratch, i.InputRegister(0));
1485 __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
1486 DCHECK_EQ(LeaveCC, i.OutputSBit());
1487 break;
1488 }
1489 case kArmVcvtF32U32: {
1490 UseScratchRegisterScope temps(tasm());
1491 SwVfpRegister scratch = temps.AcquireS();
1492 __ vmov(scratch, i.InputRegister(0));
1493 __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
1494 DCHECK_EQ(LeaveCC, i.OutputSBit());
1495 break;
1496 }
1497 case kArmVcvtF64S32: {
1498 UseScratchRegisterScope temps(tasm());
1499 SwVfpRegister scratch = temps.AcquireS();
1500 __ vmov(scratch, i.InputRegister(0));
1501 __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
1502 DCHECK_EQ(LeaveCC, i.OutputSBit());
1503 break;
1504 }
1505 case kArmVcvtF64U32: {
1506 UseScratchRegisterScope temps(tasm());
1507 SwVfpRegister scratch = temps.AcquireS();
1508 __ vmov(scratch, i.InputRegister(0));
1509 __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
1510 DCHECK_EQ(LeaveCC, i.OutputSBit());
1511 break;
1512 }
1513 case kArmVcvtS32F32: {
1514 UseScratchRegisterScope temps(tasm());
1515 SwVfpRegister scratch = temps.AcquireS();
1516 __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
1517 __ vmov(i.OutputRegister(), scratch);
1518 bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
1519 if (set_overflow_to_min_i32) {
1520 // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1521 // because INT32_MIN allows easier out-of-bounds detection.
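// cmn sets the overflow flag only when the conversion produced INT32_MAX,
// so the conditional mov below rewrites exactly that value to INT32_MIN.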
1522 __ cmn(i.OutputRegister(), Operand(1));
1523 __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
1524 }
1525 DCHECK_EQ(LeaveCC, i.OutputSBit());
1526 break;
1527 }
1528 case kArmVcvtU32F32: {
1529 UseScratchRegisterScope temps(tasm());
1530 SwVfpRegister scratch = temps.AcquireS();
1531 __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
1532 __ vmov(i.OutputRegister(), scratch);
1533 bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
1534 if (set_overflow_to_min_u32) {
1535 // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1536 // because 0 allows easier out-of-bounds detection.
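// cmn sets the carry flag only when the conversion produced UINT32_MAX, so
// the adc below wraps exactly that value to 0 and leaves others unchanged.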
1537 __ cmn(i.OutputRegister(), Operand(1));
1538 __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
1539 }
1540 DCHECK_EQ(LeaveCC, i.OutputSBit());
1541 break;
1542 }
1543 case kArmVcvtS32F64: {
1544 UseScratchRegisterScope temps(tasm());
1545 SwVfpRegister scratch = temps.AcquireS();
1546 __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
1547 __ vmov(i.OutputRegister(), scratch);
1548 DCHECK_EQ(LeaveCC, i.OutputSBit());
1549 break;
1550 }
1551 case kArmVcvtU32F64: {
1552 UseScratchRegisterScope temps(tasm());
1553 SwVfpRegister scratch = temps.AcquireS();
1554 __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
1555 __ vmov(i.OutputRegister(), scratch);
1556 DCHECK_EQ(LeaveCC, i.OutputSBit());
1557 break;
1558 }
1559 case kArmVmovU32F32:
1560 __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
1561 DCHECK_EQ(LeaveCC, i.OutputSBit());
1562 break;
1563 case kArmVmovF32U32:
1564 __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
1565 DCHECK_EQ(LeaveCC, i.OutputSBit());
1566 break;
1567 case kArmVmovLowU32F64:
1568 __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
1569 DCHECK_EQ(LeaveCC, i.OutputSBit());
1570 break;
1571 case kArmVmovLowF64U32:
1572 __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
1573 DCHECK_EQ(LeaveCC, i.OutputSBit());
1574 break;
1575 case kArmVmovHighU32F64:
1576 __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
1577 DCHECK_EQ(LeaveCC, i.OutputSBit());
1578 break;
1579 case kArmVmovHighF64U32:
1580 __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
1581 DCHECK_EQ(LeaveCC, i.OutputSBit());
1582 break;
1583 case kArmVmovF64U32U32:
1584 __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
1585 DCHECK_EQ(LeaveCC, i.OutputSBit());
1586 break;
1587 case kArmVmovU32U32F64:
1588 __ vmov(i.OutputRegister(0), i.OutputRegister(1),
1589 i.InputDoubleRegister(0));
1590 DCHECK_EQ(LeaveCC, i.OutputSBit());
1591 break;
1592 case kArmVcnt: {
1593 __ vcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
1594 break;
1595 }
1596 case kArmLdrb:
1597 __ ldrb(i.OutputRegister(), i.InputOffset());
1598 DCHECK_EQ(LeaveCC, i.OutputSBit());
1599 break;
1600 case kArmLdrsb:
1601 __ ldrsb(i.OutputRegister(), i.InputOffset());
1602 DCHECK_EQ(LeaveCC, i.OutputSBit());
1603 break;
1604 case kArmStrb:
1605 __ strb(i.InputRegister(0), i.InputOffset(1));
1606 DCHECK_EQ(LeaveCC, i.OutputSBit());
1607 break;
1608 case kArmLdrh:
1609 __ ldrh(i.OutputRegister(), i.InputOffset());
1610 break;
1611 case kArmLdrsh:
1612 __ ldrsh(i.OutputRegister(), i.InputOffset());
1613 break;
1614 case kArmStrh:
1615 __ strh(i.InputRegister(0), i.InputOffset(1));
1616 DCHECK_EQ(LeaveCC, i.OutputSBit());
1617 break;
1618 case kArmLdr:
1619 __ ldr(i.OutputRegister(), i.InputOffset());
1620 break;
1621 case kArmStr:
1622 __ str(i.InputRegister(0), i.InputOffset(1));
1623 DCHECK_EQ(LeaveCC, i.OutputSBit());
1624 break;
1625 case kArmVldrF32: {
1626 __ vldr(i.OutputFloatRegister(), i.InputOffset());
1627 DCHECK_EQ(LeaveCC, i.OutputSBit());
1628 break;
1629 }
1630 case kArmVstrF32:
1631 __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
1632 DCHECK_EQ(LeaveCC, i.OutputSBit());
1633 break;
1634 case kArmVld1F64: {
1635 __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
1636 i.NeonInputOperand(0));
1637 break;
1638 }
1639 case kArmVst1F64: {
1640 __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
1641 i.NeonInputOperand(1));
1642 break;
1643 }
1644 case kArmVld1S128: {
1645 __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1646 i.NeonInputOperand(0));
1647 break;
1648 }
1649 case kArmVst1S128: {
1650 __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
1651 i.NeonInputOperand(1));
1652 break;
1653 }
1654 case kArmVldrF64: {
1655 __ vldr(i.OutputDoubleRegister(), i.InputOffset());
1656 DCHECK_EQ(LeaveCC, i.OutputSBit());
1657 break;
1658 }
1659 case kArmVstrF64:
1660 __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
1661 DCHECK_EQ(LeaveCC, i.OutputSBit());
1662 break;
1663 case kArmFloat32Max: {
1664 SwVfpRegister result = i.OutputFloatRegister();
1665 SwVfpRegister left = i.InputFloatRegister(0);
1666 SwVfpRegister right = i.InputFloatRegister(1);
1667 if (left == right) {
1668 __ Move(result, left);
1669 } else {
1670 auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
1671 __ FloatMax(result, left, right, ool->entry());
1672 __ bind(ool->exit());
1673 }
1674 DCHECK_EQ(LeaveCC, i.OutputSBit());
1675 break;
1676 }
1677 case kArmFloat64Max: {
1678 DwVfpRegister result = i.OutputDoubleRegister();
1679 DwVfpRegister left = i.InputDoubleRegister(0);
1680 DwVfpRegister right = i.InputDoubleRegister(1);
1681 if (left == right) {
1682 __ Move(result, left);
1683 } else {
1684 auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
1685 __ FloatMax(result, left, right, ool->entry());
1686 __ bind(ool->exit());
1687 }
1688 DCHECK_EQ(LeaveCC, i.OutputSBit());
1689 break;
1690 }
1691 case kArmFloat32Min: {
1692 SwVfpRegister result = i.OutputFloatRegister();
1693 SwVfpRegister left = i.InputFloatRegister(0);
1694 SwVfpRegister right = i.InputFloatRegister(1);
1695 if (left == right) {
1696 __ Move(result, left);
1697 } else {
1698 auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
1699 __ FloatMin(result, left, right, ool->entry());
1700 __ bind(ool->exit());
1701 }
1702 DCHECK_EQ(LeaveCC, i.OutputSBit());
1703 break;
1704 }
1705 case kArmFloat64Min: {
1706 DwVfpRegister result = i.OutputDoubleRegister();
1707 DwVfpRegister left = i.InputDoubleRegister(0);
1708 DwVfpRegister right = i.InputDoubleRegister(1);
1709 if (left == right) {
1710 __ Move(result, left);
1711 } else {
1712 auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
1713 __ FloatMin(result, left, right, ool->entry());
1714 __ bind(ool->exit());
1715 }
1716 DCHECK_EQ(LeaveCC, i.OutputSBit());
1717 break;
1718 }
1719 case kArmFloat64SilenceNaN: {
1720 DwVfpRegister value = i.InputDoubleRegister(0);
1721 DwVfpRegister result = i.OutputDoubleRegister();
1722 __ VFPCanonicalizeNaN(result, value);
1723 break;
1724 }
1725 case kArmPush: {
1726 int stack_decrement = i.InputInt32(0);
1727 int slots = stack_decrement / kSystemPointerSize;
1728 LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
1729 MachineRepresentation rep = op->representation();
1730 int pushed_slots = ElementSizeInPointers(rep);
1731 // Slot-sized arguments are never padded but there may be a gap if
1732 // the slot allocator reclaimed other padding slots. Adjust the stack
1733 // here to skip any gap.
1734 __ AllocateStackSpace((slots - pushed_slots) * kSystemPointerSize);
1735 switch (rep) {
1736 case MachineRepresentation::kFloat32:
1737 __ vpush(i.InputFloatRegister(1));
1738 break;
1739 case MachineRepresentation::kFloat64:
1740 __ vpush(i.InputDoubleRegister(1));
1741 break;
1742 case MachineRepresentation::kSimd128:
1743 __ vpush(i.InputSimd128Register(1));
1744 break;
1745 default:
1746 __ push(i.InputRegister(1));
1747 break;
1748 }
1749 frame_access_state()->IncreaseSPDelta(slots);
1750 DCHECK_EQ(LeaveCC, i.OutputSBit());
1751 break;
1752 }
1753 case kArmPoke: {
1754 int const slot = MiscField::decode(instr->opcode());
1755 __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
1756 DCHECK_EQ(LeaveCC, i.OutputSBit());
1757 break;
1758 }
1759 case kArmPeek: {
1760 int reverse_slot = i.InputInt32(0);
1761 int offset =
1762 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1763 if (instr->OutputAt(0)->IsFPRegister()) {
1764 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1765 if (op->representation() == MachineRepresentation::kFloat64) {
1766 __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1767 } else if (op->representation() == MachineRepresentation::kFloat32) {
1768 __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1769 } else {
1770 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
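          // vld1 cannot address memory relative to fp directly, so form the
          // address in a scratch register first.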
1771 UseScratchRegisterScope temps(tasm());
1772 Register scratch = temps.Acquire();
1773 __ add(scratch, fp, Operand(offset));
1774 __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1775 NeonMemOperand(scratch));
1776 }
1777 } else {
1778 __ ldr(i.OutputRegister(), MemOperand(fp, offset));
1779 }
1780 break;
1781 }
1782 case kArmDmbIsh: {
1783 __ dmb(ISH);
1784 break;
1785 }
1786 case kArmDsbIsb: {
1787 __ dsb(SY);
1788 __ isb(SY);
1789 break;
1790 }
1791 case kArmVmullLow: {
1792 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1793 __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).low(),
1794 i.InputSimd128Register(1).low());
1795 break;
1796 }
1797 case kArmVmullHigh: {
1798 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1799 __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).high(),
1800 i.InputSimd128Register(1).high());
1801 break;
1802 }
1803 case kArmVpadal: {
1804 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1805 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1806 __ vpadal(dt, i.OutputSimd128Register(), i.InputSimd128Register(1));
1807 break;
1808 }
1809 case kArmVpaddl: {
1810 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1811 __ vpaddl(dt, i.OutputSimd128Register(), i.InputSimd128Register(0));
1812 break;
1813 }
1814 case kArmF64x2Splat: {
1815 Simd128Register dst = i.OutputSimd128Register();
1816 DoubleRegister src = i.InputDoubleRegister(0);
1817 __ Move(dst.low(), src);
1818 __ Move(dst.high(), src);
1819 break;
1820 }
1821 case kArmF64x2ExtractLane: {
1822 __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
1823 i.InputInt8(1));
1824 break;
1825 }
1826 case kArmF64x2ReplaceLane: {
1827 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1828 i.InputDoubleRegister(2), i.InputInt8(1));
1829 break;
1830 }
1831 case kArmF64x2Abs: {
1832 __ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
1833 __ vabs(i.OutputSimd128Register().high(),
1834 i.InputSimd128Register(0).high());
1835 break;
1836 }
1837 case kArmF64x2Neg: {
1838 __ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
1839 __ vneg(i.OutputSimd128Register().high(),
1840 i.InputSimd128Register(0).high());
1841 break;
1842 }
1843 case kArmF64x2Sqrt: {
1844 __ vsqrt(i.OutputSimd128Register().low(),
1845 i.InputSimd128Register(0).low());
1846 __ vsqrt(i.OutputSimd128Register().high(),
1847 i.InputSimd128Register(0).high());
1848 break;
1849 }
1850 case kArmF64x2Add: {
1851 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd);
1852 break;
1853 }
1854 case kArmF64x2Sub: {
1855 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub);
1856 break;
1857 }
1858 case kArmF64x2Mul: {
1859 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul);
1860 break;
1861 }
1862 case kArmF64x2Div: {
1863 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv);
1864 break;
1865 }
1866 case kArmF64x2Min: {
1867 Simd128Register result = i.OutputSimd128Register();
1868 Simd128Register left = i.InputSimd128Register(0);
1869 Simd128Register right = i.InputSimd128Register(1);
1870 if (left == right) {
1871 __ Move(result, left);
1872 } else {
1873 auto ool_low = zone()->New<OutOfLineFloat64Min>(
1874 this, result.low(), left.low(), right.low());
1875 auto ool_high = zone()->New<OutOfLineFloat64Min>(
1876 this, result.high(), left.high(), right.high());
1877 __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
1878 __ bind(ool_low->exit());
1879 __ FloatMin(result.high(), left.high(), right.high(),
1880 ool_high->entry());
1881 __ bind(ool_high->exit());
1882 }
1883 DCHECK_EQ(LeaveCC, i.OutputSBit());
1884 break;
1885 }
1886 case kArmF64x2Max: {
1887 Simd128Register result = i.OutputSimd128Register();
1888 Simd128Register left = i.InputSimd128Register(0);
1889 Simd128Register right = i.InputSimd128Register(1);
1890 if (left == right) {
1891 __ Move(result, left);
1892 } else {
1893 auto ool_low = zone()->New<OutOfLineFloat64Max>(
1894 this, result.low(), left.low(), right.low());
1895 auto ool_high = zone()->New<OutOfLineFloat64Max>(
1896 this, result.high(), left.high(), right.high());
1897 __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
1898 __ bind(ool_low->exit());
1899 __ FloatMax(result.high(), left.high(), right.high(),
1900 ool_high->entry());
1901 __ bind(ool_high->exit());
1902 }
1903 DCHECK_EQ(LeaveCC, i.OutputSBit());
1904 break;
1905 }
1906 #undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
1907 case kArmF64x2Eq: {
1908 UseScratchRegisterScope temps(tasm());
1909 Register scratch = temps.Acquire();
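      // There is no NEON comparison for 64-bit floats, so compare each half
      // with VFP and broadcast the resulting 0 / -1 into both words of the
      // corresponding 64-bit lane.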
1910 __ mov(scratch, Operand(0));
1911 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1912 i.InputSimd128Register(1).low());
1913 __ mov(scratch, Operand(-1), LeaveCC, eq);
1914 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1915
1916 __ mov(scratch, Operand(0));
1917 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1918 i.InputSimd128Register(1).high());
1919 __ mov(scratch, Operand(-1), LeaveCC, eq);
1920 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1921 break;
1922 }
1923 case kArmF64x2Ne: {
1924 UseScratchRegisterScope temps(tasm());
1925 Register scratch = temps.Acquire();
1926 __ mov(scratch, Operand(0));
1927 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1928 i.InputSimd128Register(1).low());
1929 __ mov(scratch, Operand(-1), LeaveCC, ne);
1930 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1931
1932 __ mov(scratch, Operand(0));
1933 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1934 i.InputSimd128Register(1).high());
1935 __ mov(scratch, Operand(-1), LeaveCC, ne);
1936 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1937 break;
1938 }
1939 case kArmF64x2Lt: {
1940 UseScratchRegisterScope temps(tasm());
1941 Register scratch = temps.Acquire();
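      // After the VFP compare, mi means lhs < rhs; cs covers >=, ==, and
      // unordered (NaN), so NaN operands produce 0.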
1942 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1943 i.InputSimd128Register(1).low());
1944 __ mov(scratch, Operand(0), LeaveCC, cs);
1945 __ mov(scratch, Operand(-1), LeaveCC, mi);
1946 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1947
1948 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1949 i.InputSimd128Register(1).high());
1950 __ mov(scratch, Operand(0), LeaveCC, cs);
1951 __ mov(scratch, Operand(-1), LeaveCC, mi);
1952 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1953 break;
1954 }
1955 case kArmF64x2Le: {
1956 UseScratchRegisterScope temps(tasm());
1957 Register scratch = temps.Acquire();
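      // After the VFP compare, ls means lhs <= rhs; hi covers > and
      // unordered (NaN), so NaN operands produce 0.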
1958 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1959 i.InputSimd128Register(1).low());
1960 __ mov(scratch, Operand(0), LeaveCC, hi);
1961 __ mov(scratch, Operand(-1), LeaveCC, ls);
1962 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1963
1964 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1965 i.InputSimd128Register(1).high());
1966 __ mov(scratch, Operand(0), LeaveCC, hi);
1967 __ mov(scratch, Operand(-1), LeaveCC, ls);
1968 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1969 break;
1970 }
1971 case kArmF64x2Pmin: {
1972 Simd128Register dst = i.OutputSimd128Register();
1973 Simd128Register lhs = i.InputSimd128Register(0);
1974 Simd128Register rhs = i.InputSimd128Register(1);
1975 DCHECK_EQ(dst, lhs);
1976
      // Move rhs into dst only when rhs is strictly less than lhs (mi).
1978 __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
1979 __ vmov(dst.low(), rhs.low(), mi);
1980 __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
1981 __ vmov(dst.high(), rhs.high(), mi);
1982 break;
1983 }
1984 case kArmF64x2Pmax: {
1985 Simd128Register dst = i.OutputSimd128Register();
1986 Simd128Register lhs = i.InputSimd128Register(0);
1987 Simd128Register rhs = i.InputSimd128Register(1);
1988 DCHECK_EQ(dst, lhs);
1989
      // Move rhs into dst only when rhs is strictly greater than lhs (gt).
1991 __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
1992 __ vmov(dst.low(), rhs.low(), gt);
1993 __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
1994 __ vmov(dst.high(), rhs.high(), gt);
1995 break;
1996 }
1997 case kArmF64x2Ceil: {
1998 CpuFeatureScope scope(tasm(), ARMv8);
1999 Simd128Register dst = i.OutputSimd128Register();
2000 Simd128Register src = i.InputSimd128Register(0);
2001 __ vrintp(dst.low(), src.low());
2002 __ vrintp(dst.high(), src.high());
2003 break;
2004 }
2005 case kArmF64x2Floor: {
2006 CpuFeatureScope scope(tasm(), ARMv8);
2007 Simd128Register dst = i.OutputSimd128Register();
2008 Simd128Register src = i.InputSimd128Register(0);
2009 __ vrintm(dst.low(), src.low());
2010 __ vrintm(dst.high(), src.high());
2011 break;
2012 }
2013 case kArmF64x2Trunc: {
2014 CpuFeatureScope scope(tasm(), ARMv8);
2015 Simd128Register dst = i.OutputSimd128Register();
2016 Simd128Register src = i.InputSimd128Register(0);
2017 __ vrintz(dst.low(), src.low());
2018 __ vrintz(dst.high(), src.high());
2019 break;
2020 }
2021 case kArmF64x2NearestInt: {
2022 CpuFeatureScope scope(tasm(), ARMv8);
2023 Simd128Register dst = i.OutputSimd128Register();
2024 Simd128Register src = i.InputSimd128Register(0);
2025 __ vrintn(dst.low(), src.low());
2026 __ vrintn(dst.high(), src.high());
2027 break;
2028 }
2029 case kArmF64x2ConvertLowI32x4S: {
2030 __ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
2031 i.InputSimd128Register(0));
2032 break;
2033 }
2034 case kArmF64x2ConvertLowI32x4U: {
2035 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2036 i.InputSimd128Register(0));
2037 break;
2038 }
2039 case kArmF64x2PromoteLowF32x4: {
2040 __ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
2041 i.InputSimd128Register(0));
2042 break;
2043 }
2044 case kArmI64x2SplatI32Pair: {
2045 Simd128Register dst = i.OutputSimd128Register();
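      // Broadcast the low word into all four 32-bit lanes, then overwrite
      // lanes 1 and 3 (the upper halves of both 64-bit lanes) with the high
      // word.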
2046 __ vdup(Neon32, dst, i.InputRegister(0));
2047 __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
2048 __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
2049 break;
2050 }
2051 case kArmI64x2ReplaceLaneI32Pair: {
2052 Simd128Register dst = i.OutputSimd128Register();
2053 int8_t lane = i.InputInt8(1);
2054 __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
2055 __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
2056 break;
2057 }
2058 case kArmI64x2Add: {
2059 __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
2060 i.InputSimd128Register(1));
2061 break;
2062 }
2063 case kArmI64x2Sub: {
2064 __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
2065 i.InputSimd128Register(1));
2066 break;
2067 }
2068 case kArmI64x2Mul: {
2069 UseScratchRegisterScope temps(tasm());
2070 QwNeonRegister dst = i.OutputSimd128Register();
2071 QwNeonRegister left = i.InputSimd128Register(0);
2072 QwNeonRegister right = i.InputSimd128Register(1);
2073 QwNeonRegister tmp1 = i.TempSimd128Register(0);
2074 QwNeonRegister tmp2 = temps.AcquireQ();
2075
      // This algorithm performs 64-bit integer multiplication with vector
      // operations by splitting each factor into its high and low 32-bit
      // halves. The tricky part is getting those halves into the right place
      // inside a NEON register, so that as few vmull and vmlal instructions
      // as possible are needed.
2081
      // Move left and right into temporaries; they will be modified by vtrn.
2083 __ vmov(tmp1, left);
2084 __ vmov(tmp2, right);
2085
2086 // This diagram shows how the 64-bit integers fit into NEON registers.
2087 //
2088 // [q.high()| q.low()]
2089 // left/tmp1: [ a3, a2 | a1, a0 ]
2090 // right/tmp2: [ b3, b2 | b1, b0 ]
2091 //
      // We want to multiply the low 32 bits of left with the high 32 bits of
      // right for each lane, i.e. a2 * b3 and a0 * b1. However, vmull takes
      // two input d registers and multiplies corresponding 32-bit lanes into
      // 64-bit products, e.g. a1 * b1 and a0 * b0. To make this work we
      // transpose the vectors so that the low 32 bits of each 64-bit integer
      // end up in the same lane, and similarly for the high 32 bits.
2098 __ vtrn(Neon32, tmp1.low(), tmp1.high());
2099 // tmp1: [ a3, a1 | a2, a0 ]
2100 __ vtrn(Neon32, tmp2.low(), tmp2.high());
2101 // tmp2: [ b3, b1 | b2, b0 ]
2102
2103 __ vmull(NeonU32, dst, tmp1.low(), tmp2.high());
2104 // dst: [ a2*b3 | a0*b1 ]
2105 __ vmlal(NeonU32, dst, tmp1.high(), tmp2.low());
2106 // dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ]
2107 __ vshl(NeonU64, dst, dst, 32);
2108 // dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ]
2109
2110 __ vmlal(NeonU32, dst, tmp1.low(), tmp2.low());
2111 // dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ]
2112 break;
2113 }
2114 case kArmI64x2Abs: {
2115 __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0));
2116 break;
2117 }
2118 case kArmI64x2Neg: {
2119 Simd128Register dst = i.OutputSimd128Register();
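      // NEON has no 64-bit vneg, so negate by subtracting from zero.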
2120 __ vmov(dst, uint64_t{0});
2121 __ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
2122 break;
2123 }
2124 case kArmI64x2Shl: {
2125 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64);
2126 break;
2127 }
2128 case kArmI64x2ShrS: {
2129 // Only the least significant byte of each lane is used, so we can use
2130 // Neon32 as the size.
2131 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64);
2132 break;
2133 }
2134 case kArmI64x2ShrU: {
2135 // Only the least significant byte of each lane is used, so we can use
2136 // Neon32 as the size.
2137 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
2138 break;
2139 }
2140 case kArmI64x2BitMask: {
2141 __ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0));
2142 break;
2143 }
2144 case kArmI64x2SConvertI32x4Low: {
2145 __ vmovl(NeonS32, i.OutputSimd128Register(),
2146 i.InputSimd128Register(0).low());
2147 break;
2148 }
2149 case kArmI64x2SConvertI32x4High: {
2150 __ vmovl(NeonS32, i.OutputSimd128Register(),
2151 i.InputSimd128Register(0).high());
2152 break;
2153 }
2154 case kArmI64x2UConvertI32x4Low: {
2155 __ vmovl(NeonU32, i.OutputSimd128Register(),
2156 i.InputSimd128Register(0).low());
2157 break;
2158 }
2159 case kArmI64x2UConvertI32x4High: {
2160 __ vmovl(NeonU32, i.OutputSimd128Register(),
2161 i.InputSimd128Register(0).high());
2162 break;
2163 }
2164 case kArmF32x4Splat: {
2165 int src_code = i.InputFloatRegister(0).code();
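      // vdup takes a D register and a lane index: src_code / 2 is the D
      // register that aliases the S register, src_code % 2 the lane within it.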
2166 __ vdup(Neon32, i.OutputSimd128Register(),
2167 DwVfpRegister::from_code(src_code / 2), src_code % 2);
2168 break;
2169 }
2170 case kArmF32x4ExtractLane: {
2171 __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2172 i.InputInt8(1));
2173 break;
2174 }
2175 case kArmF32x4ReplaceLane: {
2176 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2177 i.InputFloatRegister(2), i.InputInt8(1));
2178 break;
2179 }
2180 case kArmF32x4SConvertI32x4: {
2181 __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2182 break;
2183 }
2184 case kArmF32x4UConvertI32x4: {
2185 __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2186 break;
2187 }
2188 case kArmF32x4Abs: {
2189 __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
2190 break;
2191 }
2192 case kArmF32x4Neg: {
2193 __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
2194 break;
2195 }
2196 case kArmF32x4Sqrt: {
2197 QwNeonRegister dst = i.OutputSimd128Register();
2198 QwNeonRegister src1 = i.InputSimd128Register(0);
2199 DCHECK_EQ(dst, q0);
2200 DCHECK_EQ(src1, q0);
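      // NEON has no vector sqrt, so take the square root lane by lane via the
      // S registers that alias the Q register; the instruction selector fixes
      // dst and src to q0 so that these aliases exist.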
2201 #define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
2202 __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
2203 __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
2204 __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
2205 __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
2206 #undef S_FROM_Q
2207 break;
2208 }
2209 case kArmF32x4RecipApprox: {
2210 __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
2211 break;
2212 }
2213 case kArmF32x4RecipSqrtApprox: {
2214 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
2215 break;
2216 }
2217 case kArmF32x4Add: {
2218 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2219 i.InputSimd128Register(1));
2220 break;
2221 }
2222 case kArmF32x4Sub: {
2223 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
2224 i.InputSimd128Register(1));
2225 break;
2226 }
2227 case kArmF32x4Mul: {
2228 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2229 i.InputSimd128Register(1));
2230 break;
2231 }
2232 case kArmF32x4Div: {
2233 QwNeonRegister dst = i.OutputSimd128Register();
2234 QwNeonRegister src1 = i.InputSimd128Register(0);
2235 QwNeonRegister src2 = i.InputSimd128Register(1);
2236 DCHECK_EQ(dst, q0);
2237 DCHECK_EQ(src1, q0);
2238 DCHECK_EQ(src2, q1);
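      // NEON has no vector divide, so divide lane by lane via the aliased S
      // registers; the instruction selector fixes the operands to q0 and q1.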
2239 #define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
2240 __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0));
2241 __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1));
2242 __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2));
2243 __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3));
2244 #undef S_FROM_Q
2245 break;
2246 }
2247 case kArmF32x4Min: {
2248 __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
2249 i.InputSimd128Register(1));
2250 break;
2251 }
2252 case kArmF32x4Max: {
2253 __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
2254 i.InputSimd128Register(1));
2255 break;
2256 }
2257 case kArmF32x4Eq: {
2258 __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2259 i.InputSimd128Register(1));
2260 break;
2261 }
2262 case kArmF32x4Ne: {
2263 Simd128Register dst = i.OutputSimd128Register();
2264 __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2265 __ vmvn(dst, dst);
2266 break;
2267 }
2268 case kArmF32x4Lt: {
2269 __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
2270 i.InputSimd128Register(0));
2271 break;
2272 }
2273 case kArmF32x4Le: {
2274 __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
2275 i.InputSimd128Register(0));
2276 break;
2277 }
2278 case kArmF32x4Pmin: {
2279 Simd128Register dst = i.OutputSimd128Register();
2280 Simd128Register lhs = i.InputSimd128Register(0);
2281 Simd128Register rhs = i.InputSimd128Register(1);
2282 DCHECK_NE(dst, lhs);
2283 DCHECK_NE(dst, rhs);
2284
2285 // f32x4.pmin(lhs, rhs)
2286 // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
2287 // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
2288 __ vcgt(dst, lhs, rhs);
2289 __ vbsl(dst, rhs, lhs);
2290 break;
2291 }
2292 case kArmF32x4Pmax: {
2293 Simd128Register dst = i.OutputSimd128Register();
2294 Simd128Register lhs = i.InputSimd128Register(0);
2295 Simd128Register rhs = i.InputSimd128Register(1);
2296 DCHECK_NE(dst, lhs);
2297 DCHECK_NE(dst, rhs);
2298
2299 // f32x4.pmax(lhs, rhs)
2300 // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
2301 __ vcgt(dst, rhs, lhs);
2302 __ vbsl(dst, rhs, lhs);
2303 break;
2304 }
2305 case kArmF32x4DemoteF64x2Zero: {
2306 Simd128Register dst = i.OutputSimd128Register();
2307 Simd128Register src = i.InputSimd128Register(0);
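      // Convert each double into one of the two low single-precision lanes,
      // then zero the upper half of the result.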
2308 __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2309 __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2310 __ vmov(dst.high(), 0);
2311 break;
2312 }
2313 case kArmI32x4Splat: {
2314 __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
2315 break;
2316 }
2317 case kArmI32x4ExtractLane: {
2318 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
2319 i.InputInt8(1));
2320 break;
2321 }
2322 case kArmI32x4ReplaceLane: {
2323 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2324 i.InputRegister(2), NeonS32, i.InputInt8(1));
2325 break;
2326 }
2327 case kArmI32x4SConvertF32x4: {
2328 __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2329 break;
2330 }
2331 case kArmI32x4SConvertI16x8Low: {
2332 __ vmovl(NeonS16, i.OutputSimd128Register(),
2333 i.InputSimd128Register(0).low());
2334 break;
2335 }
2336 case kArmI32x4SConvertI16x8High: {
2337 __ vmovl(NeonS16, i.OutputSimd128Register(),
2338 i.InputSimd128Register(0).high());
2339 break;
2340 }
2341 case kArmI32x4Neg: {
2342 __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2343 break;
2344 }
2345 case kArmI32x4Shl: {
2346 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32);
2347 break;
2348 }
2349 case kArmI32x4ShrS: {
2350 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32);
2351 break;
2352 }
2353 case kArmI32x4Add: {
2354 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2355 i.InputSimd128Register(1));
2356 break;
2357 }
2358 case kArmI32x4Sub: {
2359 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2360 i.InputSimd128Register(1));
2361 break;
2362 }
2363 case kArmI32x4Mul: {
2364 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2365 i.InputSimd128Register(1));
2366 break;
2367 }
2368 case kArmI32x4MinS: {
2369 __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2370 i.InputSimd128Register(1));
2371 break;
2372 }
2373 case kArmI32x4MaxS: {
2374 __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2375 i.InputSimd128Register(1));
2376 break;
2377 }
2378 case kArmI64x2Eq: {
2379 __ I64x2Eq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2380 i.InputSimd128Register(1));
2381 break;
2382 }
2383 case kArmI64x2Ne: {
2384 __ I64x2Ne(i.OutputSimd128Register(), i.InputSimd128Register(0),
2385 i.InputSimd128Register(1));
2386 break;
2387 }
2388 case kArmI64x2GtS: {
2389 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2390 i.InputSimd128Register(1));
2391 break;
2392 }
2393 case kArmI64x2GeS: {
2394 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2395 i.InputSimd128Register(1));
2396 break;
2397 }
2398 case kArmI32x4Eq: {
2399 __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2400 i.InputSimd128Register(1));
2401 break;
2402 }
2403 case kArmI32x4Ne: {
2404 Simd128Register dst = i.OutputSimd128Register();
2405 __ vceq(Neon32, dst, i.InputSimd128Register(0),
2406 i.InputSimd128Register(1));
2407 __ vmvn(dst, dst);
2408 break;
2409 }
2410 case kArmI32x4GtS: {
2411 __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2412 i.InputSimd128Register(1));
2413 break;
2414 }
2415 case kArmI32x4GeS: {
2416 __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2417 i.InputSimd128Register(1));
2418 break;
2419 }
2420 case kArmI32x4UConvertF32x4: {
2421 __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2422 break;
2423 }
2424 case kArmI32x4UConvertI16x8Low: {
2425 __ vmovl(NeonU16, i.OutputSimd128Register(),
2426 i.InputSimd128Register(0).low());
2427 break;
2428 }
2429 case kArmI32x4UConvertI16x8High: {
2430 __ vmovl(NeonU16, i.OutputSimd128Register(),
2431 i.InputSimd128Register(0).high());
2432 break;
2433 }
2434 case kArmI32x4ShrU: {
2435 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonU32);
2436 break;
2437 }
2438 case kArmI32x4MinU: {
2439 __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2440 i.InputSimd128Register(1));
2441 break;
2442 }
2443 case kArmI32x4MaxU: {
2444 __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2445 i.InputSimd128Register(1));
2446 break;
2447 }
2448 case kArmI32x4GtU: {
2449 __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2450 i.InputSimd128Register(1));
2451 break;
2452 }
2453 case kArmI32x4GeU: {
2454 __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2455 i.InputSimd128Register(1));
2456 break;
2457 }
2458 case kArmI32x4Abs: {
2459 __ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2460 break;
2461 }
2462 case kArmI32x4BitMask: {
2463 Register dst = i.OutputRegister();
2464 UseScratchRegisterScope temps(tasm());
2465 Simd128Register src = i.InputSimd128Register(0);
2466 Simd128Register tmp = temps.AcquireQ();
2467 Simd128Register mask = i.TempSimd128Register(0);
2468
2469 __ vshr(NeonS32, tmp, src, 31);
      // Build a mask whose i-th lane contains only bit i. ANDing it with tmp
      // keeps bit i for lanes that were negative (all ones after the
      // arithmetic shift) and clears it for non-negative lanes.
2472 __ vmov(mask.low(), base::Double(uint64_t{0x0000'0002'0000'0001}));
2473 __ vmov(mask.high(), base::Double(uint64_t{0x0000'0008'0000'0004}));
2474 __ vand(tmp, mask, tmp);
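      // Add across the vector: the first vpadd folds the four lane bits into
      // two words, the second (adding zero) folds those into lane 0, which
      // then holds the final bitmask.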
2475 __ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
2476 __ vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
2477 __ VmovLow(dst, tmp.low());
2478 break;
2479 }
2480 case kArmI32x4DotI16x8S: {
2481 Simd128Register dst = i.OutputSimd128Register();
2482 Simd128Register lhs = i.InputSimd128Register(0);
2483 Simd128Register rhs = i.InputSimd128Register(1);
2484 Simd128Register tmp1 = i.TempSimd128Register(0);
2485 UseScratchRegisterScope temps(tasm());
2486 Simd128Register scratch = temps.AcquireQ();
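      // The widening multiplies give the eight 32-bit products; the pairwise
      // adds then sum adjacent products, so each output lane is
      // lhs[2k]*rhs[2k] + lhs[2k+1]*rhs[2k+1].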
2487 __ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
2488 __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
2489 __ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
2490 __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
2491 break;
2492 }
2493 case kArmI32x4TruncSatF64x2SZero: {
2494 Simd128Register dst = i.OutputSimd128Register();
2495 Simd128Register src = i.InputSimd128Register(0);
2496 __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2497 __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2498 __ vmov(dst.high(), 0);
2499 break;
2500 }
2501 case kArmI32x4TruncSatF64x2UZero: {
2502 Simd128Register dst = i.OutputSimd128Register();
2503 Simd128Register src = i.InputSimd128Register(0);
2504 __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2505 __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2506 __ vmov(dst.high(), 0);
2507 break;
2508 }
2509 case kArmI16x8Splat: {
2510 __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
2511 break;
2512 }
2513 case kArmI16x8ExtractLaneU: {
2514 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
2515 i.InputInt8(1));
2516 break;
2517 }
2518 case kArmI16x8ExtractLaneS: {
2519 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
2520 i.InputInt8(1));
2521 break;
2522 }
2523 case kArmI16x8ReplaceLane: {
2524 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2525 i.InputRegister(2), NeonS16, i.InputInt8(1));
2526 break;
2527 }
2528 case kArmI16x8SConvertI8x16Low: {
2529 __ vmovl(NeonS8, i.OutputSimd128Register(),
2530 i.InputSimd128Register(0).low());
2531 break;
2532 }
2533 case kArmI16x8SConvertI8x16High: {
2534 __ vmovl(NeonS8, i.OutputSimd128Register(),
2535 i.InputSimd128Register(0).high());
2536 break;
2537 }
2538 case kArmI16x8Neg: {
2539 __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2540 break;
2541 }
2542 case kArmI16x8Shl: {
2543 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 4, Neon16, NeonS16);
2544 break;
2545 }
2546 case kArmI16x8ShrS: {
2547 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonS16);
2548 break;
2549 }
2550 case kArmI16x8SConvertI32x4:
2551 ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
2552 break;
2553 case kArmI16x8Add: {
2554 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2555 i.InputSimd128Register(1));
2556 break;
2557 }
2558 case kArmI16x8AddSatS: {
2559 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2560 i.InputSimd128Register(1));
2561 break;
2562 }
2563 case kArmI16x8Sub: {
2564 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2565 i.InputSimd128Register(1));
2566 break;
2567 }
2568 case kArmI16x8SubSatS: {
2569 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2570 i.InputSimd128Register(1));
2571 break;
2572 }
2573 case kArmI16x8Mul: {
2574 __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2575 i.InputSimd128Register(1));
2576 break;
2577 }
2578 case kArmI16x8MinS: {
2579 __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2580 i.InputSimd128Register(1));
2581 break;
2582 }
2583 case kArmI16x8MaxS: {
2584 __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2585 i.InputSimd128Register(1));
2586 break;
2587 }
2588 case kArmI16x8Eq: {
2589 __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2590 i.InputSimd128Register(1));
2591 break;
2592 }
2593 case kArmI16x8Ne: {
2594 Simd128Register dst = i.OutputSimd128Register();
2595 __ vceq(Neon16, dst, i.InputSimd128Register(0),
2596 i.InputSimd128Register(1));
2597 __ vmvn(dst, dst);
2598 break;
2599 }
2600 case kArmI16x8GtS: {
2601 __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2602 i.InputSimd128Register(1));
2603 break;
2604 }
2605 case kArmI16x8GeS: {
2606 __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2607 i.InputSimd128Register(1));
2608 break;
2609 }
2610 case kArmI16x8UConvertI8x16Low: {
2611 __ vmovl(NeonU8, i.OutputSimd128Register(),
2612 i.InputSimd128Register(0).low());
2613 break;
2614 }
2615 case kArmI16x8UConvertI8x16High: {
2616 __ vmovl(NeonU8, i.OutputSimd128Register(),
2617 i.InputSimd128Register(0).high());
2618 break;
2619 }
2620 case kArmI16x8ShrU: {
2621 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonU16);
2622 break;
2623 }
2624 case kArmI16x8UConvertI32x4:
2625 ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
2626 break;
2627 case kArmI16x8AddSatU: {
2628 __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2629 i.InputSimd128Register(1));
2630 break;
2631 }
2632 case kArmI16x8SubSatU: {
2633 __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2634 i.InputSimd128Register(1));
2635 break;
2636 }
2637 case kArmI16x8MinU: {
2638 __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2639 i.InputSimd128Register(1));
2640 break;
2641 }
2642 case kArmI16x8MaxU: {
2643 __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2644 i.InputSimd128Register(1));
2645 break;
2646 }
2647 case kArmI16x8GtU: {
2648 __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2649 i.InputSimd128Register(1));
2650 break;
2651 }
2652 case kArmI16x8GeU: {
2653 __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2654 i.InputSimd128Register(1));
2655 break;
2656 }
2657 case kArmI16x8RoundingAverageU: {
2658 __ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2659 i.InputSimd128Register(1));
2660 break;
2661 }
2662 case kArmI16x8Abs: {
2663 __ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2664 break;
2665 }
2666 case kArmI16x8BitMask: {
2667 UseScratchRegisterScope temps(tasm());
2668 Register dst = i.OutputRegister();
2669 Simd128Register src = i.InputSimd128Register(0);
2670 Simd128Register tmp = temps.AcquireQ();
2671 Simd128Register mask = i.TempSimd128Register(0);
2672
2673 __ vshr(NeonS16, tmp, src, 15);
      // Build a mask whose i-th lane contains only bit i. ANDing it with tmp
      // keeps bit i for lanes that were negative (all ones after the
      // arithmetic shift) and clears it for non-negative lanes.
2676 __ vmov(mask.low(), base::Double(uint64_t{0x0008'0004'0002'0001}));
2677 __ vmov(mask.high(), base::Double(uint64_t{0x0080'0040'0020'0010}));
2678 __ vand(tmp, mask, tmp);
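      // Three pairwise adds fold the eight disjoint lane bits into lane 0.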
2679 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
2680 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2681 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2682 __ vmov(NeonU16, dst, tmp.low(), 0);
2683 break;
2684 }
2685 case kArmI16x8Q15MulRSatS: {
2686 __ vqrdmulh(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2687 i.InputSimd128Register(1));
2688 break;
2689 }
2690 case kArmI8x16Splat: {
2691 __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
2692 break;
2693 }
2694 case kArmI8x16ExtractLaneU: {
2695 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
2696 i.InputInt8(1));
2697 break;
2698 }
2699 case kArmI8x16ExtractLaneS: {
2700 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
2701 i.InputInt8(1));
2702 break;
2703 }
2704 case kArmI8x16ReplaceLane: {
2705 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2706 i.InputRegister(2), NeonS8, i.InputInt8(1));
2707 break;
2708 }
2709 case kArmI8x16Neg: {
2710 __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2711 break;
2712 }
2713 case kArmI8x16Shl: {
2714 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 3, Neon8, NeonS8);
2715 break;
2716 }
2717 case kArmI8x16ShrS: {
2718 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonS8);
2719 break;
2720 }
2721 case kArmI8x16SConvertI16x8:
2722 ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
2723 break;
2724 case kArmI8x16Add: {
2725 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2726 i.InputSimd128Register(1));
2727 break;
2728 }
2729 case kArmI8x16AddSatS: {
2730 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2731 i.InputSimd128Register(1));
2732 break;
2733 }
2734 case kArmI8x16Sub: {
2735 __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2736 i.InputSimd128Register(1));
2737 break;
2738 }
2739 case kArmI8x16SubSatS: {
2740 __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2741 i.InputSimd128Register(1));
2742 break;
2743 }
2744 case kArmI8x16MinS: {
2745 __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2746 i.InputSimd128Register(1));
2747 break;
2748 }
2749 case kArmI8x16MaxS: {
2750 __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2751 i.InputSimd128Register(1));
2752 break;
2753 }
2754 case kArmI8x16Eq: {
2755 __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2756 i.InputSimd128Register(1));
2757 break;
2758 }
2759 case kArmI8x16Ne: {
2760 Simd128Register dst = i.OutputSimd128Register();
2761 __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2762 __ vmvn(dst, dst);
2763 break;
2764 }
2765 case kArmI8x16GtS: {
2766 __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2767 i.InputSimd128Register(1));
2768 break;
2769 }
2770 case kArmI8x16GeS: {
2771 __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2772 i.InputSimd128Register(1));
2773 break;
2774 }
2775 case kArmI8x16ShrU: {
2776 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonU8);
2777 break;
2778 }
2779 case kArmI8x16UConvertI16x8:
2780 ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
2781 break;
2782 case kArmI8x16AddSatU: {
2783 __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2784 i.InputSimd128Register(1));
2785 break;
2786 }
2787 case kArmI8x16SubSatU: {
2788 __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2789 i.InputSimd128Register(1));
2790 break;
2791 }
2792 case kArmI8x16MinU: {
2793 __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2794 i.InputSimd128Register(1));
2795 break;
2796 }
2797 case kArmI8x16MaxU: {
2798 __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2799 i.InputSimd128Register(1));
2800 break;
2801 }
2802 case kArmI8x16GtU: {
2803 __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2804 i.InputSimd128Register(1));
2805 break;
2806 }
2807 case kArmI8x16GeU: {
2808 __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2809 i.InputSimd128Register(1));
2810 break;
2811 }
2812 case kArmI8x16RoundingAverageU: {
2813 __ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2814 i.InputSimd128Register(1));
2815 break;
2816 }
2817 case kArmI8x16Abs: {
2818 __ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2819 break;
2820 }
2821 case kArmI8x16BitMask: {
2822 UseScratchRegisterScope temps(tasm());
2823 Register dst = i.OutputRegister();
2824 Simd128Register src = i.InputSimd128Register(0);
2825 Simd128Register tmp = temps.AcquireQ();
2826 Simd128Register mask = i.TempSimd128Register(0);
2827
2828 __ vshr(NeonS8, tmp, src, 7);
      // Build a mask whose i-th lane contains only bit i. ANDing it with tmp
      // keeps bit i for lanes that were negative (all ones after the
      // arithmetic shift) and clears it for non-negative lanes.
2831 __ vmov(mask.low(), base::Double(uint64_t{0x8040'2010'0804'0201}));
2832 __ vmov(mask.high(), base::Double(uint64_t{0x8040'2010'0804'0201}));
2833 __ vand(tmp, mask, tmp);
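      // Interleave the two halves so each 16-bit lane pairs bit i of lane i
      // (low byte) with bit i of lane 8+i (high byte); the pairwise adds then
      // accumulate the disjoint bits into a single 16-bit mask.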
2834 __ vext(mask, tmp, tmp, 8);
2835 __ vzip(Neon8, mask, tmp);
2836 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
2837 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2838 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2839 __ vmov(NeonU16, dst, tmp.low(), 0);
2840 break;
2841 }
2842 case kArmS128Const: {
2843 QwNeonRegister dst = i.OutputSimd128Register();
2844 uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
2845 uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
2846 __ vmov(dst.low(), base::Double(imm1));
2847 __ vmov(dst.high(), base::Double(imm2));
2848 break;
2849 }
2850 case kArmS128Zero: {
2851 __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2852 i.OutputSimd128Register());
2853 break;
2854 }
2855 case kArmS128AllOnes: {
2856 __ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
2857 break;
2858 }
2859 case kArmS128Dup: {
2860 NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
2861 int lanes = kSimd128Size >> size;
2862 int index = i.InputInt32(2);
2863 DCHECK(index < lanes);
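      // vdup reads from a D register, so translate the Q-register lane index
      // into the aliased D register (low or high half) and a lane within it.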
2864 int d_lanes = lanes / 2;
2865 int src_d_index = index & (d_lanes - 1);
2866 int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
2867 __ vdup(size, i.OutputSimd128Register(),
2868 DwVfpRegister::from_code(src_d_code), src_d_index);
2869 break;
2870 }
2871 case kArmS128And: {
2872 __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
2873 i.InputSimd128Register(1));
2874 break;
2875 }
2876 case kArmS128Or: {
2877 __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
2878 i.InputSimd128Register(1));
2879 break;
2880 }
2881 case kArmS128Xor: {
2882 __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
2883 i.InputSimd128Register(1));
2884 break;
2885 }
2886 case kArmS128Not: {
2887 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2888 break;
2889 }
2890 case kArmS128Select: {
2891 Simd128Register dst = i.OutputSimd128Register();
2892 DCHECK(dst == i.InputSimd128Register(0));
2893 __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
2894 break;
2895 }
2896 case kArmS128AndNot: {
2897 __ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
2898 i.InputSimd128Register(1));
2899 break;
2900 }
2901 case kArmS32x4ZipLeft: {
2902 Simd128Register dst = i.OutputSimd128Register(),
2903 src1 = i.InputSimd128Register(1);
2904 DCHECK(dst == i.InputSimd128Register(0));
2905 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2906 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2907 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2908 break;
2909 }
2910 case kArmS32x4ZipRight: {
2911 Simd128Register dst = i.OutputSimd128Register(),
2912 src1 = i.InputSimd128Register(1);
2913 DCHECK(dst == i.InputSimd128Register(0));
2914 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2915 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2916 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2917 break;
2918 }
2919 case kArmS32x4UnzipLeft: {
2920 Simd128Register dst = i.OutputSimd128Register(),
2921 src1 = i.InputSimd128Register(1);
2922 DCHECK(dst == i.InputSimd128Register(0));
2923 UseScratchRegisterScope temps(tasm());
2924 Simd128Register scratch = temps.AcquireQ();
2925 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2926 __ vmov(scratch, src1);
2927 __ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
2928 break;
2929 }
2930 case kArmS32x4UnzipRight: {
2931 Simd128Register dst = i.OutputSimd128Register(),
2932 src1 = i.InputSimd128Register(1);
2933 DCHECK(dst == i.InputSimd128Register(0));
2934 UseScratchRegisterScope temps(tasm());
2935 Simd128Register scratch = temps.AcquireQ();
2936 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
2937 __ vmov(scratch, src1);
2938 __ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
2939 break;
2940 }
2941 case kArmS32x4TransposeLeft: {
2942 Simd128Register dst = i.OutputSimd128Register(),
2943 src1 = i.InputSimd128Register(1);
2944 DCHECK(dst == i.InputSimd128Register(0));
2945 UseScratchRegisterScope temps(tasm());
2946 Simd128Register scratch = temps.AcquireQ();
2947 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2948 __ vmov(scratch, src1);
2949 __ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
2950 break;
2951 }
2952 case kArmS32x4Shuffle: {
2953 Simd128Register dst = i.OutputSimd128Register(),
2954 src0 = i.InputSimd128Register(0),
2955 src1 = i.InputSimd128Register(1);
2956 DCHECK_NE(dst, src0);
2957 DCHECK_NE(dst, src1);
2958 // Perform shuffle as a vmov per lane.
2959 int dst_code = dst.code() * 4;
2960 int src0_code = src0.code() * 4;
2961 int src1_code = src1.code() * 4;
2962 int32_t shuffle = i.InputInt32(2);
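      // Each byte of the shuffle immediate selects one source lane: 0-3 from
      // src0, 4-7 from src1.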
2963 for (int i = 0; i < 4; i++) {
2964 int lane = shuffle & 0x7;
2965 int src_code = src0_code;
2966 if (lane >= 4) {
2967 src_code = src1_code;
2968 lane &= 0x3;
2969 }
2970 __ VmovExtended(dst_code + i, src_code + lane);
2971 shuffle >>= 8;
2972 }
2973 break;
2974 }
2975 case kArmS32x4TransposeRight: {
2976 Simd128Register dst = i.OutputSimd128Register(),
2977 src1 = i.InputSimd128Register(1);
2978 UseScratchRegisterScope temps(tasm());
2979 Simd128Register scratch = temps.AcquireQ();
2980 DCHECK(dst == i.InputSimd128Register(0));
2981 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2982 __ vmov(scratch, src1);
2983 __ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
2984 break;
2985 }
2986 case kArmS16x8ZipLeft: {
2987 Simd128Register dst = i.OutputSimd128Register(),
2988 src1 = i.InputSimd128Register(1);
2989 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2990 DCHECK(dst == i.InputSimd128Register(0));
2991 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
2992 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
2993 break;
2994 }
2995 case kArmS16x8ZipRight: {
2996 Simd128Register dst = i.OutputSimd128Register(),
2997 src1 = i.InputSimd128Register(1);
2998 DCHECK(dst == i.InputSimd128Register(0));
2999 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3000 __ vmov(dst.low(), src1.high());
3001 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
3002 break;
3003 }
3004 case kArmS16x8UnzipLeft: {
3005 Simd128Register dst = i.OutputSimd128Register(),
3006 src1 = i.InputSimd128Register(1);
3007 UseScratchRegisterScope temps(tasm());
3008 Simd128Register scratch = temps.AcquireQ();
3009 DCHECK(dst == i.InputSimd128Register(0));
3010 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
3011 __ vmov(scratch, src1);
3012 __ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
3013 break;
3014 }
3015 case kArmS16x8UnzipRight: {
3016 Simd128Register dst = i.OutputSimd128Register(),
3017 src1 = i.InputSimd128Register(1);
3018 UseScratchRegisterScope temps(tasm());
3019 Simd128Register scratch = temps.AcquireQ();
3020 DCHECK(dst == i.InputSimd128Register(0));
3021 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3022 __ vmov(scratch, src1);
3023 __ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
3024 break;
3025 }
3026 case kArmS16x8TransposeLeft: {
3027 Simd128Register dst = i.OutputSimd128Register(),
3028 src1 = i.InputSimd128Register(1);
3029 UseScratchRegisterScope temps(tasm());
3030 Simd128Register scratch = temps.AcquireQ();
3031 DCHECK(dst == i.InputSimd128Register(0));
3032 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
3033 __ vmov(scratch, src1);
3034 __ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
3035 break;
3036 }
3037 case kArmS16x8TransposeRight: {
3038 Simd128Register dst = i.OutputSimd128Register(),
3039 src1 = i.InputSimd128Register(1);
3040 UseScratchRegisterScope temps(tasm());
3041 Simd128Register scratch = temps.AcquireQ();
3042 DCHECK(dst == i.InputSimd128Register(0));
3043 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3044 __ vmov(scratch, src1);
3045 __ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
3046 break;
3047 }
3048 case kArmS8x16ZipLeft: {
3049 Simd128Register dst = i.OutputSimd128Register(),
3050 src1 = i.InputSimd128Register(1);
3051 DCHECK(dst == i.InputSimd128Register(0));
3052 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3053 __ vmov(dst.high(), src1.low());
3054 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
3055 break;
3056 }
3057 case kArmS8x16ZipRight: {
3058 Simd128Register dst = i.OutputSimd128Register(),
3059 src1 = i.InputSimd128Register(1);
3060 DCHECK(dst == i.InputSimd128Register(0));
3061 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3062 __ vmov(dst.low(), src1.high());
3063 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
3064 break;
3065 }
3066 case kArmS8x16UnzipLeft: {
3067 Simd128Register dst = i.OutputSimd128Register(),
3068 src1 = i.InputSimd128Register(1);
3069 UseScratchRegisterScope temps(tasm());
3070 Simd128Register scratch = temps.AcquireQ();
3071 DCHECK(dst == i.InputSimd128Register(0));
3072 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3073 __ vmov(scratch, src1);
3074 __ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
3075 break;
3076 }
3077 case kArmS8x16UnzipRight: {
3078 Simd128Register dst = i.OutputSimd128Register(),
3079 src1 = i.InputSimd128Register(1);
3080 UseScratchRegisterScope temps(tasm());
3081 Simd128Register scratch = temps.AcquireQ();
3082 DCHECK(dst == i.InputSimd128Register(0));
3083 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3084 __ vmov(scratch, src1);
3085 __ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
3086 break;
3087 }
3088 case kArmS8x16TransposeLeft: {
3089 Simd128Register dst = i.OutputSimd128Register(),
3090 src1 = i.InputSimd128Register(1);
3091 UseScratchRegisterScope temps(tasm());
3092 Simd128Register scratch = temps.AcquireQ();
3093 DCHECK(dst == i.InputSimd128Register(0));
3094 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3095 __ vmov(scratch, src1);
3096 __ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
3097 break;
3098 }
3099 case kArmS8x16TransposeRight: {
3100 Simd128Register dst = i.OutputSimd128Register(),
3101 src1 = i.InputSimd128Register(1);
3102 UseScratchRegisterScope temps(tasm());
3103 Simd128Register scratch = temps.AcquireQ();
3104 DCHECK(dst == i.InputSimd128Register(0));
3105 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3106 __ vmov(scratch, src1);
3107 __ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
3108 break;
3109 }
3110 case kArmS8x16Concat: {
3111 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
3112 i.InputSimd128Register(1), i.InputInt4(2));
3113 break;
3114 }
3115 case kArmI8x16Swizzle: {
3116 Simd128Register dst = i.OutputSimd128Register(),
3117 tbl = i.InputSimd128Register(0),
3118 src = i.InputSimd128Register(1);
3119 NeonListOperand table(tbl);
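      // vtbl writes zero for out-of-range indices, matching the Wasm swizzle
      // semantics.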
3120 __ vtbl(dst.low(), table, src.low());
3121 __ vtbl(dst.high(), table, src.high());
3122 break;
3123 }
3124 case kArmI8x16Shuffle: {
3125 Simd128Register dst = i.OutputSimd128Register(),
3126 src0 = i.InputSimd128Register(0),
3127 src1 = i.InputSimd128Register(1);
3128 DwVfpRegister table_base = src0.low();
3129 UseScratchRegisterScope temps(tasm());
3130 Simd128Register scratch = temps.AcquireQ();
      // For a unary shuffle the table is src0 (2 d-registers); otherwise it is
      // src0 and src1, which must be consecutive registers.
3133 int table_size = src0 == src1 ? 2 : 4;
3134 DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
      // The shuffle lane mask is a byte mask; materialize it in scratch.
3136 int scratch_s_base = scratch.code() * 4;
3137 for (int j = 0; j < 4; j++) {
3138 uint32_t four_lanes = i.InputUint32(2 + j);
3139 DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
3140 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
3141 Float32::FromBits(four_lanes));
3142 }
3143 NeonListOperand table(table_base, table_size);
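      // If dst aliases a table register, build the result in scratch first so
      // the table is not clobbered between the two lookups.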
3144 if (dst != src0 && dst != src1) {
3145 __ vtbl(dst.low(), table, scratch.low());
3146 __ vtbl(dst.high(), table, scratch.high());
3147 } else {
3148 __ vtbl(scratch.low(), table, scratch.low());
3149 __ vtbl(scratch.high(), table, scratch.high());
3150 __ vmov(dst, scratch);
3151 }
3152 break;
3153 }
3154 case kArmS32x2Reverse: {
3155 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
3156 break;
3157 }
3158 case kArmS16x4Reverse: {
3159 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
3160 break;
3161 }
3162 case kArmS16x2Reverse: {
3163 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
3164 break;
3165 }
3166 case kArmS8x8Reverse: {
3167 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3168 break;
3169 }
3170 case kArmS8x4Reverse: {
3171 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3172 break;
3173 }
3174 case kArmS8x2Reverse: {
3175 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3176 break;
3177 }
3178 case kArmV128AnyTrue: {
3179 const QwNeonRegister& src = i.InputSimd128Register(0);
3180 UseScratchRegisterScope temps(tasm());
3181 DwVfpRegister scratch = temps.AcquireD();
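      // Pairwise max folds the vector into a single 32-bit lane that is
      // nonzero iff any input lane was nonzero; normalize the result to 0/1.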
3182 __ vpmax(NeonU32, scratch, src.low(), src.high());
3183 __ vpmax(NeonU32, scratch, scratch, scratch);
3184 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
3185 __ cmp(i.OutputRegister(), Operand(0));
3186 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3187 break;
3188 }
3189 case kArmI64x2AllTrue: {
3190 __ I64x2AllTrue(i.OutputRegister(), i.InputSimd128Register(0));
3191 break;
3192 }
3193 case kArmI32x4AllTrue: {
3194 const QwNeonRegister& src = i.InputSimd128Register(0);
3195 UseScratchRegisterScope temps(tasm());
3196 DwVfpRegister scratch = temps.AcquireD();
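      // Pairwise min folds the vector into a single lane that is nonzero iff
      // every input lane was nonzero; normalize the result to 0/1.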
3197 __ vpmin(NeonU32, scratch, src.low(), src.high());
3198 __ vpmin(NeonU32, scratch, scratch, scratch);
3199 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
3200 __ cmp(i.OutputRegister(), Operand(0));
3201 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3202 break;
3203 }
3204 case kArmI16x8AllTrue: {
3205 const QwNeonRegister& src = i.InputSimd128Register(0);
3206 UseScratchRegisterScope temps(tasm());
3207 DwVfpRegister scratch = temps.AcquireD();
3208 __ vpmin(NeonU16, scratch, src.low(), src.high());
3209 __ vpmin(NeonU16, scratch, scratch, scratch);
3210 __ vpmin(NeonU16, scratch, scratch, scratch);
3211 __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
3212 __ cmp(i.OutputRegister(), Operand(0));
3213 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3214 break;
3215 }
3216 case kArmI8x16AllTrue: {
3217 const QwNeonRegister& src = i.InputSimd128Register(0);
3218 UseScratchRegisterScope temps(tasm());
3219 DwVfpRegister scratch = temps.AcquireD();
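// Four pairwise unsigned-min steps fold the sixteen byte lanes into lane 0;
// it is non-zero iff every input lane was non-zero.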
3220 __ vpmin(NeonU8, scratch, src.low(), src.high());
3221 __ vpmin(NeonU8, scratch, scratch, scratch);
3222 __ vpmin(NeonU8, scratch, scratch, scratch);
3223 __ vpmin(NeonU8, scratch, scratch, scratch);
3224 __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
3225 __ cmp(i.OutputRegister(), Operand(0));
3226 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3227 break;
3228 }
3229 case kArmS128Load8Splat: {
3230 __ vld1r(Neon8, NeonListOperand(i.OutputSimd128Register()),
3231 i.NeonInputOperand(0));
3232 break;
3233 }
3234 case kArmS128Load16Splat: {
3235 __ vld1r(Neon16, NeonListOperand(i.OutputSimd128Register()),
3236 i.NeonInputOperand(0));
3237 break;
3238 }
3239 case kArmS128Load32Splat: {
3240 __ vld1r(Neon32, NeonListOperand(i.OutputSimd128Register()),
3241 i.NeonInputOperand(0));
3242 break;
3243 }
3244 case kArmS128Load64Splat: {
3245 Simd128Register dst = i.OutputSimd128Register();
3246 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3247 __ Move(dst.high(), dst.low());
3248 break;
3249 }
3250 case kArmS128Load8x8S: {
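// Load eight bytes into the low half, then sign-extend each byte to 16
// bits. The 8x8U/16x4/32x2 variants below follow the same
// load-then-widen pattern.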
3251 Simd128Register dst = i.OutputSimd128Register();
3252 __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3253 __ vmovl(NeonS8, dst, dst.low());
3254 break;
3255 }
3256 case kArmS128Load8x8U: {
3257 Simd128Register dst = i.OutputSimd128Register();
3258 __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3259 __ vmovl(NeonU8, dst, dst.low());
3260 break;
3261 }
3262 case kArmS128Load16x4S: {
3263 Simd128Register dst = i.OutputSimd128Register();
3264 __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3265 __ vmovl(NeonS16, dst, dst.low());
3266 break;
3267 }
3268 case kArmS128Load16x4U: {
3269 Simd128Register dst = i.OutputSimd128Register();
3270 __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3271 __ vmovl(NeonU16, dst, dst.low());
3272 break;
3273 }
3274 case kArmS128Load32x2S: {
3275 Simd128Register dst = i.OutputSimd128Register();
3276 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3277 __ vmovl(NeonS32, dst, dst.low());
3278 break;
3279 }
3280 case kArmS128Load32x2U: {
3281 Simd128Register dst = i.OutputSimd128Register();
3282 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3283 __ vmovl(NeonU32, dst, dst.low());
3284 break;
3285 }
3286 case kArmS128Load32Zero: {
3287 Simd128Register dst = i.OutputSimd128Register();
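// Zero the whole register first, then load 32 bits into lane 0.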
3288 __ vmov(dst, 0);
3289 __ vld1s(Neon32, NeonListOperand(dst.low()), 0, i.NeonInputOperand(0));
3290 break;
3291 }
3292 case kArmS128Load64Zero: {
3293 Simd128Register dst = i.OutputSimd128Register();
3294 __ vmov(dst.high(), 0);
3295 __ vld1(Neon64, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3296 break;
3297 }
3298 case kArmS128LoadLaneLow: {
3299 Simd128Register dst = i.OutputSimd128Register();
3300 DCHECK_EQ(dst, i.InputSimd128Register(0));
3301 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3302 NeonListOperand dst_list = NeonListOperand(dst.low());
3303 __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
3304 break;
3305 }
3306 case kArmS128LoadLaneHigh: {
3307 Simd128Register dst = i.OutputSimd128Register();
3308 DCHECK_EQ(dst, i.InputSimd128Register(0));
3309 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3310 NeonListOperand dst_list = NeonListOperand(dst.high());
3311 __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
3312 break;
3313 }
3314 case kArmS128StoreLaneLow: {
3315 Simd128Register src = i.InputSimd128Register(0);
3316 NeonListOperand src_list = NeonListOperand(src.low());
3317 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3318 __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
3319 break;
3320 }
3321 case kArmS128StoreLaneHigh: {
3322 Simd128Register src = i.InputSimd128Register(0);
3323 NeonListOperand src_list = NeonListOperand(src.high());
3324 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3325 __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
3326 break;
3327 }
3328 case kAtomicLoadInt8:
3329 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
3330 break;
3331 case kAtomicLoadUint8:
3332 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
3333 break;
3334 case kAtomicLoadInt16:
3335 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
3336 break;
3337 case kAtomicLoadUint16:
3338 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
3339 break;
3340 case kAtomicLoadWord32:
3341 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
3342 break;
3343 case kAtomicStoreWord8:
3344 ASSEMBLE_ATOMIC_STORE_INTEGER(strb,
3345 AtomicMemoryOrderField::decode(opcode));
3346 break;
3347 case kAtomicStoreWord16:
3348 ASSEMBLE_ATOMIC_STORE_INTEGER(strh,
3349 AtomicMemoryOrderField::decode(opcode));
3350 break;
3351 case kAtomicStoreWord32:
3352 ASSEMBLE_ATOMIC_STORE_INTEGER(str,
3353 AtomicMemoryOrderField::decode(opcode));
3354 break;
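// The narrow exchange and compare-exchange cases below produce
// zero-extended results; the signed (Int8/Int16) variants sign-extend
// them afterwards.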
3355 case kAtomicExchangeInt8:
3356 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
3357 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
3358 break;
3359 case kAtomicExchangeUint8:
3360 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
3361 break;
3362 case kAtomicExchangeInt16:
3363 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
3364 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
3365 break;
3366 case kAtomicExchangeUint16:
3367 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
3368 break;
3369 case kAtomicExchangeWord32:
3370 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
3371 break;
3372 case kAtomicCompareExchangeInt8:
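// Compute the absolute address (base + offset) and zero-extend the
// expected value so that it compares equal to the zero-extended byte
// loaded by the exclusive load.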
3373 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3374 __ uxtb(i.TempRegister(2), i.InputRegister(2));
3375 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
3376 i.TempRegister(2));
3377 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
3378 break;
3379 case kAtomicCompareExchangeUint8:
3380 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3381 __ uxtb(i.TempRegister(2), i.InputRegister(2));
3382 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
3383 i.TempRegister(2));
3384 break;
3385 case kAtomicCompareExchangeInt16:
3386 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3387 __ uxth(i.TempRegister(2), i.InputRegister(2));
3388 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
3389 i.TempRegister(2));
3390 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
3391 break;
3392 case kAtomicCompareExchangeUint16:
3393 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3394 __ uxth(i.TempRegister(2), i.InputRegister(2));
3395 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
3396 i.TempRegister(2));
3397 break;
3398 case kAtomicCompareExchangeWord32:
3399 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3400 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
3401 i.InputRegister(2));
3402 break;
3403 #define ATOMIC_BINOP_CASE(op, inst) \
3404 case kAtomic##op##Int8: \
3405 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
3406 __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
3407 break; \
3408 case kAtomic##op##Uint8: \
3409 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
3410 break; \
3411 case kAtomic##op##Int16: \
3412 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
3413 __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
3414 break; \
3415 case kAtomic##op##Uint16: \
3416 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
3417 break; \
3418 case kAtomic##op##Word32: \
3419 ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \
3420 break;
3421 ATOMIC_BINOP_CASE(Add, add)
3422 ATOMIC_BINOP_CASE(Sub, sub)
3423 ATOMIC_BINOP_CASE(And, and_)
3424 ATOMIC_BINOP_CASE(Or, orr)
3425 ATOMIC_BINOP_CASE(Xor, eor)
3426 #undef ATOMIC_BINOP_CASE
3427 case kArmWord32AtomicPairLoad: {
3428 if (instr->OutputCount() == 2) {
3429 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
3430 __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
3431 __ ldrexd(r0, r1, i.TempRegister(0));
3432 __ dmb(ISH);
3433 } else {
3434 // A special case of this instruction: even though this is a pair load,
3435 // we only need one of the two words. We emit a normal atomic load.
3436 DCHECK_EQ(instr->OutputCount(), 1);
3437 Register base = i.InputRegister(0);
3438 Register offset = i.InputRegister(1);
3439 DCHECK(instr->InputAt(2)->IsImmediate());
3440 int32_t offset_imm = i.InputInt32(2);
3441 if (offset_imm != 0) {
3442 Register temp = i.TempRegister(0);
3443 __ add(temp, offset, Operand(offset_imm));
3444 offset = temp;
3445 }
3446 __ ldr(i.OutputRegister(), MemOperand(base, offset));
3447 __ dmb(ISH);
3448 }
3449 break;
3450 }
3451 case kArmWord32AtomicPairStore: {
3452 Label store;
3453 Register base = i.InputRegister(0);
3454 Register offset = i.InputRegister(1);
3455 Register value_low = i.InputRegister(2);
3456 Register value_high = i.InputRegister(3);
3457 Register actual_addr = i.TempRegister(0);
3458 // The {ldrexd} instruction needs two temp registers. We do not need its
3459 // result, but {strexd} can only succeed after a matching {ldrexd}.
3460 Register tmp1 = i.TempRegister(1);
3461 Register tmp2 = i.TempRegister(2);
3462 // Reuse one of the temp registers for the result of {strexd}.
3463 Register store_result = tmp1;
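// Standard load-linked/store-conditional retry loop: repeat until
// {strexd} reports success (store_result == 0).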
3464 __ add(actual_addr, base, offset);
3465 __ dmb(ISH);
3466 __ bind(&store);
3467 // Add this {ldrexd} instruction here so that {strexd} below can succeed.
3468 // We don't need the result of {ldrexd} itself.
3469 __ ldrexd(tmp1, tmp2, actual_addr);
3470 __ strexd(store_result, value_low, value_high, actual_addr);
3471 __ cmp(store_result, Operand(0));
3472 __ b(ne, &store);
3473 __ dmb(ISH);
3474 break;
3475 }
3476 #define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
3477 case kArmWord32AtomicPair##op: { \
3478 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
3479 ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2); \
3480 break; \
3481 }
3482 ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
3483 ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
3484 #undef ATOMIC_ARITH_BINOP_CASE
3485 #define ATOMIC_LOGIC_BINOP_CASE(op, instr1) \
3486 case kArmWord32AtomicPair##op: { \
3487 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
3488 ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1); \
3489 break; \
3490 }
3491 ATOMIC_LOGIC_BINOP_CASE(And, and_)
3492 ATOMIC_LOGIC_BINOP_CASE(Or, orr)
3493 ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
3494 #undef ATOMIC_LOGIC_BINOP_CASE
3495 case kArmWord32AtomicPairExchange: {
3496 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
3497 Label exchange;
3498 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
3499 __ dmb(ISH);
3500 __ bind(&exchange);
3501 __ ldrexd(r6, r7, i.TempRegister(0));
3502 __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
3503 i.TempRegister(0));
3504 __ teq(i.TempRegister(1), Operand(0));
3505 __ b(ne, &exchange);
3506 __ dmb(ISH);
3507 break;
3508 }
3509 case kArmWord32AtomicPairCompareExchange: {
3510 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
3511 __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
3512 Label compareExchange;
3513 Label exit;
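// 64-bit compare-and-swap loop: load both words exclusively, bail out to
// {exit} if either half differs from the expected value, otherwise try
// the exclusive store and retry on contention.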
3514 __ dmb(ISH);
3515 __ bind(&compareExchange);
3516 __ ldrexd(r2, r3, i.TempRegister(0));
3517 __ teq(i.InputRegister(0), Operand(r2));
3518 __ b(ne, &exit);
3519 __ teq(i.InputRegister(1), Operand(r3));
3520 __ b(ne, &exit);
3521 __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
3522 i.TempRegister(0));
3523 __ teq(i.TempRegister(1), Operand(0));
3524 __ b(ne, &compareExchange);
3525 __ bind(&exit);
3526 __ dmb(ISH);
3527 break;
3528 }
3529 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
3530 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
3531 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
3532 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
3533 #undef ASSEMBLE_ATOMIC_BINOP
3534 #undef ASSEMBLE_ATOMIC64_ARITH_BINOP
3535 #undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
3536 #undef ASSEMBLE_IEEE754_BINOP
3537 #undef ASSEMBLE_IEEE754_UNOP
3538 #undef ASSEMBLE_NEON_NARROWING_OP
3539 #undef ASSEMBLE_SIMD_SHIFT_LEFT
3540 #undef ASSEMBLE_SIMD_SHIFT_RIGHT
3541 }
3542 return kSuccess;
3543 }
3544
3545 // Assembles branches after an instruction.
3546 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3547 ArmOperandConverter i(this, instr);
3548 Label* tlabel = branch->true_label;
3549 Label* flabel = branch->false_label;
3550 Condition cc = FlagsConditionToCondition(branch->condition);
3551 __ b(cc, tlabel);
3552 if (!branch->fallthru) __ b(flabel); // no fallthru to flabel.
3553 }
3554
3555 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3556 BranchInfo* branch) {
3557 AssembleArchBranch(instr, branch);
3558 }
3559
3560 void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
3561 RpoNumber target) {
3562 __ b(GetLabel(target));
3563 }
3564
3565 #if V8_ENABLE_WEBASSEMBLY
3566 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3567 FlagsCondition condition) {
3568 class OutOfLineTrap final : public OutOfLineCode {
3569 public:
3570 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3571 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3572
3573 void Generate() final {
3574 ArmOperandConverter i(gen_, instr_);
3575 TrapId trap_id =
3576 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3577 GenerateCallToTrap(trap_id);
3578 }
3579
3580 private:
3581 void GenerateCallToTrap(TrapId trap_id) {
3582 if (trap_id == TrapId::kInvalid) {
3583 // We cannot test calls to the runtime in cctest/test-run-wasm.
3584 // Therefore we emit a call to C here instead of a call to the runtime.
3585 // We use the context register as the scratch register, because we do
3586 // not have a context here.
3587 __ PrepareCallCFunction(0, 0);
3588 __ CallCFunction(
3589 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3590 __ LeaveFrame(StackFrame::WASM);
3591 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
3592 int pop_count = static_cast<int>(call_descriptor->ParameterSlotCount());
3593 __ Drop(pop_count);
3594 __ Ret();
3595 } else {
3596 gen_->AssembleSourcePosition(instr_);
3597 // A direct call to a wasm runtime stub defined in this module.
3598 // Just encode the stub index. This will be patched when the code
3599 // is added to the native module and copied into wasm code space.
3600 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3601 ReferenceMap* reference_map =
3602 gen_->zone()->New<ReferenceMap>(gen_->zone());
3603 gen_->RecordSafepoint(reference_map);
3604 if (FLAG_debug_code) {
3605 __ stop();
3606 }
3607 }
3608 }
3609
3610 Instruction* instr_;
3611 CodeGenerator* gen_;
3612 };
3613 auto ool = zone()->New<OutOfLineTrap>(this, instr);
3614 Label* tlabel = ool->entry();
3615 Condition cc = FlagsConditionToCondition(condition);
3616 __ b(cc, tlabel);
3617 }
3618 #endif // V8_ENABLE_WEBASSEMBLY
3619
3620 // Assembles boolean materializations after an instruction.
3621 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3622 FlagsCondition condition) {
3623 ArmOperandConverter i(this, instr);
3624
3625 // Materialize a full 32-bit 1 or 0 value. The result register is always the
3626 // last output of the instruction.
3627 DCHECK_NE(0u, instr->OutputCount());
3628 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3629 Condition cc = FlagsConditionToCondition(condition);
3630 __ mov(reg, Operand(0));
3631 __ mov(reg, Operand(1), LeaveCC, cc);
3632 }
3633
3634 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3635 ArmOperandConverter i(this, instr);
3636 Register input = i.InputRegister(0);
3637 std::vector<std::pair<int32_t, Label*>> cases;
3638 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3639 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3640 }
3641 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3642 cases.data() + cases.size());
3643 }
3644
3645 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3646 ArmOperandConverter i(this, instr);
3647 Register input = i.InputRegister(0);
3648 size_t const case_count = instr->InputCount() - 2;
3649 // This {cmp} might still emit a constant pool entry.
3650 __ cmp(input, Operand(case_count));
3651 // Ensure the constant pool is emitted first if necessary.
3652 __ CheckConstPool(true, true);
3653 __ BlockConstPoolFor(case_count + 2);
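// Reading pc on ARM yields the address of the current instruction plus 8,
// so the conditional add below (taken only when the index is in range,
// i.e. lo) lands directly on the matching branch in the table emitted
// after the default branch; out-of-range indices fall through to the
// default branch.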
3654 __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
3655 __ b(GetLabel(i.InputRpo(1)));
3656 for (size_t index = 0; index < case_count; ++index) {
3657 __ b(GetLabel(i.InputRpo(index + 2)));
3658 }
3659 }
3660
3661 void CodeGenerator::AssembleArchSelect(Instruction* instr,
3662 FlagsCondition condition) {
3663 UNIMPLEMENTED();
3664 }
3665
3666 void CodeGenerator::FinishFrame(Frame* frame) {
3667 auto call_descriptor = linkage()->GetIncomingDescriptor();
3668
3669 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3670 if (!saves_fp.is_empty()) {
3671 frame->AlignSavedCalleeRegisterSlots();
3672 }
3673
3674 if (!saves_fp.is_empty()) {
3675 // Save callee-saved FP registers.
3676 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3677 uint32_t last = base::bits::CountLeadingZeros32(saves_fp.bits()) - 1;
3678 uint32_t first = base::bits::CountTrailingZeros32(saves_fp.bits());
3679 DCHECK_EQ((last - first + 1), saves_fp.Count());
3680 frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
3681 (kDoubleSize / kSystemPointerSize));
3682 }
3683 const RegList saves = call_descriptor->CalleeSavedRegisters();
3684 if (!saves.is_empty()) {
3685 // Save callee-saved registers.
3686 frame->AllocateSavedCalleeRegisterSlots(saves.Count());
3687 }
3688 }
3689
3690 void CodeGenerator::AssembleConstructFrame() {
3691 auto call_descriptor = linkage()->GetIncomingDescriptor();
3692 if (frame_access_state()->has_frame()) {
3693 if (call_descriptor->IsCFunctionCall()) {
3694 #if V8_ENABLE_WEBASSEMBLY
3695 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
3696 __ StubPrologue(StackFrame::C_WASM_ENTRY);
3697 // Reserve stack space for saving the c_entry_fp later.
3698 __ AllocateStackSpace(kSystemPointerSize);
3699 #else
3700 // For balance.
3701 if (false) {
3702 #endif // V8_ENABLE_WEBASSEMBLY
3703 } else {
3704 __ Push(lr, fp);
3705 __ mov(fp, sp);
3706 }
3707 } else if (call_descriptor->IsJSFunctionCall()) {
3708 __ Prologue();
3709 } else {
3710 __ StubPrologue(info()->GetOutputStackFrameType());
3711 #if V8_ENABLE_WEBASSEMBLY
3712 if (call_descriptor->IsWasmFunctionCall() ||
3713 call_descriptor->IsWasmImportWrapper() ||
3714 call_descriptor->IsWasmCapiFunction()) {
3715 __ Push(kWasmInstanceRegister);
3716 }
3717 if (call_descriptor->IsWasmCapiFunction()) {
3718 // Reserve space for saving the PC later.
3719 __ AllocateStackSpace(kSystemPointerSize);
3720 }
3721 #endif // V8_ENABLE_WEBASSEMBLY
3722 }
3723
3724 unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
3725 }
3726
3727 int required_slots =
3728 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
3729
3730 if (info()->is_osr()) {
3731 // TurboFan OSR-compiled functions cannot be entered directly.
3732 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3733
3734 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3735 // frame is still on the stack. Optimized code uses OSR values directly from
3736 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3737 // remaining stack slots.
3738 __ RecordComment("-- OSR entrypoint --");
3739 osr_pc_offset_ = __ pc_offset();
3740 required_slots -= osr_helper()->UnoptimizedFrameSlots();
3741 }
3742
3743 const RegList saves = call_descriptor->CalleeSavedRegisters();
3744 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3745
3746 if (required_slots > 0) {
3747 DCHECK(frame_access_state()->has_frame());
3748 #if V8_ENABLE_WEBASSEMBLY
3749 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
3750 // For WebAssembly functions with big frames we have to do the stack
3751 // overflow check before we construct the frame. Otherwise we may not
3752 // have enough space on the stack to call the runtime for the stack
3753 // overflow.
3754 Label done;
3755
3756 // If the frame is bigger than the stack, we throw the stack overflow
3757 // exception unconditionally. Thereby we can avoid the integer overflow
3758 // check in the condition code.
3759 if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
3760 UseScratchRegisterScope temps(tasm());
3761 Register scratch = temps.Acquire();
3762 __ ldr(scratch, FieldMemOperand(
3763 kWasmInstanceRegister,
3764 WasmInstanceObject::kRealStackLimitAddressOffset));
3765 __ ldr(scratch, MemOperand(scratch));
3766 __ add(scratch, scratch, Operand(required_slots * kSystemPointerSize));
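// There is enough room if sp is at least required_slots *
// kSystemPointerSize above the real stack limit (unsigned comparison,
// hence the cs condition).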
3767 __ cmp(sp, scratch);
3768 __ b(cs, &done);
3769 }
3770
3771 __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
3772 // The call does not return, hence we can ignore any references and just
3773 // define an empty safepoint.
3774 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3775 RecordSafepoint(reference_map);
3776 if (FLAG_debug_code) __ stop();
3777
3778 __ bind(&done);
3779 }
3780 #endif // V8_ENABLE_WEBASSEMBLY
3781
3782 // Skip callee-saved and return slots, which are pushed below.
3783 required_slots -= saves.Count();
3784 required_slots -= frame()->GetReturnSlotCount();
3785 required_slots -= 2 * saves_fp.Count();
3786 if (required_slots > 0) {
3787 __ AllocateStackSpace(required_slots * kSystemPointerSize);
3788 }
3789 }
3790
3791 if (!saves_fp.is_empty()) {
3792 // Save callee-saved FP registers.
3793 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3794 __ vstm(db_w, sp, saves_fp.first(), saves_fp.last());
3795 }
3796
3797 if (!saves.is_empty()) {
3798 // Save callee-saved registers.
3799 __ stm(db_w, sp, saves);
3800 }
3801
3802 const int returns = frame()->GetReturnSlotCount();
3803 // Create space for returns.
3804 __ AllocateStackSpace(returns * kSystemPointerSize);
3805 }
3806
3807 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
3808 auto call_descriptor = linkage()->GetIncomingDescriptor();
3809
3810 const int returns = frame()->GetReturnSlotCount();
3811 if (returns != 0) {
3812 // Free space of returns.
3813 __ add(sp, sp, Operand(returns * kSystemPointerSize));
3814 }
3815
3816 // Restore registers.
3817 const RegList saves = call_descriptor->CalleeSavedRegisters();
3818 if (!saves.is_empty()) {
3819 __ ldm(ia_w, sp, saves);
3820 }
3821
3822 // Restore FP registers.
3823 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3824 if (!saves_fp.is_empty()) {
3825 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3826 __ vldm(ia_w, sp, saves_fp.first(), saves_fp.last());
3827 }
3828
3829 unwinding_info_writer_.MarkBlockWillExit();
3830
3831 ArmOperandConverter g(this, nullptr);
3832 const int parameter_slots =
3833 static_cast<int>(call_descriptor->ParameterSlotCount());
3834
3835 // {additional_pop_count} is only greater than zero if {parameter_slots == 0}.
3836 // Check RawMachineAssembler::PopAndReturn.
3837 if (parameter_slots != 0) {
3838 if (additional_pop_count->IsImmediate()) {
3839 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
3840 } else if (FLAG_debug_code) {
3841 __ cmp(g.ToRegister(additional_pop_count), Operand(0));
3842 __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
3843 }
3844 }
3845
3846 Register argc_reg = r3;
3847 // Functions with JS linkage have at least one parameter (the receiver).
3848 // If {parameter_slots} == 0, it means it is a builtin with
3849 // kDontAdaptArgumentsSentinel, which takes care of popping the JS
3850 // arguments itself.
3851 const bool drop_jsargs = parameter_slots != 0 &&
3852 frame_access_state()->has_frame() &&
3853 call_descriptor->IsJSFunctionCall();
3854 if (call_descriptor->IsCFunctionCall()) {
3855 AssembleDeconstructFrame();
3856 } else if (frame_access_state()->has_frame()) {
3857 // Canonicalize JSFunction return sites for now unless they have a variable
3858 // number of stack slot pops.
3859 if (additional_pop_count->IsImmediate() &&
3860 g.ToConstant(additional_pop_count).ToInt32() == 0) {
3861 if (return_label_.is_bound()) {
3862 __ b(&return_label_);
3863 return;
3864 } else {
3865 __ bind(&return_label_);
3866 }
3867 }
3868 if (drop_jsargs) {
3869 // Get the actual argument count.
3870 __ ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
3871 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
3872 }
3873 AssembleDeconstructFrame();
3874 }
3875
3876 if (drop_jsargs) {
3877 // We must pop all arguments from the stack (including the receiver).
3878 // The number of arguments without the receiver is
3879 // max(argc_reg, parameter_slots-1), and the receiver is added in
3880 // DropArguments().
3881 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
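// Compute max(argc_reg, parameter_slots) in argc_reg.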
3882 if (parameter_slots > 1) {
3883 __ cmp(argc_reg, Operand(parameter_slots));
3884 __ mov(argc_reg, Operand(parameter_slots), LeaveCC, lt);
3885 }
3886 __ DropArguments(argc_reg, TurboAssembler::kCountIsInteger,
3887 TurboAssembler::kCountIncludesReceiver);
3888 } else if (additional_pop_count->IsImmediate()) {
3889 DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
3890 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
3891 __ Drop(parameter_slots + additional_count);
3892 } else if (parameter_slots == 0) {
3893 __ Drop(g.ToRegister(additional_pop_count));
3894 } else {
3895 // {additional_pop_count} is guaranteed to be zero if {parameter_slots !=
3896 // 0}. Check RawMachineAssembler::PopAndReturn.
3897 __ Drop(parameter_slots);
3898 }
3899 __ Ret();
3900 }
3901
3902 void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
3903
3904 void CodeGenerator::PrepareForDeoptimizationExits(
3905 ZoneDeque<DeoptimizationExit*>* exits) {
3906 __ CheckConstPool(true, false);
3907 }
3908
3909 void CodeGenerator::AssembleMove(InstructionOperand* source,
3910 InstructionOperand* destination) {
3911 ArmOperandConverter g(this, nullptr);
3912 // Helper function to write the given constant to the dst register.
3913 auto MoveConstantToRegister = [&](Register dst, Constant src) {
3914 if (src.type() == Constant::kHeapObject) {
3915 Handle<HeapObject> src_object = src.ToHeapObject();
3916 RootIndex index;
3917 if (IsMaterializableFromRoot(src_object, &index)) {
3918 __ LoadRoot(dst, index);
3919 } else {
3920 __ Move(dst, src_object);
3921 }
3922 } else if (src.type() == Constant::kExternalReference) {
3923 __ Move(dst, src.ToExternalReference());
3924 } else {
3925 __ mov(dst, g.ToImmediate(source));
3926 }
3927 };
3928 switch (MoveType::InferMove(source, destination)) {
3929 case MoveType::kRegisterToRegister:
3930 if (source->IsRegister()) {
3931 __ mov(g.ToRegister(destination), g.ToRegister(source));
3932 } else if (source->IsFloatRegister()) {
3933 DCHECK(destination->IsFloatRegister());
3934 // GapResolver may give us reg codes that don't map to actual
3935 // s-registers. Generate code to work around those cases.
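// (Such codes presumably exceed 31 and name the single-precision halves
// of d16-d31, which have no architectural s-register aliases;
// VmovExtended handles them.)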
3936 int src_code = LocationOperand::cast(source)->register_code();
3937 int dst_code = LocationOperand::cast(destination)->register_code();
3938 __ VmovExtended(dst_code, src_code);
3939 } else if (source->IsDoubleRegister()) {
3940 __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3941 } else {
3942 __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
3943 }
3944 return;
3945 case MoveType::kRegisterToStack: {
3946 MemOperand dst = g.ToMemOperand(destination);
3947 if (source->IsRegister()) {
3948 __ str(g.ToRegister(source), dst);
3949 } else if (source->IsFloatRegister()) {
3950 // GapResolver may give us reg codes that don't map to actual
3951 // s-registers. Generate code to work around those cases.
3952 int src_code = LocationOperand::cast(source)->register_code();
3953 __ VmovExtended(dst, src_code);
3954 } else if (source->IsDoubleRegister()) {
3955 __ vstr(g.ToDoubleRegister(source), dst);
3956 } else {
3957 UseScratchRegisterScope temps(tasm());
3958 Register temp = temps.Acquire();
3959 QwNeonRegister src = g.ToSimd128Register(source);
3960 __ add(temp, dst.rn(), Operand(dst.offset()));
3961 __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
3962 }
3963 return;
3964 }
3965 case MoveType::kStackToRegister: {
3966 MemOperand src = g.ToMemOperand(source);
3967 if (source->IsStackSlot()) {
3968 __ ldr(g.ToRegister(destination), src);
3969 } else if (source->IsFloatStackSlot()) {
3970 DCHECK(destination->IsFloatRegister());
3971 // GapResolver may give us reg codes that don't map to actual
3972 // s-registers. Generate code to work around those cases.
3973 int dst_code = LocationOperand::cast(destination)->register_code();
3974 __ VmovExtended(dst_code, src);
3975 } else if (source->IsDoubleStackSlot()) {
3976 __ vldr(g.ToDoubleRegister(destination), src);
3977 } else {
3978 UseScratchRegisterScope temps(tasm());
3979 Register temp = temps.Acquire();
3980 QwNeonRegister dst = g.ToSimd128Register(destination);
3981 __ add(temp, src.rn(), Operand(src.offset()));
3982 __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
3983 }
3984 return;
3985 }
3986 case MoveType::kStackToStack: {
3987 MemOperand src = g.ToMemOperand(source);
3988 MemOperand dst = g.ToMemOperand(destination);
3989 UseScratchRegisterScope temps(tasm());
3990 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
3991 SwVfpRegister temp = temps.AcquireS();
3992 __ vldr(temp, src);
3993 __ vstr(temp, dst);
3994 } else if (source->IsDoubleStackSlot()) {
3995 DwVfpRegister temp = temps.AcquireD();
3996 __ vldr(temp, src);
3997 __ vstr(temp, dst);
3998 } else {
3999 DCHECK(source->IsSimd128StackSlot());
4000 Register temp = temps.Acquire();
4001 QwNeonRegister temp_q = temps.AcquireQ();
4002 __ add(temp, src.rn(), Operand(src.offset()));
4003 __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4004 __ add(temp, dst.rn(), Operand(dst.offset()));
4005 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4006 }
4007 return;
4008 }
4009 case MoveType::kConstantToRegister: {
4010 Constant src = g.ToConstant(source);
4011 if (destination->IsRegister()) {
4012 MoveConstantToRegister(g.ToRegister(destination), src);
4013 } else if (destination->IsFloatRegister()) {
4014 __ vmov(g.ToFloatRegister(destination),
4015 Float32::FromBits(src.ToFloat32AsInt()));
4016 } else {
4017 // TODO(arm): Look into optimizing this further if possible. Supporting
4018 // the NEON version of VMOV may help.
4019 __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
4020 }
4021 return;
4022 }
4023 case MoveType::kConstantToStack: {
4024 Constant src = g.ToConstant(source);
4025 MemOperand dst = g.ToMemOperand(destination);
4026 if (destination->IsStackSlot()) {
4027 UseScratchRegisterScope temps(tasm());
4028 // Acquire an S register instead of a general purpose register in case
4029 // `vstr` needs one to compute the address of `dst`.
4030 SwVfpRegister s_temp = temps.AcquireS();
4031 {
4032 // TODO(arm): This sequence could be optimized further if necessary by
4033 // writing the constant directly into `s_temp`.
4034 UseScratchRegisterScope temps(tasm());
4035 Register temp = temps.Acquire();
4036 MoveConstantToRegister(temp, src);
4037 __ vmov(s_temp, temp);
4038 }
4039 __ vstr(s_temp, dst);
4040 } else if (destination->IsFloatStackSlot()) {
4041 UseScratchRegisterScope temps(tasm());
4042 SwVfpRegister temp = temps.AcquireS();
4043 __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
4044 __ vstr(temp, dst);
4045 } else {
4046 DCHECK(destination->IsDoubleStackSlot());
4047 UseScratchRegisterScope temps(tasm());
4048 DwVfpRegister temp = temps.AcquireD();
4049 // TODO(arm): Look into optimizing this further if possible. Supporting
4050 // the NEON version of VMOV may help.
4051 __ vmov(temp, src.ToFloat64());
4052 __ vstr(temp, g.ToMemOperand(destination));
4053 }
4054 return;
4055 }
4056 }
4057 UNREACHABLE();
4058 }
4059
4060 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4061 InstructionOperand* destination) {
4062 ArmOperandConverter g(this, nullptr);
4063 switch (MoveType::InferSwap(source, destination)) {
4064 case MoveType::kRegisterToRegister:
4065 if (source->IsRegister()) {
4066 __ Swap(g.ToRegister(source), g.ToRegister(destination));
4067 } else if (source->IsFloatRegister()) {
4068 DCHECK(destination->IsFloatRegister());
4069 // GapResolver may give us reg codes that don't map to actual
4070 // s-registers. Generate code to work around those cases.
4071 UseScratchRegisterScope temps(tasm());
4072 LowDwVfpRegister temp = temps.AcquireLowD();
4073 int src_code = LocationOperand::cast(source)->register_code();
4074 int dst_code = LocationOperand::cast(destination)->register_code();
4075 __ VmovExtended(temp.low().code(), src_code);
4076 __ VmovExtended(src_code, dst_code);
4077 __ VmovExtended(dst_code, temp.low().code());
4078 } else if (source->IsDoubleRegister()) {
4079 __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
4080 } else {
4081 __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
4082 }
4083 return;
4084 case MoveType::kRegisterToStack: {
4085 MemOperand dst = g.ToMemOperand(destination);
4086 if (source->IsRegister()) {
4087 Register src = g.ToRegister(source);
4088 UseScratchRegisterScope temps(tasm());
4089 SwVfpRegister temp = temps.AcquireS();
4090 __ vmov(temp, src);
4091 __ ldr(src, dst);
4092 __ vstr(temp, dst);
4093 } else if (source->IsFloatRegister()) {
4094 int src_code = LocationOperand::cast(source)->register_code();
4095 UseScratchRegisterScope temps(tasm());
4096 LowDwVfpRegister temp = temps.AcquireLowD();
4097 __ VmovExtended(temp.low().code(), src_code);
4098 __ VmovExtended(src_code, dst);
4099 __ vstr(temp.low(), dst);
4100 } else if (source->IsDoubleRegister()) {
4101 UseScratchRegisterScope temps(tasm());
4102 DwVfpRegister temp = temps.AcquireD();
4103 DwVfpRegister src = g.ToDoubleRegister(source);
4104 __ Move(temp, src);
4105 __ vldr(src, dst);
4106 __ vstr(temp, dst);
4107 } else {
4108 QwNeonRegister src = g.ToSimd128Register(source);
4109 UseScratchRegisterScope temps(tasm());
4110 Register temp = temps.Acquire();
4111 QwNeonRegister temp_q = temps.AcquireQ();
4112 __ Move(temp_q, src);
4113 __ add(temp, dst.rn(), Operand(dst.offset()));
4114 __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
4115 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4116 }
4117 return;
4118 }
4119 case MoveType::kStackToStack: {
4120 MemOperand src = g.ToMemOperand(source);
4121 MemOperand dst = g.ToMemOperand(destination);
4122 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
4123 UseScratchRegisterScope temps(tasm());
4124 SwVfpRegister temp_0 = temps.AcquireS();
4125 SwVfpRegister temp_1 = temps.AcquireS();
4126 __ vldr(temp_0, dst);
4127 __ vldr(temp_1, src);
4128 __ vstr(temp_0, src);
4129 __ vstr(temp_1, dst);
4130 } else if (source->IsDoubleStackSlot()) {
4131 UseScratchRegisterScope temps(tasm());
4132 LowDwVfpRegister temp = temps.AcquireLowD();
4133 if (temps.CanAcquireD()) {
4134 DwVfpRegister temp_0 = temp;
4135 DwVfpRegister temp_1 = temps.AcquireD();
4136 __ vldr(temp_0, dst);
4137 __ vldr(temp_1, src);
4138 __ vstr(temp_0, src);
4139 __ vstr(temp_1, dst);
4140 } else {
4141 // We only have a single D register available. However, we can split
4142 // it into 2 S registers and swap the slots 32 bits at a time.
4143 MemOperand src0 = src;
4144 MemOperand dst0 = dst;
4145 MemOperand src1(src.rn(), src.offset() + kFloatSize);
4146 MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
4147 SwVfpRegister temp_0 = temp.low();
4148 SwVfpRegister temp_1 = temp.high();
4149 __ vldr(temp_0, dst0);
4150 __ vldr(temp_1, src0);
4151 __ vstr(temp_0, src0);
4152 __ vstr(temp_1, dst0);
4153 __ vldr(temp_0, dst1);
4154 __ vldr(temp_1, src1);
4155 __ vstr(temp_0, src1);
4156 __ vstr(temp_1, dst1);
4157 }
4158 } else {
4159 DCHECK(source->IsSimd128StackSlot());
4160 MemOperand src0 = src;
4161 MemOperand dst0 = dst;
4162 MemOperand src1(src.rn(), src.offset() + kDoubleSize);
4163 MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
4164 UseScratchRegisterScope temps(tasm());
4165 DwVfpRegister temp_0 = temps.AcquireD();
4166 DwVfpRegister temp_1 = temps.AcquireD();
4167 __ vldr(temp_0, dst0);
4168 __ vldr(temp_1, src0);
4169 __ vstr(temp_0, src0);
4170 __ vstr(temp_1, dst0);
4171 __ vldr(temp_0, dst1);
4172 __ vldr(temp_1, src1);
4173 __ vstr(temp_0, src1);
4174 __ vstr(temp_1, dst1);
4175 }
4176 return;
4177 }
4178 default:
4179 UNREACHABLE();
4180 }
4181 }
4182
4183 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4184 // On 32-bit ARM we emit the jump tables inline.
4185 UNREACHABLE();
4186 }
4187
4188 #undef __
4189
4190 } // namespace compiler
4191 } // namespace internal
4192 } // namespace v8
4193