// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
#define V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_

#include "src/base/platform/wrappers.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return eq;
    case kUnequal:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedLessEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kSignedGreaterEqual:
      return ge;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedLessEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kUnsignedGreaterEqual:
      return hs;
  }
}

// Liftoff Frames.
//
//  slot      Frame
//       +--------------------+---------------------------
//  n+4  | optional padding slot to keep the stack 16 byte aligned.
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | StackFrame::WASM   |
//  -2   |     instance       |
//  -3   |  feedback vector   |
//  -4   |  tiering budget    |
//  -----+--------------------+---------------------------
//  -5   |     slot 0         |   ^
//  -6   |     slot 1         |   |
//       |                    | Frame slots
//       |                    |   |
//       |                    |   v
//       | optional padding slot to keep the stack 16 byte aligned.
//  -----+--------------------+  <-- stack ptr (sp)
//

constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

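// Frame slots and the fixed frame fields are addressed relative to fp with
// negative offsets. For example, with the layout above GetInstanceOperand()
// resolves to MemOperand(fp, -16), since kInstanceOffset is
// 2 * kSystemPointerSize == 16 on arm64.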
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline CPURegister GetRegFromType(const LiftoffRegister& reg, ValueKind kind) {
  switch (kind) {
    case kI32:
      return reg.gp().W();
    case kI64:
    case kRef:
    case kOptRef:
    case kRtt:
      return reg.gp().X();
    case kF32:
      return reg.fp().S();
    case kF64:
      return reg.fp().D();
    case kS128:
      return reg.fp().Q();
    default:
      UNREACHABLE();
  }
}

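// Registers are pushed and popped in pairs (stp/ldp) and sp must stay 16-byte
// aligned, so an odd register count is padded with padreg (for GP registers)
// or fp_scratch (for vector registers).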
inline CPURegList PadRegList(RegList list) {
  if ((list.Count() & 1) != 0) list.set(padreg);
  return CPURegList(kXRegSizeInBits, list);
}

inline CPURegList PadVRegList(DoubleRegList list) {
  if ((list.Count() & 1) != 0) list.set(fp_scratch);
  return CPURegList(kQRegSizeInBits, list);
}

inline CPURegister AcquireByType(UseScratchRegisterScope* temps,
                                 ValueKind kind) {
  switch (kind) {
    case kI32:
      return temps->AcquireW();
    case kI64:
    case kRef:
    case kOptRef:
      return temps->AcquireX();
    case kF32:
      return temps->AcquireS();
    case kF64:
      return temps->AcquireD();
    case kS128:
      return temps->AcquireQ();
    default:
      UNREACHABLE();
  }
}

template <typename T>
inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, T offset_imm,
                           bool i64_offset = false) {
  if (!offset.is_valid()) return MemOperand(addr.X(), offset_imm);
  Register effective_addr = addr.X();
  if (offset_imm) {
    effective_addr = temps->AcquireX();
    assm->Add(effective_addr, addr.X(), offset_imm);
  }
  return i64_offset ? MemOperand(effective_addr, offset.X())
                    : MemOperand(effective_addr, offset.W(), UXTW);
}

// Compute the effective address (sum of |addr|, |offset| (if given) and
// |offset_imm|) into a temporary register. This is needed for certain load
// instructions that do not support an offset (register or immediate).
// Returns |addr| if both |offset| and |offset_imm| are zero.
inline Register GetEffectiveAddress(LiftoffAssembler* assm,
                                    UseScratchRegisterScope* temps,
                                    Register addr, Register offset,
                                    uintptr_t offset_imm) {
  if (!offset.is_valid() && offset_imm == 0) return addr;
  Register tmp = temps->AcquireX();
  if (offset.is_valid()) {
    // TODO(clemensb): This needs adaption for memory64.
    assm->Add(tmp, addr, Operand(offset, UXTW));
    addr = tmp;
  }
  if (offset_imm != 0) assm->Add(tmp, addr, offset_imm);
  return tmp;
}

enum class ShiftDirection : bool { kLeft, kRight };

enum class ShiftSign : bool { kSigned, kUnsigned };

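// Sshl/Ushl shift each lane left for positive shift amounts and right for
// negative ones, so the variable right shifts below are implemented by
// negating the (masked) shift amount.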
template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned>
inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs,
                          Register rhs, VectorFormat format) {
  DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned);
  DCHECK(dst.IsSameFormat(lhs));
  DCHECK_EQ(dst.LaneCount(), LaneCountFromFormat(format));

  UseScratchRegisterScope temps(assm);
  VRegister tmp = temps.AcquireV(format);
  Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW();
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  assm->And(shift, rhs, mask);
  assm->Dup(tmp, shift);

  if (dir == ShiftDirection::kRight) {
    assm->Neg(tmp, tmp);
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshl(dst, lhs, tmp);
  } else {
    assm->Ushl(dst, lhs, tmp);
  }
}

template <VectorFormat format, ShiftSign sign>
inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst,
                                        VRegister lhs, int32_t rhs) {
  // Sshr and Ushr do not allow shifts of 0, so check for that here.
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  int32_t shift = rhs & mask;
  if (!shift) {
    if (dst != lhs) {
      assm->Mov(dst, lhs);
    }
    return;
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshr(dst, lhs, rhs & mask);
  } else {
    assm->Ushr(dst, lhs, rhs & mask);
  }
}

inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  // AnyTrue does not depend on the number of lanes, so we can use V4S for all.
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(kFormatS);
  assm->Umaxv(temp, src.fp().V4S());
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

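// AllTrue uses an unsigned-min reduction: Uminv yields zero iff at least one
// lane is zero, so Cset(ne) produces 1 only when every lane is non-zero.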
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src, VectorFormat format) {
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format));
  assm->Uminv(temp, VRegister::Create(src.fp().code(), format));
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  InstructionAccurateScope scope(this, 1);
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be, so we just emit a placeholder
  // instruction. PatchPrepareStackFrame will patch this in order to increase
  // the stack appropriately.
  sub(sp, sp, 0);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);

  // This is the previous stack pointer value (before we push the lr and the
  // fp). We need to keep it to authenticate the lr and adjust the new stack
  // pointer afterwards.
  Add(x16, fp, 16);

  // Load the fp and lr of the old frame; they will be pushed in the new frame
  // during the actual call.
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  Ldp(fp, x17, MemOperand(fp));
  Autib1716();
  Mov(lr, x17);
#else
  Ldp(fp, lr, MemOperand(fp));
#endif

  temps.Include(x17);

  Register scratch = temps.AcquireX();

  // Shift the whole frame upwards, except for fp and lr.
  int slot_count = num_callee_stack_params;
  for (int i = slot_count - 1; i >= 0; --i) {
    ldr(scratch, MemOperand(sp, i * 8));
    str(scratch, MemOperand(x16, (i - stack_param_delta) * 8));
  }

  // Set the new stack pointer.
  Sub(sp, x16, stack_param_delta * 8);
}

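// AlignFrameSize rounds the spill area up to the 16-byte stack alignment
// (e.g. an initial frame size of 72 bytes becomes 80) and records the padding
// in max_used_spill_offset_ so that later GC offset computations stay correct.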
void LiftoffAssembler::AlignFrameSize() {
  // The frame_size includes the frame marker. The frame marker has already
  // been pushed on the stack though, so we don't need to allocate memory for
  // it anymore.
  int initial_frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  int frame_size = initial_frame_size;

  static_assert(kStackSlotSize == kXRegSize,
                "kStackSlotSize must equal kXRegSize");
  // The stack pointer is required to be quadword aligned.
  // Misalignment will cause a stack alignment fault.
  frame_size = RoundUp(frame_size, kQuadWordSizeInBytes);
  if (!IsImmAddSub(frame_size)) {
    // Round the stack up to a page size to try to fit an add/sub immediate.
    frame_size = RoundUp(frame_size, 0x1000);
    if (!IsImmAddSub(frame_size)) {
      // Stack greater than 4M! Because this is quite an improbable case, we
      // just fall back to TurboFan.
      bailout(kOtherReason, "Stack too big");
      return;
    }
  }
  if (frame_size > initial_frame_size) {
    // Record the padding, as it is needed for GC offsets later.
    max_used_spill_offset_ += (frame_size - initial_frame_size);
  }
}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;

  // The stack pointer is required to be quadword aligned.
  // Misalignment will cause a stack alignment fault.
  DCHECK_EQ(frame_size, RoundUp(frame_size, kQuadWordSizeInBytes));
  DCHECK(IsImmAddSub(frame_size));

  PatchingAssembler patching_assembler(AssemblerOptions{},
                                       buffer_start_ + offset, 1);

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.PatchSubSp(frame_size);
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available
  // stack space if we first allocate the frame and then do the stack check (we
  // will need some remaining stack space for throwing the exception). That's
  // why we check the available stack space before we allocate the frame. To do
  // this we replace the {__ sub(sp, sp, framesize)} with a jump to OOL code
  // that does this "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already
  // finished when {PatchPrepareStackFrame} is called. The function prologue
  // then jumps to the current {pc_offset()} to execute the OOL code for
  // allocating the large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.b((pc_offset() - offset) >> kInstrSizeLog2);

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    UseScratchRegisterScope temps(this);
    Register stack_limit = temps.AcquireX();
    Ldr(stack_limit,
        FieldMemOperand(kWasmInstanceRegister,
                        WasmInstanceObject::kRealStackLimitAddressOffset));
    Ldr(stack_limit, MemOperand(stack_limit));
    Add(stack_limit, stack_limit, Operand(frame_size));
    Cmp(sp, stack_limit);
    B(hs /* higher or same */, &continuation);
  }

  Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  if (FLAG_debug_code) Brk(0);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::Claim}.
  Claim(frame_size, 1);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + kInstrSize;
  b((func_start_offset - pc_offset()) >> kInstrSizeLog2);
}

void LiftoffAssembler::FinishCode() { ForceConstantPoolEmissionWithoutJump(); }

void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kTierupBudgetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  // TODO(zhin): Unaligned accesses typically take additional cycles; we should
  // do some performance testing to see how big an effect this has.
  switch (kind) {
    case kS128:
      return value_kind_size(kind);
    default:
      return kStackSlotSize;
  }
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return kind == kS128 || is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      Mov(reg.gp().W(), Immediate(value.to_i32(), rmode));
      break;
    case kI64:
      Mov(reg.gp().X(), Immediate(value.to_i64(), rmode));
      break;
    case kF32:
      Fmov(reg.fp().S(), value.to_f32_boxed().get_scalar());
      break;
    case kF64:
      Fmov(reg.fp().D(), value.to_f64_boxed().get_scalar());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  Ldr(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  MemOperand src{instance, offset};
  switch (size) {
    case 1:
      Ldrb(dst.W(), src);
      break;
    case 4:
      Ldr(dst.W(), src);
      break;
    case 8:
      Ldr(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, MemOperand{instance, offset});
}

void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldMemOperand(instance, offset), tag,
                           isolate_root);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  Str(instance, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::ResetOSRTarget() {}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  LoadTaggedPointerField(dst, src_op);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
  Ldr(dst.X(), src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  UseScratchRegisterScope temps(this);
  Operand offset_op = offset_reg.is_valid() ? Operand(offset_reg.W(), UXTW)
                                            : Operand(offset_imm);
  // For the write barrier (below), we cannot have both an offset register and
  // an immediate offset. Add them to a 32-bit offset initially, but in a
  // 64-bit register, because that's needed in the MemOperand below.
  if (offset_reg.is_valid() && offset_imm) {
    Register effective_offset = temps.AcquireX();
    Add(effective_offset.W(), offset_reg.W(), offset_imm);
    offset_op = effective_offset;
  }
  StoreTaggedField(src.gp(), MemOperand(dst_addr.X(), offset_op));

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  // The write barrier.
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, eq,
                &write_barrier);
  b(&exit);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit);
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, ne,
                &exit);
  CallRecordWriteStubSaveRegisters(
      dst_addr, offset_op, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op = liftoff::GetMemOp(this, &temps, src_addr, offset_reg,
                                        offset_imm, i64_offset);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldrb(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load8S:
      Ldrsb(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load8S:
      Ldrsb(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldrh(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load16S:
      Ldrsh(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load16S:
      Ldrsh(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldr(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load32S:
      Ldrsw(dst.gp().X(), src_op);
      break;
    case LoadType::kI64Load:
      Ldr(dst.gp().X(), src_op);
      break;
    case LoadType::kF32Load:
      Ldr(dst.fp().S(), src_op);
      break;
    case LoadType::kF64Load:
      Ldr(dst.fp().D(), src_op);
      break;
    case LoadType::kS128Load:
      Ldr(dst.fp().Q(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand dst_op =
      liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      Strb(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      Strh(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      Str(src.gp().W(), dst_op);
      break;
    case StoreType::kI64Store:
      Str(src.gp().X(), dst_op);
      break;
    case StoreType::kF32Store:
      Str(src.fp().S(), dst_op);
      break;
    case StoreType::kF64Store:
      Str(src.fp().D(), dst_op);
      break;
    case StoreType::kS128Store:
      Str(src.fp().Q(), dst_op);
      break;
  }
}

namespace liftoff {
#define __ lasm->

inline Register CalculateActualAddress(LiftoffAssembler* lasm,
                                       Register addr_reg, Register offset_reg,
                                       uintptr_t offset_imm,
                                       Register result_reg) {
  DCHECK_NE(offset_reg, no_reg);
  DCHECK_NE(addr_reg, no_reg);
  __ Add(result_reg, addr_reg, Operand(offset_reg));
  if (offset_imm != 0) {
    __ Add(result_reg, result_reg, Operand(offset_imm));
  }
  return result_reg;
}

enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };

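// AtomicBinop implements the read-modify-write as a load-acquire-exclusive /
// store-release-exclusive (ldaxr/stlxr) retry loop: the store fails, and we
// loop again, if another core touched the location in between. The same
// pattern is used by AtomicCompareExchange further below.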
inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr,
                        Register offset_reg, uintptr_t offset_imm,
                        LiftoffRegister value, LiftoffRegister result,
                        StoreType type, Binop op) {
  LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
  DCHECK(result.gp() != value.gp() && result.gp() != dst_addr &&
         result.gp() != offset_reg);

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to
  // be the same register.
  Register temp = temps.AcquireX();

  Label retry;
  __ Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ ldaxrb(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ ldaxrh(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ ldaxr(result.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ ldaxr(result.gp().X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  switch (op) {
    case Binop::kAdd:
      __ add(temp, result.gp(), value.gp());
      break;
    case Binop::kSub:
      __ sub(temp, result.gp(), value.gp());
      break;
    case Binop::kAnd:
      __ and_(temp, result.gp(), value.gp());
      break;
    case Binop::kOr:
      __ orr(temp, result.gp(), value.gp());
      break;
    case Binop::kXor:
      __ eor(temp, result.gp(), value.gp());
      break;
    case Binop::kExchange:
      __ mov(temp, value.gp());
      break;
  }

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ stlxrb(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ stlxrh(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ stlxr(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ stlxr(store_result.W(), temp.X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  __ Cbnz(store_result.W(), &retry);
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register src_reg = liftoff::CalculateActualAddress(
      this, src_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldarb(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldarh(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldar(dst.gp().W(), src_reg);
      return;
    case LoadType::kI64Load:
      Ldar(dst.gp().X(), src_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register dst_reg = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      Stlrb(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      Stlrh(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      Stlr(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store:
      Stlr(src.gp().X(), dst_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAdd);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kSub);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAnd);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kOr);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kXor);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kExchange);
}

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  LiftoffRegList pinned = {dst_addr, offset_reg, expected, new_value};

  Register result_reg = result.gp();
  if (pinned.has(result)) {
    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
  }

  UseScratchRegisterScope temps(this);

  Register actual_addr = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  Register store_result = temps.AcquireW();

  Label retry;
  Label done;
  Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      ldaxrb(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTB));
      B(ne, &done);
      stlxrb(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      ldaxrh(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTH));
      B(ne, &done);
      stlxrh(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      ldaxr(result_reg.W(), actual_addr);
      Cmp(result.gp().W(), Operand(expected.gp().W(), UXTW));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store:
      ldaxr(result_reg.X(), actual_addr);
      Cmp(result.gp().X(), Operand(expected.gp().X(), UXTX));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  Cbnz(store_result.W(), &retry);
  Bind(&done);

  if (result_reg != result.gp()) {
    mov(result.gp(), result_reg);
  }
}

void LiftoffAssembler::AtomicFence() { Dmb(InnerShareable, BarrierAll); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Ldr(liftoff::GetRegFromType(dst, kind), MemOperand(fp, offset));
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Str(liftoff::GetRegFromType(src, kind), MemOperand(fp, offset));
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueKind kind) {
  Ldr(liftoff::GetRegFromType(dst, kind), MemOperand(sp, offset));
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  UseScratchRegisterScope temps(this);
  CPURegister scratch = liftoff::AcquireByType(&temps, kind);
  Ldr(scratch, liftoff::GetStackSlot(src_offset));
  Str(scratch, liftoff::GetStackSlot(dst_offset));
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  if (kind == kI32) {
    Mov(dst.W(), src.W());
  } else {
    DCHECK(kI64 == kind || is_reference(kind));
    Mov(dst.X(), src.X());
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  if (kind == kF32) {
    Fmov(dst.S(), src.S());
  } else if (kind == kF64) {
    Fmov(dst.D(), src.D());
  } else {
    DCHECK_EQ(kS128, kind);
    Mov(dst.Q(), src.Q());
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  Str(liftoff::GetRegFromType(reg, kind), dst);
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  UseScratchRegisterScope temps(this);
  CPURegister src = CPURegister::no_reg();
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0) {
        src = wzr;
      } else {
        src = temps.AcquireW();
        Mov(src.W(), value.to_i32());
      }
      break;
    case kI64:
      if (value.to_i64() == 0) {
        src = xzr;
      } else {
        src = temps.AcquireX();
        Mov(src.X(), value.to_i64());
      }
      break;
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
  Str(src, dst);
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  MemOperand src = liftoff::GetStackSlot(offset);
  Ldr(liftoff::GetRegFromType(reg, kind), src);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  // Zero 'size' bytes *below* start; the byte at offset 'start' is untouched.
  DCHECK_LE(0, start);
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  int max_stp_offset = -start - size;
  // We check IsImmLSUnscaled(-start-12) because str only allows unscaled
  // 9-bit immediate offsets [-256, 255]. If start is large enough, which can
  // happen when a function has many params (>= 32 i64), str cannot be encoded
  // properly. We could use Str, but that would generate more instructions, so
  // we fall back to the general case below instead.
  if (size <= 12 * kStackSlotSize &&
      IsImmLSPair(max_stp_offset, kXRegSizeLog2) &&
      IsImmLSUnscaled(-start - 12)) {
    // Special straight-line code for up to 12 slots. Generates one
    // instruction per two slots (<= 7 instructions total).
    STATIC_ASSERT(kStackSlotSize == kSystemPointerSize);
    uint32_t remainder = size;
    for (; remainder >= 2 * kStackSlotSize; remainder -= 2 * kStackSlotSize) {
      stp(xzr, xzr, liftoff::GetStackSlot(start + remainder));
    }

    DCHECK_GE(12, remainder);
    switch (remainder) {
      case 12:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        str(wzr, liftoff::GetStackSlot(start + remainder - 8));
        break;
      case 8:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 4:
        str(wzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 0:
        break;
      default:
        UNREACHABLE();
    }
  } else {
    // General case for bigger counts (5-8 instructions).
    UseScratchRegisterScope temps(this);
    Register address_reg = temps.AcquireX();
    // This {Sub} might use another temp register if the offset is too large.
    Sub(address_reg, fp, start + size);
    Register count_reg = temps.AcquireX();
    Mov(count_reg, size / 4);

    Label loop;
    bind(&loop);
    sub(count_reg, count_reg, 1);
    str(wzr, MemOperand(address_reg, kSystemPointerSize / 2, PostIndex));
    cbnz(count_reg, &loop);
  }
}

#define I32_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
                                     Register rhs) { \
    instruction(dst.W(), lhs.W(), rhs.W()); \
  }
#define I32_BINOP_I(name, instruction) \
  I32_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
                                        int32_t imm) { \
    instruction(dst.W(), lhs.W(), Immediate(imm)); \
  }
#define I64_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
                                     LiftoffRegister rhs) { \
    instruction(dst.gp().X(), lhs.gp().X(), rhs.gp().X()); \
  }
#define I64_BINOP_I(name, instruction) \
  I64_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst, \
                                        LiftoffRegister lhs, int32_t imm) { \
    instruction(dst.gp().X(), lhs.gp().X(), imm); \
  }
#define FP32_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) { \
    instruction(dst.S(), lhs.S(), rhs.S()); \
  }
#define FP32_UNOP(name, instruction) \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S()); \
  }
#define FP32_UNOP_RETURN_TRUE(name, instruction) \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S()); \
    return true; \
  }
#define FP64_BINOP(name, instruction) \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) { \
    instruction(dst.D(), lhs.D(), rhs.D()); \
  }
#define FP64_UNOP(name, instruction) \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D()); \
  }
#define FP64_UNOP_RETURN_TRUE(name, instruction) \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D()); \
    return true; \
  }
#define I32_SHIFTOP(name, instruction) \
  void LiftoffAssembler::emit_##name(Register dst, Register src, \
                                     Register amount) { \
    instruction(dst.W(), src.W(), amount.W()); \
  } \
  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
                                        int32_t amount) { \
    instruction(dst.W(), src.W(), amount & 31); \
  }
#define I64_SHIFTOP(name, instruction) \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
                                     Register amount) { \
    instruction(dst.gp().X(), src.gp().X(), amount.X()); \
  } \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst, \
                                        LiftoffRegister src, int32_t amount) { \
    instruction(dst.gp().X(), src.gp().X(), amount & 63); \
  }

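// For example, I32_BINOP(i32_mul, Mul) below expands to
//   void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs,
//                                       Register rhs) {
//     Mul(dst.W(), lhs.W(), rhs.W());
//   }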
I32_BINOP_I(i32_add, Add)
I32_BINOP_I(i32_sub, Sub)
I32_BINOP(i32_mul, Mul)
I32_BINOP_I(i32_and, And)
I32_BINOP_I(i32_or, Orr)
I32_BINOP_I(i32_xor, Eor)
I32_SHIFTOP(i32_shl, Lsl)
I32_SHIFTOP(i32_sar, Asr)
I32_SHIFTOP(i32_shr, Lsr)
I64_BINOP(i64_add, Add)
I64_BINOP(i64_sub, Sub)
I64_BINOP(i64_mul, Mul)
I64_BINOP_I(i64_and, And)
I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul)
FP32_BINOP(f32_div, Fdiv)
FP32_BINOP(f32_min, Fmin)
FP32_BINOP(f32_max, Fmax)
FP32_UNOP(f32_abs, Fabs)
FP32_UNOP(f32_neg, Fneg)
FP32_UNOP_RETURN_TRUE(f32_ceil, Frintp)
FP32_UNOP_RETURN_TRUE(f32_floor, Frintm)
FP32_UNOP_RETURN_TRUE(f32_trunc, Frintz)
FP32_UNOP_RETURN_TRUE(f32_nearest_int, Frintn)
FP32_UNOP(f32_sqrt, Fsqrt)
FP64_BINOP(f64_add, Fadd)
FP64_BINOP(f64_sub, Fsub)
FP64_BINOP(f64_mul, Fmul)
FP64_BINOP(f64_div, Fdiv)
FP64_BINOP(f64_min, Fmin)
FP64_BINOP(f64_max, Fmax)
FP64_UNOP(f64_abs, Fabs)
FP64_UNOP(f64_neg, Fneg)
FP64_UNOP_RETURN_TRUE(f64_ceil, Frintp)
FP64_UNOP_RETURN_TRUE(f64_floor, Frintm)
FP64_UNOP_RETURN_TRUE(f64_trunc, Frintz)
FP64_UNOP_RETURN_TRUE(f64_nearest_int, Frintn)
FP64_UNOP(f64_sqrt, Fsqrt)

#undef I32_BINOP
#undef I64_BINOP
#undef FP32_BINOP
#undef FP32_UNOP
#undef FP64_BINOP
#undef FP64_UNOP
#undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP
#undef I64_SHIFTOP

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
  Add(dst.gp().X(), lhs.gp().X(), imm);
}

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Clz(dst.W(), src.W());
}

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Rbit(dst.W(), src.W());
  Clz(dst.W(), dst.W());
}

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  PopcntHelper(dst.W(), src.W());
  return true;
}

void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Clz(dst.gp().X(), src.gp().X());
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Rbit(dst.gp().X(), src.gp().X());
  Clz(dst.gp().X(), dst.gp().X());
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  PopcntHelper(dst.gp().X(), src.gp().X());
  return true;
}

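// With pointer compression, Smis are 31 bits wide and fit in a W register, so
// adding the tagged constant Smi::FromInt(1) to the raw field increments the
// Smi payload by one without untagging. Without compression we untag, add,
// and retag instead.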
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
  UseScratchRegisterScope temps(this);
  if (COMPRESS_POINTERS_BOOL) {
    DCHECK(SmiValuesAre31Bits());
    Register scratch = temps.AcquireW();
    Ldr(scratch, MemOperand(dst.gp(), offset));
    Add(scratch, scratch, Operand(Smi::FromInt(1)));
    Str(scratch, MemOperand(dst.gp(), offset));
  } else {
    Register scratch = temps.AcquireX();
    SmiUntag(scratch, MemOperand(dst.gp(), offset));
    Add(scratch, scratch, Operand(1));
    SmiTag(scratch);
    Str(scratch, MemOperand(dst.gp(), offset));
  }
}

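// The kMinInt / -1 check below is done entirely in the condition flags:
// Cmp(rhs, -1) followed by Ccmp(lhs, 1, NoFlag, eq) sets the overflow flag (V)
// only if rhs == -1 and lhs == kMinInt, so B(..., vs) traps exactly in the
// unrepresentable case.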
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  bool can_use_dst = !dst_w.Aliases(lhs_w) && !dst_w.Aliases(rhs_w);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_w, -1);
  Ccmp(lhs_w, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.W(), trap_div_by_zero);
  // Do div.
  Udiv(dst.W(), lhs.W(), rhs.W());
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Sdiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Udiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}

bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  bool can_use_dst = !dst_x.Aliases(lhs_x) && !dst_x.Aliases(rhs_x);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_x, -1);
  Ccmp(lhs_x, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  return true;
}

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.gp().X(), trap_div_by_zero);
  // Do div.
  Udiv(dst.gp().X(), lhs.gp().X(), rhs.gp().X());
  return true;
}

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Sdiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Udiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}

void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
  Uxtw(dst, src);
}

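// Both copysign variants use the same NEON trick: Ushr moves the sign bit of
// rhs into bit 0 of a scratch register, and Sli shifts it back into the sign
// position (bit 31 for f32, bit 63 for f64) and inserts it into dst while
// leaving the magnitude bits taken from lhs untouched.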
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V2S(), rhs.V2S(), 31);
  if (dst != lhs) {
    Fmov(dst.S(), lhs.S());
  }
  Sli(dst.V2S(), scratch.V2S(), 31);
}

void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V1D(), rhs.V1D(), 63);
  if (dst != lhs) {
    Fmov(dst.D(), lhs.D());
  }
  Sli(dst.V1D(), scratch.V1D(), 63);
}

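// Non-saturating float-to-int conversions convert first and then detect
// out-of-range inputs or NaN with an Fcmp/Ccmp sequence that branches to
// {trap}. The *Sat* variants need no checks, because Fcvtzs/Fcvtzu already
// saturate out-of-range values and map NaN to zero.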
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,
                                            LiftoffRegister src, Label* trap) {
  switch (opcode) {
    case kExprI32ConvertI64:
      Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI32SConvertF32:
      Fcvtzs(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT32_MIN));
      // Check overflow.
      Ccmp(dst.gp().W(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI32UConvertF32:
      Fcvtzu(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().W(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI32SConvertF64: {
      // INT32_MIN and INT32_MAX are valid results, so we cannot test the
      // result to detect the overflows. We could have done two immediate
      // floating point comparisons, but it would have generated two
      // conditional branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzs(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Scvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32UConvertF64: {
      // INT32_MAX is a valid result, so we cannot test the result to detect
      // the overflows. We could have done two immediate floating point
      // comparisons, but it would have generated two conditional branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzu(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Ucvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32SConvertSatF32:
      Fcvtzs(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32UConvertSatF32:
      Fcvtzu(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32SConvertSatF64:
      Fcvtzs(dst.gp().W(), src.fp().D());
      return true;
    case kExprI32UConvertSatF64:
      Fcvtzu(dst.gp().W(), src.fp().D());
      return true;
    case kExprI64SConvertSatF32:
      Fcvtzs(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64UConvertSatF32:
      Fcvtzu(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64SConvertSatF64:
      Fcvtzs(dst.gp().X(), src.fp().D());
      return true;
    case kExprI64UConvertSatF64:
      Fcvtzu(dst.gp().X(), src.fp().D());
      return true;
    case kExprI32ReinterpretF32:
      Fmov(dst.gp().W(), src.fp().S());
      return true;
    case kExprI64SConvertI32:
      Sxtw(dst.gp().X(), src.gp().W());
      return true;
    case kExprI64SConvertF32:
      Fcvtzs(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF32:
      Fcvtzu(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64SConvertF64:
      Fcvtzs(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().D(), static_cast<double>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF64:
      Fcvtzu(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().D(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64UConvertI32:
      Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI64ReinterpretF64:
      Fmov(dst.gp().X(), src.fp().D());
      return true;
    case kExprF32SConvertI32:
      Scvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32UConvertI32:
      Ucvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32SConvertI64:
      Scvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32UConvertI64:
      Ucvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32ConvertF64:
      Fcvt(dst.fp().S(), src.fp().D());
      return true;
    case kExprF32ReinterpretI32:
      Fmov(dst.fp().S(), src.gp().W());
      return true;
    case kExprF64SConvertI32:
      Scvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64UConvertI32:
      Ucvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64SConvertI64:
      Scvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64UConvertI64:
      Ucvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64ConvertF32:
      Fcvt(dst.fp().D(), src.fp().S());
      return true;
    case kExprF64ReinterpretI64:
      Fmov(dst.fp().D(), src.gp().X());
      return true;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  sxtb(dst.W(), src.W());
}

void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  sxth(dst.W(), src.W());
}

void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  sxtb(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxth(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxtw(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_jump(Label* label) { B(label); }

void LiftoffAssembler::emit_jump(Register target) { Br(target); }

void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
                                      Label* label, ValueKind kind,
                                      Register lhs, Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  switch (kind) {
    case kI32:
      if (rhs.is_valid()) {
        Cmp(lhs.W(), rhs.W());
      } else {
        Cmp(lhs.W(), wzr);
      }
      break;
    case kRef:
    case kOptRef:
    case kRtt:
      DCHECK(rhs.is_valid());
      DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
      V8_FALLTHROUGH;
    case kI64:
      if (rhs.is_valid()) {
        Cmp(lhs.X(), rhs.X());
      } else {
        Cmp(lhs.X(), xzr);
      }
      break;
    default:
      UNREACHABLE();
  }
  B(label, cond);
}

void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
                                           Label* label, Register lhs,
                                           int32_t imm) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  Cmp(lhs.W(), Operand(imm));
  B(label, cond);
}

void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
                                                   int subtrahend,
                                                   Label* result_negative) {
  Subs(value.W(), value.W(), Immediate(subtrahend));
  B(result_negative, mi);
}

void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  Cmp(src.W(), wzr);
  Cset(dst.W(), eq);
}

void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, Register lhs,
                                         Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  Cmp(lhs.W(), rhs.W());
  Cset(dst.W(), cond);
}

void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  Cmp(src.gp().X(), xzr);
  Cset(dst.W(), eq);
}

void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  Cmp(lhs.gp().X(), rhs.gp().X());
1627 Cset(dst.W(), cond);
1628 }
1629
emit_f32_set_cond(LiftoffCondition liftoff_cond, Register dst, DoubleRegister lhs, DoubleRegister rhs)1630 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
1631 Register dst, DoubleRegister lhs,
1632 DoubleRegister rhs) {
1633 Condition cond = liftoff::ToCondition(liftoff_cond);
1634 Fcmp(lhs.S(), rhs.S());
1635 Cset(dst.W(), cond);
1636 if (cond != ne) {
1637     // If the V flag is set, at least one argument was a NaN -> false.
1638 Csel(dst.W(), wzr, dst.W(), vs);
1639 }
1640 }
1641
emit_f64_set_cond(LiftoffCondition liftoff_cond, Register dst, DoubleRegister lhs, DoubleRegister rhs)1642 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
1643 Register dst, DoubleRegister lhs,
1644 DoubleRegister rhs) {
1645 Condition cond = liftoff::ToCondition(liftoff_cond);
1646 Fcmp(lhs.D(), rhs.D());
1647 Cset(dst.W(), cond);
1648 if (cond != ne) {
1649     // If the V flag is set, at least one argument was a NaN -> false.
1650 Csel(dst.W(), wzr, dst.W(), vs);
1651 }
1652 }
1653
emit_select(LiftoffRegister dst, Register condition, LiftoffRegister true_value, LiftoffRegister false_value, ValueKind kind)1654 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
1655 LiftoffRegister true_value,
1656 LiftoffRegister false_value,
1657 ValueKind kind) {
1658 return false;
1659 }
1660
emit_smi_check(Register obj, Label* target, SmiCheckMode mode)1661 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
1662 SmiCheckMode mode) {
1663 Label* smi_label = mode == kJumpOnSmi ? target : nullptr;
1664 Label* not_smi_label = mode == kJumpOnNotSmi ? target : nullptr;
1665 JumpIfSmi(obj, smi_label, not_smi_label);
1666 }
1667
LoadTransform(LiftoffRegister dst, Register src_addr, Register offset_reg, uintptr_t offset_imm, LoadType type, LoadTransformationKind transform, uint32_t* protected_load_pc)1668 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
1669 Register offset_reg, uintptr_t offset_imm,
1670 LoadType type,
1671 LoadTransformationKind transform,
1672 uint32_t* protected_load_pc) {
1673 UseScratchRegisterScope temps(this);
1674 MemOperand src_op =
1675 transform == LoadTransformationKind::kSplat
1676 ? MemOperand{liftoff::GetEffectiveAddress(this, &temps, src_addr,
1677 offset_reg, offset_imm)}
1678 : liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
1679 *protected_load_pc = pc_offset();
1680 MachineType memtype = type.mem_type();
1681
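  // Three cases: kExtend loads 64 bits and sign/zero-extends each lane to
  // twice its width, kZeroExtend loads a 32/64-bit scalar (writes to S/D zero
  // the rest of the Q register), and kSplat uses ld1r to replicate one element
  // across all lanes.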
1682 if (transform == LoadTransformationKind::kExtend) {
1683 if (memtype == MachineType::Int8()) {
1684 Ldr(dst.fp().D(), src_op);
1685 Sxtl(dst.fp().V8H(), dst.fp().V8B());
1686 } else if (memtype == MachineType::Uint8()) {
1687 Ldr(dst.fp().D(), src_op);
1688 Uxtl(dst.fp().V8H(), dst.fp().V8B());
1689 } else if (memtype == MachineType::Int16()) {
1690 Ldr(dst.fp().D(), src_op);
1691 Sxtl(dst.fp().V4S(), dst.fp().V4H());
1692 } else if (memtype == MachineType::Uint16()) {
1693 Ldr(dst.fp().D(), src_op);
1694 Uxtl(dst.fp().V4S(), dst.fp().V4H());
1695 } else if (memtype == MachineType::Int32()) {
1696 Ldr(dst.fp().D(), src_op);
1697 Sxtl(dst.fp().V2D(), dst.fp().V2S());
1698 } else if (memtype == MachineType::Uint32()) {
1699 Ldr(dst.fp().D(), src_op);
1700 Uxtl(dst.fp().V2D(), dst.fp().V2S());
1701 }
1702 } else if (transform == LoadTransformationKind::kZeroExtend) {
1703 if (memtype == MachineType::Int32()) {
1704 Ldr(dst.fp().S(), src_op);
1705 } else {
1706 DCHECK_EQ(MachineType::Int64(), memtype);
1707 Ldr(dst.fp().D(), src_op);
1708 }
1709 } else {
1710 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
1711 if (memtype == MachineType::Int8()) {
1712 ld1r(dst.fp().V16B(), src_op);
1713 } else if (memtype == MachineType::Int16()) {
1714 ld1r(dst.fp().V8H(), src_op);
1715 } else if (memtype == MachineType::Int32()) {
1716 ld1r(dst.fp().V4S(), src_op);
1717 } else if (memtype == MachineType::Int64()) {
1718 ld1r(dst.fp().V2D(), src_op);
1719 }
1720 }
1721 }
1722
LoadLane(LiftoffRegister dst, LiftoffRegister src, Register addr, Register offset_reg, uintptr_t offset_imm, LoadType type, uint8_t laneidx, uint32_t* protected_load_pc)1723 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
1724 Register addr, Register offset_reg,
1725 uintptr_t offset_imm, LoadType type,
1726 uint8_t laneidx, uint32_t* protected_load_pc) {
1727 UseScratchRegisterScope temps(this);
1728 MemOperand src_op{
1729 liftoff::GetEffectiveAddress(this, &temps, addr, offset_reg, offset_imm)};
1730
1731 MachineType mem_type = type.mem_type();
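  // ld1 (single structure) only replaces the addressed lane, so dst must
  // already contain the original vector.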
1732 if (dst != src) {
1733 Mov(dst.fp().Q(), src.fp().Q());
1734 }
1735
1736 *protected_load_pc = pc_offset();
1737 if (mem_type == MachineType::Int8()) {
1738 ld1(dst.fp().B(), laneidx, src_op);
1739 } else if (mem_type == MachineType::Int16()) {
1740 ld1(dst.fp().H(), laneidx, src_op);
1741 } else if (mem_type == MachineType::Int32()) {
1742 ld1(dst.fp().S(), laneidx, src_op);
1743 } else if (mem_type == MachineType::Int64()) {
1744 ld1(dst.fp().D(), laneidx, src_op);
1745 } else {
1746 UNREACHABLE();
1747 }
1748 }
1749
StoreLane(Register dst, Register offset, uintptr_t offset_imm, LiftoffRegister src, StoreType type, uint8_t lane, uint32_t* protected_store_pc)1750 void LiftoffAssembler::StoreLane(Register dst, Register offset,
1751 uintptr_t offset_imm, LiftoffRegister src,
1752 StoreType type, uint8_t lane,
1753 uint32_t* protected_store_pc) {
1754 UseScratchRegisterScope temps(this);
1755 MemOperand dst_op{
1756 liftoff::GetEffectiveAddress(this, &temps, dst, offset, offset_imm)};
1757 if (protected_store_pc) *protected_store_pc = pc_offset();
1758
1759 MachineRepresentation rep = type.mem_rep();
1760 if (rep == MachineRepresentation::kWord8) {
1761 st1(src.fp().B(), lane, dst_op);
1762 } else if (rep == MachineRepresentation::kWord16) {
1763 st1(src.fp().H(), lane, dst_op);
1764 } else if (rep == MachineRepresentation::kWord32) {
1765 st1(src.fp().S(), lane, dst_op);
1766 } else {
1767 DCHECK_EQ(MachineRepresentation::kWord64, rep);
1768 st1(src.fp().D(), lane, dst_op);
1769 }
1770 }
1771
emit_i8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1772 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
1773 LiftoffRegister lhs,
1774 LiftoffRegister rhs) {
1775 Tbl(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
1776 }
1777
emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src)1778 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
1779 LiftoffRegister src) {
1780 Dup(dst.fp().V2D(), src.fp().D(), 0);
1781 }
1782
emit_f64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)1783 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
1784 LiftoffRegister lhs,
1785 uint8_t imm_lane_idx) {
1786 Mov(dst.fp().D(), lhs.fp().V2D(), imm_lane_idx);
1787 }
1788
emit_f64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)1789 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
1790 LiftoffRegister src1,
1791 LiftoffRegister src2,
1792 uint8_t imm_lane_idx) {
1793 if (dst != src1) {
1794 Mov(dst.fp().V2D(), src1.fp().V2D());
1795 }
1796 Mov(dst.fp().V2D(), imm_lane_idx, src2.fp().V2D(), 0);
1797 }
1798
emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src)1799 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
1800 LiftoffRegister src) {
1801 Fabs(dst.fp().V2D(), src.fp().V2D());
1802 }
1803
emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src)1804 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
1805 LiftoffRegister src) {
1806 Fneg(dst.fp().V2D(), src.fp().V2D());
1807 }
1808
emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src)1809 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
1810 LiftoffRegister src) {
1811 Fsqrt(dst.fp().V2D(), src.fp().V2D());
1812 }
1813
emit_f64x2_ceil(LiftoffRegister dst, LiftoffRegister src)1814 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
1815 LiftoffRegister src) {
1816 Frintp(dst.fp().V2D(), src.fp().V2D());
1817 return true;
1818 }
1819
emit_f64x2_floor(LiftoffRegister dst, LiftoffRegister src)1820 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
1821 LiftoffRegister src) {
1822 Frintm(dst.fp().V2D(), src.fp().V2D());
1823 return true;
1824 }
1825
emit_f64x2_trunc(LiftoffRegister dst, LiftoffRegister src)1826 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
1827 LiftoffRegister src) {
1828 Frintz(dst.fp().V2D(), src.fp().V2D());
1829 return true;
1830 }
1831
emit_f64x2_nearest_int(LiftoffRegister dst, LiftoffRegister src)1832 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
1833 LiftoffRegister src) {
1834 Frintn(dst.fp().V2D(), src.fp().V2D());
1835 return true;
1836 }
1837
emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1838 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
1839 LiftoffRegister rhs) {
1840 Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1841 }
1842
emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1843 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
1844 LiftoffRegister rhs) {
1845 Fsub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1846 }
1847
emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1848 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
1849 LiftoffRegister rhs) {
1850 Fmul(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1851 }
1852
emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1853 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
1854 LiftoffRegister rhs) {
1855 Fdiv(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1856 }
1857
emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1858 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
1859 LiftoffRegister rhs) {
1860 Fmin(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1861 }
1862
emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1863 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
1864 LiftoffRegister rhs) {
1865 Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1866 }
1867
emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1868 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
1869 LiftoffRegister rhs) {
1870 UseScratchRegisterScope temps(this);
1871
1872 VRegister tmp = dst.fp();
1873 if (dst == lhs || dst == rhs) {
1874 tmp = temps.AcquireV(kFormat2D);
1875 }
1876
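  // pmin selects rhs only where it is strictly smaller: Fcmgt builds an
  // all-ones mask where lhs > rhs, and Bsl then picks rhs under the mask and
  // lhs elsewhere.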
1877 Fcmgt(tmp.V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1878 Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
1879
1880 if (dst == lhs || dst == rhs) {
1881 Mov(dst.fp().V2D(), tmp);
1882 }
1883 }
1884
emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1885 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
1886 LiftoffRegister rhs) {
1887 UseScratchRegisterScope temps(this);
1888
1889 VRegister tmp = dst.fp();
1890 if (dst == lhs || dst == rhs) {
1891 tmp = temps.AcquireV(kFormat2D);
1892 }
1893
1894 Fcmgt(tmp.V2D(), rhs.fp().V2D(), lhs.fp().V2D());
1895 Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
1896
1897 if (dst == lhs || dst == rhs) {
1898 Mov(dst.fp().V2D(), tmp);
1899 }
1900 }
1901
emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst, LiftoffRegister src)1902 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
1903 LiftoffRegister src) {
1904 Sxtl(dst.fp().V2D(), src.fp().V2S());
1905 Scvtf(dst.fp().V2D(), dst.fp().V2D());
1906 }
1907
emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst, LiftoffRegister src)1908 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
1909 LiftoffRegister src) {
1910 Uxtl(dst.fp().V2D(), src.fp().V2S());
1911 Ucvtf(dst.fp().V2D(), dst.fp().V2D());
1912 }
1913
emit_f64x2_promote_low_f32x4(LiftoffRegister dst, LiftoffRegister src)1914 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
1915 LiftoffRegister src) {
1916 Fcvtl(dst.fp().V2D(), src.fp().V2S());
1917 }
1918
emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src)1919 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
1920 LiftoffRegister src) {
1921 Dup(dst.fp().V4S(), src.fp().S(), 0);
1922 }
1923
emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)1924 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
1925 LiftoffRegister lhs,
1926 uint8_t imm_lane_idx) {
1927 Mov(dst.fp().S(), lhs.fp().V4S(), imm_lane_idx);
1928 }
1929
emit_f32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)1930 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
1931 LiftoffRegister src1,
1932 LiftoffRegister src2,
1933 uint8_t imm_lane_idx) {
1934 if (dst != src1) {
1935 Mov(dst.fp().V4S(), src1.fp().V4S());
1936 }
1937 Mov(dst.fp().V4S(), imm_lane_idx, src2.fp().V4S(), 0);
1938 }
1939
emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src)1940 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
1941 LiftoffRegister src) {
1942 Fabs(dst.fp().V4S(), src.fp().V4S());
1943 }
1944
emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src)1945 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
1946 LiftoffRegister src) {
1947 Fneg(dst.fp().V4S(), src.fp().V4S());
1948 }
1949
emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src)1950 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
1951 LiftoffRegister src) {
1952 Fsqrt(dst.fp().V4S(), src.fp().V4S());
1953 }
1954
emit_f32x4_ceil(LiftoffRegister dst, LiftoffRegister src)1955 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
1956 LiftoffRegister src) {
1957 Frintp(dst.fp().V4S(), src.fp().V4S());
1958 return true;
1959 }
1960
emit_f32x4_floor(LiftoffRegister dst, LiftoffRegister src)1961 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
1962 LiftoffRegister src) {
1963 Frintm(dst.fp().V4S(), src.fp().V4S());
1964 return true;
1965 }
1966
emit_f32x4_trunc(LiftoffRegister dst, LiftoffRegister src)1967 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
1968 LiftoffRegister src) {
1969 Frintz(dst.fp().V4S(), src.fp().V4S());
1970 return true;
1971 }
1972
emit_f32x4_nearest_int(LiftoffRegister dst, LiftoffRegister src)1973 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
1974 LiftoffRegister src) {
1975 Frintn(dst.fp().V4S(), src.fp().V4S());
1976 return true;
1977 }
1978
emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1979 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
1980 LiftoffRegister rhs) {
1981 Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1982 }
1983
emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1984 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
1985 LiftoffRegister rhs) {
1986 Fsub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1987 }
1988
emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1989 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
1990 LiftoffRegister rhs) {
1991 Fmul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1992 }
1993
emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1994 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
1995 LiftoffRegister rhs) {
1996 Fdiv(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
1997 }
1998
emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)1999 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
2000 LiftoffRegister rhs) {
2001 Fmin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2002 }
2003
emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2004 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
2005 LiftoffRegister rhs) {
2006 Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2007 }
2008
emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2009 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2010 LiftoffRegister rhs) {
2011 UseScratchRegisterScope temps(this);
2012
2013 VRegister tmp = dst.fp();
2014 if (dst == lhs || dst == rhs) {
2015 tmp = temps.AcquireV(kFormat4S);
2016 }
2017
2018 Fcmgt(tmp.V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2019 Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
2020
2021 if (dst == lhs || dst == rhs) {
2022 Mov(dst.fp().V4S(), tmp);
2023 }
2024 }
2025
emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2026 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2027 LiftoffRegister rhs) {
2028 UseScratchRegisterScope temps(this);
2029
2030 VRegister tmp = dst.fp();
2031 if (dst == lhs || dst == rhs) {
2032 tmp = temps.AcquireV(kFormat4S);
2033 }
2034
2035 Fcmgt(tmp.V4S(), rhs.fp().V4S(), lhs.fp().V4S());
2036 Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
2037
2038 if (dst == lhs || dst == rhs) {
2039 Mov(dst.fp().V4S(), tmp);
2040 }
2041 }
2042
emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src)2043 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2044 LiftoffRegister src) {
2045 Dup(dst.fp().V2D(), src.gp().X());
2046 }
2047
emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2048 void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
2049 LiftoffRegister lhs,
2050 uint8_t imm_lane_idx) {
2051 Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
2052 }
2053
emit_i64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2054 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
2055 LiftoffRegister src1,
2056 LiftoffRegister src2,
2057 uint8_t imm_lane_idx) {
2058 if (dst != src1) {
2059 Mov(dst.fp().V2D(), src1.fp().V2D());
2060 }
2061 Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
2062 }
2063
emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src)2064 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
2065 LiftoffRegister src) {
2066 Neg(dst.fp().V2D(), src.fp().V2D());
2067 }
2068
emit_i64x2_alltrue(LiftoffRegister dst, LiftoffRegister src)2069 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
2070 LiftoffRegister src) {
2071 I64x2AllTrue(dst.gp(), src.fp());
2072 }
2073
emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2074 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
2075 LiftoffRegister rhs) {
2076 liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2077 this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2078 }
2079
emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2080 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
2081 int32_t rhs) {
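  // Wasm takes shift counts modulo the lane width, hence the mask to 6 bits.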
2082 Shl(dst.fp().V2D(), lhs.fp().V2D(), rhs & 63);
2083 }
2084
emit_i64x2_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2085 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
2086 LiftoffRegister lhs,
2087 LiftoffRegister rhs) {
2088 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2089 liftoff::ShiftSign::kSigned>(
2090 this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2091 }
2092
emit_i64x2_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2093 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
2094 LiftoffRegister lhs, int32_t rhs) {
2095 liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>(
2096 this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
2097 }
2098
emit_i64x2_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2099 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
2100 LiftoffRegister lhs,
2101 LiftoffRegister rhs) {
2102 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2103 liftoff::ShiftSign::kUnsigned>(
2104 this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
2105 }
2106
emit_i64x2_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2107 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
2108 LiftoffRegister lhs, int32_t rhs) {
2109 liftoff::EmitSimdShiftRightImmediate<kFormat2D,
2110 liftoff::ShiftSign::kUnsigned>(
2111 this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
2112 }
2113
emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2114 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2115 LiftoffRegister rhs) {
2116 Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2117 }
2118
emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2119 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2120 LiftoffRegister rhs) {
2121 Sub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2122 }
2123
emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2124 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2125 LiftoffRegister rhs) {
2126 UseScratchRegisterScope temps(this);
2127 VRegister tmp1 = temps.AcquireV(kFormat2D);
2128 VRegister tmp2 = temps.AcquireV(kFormat2D);
2129
2130 // Algorithm copied from code-generator-arm64.cc with minor modifications:
2131 // - 2 (max number of scratch registers in Liftoff) temporaries instead of 3
2132 // - 1 more Umull instruction to calculate | cg | ae |,
2133 // - so, we can no longer use Umlal in the last step, and use Add instead.
2134 // Refer to comments there for details.
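  // Per 64-bit lane, with lhs = (ah << 32) | al and rhs = (bh << 32) | bl:
  //   lhs * rhs mod 2^64 = al * bl + ((ah * bl + al * bh) << 32).
  // Umull produces the full al*bl products, Rev64/Mul/Addp compute the sum of
  // the two cross terms, and Shll shifts that sum into the upper 32 bits
  // before the final Add.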
2135 Xtn(tmp1.V2S(), lhs.fp().V2D());
2136 Xtn(tmp2.V2S(), rhs.fp().V2D());
2137 Umull(tmp1.V2D(), tmp1.V2S(), tmp2.V2S());
2138 Rev64(tmp2.V4S(), rhs.fp().V4S());
2139 Mul(tmp2.V4S(), tmp2.V4S(), lhs.fp().V4S());
2140 Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2141 Shll(dst.fp().V2D(), tmp2.V2S(), 32);
2142 Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
2143 }
2144
emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2145 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
2146 LiftoffRegister src1,
2147 LiftoffRegister src2) {
2148 Smull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
2149 }
2150
emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2151 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
2152 LiftoffRegister src1,
2153 LiftoffRegister src2) {
2154 Umull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
2155 }
2156
emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2157 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
2158 LiftoffRegister src1,
2159 LiftoffRegister src2) {
2160 Smull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
2161 }
2162
emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2163 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
2164 LiftoffRegister src1,
2165 LiftoffRegister src2) {
2166 Umull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
2167 }
2168
emit_i64x2_bitmask(LiftoffRegister dst, LiftoffRegister src)2169 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
2170 LiftoffRegister src) {
2171 I64x2BitMask(dst.gp(), src.fp());
2172 }
2173
emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst, LiftoffRegister src)2174 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
2175 LiftoffRegister src) {
2176 Sxtl(dst.fp().V2D(), src.fp().V2S());
2177 }
2178
emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst, LiftoffRegister src)2179 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
2180 LiftoffRegister src) {
2181 Sxtl2(dst.fp().V2D(), src.fp().V4S());
2182 }
2183
emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst, LiftoffRegister src)2184 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
2185 LiftoffRegister src) {
2186 Uxtl(dst.fp().V2D(), src.fp().V2S());
2187 }
2188
emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst, LiftoffRegister src)2189 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
2190 LiftoffRegister src) {
2191 Uxtl2(dst.fp().V2D(), src.fp().V4S());
2192 }
2193
emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src)2194 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2195 LiftoffRegister src) {
2196 Dup(dst.fp().V4S(), src.gp().W());
2197 }
2198
emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2199 void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
2200 LiftoffRegister lhs,
2201 uint8_t imm_lane_idx) {
2202 Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
2203 }
2204
emit_i32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2205 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
2206 LiftoffRegister src1,
2207 LiftoffRegister src2,
2208 uint8_t imm_lane_idx) {
2209 if (dst != src1) {
2210 Mov(dst.fp().V4S(), src1.fp().V4S());
2211 }
2212 Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
2213 }
2214
emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src)2215 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
2216 LiftoffRegister src) {
2217 Neg(dst.fp().V4S(), src.fp().V4S());
2218 }
2219
emit_i32x4_alltrue(LiftoffRegister dst, LiftoffRegister src)2220 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
2221 LiftoffRegister src) {
2222 liftoff::EmitAllTrue(this, dst, src, kFormat4S);
2223 }
2224
emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src)2225 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
2226 LiftoffRegister src) {
2227 UseScratchRegisterScope temps(this);
2228 VRegister tmp = temps.AcquireQ();
2229 VRegister mask = temps.AcquireQ();
2230
2231 Sshr(tmp.V4S(), src.fp().V4S(), 31);
2232   // Set the i-th bit in lane i. After AND-ing with tmp, lanes whose sign
2233   // bit was set keep their i-th bit, while non-negative lanes become 0.
2234 Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
2235 And(tmp.V16B(), mask.V16B(), tmp.V16B());
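  // Horizontally add the surviving lane weights (1|2|4|8) to form the 4-bit
  // bitmask in the scalar lane.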
2236 Addv(tmp.S(), tmp.V4S());
2237 Mov(dst.gp().W(), tmp.V4S(), 0);
2238 }
2239
emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2240 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
2241 LiftoffRegister rhs) {
2242 liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2243 this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2244 }
2245
emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2246 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
2247 int32_t rhs) {
2248 Shl(dst.fp().V4S(), lhs.fp().V4S(), rhs & 31);
2249 }
2250
emit_i32x4_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2251 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
2252 LiftoffRegister lhs,
2253 LiftoffRegister rhs) {
2254 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2255 liftoff::ShiftSign::kSigned>(
2256 this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2257 }
2258
emit_i32x4_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2259 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
2260 LiftoffRegister lhs, int32_t rhs) {
2261 liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>(
2262 this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
2263 }
2264
emit_i32x4_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2265 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
2266 LiftoffRegister lhs,
2267 LiftoffRegister rhs) {
2268 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2269 liftoff::ShiftSign::kUnsigned>(
2270 this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
2271 }
2272
emit_i32x4_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2273 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
2274 LiftoffRegister lhs, int32_t rhs) {
2275 liftoff::EmitSimdShiftRightImmediate<kFormat4S,
2276 liftoff::ShiftSign::kUnsigned>(
2277 this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
2278 }
2279
emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2280 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2281 LiftoffRegister rhs) {
2282 Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2283 }
2284
emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2285 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2286 LiftoffRegister rhs) {
2287 Sub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2288 }
2289
emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2290 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2291 LiftoffRegister rhs) {
2292 Mul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2293 }
2294
emit_i32x4_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2295 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
2296 LiftoffRegister lhs,
2297 LiftoffRegister rhs) {
2298 Smin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2299 }
2300
emit_i32x4_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2301 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
2302 LiftoffRegister lhs,
2303 LiftoffRegister rhs) {
2304 Umin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2305 }
2306
emit_i32x4_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2307 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
2308 LiftoffRegister lhs,
2309 LiftoffRegister rhs) {
2310 Smax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2311 }
2312
emit_i32x4_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2313 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
2314 LiftoffRegister lhs,
2315 LiftoffRegister rhs) {
2316 Umax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2317 }
2318
emit_i32x4_dot_i16x8_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2319 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
2320 LiftoffRegister lhs,
2321 LiftoffRegister rhs) {
2322 UseScratchRegisterScope scope(this);
2323 VRegister tmp1 = scope.AcquireV(kFormat4S);
2324 VRegister tmp2 = scope.AcquireV(kFormat4S);
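  // Multiply the low and high i16 halves into 32-bit products, then add
  // adjacent pairs so each i32 lane holds the dot product of one i16 pair.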
2325 Smull(tmp1, lhs.fp().V4H(), rhs.fp().V4H());
2326 Smull2(tmp2, lhs.fp().V8H(), rhs.fp().V8H());
2327 Addp(dst.fp().V4S(), tmp1, tmp2);
2328 }
2329
emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst, LiftoffRegister src)2330 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
2331 LiftoffRegister src) {
2332 Saddlp(dst.fp().V4S(), src.fp().V8H());
2333 }
2334
emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst, LiftoffRegister src)2335 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
2336 LiftoffRegister src) {
2337 Uaddlp(dst.fp().V4S(), src.fp().V8H());
2338 }
2339
emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2340 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
2341 LiftoffRegister src1,
2342 LiftoffRegister src2) {
2343 Smull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
2344 }
2345
emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2346 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
2347 LiftoffRegister src1,
2348 LiftoffRegister src2) {
2349 Umull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
2350 }
2351
emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2352 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
2353 LiftoffRegister src1,
2354 LiftoffRegister src2) {
2355 Smull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
2356 }
2357
emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2)2358 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
2359 LiftoffRegister src1,
2360 LiftoffRegister src2) {
2361 Umull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
2362 }
2363
emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src)2364 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2365 LiftoffRegister src) {
2366 Dup(dst.fp().V8H(), src.gp().W());
2367 }
2368
emit_i16x8_extract_lane_u(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2369 void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
2370 LiftoffRegister lhs,
2371 uint8_t imm_lane_idx) {
2372 Umov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
2373 }
2374
emit_i16x8_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2375 void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
2376 LiftoffRegister lhs,
2377 uint8_t imm_lane_idx) {
2378 Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
2379 }
2380
emit_i16x8_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2381 void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
2382 LiftoffRegister src1,
2383 LiftoffRegister src2,
2384 uint8_t imm_lane_idx) {
2385 if (dst != src1) {
2386 Mov(dst.fp().V8H(), src1.fp().V8H());
2387 }
2388 Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
2389 }
2390
emit_i16x8_neg(LiftoffRegister dst, LiftoffRegister src)2391 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
2392 LiftoffRegister src) {
2393 Neg(dst.fp().V8H(), src.fp().V8H());
2394 }
2395
emit_i16x8_alltrue(LiftoffRegister dst, LiftoffRegister src)2396 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
2397 LiftoffRegister src) {
2398 liftoff::EmitAllTrue(this, dst, src, kFormat8H);
2399 }
2400
emit_i16x8_bitmask(LiftoffRegister dst, LiftoffRegister src)2401 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
2402 LiftoffRegister src) {
2403 UseScratchRegisterScope temps(this);
2404 VRegister tmp = temps.AcquireQ();
2405 VRegister mask = temps.AcquireQ();
2406
2407 Sshr(tmp.V8H(), src.fp().V8H(), 15);
2408   // Set the i-th bit in lane i. After AND-ing with tmp, lanes whose sign
2409   // bit was set keep their i-th bit, while non-negative lanes become 0.
2410 Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
2411 And(tmp.V16B(), mask.V16B(), tmp.V16B());
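  // Horizontally add the surviving lane weights (1..128) to form the 8-bit
  // bitmask.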
2412 Addv(tmp.H(), tmp.V8H());
2413 Mov(dst.gp().W(), tmp.V8H(), 0);
2414 }
2415
emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2416 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
2417 LiftoffRegister rhs) {
2418 liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2419 this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2420 }
2421
emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2422 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
2423 int32_t rhs) {
2424 Shl(dst.fp().V8H(), lhs.fp().V8H(), rhs & 15);
2425 }
2426
emit_i16x8_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2427 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
2428 LiftoffRegister lhs,
2429 LiftoffRegister rhs) {
2430 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2431 liftoff::ShiftSign::kSigned>(
2432 this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2433 }
2434
emit_i16x8_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2435 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
2436 LiftoffRegister lhs, int32_t rhs) {
2437 liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>(
2438 this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
2439 }
2440
emit_i16x8_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2441 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
2442 LiftoffRegister lhs,
2443 LiftoffRegister rhs) {
2444 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2445 liftoff::ShiftSign::kUnsigned>(
2446 this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
2447 }
2448
emit_i16x8_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs)2449 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
2450 LiftoffRegister lhs, int32_t rhs) {
2451 liftoff::EmitSimdShiftRightImmediate<kFormat8H,
2452 liftoff::ShiftSign::kUnsigned>(
2453 this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
2454 }
2455
emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2456 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
2457 LiftoffRegister rhs) {
2458 Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2459 }
2460
emit_i16x8_add_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2461 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
2462 LiftoffRegister lhs,
2463 LiftoffRegister rhs) {
2464 Sqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2465 }
2466
emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2467 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
2468 LiftoffRegister rhs) {
2469 Sub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2470 }
2471
emit_i16x8_sub_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2472 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
2473 LiftoffRegister lhs,
2474 LiftoffRegister rhs) {
2475 Sqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2476 }
2477
emit_i16x8_sub_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2478 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
2479 LiftoffRegister lhs,
2480 LiftoffRegister rhs) {
2481 Uqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2482 }
2483
emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2484 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
2485 LiftoffRegister rhs) {
2486 Mul(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2487 }
2488
emit_i16x8_add_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2489 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
2490 LiftoffRegister lhs,
2491 LiftoffRegister rhs) {
2492 Uqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2493 }
2494
emit_i16x8_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2495 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
2496 LiftoffRegister lhs,
2497 LiftoffRegister rhs) {
2498 Smin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2499 }
2500
emit_i16x8_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2501 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
2502 LiftoffRegister lhs,
2503 LiftoffRegister rhs) {
2504 Umin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2505 }
2506
emit_i16x8_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2507 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
2508 LiftoffRegister lhs,
2509 LiftoffRegister rhs) {
2510 Smax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2511 }
2512
emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)2513 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
2514 LiftoffRegister lhs,
2515 LiftoffRegister rhs) {
2516 Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2517 }
2518
emit_i8x16_shuffle(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, const uint8_t shuffle[16], bool is_swizzle)2519 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2520 LiftoffRegister lhs,
2521 LiftoffRegister rhs,
2522 const uint8_t shuffle[16],
2523 bool is_swizzle) {
2524 VRegister src1 = lhs.fp();
2525 VRegister src2 = rhs.fp();
2526 VRegister temp = dst.fp();
2527 if (dst == lhs || dst == rhs) {
2528 // dst overlaps with lhs or rhs, so we need a temporary.
2529 temp = GetUnusedRegister(kFpReg, LiftoffRegList{lhs, rhs}).fp();
2530 }
2531
2532 UseScratchRegisterScope scope(this);
2533
2534 if (src1 != src2 && !AreConsecutive(src1, src2)) {
2535 // Tbl needs consecutive registers, which our scratch registers are.
2536 src1 = scope.AcquireV(kFormat16B);
2537 src2 = scope.AcquireV(kFormat16B);
2538 DCHECK(AreConsecutive(src1, src2));
2539 Mov(src1.Q(), lhs.fp().Q());
2540 Mov(src2.Q(), rhs.fp().Q());
2541 }
2542
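  // Pack the 16 shuffle indices into two 64-bit immediates (low byte first) so
  // Movi can materialize the Tbl index vector in one go. The DCHECK ensures
  // every index stays within a single 16-byte table when lhs == rhs, or within
  // the 32-byte combined table otherwise.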
2543 int64_t imms[2] = {0, 0};
2544 for (int i = 7; i >= 0; i--) {
2545 imms[0] = (imms[0] << 8) | (shuffle[i]);
2546 imms[1] = (imms[1] << 8) | (shuffle[i + 8]);
2547 }
2548 DCHECK_EQ(0, (imms[0] | imms[1]) &
2549 (lhs == rhs ? 0xF0F0F0F0F0F0F0F0 : 0xE0E0E0E0E0E0E0E0));
2550
2551 Movi(temp.V16B(), imms[1], imms[0]);
2552
2553 if (src1 == src2) {
2554 Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B());
2555 } else {
2556 Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B());
2557 }
2558 }
2559
emit_i8x16_popcnt(LiftoffRegister dst, LiftoffRegister src)2560 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2561 LiftoffRegister src) {
2562 Cnt(dst.fp().V16B(), src.fp().V16B());
2563 }
2564
emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src)2565 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2566 LiftoffRegister src) {
2567 Dup(dst.fp().V16B(), src.gp().W());
2568 }
2569
emit_i8x16_extract_lane_u(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2570 void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
2571 LiftoffRegister lhs,
2572 uint8_t imm_lane_idx) {
2573 Umov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
2574 }
2575
emit_i8x16_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx)2576 void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
2577 LiftoffRegister lhs,
2578 uint8_t imm_lane_idx) {
2579 Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
2580 }
2581
emit_i8x16_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx)2582 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
2583 LiftoffRegister src1,
2584 LiftoffRegister src2,
2585 uint8_t imm_lane_idx) {
2586 if (dst != src1) {
2587 Mov(dst.fp().V16B(), src1.fp().V16B());
2588 }
2589 Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
2590 }
2591
emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src)2592 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
2593 LiftoffRegister src) {
2594 Neg(dst.fp().V16B(), src.fp().V16B());
2595 }
2596
emit_v128_anytrue(LiftoffRegister dst, LiftoffRegister src)2597 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
2598 LiftoffRegister src) {
2599 liftoff::EmitAnyTrue(this, dst, src);
2600 }
2601
emit_i8x16_alltrue(LiftoffRegister dst, LiftoffRegister src)2602 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
2603 LiftoffRegister src) {
2604 liftoff::EmitAllTrue(this, dst, src, kFormat16B);
2605 }
2606
emit_i8x16_bitmask(LiftoffRegister dst, LiftoffRegister src)2607 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
2608 LiftoffRegister src) {
2609 UseScratchRegisterScope temps(this);
2610 VRegister tmp = temps.AcquireQ();
2611 VRegister mask = temps.AcquireQ();
2612
2613   // Set the i-th bit in lane i. After AND-ing with tmp, lanes whose sign
2614   // bit was set keep their i-th bit, while non-negative lanes become 0.
2615 Sshr(tmp.V16B(), src.fp().V16B(), 7);
2616 Movi(mask.V2D(), 0x8040'2010'0804'0201);
2617 And(tmp.V16B(), mask.V16B(), tmp.V16B());
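  // Ext rotates the upper 8 bytes to the front and Zip1 interleaves them with
  // the lower 8, so halfword i holds the weight of lane i in its low byte and
  // of lane i + 8 in its high byte. Addv then sums the halfwords into the
  // 16-bit bitmask.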
2618 Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
2619 Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
2620 Addv(tmp.H(), tmp.V8H());
2621 Mov(dst.gp().W(), tmp.V8H(), 0);
2622 }
2623
2624 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
2625 LiftoffRegister rhs) {
2626 liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
2627 this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2628 }
2629
2630 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
2631 int32_t rhs) {
2632 Shl(dst.fp().V16B(), lhs.fp().V16B(), rhs & 7);
2633 }
2634
2635 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
2636 LiftoffRegister lhs,
2637 LiftoffRegister rhs) {
2638 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2639 liftoff::ShiftSign::kSigned>(
2640 this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2641 }
2642
2643 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
2644 LiftoffRegister lhs, int32_t rhs) {
2645 liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>(
2646 this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
2647 }
2648
2649 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
2650 LiftoffRegister lhs,
2651 LiftoffRegister rhs) {
2652 liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
2653 liftoff::ShiftSign::kUnsigned>(
2654 this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
2655 }
2656
2657 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
2658 LiftoffRegister lhs, int32_t rhs) {
2659 liftoff::EmitSimdShiftRightImmediate<kFormat16B,
2660 liftoff::ShiftSign::kUnsigned>(
2661 this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
2662 }
2663
2664 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
2665 LiftoffRegister rhs) {
2666 Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2667 }
2668
2669 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
2670 LiftoffRegister lhs,
2671 LiftoffRegister rhs) {
2672 Sqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2673 }
2674
2675 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
2676 LiftoffRegister rhs) {
2677 Sub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2678 }
2679
2680 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
2681 LiftoffRegister lhs,
2682 LiftoffRegister rhs) {
2683 Sqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2684 }
2685
2686 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
2687 LiftoffRegister lhs,
2688 LiftoffRegister rhs) {
2689 Uqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2690 }
2691
2692 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
2693 LiftoffRegister lhs,
2694 LiftoffRegister rhs) {
2695 Uqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2696 }
2697
2698 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
2699 LiftoffRegister lhs,
2700 LiftoffRegister rhs) {
2701 Smin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2702 }
2703
2704 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
2705 LiftoffRegister lhs,
2706 LiftoffRegister rhs) {
2707 Umin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2708 }
2709
2710 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
2711 LiftoffRegister lhs,
2712 LiftoffRegister rhs) {
2713 Smax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2714 }
2715
2716 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
2717 LiftoffRegister lhs,
2718 LiftoffRegister rhs) {
2719 Umax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2720 }
2721
2722 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2723 LiftoffRegister rhs) {
2724 Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2725 }
2726
2727 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2728 LiftoffRegister rhs) {
2729 Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2730 Mvn(dst.fp().V16B(), dst.fp().V16B());
2731 }
2732
2733 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2734 LiftoffRegister rhs) {
2735 Cmgt(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2736 }
2737
2738 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2739 LiftoffRegister rhs) {
2740 Cmhi(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2741 }
2742
2743 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2744 LiftoffRegister rhs) {
2745 Cmge(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2746 }
2747
2748 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2749 LiftoffRegister rhs) {
2750 Cmhs(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
2751 }
2752
2753 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2754 LiftoffRegister rhs) {
2755 Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2756 }
2757
2758 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2759 LiftoffRegister rhs) {
2760 Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2761 Mvn(dst.fp().V8H(), dst.fp().V8H());
2762 }
2763
2764 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2765 LiftoffRegister rhs) {
2766 Cmgt(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2767 }
2768
2769 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2770 LiftoffRegister rhs) {
2771 Cmhi(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2772 }
2773
2774 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2775 LiftoffRegister rhs) {
2776 Cmge(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2777 }
2778
2779 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2780 LiftoffRegister rhs) {
2781 Cmhs(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
2782 }
2783
2784 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2785 LiftoffRegister rhs) {
2786 Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2787 }
2788
2789 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2790 LiftoffRegister rhs) {
2791 Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2792 Mvn(dst.fp().V4S(), dst.fp().V4S());
2793 }
2794
2795 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2796 LiftoffRegister rhs) {
2797 Cmgt(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2798 }
2799
2800 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2801 LiftoffRegister rhs) {
2802 Cmhi(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2803 }
2804
2805 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2806 LiftoffRegister rhs) {
2807 Cmge(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2808 }
2809
2810 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2811 LiftoffRegister rhs) {
2812 Cmhs(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2813 }
2814
2815 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2816 LiftoffRegister rhs) {
2817 Cmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2818 }
2819
2820 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2821 LiftoffRegister rhs) {
2822 Cmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2823 Mvn(dst.fp().V2D(), dst.fp().V2D());
2824 }
2825
2826 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2827 LiftoffRegister rhs) {
2828 Cmgt(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2829 }
2830
2831 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2832 LiftoffRegister rhs) {
2833 Cmge(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
2834 }
2835
2836 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2837 LiftoffRegister rhs) {
2838 Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2839 }
2840
2841 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2842 LiftoffRegister rhs) {
2843 Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
2844 Mvn(dst.fp().V4S(), dst.fp().V4S());
2845 }
2846
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmgt(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmge(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
  Mvn(dst.fp().V2D(), dst.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmgt(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Fcmge(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}

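// Materialize a 128-bit constant: copy the 16 immediate bytes into two 64-bit
// halves (vals[0] = low, vals[1] = high) and emit them with a single Movi.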
void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
    const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  Movi(dst.fp().V16B(), vals[1], vals[0]);
}

void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  Mvn(dst.fp().V16B(), src.fp().V16B());
}

void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  And(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Orr(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Eor(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

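// Bsl selects bits from src1 where the corresponding mask bit is 1 and from
// src2 where it is 0. The instruction requires the mask to be in dst, so copy
// it there first if necessary.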
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2,
    LiftoffRegister mask) {
  if (dst != mask) {
    Mov(dst.fp().V16B(), mask.fp().V16B());
  }
  Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
    LiftoffRegister src) {
  Fcvtzs(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
    LiftoffRegister src) {
  Fcvtzu(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
    LiftoffRegister src) {
  Scvtf(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
    LiftoffRegister src) {
  Ucvtf(dst.fp().V4S(), src.fp().V4S());
}

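// Fcvtn narrows the two doubles into the lower 64 bits of dst and clears the
// upper half, which provides the zeroed high lanes the demote requires.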
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
    LiftoffRegister src) {
  Fcvtn(dst.fp().V2S(), src.fp().V2D());
}

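// The narrowing conversions below write the low half of dst first, so if dst
// aliases rhs, rhs is copied into a scratch register before being clobbered.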
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat8H);
  VRegister right = rhs.fp().V8H();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtn(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtn2(dst.fp().V16B(), right);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat8H);
  VRegister right = rhs.fp().V8H();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtun(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtun2(dst.fp().V16B(), right);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat4S);
  VRegister right = rhs.fp().V4S();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtn(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtn2(dst.fp().V8H(), right);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat4S);
  VRegister right = rhs.fp().V4S();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtun(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtun2(dst.fp().V8H(), right);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
    LiftoffRegister src) {
  Sxtl(dst.fp().V8H(), src.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
    LiftoffRegister src) {
  Sxtl2(dst.fp().V8H(), src.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
    LiftoffRegister src) {
  Uxtl(dst.fp().V8H(), src.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
    LiftoffRegister src) {
  Uxtl2(dst.fp().V8H(), src.fp().V16B());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
    LiftoffRegister src) {
  Sxtl(dst.fp().V4S(), src.fp().V4H());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
    LiftoffRegister src) {
  Sxtl2(dst.fp().V4S(), src.fp().V8H());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
    LiftoffRegister src) {
  Uxtl(dst.fp().V4S(), src.fp().V4H());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
    LiftoffRegister src) {
  Uxtl2(dst.fp().V4S(), src.fp().V8H());
}

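// Truncate the two doubles to 64-bit integers with saturation, then
// saturating-narrow them into the low two lanes; the upper lanes become zero.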
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
    LiftoffRegister src) {
  Fcvtzs(dst.fp().V2D(), src.fp().V2D());
  Sqxtn(dst.fp().V2S(), dst.fp().V2D());
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
    LiftoffRegister src) {
  Fcvtzu(dst.fp().V2D(), src.fp().V2D());
  Uqxtn(dst.fp().V2S(), dst.fp().V2D());
}

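// Bic computes lhs & ~rhs, matching the andnot semantics.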
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Bic(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Urhadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
    LiftoffRegister lhs,
    LiftoffRegister rhs) {
  Urhadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
    LiftoffRegister src) {
  Abs(dst.fp().V16B(), src.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
    LiftoffRegister src) {
  Abs(dst.fp().V8H(), src.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
    LiftoffRegister src) {
  Saddlp(dst.fp().V8H(), src.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
    LiftoffRegister src) {
  Uaddlp(dst.fp().V8H(), src.fp().V16B());
}

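// Extended multiplies: Smull/Umull widen and multiply the low eight byte
// lanes, Smull2/Umull2 do the same for the high eight lanes.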
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2) {
  Smull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2) {
  Umull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2) {
  Smull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2) {
  Umull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}

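// Sqrdmulh performs the saturating, rounding, doubling multiply returning the
// high half, which is the Q15 fixed-point multiplication this opcode needs.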
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
    LiftoffRegister src1,
    LiftoffRegister src2) {
  Sqrdmulh(dst.fp().V8H(), src1.fp().V8H(), src2.fp().V8H());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
    LiftoffRegister src) {
  Abs(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
    LiftoffRegister src) {
  Abs(dst.fp().V2D(), src.fp().V2D());
}

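// Load the current stack limit through limit_address and branch to the
// out-of-line code if sp is lower than or the same as the limit.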
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  Ldr(limit_address, MemOperand(limit_address));
  Cmp(sp, limit_address);
  B(ool_code, ls);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

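// PadRegList / PadVRegList pad each list to an even number of registers, so
// the paired pushes keep sp 16-byte aligned; pops restore in reverse order.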
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  PushCPURegList(liftoff::PadRegList(regs.GetGpList()));
  PushCPURegList(liftoff::PadVRegList(regs.GetFpList()));
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  PopCPURegList(liftoff::PadVRegList(regs.GetFpList()));
  PopCPURegList(liftoff::PadRegList(regs.GetGpList()));
}

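// If the number of spilled GP registers is odd, PushRegisters added a padding
// slot; skip it so the safepoint offsets match the actual stack layout.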
void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  int spill_space_size = 0;
  bool needs_padding = (all_spills.GetGpList().Count() & 1) != 0;
  if (needs_padding) {
    spill_space_size += kSystemPointerSize;
    ++spill_offset;
  }
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetLastRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DropSlots(num_stack_slots);
  Ret();
}

void LiftoffAssembler::CallC(const ValueKindSig* sig,
    const LiftoffRegister* args,
    const LiftoffRegister* rets,
    ValueKind out_argument_kind, int stack_bytes,
    ExternalReference ext_ref) {
  // The stack pointer is required to be quadword aligned.
  int total_size = RoundUp(stack_bytes, kQuadWordSizeInBytes);
  // Reserve space in the stack.
  Claim(total_size, 1);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    Poke(liftoff::GetRegFromType(*args++, param_kind), arg_bytes);
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  Mov(x0, sp);

  // Now call the C function.
  constexpr int kNumCCallArgs = 1;
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = x0;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    Peek(liftoff::GetRegFromType(*next_result_reg, out_argument_kind), 0);
  }

  Drop(total_size, 1);
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  Call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  Jump(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
    compiler::CallDescriptor* call_descriptor,
    Register target) {
  // For Arm64, we have more cache registers than wasm parameters. That means
  // that target will always be in a register.
  DCHECK(target.is_valid());
  Call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  DCHECK(target.is_valid());
  // When control flow integrity is enabled, the target is a "bti c"
  // instruction, which enforces that the jump instruction is either a "blr",
  // or a "br" with x16 or x17 as its destination.
  UseScratchRegisterScope temps(this);
  temps.Exclude(x17);
  Mov(x17, target);
  Jump(x17);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  // The stack pointer is required to be quadword aligned.
  size = RoundUp(size, kQuadWordSizeInBytes);
  Claim(size, 1);
  Mov(addr, sp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  // The stack pointer is required to be quadword aligned.
  size = RoundUp(size, kQuadWordSizeInBytes);
  Drop(size, 1);
}

void LiftoffAssembler::MaybeOSR() {}

void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
    ValueKind kind) {
  Label not_nan;
  if (kind == kF32) {
    Fcmp(src.S(), src.S());
    B(eq, &not_nan);  // x != x iff isnan(x)
    // If it's a NaN, it must be non-zero, so store that as the set value.
    Str(src.S(), MemOperand(dst));
  } else {
    DCHECK_EQ(kind, kF64);
    Fcmp(src.D(), src.D());
    B(eq, &not_nan);  // x != x iff isnan(x)
    // Double-precision NaNs must be non-zero in the most-significant 32
    // bits, so store that.
    St1(src.V4S(), 1, MemOperand(dst));
  }
  Bind(&not_nan);
}

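// Reduce the lanes with a floating-point max: the result is NaN iff at least
// one lane is NaN, so the scalar check above can be reused on the reduction.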
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
    Register tmp_gp,
    LiftoffRegister tmp_s128,
    ValueKind lane_kind) {
  DoubleRegister tmp_fp = tmp_s128.fp();
  if (lane_kind == kF32) {
    Fmaxv(tmp_fp.S(), src.fp().V4S());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    Fmaxp(tmp_fp.D(), src.fp().V2D());
  }
  emit_set_if_nan(dst, tmp_fp, lane_kind);
}

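// Copy all call parameters into their stack slots. An even number of slots is
// claimed so the stack pointer stays 16-byte aligned.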
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  // The stack pointer is required to be quadword aligned.
  asm_->Claim(RoundUp(param_slots, 2));
  for (auto& slot : slots_) {
    int poke_offset = slot.dst_slot_ * kSystemPointerSize;
    switch (slot.src_.loc()) {
      case LiftoffAssembler::VarState::kStack: {
        UseScratchRegisterScope temps(asm_);
        CPURegister scratch = liftoff::AcquireByType(&temps, slot.src_.kind());
        asm_->Ldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
        asm_->Poke(scratch, poke_offset);
        break;
      }
      case LiftoffAssembler::VarState::kRegister:
        asm_->Poke(liftoff::GetRegFromType(slot.src_.reg(), slot.src_.kind()),
                   poke_offset);
        break;
      case LiftoffAssembler::VarState::kIntConst:
        DCHECK(slot.src_.kind() == kI32 || slot.src_.kind() == kI64);
        if (slot.src_.i32_const() == 0) {
          Register zero_reg = slot.src_.kind() == kI32 ? wzr : xzr;
          asm_->Poke(zero_reg, poke_offset);
        } else {
          UseScratchRegisterScope temps(asm_);
          Register scratch =
              slot.src_.kind() == kI32 ? temps.AcquireW() : temps.AcquireX();
          asm_->Mov(scratch, int64_t{slot.src_.i32_const()});
          asm_->Poke(scratch, poke_offset);
        }
        break;
    }
  }
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_