// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
#define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/arm/register-arm.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return eq;
    case kUnequal:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedLessEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kSignedGreaterEqual:
      return ge;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedLessEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kUnsignedGreaterEqual:
      return hs;
  }
}

// half
//  slot        Frame
//  -----+--------------------+---------------------------
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | StackFrame::WASM   |
//  -2   |    instance        |
//  -3   |    feedback vector |
//  -4   |    tiering budget  |
//  -----+--------------------+---------------------------
//  -5   |    slot 0 (high)   |   ^
//  -6   |    slot 0 (low)    |   |
//  -7   |    slot 1 (high)   | Frame slots
//  -8   |    slot 1 (low)    |   |
//       |                    |   v
//  -----+--------------------+  <-- stack ptr (sp)
//
static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
              "Slot size should be twice the size of the 32 bit pointer.");
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
// kPatchInstructionsRequired is an upper bound on the number of instructions
// PatchPrepareStackFrame may use to grow the stack appropriately. Subtracting
// a large constant requires three instructions: movw + movt + sub.
constexpr int32_t kPatchInstructionsRequired = 3;
constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;

inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
  int32_t half_offset =
      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
  return MemOperand(offset > 0 ?
fp : sp, -offset + half_offset); 87} 88 89inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); } 90 91inline MemOperand GetMemOp(LiftoffAssembler* assm, 92 UseScratchRegisterScope* temps, Register addr, 93 Register offset, int32_t offset_imm) { 94 if (offset != no_reg) { 95 if (offset_imm == 0) return MemOperand(addr, offset); 96 Register tmp = temps->Acquire(); 97 assm->add(tmp, offset, Operand(offset_imm)); 98 return MemOperand(addr, tmp); 99 } 100 return MemOperand(addr, offset_imm); 101} 102 103inline Register CalculateActualAddress(LiftoffAssembler* assm, 104 UseScratchRegisterScope* temps, 105 Register addr_reg, Register offset_reg, 106 uintptr_t offset_imm, 107 Register result_reg = no_reg) { 108 if (offset_reg == no_reg && offset_imm == 0) { 109 if (result_reg == no_reg) { 110 return addr_reg; 111 } else { 112 assm->mov(result_reg, addr_reg); 113 return result_reg; 114 } 115 } 116 Register actual_addr_reg = 117 result_reg != no_reg ? result_reg : temps->Acquire(); 118 if (offset_reg == no_reg) { 119 assm->add(actual_addr_reg, addr_reg, Operand(offset_imm)); 120 } else { 121 assm->add(actual_addr_reg, addr_reg, Operand(offset_reg)); 122 if (offset_imm != 0) { 123 assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm)); 124 } 125 } 126 return actual_addr_reg; 127} 128 129inline LiftoffCondition MakeUnsigned(LiftoffCondition cond) { 130 switch (cond) { 131 case kSignedLessThan: 132 return kUnsignedLessThan; 133 case kSignedLessEqual: 134 return kUnsignedLessEqual; 135 case kSignedGreaterThan: 136 return kUnsignedGreaterThan; 137 case kSignedGreaterEqual: 138 return kUnsignedGreaterEqual; 139 case kEqual: 140 case kUnequal: 141 case kUnsignedLessThan: 142 case kUnsignedLessEqual: 143 case kUnsignedGreaterThan: 144 case kUnsignedGreaterEqual: 145 return cond; 146 default: 147 UNREACHABLE(); 148 } 149} 150 151template <void (Assembler::*op)(Register, Register, Register, SBit, Condition), 152 void (Assembler::*op_with_carry)(Register, Register, const Operand&, 153 SBit, Condition)> 154inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst, 155 LiftoffRegister lhs, LiftoffRegister rhs) { 156 Register dst_low = dst.low_gp(); 157 if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) { 158 dst_low = 159 assm->GetUnusedRegister(kGpReg, LiftoffRegList{lhs, rhs, dst.high_gp()}) 160 .gp(); 161 } 162 (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al); 163 (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()), 164 LeaveCC, al); 165 if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low); 166} 167 168template <void (Assembler::*op)(Register, Register, const Operand&, SBit, 169 Condition), 170 void (Assembler::*op_with_carry)(Register, Register, const Operand&, 171 SBit, Condition)> 172inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst, 173 LiftoffRegister lhs, int64_t imm) { 174 // The compiler allocated registers such that either {dst == lhs} or there is 175 // no overlap between the two. 
176 DCHECK_NE(dst.low_gp(), lhs.high_gp()); 177 int32_t imm_low_word = static_cast<int32_t>(imm); 178 int32_t imm_high_word = static_cast<int32_t>(imm >> 32); 179 (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm_low_word), SetCC, al); 180 (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(imm_high_word), 181 LeaveCC, al); 182} 183 184template <void (TurboAssembler::*op)(Register, Register, Register, Register, 185 Register), 186 bool is_left_shift> 187inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst, 188 LiftoffRegister src, Register amount) { 189 Register src_low = src.low_gp(); 190 Register src_high = src.high_gp(); 191 Register dst_low = dst.low_gp(); 192 Register dst_high = dst.high_gp(); 193 // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low} 194 // then {dst_high}. 195 Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low; 196 LiftoffRegList pinned = {clobbered_dst_reg, src}; 197 Register amount_capped = 198 pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp(); 199 assm->and_(amount_capped, amount, Operand(0x3F)); 200 201 // Ensure that writing the first half of {dst} does not overwrite the still 202 // needed half of {src}. 203 Register* later_src_reg = is_left_shift ? &src_low : &src_high; 204 if (*later_src_reg == clobbered_dst_reg) { 205 *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp(); 206 assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg); 207 } 208 209 (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped); 210} 211 212inline FloatRegister GetFloatRegister(DoubleRegister reg) { 213 DCHECK_LT(reg.code(), kDoubleCode_d16); 214 return LowDwVfpRegister::from_code(reg.code()).low(); 215} 216 217inline Simd128Register GetSimd128Register(DoubleRegister reg) { 218 return QwNeonRegister::from_code(reg.code() / 2); 219} 220 221inline Simd128Register GetSimd128Register(LiftoffRegister reg) { 222 return liftoff::GetSimd128Register(reg.low_fp()); 223} 224 225enum class MinOrMax : uint8_t { kMin, kMax }; 226template <typename RegisterType> 227inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst, 228 RegisterType lhs, RegisterType rhs, 229 MinOrMax min_or_max) { 230 DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8); 231 if (lhs == rhs) { 232 assm->TurboAssembler::Move(dst, lhs); 233 return; 234 } 235 Label done, is_nan; 236 if (min_or_max == MinOrMax::kMin) { 237 assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan); 238 } else { 239 assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan); 240 } 241 assm->b(&done); 242 assm->bind(&is_nan); 243 // Create a NaN output. 
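  // (vadd works here because FloatMin/FloatMax only branch to {is_nan} when at
  // least one input is NaN, and adding a NaN operand yields a quiet NaN.)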
244 assm->vadd(dst, lhs, rhs); 245 assm->bind(&done); 246} 247 248inline Register EnsureNoAlias(Assembler* assm, Register reg, 249 Register must_not_alias, 250 UseScratchRegisterScope* temps) { 251 if (reg != must_not_alias) return reg; 252 Register tmp = temps->Acquire(); 253 DCHECK_NE(reg, tmp); 254 assm->mov(tmp, reg); 255 return tmp; 256} 257 258inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt, 259 NeonDataType sdt, LiftoffRegister dst, 260 LiftoffRegister lhs, LiftoffRegister rhs) { 261 if (dst == lhs) { 262 assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs)); 263 assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs)); 264 } else { 265 assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs)); 266 assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs)); 267 } 268} 269 270inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst, 271 LiftoffRegister lhs, LiftoffRegister rhs, 272 Condition cond) { 273 DCHECK(cond == eq || cond == ne || cond == lt || cond == le); 274 275 QwNeonRegister dest = liftoff::GetSimd128Register(dst); 276 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 277 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 278 UseScratchRegisterScope temps(assm); 279 Register scratch = temps.Acquire(); 280 281 assm->mov(scratch, Operand(0)); 282 assm->VFPCompareAndSetFlags(left.low(), right.low()); 283 assm->mov(scratch, Operand(-1), LeaveCC, cond); 284 if (cond == lt || cond == le) { 285 // Check for NaN. 286 assm->mov(scratch, Operand(0), LeaveCC, vs); 287 } 288 assm->vmov(dest.low(), scratch, scratch); 289 290 assm->mov(scratch, Operand(0)); 291 assm->VFPCompareAndSetFlags(left.high(), right.high()); 292 assm->mov(scratch, Operand(-1), LeaveCC, cond); 293 if (cond == lt || cond == le) { 294 // Check for NaN. 295 assm->mov(scratch, Operand(0), LeaveCC, vs); 296 } 297 assm->vmov(dest.high(), scratch, scratch); 298} 299 300inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst, 301 ValueKind kind) { 302#ifdef DEBUG 303 // The {str} instruction needs a temp register when the immediate in the 304 // provided MemOperand does not fit into 12 bits. This happens for large stack 305 // frames. This DCHECK checks that the temp register is available when needed. 306 DCHECK(UseScratchRegisterScope{assm}.CanAcquire()); 307#endif 308 switch (kind) { 309 case kI32: 310 case kOptRef: 311 case kRef: 312 case kRtt: 313 assm->str(src.gp(), dst); 314 break; 315 case kI64: 316 // Positive offsets should be lowered to kI32. 
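      // The register pair is stored as two 32-bit words: the low word at the
      // slot offset, the high word one half stack slot above it.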
317 assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset())); 318 assm->str( 319 src.high_gp(), 320 MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize)); 321 break; 322 case kF32: 323 assm->vstr(liftoff::GetFloatRegister(src.fp()), dst); 324 break; 325 case kF64: 326 assm->vstr(src.fp(), dst); 327 break; 328 case kS128: { 329 UseScratchRegisterScope temps(assm); 330 Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(), 331 no_reg, dst.offset()); 332 assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr)); 333 break; 334 } 335 default: 336 UNREACHABLE(); 337 } 338} 339 340inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src, 341 ValueKind kind) { 342 switch (kind) { 343 case kI32: 344 case kOptRef: 345 case kRef: 346 case kRtt: 347 assm->ldr(dst.gp(), src); 348 break; 349 case kI64: 350 assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset())); 351 assm->ldr( 352 dst.high_gp(), 353 MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize)); 354 break; 355 case kF32: 356 assm->vldr(liftoff::GetFloatRegister(dst.fp()), src); 357 break; 358 case kF64: 359 assm->vldr(dst.fp(), src); 360 break; 361 case kS128: { 362 // Get memory address of slot to fill from. 363 UseScratchRegisterScope temps(assm); 364 Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(), 365 no_reg, src.offset()); 366 assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr)); 367 break; 368 } 369 default: 370 UNREACHABLE(); 371 } 372} 373 374constexpr int MaskFromNeonDataType(NeonDataType dt) { 375 switch (dt) { 376 case NeonS8: 377 case NeonU8: 378 return 7; 379 case NeonS16: 380 case NeonU16: 381 return 15; 382 case NeonS32: 383 case NeonU32: 384 return 31; 385 case NeonS64: 386 case NeonU64: 387 return 63; 388 } 389} 390 391enum ShiftDirection { kLeft, kRight }; 392 393template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz> 394inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst, 395 LiftoffRegister lhs, LiftoffRegister rhs) { 396 constexpr int mask = MaskFromNeonDataType(dt); 397 UseScratchRegisterScope temps(assm); 398 QwNeonRegister tmp = temps.AcquireQ(); 399 Register shift = temps.Acquire(); 400 assm->and_(shift, rhs.gp(), Operand(mask)); 401 assm->vdup(sz, tmp, shift); 402 if (dir == kRight) { 403 assm->vneg(sz, tmp, tmp); 404 } 405 assm->vshl(dt, liftoff::GetSimd128Register(dst), 406 liftoff::GetSimd128Register(lhs), tmp); 407} 408 409template <ShiftDirection dir, NeonDataType dt> 410inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst, 411 LiftoffRegister lhs, int32_t rhs) { 412 // vshr by 0 is not allowed, so check for it, and only move if dst != lhs. 
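  // Masking the shift count to the lane width also matches wasm semantics,
  // where SIMD shift amounts are taken modulo the lane size in bits.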
413 int32_t shift = rhs & MaskFromNeonDataType(dt); 414 if (shift) { 415 if (dir == kLeft) { 416 assm->vshl(dt, liftoff::GetSimd128Register(dst), 417 liftoff::GetSimd128Register(lhs), shift); 418 } else { 419 assm->vshr(dt, liftoff::GetSimd128Register(dst), 420 liftoff::GetSimd128Register(lhs), shift); 421 } 422 } else if (dst != lhs) { 423 assm->vmov(liftoff::GetSimd128Register(dst), 424 liftoff::GetSimd128Register(lhs)); 425 } 426} 427 428inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, 429 LiftoffRegister src) { 430 UseScratchRegisterScope temps(assm); 431 DwVfpRegister scratch = temps.AcquireD(); 432 assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp()); 433 assm->vpmax(NeonU32, scratch, scratch, scratch); 434 assm->ExtractLane(dst.gp(), scratch, NeonS32, 0); 435 assm->cmp(dst.gp(), Operand(0)); 436 assm->mov(dst.gp(), Operand(1), LeaveCC, ne); 437} 438 439} // namespace liftoff 440 441int LiftoffAssembler::PrepareStackFrame() { 442 if (!CpuFeatures::IsSupported(ARMv7)) { 443 bailout(kUnsupportedArchitecture, "Liftoff needs ARMv7"); 444 return 0; 445 } 446 uint32_t offset = static_cast<uint32_t>(pc_offset()); 447 // PatchPrepareStackFrame will patch this in order to increase the stack 448 // appropriately. Additional nops are required as the bytes operand might 449 // require extra moves to encode. 450 for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) { 451 nop(); 452 } 453 DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize, 454 pc_offset()); 455 return offset; 456} 457 458void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params, 459 int stack_param_delta) { 460 UseScratchRegisterScope temps(this); 461 Register scratch = temps.Acquire(); 462 463 // Push the return address and frame pointer to complete the stack frame. 464 sub(sp, sp, Operand(8)); 465 ldr(scratch, MemOperand(fp, 4)); 466 str(scratch, MemOperand(sp, 4)); 467 ldr(scratch, MemOperand(fp, 0)); 468 str(scratch, MemOperand(sp, 0)); 469 470 // Shift the whole frame upwards. 471 int slot_count = num_callee_stack_params + 2; 472 for (int i = slot_count - 1; i >= 0; --i) { 473 ldr(scratch, MemOperand(sp, i * 4)); 474 str(scratch, MemOperand(fp, (i - stack_param_delta) * 4)); 475 } 476 477 // Set the new stack and frame pointer. 478 sub(sp, fp, Operand(stack_param_delta * 4)); 479 Pop(lr, fp); 480} 481 482void LiftoffAssembler::AlignFrameSize() {} 483 484void LiftoffAssembler::PatchPrepareStackFrame( 485 int offset, SafepointTableBuilder* safepoint_table_builder) { 486 // The frame_size includes the frame marker and the instance slot. Both are 487 // pushed as part of frame construction, so we don't need to allocate memory 488 // for them anymore. 489 int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize; 490 491 PatchingAssembler patching_assembler(AssemblerOptions{}, 492 buffer_start_ + offset, 493 liftoff::kPatchInstructionsRequired); 494 if (V8_LIKELY(frame_size < 4 * KB)) { 495 // This is the standard case for small frames: just subtract from SP and be 496 // done with it. 497 patching_assembler.sub(sp, sp, Operand(frame_size)); 498 patching_assembler.PadWithNops(); 499 return; 500 } 501 502 // The frame size is bigger than 4KB, so we might overflow the available stack 503 // space if we first allocate the frame and then do the stack check (we will 504 // need some remaining stack space for throwing the exception). That's why we 505 // check the available stack space before we allocate the frame. 
To do this we 506 // replace the {__ sub(sp, sp, framesize)} with a jump to OOL code that does 507 // this "extended stack check". 508 // 509 // The OOL code can simply be generated here with the normal assembler, 510 // because all other code generation, including OOL code, has already finished 511 // when {PatchPrepareStackFrame} is called. The function prologue then jumps 512 // to the current {pc_offset()} to execute the OOL code for allocating the 513 // large frame. 514 515 // Emit the unconditional branch in the function prologue (from {offset} to 516 // {pc_offset()}). 517 patching_assembler.b(pc_offset() - offset - Instruction::kPcLoadDelta); 518 patching_assembler.PadWithNops(); 519 520 // If the frame is bigger than the stack, we throw the stack overflow 521 // exception unconditionally. Thereby we can avoid the integer overflow 522 // check in the condition code. 523 RecordComment("OOL: stack check for large frame"); 524 Label continuation; 525 if (frame_size < FLAG_stack_size * 1024) { 526 UseScratchRegisterScope temps(this); 527 Register stack_limit = temps.Acquire(); 528 ldr(stack_limit, 529 FieldMemOperand(kWasmInstanceRegister, 530 WasmInstanceObject::kRealStackLimitAddressOffset)); 531 ldr(stack_limit, MemOperand(stack_limit)); 532 add(stack_limit, stack_limit, Operand(frame_size)); 533 cmp(sp, stack_limit); 534 b(cs /* higher or same */, &continuation); 535 } 536 537 Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL); 538 // The call will not return; just define an empty safepoint. 539 safepoint_table_builder->DefineSafepoint(this); 540 if (FLAG_debug_code) stop(); 541 542 bind(&continuation); 543 544 // Now allocate the stack space. Note that this might do more than just 545 // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}. 546 AllocateStackSpace(frame_size); 547 548 // Jump back to the start of the function, from {pc_offset()} to 549 // right after the reserved space for the {__ sub(sp, sp, framesize)} (which 550 // is a branch now). 
551 int func_start_offset = 552 offset + liftoff::kPatchInstructionsRequired * kInstrSize; 553 b(func_start_offset - pc_offset() - Instruction::kPcLoadDelta); 554} 555 556void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); } 557 558void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); } 559 560// static 561constexpr int LiftoffAssembler::StaticStackFrameSize() { 562 return liftoff::kTierupBudgetOffset; 563} 564 565int LiftoffAssembler::SlotSizeForType(ValueKind kind) { 566 switch (kind) { 567 case kS128: 568 return value_kind_size(kind); 569 default: 570 return kStackSlotSize; 571 } 572} 573 574bool LiftoffAssembler::NeedsAlignment(ValueKind kind) { 575 return kind == kS128 || is_reference(kind); 576} 577 578void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value, 579 RelocInfo::Mode rmode) { 580 switch (value.type().kind()) { 581 case kI32: 582 TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode)); 583 break; 584 case kI64: { 585 DCHECK(RelocInfo::IsNoInfo(rmode)); 586 int32_t low_word = value.to_i64(); 587 int32_t high_word = value.to_i64() >> 32; 588 TurboAssembler::Move(reg.low_gp(), Operand(low_word)); 589 TurboAssembler::Move(reg.high_gp(), Operand(high_word)); 590 break; 591 } 592 case kF32: 593 vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed()); 594 break; 595 case kF64: { 596 Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp(); 597 vmov(reg.fp(), base::Double(value.to_f64_boxed().get_bits()), 598 extra_scratch); 599 break; 600 } 601 default: 602 UNREACHABLE(); 603 } 604} 605 606void LiftoffAssembler::LoadInstanceFromFrame(Register dst) { 607 ldr(dst, liftoff::GetInstanceOperand()); 608} 609 610void LiftoffAssembler::LoadFromInstance(Register dst, Register instance, 611 int offset, int size) { 612 DCHECK_LE(0, offset); 613 MemOperand src{instance, offset}; 614 switch (size) { 615 case 1: 616 ldrb(dst, src); 617 break; 618 case 4: 619 ldr(dst, src); 620 break; 621 default: 622 UNIMPLEMENTED(); 623 } 624} 625 626void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, 627 Register instance, 628 int offset) { 629 STATIC_ASSERT(kTaggedSize == kSystemPointerSize); 630 ldr(dst, MemOperand{instance, offset}); 631} 632 633void LiftoffAssembler::SpillInstance(Register instance) { 634 str(instance, liftoff::GetInstanceOperand()); 635} 636 637void LiftoffAssembler::ResetOSRTarget() {} 638 639namespace liftoff { 640#define __ lasm-> 641inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst, 642 Register src_addr, Register offset_reg, 643 int32_t offset_imm, LoadType type, 644 LiftoffRegList pinned, 645 uint32_t* protected_load_pc = nullptr, 646 bool is_load_mem = false) { 647 DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair()); 648 UseScratchRegisterScope temps(lasm); 649 if (type.value() == LoadType::kF64Load || 650 type.value() == LoadType::kF32Load || 651 type.value() == LoadType::kS128Load) { 652 Register actual_src_addr = liftoff::CalculateActualAddress( 653 lasm, &temps, src_addr, offset_reg, offset_imm); 654 if (type.value() == LoadType::kF64Load) { 655 // Armv6 is not supported so Neon can be used to avoid alignment issues. 656 CpuFeatureScope scope(lasm, NEON); 657 __ vld1(Neon64, NeonListOperand(dst.fp()), 658 NeonMemOperand(actual_src_addr)); 659 } else if (type.value() == LoadType::kF32Load) { 660 // TODO(arm): Use vld1 for f32 when implemented in simulator as used for 661 // f64. It supports unaligned access. 662 Register scratch = 663 (actual_src_addr == src_addr) ? 
temps.Acquire() : actual_src_addr;
      __ ldr(scratch, MemOperand(actual_src_addr));
      __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch);
    } else {
      // Armv6 is not supported so Neon can be used to avoid alignment issues.
      CpuFeatureScope scope(lasm, NEON);
      __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2),
              NeonMemOperand(actual_src_addr));
    }
  } else {
    MemOperand src_op =
        liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm);
    if (protected_load_pc) *protected_load_pc = __ pc_offset();
    switch (type.value()) {
      case LoadType::kI32Load8U:
        __ ldrb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8U:
        __ ldrb(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load8S:
        __ ldrsb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8S:
        __ ldrsb(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI32Load16U:
        __ ldrh(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16U:
        __ ldrh(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load16S:
        __ ldrsh(dst.gp(), src_op);
        break;
      case LoadType::kI32Load:
        __ ldr(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16S:
        __ ldrsh(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load32U:
        __ ldr(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI64Load32S:
        __ ldr(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load:
        __ ldr(dst.low_gp(), src_op);
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
        if (temps.CanAcquire()) {
          src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize));
        }
        __ ldr(dst.high_gp(), src_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg,
                        offset_imm, LoadType::kI32Load, pinned);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
  ldr(dst, src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Register actual_offset_reg = offset_reg;
  if (offset_reg != no_reg && offset_imm != 0) {
    if (cache_state()->is_used(LiftoffRegister(offset_reg))) {
      actual_offset_reg = GetUnusedRegister(kGpReg, pinned).gp();
    }
    add(actual_offset_reg, offset_reg, Operand(offset_imm));
  }
  MemOperand dst_op = actual_offset_reg == no_reg
                          ?
MemOperand(dst_addr, offset_imm) 770 : MemOperand(dst_addr, actual_offset_reg); 771 str(src.gp(), dst_op); 772 773 if (skip_write_barrier || FLAG_disable_write_barriers) return; 774 775 // The write barrier. 776 Label write_barrier; 777 Label exit; 778 CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne, 779 &write_barrier); 780 b(&exit); 781 bind(&write_barrier); 782 JumpIfSmi(src.gp(), &exit); 783 CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq, 784 &exit); 785 CallRecordWriteStubSaveRegisters( 786 dst_addr, 787 actual_offset_reg == no_reg ? Operand(offset_imm) 788 : Operand(actual_offset_reg), 789 RememberedSetAction::kEmit, SaveFPRegsMode::kSave, 790 StubCallMode::kCallWasmRuntimeStub); 791 bind(&exit); 792} 793 794void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, 795 Register offset_reg, uint32_t offset_imm, 796 LoadType type, LiftoffRegList pinned, 797 uint32_t* protected_load_pc, bool is_load_mem, 798 bool i64_offset) { 799 // Offsets >=2GB are statically OOB on 32-bit systems. 800 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max()); 801 liftoff::LoadInternal(this, dst, src_addr, offset_reg, 802 static_cast<int32_t>(offset_imm), type, pinned, 803 protected_load_pc, is_load_mem); 804} 805 806void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, 807 uint32_t offset_imm, LiftoffRegister src, 808 StoreType type, LiftoffRegList pinned, 809 uint32_t* protected_store_pc, bool is_store_mem) { 810 // Offsets >=2GB are statically OOB on 32-bit systems. 811 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max()); 812 UseScratchRegisterScope temps(this); 813 if (type.value() == StoreType::kF64Store) { 814 Register actual_dst_addr = liftoff::CalculateActualAddress( 815 this, &temps, dst_addr, offset_reg, offset_imm); 816 // Armv6 is not supported so Neon can be used to avoid alignment issues. 817 CpuFeatureScope scope(this, NEON); 818 vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr)); 819 } else if (type.value() == StoreType::kS128Store) { 820 Register actual_dst_addr = liftoff::CalculateActualAddress( 821 this, &temps, dst_addr, offset_reg, offset_imm); 822 // Armv6 is not supported so Neon can be used to avoid alignment issues. 823 CpuFeatureScope scope(this, NEON); 824 vst1(Neon8, NeonListOperand(src.low_fp(), 2), 825 NeonMemOperand(actual_dst_addr)); 826 } else if (type.value() == StoreType::kF32Store) { 827 // TODO(arm): Use vst1 for f32 when implemented in simulator as used for 828 // f64. It supports unaligned access. 829 // CalculateActualAddress will only not use a scratch register if the 830 // following condition holds, otherwise another register must be 831 // retrieved. 832 Register scratch = (offset_reg == no_reg && offset_imm == 0) 833 ? 
temps.Acquire()
                           : GetUnusedRegister(kGpReg, pinned).gp();
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    vmov(scratch, liftoff::GetFloatRegister(src.fp()));
    str(scratch, MemOperand(actual_dst_addr));
  } else {
    MemOperand dst_op =
        liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
    if (protected_store_pc) *protected_store_pc = pc_offset();
    switch (type.value()) {
      case StoreType::kI64Store8:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store8:
        strb(src.gp(), dst_op);
        break;
      case StoreType::kI64Store16:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store16:
        strh(src.gp(), dst_op);
        break;
      case StoreType::kI64Store32:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store:
        str(src.gp(), dst_op);
        break;
      case StoreType::kI64Store:
        str(src.low_gp(), dst_op);
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
        if (temps.CanAcquire()) {
          dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize));
        }
        str(src.high_gp(), dst_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicOp32(
    LiftoffAssembler* lasm, Register dst_addr, Register offset_reg,
    uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result,
    LiftoffRegList pinned,
    void (Assembler::*load)(Register, Register, Condition),
    void (Assembler::*store)(Register, Register, Register, Condition),
    void (*op)(LiftoffAssembler*, Register, Register, Register)) {
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to
  // be the same register.
  Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
899 DCHECK(result.gp() != value.gp() && result.gp() != dst_addr && 900 result.gp() != offset_reg); 901 902 UseScratchRegisterScope temps(lasm); 903 Register actual_addr = liftoff::CalculateActualAddress( 904 lasm, &temps, dst_addr, offset_reg, offset_imm); 905 906 __ dmb(ISH); 907 Label retry; 908 __ bind(&retry); 909 (lasm->*load)(result.gp(), actual_addr, al); 910 op(lasm, temp, result.gp(), value.gp()); 911 (lasm->*store)(store_result, temp, actual_addr, al); 912 __ cmp(store_result, Operand(0)); 913 __ b(ne, &retry); 914 __ dmb(ISH); 915} 916 917inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs, 918 Register rhs) { 919 __ add(dst, lhs, rhs); 920} 921 922inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs, 923 Register rhs) { 924 __ sub(dst, lhs, rhs); 925} 926 927inline void And(LiftoffAssembler* lasm, Register dst, Register lhs, 928 Register rhs) { 929 __ and_(dst, lhs, rhs); 930} 931 932inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs, 933 Register rhs) { 934 __ orr(dst, lhs, rhs); 935} 936 937inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs, 938 Register rhs) { 939 __ eor(dst, lhs, rhs); 940} 941 942inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs, 943 Register rhs) { 944 __ mov(dst, rhs); 945} 946 947inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr, 948 Register offset_reg, uint32_t offset_imm, 949 LiftoffRegister value, LiftoffRegister result, 950 StoreType type, 951 void (*op)(LiftoffAssembler*, Register, Register, 952 Register)) { 953 LiftoffRegList pinned = {dst_addr, offset_reg, value, result}; 954 switch (type.value()) { 955 case StoreType::kI64Store8: 956 __ LoadConstant(result.high(), WasmValue(0)); 957 result = result.low(); 958 value = value.low(); 959 V8_FALLTHROUGH; 960 case StoreType::kI32Store8: 961 liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, 962 pinned, &Assembler::ldrexb, &Assembler::strexb, op); 963 return; 964 case StoreType::kI64Store16: 965 __ LoadConstant(result.high(), WasmValue(0)); 966 result = result.low(); 967 value = value.low(); 968 V8_FALLTHROUGH; 969 case StoreType::kI32Store16: 970 liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, 971 pinned, &Assembler::ldrexh, &Assembler::strexh, op); 972 return; 973 case StoreType::kI64Store32: 974 __ LoadConstant(result.high(), WasmValue(0)); 975 result = result.low(); 976 value = value.low(); 977 V8_FALLTHROUGH; 978 case StoreType::kI32Store: 979 liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, 980 pinned, &Assembler::ldrex, &Assembler::strex, op); 981 return; 982 default: 983 UNREACHABLE(); 984 } 985} 986 987inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr, 988 Register offset_reg, uint32_t offset_imm, 989 LiftoffRegister value, 990 base::Optional<LiftoffRegister> result, 991 void (*op)(LiftoffAssembler*, LiftoffRegister, 992 LiftoffRegister, LiftoffRegister)) { 993 // strexd loads a 64 bit word into two registers. The first register needs 994 // to have an even index, e.g. r8, the second register needs to be the one 995 // with the next higher index, e.g. r9 if the first register is r8. In the 996 // following code we use the fixed register pair r8/r9 to make the code here 997 // simpler, even though other register pairs would also be possible. 998 constexpr Register dst_low = r8; 999 constexpr Register dst_high = r9; 1000 1001 // Make sure {dst_low} and {dst_high} are not occupied by any other value. 
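  // If {dst_addr}, {offset_reg} or the {value} pair currently lives in r8/r9,
  // the {ClearRegister} calls below move it elsewhere so that the fixed pair
  // can be clobbered.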
1002 Register value_low = value.low_gp(); 1003 Register value_high = value.high_gp(); 1004 LiftoffRegList pinned = {dst_addr, offset_reg, value_low, 1005 value_high, dst_low, dst_high}; 1006 __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high}, 1007 pinned); 1008 pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high}; 1009 __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high}, 1010 pinned); 1011 pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high}; 1012 1013 // Make sure that {result}, if it exists, also does not overlap with 1014 // {dst_low} and {dst_high}. We don't have to transfer the value stored in 1015 // {result}. 1016 Register result_low = no_reg; 1017 Register result_high = no_reg; 1018 if (result.has_value()) { 1019 result_low = result.value().low_gp(); 1020 if (pinned.has(result_low)) { 1021 result_low = __ GetUnusedRegister(kGpReg, pinned).gp(); 1022 } 1023 pinned.set(result_low); 1024 1025 result_high = result.value().high_gp(); 1026 if (pinned.has(result_high)) { 1027 result_high = __ GetUnusedRegister(kGpReg, pinned).gp(); 1028 } 1029 pinned.set(result_high); 1030 } 1031 1032 Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp(); 1033 1034 UseScratchRegisterScope temps(lasm); 1035 Register actual_addr = liftoff::CalculateActualAddress( 1036 lasm, &temps, dst_addr, offset_reg, offset_imm); 1037 1038 __ dmb(ISH); 1039 Label retry; 1040 __ bind(&retry); 1041 // {ldrexd} is needed here so that the {strexd} instruction below can 1042 // succeed. We don't need the value we are reading. We use {dst_low} and 1043 // {dst_high} as the destination registers because {ldrexd} has the same 1044 // restrictions on registers as {strexd}, see the comment above. 1045 __ ldrexd(dst_low, dst_high, actual_addr); 1046 if (result.has_value()) { 1047 __ mov(result_low, dst_low); 1048 __ mov(result_high, dst_high); 1049 } 1050 op(lasm, LiftoffRegister::ForPair(dst_low, dst_high), 1051 LiftoffRegister::ForPair(dst_low, dst_high), 1052 LiftoffRegister::ForPair(value_low, value_high)); 1053 __ strexd(store_result, dst_low, dst_high, actual_addr); 1054 __ cmp(store_result, Operand(0)); 1055 __ b(ne, &retry); 1056 __ dmb(ISH); 1057 1058 if (result.has_value()) { 1059 if (result_low != result.value().low_gp()) { 1060 __ mov(result.value().low_gp(), result_low); 1061 } 1062 if (result_high != result.value().high_gp()) { 1063 __ mov(result.value().high_gp(), result_high); 1064 } 1065 } 1066} 1067 1068inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst, 1069 LiftoffRegister, LiftoffRegister src) { 1070 __ mov(dst.low_gp(), src.low_gp()); 1071 __ mov(dst.high_gp(), src.high_gp()); 1072} 1073 1074#undef __ 1075} // namespace liftoff 1076 1077void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr, 1078 Register offset_reg, uint32_t offset_imm, 1079 LoadType type, LiftoffRegList pinned) { 1080 if (type.value() != LoadType::kI64Load) { 1081 Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true); 1082 dmb(ISH); 1083 return; 1084 } 1085 // ldrexd loads a 64 bit word into two registers. The first register needs to 1086 // have an even index, e.g. r8, the second register needs to be the one with 1087 // the next higher index, e.g. r9 if the first register is r8. In the 1088 // following code we use the fixed register pair r8/r9 to make the code here 1089 // simpler, even though other register pairs would also be possible. 
1090 constexpr Register dst_low = r8; 1091 constexpr Register dst_high = r9; 1092 SpillRegisters(dst_low, dst_high); 1093 { 1094 UseScratchRegisterScope temps(this); 1095 Register actual_addr = liftoff::CalculateActualAddress( 1096 this, &temps, src_addr, offset_reg, offset_imm); 1097 ldrexd(dst_low, dst_high, actual_addr); 1098 dmb(ISH); 1099 } 1100 1101 ParallelRegisterMove( 1102 {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kI64}}); 1103} 1104 1105void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, 1106 uint32_t offset_imm, LiftoffRegister src, 1107 StoreType type, LiftoffRegList pinned) { 1108 if (type.value() == StoreType::kI64Store) { 1109 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {}, 1110 liftoff::I64Store); 1111 return; 1112 } 1113 1114 dmb(ISH); 1115 Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true); 1116 dmb(ISH); 1117 return; 1118} 1119 1120void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, 1121 uint32_t offset_imm, LiftoffRegister value, 1122 LiftoffRegister result, StoreType type) { 1123 if (type.value() == StoreType::kI64Store) { 1124 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1125 liftoff::I64Binop<&Assembler::add, &Assembler::adc>); 1126 return; 1127 } 1128 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1129 type, &liftoff::Add); 1130} 1131 1132void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, 1133 uint32_t offset_imm, LiftoffRegister value, 1134 LiftoffRegister result, StoreType type) { 1135 if (type.value() == StoreType::kI64Store) { 1136 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1137 liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>); 1138 return; 1139 } 1140 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1141 type, &liftoff::Sub); 1142} 1143 1144void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg, 1145 uint32_t offset_imm, LiftoffRegister value, 1146 LiftoffRegister result, StoreType type) { 1147 if (type.value() == StoreType::kI64Store) { 1148 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1149 liftoff::I64Binop<&Assembler::and_, &Assembler::and_>); 1150 return; 1151 } 1152 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1153 type, &liftoff::And); 1154} 1155 1156void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg, 1157 uint32_t offset_imm, LiftoffRegister value, 1158 LiftoffRegister result, StoreType type) { 1159 if (type.value() == StoreType::kI64Store) { 1160 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1161 liftoff::I64Binop<&Assembler::orr, &Assembler::orr>); 1162 return; 1163 } 1164 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1165 type, &liftoff::Or); 1166} 1167 1168void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg, 1169 uint32_t offset_imm, LiftoffRegister value, 1170 LiftoffRegister result, StoreType type) { 1171 if (type.value() == StoreType::kI64Store) { 1172 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1173 liftoff::I64Binop<&Assembler::eor, &Assembler::eor>); 1174 return; 1175 } 1176 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1177 type, &liftoff::Xor); 1178} 1179 1180void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg, 1181 uint32_t 
offset_imm, 1182 LiftoffRegister value, 1183 LiftoffRegister result, StoreType type) { 1184 if (type.value() == StoreType::kI64Store) { 1185 liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, 1186 liftoff::I64Store); 1187 return; 1188 } 1189 liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, 1190 type, &liftoff::Exchange); 1191} 1192 1193namespace liftoff { 1194#define __ lasm-> 1195 1196inline void AtomicI64CompareExchange(LiftoffAssembler* lasm, 1197 Register dst_addr_reg, Register offset_reg, 1198 uint32_t offset_imm, 1199 LiftoffRegister expected, 1200 LiftoffRegister new_value, 1201 LiftoffRegister result) { 1202 // To implement I64AtomicCompareExchange, we nearly need all registers, with 1203 // some registers having special constraints, e.g. like for {new_value} and 1204 // {result} the low-word register has to have an even register code, and the 1205 // high-word has to be in the next higher register. To avoid complicated 1206 // register allocation code here, we just assign fixed registers to all 1207 // values here, and then move all values into the correct register. 1208 Register dst_addr = r0; 1209 Register offset = r1; 1210 Register result_low = r4; 1211 Register result_high = r5; 1212 Register new_value_low = r2; 1213 Register new_value_high = r3; 1214 Register store_result = r6; 1215 Register expected_low = r8; 1216 Register expected_high = r9; 1217 1218 // We spill all registers, so that we can re-assign them afterwards. 1219 __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low, 1220 new_value_high, store_result, expected_low, expected_high); 1221 1222 __ ParallelRegisterMove( 1223 {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value, 1224 kI64}, 1225 {LiftoffRegister::ForPair(expected_low, expected_high), expected, kI64}, 1226 {dst_addr, dst_addr_reg, kI32}, 1227 {offset, offset_reg != no_reg ? offset_reg : offset, kI32}}); 1228 1229 { 1230 UseScratchRegisterScope temps(lasm); 1231 Register temp = liftoff::CalculateActualAddress( 1232 lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset, 1233 offset_imm, dst_addr); 1234 // Make sure the actual address is stored in the right register. 1235 DCHECK_EQ(dst_addr, temp); 1236 USE(temp); 1237 } 1238 1239 Label retry; 1240 Label done; 1241 __ dmb(ISH); 1242 __ bind(&retry); 1243 __ ldrexd(result_low, result_high, dst_addr); 1244 __ cmp(result_low, expected_low); 1245 __ b(ne, &done); 1246 __ cmp(result_high, expected_high); 1247 __ b(ne, &done); 1248 __ strexd(store_result, new_value_low, new_value_high, dst_addr); 1249 __ cmp(store_result, Operand(0)); 1250 __ b(ne, &retry); 1251 __ dmb(ISH); 1252 __ bind(&done); 1253 1254 __ ParallelRegisterMove( 1255 {{result, LiftoffRegister::ForPair(result_low, result_high), kI64}}); 1256} 1257#undef __ 1258} // namespace liftoff 1259 1260void LiftoffAssembler::AtomicCompareExchange( 1261 Register dst_addr, Register offset_reg, uint32_t offset_imm, 1262 LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result, 1263 StoreType type) { 1264 if (type.value() == StoreType::kI64Store) { 1265 liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm, 1266 expected, new_value, result); 1267 return; 1268 } 1269 1270 // The other versions of CompareExchange can share code, but need special load 1271 // and store instructions. 
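  // {load} and {store} are set to the exclusive access instructions of the
  // matching width (ldrexb/strexb, ldrexh/strexh, or ldrex/strex) below.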
1272 void (Assembler::*load)(Register, Register, Condition) = nullptr; 1273 void (Assembler::*store)(Register, Register, Register, Condition) = nullptr; 1274 1275 LiftoffRegList pinned = {dst_addr, offset_reg}; 1276 // We need to remember the high word of {result}, so we can set it to zero in 1277 // the end if necessary. 1278 Register result_high = no_reg; 1279 switch (type.value()) { 1280 case StoreType::kI64Store8: 1281 result_high = result.high_gp(); 1282 result = result.low(); 1283 new_value = new_value.low(); 1284 expected = expected.low(); 1285 V8_FALLTHROUGH; 1286 case StoreType::kI32Store8: 1287 load = &Assembler::ldrexb; 1288 store = &Assembler::strexb; 1289 // We have to clear the high bits of {expected}, as we can only do a 1290 // 32-bit comparison. If the {expected} register is used, we spill it 1291 // first. 1292 if (cache_state()->is_used(expected)) { 1293 SpillRegister(expected); 1294 } 1295 uxtb(expected.gp(), expected.gp()); 1296 break; 1297 case StoreType::kI64Store16: 1298 result_high = result.high_gp(); 1299 result = result.low(); 1300 new_value = new_value.low(); 1301 expected = expected.low(); 1302 V8_FALLTHROUGH; 1303 case StoreType::kI32Store16: 1304 load = &Assembler::ldrexh; 1305 store = &Assembler::strexh; 1306 // We have to clear the high bits of {expected}, as we can only do a 1307 // 32-bit comparison. If the {expected} register is used, we spill it 1308 // first. 1309 if (cache_state()->is_used(expected)) { 1310 SpillRegister(expected); 1311 } 1312 uxth(expected.gp(), expected.gp()); 1313 break; 1314 case StoreType::kI64Store32: 1315 result_high = result.high_gp(); 1316 result = result.low(); 1317 new_value = new_value.low(); 1318 expected = expected.low(); 1319 V8_FALLTHROUGH; 1320 case StoreType::kI32Store: 1321 load = &Assembler::ldrex; 1322 store = &Assembler::strex; 1323 break; 1324 default: 1325 UNREACHABLE(); 1326 } 1327 pinned.set(new_value); 1328 pinned.set(expected); 1329 1330 Register result_reg = result.gp(); 1331 if (pinned.has(result)) { 1332 result_reg = GetUnusedRegister(kGpReg, pinned).gp(); 1333 } 1334 pinned.set(LiftoffRegister(result)); 1335 Register store_result = GetUnusedRegister(kGpReg, pinned).gp(); 1336 1337 UseScratchRegisterScope temps(this); 1338 Register actual_addr = liftoff::CalculateActualAddress( 1339 this, &temps, dst_addr, offset_reg, offset_imm); 1340 1341 Label retry; 1342 Label done; 1343 dmb(ISH); 1344 bind(&retry); 1345 (this->*load)(result.gp(), actual_addr, al); 1346 cmp(result.gp(), expected.gp()); 1347 b(ne, &done); 1348 (this->*store)(store_result, new_value.gp(), actual_addr, al); 1349 cmp(store_result, Operand(0)); 1350 b(ne, &retry); 1351 dmb(ISH); 1352 bind(&done); 1353 1354 if (result.gp() != result_reg) { 1355 mov(result.gp(), result_reg); 1356 } 1357 if (result_high != no_reg) { 1358 LoadConstant(LiftoffRegister(result_high), WasmValue(0)); 1359 } 1360} 1361 1362void LiftoffAssembler::AtomicFence() { dmb(ISH); } 1363 1364void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst, 1365 uint32_t caller_slot_idx, 1366 ValueKind kind) { 1367 MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize); 1368 liftoff::Load(this, dst, src, kind); 1369} 1370 1371void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src, 1372 uint32_t caller_slot_idx, 1373 ValueKind kind) { 1374 MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize); 1375 liftoff::Store(this, src, dst, kind); 1376} 1377 1378void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset, 1379 ValueKind kind) { 1380 
MemOperand src(sp, offset); 1381 liftoff::Load(this, dst, src, kind); 1382} 1383 1384void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, 1385 ValueKind kind) { 1386 DCHECK_NE(dst_offset, src_offset); 1387 LiftoffRegister reg = GetUnusedRegister(reg_class_for(kind), {}); 1388 Fill(reg, src_offset, kind); 1389 Spill(dst_offset, reg, kind); 1390} 1391 1392void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) { 1393 DCHECK_NE(dst, src); 1394 DCHECK(kind == kI32 || is_reference(kind)); 1395 TurboAssembler::Move(dst, src); 1396} 1397 1398void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src, 1399 ValueKind kind) { 1400 DCHECK_NE(dst, src); 1401 if (kind == kF32) { 1402 vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); 1403 } else if (kind == kF64) { 1404 vmov(dst, src); 1405 } else { 1406 DCHECK_EQ(kS128, kind); 1407 vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 1408 } 1409} 1410 1411void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) { 1412 // The {str} instruction needs a temp register when the immediate in the 1413 // provided MemOperand does not fit into 12 bits. This happens for large stack 1414 // frames. This DCHECK checks that the temp register is available when needed. 1415 DCHECK(UseScratchRegisterScope{this}.CanAcquire()); 1416 DCHECK_LT(0, offset); 1417 RecordUsedSpillOffset(offset); 1418 MemOperand dst(fp, -offset); 1419 liftoff::Store(this, reg, dst, kind); 1420} 1421 1422void LiftoffAssembler::Spill(int offset, WasmValue value) { 1423 RecordUsedSpillOffset(offset); 1424 MemOperand dst = liftoff::GetStackSlot(offset); 1425 UseScratchRegisterScope temps(this); 1426 Register src = no_reg; 1427 // The scratch register will be required by str if multiple instructions 1428 // are required to encode the offset, and so we cannot use it in that case. 1429 if (!ImmediateFitsAddrMode2Instruction(dst.offset())) { 1430 src = GetUnusedRegister(kGpReg, {}).gp(); 1431 } else { 1432 src = temps.Acquire(); 1433 } 1434 switch (value.type().kind()) { 1435 case kI32: 1436 mov(src, Operand(value.to_i32())); 1437 str(src, dst); 1438 break; 1439 case kI64: { 1440 int32_t low_word = value.to_i64(); 1441 mov(src, Operand(low_word)); 1442 str(src, liftoff::GetHalfStackSlot(offset, kLowWord)); 1443 int32_t high_word = value.to_i64() >> 32; 1444 mov(src, Operand(high_word)); 1445 str(src, liftoff::GetHalfStackSlot(offset, kHighWord)); 1446 break; 1447 } 1448 default: 1449 // We do not track f32 and f64 constants, hence they are unreachable. 1450 UNREACHABLE(); 1451 } 1452} 1453 1454void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) { 1455 liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind); 1456} 1457 1458void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) { 1459 ldr(reg, liftoff::GetHalfStackSlot(offset, half)); 1460} 1461 1462void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) { 1463 DCHECK_LT(0, size); 1464 DCHECK_EQ(0, size % 4); 1465 RecordUsedSpillOffset(start + size); 1466 1467 // We need a zero reg. Always use r0 for that, and push it before to restore 1468 // its value afterwards. 1469 push(r0); 1470 mov(r0, Operand(0)); 1471 1472 if (size <= 36) { 1473 // Special straight-line code for up to 9 words. Generates one 1474 // instruction per word. 
1475 for (int offset = 4; offset <= size; offset += 4) { 1476 str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord)); 1477 } 1478 } else { 1479 // General case for bigger counts (9 instructions). 1480 // Use r1 for start address (inclusive), r2 for end address (exclusive). 1481 push(r1); 1482 push(r2); 1483 sub(r1, fp, Operand(start + size)); 1484 sub(r2, fp, Operand(start)); 1485 1486 Label loop; 1487 bind(&loop); 1488 str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex)); 1489 cmp(r1, r2); 1490 b(&loop, ne); 1491 1492 pop(r2); 1493 pop(r1); 1494 } 1495 1496 pop(r0); 1497} 1498 1499#define I32_BINOP(name, instruction) \ 1500 void LiftoffAssembler::emit_##name(Register dst, Register lhs, \ 1501 Register rhs) { \ 1502 instruction(dst, lhs, rhs); \ 1503 } 1504#define I32_BINOP_I(name, instruction) \ 1505 I32_BINOP(name, instruction) \ 1506 void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \ 1507 int32_t imm) { \ 1508 instruction(dst, lhs, Operand(imm)); \ 1509 } 1510#define I32_SHIFTOP(name, instruction) \ 1511 void LiftoffAssembler::emit_##name(Register dst, Register src, \ 1512 Register amount) { \ 1513 UseScratchRegisterScope temps(this); \ 1514 Register scratch = temps.Acquire(); \ 1515 and_(scratch, amount, Operand(0x1f)); \ 1516 instruction(dst, src, Operand(scratch)); \ 1517 } \ 1518 void LiftoffAssembler::emit_##name##i(Register dst, Register src, \ 1519 int32_t amount) { \ 1520 if (V8_LIKELY((amount & 31) != 0)) { \ 1521 instruction(dst, src, Operand(amount & 31)); \ 1522 } else if (dst != src) { \ 1523 mov(dst, src); \ 1524 } \ 1525 } 1526#define FP32_UNOP(name, instruction) \ 1527 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ 1528 instruction(liftoff::GetFloatRegister(dst), \ 1529 liftoff::GetFloatRegister(src)); \ 1530 } 1531#define FP32_BINOP(name, instruction) \ 1532 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ 1533 DoubleRegister rhs) { \ 1534 instruction(liftoff::GetFloatRegister(dst), \ 1535 liftoff::GetFloatRegister(lhs), \ 1536 liftoff::GetFloatRegister(rhs)); \ 1537 } 1538#define FP64_UNOP(name, instruction) \ 1539 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ 1540 instruction(dst, src); \ 1541 } 1542#define FP64_BINOP(name, instruction) \ 1543 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ 1544 DoubleRegister rhs) { \ 1545 instruction(dst, lhs, rhs); \ 1546 } 1547 1548I32_BINOP_I(i32_add, add) 1549I32_BINOP_I(i32_sub, sub) 1550I32_BINOP(i32_mul, mul) 1551I32_BINOP_I(i32_and, and_) 1552I32_BINOP_I(i32_or, orr) 1553I32_BINOP_I(i32_xor, eor) 1554I32_SHIFTOP(i32_shl, lsl) 1555I32_SHIFTOP(i32_sar, asr) 1556I32_SHIFTOP(i32_shr, lsr) 1557FP32_BINOP(f32_add, vadd) 1558FP32_BINOP(f32_sub, vsub) 1559FP32_BINOP(f32_mul, vmul) 1560FP32_BINOP(f32_div, vdiv) 1561FP32_UNOP(f32_abs, vabs) 1562FP32_UNOP(f32_neg, vneg) 1563FP32_UNOP(f32_sqrt, vsqrt) 1564FP64_BINOP(f64_add, vadd) 1565FP64_BINOP(f64_sub, vsub) 1566FP64_BINOP(f64_mul, vmul) 1567FP64_BINOP(f64_div, vdiv) 1568FP64_UNOP(f64_abs, vabs) 1569FP64_UNOP(f64_neg, vneg) 1570FP64_UNOP(f64_sqrt, vsqrt) 1571 1572#undef I32_BINOP 1573#undef I32_SHIFTOP 1574#undef FP32_UNOP 1575#undef FP32_BINOP 1576#undef FP64_UNOP 1577#undef FP64_BINOP 1578 1579void LiftoffAssembler::emit_i32_clz(Register dst, Register src) { 1580 clz(dst, src); 1581} 1582 1583void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) { 1584 rbit(dst, src); 1585 clz(dst, dst); 1586} 1587 1588namespace 
liftoff { 1589inline void GeneratePopCnt(Assembler* assm, Register dst, Register src, 1590 Register scratch1, Register scratch2) { 1591 DCHECK(!AreAliased(dst, scratch1, scratch2)); 1592 if (src == scratch1) std::swap(scratch1, scratch2); 1593 // x = x - ((x & (0x55555555 << 1)) >> 1) 1594 assm->and_(scratch1, src, Operand(0xaaaaaaaa)); 1595 assm->sub(dst, src, Operand(scratch1, LSR, 1)); 1596 // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2) 1597 assm->mov(scratch1, Operand(0x33333333)); 1598 assm->and_(scratch2, dst, Operand(scratch1, LSL, 2)); 1599 assm->and_(scratch1, dst, scratch1); 1600 assm->add(dst, scratch1, Operand(scratch2, LSR, 2)); 1601 // x = (x + (x >> 4)) & 0x0F0F0F0F 1602 assm->add(dst, dst, Operand(dst, LSR, 4)); 1603 assm->and_(dst, dst, Operand(0x0f0f0f0f)); 1604 // x = x + (x >> 8) 1605 assm->add(dst, dst, Operand(dst, LSR, 8)); 1606 // x = x + (x >> 16) 1607 assm->add(dst, dst, Operand(dst, LSR, 16)); 1608 // x = x & 0x3F 1609 assm->and_(dst, dst, Operand(0x3f)); 1610} 1611} // namespace liftoff 1612 1613bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) { 1614 LiftoffRegList pinned = {dst}; 1615 Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); 1616 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); 1617 liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2); 1618 return true; 1619} 1620 1621void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs, 1622 Label* trap_div_by_zero, 1623 Label* trap_div_unrepresentable) { 1624 if (!CpuFeatures::IsSupported(SUDIV)) { 1625 bailout(kMissingCPUFeature, "i32_divs"); 1626 return; 1627 } 1628 CpuFeatureScope scope(this, SUDIV); 1629 // Issue division early so we can perform the trapping checks whilst it 1630 // completes. 1631 bool speculative_sdiv = dst != lhs && dst != rhs; 1632 if (speculative_sdiv) { 1633 sdiv(dst, lhs, rhs); 1634 } 1635 Label noTrap; 1636 // Check for division by zero. 1637 cmp(rhs, Operand(0)); 1638 b(trap_div_by_zero, eq); 1639 // Check for kMinInt / -1. This is unrepresentable. 1640 cmp(rhs, Operand(-1)); 1641 b(&noTrap, ne); 1642 cmp(lhs, Operand(kMinInt)); 1643 b(trap_div_unrepresentable, eq); 1644 bind(&noTrap); 1645 if (!speculative_sdiv) { 1646 sdiv(dst, lhs, rhs); 1647 } 1648} 1649 1650void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs, 1651 Label* trap_div_by_zero) { 1652 if (!CpuFeatures::IsSupported(SUDIV)) { 1653 bailout(kMissingCPUFeature, "i32_divu"); 1654 return; 1655 } 1656 CpuFeatureScope scope(this, SUDIV); 1657 // Check for division by zero. 1658 cmp(rhs, Operand(0)); 1659 b(trap_div_by_zero, eq); 1660 udiv(dst, lhs, rhs); 1661} 1662 1663void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs, 1664 Label* trap_div_by_zero) { 1665 if (!CpuFeatures::IsSupported(SUDIV)) { 1666 // When this case is handled, a check for ARMv7 is required to use mls. 1667 // Mls support is implied with SUDIV support. 1668 bailout(kMissingCPUFeature, "i32_rems"); 1669 return; 1670 } 1671 CpuFeatureScope scope(this, SUDIV); 1672 // No need to check kMinInt / -1 because the result is kMinInt and then 1673 // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0. 1674 UseScratchRegisterScope temps(this); 1675 Register scratch = temps.Acquire(); 1676 sdiv(scratch, lhs, rhs); 1677 // Check for division by zero. 1678 cmp(rhs, Operand(0)); 1679 b(trap_div_by_zero, eq); 1680 // Compute remainder. 
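  // mls computes dst = lhs - (lhs / rhs) * rhs.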
1681 mls(dst, scratch, rhs, lhs); 1682} 1683 1684void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs, 1685 Label* trap_div_by_zero) { 1686 if (!CpuFeatures::IsSupported(SUDIV)) { 1687 // When this case is handled, a check for ARMv7 is required to use mls. 1688 // Mls support is implied with SUDIV support. 1689 bailout(kMissingCPUFeature, "i32_remu"); 1690 return; 1691 } 1692 CpuFeatureScope scope(this, SUDIV); 1693 // No need to check kMinInt / -1 because the result is kMinInt and then 1694 // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0. 1695 UseScratchRegisterScope temps(this); 1696 Register scratch = temps.Acquire(); 1697 udiv(scratch, lhs, rhs); 1698 // Check for division by zero. 1699 cmp(rhs, Operand(0)); 1700 b(trap_div_by_zero, eq); 1701 // Compute remainder. 1702 mls(dst, scratch, rhs, lhs); 1703} 1704 1705void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs, 1706 LiftoffRegister rhs) { 1707 liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs); 1708} 1709 1710void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs, 1711 int64_t imm) { 1712 liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm); 1713} 1714 1715void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs, 1716 LiftoffRegister rhs) { 1717 liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs); 1718} 1719 1720void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs, 1721 LiftoffRegister rhs) { 1722 // Idea: 1723 // [ lhs_hi | lhs_lo ] * [ rhs_hi | rhs_lo ] 1724 // = [ lhs_hi * rhs_lo | ] (32 bit mul, shift 32) 1725 // + [ lhs_lo * rhs_hi | ] (32 bit mul, shift 32) 1726 // + [ lhs_lo * rhs_lo ] (32x32->64 mul, shift 0) 1727 UseScratchRegisterScope temps(this); 1728 Register scratch = temps.Acquire(); 1729 // scratch = lhs_hi * rhs_lo 1730 mul(scratch, lhs.high_gp(), rhs.low_gp()); 1731 // scratch += lhs_lo * rhs_hi 1732 mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch); 1733 // TODO(arm): use umlal once implemented correctly in the simulator. 1734 // [dst_hi|dst_lo] = lhs_lo * rhs_lo 1735 umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp()); 1736 // dst_hi += scratch 1737 add(dst.high_gp(), dst.high_gp(), scratch); 1738} 1739 1740bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs, 1741 LiftoffRegister rhs, 1742 Label* trap_div_by_zero, 1743 Label* trap_div_unrepresentable) { 1744 return false; 1745} 1746 1747bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs, 1748 LiftoffRegister rhs, 1749 Label* trap_div_by_zero) { 1750 return false; 1751} 1752 1753bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs, 1754 LiftoffRegister rhs, 1755 Label* trap_div_by_zero) { 1756 return false; 1757} 1758 1759bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs, 1760 LiftoffRegister rhs, 1761 Label* trap_div_by_zero) { 1762 return false; 1763} 1764 1765void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, 1766 Register amount) { 1767 liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount); 1768} 1769 1770void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src, 1771 int32_t amount) { 1772 UseScratchRegisterScope temps(this); 1773 // {src.low_gp()} will still be needed after writing {dst.high_gp()}. 
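// liftoff::EnsureNoAlias (a helper defined earlier in this file) returns {src.low_gp()} itself, or a scratch copy of it if it aliases {dst.high_gp()}.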
1774 Register src_low = 1775 liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps); 1776 1777 LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63); 1778} 1779 1780void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, 1781 Register amount) { 1782 liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount); 1783} 1784 1785void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src, 1786 int32_t amount) { 1787 UseScratchRegisterScope temps(this); 1788 // {src.high_gp()} will still be needed after writing {dst.low_gp()}. 1789 Register src_high = 1790 liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps); 1791 1792 AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63); 1793} 1794 1795void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, 1796 Register amount) { 1797 liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount); 1798} 1799 1800void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src, 1801 int32_t amount) { 1802 UseScratchRegisterScope temps(this); 1803 // {src.high_gp()} will still be needed after writing {dst.low_gp()}. 1804 Register src_high = 1805 liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps); 1806 1807 LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63); 1808} 1809 1810void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { 1811 // return high == 0 ? 32 + CLZ32(low) : CLZ32(high); 1812 Label done; 1813 Label high_is_zero; 1814 cmp(src.high_gp(), Operand(0)); 1815 b(&high_is_zero, eq); 1816 1817 clz(dst.low_gp(), src.high_gp()); 1818 jmp(&done); 1819 1820 bind(&high_is_zero); 1821 clz(dst.low_gp(), src.low_gp()); 1822 add(dst.low_gp(), dst.low_gp(), Operand(32)); 1823 1824 bind(&done); 1825 mov(dst.high_gp(), Operand(0)); // High word of result is always 0. 1826} 1827 1828void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) { 1829 // return low == 0 ? 32 + CTZ32(high) : CTZ32(low); 1830 // CTZ32(x) = CLZ(RBIT(x)) 1831 Label done; 1832 Label low_is_zero; 1833 cmp(src.low_gp(), Operand(0)); 1834 b(&low_is_zero, eq); 1835 1836 rbit(dst.low_gp(), src.low_gp()); 1837 clz(dst.low_gp(), dst.low_gp()); 1838 jmp(&done); 1839 1840 bind(&low_is_zero); 1841 rbit(dst.low_gp(), src.high_gp()); 1842 clz(dst.low_gp(), dst.low_gp()); 1843 add(dst.low_gp(), dst.low_gp(), Operand(32)); 1844 1845 bind(&done); 1846 mov(dst.high_gp(), Operand(0)); // High word of result is always 0. 1847} 1848 1849bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst, 1850 LiftoffRegister src) { 1851 // Produce partial popcnts in the two dst registers, making sure not to 1852 // overwrite the second src register before using it. 1853 Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp(); 1854 Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp(); 1855 LiftoffRegList pinned = {dst, src2}; 1856 Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); 1857 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); 1858 liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2); 1859 liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2); 1860 // Now add the two into the lower dst reg and clear the higher dst reg. 
1861 add(dst.low_gp(), dst.low_gp(), dst.high_gp()); 1862 mov(dst.high_gp(), Operand(0)); 1863 return true; 1864} 1865 1866void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) { 1867 UseScratchRegisterScope temps(this); 1868 Register scratch = temps.Acquire(); 1869 ldr(scratch, MemOperand(dst.gp(), offset)); 1870 add(scratch, scratch, Operand(Smi::FromInt(1))); 1871 str(scratch, MemOperand(dst.gp(), offset)); 1872} 1873 1874bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) { 1875 if (CpuFeatures::IsSupported(ARMv8)) { 1876 CpuFeatureScope scope(this, ARMv8); 1877 vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); 1878 return true; 1879 } 1880 return false; 1881} 1882 1883bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) { 1884 if (CpuFeatures::IsSupported(ARMv8)) { 1885 CpuFeatureScope scope(this, ARMv8); 1886 vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); 1887 return true; 1888 } 1889 return false; 1890} 1891 1892bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) { 1893 if (CpuFeatures::IsSupported(ARMv8)) { 1894 CpuFeatureScope scope(this, ARMv8); 1895 vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); 1896 return true; 1897 } 1898 return false; 1899} 1900 1901bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst, 1902 DoubleRegister src) { 1903 if (CpuFeatures::IsSupported(ARMv8)) { 1904 CpuFeatureScope scope(this, ARMv8); 1905 vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); 1906 return true; 1907 } 1908 return false; 1909} 1910 1911void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs, 1912 DoubleRegister rhs) { 1913 liftoff::EmitFloatMinOrMax( 1914 this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs), 1915 liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin); 1916} 1917 1918void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, 1919 DoubleRegister rhs) { 1920 liftoff::EmitFloatMinOrMax( 1921 this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs), 1922 liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax); 1923} 1924 1925bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) { 1926 if (CpuFeatures::IsSupported(ARMv8)) { 1927 CpuFeatureScope scope(this, ARMv8); 1928 vrintp(dst, src); 1929 return true; 1930 } 1931 return false; 1932} 1933 1934bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) { 1935 if (CpuFeatures::IsSupported(ARMv8)) { 1936 CpuFeatureScope scope(this, ARMv8); 1937 vrintm(dst, src); 1938 return true; 1939 } 1940 return false; 1941} 1942 1943bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) { 1944 if (CpuFeatures::IsSupported(ARMv8)) { 1945 CpuFeatureScope scope(this, ARMv8); 1946 vrintz(dst, src); 1947 return true; 1948 } 1949 return false; 1950} 1951 1952bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst, 1953 DoubleRegister src) { 1954 if (CpuFeatures::IsSupported(ARMv8)) { 1955 CpuFeatureScope scope(this, ARMv8); 1956 vrintn(dst, src); 1957 return true; 1958 } 1959 return false; 1960} 1961 1962void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs, 1963 DoubleRegister rhs) { 1964 liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin); 1965} 1966 1967void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs, 1968 DoubleRegister rhs) { 1969 
liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax); 1970} 1971 1972void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, 1973 DoubleRegister rhs) { 1974 constexpr uint32_t kF32SignBit = uint32_t{1} << 31; 1975 UseScratchRegisterScope temps(this); 1976 Register scratch = GetUnusedRegister(kGpReg, {}).gp(); 1977 Register scratch2 = temps.Acquire(); 1978 VmovLow(scratch, lhs); 1979 // Clear sign bit in {scratch}. 1980 bic(scratch, scratch, Operand(kF32SignBit)); 1981 VmovLow(scratch2, rhs); 1982 // Isolate sign bit in {scratch2}. 1983 and_(scratch2, scratch2, Operand(kF32SignBit)); 1984 // Combine {scratch2} into {scratch}. 1985 orr(scratch, scratch, scratch2); 1986 VmovLow(dst, scratch); 1987} 1988 1989void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, 1990 DoubleRegister rhs) { 1991 constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31; 1992 // On arm, we cannot hold the whole f64 value in a gp register, so we just 1993 // operate on the upper half (UH). 1994 UseScratchRegisterScope temps(this); 1995 Register scratch = GetUnusedRegister(kGpReg, {}).gp(); 1996 Register scratch2 = temps.Acquire(); 1997 VmovHigh(scratch, lhs); 1998 // Clear sign bit in {scratch}. 1999 bic(scratch, scratch, Operand(kF64SignBitHighWord)); 2000 VmovHigh(scratch2, rhs); 2001 // Isolate sign bit in {scratch2}. 2002 and_(scratch2, scratch2, Operand(kF64SignBitHighWord)); 2003 // Combine {scratch2} into {scratch}. 2004 orr(scratch, scratch, scratch2); 2005 vmov(dst, lhs); 2006 VmovHigh(dst, scratch); 2007} 2008 2009bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, 2010 LiftoffRegister dst, 2011 LiftoffRegister src, Label* trap) { 2012 switch (opcode) { 2013 case kExprI32ConvertI64: 2014 TurboAssembler::Move(dst.gp(), src.low_gp()); 2015 return true; 2016 case kExprI32SConvertF32: { 2017 UseScratchRegisterScope temps(this); 2018 SwVfpRegister scratch_f = temps.AcquireS(); 2019 vcvt_s32_f32( 2020 scratch_f, 2021 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. 2022 vmov(dst.gp(), scratch_f); 2023 // Check underflow and NaN. 2024 vmov(scratch_f, Float32(static_cast<float>(INT32_MIN))); 2025 VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); 2026 b(trap, lt); 2027 // Check overflow. 2028 cmp(dst.gp(), Operand(-1)); 2029 b(trap, vs); 2030 return true; 2031 } 2032 case kExprI32UConvertF32: { 2033 UseScratchRegisterScope temps(this); 2034 SwVfpRegister scratch_f = temps.AcquireS(); 2035 vcvt_u32_f32( 2036 scratch_f, 2037 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. 2038 vmov(dst.gp(), scratch_f); 2039 // Check underflow and NaN. 2040 vmov(scratch_f, Float32(-1.0f)); 2041 VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); 2042 b(trap, le); 2043 // Check overflow. 2044 cmp(dst.gp(), Operand(-1)); 2045 b(trap, eq); 2046 return true; 2047 } 2048 case kExprI32SConvertF64: { 2049 UseScratchRegisterScope temps(this); 2050 SwVfpRegister scratch_f = temps.AcquireS(); 2051 vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. 2052 vmov(dst.gp(), scratch_f); 2053 // Check underflow and NaN. 2054 DwVfpRegister scratch_d = temps.AcquireD(); 2055 vmov(scratch_d, base::Double(static_cast<double>(INT32_MIN - 1.0))); 2056 VFPCompareAndSetFlags(src.fp(), scratch_d); 2057 b(trap, le); 2058 // Check overflow. 
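// INT32_MAX + 1 is exactly representable as a double, so any input >= that bound is out of range.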
2059 vmov(scratch_d, base::Double(static_cast<double>(INT32_MAX + 1.0))); 2060 VFPCompareAndSetFlags(src.fp(), scratch_d); 2061 b(trap, ge); 2062 return true; 2063 } 2064 case kExprI32UConvertF64: { 2065 UseScratchRegisterScope temps(this); 2066 SwVfpRegister scratch_f = temps.AcquireS(); 2067 vcvt_u32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. 2068 vmov(dst.gp(), scratch_f); 2069 // Check underflow and NaN. 2070 DwVfpRegister scratch_d = temps.AcquireD(); 2071 vmov(scratch_d, base::Double(static_cast<double>(-1.0))); 2072 VFPCompareAndSetFlags(src.fp(), scratch_d); 2073 b(trap, le); 2074 // Check overflow. 2075 vmov(scratch_d, base::Double(static_cast<double>(UINT32_MAX + 1.0))); 2076 VFPCompareAndSetFlags(src.fp(), scratch_d); 2077 b(trap, ge); 2078 return true; 2079 } 2080 case kExprI32SConvertSatF32: { 2081 UseScratchRegisterScope temps(this); 2082 SwVfpRegister scratch_f = temps.AcquireS(); 2083 vcvt_s32_f32( 2084 scratch_f, 2085 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. 2086 vmov(dst.gp(), scratch_f); 2087 return true; 2088 } 2089 case kExprI32UConvertSatF32: { 2090 UseScratchRegisterScope temps(this); 2091 SwVfpRegister scratch_f = temps.AcquireS(); 2092 vcvt_u32_f32( 2093 scratch_f, 2094 liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero. 2095 vmov(dst.gp(), scratch_f); 2096 return true; 2097 } 2098 case kExprI32SConvertSatF64: { 2099 UseScratchRegisterScope temps(this); 2100 SwVfpRegister scratch_f = temps.AcquireS(); 2101 vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. 2102 vmov(dst.gp(), scratch_f); 2103 return true; 2104 } 2105 case kExprI32UConvertSatF64: { 2106 UseScratchRegisterScope temps(this); 2107 SwVfpRegister scratch_f = temps.AcquireS(); 2108 vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero. 
2109 vmov(dst.gp(), scratch_f); 2110 return true; 2111 } 2112 case kExprI32ReinterpretF32: 2113 vmov(dst.gp(), liftoff::GetFloatRegister(src.fp())); 2114 return true; 2115 case kExprI64SConvertI32: 2116 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); 2117 mov(dst.high_gp(), Operand(src.gp(), ASR, 31)); 2118 return true; 2119 case kExprI64UConvertI32: 2120 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); 2121 mov(dst.high_gp(), Operand(0)); 2122 return true; 2123 case kExprI64ReinterpretF64: 2124 vmov(dst.low_gp(), dst.high_gp(), src.fp()); 2125 return true; 2126 case kExprF32SConvertI32: { 2127 SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); 2128 vmov(dst_float, src.gp()); 2129 vcvt_f32_s32(dst_float, dst_float); 2130 return true; 2131 } 2132 case kExprF32UConvertI32: { 2133 SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); 2134 vmov(dst_float, src.gp()); 2135 vcvt_f32_u32(dst_float, dst_float); 2136 return true; 2137 } 2138 case kExprF32ConvertF64: 2139 vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp()); 2140 return true; 2141 case kExprF32ReinterpretI32: 2142 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); 2143 return true; 2144 case kExprF64SConvertI32: { 2145 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); 2146 vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); 2147 return true; 2148 } 2149 case kExprF64UConvertI32: { 2150 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); 2151 vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); 2152 return true; 2153 } 2154 case kExprF64ConvertF32: 2155 vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp())); 2156 return true; 2157 case kExprF64ReinterpretI64: 2158 vmov(dst.fp(), src.low_gp(), src.high_gp()); 2159 return true; 2160 case kExprF64SConvertI64: 2161 case kExprF64UConvertI64: 2162 case kExprI64SConvertF32: 2163 case kExprI64UConvertF32: 2164 case kExprI64SConvertSatF32: 2165 case kExprI64UConvertSatF32: 2166 case kExprF32SConvertI64: 2167 case kExprF32UConvertI64: 2168 case kExprI64SConvertF64: 2169 case kExprI64UConvertF64: 2170 case kExprI64SConvertSatF64: 2171 case kExprI64UConvertSatF64: 2172 // These cases can be handled by the C fallback function. 
2173 return false; 2174 default: 2175 UNREACHABLE(); 2176 } 2177} 2178 2179void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) { 2180 sxtb(dst, src); 2181} 2182 2183void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) { 2184 sxth(dst, src); 2185} 2186 2187void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst, 2188 LiftoffRegister src) { 2189 emit_i32_signextend_i8(dst.low_gp(), src.low_gp()); 2190 mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31)); 2191} 2192 2193void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst, 2194 LiftoffRegister src) { 2195 emit_i32_signextend_i16(dst.low_gp(), src.low_gp()); 2196 mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31)); 2197} 2198 2199void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst, 2200 LiftoffRegister src) { 2201 TurboAssembler::Move(dst.low_gp(), src.low_gp()); 2202 mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31)); 2203} 2204 2205void LiftoffAssembler::emit_jump(Label* label) { b(label); } 2206 2207void LiftoffAssembler::emit_jump(Register target) { bx(target); } 2208 2209void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond, 2210 Label* label, ValueKind kind, 2211 Register lhs, Register rhs) { 2212 Condition cond = liftoff::ToCondition(liftoff_cond); 2213 2214 if (rhs == no_reg) { 2215 DCHECK_EQ(kind, kI32); 2216 cmp(lhs, Operand(0)); 2217 } else { 2218 DCHECK(kind == kI32 || (is_reference(kind) && (liftoff_cond == kEqual || 2219 liftoff_cond == kUnequal))); 2220 cmp(lhs, rhs); 2221 } 2222 b(label, cond); 2223} 2224 2225void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond, 2226 Label* label, Register lhs, 2227 int32_t imm) { 2228 Condition cond = liftoff::ToCondition(liftoff_cond); 2229 cmp(lhs, Operand(imm)); 2230 b(label, cond); 2231} 2232 2233void LiftoffAssembler::emit_i32_subi_jump_negative(Register value, 2234 int subtrahend, 2235 Label* result_negative) { 2236 sub(value, value, Operand(subtrahend), SetCC); 2237 b(result_negative, mi); 2238} 2239 2240void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) { 2241 clz(dst, src); 2242 mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2)); 2243} 2244 2245void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond, 2246 Register dst, Register lhs, 2247 Register rhs) { 2248 Condition cond = liftoff::ToCondition(liftoff_cond); 2249 cmp(lhs, rhs); 2250 mov(dst, Operand(0), LeaveCC); 2251 mov(dst, Operand(1), LeaveCC, cond); 2252} 2253 2254void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) { 2255 orr(dst, src.low_gp(), src.high_gp()); 2256 clz(dst, dst); 2257 mov(dst, Operand(dst, LSR, 5)); 2258} 2259 2260void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond, 2261 Register dst, LiftoffRegister lhs, 2262 LiftoffRegister rhs) { 2263 // For signed i64 comparisons, we still need to use unsigned comparison for 2264 // the low word (the only bit carrying signedness information is the MSB in 2265 // the high word). 2266 Condition cond = liftoff::ToCondition(liftoff_cond); 2267 Condition unsigned_cond = 2268 liftoff::ToCondition(liftoff::MakeUnsigned(liftoff_cond)); 2269 Label set_cond; 2270 Label cont; 2271 LiftoffRegister dest = LiftoffRegister(dst); 2272 bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs); 2273 if (speculative_move) { 2274 mov(dst, Operand(0)); 2275 } 2276 // Compare high word first. If it differs, use it for the set_cond. If it's 2277 // equal, compare the low word and use that for set_cond. 
2278 cmp(lhs.high_gp(), rhs.high_gp()); 2279 if (unsigned_cond == cond) { 2280 cmp(lhs.low_gp(), rhs.low_gp(), eq); 2281 if (!speculative_move) { 2282 mov(dst, Operand(0)); 2283 } 2284 mov(dst, Operand(1), LeaveCC, cond); 2285 } else { 2286 // If the condition predicate for the low differs from that for the high 2287 // word, the conditional move instructions must be separated. 2288 b(ne, &set_cond); 2289 cmp(lhs.low_gp(), rhs.low_gp()); 2290 if (!speculative_move) { 2291 mov(dst, Operand(0)); 2292 } 2293 mov(dst, Operand(1), LeaveCC, unsigned_cond); 2294 b(&cont); 2295 bind(&set_cond); 2296 if (!speculative_move) { 2297 mov(dst, Operand(0)); 2298 } 2299 mov(dst, Operand(1), LeaveCC, cond); 2300 bind(&cont); 2301 } 2302} 2303 2304void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond, 2305 Register dst, DoubleRegister lhs, 2306 DoubleRegister rhs) { 2307 Condition cond = liftoff::ToCondition(liftoff_cond); 2308 VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs), 2309 liftoff::GetFloatRegister(rhs)); 2310 mov(dst, Operand(0), LeaveCC); 2311 mov(dst, Operand(1), LeaveCC, cond); 2312 if (cond != ne) { 2313 // If V flag set, at least one of the arguments was a Nan -> false. 2314 mov(dst, Operand(0), LeaveCC, vs); 2315 } 2316} 2317 2318void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond, 2319 Register dst, DoubleRegister lhs, 2320 DoubleRegister rhs) { 2321 Condition cond = liftoff::ToCondition(liftoff_cond); 2322 VFPCompareAndSetFlags(lhs, rhs); 2323 mov(dst, Operand(0), LeaveCC); 2324 mov(dst, Operand(1), LeaveCC, cond); 2325 if (cond != ne) { 2326 // If V flag set, at least one of the arguments was a Nan -> false. 2327 mov(dst, Operand(0), LeaveCC, vs); 2328 } 2329} 2330 2331bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition, 2332 LiftoffRegister true_value, 2333 LiftoffRegister false_value, 2334 ValueKind kind) { 2335 return false; 2336} 2337 2338void LiftoffAssembler::emit_smi_check(Register obj, Label* target, 2339 SmiCheckMode mode) { 2340 tst(obj, Operand(kSmiTagMask)); 2341 Condition condition = mode == kJumpOnSmi ? 
eq : ne; 2342 b(condition, target); 2343} 2344 2345void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, 2346 Register offset_reg, uintptr_t offset_imm, 2347 LoadType type, 2348 LoadTransformationKind transform, 2349 uint32_t* protected_load_pc) { 2350 UseScratchRegisterScope temps(this); 2351 Register actual_src_addr = liftoff::CalculateActualAddress( 2352 this, &temps, src_addr, offset_reg, offset_imm); 2353 *protected_load_pc = pc_offset(); 2354 MachineType memtype = type.mem_type(); 2355 2356 if (transform == LoadTransformationKind::kExtend) { 2357 if (memtype == MachineType::Int8()) { 2358 vld1(Neon8, NeonListOperand(dst.low_fp()), 2359 NeonMemOperand(actual_src_addr)); 2360 vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp()); 2361 } else if (memtype == MachineType::Uint8()) { 2362 vld1(Neon8, NeonListOperand(dst.low_fp()), 2363 NeonMemOperand(actual_src_addr)); 2364 vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp()); 2365 } else if (memtype == MachineType::Int16()) { 2366 vld1(Neon16, NeonListOperand(dst.low_fp()), 2367 NeonMemOperand(actual_src_addr)); 2368 vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp()); 2369 } else if (memtype == MachineType::Uint16()) { 2370 vld1(Neon16, NeonListOperand(dst.low_fp()), 2371 NeonMemOperand(actual_src_addr)); 2372 vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp()); 2373 } else if (memtype == MachineType::Int32()) { 2374 vld1(Neon32, NeonListOperand(dst.low_fp()), 2375 NeonMemOperand(actual_src_addr)); 2376 vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp()); 2377 } else if (memtype == MachineType::Uint32()) { 2378 vld1(Neon32, NeonListOperand(dst.low_fp()), 2379 NeonMemOperand(actual_src_addr)); 2380 vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp()); 2381 } 2382 } else if (transform == LoadTransformationKind::kZeroExtend) { 2383 Simd128Register dest = liftoff::GetSimd128Register(dst); 2384 if (memtype == MachineType::Int32()) { 2385 vmov(dest, 0); 2386 vld1s(Neon32, NeonListOperand(dst.low_fp()), 0, 2387 NeonMemOperand(actual_src_addr)); 2388 } else { 2389 DCHECK_EQ(MachineType::Int64(), memtype); 2390 vmov(dest.high(), 0); 2391 vld1(Neon64, NeonListOperand(dest.low()), 2392 NeonMemOperand(actual_src_addr)); 2393 } 2394 } else { 2395 DCHECK_EQ(LoadTransformationKind::kSplat, transform); 2396 if (memtype == MachineType::Int8()) { 2397 vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)), 2398 NeonMemOperand(actual_src_addr)); 2399 } else if (memtype == MachineType::Int16()) { 2400 vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)), 2401 NeonMemOperand(actual_src_addr)); 2402 } else if (memtype == MachineType::Int32()) { 2403 vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)), 2404 NeonMemOperand(actual_src_addr)); 2405 } else if (memtype == MachineType::Int64()) { 2406 vld1(Neon32, NeonListOperand(dst.low_fp()), 2407 NeonMemOperand(actual_src_addr)); 2408 TurboAssembler::Move(dst.high_fp(), dst.low_fp()); 2409 } 2410 } 2411} 2412 2413void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src, 2414 Register addr, Register offset_reg, 2415 uintptr_t offset_imm, LoadType type, 2416 uint8_t laneidx, uint32_t* protected_load_pc) { 2417 UseScratchRegisterScope temps(this); 2418 Register actual_src_addr = liftoff::CalculateActualAddress( 2419 this, &temps, addr, offset_reg, offset_imm); 2420 TurboAssembler::Move(liftoff::GetSimd128Register(dst), 2421 liftoff::GetSimd128Register(src)); 2422 *protected_load_pc = 
pc_offset(); 2423 LoadStoreLaneParams load_params(type.mem_type().representation(), laneidx); 2424 NeonListOperand dst_op = 2425 NeonListOperand(load_params.low_op ? dst.low_fp() : dst.high_fp()); 2426 TurboAssembler::LoadLane(load_params.sz, dst_op, load_params.laneidx, 2427 NeonMemOperand(actual_src_addr)); 2428} 2429 2430void LiftoffAssembler::StoreLane(Register dst, Register offset, 2431 uintptr_t offset_imm, LiftoffRegister src, 2432 StoreType type, uint8_t laneidx, 2433 uint32_t* protected_store_pc) { 2434 UseScratchRegisterScope temps(this); 2435 Register actual_dst_addr = 2436 liftoff::CalculateActualAddress(this, &temps, dst, offset, offset_imm); 2437 *protected_store_pc = pc_offset(); 2438 2439 LoadStoreLaneParams store_params(type.mem_rep(), laneidx); 2440 NeonListOperand src_op = 2441 NeonListOperand(store_params.low_op ? src.low_fp() : src.high_fp()); 2442 TurboAssembler::StoreLane(store_params.sz, src_op, store_params.laneidx, 2443 NeonMemOperand(actual_dst_addr)); 2444} 2445 2446void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst, 2447 LiftoffRegister lhs, 2448 LiftoffRegister rhs) { 2449 UseScratchRegisterScope temps(this); 2450 2451 NeonListOperand table(liftoff::GetSimd128Register(lhs)); 2452 if (dst == lhs) { 2453 // dst will be overwritten, so keep the table somewhere else. 2454 QwNeonRegister tbl = temps.AcquireQ(); 2455 TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs)); 2456 table = NeonListOperand(tbl); 2457 } 2458 2459 vtbl(dst.low_fp(), table, rhs.low_fp()); 2460 vtbl(dst.high_fp(), table, rhs.high_fp()); 2461} 2462 2463void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, 2464 LiftoffRegister src) { 2465 TurboAssembler::Move(dst.low_fp(), src.fp()); 2466 TurboAssembler::Move(dst.high_fp(), src.fp()); 2467} 2468 2469void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, 2470 LiftoffRegister lhs, 2471 uint8_t imm_lane_idx) { 2472 ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx); 2473} 2474 2475void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, 2476 LiftoffRegister src1, 2477 LiftoffRegister src2, 2478 uint8_t imm_lane_idx) { 2479 ReplaceLane(liftoff::GetSimd128Register(dst), 2480 liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx); 2481} 2482 2483void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, 2484 LiftoffRegister src) { 2485 vabs(dst.low_fp(), src.low_fp()); 2486 vabs(dst.high_fp(), src.high_fp()); 2487} 2488 2489void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst, 2490 LiftoffRegister src) { 2491 vneg(dst.low_fp(), src.low_fp()); 2492 vneg(dst.high_fp(), src.high_fp()); 2493} 2494 2495void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst, 2496 LiftoffRegister src) { 2497 vsqrt(dst.low_fp(), src.low_fp()); 2498 vsqrt(dst.high_fp(), src.high_fp()); 2499} 2500 2501bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst, 2502 LiftoffRegister src) { 2503 if (!CpuFeatures::IsSupported(ARMv8)) { 2504 return false; 2505 } 2506 2507 CpuFeatureScope scope(this, ARMv8); 2508 vrintp(dst.low_fp(), src.low_fp()); 2509 vrintp(dst.high_fp(), src.high_fp()); 2510 return true; 2511} 2512 2513bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst, 2514 LiftoffRegister src) { 2515 if (!CpuFeatures::IsSupported(ARMv8)) { 2516 return false; 2517 } 2518 2519 CpuFeatureScope scope(this, ARMv8); 2520 vrintm(dst.low_fp(), src.low_fp()); 2521 vrintm(dst.high_fp(), src.high_fp()); 2522 return true; 2523} 2524 2525bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst, 2526 
LiftoffRegister src) { 2527 if (!CpuFeatures::IsSupported(ARMv8)) { 2528 return false; 2529 } 2530 2531 CpuFeatureScope scope(this, ARMv8); 2532 vrintz(dst.low_fp(), src.low_fp()); 2533 vrintz(dst.high_fp(), src.high_fp()); 2534 return true; 2535} 2536 2537bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst, 2538 LiftoffRegister src) { 2539 if (!CpuFeatures::IsSupported(ARMv8)) { 2540 return false; 2541 } 2542 2543 CpuFeatureScope scope(this, ARMv8); 2544 vrintn(dst.low_fp(), src.low_fp()); 2545 vrintn(dst.high_fp(), src.high_fp()); 2546 return true; 2547} 2548 2549void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, 2550 LiftoffRegister rhs) { 2551 vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); 2552 vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); 2553} 2554 2555void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, 2556 LiftoffRegister rhs) { 2557 vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); 2558 vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); 2559} 2560 2561void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, 2562 LiftoffRegister rhs) { 2563 vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); 2564 vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); 2565} 2566 2567void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs, 2568 LiftoffRegister rhs) { 2569 vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); 2570 vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); 2571} 2572 2573void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs, 2574 LiftoffRegister rhs) { 2575 Simd128Register dest = liftoff::GetSimd128Register(dst); 2576 Simd128Register left = liftoff::GetSimd128Register(lhs); 2577 Simd128Register right = liftoff::GetSimd128Register(rhs); 2578 2579 liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), 2580 liftoff::MinOrMax::kMin); 2581 liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), 2582 liftoff::MinOrMax::kMin); 2583} 2584 2585void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, 2586 LiftoffRegister rhs) { 2587 Simd128Register dest = liftoff::GetSimd128Register(dst); 2588 Simd128Register left = liftoff::GetSimd128Register(lhs); 2589 Simd128Register right = liftoff::GetSimd128Register(rhs); 2590 2591 liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), 2592 liftoff::MinOrMax::kMax); 2593 liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), 2594 liftoff::MinOrMax::kMax); 2595} 2596 2597void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs, 2598 LiftoffRegister rhs) { 2599 QwNeonRegister dest = liftoff::GetSimd128Register(dst); 2600 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 2601 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 2602 2603 if (dst != rhs) { 2604 vmov(dest, left); 2605 } 2606 2607 VFPCompareAndSetFlags(right.low(), left.low()); 2608 vmov(dest.low(), right.low(), mi); 2609 VFPCompareAndSetFlags(right.high(), left.high()); 2610 vmov(dest.high(), right.high(), mi); 2611} 2612 2613void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs, 2614 LiftoffRegister rhs) { 2615 QwNeonRegister dest = liftoff::GetSimd128Register(dst); 2616 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 2617 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 2618 2619 if (dst != rhs) { 2620 vmov(dest, left); 2621 } 2622 2623 VFPCompareAndSetFlags(right.low(), left.low()); 2624 
vmov(dest.low(), right.low(), gt); 2625 VFPCompareAndSetFlags(right.high(), left.high()); 2626 vmov(dest.high(), right.high(), gt); 2627} 2628 2629void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst, 2630 LiftoffRegister src) { 2631 F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst), 2632 liftoff::GetSimd128Register(src)); 2633} 2634 2635void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst, 2636 LiftoffRegister src) { 2637 F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst), 2638 liftoff::GetSimd128Register(src)); 2639} 2640 2641void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst, 2642 LiftoffRegister src) { 2643 F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst), 2644 liftoff::GetSimd128Register(src)); 2645} 2646 2647void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, 2648 LiftoffRegister src) { 2649 vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0); 2650} 2651 2652void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, 2653 LiftoffRegister lhs, 2654 uint8_t imm_lane_idx) { 2655 ExtractLane(liftoff::GetFloatRegister(dst.fp()), 2656 liftoff::GetSimd128Register(lhs), imm_lane_idx); 2657} 2658 2659void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, 2660 LiftoffRegister src1, 2661 LiftoffRegister src2, 2662 uint8_t imm_lane_idx) { 2663 ReplaceLane(liftoff::GetSimd128Register(dst), 2664 liftoff::GetSimd128Register(src1), 2665 liftoff::GetFloatRegister(src2.fp()), imm_lane_idx); 2666} 2667 2668void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, 2669 LiftoffRegister src) { 2670 vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 2671} 2672 2673void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst, 2674 LiftoffRegister src) { 2675 vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 2676} 2677 2678void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst, 2679 LiftoffRegister src) { 2680 // The list of d registers available to us is from d0 to d15, which always 2681 // maps to 2 s registers. 
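// There is no NEON vector sqrt instruction, so take the square root of each of the four lanes individually via the scalar VFP vsqrt.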
2682 LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); 2683 LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code()); 2684 2685 LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); 2686 LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code()); 2687 2688 vsqrt(dst_low.low(), src_low.low()); 2689 vsqrt(dst_low.high(), src_low.high()); 2690 vsqrt(dst_high.low(), src_high.low()); 2691 vsqrt(dst_high.high(), src_high.high()); 2692} 2693 2694bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst, 2695 LiftoffRegister src) { 2696 if (!CpuFeatures::IsSupported(ARMv8)) { 2697 return false; 2698 } 2699 2700 CpuFeatureScope scope(this, ARMv8); 2701 vrintp(NeonS32, liftoff::GetSimd128Register(dst), 2702 liftoff::GetSimd128Register(src)); 2703 return true; 2704} 2705 2706bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst, 2707 LiftoffRegister src) { 2708 if (!CpuFeatures::IsSupported(ARMv8)) { 2709 return false; 2710 } 2711 2712 CpuFeatureScope scope(this, ARMv8); 2713 vrintm(NeonS32, liftoff::GetSimd128Register(dst), 2714 liftoff::GetSimd128Register(src)); 2715 return true; 2716} 2717 2718bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst, 2719 LiftoffRegister src) { 2720 if (!CpuFeatures::IsSupported(ARMv8)) { 2721 return false; 2722 } 2723 2724 CpuFeatureScope scope(this, ARMv8); 2725 vrintz(NeonS32, liftoff::GetSimd128Register(dst), 2726 liftoff::GetSimd128Register(src)); 2727 return true; 2728} 2729 2730bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst, 2731 LiftoffRegister src) { 2732 if (!CpuFeatures::IsSupported(ARMv8)) { 2733 return false; 2734 } 2735 2736 CpuFeatureScope scope(this, ARMv8); 2737 vrintn(NeonS32, liftoff::GetSimd128Register(dst), 2738 liftoff::GetSimd128Register(src)); 2739 return true; 2740} 2741 2742void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs, 2743 LiftoffRegister rhs) { 2744 vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 2745 liftoff::GetSimd128Register(rhs)); 2746} 2747 2748void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, 2749 LiftoffRegister rhs) { 2750 vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 2751 liftoff::GetSimd128Register(rhs)); 2752} 2753 2754void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, 2755 LiftoffRegister rhs) { 2756 vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 2757 liftoff::GetSimd128Register(rhs)); 2758} 2759 2760void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs, 2761 LiftoffRegister rhs) { 2762 // The list of d registers available to us is from d0 to d15, which always 2763 // maps to 2 s registers. 
2764 LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); 2765 LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code()); 2766 LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code()); 2767 2768 LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); 2769 LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code()); 2770 LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code()); 2771 2772 vdiv(dst_low.low(), lhs_low.low(), rhs_low.low()); 2773 vdiv(dst_low.high(), lhs_low.high(), rhs_low.high()); 2774 vdiv(dst_high.low(), lhs_high.low(), rhs_high.low()); 2775 vdiv(dst_high.high(), lhs_high.high(), rhs_high.high()); 2776} 2777 2778void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, 2779 LiftoffRegister rhs) { 2780 vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 2781 liftoff::GetSimd128Register(rhs)); 2782} 2783 2784void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, 2785 LiftoffRegister rhs) { 2786 vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 2787 liftoff::GetSimd128Register(rhs)); 2788} 2789 2790void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, 2791 LiftoffRegister rhs) { 2792 UseScratchRegisterScope temps(this); 2793 2794 QwNeonRegister tmp = liftoff::GetSimd128Register(dst); 2795 if (dst == lhs || dst == rhs) { 2796 tmp = temps.AcquireQ(); 2797 } 2798 2799 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 2800 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 2801 vcgt(tmp, left, right); 2802 vbsl(tmp, right, left); 2803 2804 if (dst == lhs || dst == rhs) { 2805 vmov(liftoff::GetSimd128Register(dst), tmp); 2806 } 2807} 2808 2809void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, 2810 LiftoffRegister rhs) { 2811 UseScratchRegisterScope temps(this); 2812 2813 QwNeonRegister tmp = liftoff::GetSimd128Register(dst); 2814 if (dst == lhs || dst == rhs) { 2815 tmp = temps.AcquireQ(); 2816 } 2817 2818 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 2819 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 2820 vcgt(tmp, right, left); 2821 vbsl(tmp, right, left); 2822 2823 if (dst == lhs || dst == rhs) { 2824 vmov(liftoff::GetSimd128Register(dst), tmp); 2825 } 2826} 2827 2828void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst, 2829 LiftoffRegister src) { 2830 Simd128Register dst_simd = liftoff::GetSimd128Register(dst); 2831 vdup(Neon32, dst_simd, src.low_gp()); 2832 ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1); 2833 ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3); 2834} 2835 2836void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, 2837 LiftoffRegister lhs, 2838 uint8_t imm_lane_idx) { 2839 ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32, 2840 imm_lane_idx * 2); 2841 ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32, 2842 imm_lane_idx * 2 + 1); 2843} 2844 2845void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, 2846 LiftoffRegister src1, 2847 LiftoffRegister src2, 2848 uint8_t imm_lane_idx) { 2849 Simd128Register dst_simd = liftoff::GetSimd128Register(dst); 2850 Simd128Register src1_simd = liftoff::GetSimd128Register(src1); 2851 ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2); 2852 ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32, 2853 imm_lane_idx * 2 + 
1); 2854} 2855 2856void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, 2857 LiftoffRegister src) { 2858 UseScratchRegisterScope temps(this); 2859 QwNeonRegister zero = 2860 dst == src ? temps.AcquireQ() : liftoff::GetSimd128Register(dst); 2861 vmov(zero, uint64_t{0}); 2862 vsub(Neon64, liftoff::GetSimd128Register(dst), zero, 2863 liftoff::GetSimd128Register(src)); 2864} 2865 2866void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst, 2867 LiftoffRegister src) { 2868 I64x2AllTrue(dst.gp(), liftoff::GetSimd128Register(src)); 2869} 2870 2871void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, 2872 LiftoffRegister rhs) { 2873 liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs); 2874} 2875 2876void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, 2877 int32_t rhs) { 2878 vshl(NeonS64, liftoff::GetSimd128Register(dst), 2879 liftoff::GetSimd128Register(lhs), rhs & 63); 2880} 2881 2882void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, 2883 LiftoffRegister lhs, 2884 LiftoffRegister rhs) { 2885 liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs); 2886} 2887 2888void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, 2889 LiftoffRegister lhs, int32_t rhs) { 2890 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs, 2891 rhs); 2892} 2893 2894void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, 2895 LiftoffRegister lhs, 2896 LiftoffRegister rhs) { 2897 liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs); 2898} 2899 2900void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, 2901 LiftoffRegister lhs, int32_t rhs) { 2902 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs, 2903 rhs); 2904} 2905 2906void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, 2907 LiftoffRegister rhs) { 2908 vadd(Neon64, liftoff::GetSimd128Register(dst), 2909 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 2910} 2911 2912void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, 2913 LiftoffRegister rhs) { 2914 vsub(Neon64, liftoff::GetSimd128Register(dst), 2915 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 2916} 2917 2918void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, 2919 LiftoffRegister rhs) { 2920 UseScratchRegisterScope temps(this); 2921 2922 QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst); 2923 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 2924 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 2925 2926 // These temporary registers will be modified. We can directly modify lhs and 2927 // rhs if they are not used, saving on temporaries. 2928 QwNeonRegister tmp1 = left; 2929 QwNeonRegister tmp2 = right; 2930 2931 LiftoffRegList used_plus_dst = 2932 cache_state()->used_registers | LiftoffRegList{dst}; 2933 2934 if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) { 2935 tmp1 = temps.AcquireQ(); 2936 // We only have 1 scratch Q register, so acquire another ourselves. 2937 LiftoffRegList pinned = {dst}; 2938 LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); 2939 tmp2 = liftoff::GetSimd128Register(unused_pair); 2940 } else if (used_plus_dst.has(lhs)) { 2941 tmp1 = temps.AcquireQ(); 2942 } else if (used_plus_dst.has(rhs)) { 2943 tmp2 = temps.AcquireQ(); 2944 } 2945 2946 // Algorithm from code-generator-arm.cc, refer to comments there for details.
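// In short: vtrn gathers the low 32-bit halves of both lanes into tmp.low() and the high halves into tmp.high(); each result lane is then ((lo1 * hi2 + hi1 * lo2) << 32) + lo1 * lo2.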
2947 if (tmp1 != left) { 2948 vmov(tmp1, left); 2949 } 2950 if (tmp2 != right) { 2951 vmov(tmp2, right); 2952 } 2953 2954 vtrn(Neon32, tmp1.low(), tmp1.high()); 2955 vtrn(Neon32, tmp2.low(), tmp2.high()); 2956 2957 vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high()); 2958 vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low()); 2959 vshl(NeonU64, dst_neon, dst_neon, 32); 2960 2961 vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low()); 2962} 2963 2964void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst, 2965 LiftoffRegister src1, 2966 LiftoffRegister src2) { 2967 vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.low_fp(), 2968 src2.low_fp()); 2969} 2970 2971void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst, 2972 LiftoffRegister src1, 2973 LiftoffRegister src2) { 2974 vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.low_fp(), 2975 src2.low_fp()); 2976} 2977 2978void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst, 2979 LiftoffRegister src1, 2980 LiftoffRegister src2) { 2981 vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.high_fp(), 2982 src2.high_fp()); 2983} 2984 2985void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst, 2986 LiftoffRegister src1, 2987 LiftoffRegister src2) { 2988 vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.high_fp(), 2989 src2.high_fp()); 2990} 2991 2992void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst, 2993 LiftoffRegister src) { 2994 I64x2BitMask(dst.gp(), liftoff::GetSimd128Register(src)); 2995} 2996 2997void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst, 2998 LiftoffRegister src) { 2999 vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.low_fp()); 3000} 3001 3002void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst, 3003 LiftoffRegister src) { 3004 vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.high_fp()); 3005} 3006 3007void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst, 3008 LiftoffRegister src) { 3009 vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.low_fp()); 3010} 3011 3012void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst, 3013 LiftoffRegister src) { 3014 vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.high_fp()); 3015} 3016 3017void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, 3018 LiftoffRegister src) { 3019 vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp()); 3020} 3021 3022void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst, 3023 LiftoffRegister lhs, 3024 uint8_t imm_lane_idx) { 3025 ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32, 3026 imm_lane_idx); 3027} 3028 3029void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, 3030 LiftoffRegister src1, 3031 LiftoffRegister src2, 3032 uint8_t imm_lane_idx) { 3033 ReplaceLane(liftoff::GetSimd128Register(dst), 3034 liftoff::GetSimd128Register(src1), src2.gp(), NeonS32, 3035 imm_lane_idx); 3036} 3037 3038void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, 3039 LiftoffRegister src) { 3040 vneg(Neon32, liftoff::GetSimd128Register(dst), 3041 liftoff::GetSimd128Register(src)); 3042} 3043 3044void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst, 3045 LiftoffRegister src) { 3046 UseScratchRegisterScope temps(this); 3047 DwVfpRegister scratch = temps.AcquireD(); 3048 vpmin(NeonU32, scratch, src.low_fp(), src.high_fp()); 3049 vpmin(NeonU32, scratch, scratch, scratch); 3050 ExtractLane(dst.gp(), scratch, NeonS32, 0); 3051 cmp(dst.gp(), 
Operand(0)); 3052 mov(dst.gp(), Operand(1), LeaveCC, ne); 3053} 3054 3055void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, 3056 LiftoffRegister src) { 3057 UseScratchRegisterScope temps(this); 3058 Simd128Register tmp = liftoff::GetSimd128Register(src); 3059 Simd128Register mask = temps.AcquireQ(); 3060 3061 if (cache_state()->is_used(src)) { 3062 // We only have 1 scratch Q register, so try and reuse src. 3063 LiftoffRegList pinned = {src}; 3064 LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); 3065 mask = liftoff::GetSimd128Register(unused_pair); 3066 } 3067 3068 vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31); 3069 // Set i-th bit of each lane i. When AND with tmp, the lanes that 3070 // are signed will have i-th bit set, unsigned will be 0. 3071 vmov(mask.low(), base::Double((uint64_t)0x0000'0002'0000'0001)); 3072 vmov(mask.high(), base::Double((uint64_t)0x0000'0008'0000'0004)); 3073 vand(tmp, mask, tmp); 3074 vpadd(Neon32, tmp.low(), tmp.low(), tmp.high()); 3075 vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero); 3076 VmovLow(dst.gp(), tmp.low()); 3077} 3078 3079void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, 3080 LiftoffRegister rhs) { 3081 liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs); 3082} 3083 3084void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, 3085 int32_t rhs) { 3086 vshl(NeonS32, liftoff::GetSimd128Register(dst), 3087 liftoff::GetSimd128Register(lhs), rhs & 31); 3088} 3089 3090void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, 3091 LiftoffRegister lhs, 3092 LiftoffRegister rhs) { 3093 liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs); 3094} 3095 3096void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, 3097 LiftoffRegister lhs, int32_t rhs) { 3098 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs, 3099 rhs); 3100} 3101 3102void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, 3103 LiftoffRegister lhs, 3104 LiftoffRegister rhs) { 3105 liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs); 3106} 3107 3108void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, 3109 LiftoffRegister lhs, int32_t rhs) { 3110 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs, 3111 rhs); 3112} 3113 3114void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, 3115 LiftoffRegister rhs) { 3116 vadd(Neon32, liftoff::GetSimd128Register(dst), 3117 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3118} 3119 3120void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, 3121 LiftoffRegister rhs) { 3122 vsub(Neon32, liftoff::GetSimd128Register(dst), 3123 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3124} 3125 3126void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, 3127 LiftoffRegister rhs) { 3128 vmul(Neon32, liftoff::GetSimd128Register(dst), 3129 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3130} 3131 3132void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst, 3133 LiftoffRegister lhs, 3134 LiftoffRegister rhs) { 3135 vmin(NeonS32, liftoff::GetSimd128Register(dst), 3136 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3137} 3138 3139void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst, 3140 LiftoffRegister lhs, 3141 LiftoffRegister rhs) { 3142 vmin(NeonU32, 
liftoff::GetSimd128Register(dst), 3143 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3144} 3145 3146void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst, 3147 LiftoffRegister lhs, 3148 LiftoffRegister rhs) { 3149 vmax(NeonS32, liftoff::GetSimd128Register(dst), 3150 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3151} 3152 3153void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst, 3154 LiftoffRegister lhs, 3155 LiftoffRegister rhs) { 3156 vmax(NeonU32, liftoff::GetSimd128Register(dst), 3157 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3158} 3159 3160void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst, 3161 LiftoffRegister lhs, 3162 LiftoffRegister rhs) { 3163 QwNeonRegister dest = liftoff::GetSimd128Register(dst); 3164 QwNeonRegister left = liftoff::GetSimd128Register(lhs); 3165 QwNeonRegister right = liftoff::GetSimd128Register(rhs); 3166 3167 UseScratchRegisterScope temps(this); 3168 Simd128Register scratch = temps.AcquireQ(); 3169 3170 vmull(NeonS16, scratch, left.low(), right.low()); 3171 vpadd(Neon32, dest.low(), scratch.low(), scratch.high()); 3172 3173 vmull(NeonS16, scratch, left.high(), right.high()); 3174 vpadd(Neon32, dest.high(), scratch.low(), scratch.high()); 3175} 3176 3177void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst, 3178 LiftoffRegister src) { 3179 vpaddl(NeonS16, liftoff::GetSimd128Register(dst), 3180 liftoff::GetSimd128Register(src)); 3181} 3182 3183void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst, 3184 LiftoffRegister src) { 3185 vpaddl(NeonU16, liftoff::GetSimd128Register(dst), 3186 liftoff::GetSimd128Register(src)); 3187} 3188 3189void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst, 3190 LiftoffRegister src1, 3191 LiftoffRegister src2) { 3192 vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.low_fp(), 3193 src2.low_fp()); 3194} 3195 3196void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst, 3197 LiftoffRegister src1, 3198 LiftoffRegister src2) { 3199 vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.low_fp(), 3200 src2.low_fp()); 3201} 3202 3203void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst, 3204 LiftoffRegister src1, 3205 LiftoffRegister src2) { 3206 vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.high_fp(), 3207 src2.high_fp()); 3208} 3209 3210void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst, 3211 LiftoffRegister src1, 3212 LiftoffRegister src2) { 3213 vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.high_fp(), 3214 src2.high_fp()); 3215} 3216 3217void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, 3218 LiftoffRegister src) { 3219 vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp()); 3220} 3221 3222void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, 3223 LiftoffRegister src) { 3224 vneg(Neon16, liftoff::GetSimd128Register(dst), 3225 liftoff::GetSimd128Register(src)); 3226} 3227 3228void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst, 3229 LiftoffRegister src) { 3230 UseScratchRegisterScope temps(this); 3231 DwVfpRegister scratch = temps.AcquireD(); 3232 vpmin(NeonU16, scratch, src.low_fp(), src.high_fp()); 3233 vpmin(NeonU16, scratch, scratch, scratch); 3234 vpmin(NeonU16, scratch, scratch, scratch); 3235 ExtractLane(dst.gp(), scratch, NeonS16, 0); 3236 cmp(dst.gp(), Operand(0)); 3237 mov(dst.gp(), Operand(1), LeaveCC, ne); 3238} 3239 3240void 
LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, 3241 LiftoffRegister src) { 3242 UseScratchRegisterScope temps(this); 3243 Simd128Register tmp = liftoff::GetSimd128Register(src); 3244 Simd128Register mask = temps.AcquireQ(); 3245 3246 if (cache_state()->is_used(src)) { 3247 // We only have 1 scratch Q register, so try and reuse src. 3248 LiftoffRegList pinned = {src}; 3249 LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); 3250 mask = liftoff::GetSimd128Register(unused_pair); 3251 } 3252 3253 vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15); 3254 // Set i-th bit of each lane i. When AND with tmp, the lanes that 3255 // are signed will have i-th bit set, unsigned will be 0. 3256 vmov(mask.low(), base::Double((uint64_t)0x0008'0004'0002'0001)); 3257 vmov(mask.high(), base::Double((uint64_t)0x0080'0040'0020'0010)); 3258 vand(tmp, mask, tmp); 3259 vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); 3260 vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); 3261 vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); 3262 vmov(NeonU16, dst.gp(), tmp.low(), 0); 3263} 3264 3265void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, 3266 LiftoffRegister rhs) { 3267 liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs); 3268} 3269 3270void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, 3271 int32_t rhs) { 3272 vshl(NeonS16, liftoff::GetSimd128Register(dst), 3273 liftoff::GetSimd128Register(lhs), rhs & 15); 3274} 3275 3276void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, 3277 LiftoffRegister lhs, 3278 LiftoffRegister rhs) { 3279 liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs); 3280} 3281 3282void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, 3283 LiftoffRegister lhs, int32_t rhs) { 3284 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs, 3285 rhs); 3286} 3287 3288void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, 3289 LiftoffRegister lhs, 3290 LiftoffRegister rhs) { 3291 liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs); 3292} 3293 3294void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, 3295 LiftoffRegister lhs, int32_t rhs) { 3296 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs, 3297 rhs); 3298} 3299 3300void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, 3301 LiftoffRegister rhs) { 3302 vadd(Neon16, liftoff::GetSimd128Register(dst), 3303 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3304} 3305 3306void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst, 3307 LiftoffRegister lhs, 3308 LiftoffRegister rhs) { 3309 vqadd(NeonS16, liftoff::GetSimd128Register(dst), 3310 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3311} 3312 3313void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs, 3314 LiftoffRegister rhs) { 3315 vsub(Neon16, liftoff::GetSimd128Register(dst), 3316 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3317} 3318 3319void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst, 3320 LiftoffRegister lhs, 3321 LiftoffRegister rhs) { 3322 vqsub(NeonS16, liftoff::GetSimd128Register(dst), 3323 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3324} 3325 3326void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst, 3327 LiftoffRegister lhs, 3328 LiftoffRegister rhs) { 3329 vqsub(NeonU16, 
liftoff::GetSimd128Register(dst), 3330 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3331} 3332 3333void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs, 3334 LiftoffRegister rhs) { 3335 vmul(Neon16, liftoff::GetSimd128Register(dst), 3336 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3337} 3338 3339void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst, 3340 LiftoffRegister lhs, 3341 LiftoffRegister rhs) { 3342 vqadd(NeonU16, liftoff::GetSimd128Register(dst), 3343 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3344} 3345 3346void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst, 3347 LiftoffRegister lhs, 3348 LiftoffRegister rhs) { 3349 vmin(NeonS16, liftoff::GetSimd128Register(dst), 3350 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3351} 3352 3353void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst, 3354 LiftoffRegister lhs, 3355 LiftoffRegister rhs) { 3356 vmin(NeonU16, liftoff::GetSimd128Register(dst), 3357 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3358} 3359 3360void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst, 3361 LiftoffRegister lhs, 3362 LiftoffRegister rhs) { 3363 vmax(NeonS16, liftoff::GetSimd128Register(dst), 3364 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3365} 3366 3367void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, 3368 LiftoffRegister lhs, 3369 LiftoffRegister rhs) { 3370 vmax(NeonU16, liftoff::GetSimd128Register(dst), 3371 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3372} 3373 3374void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst, 3375 LiftoffRegister lhs, 3376 uint8_t imm_lane_idx) { 3377 ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16, 3378 imm_lane_idx); 3379} 3380 3381void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, 3382 LiftoffRegister lhs, 3383 uint8_t imm_lane_idx) { 3384 ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16, 3385 imm_lane_idx); 3386} 3387 3388void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, 3389 LiftoffRegister src1, 3390 LiftoffRegister src2, 3391 uint8_t imm_lane_idx) { 3392 ReplaceLane(liftoff::GetSimd128Register(dst), 3393 liftoff::GetSimd128Register(src1), src2.gp(), NeonS16, 3394 imm_lane_idx); 3395} 3396 3397void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst, 3398 LiftoffRegister src) { 3399 vpaddl(NeonS8, liftoff::GetSimd128Register(dst), 3400 liftoff::GetSimd128Register(src)); 3401} 3402 3403void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst, 3404 LiftoffRegister src) { 3405 vpaddl(NeonU8, liftoff::GetSimd128Register(dst), 3406 liftoff::GetSimd128Register(src)); 3407} 3408 3409void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst, 3410 LiftoffRegister src1, 3411 LiftoffRegister src2) { 3412 vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp()); 3413} 3414 3415void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst, 3416 LiftoffRegister src1, 3417 LiftoffRegister src2) { 3418 vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp()); 3419} 3420 3421void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst, 3422 LiftoffRegister src1, 3423 LiftoffRegister src2) { 3424 vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.high_fp(), 3425 src2.high_fp()); 3426} 3427 3428void 
LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst, 3429 LiftoffRegister src1, 3430 LiftoffRegister src2) { 3431 vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.high_fp(), 3432 src2.high_fp()); 3433} 3434 3435void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst, 3436 LiftoffRegister src1, 3437 LiftoffRegister src2) { 3438 vqrdmulh(NeonS16, liftoff::GetSimd128Register(dst), 3439 liftoff::GetSimd128Register(src1), 3440 liftoff::GetSimd128Register(src2)); 3441} 3442 3443void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst, 3444 LiftoffRegister lhs, 3445 LiftoffRegister rhs, 3446 const uint8_t shuffle[16], 3447 bool is_swizzle) { 3448 Simd128Register dest = liftoff::GetSimd128Register(dst); 3449 Simd128Register src1 = liftoff::GetSimd128Register(lhs); 3450 Simd128Register src2 = liftoff::GetSimd128Register(rhs); 3451 UseScratchRegisterScope temps(this); 3452 Simd128Register scratch = temps.AcquireQ(); 3453 if ((src1 != src2) && src1.code() + 1 != src2.code()) { 3454 // vtbl requires the operands to be consecutive or the same. 3455 // If they are the same, we build a smaller list operand (table_size = 2). 3456 // If they are not the same, and not consecutive, we move the src1 and src2 3457 // to q14 and q15, which will be unused since they are not allocatable in 3458 // Liftoff. If the operands are the same, then we build a smaller list 3459 // operand below. 3460 static_assert(!kLiftoffAssemblerFpCacheRegs.has(d28), 3461 "This only works if q14-q15 (d28-d31) are not used."); 3462 static_assert(!kLiftoffAssemblerFpCacheRegs.has(d29), 3463 "This only works if q14-q15 (d28-d31) are not used."); 3464 static_assert(!kLiftoffAssemblerFpCacheRegs.has(d30), 3465 "This only works if q14-q15 (d28-d31) are not used."); 3466 static_assert(!kLiftoffAssemblerFpCacheRegs.has(d31), 3467 "This only works if q14-q15 (d28-d31) are not used."); 3468 vmov(q14, src1); 3469 src1 = q14; 3470 vmov(q15, src2); 3471 src2 = q15; 3472 } 3473 3474 int table_size = src1 == src2 ? 2 : 4; 3475 3476 int scratch_s_base = scratch.code() * 4; 3477 for (int j = 0; j < 4; j++) { 3478 uint32_t imm = 0; 3479 for (int i = 3; i >= 0; i--) { 3480 imm = (imm << 8) | shuffle[j * 4 + i]; 3481 } 3482 DCHECK_EQ(0, imm & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0)); 3483 // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4. 
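    // Pack the next four shuffle indices into word j of the scratch register
    // (written through its aliasing S register); the four words together form
    // the lane-index table consumed by vtbl below.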
3484 vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm)); 3485 } 3486 3487 DwVfpRegister table_base = src1.low(); 3488 NeonListOperand table(table_base, table_size); 3489 3490 if (dest != src1 && dest != src2) { 3491 vtbl(dest.low(), table, scratch.low()); 3492 vtbl(dest.high(), table, scratch.high()); 3493 } else { 3494 vtbl(scratch.low(), table, scratch.low()); 3495 vtbl(scratch.high(), table, scratch.high()); 3496 vmov(dest, scratch); 3497 } 3498} 3499 3500void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst, 3501 LiftoffRegister src) { 3502 vcnt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 3503} 3504 3505void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, 3506 LiftoffRegister src) { 3507 vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp()); 3508} 3509 3510void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst, 3511 LiftoffRegister lhs, 3512 uint8_t imm_lane_idx) { 3513 ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx); 3514} 3515 3516void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst, 3517 LiftoffRegister lhs, 3518 uint8_t imm_lane_idx) { 3519 ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx); 3520} 3521 3522void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, 3523 LiftoffRegister src1, 3524 LiftoffRegister src2, 3525 uint8_t imm_lane_idx) { 3526 ReplaceLane(liftoff::GetSimd128Register(dst), 3527 liftoff::GetSimd128Register(src1), src2.gp(), NeonS8, 3528 imm_lane_idx); 3529} 3530 3531void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, 3532 LiftoffRegister src) { 3533 vneg(Neon8, liftoff::GetSimd128Register(dst), 3534 liftoff::GetSimd128Register(src)); 3535} 3536 3537void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst, 3538 LiftoffRegister src) { 3539 liftoff::EmitAnyTrue(this, dst, src); 3540} 3541 3542void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst, 3543 LiftoffRegister src) { 3544 UseScratchRegisterScope temps(this); 3545 DwVfpRegister scratch = temps.AcquireD(); 3546 vpmin(NeonU8, scratch, src.low_fp(), src.high_fp()); 3547 vpmin(NeonU8, scratch, scratch, scratch); 3548 vpmin(NeonU8, scratch, scratch, scratch); 3549 vpmin(NeonU8, scratch, scratch, scratch); 3550 ExtractLane(dst.gp(), scratch, NeonS8, 0); 3551 cmp(dst.gp(), Operand(0)); 3552 mov(dst.gp(), Operand(1), LeaveCC, ne); 3553} 3554 3555void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, 3556 LiftoffRegister src) { 3557 UseScratchRegisterScope temps(this); 3558 Simd128Register tmp = liftoff::GetSimd128Register(src); 3559 Simd128Register mask = temps.AcquireQ(); 3560 3561 if (cache_state()->is_used(src)) { 3562 // We only have 1 scratch Q register, so try and reuse src. 3563 LiftoffRegList pinned = {src}; 3564 LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); 3565 mask = liftoff::GetSimd128Register(unused_pair); 3566 } 3567 3568 vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7); 3569 // Set i-th bit of each lane i. When AND with tmp, the lanes that 3570 // are signed will have i-th bit set, unsigned will be 0. 
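  // After the AND, each negative lane holds 1 << (lane % 8). The vext/vzip
  // sequence below pairs lane k with lane k + 8 in one 16-bit element, so the
  // pairwise adds accumulate lanes 0-7 into the low byte and lanes 8-15 into
  // the high byte of the final 16-bit mask.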
3571 vmov(mask.low(), base::Double((uint64_t)0x8040'2010'0804'0201)); 3572 vmov(mask.high(), base::Double((uint64_t)0x8040'2010'0804'0201)); 3573 vand(tmp, mask, tmp); 3574 vext(mask, tmp, tmp, 8); 3575 vzip(Neon8, mask, tmp); 3576 vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); 3577 vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); 3578 vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); 3579 vmov(NeonU16, dst.gp(), tmp.low(), 0); 3580} 3581 3582void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, 3583 LiftoffRegister rhs) { 3584 liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs); 3585} 3586 3587void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, 3588 int32_t rhs) { 3589 vshl(NeonS8, liftoff::GetSimd128Register(dst), 3590 liftoff::GetSimd128Register(lhs), rhs & 7); 3591} 3592 3593void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, 3594 LiftoffRegister lhs, 3595 LiftoffRegister rhs) { 3596 liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs); 3597} 3598 3599void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, 3600 LiftoffRegister lhs, int32_t rhs) { 3601 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs); 3602} 3603 3604void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, 3605 LiftoffRegister lhs, 3606 LiftoffRegister rhs) { 3607 liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs); 3608} 3609 3610void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, 3611 LiftoffRegister lhs, int32_t rhs) { 3612 liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs); 3613} 3614 3615void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, 3616 LiftoffRegister rhs) { 3617 vadd(Neon8, liftoff::GetSimd128Register(dst), 3618 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3619} 3620 3621void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst, 3622 LiftoffRegister lhs, 3623 LiftoffRegister rhs) { 3624 vqadd(NeonS8, liftoff::GetSimd128Register(dst), 3625 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3626} 3627 3628void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs, 3629 LiftoffRegister rhs) { 3630 vsub(Neon8, liftoff::GetSimd128Register(dst), 3631 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3632} 3633 3634void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst, 3635 LiftoffRegister lhs, 3636 LiftoffRegister rhs) { 3637 vqsub(NeonS8, liftoff::GetSimd128Register(dst), 3638 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3639} 3640 3641void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst, 3642 LiftoffRegister lhs, 3643 LiftoffRegister rhs) { 3644 vqsub(NeonU8, liftoff::GetSimd128Register(dst), 3645 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3646} 3647 3648void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst, 3649 LiftoffRegister lhs, 3650 LiftoffRegister rhs) { 3651 vqadd(NeonU8, liftoff::GetSimd128Register(dst), 3652 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3653} 3654 3655void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst, 3656 LiftoffRegister lhs, 3657 LiftoffRegister rhs) { 3658 vmin(NeonS8, liftoff::GetSimd128Register(dst), 3659 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3660} 3661 3662void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst, 
3663 LiftoffRegister lhs, 3664 LiftoffRegister rhs) { 3665 vmin(NeonU8, liftoff::GetSimd128Register(dst), 3666 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3667} 3668 3669void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst, 3670 LiftoffRegister lhs, 3671 LiftoffRegister rhs) { 3672 vmax(NeonS8, liftoff::GetSimd128Register(dst), 3673 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3674} 3675 3676void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst, 3677 LiftoffRegister lhs, 3678 LiftoffRegister rhs) { 3679 vmax(NeonU8, liftoff::GetSimd128Register(dst), 3680 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3681} 3682 3683void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs, 3684 LiftoffRegister rhs) { 3685 vceq(Neon8, liftoff::GetSimd128Register(dst), 3686 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3687} 3688 3689void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs, 3690 LiftoffRegister rhs) { 3691 vceq(Neon8, liftoff::GetSimd128Register(dst), 3692 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3693 vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); 3694} 3695 3696void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs, 3697 LiftoffRegister rhs) { 3698 vcgt(NeonS8, liftoff::GetSimd128Register(dst), 3699 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3700} 3701 3702void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs, 3703 LiftoffRegister rhs) { 3704 vcgt(NeonU8, liftoff::GetSimd128Register(dst), 3705 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3706} 3707 3708void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs, 3709 LiftoffRegister rhs) { 3710 vcge(NeonS8, liftoff::GetSimd128Register(dst), 3711 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3712} 3713 3714void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs, 3715 LiftoffRegister rhs) { 3716 vcge(NeonU8, liftoff::GetSimd128Register(dst), 3717 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3718} 3719 3720void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs, 3721 LiftoffRegister rhs) { 3722 vceq(Neon16, liftoff::GetSimd128Register(dst), 3723 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3724} 3725 3726void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs, 3727 LiftoffRegister rhs) { 3728 vceq(Neon16, liftoff::GetSimd128Register(dst), 3729 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3730 vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); 3731} 3732 3733void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs, 3734 LiftoffRegister rhs) { 3735 vcgt(NeonS16, liftoff::GetSimd128Register(dst), 3736 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3737} 3738 3739void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs, 3740 LiftoffRegister rhs) { 3741 vcgt(NeonU16, liftoff::GetSimd128Register(dst), 3742 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3743} 3744 3745void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs, 3746 LiftoffRegister rhs) { 3747 vcge(NeonS16, liftoff::GetSimd128Register(dst), 3748 
liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3749} 3750 3751void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs, 3752 LiftoffRegister rhs) { 3753 vcge(NeonU16, liftoff::GetSimd128Register(dst), 3754 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3755} 3756 3757void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, 3758 LiftoffRegister rhs) { 3759 vceq(Neon32, liftoff::GetSimd128Register(dst), 3760 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3761} 3762 3763void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, 3764 LiftoffRegister rhs) { 3765 vceq(Neon32, liftoff::GetSimd128Register(dst), 3766 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3767 vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); 3768} 3769 3770void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs, 3771 LiftoffRegister rhs) { 3772 vcgt(NeonS32, liftoff::GetSimd128Register(dst), 3773 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3774} 3775 3776void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs, 3777 LiftoffRegister rhs) { 3778 vcgt(NeonU32, liftoff::GetSimd128Register(dst), 3779 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3780} 3781 3782void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs, 3783 LiftoffRegister rhs) { 3784 vcge(NeonS32, liftoff::GetSimd128Register(dst), 3785 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3786} 3787 3788void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs, 3789 LiftoffRegister rhs) { 3790 vcge(NeonU32, liftoff::GetSimd128Register(dst), 3791 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 3792} 3793 3794void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs, 3795 LiftoffRegister rhs) { 3796 I64x2Eq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3797 liftoff::GetSimd128Register(rhs)); 3798} 3799 3800void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, 3801 LiftoffRegister rhs) { 3802 I64x2Ne(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3803 liftoff::GetSimd128Register(rhs)); 3804} 3805 3806void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs, 3807 LiftoffRegister rhs) { 3808 I64x2GtS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3809 liftoff::GetSimd128Register(rhs)); 3810} 3811 3812void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs, 3813 LiftoffRegister rhs) { 3814 I64x2GeS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3815 liftoff::GetSimd128Register(rhs)); 3816} 3817 3818void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, 3819 LiftoffRegister rhs) { 3820 vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3821 liftoff::GetSimd128Register(rhs)); 3822} 3823 3824void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, 3825 LiftoffRegister rhs) { 3826 vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3827 liftoff::GetSimd128Register(rhs)); 3828 vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); 3829} 3830 3831void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs, 3832 LiftoffRegister 
rhs) { 3833 vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), 3834 liftoff::GetSimd128Register(lhs)); 3835} 3836 3837void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs, 3838 LiftoffRegister rhs) { 3839 vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), 3840 liftoff::GetSimd128Register(lhs)); 3841} 3842 3843void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs, 3844 LiftoffRegister rhs) { 3845 liftoff::F64x2Compare(this, dst, lhs, rhs, eq); 3846} 3847 3848void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, 3849 LiftoffRegister rhs) { 3850 liftoff::F64x2Compare(this, dst, lhs, rhs, ne); 3851} 3852 3853void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs, 3854 LiftoffRegister rhs) { 3855 liftoff::F64x2Compare(this, dst, lhs, rhs, lt); 3856} 3857 3858void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs, 3859 LiftoffRegister rhs) { 3860 liftoff::F64x2Compare(this, dst, lhs, rhs, le); 3861} 3862 3863void LiftoffAssembler::emit_s128_const(LiftoffRegister dst, 3864 const uint8_t imms[16]) { 3865 uint64_t vals[2]; 3866 memcpy(vals, imms, sizeof(vals)); 3867 vmov(dst.low_fp(), base::Double(vals[0])); 3868 vmov(dst.high_fp(), base::Double(vals[1])); 3869} 3870 3871void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) { 3872 vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 3873} 3874 3875void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs, 3876 LiftoffRegister rhs) { 3877 vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3878 liftoff::GetSimd128Register(rhs)); 3879} 3880 3881void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs, 3882 LiftoffRegister rhs) { 3883 vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3884 liftoff::GetSimd128Register(rhs)); 3885} 3886 3887void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs, 3888 LiftoffRegister rhs) { 3889 veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 3890 liftoff::GetSimd128Register(rhs)); 3891} 3892 3893void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, 3894 LiftoffRegister src1, 3895 LiftoffRegister src2, 3896 LiftoffRegister mask) { 3897 if (dst != mask) { 3898 vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask)); 3899 } 3900 vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), 3901 liftoff::GetSimd128Register(src2)); 3902} 3903 3904void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, 3905 LiftoffRegister src) { 3906 vcvt_s32_f32(liftoff::GetSimd128Register(dst), 3907 liftoff::GetSimd128Register(src)); 3908} 3909 3910void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, 3911 LiftoffRegister src) { 3912 vcvt_u32_f32(liftoff::GetSimd128Register(dst), 3913 liftoff::GetSimd128Register(src)); 3914} 3915 3916void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, 3917 LiftoffRegister src) { 3918 vcvt_f32_s32(liftoff::GetSimd128Register(dst), 3919 liftoff::GetSimd128Register(src)); 3920} 3921 3922void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, 3923 LiftoffRegister src) { 3924 vcvt_f32_u32(liftoff::GetSimd128Register(dst), 3925 liftoff::GetSimd128Register(src)); 3926} 3927 3928void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst, 3929 LiftoffRegister src) { 3930 
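  // Demote the two f64 lanes into the low half of {dst} and zero the high
  // half, which holds the upper two f32 result lanes.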
LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code()); 3931 vcvt_f32_f64(dst_d.low(), src.low_fp()); 3932 vcvt_f32_f64(dst_d.high(), src.high_fp()); 3933 vmov(dst.high_fp(), 0); 3934} 3935 3936void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, 3937 LiftoffRegister lhs, 3938 LiftoffRegister rhs) { 3939 liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs); 3940} 3941 3942void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst, 3943 LiftoffRegister lhs, 3944 LiftoffRegister rhs) { 3945 liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs); 3946} 3947 3948void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst, 3949 LiftoffRegister lhs, 3950 LiftoffRegister rhs) { 3951 liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs); 3952} 3953 3954void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst, 3955 LiftoffRegister lhs, 3956 LiftoffRegister rhs) { 3957 liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs); 3958} 3959 3960void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst, 3961 LiftoffRegister src) { 3962 vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp()); 3963} 3964 3965void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst, 3966 LiftoffRegister src) { 3967 vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp()); 3968} 3969 3970void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst, 3971 LiftoffRegister src) { 3972 vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp()); 3973} 3974 3975void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst, 3976 LiftoffRegister src) { 3977 vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp()); 3978} 3979 3980void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst, 3981 LiftoffRegister src) { 3982 vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp()); 3983} 3984 3985void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst, 3986 LiftoffRegister src) { 3987 vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp()); 3988} 3989 3990void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst, 3991 LiftoffRegister src) { 3992 vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp()); 3993} 3994 3995void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst, 3996 LiftoffRegister src) { 3997 vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp()); 3998} 3999 4000void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst, 4001 LiftoffRegister src) { 4002 LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code()); 4003 vcvt_s32_f64(dst_d.low(), src.low_fp()); 4004 vcvt_s32_f64(dst_d.high(), src.high_fp()); 4005 vmov(dst.high_fp(), 0); 4006} 4007 4008void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst, 4009 LiftoffRegister src) { 4010 LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code()); 4011 vcvt_u32_f64(dst_d.low(), src.low_fp()); 4012 vcvt_u32_f64(dst_d.high(), src.high_fp()); 4013 vmov(dst.high_fp(), 0); 4014} 4015 4016void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst, 4017 LiftoffRegister lhs, 4018 LiftoffRegister rhs) { 4019 vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 4020 liftoff::GetSimd128Register(rhs)); 4021} 4022 4023void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst, 4024 LiftoffRegister lhs, 4025 LiftoffRegister rhs) { 4026 
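  // vrhadd computes the unsigned rounding average (a + b + 1) >> 1 per byte
  // lane.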
vrhadd(NeonU8, liftoff::GetSimd128Register(dst), 4027 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 4028} 4029 4030void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst, 4031 LiftoffRegister lhs, 4032 LiftoffRegister rhs) { 4033 vrhadd(NeonU16, liftoff::GetSimd128Register(dst), 4034 liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 4035} 4036 4037void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst, 4038 LiftoffRegister src) { 4039 vabs(Neon8, liftoff::GetSimd128Register(dst), 4040 liftoff::GetSimd128Register(src)); 4041} 4042 4043void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst, 4044 LiftoffRegister src) { 4045 vabs(Neon16, liftoff::GetSimd128Register(dst), 4046 liftoff::GetSimd128Register(src)); 4047} 4048 4049void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst, 4050 LiftoffRegister src) { 4051 vabs(Neon32, liftoff::GetSimd128Register(dst), 4052 liftoff::GetSimd128Register(src)); 4053} 4054 4055void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst, 4056 LiftoffRegister src) { 4057 I64x2Abs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); 4058} 4059 4060void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { 4061 ldr(limit_address, MemOperand(limit_address)); 4062 cmp(sp, limit_address); 4063 b(ool_code, ls); 4064} 4065 4066void LiftoffAssembler::CallTrapCallbackForTesting() { 4067 PrepareCallCFunction(0, 0); 4068 CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); 4069} 4070 4071void LiftoffAssembler::AssertUnreachable(AbortReason reason) { 4072 // Asserts unreachable within the wasm code. 4073 TurboAssembler::AssertUnreachable(reason); 4074} 4075 4076void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { 4077 RegList core_regs = regs.GetGpList(); 4078 if (!core_regs.is_empty()) { 4079 stm(db_w, sp, core_regs); 4080 } 4081 LiftoffRegList fp_regs = regs & kFpCacheRegList; 4082 while (!fp_regs.is_empty()) { 4083 LiftoffRegister reg = fp_regs.GetFirstRegSet(); 4084 DoubleRegister first = reg.fp(); 4085 DoubleRegister last = first; 4086 fp_regs.clear(reg); 4087 while (!fp_regs.is_empty()) { 4088 LiftoffRegister reg = fp_regs.GetFirstRegSet(); 4089 int code = reg.fp().code(); 4090 // vstm can not push more than 16 registers. We have to make sure the 4091 // condition is met. 
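      // Stop the run at the first gap in register codes or once 16 registers
      // have been collected.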
4092 if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break; 4093 last = reg.fp(); 4094 fp_regs.clear(reg); 4095 } 4096 vstm(db_w, sp, first, last); 4097 } 4098} 4099 4100void LiftoffAssembler::PopRegisters(LiftoffRegList regs) { 4101 LiftoffRegList fp_regs = regs & kFpCacheRegList; 4102 while (!fp_regs.is_empty()) { 4103 LiftoffRegister reg = fp_regs.GetLastRegSet(); 4104 DoubleRegister last = reg.fp(); 4105 DoubleRegister first = last; 4106 fp_regs.clear(reg); 4107 while (!fp_regs.is_empty()) { 4108 LiftoffRegister reg = fp_regs.GetLastRegSet(); 4109 int code = reg.fp().code(); 4110 if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break; 4111 first = reg.fp(); 4112 fp_regs.clear(reg); 4113 } 4114 vldm(ia_w, sp, first, last); 4115 } 4116 RegList core_regs = regs.GetGpList(); 4117 if (!core_regs.is_empty()) { 4118 ldm(ia_w, sp, core_regs); 4119 } 4120} 4121 4122void LiftoffAssembler::RecordSpillsInSafepoint( 4123 SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills, 4124 LiftoffRegList ref_spills, int spill_offset) { 4125 int spill_space_size = 0; 4126 while (!all_spills.is_empty()) { 4127 LiftoffRegister reg = all_spills.GetLastRegSet(); 4128 if (ref_spills.has(reg)) { 4129 safepoint.DefineTaggedStackSlot(spill_offset); 4130 } 4131 all_spills.clear(reg); 4132 ++spill_offset; 4133 spill_space_size += kSystemPointerSize; 4134 } 4135 // Record the number of additional spill slots. 4136 RecordOolSpillSpaceSize(spill_space_size); 4137} 4138 4139void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) { 4140 Drop(num_stack_slots); 4141 Ret(); 4142} 4143 4144void LiftoffAssembler::CallC(const ValueKindSig* sig, 4145 const LiftoffRegister* args, 4146 const LiftoffRegister* rets, 4147 ValueKind out_argument_kind, int stack_bytes, 4148 ExternalReference ext_ref) { 4149 // Arguments are passed by pushing them all to the stack and then passing 4150 // a pointer to them. 4151 DCHECK(IsAligned(stack_bytes, kSystemPointerSize)); 4152 // Reserve space in the stack. 4153 AllocateStackSpace(stack_bytes); 4154 4155 int arg_bytes = 0; 4156 for (ValueKind param_kind : sig->parameters()) { 4157 switch (param_kind) { 4158 case kI32: 4159 str(args->gp(), MemOperand(sp, arg_bytes)); 4160 break; 4161 case kI64: 4162 str(args->low_gp(), MemOperand(sp, arg_bytes)); 4163 str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize)); 4164 break; 4165 case kF32: 4166 vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes)); 4167 break; 4168 case kF64: 4169 vstr(args->fp(), MemOperand(sp, arg_bytes)); 4170 break; 4171 case kS128: 4172 vstr(args->low_fp(), MemOperand(sp, arg_bytes)); 4173 vstr(args->high_fp(), 4174 MemOperand(sp, arg_bytes + 2 * kSystemPointerSize)); 4175 break; 4176 default: 4177 UNREACHABLE(); 4178 } 4179 args++; 4180 arg_bytes += value_kind_size(param_kind); 4181 } 4182 DCHECK_LE(arg_bytes, stack_bytes); 4183 4184 // Pass a pointer to the buffer with the arguments to the C function. 4185 mov(r0, sp); 4186 4187 // Now call the C function. 4188 constexpr int kNumCCallArgs = 1; 4189 PrepareCallCFunction(kNumCCallArgs); 4190 CallCFunction(ext_ref, kNumCCallArgs); 4191 4192 // Move return value to the right register. 
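  // {result_reg} is advanced past the register return value (if any) so that
  // it names the register for the stack out-argument below.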
4193 const LiftoffRegister* result_reg = rets; 4194 if (sig->return_count() > 0) { 4195 DCHECK_EQ(1, sig->return_count()); 4196 constexpr Register kReturnReg = r0; 4197 if (kReturnReg != rets->gp()) { 4198 Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0)); 4199 } 4200 result_reg++; 4201 } 4202 4203 // Load potential output value from the buffer on the stack. 4204 if (out_argument_kind != kVoid) { 4205 switch (out_argument_kind) { 4206 case kI32: 4207 ldr(result_reg->gp(), MemOperand(sp)); 4208 break; 4209 case kI64: 4210 ldr(result_reg->low_gp(), MemOperand(sp)); 4211 ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize)); 4212 break; 4213 case kF32: 4214 vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp)); 4215 break; 4216 case kF64: 4217 vldr(result_reg->fp(), MemOperand(sp)); 4218 break; 4219 case kS128: 4220 vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2), 4221 NeonMemOperand(sp)); 4222 break; 4223 default: 4224 UNREACHABLE(); 4225 } 4226 } 4227 add(sp, sp, Operand(stack_bytes)); 4228} 4229 4230void LiftoffAssembler::CallNativeWasmCode(Address addr) { 4231 Call(addr, RelocInfo::WASM_CALL); 4232} 4233 4234void LiftoffAssembler::TailCallNativeWasmCode(Address addr) { 4235 Jump(addr, RelocInfo::WASM_CALL); 4236} 4237 4238void LiftoffAssembler::CallIndirect(const ValueKindSig* sig, 4239 compiler::CallDescriptor* call_descriptor, 4240 Register target) { 4241 DCHECK(target != no_reg); 4242 Call(target); 4243} 4244 4245void LiftoffAssembler::TailCallIndirect(Register target) { 4246 DCHECK(target != no_reg); 4247 Jump(target); 4248} 4249 4250void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { 4251 // A direct call to a wasm runtime stub defined in this module. 4252 // Just encode the stub index. This will be patched at relocation. 4253 Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL); 4254} 4255 4256void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { 4257 AllocateStackSpace(size); 4258 mov(addr, sp); 4259} 4260 4261void LiftoffAssembler::DeallocateStackSlot(uint32_t size) { 4262 add(sp, sp, Operand(size)); 4263} 4264 4265void LiftoffAssembler::MaybeOSR() {} 4266 4267void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src, 4268 ValueKind kind) { 4269 if (kind == kF32) { 4270 FloatRegister src_f = liftoff::GetFloatRegister(src); 4271 VFPCompareAndSetFlags(src_f, src_f); 4272 } else { 4273 DCHECK_EQ(kind, kF64); 4274 VFPCompareAndSetFlags(src, src); 4275 } 4276 4277 // Store a non-zero value if src is NaN. 
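  // The {dst} register doubles as the stored value: it holds the target
  // address, which is never zero.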
4278 str(dst, MemOperand(dst), ne); // x != x iff isnan(x) 4279} 4280 4281void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src, 4282 Register tmp_gp, 4283 LiftoffRegister tmp_s128, 4284 ValueKind lane_kind) { 4285 QwNeonRegister src_q = liftoff::GetSimd128Register(src); 4286 QwNeonRegister tmp_q = liftoff::GetSimd128Register(tmp_s128); 4287 if (lane_kind == kF32) { 4288 vpadd(tmp_q.low(), src_q.low(), src_q.high()); 4289 LowDwVfpRegister tmp_d = 4290 LowDwVfpRegister::from_code(tmp_s128.low_fp().code()); 4291 vadd(tmp_d.low(), tmp_d.low(), tmp_d.high()); 4292 } else { 4293 DCHECK_EQ(lane_kind, kF64); 4294 vadd(tmp_q.low(), src_q.low(), src_q.high()); 4295 } 4296 emit_set_if_nan(dst, tmp_q.low(), lane_kind); 4297} 4298 4299void LiftoffStackSlots::Construct(int param_slots) { 4300 DCHECK_LT(0, slots_.size()); 4301 SortInPushOrder(); 4302 int last_stack_slot = param_slots; 4303 for (auto& slot : slots_) { 4304 const int stack_slot = slot.dst_slot_; 4305 int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize; 4306 DCHECK_LT(0, stack_decrement); 4307 last_stack_slot = stack_slot; 4308 const LiftoffAssembler::VarState& src = slot.src_; 4309 switch (src.loc()) { 4310 case LiftoffAssembler::VarState::kStack: { 4311 switch (src.kind()) { 4312 // i32 and i64 can be treated as similar cases, i64 being previously 4313 // split into two i32 registers 4314 case kI32: 4315 case kI64: 4316 case kF32: 4317 case kRef: 4318 case kOptRef: { 4319 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize); 4320 UseScratchRegisterScope temps(asm_); 4321 Register scratch = temps.Acquire(); 4322 asm_->ldr(scratch, 4323 liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_)); 4324 asm_->Push(scratch); 4325 } break; 4326 case kF64: { 4327 asm_->AllocateStackSpace(stack_decrement - kDoubleSize); 4328 UseScratchRegisterScope temps(asm_); 4329 DwVfpRegister scratch = temps.AcquireD(); 4330 asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_)); 4331 asm_->vpush(scratch); 4332 } break; 4333 case kS128: { 4334 asm_->AllocateStackSpace(stack_decrement - kSimd128Size); 4335 MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_); 4336 UseScratchRegisterScope temps(asm_); 4337 Register addr = liftoff::CalculateActualAddress( 4338 asm_, &temps, mem_op.rn(), no_reg, mem_op.offset()); 4339 QwNeonRegister scratch = temps.AcquireQ(); 4340 asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr)); 4341 asm_->vpush(scratch); 4342 break; 4343 } 4344 default: 4345 UNREACHABLE(); 4346 } 4347 break; 4348 } 4349 case LiftoffAssembler::VarState::kRegister: { 4350 int pushed_bytes = SlotSizeInBytes(slot); 4351 asm_->AllocateStackSpace(stack_decrement - pushed_bytes); 4352 switch (src.kind()) { 4353 case kI64: { 4354 LiftoffRegister reg = 4355 slot.half_ == kLowWord ? 
src.reg().low() : src.reg().high(); 4356 asm_->push(reg.gp()); 4357 } break; 4358 case kI32: 4359 case kRef: 4360 case kOptRef: 4361 asm_->push(src.reg().gp()); 4362 break; 4363 case kF32: 4364 asm_->vpush(liftoff::GetFloatRegister(src.reg().fp())); 4365 break; 4366 case kF64: 4367 asm_->vpush(src.reg().fp()); 4368 break; 4369 case kS128: 4370 asm_->vpush(liftoff::GetSimd128Register(src.reg())); 4371 break; 4372 default: 4373 UNREACHABLE(); 4374 } 4375 break; 4376 } 4377 case LiftoffAssembler::VarState::kIntConst: { 4378 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize); 4379 DCHECK(src.kind() == kI32 || src.kind() == kI64); 4380 UseScratchRegisterScope temps(asm_); 4381 Register scratch = temps.Acquire(); 4382 // The high word is the sign extension of the low word. 4383 asm_->mov(scratch, 4384 Operand(slot.half_ == kLowWord ? src.i32_const() 4385 : src.i32_const() >> 31)); 4386 asm_->push(scratch); 4387 break; 4388 } 4389 } 4390 } 4391} 4392 4393} // namespace wasm 4394} // namespace internal 4395} // namespace v8 4396 4397#endif // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_ 4398