// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

#include "src/codegen/arm/assembler-arm.h"

#if V8_TARGET_ARCH_ARM

#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/arm/assembler-arm-inl.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/string-constants.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {

static const unsigned kArmv6 = 0u;
static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);

static unsigned CpuFeaturesFromCommandLine() {
  unsigned result;
  if (strcmp(FLAG_arm_arch, "armv8") == 0) {
    result = kArmv8;
  } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
    result = kArmv7WithSudiv;
  } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
    result = kArmv7;
  } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
    result = kArmv6;
  } else {
    fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
            FLAG_arm_arch);
    fprintf(stderr,
            "Supported values are: armv8\n"
            "                      armv7+sudiv\n"
            "                      armv7\n"
            "                      armv6\n");
    FATAL("arm-arch");
  }

  // If any of the old (deprecated) flags are specified, print a warning, but
  // otherwise try to respect them for now.
  // TODO(jbramley): When all the old bots have been updated, remove this.
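  // A worked example of the selection below (hypothetical flags, shown only
  // for illustration): --arm_arch=armv7 --enable_sudiv=true starts from
  // kArmv7, the deprecated flag sets enable_sudiv, and the selection logic
  // then upgrades the result to kArmv7WithSudiv.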
  if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
      FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
      FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
    // As an approximation of the old behaviour, set the default values from
    // the arm_arch setting, then apply the flags over the top.
    bool enable_armv7 = (result & (1u << ARMv7)) != 0;
    bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
    bool enable_32dregs = (result & (1u << ARMv7)) != 0;
    bool enable_neon = (result & (1u << ARMv7)) != 0;
    bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
    bool enable_armv8 = (result & (1u << ARMv8)) != 0;
    if (FLAG_enable_armv7.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv7 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv7 = FLAG_enable_armv7.value;
    }
    if (FLAG_enable_vfp3.has_value) {
      fprintf(stderr,
              "Warning: --enable_vfp3 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_vfp3 = FLAG_enable_vfp3.value;
    }
    if (FLAG_enable_32dregs.has_value) {
      fprintf(stderr,
              "Warning: --enable_32dregs is deprecated. "
              "Use --arm_arch instead.\n");
      enable_32dregs = FLAG_enable_32dregs.value;
    }
    if (FLAG_enable_neon.has_value) {
      fprintf(stderr,
              "Warning: --enable_neon is deprecated. "
              "Use --arm_arch instead.\n");
      enable_neon = FLAG_enable_neon.value;
    }
    if (FLAG_enable_sudiv.has_value) {
      fprintf(stderr,
              "Warning: --enable_sudiv is deprecated. "
              "Use --arm_arch instead.\n");
      enable_sudiv = FLAG_enable_sudiv.value;
    }
    if (FLAG_enable_armv8.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv8 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv8 = FLAG_enable_armv8.value;
    }
    // Emulate the old implications.
    if (enable_armv8) {
      enable_vfp3 = true;
      enable_neon = true;
      enable_32dregs = true;
      enable_sudiv = true;
    }
    // Select the best available configuration.
    if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
      if (enable_sudiv) {
        if (enable_armv8) {
          result = kArmv8;
        } else {
          result = kArmv7WithSudiv;
        }
      } else {
        result = kArmv7;
      }
    } else {
      result = kArmv6;
    }
  }
  return result;
}

// Get the CPU features enabled by the build.
// For cross compilation the preprocessor symbols such as
// CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
// enable ARMv7 and VFPv3 instructions when building the snapshot. However,
// these flags should be consistent with a supported ARM configuration:
//  "armv6":       ARMv6 + VFPv2
//  "armv7":       ARMv7 + VFPv3-D32 + NEON
//  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
//  "armv8":       ARMv8 (+ all of the above)
static constexpr unsigned CpuFeaturesFromCompiler() {
// TODO(jbramley): Once the build flags are simplified, these tests should
// also be simplified.

// Check *architectural* implications.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
#endif
#if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
// V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
// VFPv3 isn't available before ARMv7.
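// For example (illustrative only): a build defining CAN_USE_VFP3_INSTRUCTIONS
// without CAN_USE_ARMV7_INSTRUCTIONS, or vice versa, trips this check.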
#error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
#endif
#if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif

// Find compiler-implied features.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
    defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv8;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7WithSudiv;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
    defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7;
#else
  return kArmv6;
#endif
}

bool CpuFeatures::SupportsWasmSimd128() { return IsSupported(NEON); }

void CpuFeatures::ProbeImpl(bool cross_compile) {
  dcache_line_size_ = 64;

  unsigned command_line = CpuFeaturesFromCommandLine();
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= command_line & CpuFeaturesFromCompiler();
    return;
  }

#ifndef __arm__
  // For the simulator build, use whatever the flags specify.
  supported_ |= command_line;

#else  // __arm__
  // Probe for additional features at runtime.
  base::CPU cpu;
  // Runtime detection is slightly fuzzy, and some inferences are necessary.
  unsigned runtime = kArmv6;
  // NEON and VFPv3 imply at least ARMv7-A.
  if (cpu.has_neon() && cpu.has_vfp3_d32()) {
    DCHECK(cpu.has_vfp3());
    runtime |= kArmv7;
    if (cpu.has_idiva()) {
      runtime |= kArmv7WithSudiv;
      if (cpu.architecture() >= 8) {
        runtime |= kArmv8;
      }
    }
  }

  // Use the best of the features found by CPU detection and those inferred
  // from the build system. In both cases, restrict available features using
  // the command-line. Note that the command-line flags are very permissive
  // (kArmv8) by default.
  supported_ |= command_line & CpuFeaturesFromCompiler();
  supported_ |= command_line & runtime;

  // Additional tuning options.

  // ARM Cortex-A9 and Cortex-A5 have 32 byte cachelines.
  if (cpu.implementer() == base::CPU::kArm &&
      (cpu.part() == base::CPU::kArmCortexA5 ||
       cpu.part() == base::CPU::kArmCortexA9)) {
    dcache_line_size_ = 32;
  }
#endif

  DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
  DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));

  // Set a static value on whether Simd is supported.
  // This variable is only used for certain archs to query SupportWasmSimd128()
  // at runtime in builtins using an extern ref. Other callers should use
  // CpuFeatures::SupportWasmSimd128().
  CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
}

void CpuFeatures::PrintTarget() {
  const char* arm_arch = nullptr;
  const char* arm_target_type = "";
  const char* arm_no_probe = "";
  const char* arm_fpu = "";
  const char* arm_thumb = "";
  const char* arm_float_abi = nullptr;

#if !defined __arm__
  arm_target_type = " simulator";
#endif

#if defined ARM_TEST_NO_FEATURE_PROBE
  arm_no_probe = " noprobe";
#endif

#if defined CAN_USE_ARMV8_INSTRUCTIONS
  arm_arch = "arm v8";
#elif defined CAN_USE_ARMV7_INSTRUCTIONS
  arm_arch = "arm v7";
#else
  arm_arch = "arm v6";
#endif

#if defined CAN_USE_NEON
  arm_fpu = " neon";
#elif defined CAN_USE_VFP3_INSTRUCTIONS
#if defined CAN_USE_VFP32DREGS
  arm_fpu = " vfp3";
#else
  arm_fpu = " vfp3-d16";
#endif
#else
  arm_fpu = " vfp2";
#endif

#ifdef __arm__
  arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
#elif USE_EABI_HARDFLOAT
  arm_float_abi = "hard";
#else
  arm_float_abi = "softfp";
#endif

#if defined __arm__ && (defined __thumb__) || (defined __thumb2__)
  arm_thumb = " thumb";
#endif

  printf("target%s%s %s%s%s %s\n", arm_target_type, arm_no_probe, arm_arch,
         arm_fpu, arm_thumb, arm_float_abi);
}

void CpuFeatures::PrintFeatures() {
  printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
         CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
         CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
         CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
#ifdef __arm__
  bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
  bool eabi_hardfloat = true;
#else
  bool eabi_hardfloat = false;
#endif
  printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
}

// -----------------------------------------------------------------------------
// Implementation of RelocInfo

// static
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded. Being
  // specially coded on ARM means that it is a movw/movt instruction. We don't
  // generate those for relocatable pointers.
  return false;
}

bool RelocInfo::IsInConstantPool() {
  return Assembler::is_constant_pool_load(pc_);
}

uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  return static_cast<uint32_t>(
      Assembler::target_address_at(pc_, constant_pool_));
}

// -----------------------------------------------------------------------------
// Implementation of Operand and MemOperand
// See assembler-arm-inl.h for inlined constructors

Operand::Operand(Handle<HeapObject> handle) {
  rm_ = no_reg;
  value_.immediate = static_cast<intptr_t>(handle.address());
  rmode_ = RelocInfo::FULL_EMBEDDED_OBJECT;
}

Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
  DCHECK(is_uint5(shift_imm));

  rm_ = rm;
  rs_ = no_reg;
  shift_op_ = shift_op;
  shift_imm_ = shift_imm & 31;

  if ((shift_op == ROR) && (shift_imm == 0)) {
    // ROR #0 is functionally equivalent to LSL #0 and this allows us to
    // encode RRX as ROR #0 (See below).
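    // For example (illustrative only): Operand(r0, ROR, 0) is emitted with
    // the LSL #0 encoding, which has identical semantics, so the ROR #0 bit
    // pattern stays reserved for RRX.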
    shift_op_ = LSL;
  } else if (shift_op == RRX) {
    // encoded as ROR with shift_imm == 0
    DCHECK_EQ(shift_imm, 0);
    shift_op_ = ROR;
    shift_imm_ = 0;
  }
}

Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
  DCHECK(shift_op != RRX);
  rm_ = rm;
  rs_ = no_reg;
  shift_op_ = shift_op;
  rs_ = rs;
}

Operand Operand::EmbeddedNumber(double value) {
  int32_t smi;
  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(value);
  return result;
}

Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(str);
  return result;
}

MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
    : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
  // Accesses below the stack pointer are not safe, and are prohibited by the
  // ABI. We can check obvious violations here.
  if (rn == sp) {
    if (am == Offset) DCHECK_LE(0, offset);
    if (am == NegOffset) DCHECK_GE(0, offset);
  }
}

MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
    : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}

MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
                       int shift_imm, AddrMode am)
    : rn_(rn),
      rm_(rm),
      shift_op_(shift_op),
      shift_imm_(shift_imm & 31),
      am_(am) {
  DCHECK(is_uint5(shift_imm));
}

NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
    : rn_(rn), rm_(am == Offset ? pc : sp) {
  DCHECK((am == Offset) || (am == PostIndex));
  SetAlignment(align);
}

NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
    : rn_(rn), rm_(rm) {
  SetAlignment(align);
}

void NeonMemOperand::SetAlignment(int align) {
  switch (align) {
    case 0:
      align_ = 0;
      break;
    case 64:
      align_ = 1;
      break;
    case 128:
      align_ = 2;
      break;
    case 256:
      align_ = 3;
      break;
    default:
      UNREACHABLE();
  }
}

void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
  for (auto& request : heap_object_requests_) {
    Handle<HeapObject> object;
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber:
        object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
            request.heap_number());
        break;
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
        object = str->AllocateStringConstant(isolate);
        break;
      }
    }
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
    Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
        object.address();
  }
}

// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.

// str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
// register r is not encoded.
const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
// ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
// register r is not encoded.
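// (For reference, and assuming the al condition with a zero Rd field, these
// two patterns correspond to the machine words 0xE52D0004 for push, i.e.
// str r, [sp, #-4]!, and 0xE49D0004 for pop, i.e. ldr r, [sp], #4.)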
const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
// ldr rd, [pc, #offset]
const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
// Pc-relative call or jump to a signed imm24 offset.
// bl pc + #offset
// b pc + #offset
const Instr kBOrBlPCImmedMask = 0xE * B24;
const Instr kBOrBlPCImmedPattern = 0xA * B24;
// vldr dd, [pc, #offset]
const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
// blxcc rm
const Instr kBlxRegMask =
    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern = B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
const Instr kBlxIp = al | kBlxRegPattern | ip.code();
const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
const Instr kMovMvnPattern = 0xD * B21;
const Instr kMovMvnFlip = B22;
const Instr kMovLeaveCCMask = 0xDFF * B16;
const Instr kMovLeaveCCPattern = 0x1A0 * B16;
const Instr kMovwPattern = 0x30 * B20;
const Instr kMovtPattern = 0x34 * B20;
const Instr kMovwLeaveCCFlip = 0x5 * B21;
const Instr kMovImmedMask = 0x7F * B21;
const Instr kMovImmedPattern = 0x1D * B21;
const Instr kOrrImmedMask = 0x7F * B21;
const Instr kOrrImmedPattern = 0x1C * B21;
const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
const Instr kCmpCmnPattern = 0x15 * B20;
const Instr kCmpCmnFlip = B21;
const Instr kAddSubFlip = 0x6 * B21;
const Instr kAndBicFlip = 0xE * B21;

// A mask for the Rd register for push, pop, ldr, str instructions.
const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
const Instr kLdrRegFpNegOffsetPattern =
    al | B26 | L | NegOffset | fp.code() * B16;
const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
const Instr kLdrStrInstrTypeMask = 0xFFFF0000;

Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
      pending_32_bit_constants_(),
      scratch_register_list_({ip}) {
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
  constant_pool_deadline_ = kMaxInt;
  const_pool_blocked_nesting_ = 0;
  no_const_pool_before_ = 0;
  first_const_pool_32_use_ = -1;
  last_bound_pos_ = 0;
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Register objects tend to be abstracted and survive between scopes, so
    // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To
    // make its use consistent with other features, we always enable it if we
    // can.
    EnableCpuFeature(VFP32DREGS);
    // Make sure we pick two D registers which alias a Q register. This way, we
    // can use a Q as a scratch if NEON is supported.
    scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
  } else {
    // When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
    // cannot use it as a scratch.
    scratch_vfp_register_list_ = d14.ToVfpRegList();
  }
}

Assembler::~Assembler() {
  DCHECK_EQ(const_pool_blocked_nesting_, 0);
  DCHECK_EQ(first_const_pool_32_use_, -1);
}

void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
  // As a crutch to avoid having to add manual Align calls wherever we use a
  // raw workflow to create Code objects (mostly in tests), add another Align
  // call here. It does no harm - the end of the Code object is aligned to the
  // (larger) kCodeAlignment anyways.
  // TODO(jgruber): Consider moving responsibility for proper alignment to
  // metadata table builders (safepoint, handler, constant pool, code
  // comments).
  DataAlign(Code::kMetadataAlignment);

  // Emit constant pool if necessary.
  CheckConstPool(true, false);
  DCHECK(pending_32_bit_constants_.empty());

  int code_comments_size = WriteCodeComments();

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->safepoint_table_offset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
}

void Assembler::Align(int m) {
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
  DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}

Condition Assembler::GetCondition(Instr instr) {
  return Instruction::ConditionField(instr);
}

bool Assembler::IsLdrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
}

bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
  return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
}

int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff12Mask;  // Zero extended offset.
  return positive ? offset : -offset;
}

int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff8Mask;  // Zero extended offset.
  offset <<= 2;
  return positive ? offset : -offset;
}

Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}

Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint10(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset. Its bottom 2 bits are zero.
  return (instr & ~kOff8Mask) | (offset >> 2);
}

bool Assembler::IsStrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
}

Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsStrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}

bool Assembler::IsAddRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
}

Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsAddRegisterImmediate(instr));
  DCHECK_GE(offset, 0);
  DCHECK(is_uint12(offset));
  // Set the offset.
  return (instr & ~kOff12Mask) | offset;
}

Register Assembler::GetRd(Instr instr) {
  return Register::from_code(Instruction::RdValue(instr));
}

Register Assembler::GetRn(Instr instr) {
  return Register::from_code(Instruction::RnValue(instr));
}

Register Assembler::GetRm(Instr instr) {
  return Register::from_code(Instruction::RmValue(instr));
}

bool Assembler::IsPush(Instr instr) {
  return ((instr & ~kRdMask) == kPushRegPattern);
}

bool Assembler::IsPop(Instr instr) {
  return ((instr & ~kRdMask) == kPopRegPattern);
}

bool Assembler::IsStrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
}

bool Assembler::IsLdrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
}

bool Assembler::IsStrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
}

bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
}

bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // ldr<cond> <Rd>, [pc +/- offset_12].
  return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
}

bool Assembler::IsBOrBlPcImmediateOffset(Instr instr) {
  return (instr & kBOrBlPCImmedMask) == kBOrBlPCImmedPattern;
}

bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // vldr<cond> <Dd>, [pc +/- offset_10].
  return (instr & kVldrDPCMask) == kVldrDPCPattern;
}

bool Assembler::IsBlxReg(Instr instr) {
  // Check the instruction is indeed a
  // blxcc <Rm>
  return (instr & kBlxRegMask) == kBlxRegPattern;
}

bool Assembler::IsBlxIp(Instr instr) {
  // Check the instruction is indeed a
  // blx ip
  return instr == kBlxIp;
}

bool Assembler::IsTstImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | TST | S);
}

bool Assembler::IsCmpRegister(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
         (CMP | S);
}

bool Assembler::IsCmpImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | CMP | S);
}

Register Assembler::GetCmpImmediateRegister(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return GetRn(instr);
}

int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}

// Labels refer to positions in the (to be) generated code.
// There are bound, linked, and unused labels.
//
// Bound labels refer to known positions in the already
// generated code. pos() is the position the label refers to.
//
// Linked labels refer to unknown positions in the code
// to be generated; pos() is the position of the last
// instruction using the label.
//
// The linked labels form a link chain by making the branch offset
// in the instruction stream point to the previous branch
// instruction using the same label.
//
// The link chain is terminated by a branch offset pointing to the
// same position.

int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  return pos + Instruction::kPcLoadDelta + imm26;
}

void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instructions.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop
    // for ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    CHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(
          options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  CHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}

void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq:
              c = "eq";
              break;
            case ne:
              c = "ne";
              break;
            case hs:
              c = "hs";
              break;
            case lo:
              c = "lo";
              break;
            case mi:
              c = "mi";
              break;
            case pl:
              c = "pl";
              break;
            case vs:
              c = "vs";
              break;
            case vc:
              c = "vc";
              break;
            case hi:
              c = "hi";
              break;
            case ls:
              c = "ls";
              break;
            case ge:
              c = "ge";
              break;
            case lt:
              c = "lt";
              break;
            case gt:
              c = "gt";
              break;
            case le:
              c = "le";
              break;
            case al:
              c = "";
              break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}

void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_) last_bound_pos_ = pos;
}

void Assembler::bind(Label* L) {
  DCHECK(!L->is_bound());  // label can only be bound once
  bind_to(L, pc_offset());
}

void Assembler::next(Label* L) {
  DCHECK(L->is_linked());
  int link = target_at(L->pos());
  if (link == L->pos()) {
    // Branch target points to the same instruction. This is the end of the
    // link chain.
    L->Unuse();
  } else {
    DCHECK_GE(link, 0);
    L->link_to(link);
  }
}

namespace {

// Low-level code emission routines depending on the addressing mode.
// If this returns true then you have to use the rotate_imm and immed_8
// that it returns, because it may have already changed the instruction
// to match them!
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  {
    // 32-bit immediates can be encoded as:
    //   (8-bit value, 2*N bit left rotation)
    // e.g. 0xab00 can be encoded as 0xab shifted left by 8 == 2*4, i.e.
    //   (0xab, 4)
    //
    // Check three categories which cover all possible shifter fits:
    //   1. 0x000000FF: The value is already 8-bit (no shifting necessary),
    //   2. 0x000FF000: The 8-bit value is somewhere in the middle of the
    //      32-bit value, and
    //   3. 0xF000000F: The 8-bit value is split over the beginning and end of
    //      the 32-bit value.

    // For 0x000000FF.
    if (imm32 <= 0xFF) {
      *rotate_imm = 0;
      *immed_8 = imm32;
      return true;
    }
    // For 0x000FF000, count trailing zeros and shift down to 0x000000FF. Note
    // that we have to round the trailing zeros down to the nearest multiple
    // of two, since we can only encode shifts of 2*N. Note also that we know
    // that imm32 isn't zero, since we already checked if it's less than 0xFF.
    int half_trailing_zeros = base::bits::CountTrailingZerosNonZero(imm32) / 2;
    uint32_t imm8 = imm32 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      DCHECK_GT(half_trailing_zeros, 0);
      // Rotating right by trailing_zeros is equivalent to rotating left by
      // 32 - trailing_zeros. We return rotate_right / 2, so calculate
      // (32 - trailing_zeros)/2 == 16 - trailing_zeros/2.
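      // A concrete example (illustrative only): imm32 = 0x0000AB00 has 8
      // trailing zeros, so half_trailing_zeros = 4 and imm8 = 0xAB. The
      // encoding is then (0xAB, rotate_imm = 16 - 4 = 12), and rotating
      // 0xAB right by 2 * 12 = 24 bits indeed reproduces 0x0000AB00.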
      *rotate_imm = (16 - half_trailing_zeros);
      *immed_8 = imm8;
      return true;
    }
    // For 0xF000000F, rotate by 16 to get 0x000FF000 and continue as if it
    // were that case.
    uint32_t imm32_rot16 = base::bits::RotateLeft32(imm32, 16);
    half_trailing_zeros =
        base::bits::CountTrailingZerosNonZero(imm32_rot16) / 2;
    imm8 = imm32_rot16 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      // We've rotated left by 2*8, so we can't have more than that many
      // trailing zeroes.
      DCHECK_LT(half_trailing_zeros, 8);
      // We've already rotated by 2*8, before calculating trailing_zeros/2,
      // so we need (32 - (16 + trailing_zeros))/2 == 8 - trailing_zeros/2.
      *rotate_imm = 8 - half_trailing_zeros;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD || alu_insn == SUB) {
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND || alu_insn == BIC) {
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}

// We have to use the temporary register for things that can be relocated even
// if they can be encoded in the ARM's 12 bits of immediate-offset instruction
// space. There is no guarantee that the relocated location can be similarly
// encoded.
bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    if (assembler->predictable_code_size()) return true;
    return assembler->options().record_reloc_info_for_serialization;
  } else if (RelocInfo::IsNoInfo(rmode)) {
    return false;
  }
  return true;
}

bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
  DCHECK_NOT_NULL(assembler);
  if (x.MustOutputRelocInfo(assembler)) {
    // Prefer constant pool if data is likely to be patched.
    return false;
  } else {
    // Otherwise, use immediate load if movw / movt is available.
    return CpuFeatures::IsSupported(ARMv7);
  }
}

}  // namespace

bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}

int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // the constant pool is required. First account for the instructions
    // required for the constant pool or immediate load.
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition code,
      // the constant pool or immediate load is enough, otherwise we need to
      // account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}

void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc && rd != sp ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}

void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch = (rd.is_valid() && rd != rn && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}

bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
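    // For example, mov r0, r1, LSL r2 is a register-shifted register operand:
    // rs (here r2) lands in bits 8-11 and rm (here r1) in bits 0-3, and the
    // ARM ARM makes any use of pc in these fields UNPREDICTABLE.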
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}

void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled register
    // offset; the constructors make sure that both shift_imm_ and shift_op_
    // are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}

void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset
    // separately to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc && rd != sp) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}

void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK(!rl.is_empty());
  DCHECK(rn != pc);
  emit(instr | rn.code() * B16 | rl.bits());
}

void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;
  if (offset_8 < 0) {
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0) am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code() * B16 | crd.code() * B12 | offset_8);
}

int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}

// Branch instructions.
void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool b_imm_check = is_int24(imm24);
  CHECK(b_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(cond | B27 | B25 | (imm24 & kImm24Mask));

  if (cond == al) {
    // Dead code is a good location to emit the constant pool.
    CheckConstPool(false, false);
  }
}

void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool bl_imm_check = is_int24(imm24);
  CHECK(bl_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
}

void Assembler::blx(int branch_offset) {
  DCHECK_EQ(branch_offset & 1, 0);
  int h = ((branch_offset & 2) >> 1) * B24;
  int imm24 = branch_offset >> 2;
  const bool blx_imm_check = is_int24(imm24);
  CHECK(blx_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
}

void Assembler::blx(Register target, Condition cond) {
  DCHECK(target != pc);
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX | target.code());
}

void Assembler::bx(Register target, Condition cond) {
  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BX | target.code());
}

void Assembler::b(Label* L, Condition cond) {
  CheckBuffer();
  b(branch_offset(L), cond);
}

void Assembler::bl(Label* L, Condition cond) {
  CheckBuffer();
  bl(branch_offset(L), cond);
}

void Assembler::blx(Label* L) {
  CheckBuffer();
  blx(branch_offset(L));
}

// Data-processing instructions.

void Assembler::and_(Register dst, Register src1, const Operand& src2, SBit s,
                     Condition cond) {
  AddrMode1(cond | AND | s, dst, src1, src2);
}

void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
                     Condition cond) {
  and_(dst, src1, Operand(src2), s, cond);
}

void Assembler::eor(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | EOR | s, dst, src1, src2);
}

void Assembler::eor(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | EOR | s, dst, src1, Operand(src2));
}

void Assembler::sub(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | SUB | s, dst, src1, src2);
}

void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  sub(dst, src1, Operand(src2), s, cond);
}

void Assembler::rsb(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | RSB | s, dst, src1, src2);
}

void Assembler::add(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | ADD | s, dst, src1, src2);
}

void Assembler::add(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  add(dst, src1, Operand(src2), s, cond);
}

void Assembler::adc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | ADC | s, dst, src1, src2);
}

void Assembler::sbc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | SBC | s, dst, src1, src2);
}

void Assembler::rsc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | RSC | s, dst, src1, src2);
}

void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
  AddrMode1(cond | TST | S, no_reg, src1, src2);
}

void Assembler::tst(Register src1, Register src2, Condition cond) {
  tst(src1, Operand(src2), cond);
}

void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
  AddrMode1(cond | TEQ | S, no_reg, src1, src2);
}

void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
  AddrMode1(cond | CMP | S, no_reg, src1, src2);
}

void Assembler::cmp(Register src1, Register src2, Condition cond) {
  cmp(src1, Operand(src2), cond);
}

void Assembler::cmp_raw_immediate(Register src, int raw_immediate,
src, int raw_immediate, 1617 Condition cond) { 1618 DCHECK(is_uint12(raw_immediate)); 1619 emit(cond | I | CMP | S | src.code() << 16 | raw_immediate); 1620} 1621 1622void Assembler::cmn(Register src1, const Operand& src2, Condition cond) { 1623 AddrMode1(cond | CMN | S, no_reg, src1, src2); 1624} 1625 1626void Assembler::orr(Register dst, Register src1, const Operand& src2, SBit s, 1627 Condition cond) { 1628 AddrMode1(cond | ORR | s, dst, src1, src2); 1629} 1630 1631void Assembler::orr(Register dst, Register src1, Register src2, SBit s, 1632 Condition cond) { 1633 orr(dst, src1, Operand(src2), s, cond); 1634} 1635 1636void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) { 1637 // Don't allow nop instructions in the form mov rn, rn to be generated using 1638 // the mov instruction. They must be generated using nop(int/NopMarkerTypes). 1639 DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al)); 1640 AddrMode1(cond | MOV | s, dst, no_reg, src); 1641} 1642 1643void Assembler::mov(Register dst, Register src, SBit s, Condition cond) { 1644 mov(dst, Operand(src), s, cond); 1645} 1646 1647void Assembler::mov_label_offset(Register dst, Label* label) { 1648 if (label->is_bound()) { 1649 mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag))); 1650 } else { 1651 // Emit the link to the label in the code stream followed by extra nop 1652 // instructions. 1653 // If the label is not linked, then start a new link chain by linking it to 1654 // itself, emitting pc_offset(). 1655 int link = label->is_linked() ? label->pos() : pc_offset(); 1656 label->link_to(pc_offset()); 1657 1658 // When the label is bound, these instructions will be patched with a 1659 // sequence of movw/movt or mov/orr/orr instructions. They will load the 1660 // destination register with the position of the label from the beginning 1661 // of the code. 1662 // 1663 // The link will be extracted from the first instruction and the destination 1664 // register from the second. 1665 // For ARMv7: 1666 // link 1667 // mov dst, dst 1668 // For ARMv6: 1669 // link 1670 // mov dst, dst 1671 // mov dst, dst 1672 // 1673 // When the label gets bound: target_at extracts the link and target_at_put 1674 // patches the instructions. 
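    // For instance (an assumption about the patched operand, for
    // illustration only): a label bound at position 0x100 would leave dst
    // holding 0x100 plus the Code header adjustment, via a movw/movt pair on
    // ARMv7 or a mov/orr/orr triple on ARMv6.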
1675 CHECK(is_uint24(link)); 1676 BlockConstPoolScope block_const_pool(this); 1677 emit(link); 1678 nop(dst.code()); 1679 if (!CpuFeatures::IsSupported(ARMv7)) { 1680 nop(dst.code()); 1681 } 1682 } 1683} 1684 1685void Assembler::movw(Register reg, uint32_t immediate, Condition cond) { 1686 DCHECK(IsEnabled(ARMv7)); 1687 emit(cond | 0x30 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate)); 1688} 1689 1690void Assembler::movt(Register reg, uint32_t immediate, Condition cond) { 1691 DCHECK(IsEnabled(ARMv7)); 1692 emit(cond | 0x34 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate)); 1693} 1694 1695void Assembler::bic(Register dst, Register src1, const Operand& src2, SBit s, 1696 Condition cond) { 1697 AddrMode1(cond | BIC | s, dst, src1, src2); 1698} 1699 1700void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) { 1701 AddrMode1(cond | MVN | s, dst, no_reg, src); 1702} 1703 1704void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s, 1705 Condition cond) { 1706 if (src2.IsRegister()) { 1707 mov(dst, Operand(src1, ASR, src2.rm()), s, cond); 1708 } else { 1709 mov(dst, Operand(src1, ASR, src2.immediate()), s, cond); 1710 } 1711} 1712 1713void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s, 1714 Condition cond) { 1715 if (src2.IsRegister()) { 1716 mov(dst, Operand(src1, LSL, src2.rm()), s, cond); 1717 } else { 1718 mov(dst, Operand(src1, LSL, src2.immediate()), s, cond); 1719 } 1720} 1721 1722void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s, 1723 Condition cond) { 1724 if (src2.IsRegister()) { 1725 mov(dst, Operand(src1, LSR, src2.rm()), s, cond); 1726 } else { 1727 mov(dst, Operand(src1, LSR, src2.immediate()), s, cond); 1728 } 1729} 1730 1731// Multiply instructions. 1732void Assembler::mla(Register dst, Register src1, Register src2, Register srcA, 1733 SBit s, Condition cond) { 1734 DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc); 1735 emit(cond | A | s | dst.code() * B16 | srcA.code() * B12 | src2.code() * B8 | 1736 B7 | B4 | src1.code()); 1737} 1738 1739void Assembler::mls(Register dst, Register src1, Register src2, Register srcA, 1740 Condition cond) { 1741 DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc); 1742 DCHECK(IsEnabled(ARMv7)); 1743 emit(cond | B22 | B21 | dst.code() * B16 | srcA.code() * B12 | 1744 src2.code() * B8 | B7 | B4 | src1.code()); 1745} 1746 1747void Assembler::sdiv(Register dst, Register src1, Register src2, 1748 Condition cond) { 1749 DCHECK(dst != pc && src1 != pc && src2 != pc); 1750 DCHECK(IsEnabled(SUDIV)); 1751 emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 | 1752 src2.code() * B8 | B4 | src1.code()); 1753} 1754 1755void Assembler::udiv(Register dst, Register src1, Register src2, 1756 Condition cond) { 1757 DCHECK(dst != pc && src1 != pc && src2 != pc); 1758 DCHECK(IsEnabled(SUDIV)); 1759 emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 | 1760 src2.code() * B8 | B4 | src1.code()); 1761} 1762 1763void Assembler::mul(Register dst, Register src1, Register src2, SBit s, 1764 Condition cond) { 1765 DCHECK(dst != pc && src1 != pc && src2 != pc); 1766 // dst goes in bits 16-19 for this instruction! 
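  // For example, mul(r0, r1, r2) assembles to 0xE0000291: Rd(=r0) in bits
  // 19-16, Rs(=r2) in bits 11-8, the fixed 1001 pattern in bits 7-4, and
  // Rm(=r1) in bits 3-0.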
1767 emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code()); 1768} 1769 1770void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA, 1771 Condition cond) { 1772 DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc); 1773 emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 1774 srcA.code() * B12 | src2.code() * B8 | B4 | src1.code()); 1775} 1776 1777void Assembler::smmul(Register dst, Register src1, Register src2, 1778 Condition cond) { 1779 DCHECK(dst != pc && src1 != pc && src2 != pc); 1780 emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 | 1781 src2.code() * B8 | B4 | src1.code()); 1782} 1783 1784void Assembler::smlal(Register dstL, Register dstH, Register src1, 1785 Register src2, SBit s, Condition cond) { 1786 DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc); 1787 DCHECK(dstL != dstH); 1788 emit(cond | B23 | B22 | A | s | dstH.code() * B16 | dstL.code() * B12 | 1789 src2.code() * B8 | B7 | B4 | src1.code()); 1790} 1791 1792void Assembler::smull(Register dstL, Register dstH, Register src1, 1793 Register src2, SBit s, Condition cond) { 1794 DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc); 1795 DCHECK(dstL != dstH); 1796 emit(cond | B23 | B22 | s | dstH.code() * B16 | dstL.code() * B12 | 1797 src2.code() * B8 | B7 | B4 | src1.code()); 1798} 1799 1800void Assembler::umlal(Register dstL, Register dstH, Register src1, 1801 Register src2, SBit s, Condition cond) { 1802 DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc); 1803 DCHECK(dstL != dstH); 1804 emit(cond | B23 | A | s | dstH.code() * B16 | dstL.code() * B12 | 1805 src2.code() * B8 | B7 | B4 | src1.code()); 1806} 1807 1808void Assembler::umull(Register dstL, Register dstH, Register src1, 1809 Register src2, SBit s, Condition cond) { 1810 DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc); 1811 DCHECK(dstL != dstH); 1812 emit(cond | B23 | s | dstH.code() * B16 | dstL.code() * B12 | 1813 src2.code() * B8 | B7 | B4 | src1.code()); 1814} 1815 1816// Miscellaneous arithmetic instructions. 1817void Assembler::clz(Register dst, Register src, Condition cond) { 1818 DCHECK(dst != pc && src != pc); 1819 emit(cond | B24 | B22 | B21 | 15 * B16 | dst.code() * B12 | 15 * B8 | CLZ | 1820 src.code()); 1821} 1822 1823// Saturating instructions. 1824 1825// Unsigned saturate. 1826void Assembler::usat(Register dst, int satpos, const Operand& src, 1827 Condition cond) { 1828 DCHECK(dst != pc && src.rm_ != pc); 1829 DCHECK((satpos >= 0) && (satpos <= 31)); 1830 DCHECK(src.IsImmediateShiftedRegister()); 1831 DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL)); 1832 1833 int sh = 0; 1834 if (src.shift_op_ == ASR) { 1835 sh = 1; 1836 } 1837 1838 emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 | 1839 src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code()); 1840} 1841 1842// Bitfield manipulation instructions. 1843 1844// Unsigned bit field extract. 1845// Extracts #width adjacent bits from position #lsb in a register, and 1846// writes them to the low bits of a destination register. 
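// For example, ubfx(r0, r1, 8, 4) copies bits 11:8 of r1 into bits 3:0 of r0
// and zeroes the upper 28 bits of r0.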
1847// ubfx dst, src, #lsb, #width 1848void Assembler::ubfx(Register dst, Register src, int lsb, int width, 1849 Condition cond) { 1850 DCHECK(IsEnabled(ARMv7)); 1851 DCHECK(dst != pc && src != pc); 1852 DCHECK((lsb >= 0) && (lsb <= 31)); 1853 DCHECK((width >= 1) && (width <= (32 - lsb))); 1854 emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 | 1855 lsb * B7 | B6 | B4 | src.code()); 1856} 1857 1858// Signed bit field extract. 1859// Extracts #width adjacent bits from position #lsb in a register, and 1860// writes them to the low bits of a destination register. The extracted 1861// value is sign extended to fill the destination register. 1862// sbfx dst, src, #lsb, #width 1863void Assembler::sbfx(Register dst, Register src, int lsb, int width, 1864 Condition cond) { 1865 DCHECK(IsEnabled(ARMv7)); 1866 DCHECK(dst != pc && src != pc); 1867 DCHECK((lsb >= 0) && (lsb <= 31)); 1868 DCHECK((width >= 1) && (width <= (32 - lsb))); 1869 emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 | 1870 lsb * B7 | B6 | B4 | src.code()); 1871} 1872 1873// Bit field clear. 1874// Sets #width adjacent bits at position #lsb in the destination register 1875// to zero, preserving the value of the other bits. 1876// bfc dst, #lsb, #width 1877void Assembler::bfc(Register dst, int lsb, int width, Condition cond) { 1878 DCHECK(IsEnabled(ARMv7)); 1879 DCHECK(dst != pc); 1880 DCHECK((lsb >= 0) && (lsb <= 31)); 1881 DCHECK((width >= 1) && (width <= (32 - lsb))); 1882 int msb = lsb + width - 1; 1883 emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF); 1884} 1885 1886// Bit field insert. 1887// Inserts #width adjacent bits from the low bits of the source register 1888// into position #lsb of the destination register. 1889// bfi dst, src, #lsb, #width 1890void Assembler::bfi(Register dst, Register src, int lsb, int width, 1891 Condition cond) { 1892 DCHECK(IsEnabled(ARMv7)); 1893 DCHECK(dst != pc && src != pc); 1894 DCHECK((lsb >= 0) && (lsb <= 31)); 1895 DCHECK((width >= 1) && (width <= (32 - lsb))); 1896 int msb = lsb + width - 1; 1897 emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 1898 src.code()); 1899} 1900 1901void Assembler::pkhbt(Register dst, Register src1, const Operand& src2, 1902 Condition cond) { 1903 // Instruction details available in ARM DDI 0406C.b, A8.8.125. 1904 // cond(31-28) | 01101000(27-20) | Rn(19-16) | 1905 // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0) 1906 DCHECK(dst != pc); 1907 DCHECK(src1 != pc); 1908 DCHECK(src2.IsImmediateShiftedRegister()); 1909 DCHECK(src2.rm() != pc); 1910 DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31)); 1911 DCHECK(src2.shift_op() == LSL); 1912 emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 | 1913 src2.shift_imm_ * B7 | B4 | src2.rm().code()); 1914} 1915 1916void Assembler::pkhtb(Register dst, Register src1, const Operand& src2, 1917 Condition cond) { 1918 // Instruction details available in ARM DDI 0406C.b, A8.8.125. 1919 // cond(31-28) | 01101000(27-20) | Rn(19-16) | 1920 // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0) 1921 DCHECK(dst != pc); 1922 DCHECK(src1 != pc); 1923 DCHECK(src2.IsImmediateShiftedRegister()); 1924 DCHECK(src2.rm() != pc); 1925 DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32)); 1926 DCHECK(src2.shift_op() == ASR); 1927 int asr = (src2.shift_imm_ == 32) ? 
0 : src2.shift_imm_; 1928 emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 | asr * B7 | 1929 B6 | B4 | src2.rm().code()); 1930} 1931 1932void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) { 1933 // Instruction details available in ARM DDI 0406C.b, A8.8.233. 1934 // cond(31-28) | 01101010(27-20) | 1111(19-16) | 1935 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1936 DCHECK(dst != pc); 1937 DCHECK(src != pc); 1938 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 1939 emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 | 1940 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code()); 1941} 1942 1943void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate, 1944 Condition cond) { 1945 // Instruction details available in ARM DDI 0406C.b, A8.8.233. 1946 // cond(31-28) | 01101010(27-20) | Rn(19-16) | 1947 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1948 DCHECK(dst != pc); 1949 DCHECK(src1 != pc); 1950 DCHECK(src2 != pc); 1951 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 1952 emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 | 1953 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code()); 1954} 1955 1956void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) { 1957 // Instruction details available in ARM DDI 0406C.b, A8.8.235. 1958 // cond(31-28) | 01101011(27-20) | 1111(19-16) | 1959 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1960 DCHECK(dst != pc); 1961 DCHECK(src != pc); 1962 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 1963 emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 | 1964 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code()); 1965} 1966 1967void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate, 1968 Condition cond) { 1969 // Instruction details available in ARM DDI 0406C.b, A8.8.235. 1970 // cond(31-28) | 01101011(27-20) | Rn(19-16) | 1971 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1972 DCHECK(dst != pc); 1973 DCHECK(src1 != pc); 1974 DCHECK(src2 != pc); 1975 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 1976 emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 | 1977 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code()); 1978} 1979 1980void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) { 1981 // Instruction details available in ARM DDI 0406C.b, A8.8.274. 1982 // cond(31-28) | 01101110(27-20) | 1111(19-16) | 1983 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1984 DCHECK(dst != pc); 1985 DCHECK(src != pc); 1986 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 1987 emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 | 1988 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code()); 1989} 1990 1991void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate, 1992 Condition cond) { 1993 // Instruction details available in ARM DDI 0406C.b, A8.8.271. 
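  // uxtab is the accumulating form of uxtb: dst = src1 + ZeroExtend(byte of
  // rotated src2). For example, with rotate == 0, uxtab(r0, r1, r2) computes
  // r0 = r1 + (r2 & 0xFF).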
1994 // cond(31-28) | 01101110(27-20) | Rn(19-16) | 1995 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 1996 DCHECK(dst != pc); 1997 DCHECK(src1 != pc); 1998 DCHECK(src2 != pc); 1999 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 2000 emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 | 2001 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code()); 2002} 2003 2004void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) { 2005 // Instruction details available in ARM DDI 0406C.b, A8.8.275. 2006 // cond(31-28) | 01101100(27-20) | 1111(19-16) | 2007 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 2008 DCHECK(dst != pc); 2009 DCHECK(src != pc); 2010 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 2011 emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 | 2012 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code()); 2013} 2014 2015void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) { 2016 // Instruction details available in ARM DDI 0406C.b, A8.8.276. 2017 // cond(31-28) | 01101111(27-20) | 1111(19-16) | 2018 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 2019 DCHECK(dst != pc); 2020 DCHECK(src != pc); 2021 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 2022 emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 | 2023 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code()); 2024} 2025 2026void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate, 2027 Condition cond) { 2028 // Instruction details available in ARM DDI 0406C.b, A8.8.273. 2029 // cond(31-28) | 01101111(27-20) | Rn(19-16) | 2030 // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) 2031 DCHECK(dst != pc); 2032 DCHECK(src1 != pc); 2033 DCHECK(src2 != pc); 2034 DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24); 2035 emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 | 2036 ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code()); 2037} 2038 2039void Assembler::rbit(Register dst, Register src, Condition cond) { 2040 // Instruction details available in ARM DDI 0406C.b, A8.8.144. 2041 // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0) 2042 DCHECK(IsEnabled(ARMv7)); 2043 DCHECK(dst != pc); 2044 DCHECK(src != pc); 2045 emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code()); 2046} 2047 2048void Assembler::rev(Register dst, Register src, Condition cond) { 2049 // Instruction details available in ARM DDI 0406C.b, A8.8.144. 2050 // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0) 2051 DCHECK(dst != pc); 2052 DCHECK(src != pc); 2053 emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code()); 2054} 2055 2056// Status register access instructions. 2057void Assembler::mrs(Register dst, SRegister s, Condition cond) { 2058 DCHECK(dst != pc); 2059 emit(cond | B24 | s | 15 * B16 | dst.code() * B12); 2060} 2061 2062void Assembler::msr(SRegisterFieldMask fields, const Operand& src, 2063 Condition cond) { 2064 DCHECK_NE(fields & 0x000F0000, 0); // At least one field must be set. 2065 DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR)); 2066 Instr instr; 2067 if (src.IsImmediate()) { 2068 // Immediate. 
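    // A data-processing immediate is an 8-bit value rotated right by an even
    // amount (2 * rotate_imm); e.g. 0xFF000000 fits as immed_8 = 0xFF with
    // rotate_imm = 4, whereas 0xFF1 does not and takes the scratch-register
    // path below.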
2069 uint32_t rotate_imm; 2070 uint32_t immed_8; 2071 if (src.MustOutputRelocInfo(this) || 2072 !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) { 2073 UseScratchRegisterScope temps(this); 2074 Register scratch = temps.Acquire(); 2075 // Immediate operand cannot be encoded, load it first to a scratch 2076 // register. 2077 Move32BitImmediate(scratch, src); 2078 msr(fields, Operand(scratch), cond); 2079 return; 2080 } 2081 instr = I | rotate_imm * B8 | immed_8; 2082 } else { 2083 DCHECK(src.IsRegister()); // Only rm is allowed. 2084 instr = src.rm_.code(); 2085 } 2086 emit(cond | instr | B24 | B21 | fields | 15 * B12); 2087} 2088 2089// Load/Store instructions. 2090void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) { 2091 AddrMode2(cond | B26 | L, dst, src); 2092} 2093 2094void Assembler::str(Register src, const MemOperand& dst, Condition cond) { 2095 AddrMode2(cond | B26, src, dst); 2096} 2097 2098void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) { 2099 AddrMode2(cond | B26 | B | L, dst, src); 2100} 2101 2102void Assembler::strb(Register src, const MemOperand& dst, Condition cond) { 2103 AddrMode2(cond | B26 | B, src, dst); 2104} 2105 2106void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) { 2107 AddrMode3(cond | L | B7 | H | B4, dst, src); 2108} 2109 2110void Assembler::strh(Register src, const MemOperand& dst, Condition cond) { 2111 AddrMode3(cond | B7 | H | B4, src, dst); 2112} 2113 2114void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) { 2115 AddrMode3(cond | L | B7 | S6 | B4, dst, src); 2116} 2117 2118void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) { 2119 AddrMode3(cond | L | B7 | S6 | H | B4, dst, src); 2120} 2121 2122void Assembler::ldrd(Register dst1, Register dst2, const MemOperand& src, 2123 Condition cond) { 2124 DCHECK(src.rm() == no_reg); 2125 DCHECK(dst1 != lr); // r14. 2126 DCHECK_EQ(0, dst1.code() % 2); 2127 DCHECK_EQ(dst1.code() + 1, dst2.code()); 2128 AddrMode3(cond | B7 | B6 | B4, dst1, src); 2129} 2130 2131void Assembler::strd(Register src1, Register src2, const MemOperand& dst, 2132 Condition cond) { 2133 DCHECK(dst.rm() == no_reg); 2134 DCHECK(src1 != lr); // r14. 2135 DCHECK_EQ(0, src1.code() % 2); 2136 DCHECK_EQ(src1.code() + 1, src2.code()); 2137 AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst); 2138} 2139 2140void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) { 2141 AddrMode am = Offset; 2142 if (imm12 < 0) { 2143 imm12 = -imm12; 2144 am = NegOffset; 2145 } 2146 DCHECK(is_uint12(imm12)); 2147 emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12); 2148} 2149 2150// Load/Store exclusive instructions. 2151void Assembler::ldrex(Register dst, Register src, Condition cond) { 2152 // Instruction details available in ARM DDI 0406C.b, A8.8.75. 2153 // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0) 2154 DCHECK(dst != pc); 2155 DCHECK(src != pc); 2156 emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F); 2157} 2158 2159void Assembler::strex(Register src1, Register src2, Register dst, 2160 Condition cond) { 2161 // Instruction details available in ARM DDI 0406C.b, A8.8.212. 
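  // ldrex/strex form the exclusive-access pair. A minimal sketch of an atomic
  // increment built on them (illustrative only, not emitted anywhere here):
  //   Label retry;
  //   bind(&retry);
  //   ldrex(r1, r0);              // r1 = [r0], opens the exclusive monitor.
  //   add(r1, r1, Operand(1));
  //   strex(r2, r1, r0);          // [r0] = r1 if still exclusive; r2 = status.
  //   teq(r2, Operand(0));        // 0 means the store succeeded.
  //   b(&retry, ne);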
  // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
       src2.code());
}

void Assembler::ldrexb(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.76.
  // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
       0xF9F);
}

void Assembler::strexb(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

void Assembler::ldrexh(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
       dst.code() * B12 | 0xF9F);
}

void Assembler::strexh(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

void Assembler::ldrexd(Register dst1, Register dst2, Register src,
                       Condition cond) {
  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst1 != lr);  // r14.
  // The pair of destination registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
       0xF9F);
}

void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
                       Condition cond) {
  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(src1 != lr);  // r14.
  // The pair of source registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
       0xF9 * B4 | src1.code());
}

// Preload instructions.
void Assembler::pld(const MemOperand& address) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
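  // PLD is only a hint: it cannot fault and has no architecturally visible
  // effect, which is why it takes no condition (kSpecialCondition below).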
2246 // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) | 2247 // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) | 2248 DCHECK(address.rm() == no_reg); 2249 DCHECK(address.am() == Offset); 2250 int U = B23; 2251 int offset = address.offset(); 2252 if (offset < 0) { 2253 offset = -offset; 2254 U = 0; 2255 } 2256 DCHECK_LT(offset, 4096); 2257 emit(kSpecialCondition | B26 | B24 | U | B22 | B20 | 2258 address.rn().code() * B16 | 0xF * B12 | offset); 2259} 2260 2261// Load/Store multiple instructions. 2262void Assembler::ldm(BlockAddrMode am, Register base, RegList dst, 2263 Condition cond) { 2264 // ABI stack constraint: ldmxx base, {..sp..} base != sp is not restartable. 2265 DCHECK(base == sp || !dst.has(sp)); 2266 2267 AddrMode4(cond | B27 | am | L, base, dst); 2268 2269 // Emit the constant pool after a function return implemented by ldm ..{..pc}. 2270 if (cond == al && dst.has(pc)) { 2271 // There is a slight chance that the ldm instruction was actually a call, 2272 // in which case it would be wrong to return into the constant pool; we 2273 // recognize this case by checking if the emission of the pool was blocked 2274 // at the pc of the ldm instruction by a mov lr, pc instruction; if this is 2275 // the case, we emit a jump over the pool. 2276 CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize); 2277 } 2278} 2279 2280void Assembler::stm(BlockAddrMode am, Register base, RegList src, 2281 Condition cond) { 2282 AddrMode4(cond | B27 | am, base, src); 2283} 2284 2285// Exception-generating instructions and debugging support. 2286// Stops with a non-negative code less than kNumOfWatchedStops support 2287// enabling/disabling and a counter feature. See simulator-arm.h . 2288void Assembler::stop(Condition cond, int32_t code) { 2289#ifndef __arm__ 2290 DCHECK_GE(code, kDefaultStopCode); 2291 { 2292 BlockConstPoolScope block_const_pool(this); 2293 if (code >= 0) { 2294 svc(kStopCode + code, cond); 2295 } else { 2296 svc(kStopCode + kMaxStopCode, cond); 2297 } 2298 } 2299#else // def __arm__ 2300 if (cond != al) { 2301 Label skip; 2302 b(&skip, NegateCondition(cond)); 2303 bkpt(0); 2304 bind(&skip); 2305 } else { 2306 bkpt(0); 2307 } 2308#endif // def __arm__ 2309} 2310 2311void Assembler::bkpt(uint32_t imm16) { 2312 DCHECK(is_uint16(imm16)); 2313 emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF)); 2314} 2315 2316void Assembler::svc(uint32_t imm24, Condition cond) { 2317 CHECK(is_uint24(imm24)); 2318 emit(cond | 15 * B24 | imm24); 2319} 2320 2321void Assembler::dmb(BarrierOption option) { 2322 if (CpuFeatures::IsSupported(ARMv7)) { 2323 // Details available in ARM DDI 0406C.b, A8-378. 2324 emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option); 2325 } else { 2326 // Details available in ARM DDI 0406C.b, B3-1750. 2327 // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored. 2328 mcr(p15, 0, r0, cr7, cr10, 5); 2329 } 2330} 2331 2332void Assembler::dsb(BarrierOption option) { 2333 if (CpuFeatures::IsSupported(ARMv7)) { 2334 // Details available in ARM DDI 0406C.b, A8-380. 2335 emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option); 2336 } else { 2337 // Details available in ARM DDI 0406C.b, B3-1750. 2338 // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored. 2339 mcr(p15, 0, r0, cr7, cr10, 4); 2340 } 2341} 2342 2343void Assembler::isb(BarrierOption option) { 2344 if (CpuFeatures::IsSupported(ARMv7)) { 2345 // Details available in ARM DDI 0406C.b, A8-389. 
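    // For example, isb(SY) emits 0xF57FF06F; the dmb/dsb/isb encodings above
    // differ only in bits 7-4 (5, 4 and 6 respectively).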
2346 emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option); 2347 } else { 2348 // Details available in ARM DDI 0406C.b, B3-1750. 2349 // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored. 2350 mcr(p15, 0, r0, cr7, cr5, 4); 2351 } 2352} 2353 2354void Assembler::csdb() { 2355 // Details available in Arm Cache Speculation Side-channels white paper, 2356 // version 1.1, page 4. 2357 emit(0xE320F014); 2358} 2359 2360// Coprocessor instructions. 2361void Assembler::cdp(Coprocessor coproc, int opcode_1, CRegister crd, 2362 CRegister crn, CRegister crm, int opcode_2, 2363 Condition cond) { 2364 DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2)); 2365 emit(cond | B27 | B26 | B25 | (opcode_1 & 15) * B20 | crn.code() * B16 | 2366 crd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | crm.code()); 2367} 2368 2369void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd, 2370 CRegister crn, CRegister crm, int opcode_2) { 2371 cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition); 2372} 2373 2374void Assembler::mcr(Coprocessor coproc, int opcode_1, Register rd, 2375 CRegister crn, CRegister crm, int opcode_2, 2376 Condition cond) { 2377 DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2)); 2378 emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | crn.code() * B16 | 2379 rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code()); 2380} 2381 2382void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd, 2383 CRegister crn, CRegister crm, int opcode_2) { 2384 mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition); 2385} 2386 2387void Assembler::mrc(Coprocessor coproc, int opcode_1, Register rd, 2388 CRegister crn, CRegister crm, int opcode_2, 2389 Condition cond) { 2390 DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2)); 2391 emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | L | crn.code() * B16 | 2392 rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code()); 2393} 2394 2395void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd, 2396 CRegister crn, CRegister crm, int opcode_2) { 2397 mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition); 2398} 2399 2400void Assembler::ldc(Coprocessor coproc, CRegister crd, const MemOperand& src, 2401 LFlag l, Condition cond) { 2402 AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src); 2403} 2404 2405void Assembler::ldc(Coprocessor coproc, CRegister crd, Register rn, int option, 2406 LFlag l, Condition cond) { 2407 // Unindexed addressing. 2408 DCHECK(is_uint8(option)); 2409 emit(cond | B27 | B26 | U | l | L | rn.code() * B16 | crd.code() * B12 | 2410 coproc * B8 | (option & 255)); 2411} 2412 2413void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src, 2414 LFlag l) { 2415 ldc(coproc, crd, src, l, kSpecialCondition); 2416} 2417 2418void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option, 2419 LFlag l) { 2420 ldc(coproc, crd, rn, option, l, kSpecialCondition); 2421} 2422 2423// Support for VFP. 2424 2425void Assembler::vldr(const DwVfpRegister dst, const Register base, int offset, 2426 const Condition cond) { 2427 // Ddst = MEM(Rbase + offset). 2428 // Instruction details available in ARM DDI 0406C.b, A8-924. 
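  // The offset is encoded as an 8-bit word count, so only word-aligned
  // offsets up to 1020 can be encoded directly; e.g. offset == 1024 fails the
  // check below (1024 / 4 == 256) and falls back to a scratch register.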
2429 // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) | 2430 // Vd(15-12) | 1011(11-8) | offset 2431 DCHECK(VfpRegisterIsAvailable(dst)); 2432 int u = 1; 2433 if (offset < 0) { 2434 CHECK_NE(offset, kMinInt); 2435 offset = -offset; 2436 u = 0; 2437 } 2438 int vd, d; 2439 dst.split_code(&vd, &d); 2440 2441 DCHECK_GE(offset, 0); 2442 if ((offset % 4) == 0 && (offset / 4) < 256) { 2443 emit(cond | 0xD * B24 | u * B23 | d * B22 | B20 | base.code() * B16 | 2444 vd * B12 | 0xB * B8 | ((offset / 4) & 255)); 2445 } else { 2446 UseScratchRegisterScope temps(this); 2447 Register scratch = temps.Acquire(); 2448 // Larger offsets must be handled by computing the correct address in a 2449 // scratch register. 2450 DCHECK(base != scratch); 2451 if (u == 1) { 2452 add(scratch, base, Operand(offset)); 2453 } else { 2454 sub(scratch, base, Operand(offset)); 2455 } 2456 emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 | 2457 0xB * B8); 2458 } 2459} 2460 2461void Assembler::vldr(const DwVfpRegister dst, const MemOperand& operand, 2462 const Condition cond) { 2463 DCHECK(VfpRegisterIsAvailable(dst)); 2464 DCHECK(operand.am_ == Offset); 2465 if (operand.rm().is_valid()) { 2466 UseScratchRegisterScope temps(this); 2467 Register scratch = temps.Acquire(); 2468 add(scratch, operand.rn(), 2469 Operand(operand.rm(), operand.shift_op_, operand.shift_imm_)); 2470 vldr(dst, scratch, 0, cond); 2471 } else { 2472 vldr(dst, operand.rn(), operand.offset(), cond); 2473 } 2474} 2475 2476void Assembler::vldr(const SwVfpRegister dst, const Register base, int offset, 2477 const Condition cond) { 2478 // Sdst = MEM(Rbase + offset). 2479 // Instruction details available in ARM DDI 0406A, A8-628. 2480 // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) | 2481 // Vdst(15-12) | 1010(11-8) | offset 2482 int u = 1; 2483 if (offset < 0) { 2484 offset = -offset; 2485 u = 0; 2486 } 2487 int sd, d; 2488 dst.split_code(&sd, &d); 2489 DCHECK_GE(offset, 0); 2490 2491 if ((offset % 4) == 0 && (offset / 4) < 256) { 2492 emit(cond | u * B23 | d * B22 | 0xD1 * B20 | base.code() * B16 | sd * B12 | 2493 0xA * B8 | ((offset / 4) & 255)); 2494 } else { 2495 // Larger offsets must be handled by computing the correct address in a 2496 // scratch register. 2497 UseScratchRegisterScope temps(this); 2498 Register scratch = temps.Acquire(); 2499 DCHECK(base != scratch); 2500 if (u == 1) { 2501 add(scratch, base, Operand(offset)); 2502 } else { 2503 sub(scratch, base, Operand(offset)); 2504 } 2505 emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 | 2506 0xA * B8); 2507 } 2508} 2509 2510void Assembler::vldr(const SwVfpRegister dst, const MemOperand& operand, 2511 const Condition cond) { 2512 DCHECK(operand.am_ == Offset); 2513 if (operand.rm().is_valid()) { 2514 UseScratchRegisterScope temps(this); 2515 Register scratch = temps.Acquire(); 2516 add(scratch, operand.rn(), 2517 Operand(operand.rm(), operand.shift_op_, operand.shift_imm_)); 2518 vldr(dst, scratch, 0, cond); 2519 } else { 2520 vldr(dst, operand.rn(), operand.offset(), cond); 2521 } 2522} 2523 2524void Assembler::vstr(const DwVfpRegister src, const Register base, int offset, 2525 const Condition cond) { 2526 // MEM(Rbase + offset) = Dsrc. 2527 // Instruction details available in ARM DDI 0406C.b, A8-1082. 
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | (offset/4)
  DCHECK(VfpRegisterIsAvailable(src));
  int u = 1;
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;
  }
  DCHECK_GE(offset, 0);
  int vd, d;
  src.split_code(&vd, &d);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | 0xD * B24 | u * B23 | d * B22 | base.code() * B16 | vd * B12 |
         0xB * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}

void Assembler::vstr(const DwVfpRegister src, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}

void Assembler::vstr(const SwVfpRegister src, const Register base, int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = SSrc.
  // Instruction details available in ARM DDI 0406A, A8-786.
  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | (offset/4)
  int u = 1;
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;
  }
  int sd, d;
  src.split_code(&sd, &d);
  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | u * B23 | d * B22 | 0xD0 * B20 | base.code() * B16 | sd * B12 |
         0xA * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}

void Assembler::vstr(const SwVfpRegister src, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}

void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-922.
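  // The low byte encodes twice the number of D registers transferred, e.g.
  // vldm(ia_w, r0, d0, d3) moves four doubles and sets imm8 to 8.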
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}

void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1080.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}

void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-626.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | (count)
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}

void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-784.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | (count)
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}

static void DoubleAsTwoUInt32(base::Double d, uint32_t* lo, uint32_t* hi) {
  uint64_t i = d.AsUint64();

  *lo = i & 0xFFFFFFFF;
  *hi = i >> 32;
}

static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
  // Integer promotion from uint8_t to int makes these all okay.
  *encoding = ((imm & 0x80) << (24 - 7));   // a
  *encoding |= ((imm & 0x70) << (16 - 4));  // bcd
  *encoding |= (imm & 0x0f);                // efgh
}

// This checks if imm can be encoded into an immediate for vmov.
// See Table A7-15 in ARM DDI 0406C.d.
// Currently only supports the first row and op=0 && cmode=1110.
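// For example, 0x0000004200000042 matches the first row (a per-word
// replicated byte, cmode == 0) and 0x4242424242424242 matches the
// all-bytes-equal case (cmode == 0xE); anything else is rejected.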
static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
  uint32_t lo = imm & 0xFFFFFFFF;
  uint32_t hi = imm >> 32;
  if ((lo == hi && ((lo & 0xffffff00) == 0))) {
    WriteVmovIntImmEncoding(imm & 0xff, encoding);
    *cmode = 0;
    return true;
  } else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
             ((lo & 0xff) == (lo >> 24))) {
    // Check that all bytes in imm are the same.
    WriteVmovIntImmEncoding(imm & 0xff, encoding);
    *cmode = 0xe;
    return true;
  }

  return false;
}

void Assembler::vmov(const DwVfpRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | 0(6) | op(5) | 1(4) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         op * B5 | 0x1 * B4 | enc);
  } else {
    UNIMPLEMENTED();
  }
}

void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | Q(6) | op(5) | 1(4) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         0x1 * B6 | op * B5 | 0x1 * B4 | enc);
  } else {
    UNIMPLEMENTED();
  }
}

// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
static bool FitsVmovFPImmediate(base::Double d, uint32_t* encoding) {
  // VMOV can accept an immediate of the form:
  //
  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
  //
  // The immediate is encoded using an 8-bit quantity, comprised of two
  // 4-bit fields. For an 8-bit immediate of the form:
  //
  //  [abcdefgh]
  //
  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
  // created of the form:
  //
  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
  //   00000000,00000000,00000000,00000000]
  //
  // where B = ~b.
  //

  uint32_t lo, hi;
  DoubleAsTwoUInt32(d, &lo, &hi);

  // The most obvious constraint is the long block of zeroes.
  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
    return false;
  }

  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
    return false;
  }

  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }

  // Create the encoded immediate in the form:
  //  [00000000,0000abcd,00000000,0000efgh]
  *encoding = (hi >> 16) & 0xF;       // Low nybble.
  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.
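  // Worked example: 1.0 has hi == 0x3FF00000, which passes every check above
  // and encodes as imm4H:imm4L == 0x7:0x0, i.e. the usual imm8 of 0x70.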
2806 2807 return true; 2808} 2809 2810void Assembler::vmov(const SwVfpRegister dst, Float32 imm) { 2811 uint32_t enc; 2812 if (CpuFeatures::IsSupported(VFPv3) && 2813 FitsVmovFPImmediate(base::Double(imm.get_scalar()), &enc)) { 2814 CpuFeatureScope scope(this, VFPv3); 2815 // The float can be encoded in the instruction. 2816 // 2817 // Sd = immediate 2818 // Instruction details available in ARM DDI 0406C.b, A8-936. 2819 // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) | 2820 // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0) 2821 int vd, d; 2822 dst.split_code(&vd, &d); 2823 emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc); 2824 } else { 2825 UseScratchRegisterScope temps(this); 2826 Register scratch = temps.Acquire(); 2827 mov(scratch, Operand(imm.get_bits())); 2828 vmov(dst, scratch); 2829 } 2830} 2831 2832void Assembler::vmov(const DwVfpRegister dst, base::Double imm, 2833 const Register extra_scratch) { 2834 DCHECK(VfpRegisterIsAvailable(dst)); 2835 uint32_t enc; 2836 if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) { 2837 CpuFeatureScope scope(this, VFPv3); 2838 // The double can be encoded in the instruction. 2839 // 2840 // Dd = immediate 2841 // Instruction details available in ARM DDI 0406C.b, A8-936. 2842 // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) | 2843 // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0) 2844 int vd, d; 2845 dst.split_code(&vd, &d); 2846 emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | 2847 enc); 2848 } else { 2849 // Synthesise the double from ARM immediates. 2850 uint32_t lo, hi; 2851 DoubleAsTwoUInt32(imm, &lo, &hi); 2852 UseScratchRegisterScope temps(this); 2853 Register scratch = temps.Acquire(); 2854 2855 if (lo == hi) { 2856 // Move the low and high parts of the double to a D register in one 2857 // instruction. 2858 mov(scratch, Operand(lo)); 2859 vmov(dst, scratch, scratch); 2860 } else if (extra_scratch == no_reg) { 2861 // We only have one spare scratch register. 2862 mov(scratch, Operand(lo)); 2863 vmov(NeonS32, dst, 0, scratch); 2864 if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) { 2865 CpuFeatureScope scope(this, ARMv7); 2866 movt(scratch, hi >> 16); 2867 } else { 2868 mov(scratch, Operand(hi)); 2869 } 2870 vmov(NeonS32, dst, 1, scratch); 2871 } else { 2872 // Move the low and high parts of the double to a D register in one 2873 // instruction. 2874 mov(scratch, Operand(lo)); 2875 mov(extra_scratch, Operand(hi)); 2876 vmov(dst, scratch, extra_scratch); 2877 } 2878 } 2879} 2880 2881void Assembler::vmov(const SwVfpRegister dst, const SwVfpRegister src, 2882 const Condition cond) { 2883 // Sd = Sm 2884 // Instruction details available in ARM DDI 0406B, A8-642. 2885 int sd, d, sm, m; 2886 dst.split_code(&sd, &d); 2887 src.split_code(&sm, &m); 2888 emit(cond | 0xE * B24 | d * B22 | 0xB * B20 | sd * B12 | 0xA * B8 | B6 | 2889 m * B5 | sm); 2890} 2891 2892void Assembler::vmov(const DwVfpRegister dst, const DwVfpRegister src, 2893 const Condition cond) { 2894 // Dd = Dm 2895 // Instruction details available in ARM DDI 0406C.b, A8-938. 
2896 // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) | 2897 // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0) 2898 DCHECK(VfpRegisterIsAvailable(dst)); 2899 DCHECK(VfpRegisterIsAvailable(src)); 2900 int vd, d; 2901 dst.split_code(&vd, &d); 2902 int vm, m; 2903 src.split_code(&vm, &m); 2904 emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B6 | 2905 m * B5 | vm); 2906} 2907 2908void Assembler::vmov(const DwVfpRegister dst, const Register src1, 2909 const Register src2, const Condition cond) { 2910 // Dm = <Rt,Rt2>. 2911 // Instruction details available in ARM DDI 0406C.b, A8-948. 2912 // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) | 2913 // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm 2914 DCHECK(VfpRegisterIsAvailable(dst)); 2915 DCHECK(src1 != pc && src2 != pc); 2916 int vm, m; 2917 dst.split_code(&vm, &m); 2918 emit(cond | 0xC * B24 | B22 | src2.code() * B16 | src1.code() * B12 | 2919 0xB * B8 | m * B5 | B4 | vm); 2920} 2921 2922void Assembler::vmov(const Register dst1, const Register dst2, 2923 const DwVfpRegister src, const Condition cond) { 2924 // <Rt,Rt2> = Dm. 2925 // Instruction details available in ARM DDI 0406C.b, A8-948. 2926 // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) | 2927 // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm 2928 DCHECK(VfpRegisterIsAvailable(src)); 2929 DCHECK(dst1 != pc && dst2 != pc); 2930 int vm, m; 2931 src.split_code(&vm, &m); 2932 emit(cond | 0xC * B24 | B22 | B20 | dst2.code() * B16 | dst1.code() * B12 | 2933 0xB * B8 | m * B5 | B4 | vm); 2934} 2935 2936void Assembler::vmov(const SwVfpRegister dst, const Register src, 2937 const Condition cond) { 2938 // Sn = Rt. 2939 // Instruction details available in ARM DDI 0406A, A8-642. 2940 // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) | 2941 // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0) 2942 DCHECK(src != pc); 2943 int sn, n; 2944 dst.split_code(&sn, &n); 2945 emit(cond | 0xE * B24 | sn * B16 | src.code() * B12 | 0xA * B8 | n * B7 | B4); 2946} 2947 2948void Assembler::vmov(const Register dst, const SwVfpRegister src, 2949 const Condition cond) { 2950 // Rt = Sn. 2951 // Instruction details available in ARM DDI 0406A, A8-642. 2952 // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) | 2953 // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0) 2954 DCHECK(dst != pc); 2955 int sn, n; 2956 src.split_code(&sn, &n); 2957 emit(cond | 0xE * B24 | B20 | sn * B16 | dst.code() * B12 | 0xA * B8 | 2958 n * B7 | B4); 2959} 2960 2961// Type of data to read from or write to VFP register. 2962// Used as specifier in generic vcvt instruction. 2963enum VFPType { S32, U32, F32, F64 }; 2964 2965static bool IsSignedVFPType(VFPType type) { 2966 switch (type) { 2967 case S32: 2968 return true; 2969 case U32: 2970 return false; 2971 default: 2972 UNREACHABLE(); 2973 } 2974} 2975 2976static bool IsIntegerVFPType(VFPType type) { 2977 switch (type) { 2978 case S32: 2979 case U32: 2980 return true; 2981 case F32: 2982 case F64: 2983 return false; 2984 default: 2985 UNREACHABLE(); 2986 } 2987} 2988 2989static bool IsDoubleVFPType(VFPType type) { 2990 switch (type) { 2991 case F32: 2992 return false; 2993 case F64: 2994 return true; 2995 default: 2996 UNREACHABLE(); 2997 } 2998} 2999 3000// Split five bit reg_code based on size of reg_type. 3001// 32-bit register codes are Vm:M 3002// 64-bit register codes are M:Vm 3003// where Vm is four bits, and M is a single bit. 
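// For example, s3 (code 0b00011) splits into Vm = 0b0001 with M = 1, and
// d17 (code 0b10001) splits into M = 1 with Vm = 0b0001.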
3004static void SplitRegCode(VFPType reg_type, int reg_code, int* vm, int* m) { 3005 DCHECK((reg_code >= 0) && (reg_code <= 31)); 3006 if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) { 3007 SwVfpRegister::split_code(reg_code, vm, m); 3008 } else { 3009 DwVfpRegister::split_code(reg_code, vm, m); 3010 } 3011} 3012 3013// Encode vcvt.src_type.dst_type instruction. 3014static Instr EncodeVCVT(const VFPType dst_type, const int dst_code, 3015 const VFPType src_type, const int src_code, 3016 VFPConversionMode mode, const Condition cond) { 3017 DCHECK(src_type != dst_type); 3018 int D, Vd, M, Vm; 3019 SplitRegCode(src_type, src_code, &Vm, &M); 3020 SplitRegCode(dst_type, dst_code, &Vd, &D); 3021 3022 if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) { 3023 // Conversion between IEEE floating point and 32-bit integer. 3024 // Instruction details available in ARM DDI 0406B, A8.6.295. 3025 // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) | 3026 // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0) 3027 DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type)); 3028 3029 int sz, opc2, op; 3030 3031 if (IsIntegerVFPType(dst_type)) { 3032 opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4; 3033 sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0; 3034 op = mode; 3035 } else { 3036 DCHECK(IsIntegerVFPType(src_type)); 3037 opc2 = 0x0; 3038 sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0; 3039 op = IsSignedVFPType(src_type) ? 0x1 : 0x0; 3040 } 3041 3042 return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | B19 | opc2 * B16 | 3043 Vd * B12 | 0x5 * B9 | sz * B8 | op * B7 | B6 | M * B5 | Vm); 3044 } else { 3045 // Conversion between IEEE double and single precision. 3046 // Instruction details available in ARM DDI 0406B, A8.6.298. 3047 // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) | 3048 // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0) 3049 int sz = IsDoubleVFPType(src_type) ? 
0x1 : 0x0; 3050 return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | 0x7 * B16 | 3051 Vd * B12 | 0x5 * B9 | sz * B8 | B7 | B6 | M * B5 | Vm); 3052 } 3053} 3054 3055void Assembler::vcvt_f64_s32(const DwVfpRegister dst, const SwVfpRegister src, 3056 VFPConversionMode mode, const Condition cond) { 3057 DCHECK(VfpRegisterIsAvailable(dst)); 3058 emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond)); 3059} 3060 3061void Assembler::vcvt_f32_s32(const SwVfpRegister dst, const SwVfpRegister src, 3062 VFPConversionMode mode, const Condition cond) { 3063 emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond)); 3064} 3065 3066void Assembler::vcvt_f64_u32(const DwVfpRegister dst, const SwVfpRegister src, 3067 VFPConversionMode mode, const Condition cond) { 3068 DCHECK(VfpRegisterIsAvailable(dst)); 3069 emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond)); 3070} 3071 3072void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src, 3073 VFPConversionMode mode, const Condition cond) { 3074 emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond)); 3075} 3076 3077void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src, 3078 VFPConversionMode mode, const Condition cond) { 3079 emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond)); 3080} 3081 3082void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src, 3083 VFPConversionMode mode, const Condition cond) { 3084 emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond)); 3085} 3086 3087void Assembler::vcvt_s32_f64(const SwVfpRegister dst, const DwVfpRegister src, 3088 VFPConversionMode mode, const Condition cond) { 3089 DCHECK(VfpRegisterIsAvailable(src)); 3090 emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond)); 3091} 3092 3093void Assembler::vcvt_u32_f64(const SwVfpRegister dst, const DwVfpRegister src, 3094 VFPConversionMode mode, const Condition cond) { 3095 DCHECK(VfpRegisterIsAvailable(src)); 3096 emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond)); 3097} 3098 3099void Assembler::vcvt_f64_f32(const DwVfpRegister dst, const SwVfpRegister src, 3100 VFPConversionMode mode, const Condition cond) { 3101 DCHECK(VfpRegisterIsAvailable(dst)); 3102 emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond)); 3103} 3104 3105void Assembler::vcvt_f32_f64(const SwVfpRegister dst, const DwVfpRegister src, 3106 VFPConversionMode mode, const Condition cond) { 3107 DCHECK(VfpRegisterIsAvailable(src)); 3108 emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond)); 3109} 3110 3111void Assembler::vcvt_f64_s32(const DwVfpRegister dst, int fraction_bits, 3112 const Condition cond) { 3113 // Instruction details available in ARM DDI 0406C.b, A8-874. 3114 // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) | 3115 // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0) 3116 DCHECK(IsEnabled(VFPv3)); 3117 DCHECK(VfpRegisterIsAvailable(dst)); 3118 DCHECK(fraction_bits > 0 && fraction_bits <= 32); 3119 int vd, d; 3120 dst.split_code(&vd, &d); 3121 int imm5 = 32 - fraction_bits; 3122 int i = imm5 & 1; 3123 int imm4 = (imm5 >> 1) & 0xF; 3124 emit(cond | 0xE * B24 | B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 | 3125 vd * B12 | 0x5 * B9 | B8 | B7 | B6 | i * B5 | imm4); 3126} 3127 3128void Assembler::vneg(const DwVfpRegister dst, const DwVfpRegister src, 3129 const Condition cond) { 3130 // Instruction details available in ARM DDI 0406C.b, A8-968. 
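  // vneg merely inverts the sign bit (and vabs below clears it), so neither
  // can raise floating-point exceptions.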
void Assembler::vneg(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | B6 | m * B5 | vm);
}

void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B6 | m * B5 | vm);
}

void Assembler::vabs(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B7 |
       B6 | m * B5 | vm);
}

void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
       m * B5 | vm);
}

void Assembler::vadd(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vadd(Dn, Dm) double precision floating point addition.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

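// Illustrative note (not in the original source): split_code(), defined in
// assembler-arm.h, separates a register code into the 4-bit field and the
// 1-bit extension used by the encodings above. For D registers the top bit
// splits off, e.g. d17 (code 0b10001) gives vd = 0b0001, d = 1; for S
// registers the low bit is the extension, e.g. s5 (code 0b101) gives
// vd = 0b10, d = 1.
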
void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vadd(Sn, Sm) single precision floating point addition.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vsub(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vsub(Dn, Dm) double precision floating point subtraction.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vsub(Sn, Sm) single precision floating point subtraction.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vmul(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vmul(Dn, Dm) double precision floating point multiplication.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmul(Sn, Sm) single precision floating point multiplication.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vmla(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}

void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}

void Assembler::vmls(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | B6 | m * B5 | vm);
}

void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       B6 | m * B5 | vm);
}

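// Illustrative note (not in the original source): VFP vmla/vmls are chained,
// not fused - the product is rounded before the accumulate, so the result
// matches an explicit vmul followed by vadd/vsub. A hedged sketch, assuming
// an Assembler 'assm' and the usual d-register aliases:
//
//   assm.vmul(d3, d1, d2);  // d3 = d1 * d2
//   assm.vmla(d0, d1, d2);  // d0 = d0 + d1 * d2 (same rounding as above)
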
void Assembler::vdiv(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vdiv(Dn, Dm) double precision floating point division.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}

void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vdiv(Sn, Sm) single precision floating point division.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}

void Assembler::vcmp(const DwVfpRegister src1, const DwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Dd, Dm) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Sd, Sm) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vcmp(const DwVfpRegister src1, const double src2,
                     const Condition cond) {
  // vcmp(Dd, #0.0) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6);
}

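// Illustrative usage sketch (not in the original source): vcmp only sets the
// FPSCR flags; they must be transferred to the APSR before conditional
// instructions can test them. The vmrs overload defined below encodes
// APSR_nzcv when pc is passed as the destination:
//
//   assm.vcmp(d0, d1);  // FPSCR.NZCV = compare(d0, d1)
//   assm.vmrs(pc);      // copy FPSCR.NZCV into the APSR condition flags
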
void Assembler::vcmp(const SwVfpRegister src1, const float src2,
                     const Condition cond) {
  // vcmp(Sd, #0.0) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B6);
}

void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}

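// Illustrative note (not in the original source): unlike plain vmax/vmin, the
// ARMv8-only vmaxnm/vminnm above follow IEEE 754-2008 maxNum/minNum
// semantics - if exactly one operand is a quiet NaN, the numeric operand is
// returned rather than the NaN.
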
void Assembler::vsel(Condition cond, const DwVfpRegister dst,
                     const DwVfpRegister src1, const DwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 1;

  // VSEL has a special (restricted) condition encoding.
  // eq(0b0000)... -> 0b00
  // ge(0b1010)... -> 0b10
  // gt(0b1100)... -> 0b11
  // vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) || (cond == lt) || (cond == le) || (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vsel(Condition cond, const SwVfpRegister dst,
                     const SwVfpRegister src1, const SwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 0;

  // VSEL has a special (restricted) condition encoding.
  // eq(0b0000)... -> 0b00
  // ge(0b1010)... -> 0b10
  // gt(0b1100)... -> 0b11
  // vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) || (cond == lt) || (cond == le) || (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vsqrt(const DwVfpRegister dst, const DwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | 0x3 * B6 | m * B5 | vm);
}

void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}

void Assembler::vmsr(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}

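// Worked example (illustrative, not in the original source): the vsel
// encoding above keeps only bits 31-30 of the condition. For ge, whose four
// condition bits are 0b1010, (cond >> 30) & 0x3 yields 0b10; ne maps to eq's
// 0b00 with the operands swapped, which is why the DCHECK in vsel accepts
// exactly the four invertible conditions ne/lt/le/vc.
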
void Assembler::vmrs(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}

void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) | Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) | Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) | Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) | Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B7 | B6 | m * B5 | vm);
}

void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
}

// Support for NEON.

void Assembler::vld1(NeonSize size, const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | 2 * B20 | src.rn().code() * B16 |
       vd * B12 | dst.type() * B8 | size * B6 | src.align() * B4 |
       src.rm().code());
}

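// Hedged usage sketch (not in the original source): loading two consecutive
// d-registers from the address in r0, assuming the NeonListOperand and
// NeonMemOperand helpers declared in assembler-arm.h and an Assembler 'assm':
//
//   assm.vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r0));
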
// vld1s(ingle element to one lane).
void Assembler::vld1s(NeonSize size, const NeonListOperand& dst, uint8_t index,
                      const NeonMemOperand& src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.322.
  // 1111(31-28) | 01001(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | size(11-10) | index_align(7-4) | Rm(3-0)
  // See vld1 (single element to all lanes) if size == 0x3, implemented as
  // vld1r(eplicate).
  DCHECK_NE(size, 0x3);
  // Check for valid lane indices.
  DCHECK_GT(1 << (3 - size), index);
  // Specifying alignment not supported, use standard alignment.
  uint8_t index_align = index << (size + 1);

  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
       src.rn().code() * B16 | vd * B12 | size * B10 | index_align * B4 |
       src.rm().code());
}

// vld1r(eplicate)
void Assembler::vld1r(NeonSize size, const NeonListOperand& dst,
                      const NeonMemOperand& src) {
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
       src.rn().code() * B16 | vd * B12 | 0xC * B8 | size * B6 |
       dst.length() * B5 | src.rm().code());
}

void Assembler::vst1(NeonSize size, const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       src.type() * B8 | size * B6 | dst.align() * B4 | dst.rm().code());
}

void Assembler::vst1s(NeonSize size, const NeonListOperand& src, uint8_t index,
                      const NeonMemOperand& dst) {
  // Instruction details available in ARM DDI 0487F.b F6.1.236.
  // 1111(31-28) | 01001(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | size(11-10) | 00(9-8) | index_align(7-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  DCHECK_NE(size, 0x3);
  DCHECK_GT(1 << (3 - size), index);
  // Specifying alignment not supported, use standard alignment.
  uint8_t index_align = index << (size + 1);
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 9 * B23 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       size * B10 | index_align * B4 | dst.rm().code());
}

void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int U = NeonU(dt);
  int imm3 = 1 << NeonSz(dt);
  emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
       0xA * B8 | m * B5 | B4 | vm);
}

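// Worked example (illustrative, not in the original source): in vld1s/vst1s
// above, index_align = index << (size + 1) leaves the alignment bits zero
// ("standard alignment"). For a 16-bit element (size = 1) at lane 2 this
// gives index_align = 2 << 2 = 0b1000.
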
void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
                       DwVfpRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
  // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
  // vqmovun.<type><size> Dd, Qm. Same as above, but produces unsigned results.
  DCHECK(IsEnabled(NEON));
  DCHECK_IMPLIES(NeonU(src_dt), NeonU(dst_dt));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int size = NeonSz(dst_dt);
  DCHECK_NE(3, size);
  int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
  emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
       0x2 * B8 | op * B6 | m * B5 | vm);
}

static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
  }
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}

void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
                     Register src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.940.
  // vmov ARM core register to scalar.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int opc1_opc2 = EncodeScalar(dt, index);
  emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
       opc1_opc2);
}

void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.942.
  // vmov ARM scalar to core register.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vn, n;
  src.split_code(&vn, &n);
  int opc1_opc2 = EncodeScalar(dt, index);
  // NeonS32 and NeonU32 both encoded as u = 0.
  int u = NeonDataTypeToSize(dt) == Neon32 ? 0 : NeonU(dt);
  emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
       n * B7 | B4 | opc1_opc2);
}

void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // vmov is encoded as vorr.
  vorr(dst, src, src);
}

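// Worked example (illustrative, not in the original source): EncodeScalar
// packs the lane index into opc1:opc2. For NeonS16 at index 3 it computes
// opc1_opc2 = 0x1 | (3 << 1) = 0b0111, which the return statement splits into
// opc1 = 0b01 (bit 21) and opc2 = 0b11 (bits 6-5).
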
void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-886.
  int B = 0, E = 0;
  switch (size) {
    case Neon8:
      B = 1;
      break;
    case Neon16:
      E = 1;
      break;
    case Neon32:
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);

  emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
       0xB * B8 | d * B7 | E * B5 | B4);
}

enum NeonRegType { NEON_D, NEON_Q };

void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
  } else {
    DCHECK_EQ(type, NEON_Q);
    QwNeonRegister::split_code(code, vm, m);
    *encoding |= B6;
  }
}

static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
  int qbit = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}

void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int index) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
}

void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
}

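// Worked example (illustrative, not in the original source): EncodeNeonDupOp
// above packs the element size and lane into imm4. Duplicating the 16-bit
// lane 2 of a d-register (sz = 1, index = 2) gives
// imm4 = (1 << 1) | ((2 << 2) & 0xF) = 0b1010.
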
// Encode NEON vcvt.dst_type.src_type instruction.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  int op = 0;
  if (src_type == F32) {
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}

void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, S32, src));
}

void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, U32, src));
}

void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(S32, dst, F32, src));
}

void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(U32, dst, F32, src));
}

enum UnaryOp {
  VMVN,
  VSWP,
  VABS,
  VABSF,
  VNEG,
  VNEGF,
  VRINTM,
  VRINTN,
  VRINTP,
  VRINTZ,
  VZIP,
  VUZP,
  VREV16,
  VREV32,
  VREV64,
  VTRN,
  VRECPE,
  VRSQRTE,
  VPADAL_S,
  VPADAL_U,
  VPADDL_S,
  VPADDL_U,
  VCEQ0,
  VCLT0,
  VCNT
};

// Encoding helper for "Advanced SIMD two registers misc" decode group. See ARM
// DDI 0487F.b, F4-4228.
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    case VRINTM:
      op_encoding = B17 | 0xD * B7;
      break;
    case VRINTN:
      op_encoding = B17 | 0x8 * B7;
      break;
    case VRINTP:
      op_encoding = B17 | 0xF * B7;
      break;
    case VRINTZ:
      op_encoding = B17 | 0xB * B7;
      break;
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0;
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    case VRECPE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xA * B7;
      break;
    case VRSQRTE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xB * B7;
      break;
    case VPADAL_S:
      op_encoding = 0xC * B7;
      break;
    case VPADAL_U:
      op_encoding = 0xD * B7;
      break;
    case VPADDL_S:
      op_encoding = 0x4 * B7;
      break;
    case VPADDL_U:
      op_encoding = 0x5 * B7;
      break;
    case VCEQ0:
      // Only support integers.
      op_encoding = 0x1 * B16 | 0x2 * B7;
      break;
    case VCLT0:
      // Only support signed integers.
      op_encoding = 0x1 * B16 | 0x4 * B7;
      break;
    case VCNT:
      op_encoding = 0xA * B7;
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}

void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmvn(Qm) SIMD bitwise NOT.
  // Instruction details available in ARM DDI 0406C.b, A8-966.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
  // Dd = vswp(Dn, Dm) SIMD d-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vswp(Qn, Qm) SIMD q-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.f<size>(Qm) SIMD floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.s<size>(Qm) SIMD integer absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.f<size>(Qm) SIMD floating point negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.s<size>(Qm) SIMD integer negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
}

enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };

static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}

void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vand(Qn, Qm) SIMD AND.
  // Instruction details available in ARM DDI 0406C.b, A8.8.836.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vbic(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbic(Qn, Qm) SIMD bitwise bit clear (AND NOT).
  // Instruction details available in ARM DDI 0406C.b, A8-840.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBIC, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbsl(Qn, Qm) SIMD bitwise select.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
                     DwVfpRegister src2) {
  // Dd = veor(Dn, Dm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = veor(Qn, Qm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorr(Qn, Qm) SIMD OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.976.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

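// Illustrative note (not in the original source): vbsl uses the destination
// as the select mask, i.e. Qd = (Qn & Qd) | (Qm & ~Qd), so the mask must be
// loaded into dst before the instruction. vbit and vbif are encoded by the
// helper above but have no wrappers in this part of the file; they differ
// only in which operand acts as the mask.
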
void Assembler::vorn(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorn(Qn, Qm) SIMD OR NOT.
  // Instruction details available in ARM DDI 0406C.d, A8.8.359.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORN, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

enum FPBinOp {
  VADDF,
  VSUBF,
  VMULF,
  VMINF,
  VMAXF,
  VRECPS,
  VRSQRTS,
  VCEQF,
  VCGEF,
  VCGTF
};

static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}

enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT,
  VRHADD,
  VQRDMULH
};

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    case VRHADD:
      op_encoding = B8;
      break;
    case VQRDMULH:
      op_encoding = B24 | 0xB * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  // Map NeonSize values to the signed values in NeonDataType, so the U bit
  // will be 0.
  return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
}

void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD floating point addition.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
}

void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD integer addition.
  // Instruction details available in ARM DDI 0406C.b, A8-828.
  emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
}

void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
  // Instruction details available in ARM DDI 0406C.b, A8-996.
  emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
}

void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
}

void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD integer subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1084.
  emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}

void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1020.
  emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}

void Assembler::vmlal(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmlal(Dn, Dm) Vector Multiply Accumulate Long (integer)
  // Instruction details available in ARM DDI 0406C.b, A8-931.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  if (!u) UNIMPLEMENTED();
  DCHECK_NE(size, 3);  // SEE "Related encodings"
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0x8 * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}

void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}

void Assembler::vmull(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmull(Dn, Dm) Vector Multiply Long (integer).
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0xC * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD floating point MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
}

void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD integer MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
}

void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD floating point MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
}

void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD integer MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
}

enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI, VSRA };

static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                       NeonRegType reg_type, int dst_code,
                                       int src_code, int shift_code) {
  DCHECK_EQ(op, VSHL);
  int op_encoding = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, shift_code, &vn, &n, &op_encoding);
  int size = NeonSz(dt);
  int u = NeonU(dt);

  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         0x4 * B8 | n * B7 | m * B5 | vm | op_encoding;
}

static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0, imm6 = 0, L = 0;
  switch (op) {
    case VSHL: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = B24 | 0x5 * B8;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B24 | 0x4 * B8;
      break;
    }
    case VSRA: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B8;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    default:
      UNREACHABLE();
  }

  L = imm6 >> 6;
  imm6 &= 0x3F;

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | L * B7 | m * B5 | B4 |
         vm | op_encoding;
}

void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, bits) SIMD shift left immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1046.
  emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     QwNeonRegister shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, Qn) SIMD shift left Register.
  // Instruction details available in ARM DDI 0487A.a, F8-3340.
  emit(EncodeNeonShiftRegisterOp(VSHL, dt, NEON_Q, dst.code(), src.code(),
                                 shift.code()));
}

void Assembler::vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vshr(Dm, bits) SIMD shift right immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
                         dst.code(), src.code(), shift));
}

void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshr(Qm, bits) SIMD shift right immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsli(Dm, bits) SIMD shift left and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1056.
  emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsri(Dm, bits) SIMD shift right and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1062.
  emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

void Assembler::vsra(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
                     int imm) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsra(Dm, imm) SIMD shift right and accumulate.
  // Instruction details available in ARM DDI 0487F.b, F6-5569.
  emit(EncodeNeonShiftOp(VSRA, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
                         dst.code(), src.code(), imm));
}

void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecpe(Qm) SIMD reciprocal estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1024.
  emit(EncodeNeonUnaryOp(VRECPE, NEON_Q, Neon32, dst.code(), src.code()));
}

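// Worked example (illustrative, not in the original source): in
// EncodeNeonShiftOp above, immediate shifts fold into imm6/L. A vshr of a
// 32-bit lane by 5 gives imm6 = 2 * 32 - 5 = 59; a 64-bit shift would exceed
// six bits, which is exactly what the L bit (imm6 >> 6) captures, e.g.
// shifting a 64-bit lane right by 1 gives imm6 = 127 -> L = 1, imm6 = 63.
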
  emit(EncodeNeonUnaryOp(VRSQRTE, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1026.
  emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
}

void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
                        QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1040.
  emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}

enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };

static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}

void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}

void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-980.
  emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
}

void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
}

void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
}

void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards -Infinity.
  // See ARM DDI 0487F.b, F6-5493.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards Nearest.
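  // (Ties round to even.)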
  // See ARM DDI 0487F.b, F6-5497.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards +Infinity.
  // See ARM DDI 0487F.b, F6-5501.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTP, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintz(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards Zero.
  // See ARM DDI 0487F.b, F6-5511.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTZ, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vtst(Qn, Qm) SIMD test integer operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1098.
  emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
}

void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD integer compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     int value) {
  DCHECK(IsEnabled(NEON));
  DCHECK_EQ(0, value);
  // Qd = vceq(Qn, Qm, #0) Vector Compare Equal to Zero.
  // Instruction details available in ARM DDI 0406C.d, A8-847.
  emit(EncodeNeonUnaryOp(VCEQ0, NEON_Q, size, dst.code(), src1.code()));
}

void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
}

void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
}

void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
}

void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
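  // The signedness of the comparison is taken from dt: e.g. NeonS32 yields
  // VCGT.S32 and NeonU32 yields VCGT.U32.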
  emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
}

void Assembler::vclt(NeonSize size, QwNeonRegister dst, QwNeonRegister src,
                     int value) {
  DCHECK(IsEnabled(NEON));
  DCHECK_EQ(0, value);
  // vclt.<size>(Qn, Qm, #0) SIMD Vector Compare Less Than Zero.
  // Instruction details available in ARM DDI 0487F.b, F6-5072.
  emit(EncodeNeonUnaryOp(VCLT0, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrhadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrhadd(Qn, Qm) SIMD integer rounding halving add.
  // Instruction details available in ARM DDI 0406C.b, A8-1030.
  emit(EncodeNeonBinOp(VRHADD, dt, dst, src1, src2));
}

void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2, int bytes) {
  DCHECK(IsEnabled(NEON));
  // Qd = vext(Qn, Qm) SIMD byte extract.
  // Instruction details available in ARM DDI 0406C.b, A8-890.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  DCHECK_GT(16, bytes);
  emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
       n * B7 | B6 | m * B5 | vm);
}

void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vzip.<size>(Dn, Dm) SIMD zip (interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1102.
    emit(EncodeNeonUnaryOp(VZIP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vzip.<size>(Qn, Qm) SIMD zip (interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1102.
  emit(EncodeNeonUnaryOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1100.
    emit(EncodeNeonUnaryOp(VUZP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1100.
  emit(EncodeNeonUnaryOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev16.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonUnaryOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev32.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
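  // Elements of the given <size> are reversed within each 32-bit group, so
  // vrev32.8 reverses the four bytes of every word and vrev32.16 swaps its
  // two half-words.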
  emit(EncodeNeonUnaryOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev64.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonUnaryOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Dn, Dm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonUnaryOp(VTRN, NEON_D, size, src1.code(), src2.code()));
}

void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Qn, Qm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonUnaryOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vpadal(NeonDataType dt, QwNeonRegister dst,
                       QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // vpadal.<dt>(Qd, Qm) SIMD Vector Pairwise Add and Accumulate Long.
  emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADAL_U : VPADAL_S, NEON_Q,
                         NeonDataTypeToSize(dt), dst.code(), src.code()));
}

void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst,
                       QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // vpaddl.<dt>(Qd, Qm) SIMD Vector Pairwise Add Long.
  emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADDL_U : VPADDL_S, NEON_Q,
                         NeonDataTypeToSize(dt), dst.code(), src.code()));
}

void Assembler::vqrdmulh(NeonDataType dt, QwNeonRegister dst,
                         QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  DCHECK(dt == NeonS16 || dt == NeonS32);
  emit(EncodeNeonBinOp(VQRDMULH, dt, dst, src1, src2));
}

void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcnt(Qm) SIMD Vector Count Set Bits.
  // Instruction details available in ARM DDI 0487F.b, F6-5094.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VCNT, NEON_Q, Neon8, dst.code(), src.code()));
}

// Encode NEON vtbl / vtbx instruction.
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
         0x2 * B10 | list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}

void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, false));
}

void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, true));
}
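
// Illustrative use of vtbl/vtbx (hypothetical registers, assuming the
// single-register NeonListOperand constructor):
//   __ vtbl(d0, NeonListOperand(d1), d2);  // d0[i] = d1[d2[i]]; zero when
//                                          // d2[i] is out of range.
//   __ vtbx(d0, NeonListOperand(d1), d2);  // Same, but out-of-range indices
//                                          // leave d0[i] unchanged.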

// Pseudo instructions.
void Assembler::nop(int type) {
  // ARMv6{K/T2} and v7 have an actual NOP instruction, but it serializes
  // some of the CPU's pipeline and has to issue. Older ARM chips simply used
  // MOV Rx, Rx as NOP, which performs better even on newer CPUs.
  // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
  // a type.
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  emit(al | 13 * B21 | type * B12 | type);
}

void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }

bool Assembler::IsMovT(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask out conditions.
             ((kNumRegisters - 1) * B12) |        // Mask out register.
             EncodeMovwImmediate(0xFFFF));        // Mask out immediate value.
  return instr == kMovtPattern;
}

bool Assembler::IsMovW(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask out conditions.
             ((kNumRegisters - 1) * B12) |        // Mask out destination.
             EncodeMovwImmediate(0xFFFF));        // Mask out immediate value.
  return instr == kMovwPattern;
}

Instr Assembler::GetMovTPattern() { return kMovtPattern; }

Instr Assembler::GetMovWPattern() { return kMovwPattern; }

Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
  DCHECK_LT(immediate, 0x10000);
  return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
}

Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
  instruction &= ~EncodeMovwImmediate(0xFFFF);
  return instruction | EncodeMovwImmediate(immediate);
}

int Assembler::DecodeShiftImm(Instr instr) {
  int rotate = Instruction::RotateValue(instr) * 2;
  int immed8 = Instruction::Immed8Value(instr);
  return base::bits::RotateRight32(immed8, rotate);
}

Instr Assembler::PatchShiftImm(Instr instr, int immed) {
  uint32_t rotate_imm = 0;
  uint32_t immed_8 = 0;
  bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
  DCHECK(immed_fits);
  USE(immed_fits);
  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
}

bool Assembler::IsNop(Instr instr, int type) {
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  // Check for mov rx, rx where x = type.
  return instr == (al | 13 * B21 | type * B12 | type);
}

bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}

bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}

// static
bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
  uint32_t dummy1;
  uint32_t dummy2;
  return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
}

bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
  return is_uint12(abs(imm32));
}

// Debugging.
void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in
  // the code.
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}

void Assembler::GrowBuffer() {
  DCHECK_EQ(buffer_start_, buffer_->start());

  // Compute new buffer size.
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);

  // Some internal data structures would overflow for very large buffers;
  // these must ensure that kMaximalBufferSize is not too large.
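  // For example, a 512 KB buffer grows to 1 MB, while an 8 MB buffer grows
  // by only 1 MB, to 9 MB: growth is geometric at first, then capped at
  // 1 MB per step.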
  if (new_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();

  // Copy the data.
  int pc_delta = new_start - buffer_start_;
  int rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  MemMove(new_start, buffer_start_, pc_offset());
  byte* new_reloc_start = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.pos()) + rc_delta);
  MemMove(new_reloc_start, reloc_info_writer.pos(), reloc_size);

  // Switch buffers.
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ = reinterpret_cast<byte*>(reinterpret_cast<Address>(pc_) + pc_delta);
  byte* new_last_pc = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.last_pc()) + pc_delta);
  reloc_info_writer.Reposition(new_reloc_start, new_last_pc);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no
  // need to relocate any emitted relocation entries.
}

void Assembler::db(uint8_t data) {
  // db is used to write raw data. The constant pool should be emitted or
  // blocked before using db.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint8_t*>(pc_) = data;
  pc_ += sizeof(uint8_t);
}

void Assembler::dd(uint32_t data, RelocInfo::Mode rmode) {
  // dd is used to write raw data. The constant pool should be emitted or
  // blocked before using dd.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  if (!RelocInfo::IsNoInfo(rmode)) {
    DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
           RelocInfo::IsLiteralConstant(rmode));
    RecordRelocInfo(rmode);
  }
  base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), data);
  pc_ += sizeof(uint32_t);
}

void Assembler::dq(uint64_t value, RelocInfo::Mode rmode) {
  // dq is used to write raw data. The constant pool should be emitted or
  // blocked before using dq.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  if (!RelocInfo::IsNoInfo(rmode)) {
    DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
           RelocInfo::IsLiteralConstant(rmode));
    RecordRelocInfo(rmode);
  }
  base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), value);
  pc_ += sizeof(uint64_t);
}

void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
  if (!ShouldRecordRelocInfo(rmode)) return;
  DCHECK_GE(buffer_space(), kMaxRelocSize);  // Too late to grow buffer here.
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
  reloc_info_writer.Write(&rinfo);
}

void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs and embedded objects, but we must make sure we
  // only emit one reloc info for them (thus delta patching will apply the
  // delta only once).
  // At the moment, we do not deduplicate heap object requests, which are
  // indicated by value == 0.
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0) ||
                    (RelocInfo::IsEmbeddedObjectMode(rmode) && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (first_const_pool_32_use_ < 0) {
    DCHECK(pending_32_bit_constants_.empty());
    DCHECK_EQ(constant_pool_deadline_, kMaxInt);
    first_const_pool_32_use_ = position;
    constant_pool_deadline_ = position + kCheckPoolDeadline;
  } else {
    DCHECK(!pending_32_bit_constants_.empty());
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.emplace_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info.
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}

void Assembler::BlockConstPoolFor(int instructions) {
  int pc_limit = pc_offset() + instructions * kInstrSize;
  if (no_const_pool_before_ < pc_limit) {
    no_const_pool_before_ = pc_limit;
  }

  // If we're due a const pool check before the block finishes, move it to
  // just after the block.
  if (constant_pool_deadline_ < no_const_pool_before_) {
    // Make sure that the new deadline isn't too late (including a jump and
    // the constant pool marker).
    DCHECK_LE(no_const_pool_before_,
              first_const_pool_32_use_ + kMaxDistToIntPool);
    constant_pool_deadline_ = no_const_pool_before_;
  }
}

void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short sequences of instructions mustn't be broken up by constant
  // pool emission; such sequences are protected by calls to BlockConstPoolFor
  // and BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty()) {
    // We should only fall into this case if we're either trying to force
    // emission or opportunistically checking after a jump.
    DCHECK(force_emit || !require_jump);
    return;
  }

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool
  //    to the first constant pool entry will exceed its limit the next time
  //    the pool is checked.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
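  //
  // When a jump is required, the emitted pool has the shape:
  //   b after_pool
  //   <constant pool marker encoding the pool length>
  //   <32-bit constant pool entries>
  //  after_pool: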
  if (!force_emit) {
    DCHECK_NE(first_const_pool_32_use_, -1);
    int dist32 = pc_offset() - first_const_pool_32_use_;
    if (require_jump) {
      // We should only be on this path if we've exceeded our deadline.
      DCHECK_GE(dist32, kCheckPoolDeadline);
    } else if (dist32 < kCheckPoolDeadline / 2) {
      return;
    }
  }

  int size_after_marker = pending_32_bit_constants_.size() * kPointerSize;

  // Deduplicate constants.
  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (include the jump over the pool, the constant pool marker and the
  // gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  int size = size_up_to_marker + size_after_marker;
  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    ASM_CODE_COMMENT_STRING(this, "Constant Pool");
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordConstPool(size);

    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps disassembly know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));

    // The first entry in the constant pool should also be the first use of
    // the pool.
    CHECK_EQ(first_const_pool_32_use_, pending_32_bit_constants_[0].position());
    CHECK(!pending_32_bit_constants_[0].is_merged());

    // Make sure we're not emitting the constant too late.
    CHECK_LE(pc_offset(),
             first_const_pool_32_use_ + kMaxDistToPcRelativeConstant);

    // Check that the code buffer is large enough before emitting the constant
    // pool (this includes the gap to the relocation information).
    int needed_space = pending_32_bit_constants_.size() * kPointerSize + kGap;
    while (buffer_space() <= needed_space) {
      GrowBuffer();
    }

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        DCHECK(entry.sharing_ok());
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        DCHECK_LT(merged.position(), entry.position());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();

    first_const_pool_32_use_ = -1;

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, we don't need another check until
  // the next constant pool entry is added.
  constant_pool_deadline_ = kMaxInt;
}

PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, ExternalAssemblerBuffer(
                             address, instructions * kInstrSize + kGap)) {
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}

PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());

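  // A PatchingAssembler is expected to fill its buffer exactly; callers can
  // use PadWithNops (below) to fill any remaining instruction slots.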
  // Check that the code was patched as expected.
  DCHECK_EQ(pc_, buffer_start_ + buffer_->size() - kGap);
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}

void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }

void PatchingAssembler::PadWithNops() {
  DCHECK_LE(pc_, buffer_start_ + buffer_->size() - kGap);
  while (pc_ < buffer_start_ + buffer_->size() - kGap) {
    nop();
  }
}

UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}

UseScratchRegisterScope::~UseScratchRegisterScope() {
  *assembler_->GetScratchRegisterList() = old_available_;
  *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
}

Register UseScratchRegisterScope::Acquire() {
  RegList* available = assembler_->GetScratchRegisterList();
  DCHECK_NOT_NULL(available);
  return available->PopFirst();
}

LoadStoreLaneParams::LoadStoreLaneParams(MachineRepresentation rep,
                                         uint8_t laneidx) {
  if (rep == MachineRepresentation::kWord8) {
    *this = LoadStoreLaneParams(laneidx, Neon8, 8);
  } else if (rep == MachineRepresentation::kWord16) {
    *this = LoadStoreLaneParams(laneidx, Neon16, 4);
  } else if (rep == MachineRepresentation::kWord32) {
    *this = LoadStoreLaneParams(laneidx, Neon32, 2);
  } else if (rep == MachineRepresentation::kWord64) {
    *this = LoadStoreLaneParams(laneidx, Neon64, 1);
  } else {
    UNREACHABLE();
  }
}

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM