// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

#include "src/codegen/arm/assembler-arm.h"

#if V8_TARGET_ARCH_ARM

#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/arm/assembler-arm-inl.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/string-constants.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {

static const unsigned kArmv6 = 0u;
static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);
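// With the ordering above, each feature set is a strict superset of the
// previous one; kArmv8, for example, has the ARMv7, ARMv7_SUDIV and ARMv8
// bits all set.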

static unsigned CpuFeaturesFromCommandLine() {
  unsigned result;
  if (strcmp(FLAG_arm_arch, "armv8") == 0) {
    result = kArmv8;
  } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
    result = kArmv7WithSudiv;
  } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
    result = kArmv7;
  } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
    result = kArmv6;
  } else {
    fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
            FLAG_arm_arch);
    fprintf(stderr,
            "Supported values are:  armv8\n"
            "                       armv7+sudiv\n"
            "                       armv7\n"
            "                       armv6\n");
    FATAL("arm-arch");
  }

  // If any of the old (deprecated) flags are specified, print a warning, but
  // otherwise try to respect them for now.
  // TODO(jbramley): When all the old bots have been updated, remove this.
  if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
      FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
      FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
    // As an approximation of the old behaviour, set the default values from the
    // arm_arch setting, then apply the flags over the top.
    bool enable_armv7 = (result & (1u << ARMv7)) != 0;
    bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
    bool enable_32dregs = (result & (1u << ARMv7)) != 0;
    bool enable_neon = (result & (1u << ARMv7)) != 0;
    bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
    bool enable_armv8 = (result & (1u << ARMv8)) != 0;
    if (FLAG_enable_armv7.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv7 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv7 = FLAG_enable_armv7.value;
    }
    if (FLAG_enable_vfp3.has_value) {
      fprintf(stderr,
              "Warning: --enable_vfp3 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_vfp3 = FLAG_enable_vfp3.value;
    }
    if (FLAG_enable_32dregs.has_value) {
      fprintf(stderr,
              "Warning: --enable_32dregs is deprecated. "
              "Use --arm_arch instead.\n");
      enable_32dregs = FLAG_enable_32dregs.value;
    }
    if (FLAG_enable_neon.has_value) {
      fprintf(stderr,
              "Warning: --enable_neon is deprecated. "
              "Use --arm_arch instead.\n");
      enable_neon = FLAG_enable_neon.value;
    }
    if (FLAG_enable_sudiv.has_value) {
      fprintf(stderr,
              "Warning: --enable_sudiv is deprecated. "
              "Use --arm_arch instead.\n");
      enable_sudiv = FLAG_enable_sudiv.value;
    }
    if (FLAG_enable_armv8.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv8 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv8 = FLAG_enable_armv8.value;
    }
    // Emulate the old implications.
    if (enable_armv8) {
      enable_vfp3 = true;
      enable_neon = true;
      enable_32dregs = true;
      enable_sudiv = true;
    }
    // Select the best available configuration.
    if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
      if (enable_sudiv) {
        if (enable_armv8) {
          result = kArmv8;
        } else {
          result = kArmv7WithSudiv;
        }
      } else {
        result = kArmv7;
      }
    } else {
      result = kArmv6;
    }
  }
  return result;
}

// Get the CPU features enabled by the build.
// For cross compilation the preprocessor symbols such as
// CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
// enable ARMv7 and VFPv3 instructions when building the snapshot. However,
// these flags should be consistent with a supported ARM configuration:
//  "armv6":       ARMv6 + VFPv2
//  "armv7":       ARMv7 + VFPv3-D32 + NEON
//  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
//  "armv8":       ARMv8 (+ all of the above)
static constexpr unsigned CpuFeaturesFromCompiler() {
// TODO(jbramley): Once the build flags are simplified, these tests should
// also be simplified.

// Check *architectural* implications.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
#endif
#if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
// V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
// VFPv3 isn't available before ARMv7.
#error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
#endif
#if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif

// Find compiler-implied features.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
    defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv8;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7WithSudiv;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
    defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7;
#else
  return kArmv6;
#endif
}

bool CpuFeatures::SupportsWasmSimd128() { return IsSupported(NEON); }

void CpuFeatures::ProbeImpl(bool cross_compile) {
  dcache_line_size_ = 64;

  unsigned command_line = CpuFeaturesFromCommandLine();
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= command_line & CpuFeaturesFromCompiler();
    return;
  }

#ifndef __arm__
  // For the simulator build, use whatever the flags specify.
  supported_ |= command_line;

#else  // __arm__
  // Probe for additional features at runtime.
  base::CPU cpu;
  // Runtime detection is slightly fuzzy, and some inferences are necessary.
  unsigned runtime = kArmv6;
  // NEON and VFPv3 imply at least ARMv7-A.
  if (cpu.has_neon() && cpu.has_vfp3_d32()) {
    DCHECK(cpu.has_vfp3());
    runtime |= kArmv7;
    if (cpu.has_idiva()) {
      runtime |= kArmv7WithSudiv;
      if (cpu.architecture() >= 8) {
        runtime |= kArmv8;
      }
    }
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system. In both cases, restrict available features using the
  // command-line. Note that the command-line flags are very permissive (kArmv8)
  // by default.
  supported_ |= command_line & CpuFeaturesFromCompiler();
  supported_ |= command_line & runtime;

  // Additional tuning options.

  // ARM Cortex-A9 and Cortex-A5 have 32-byte cachelines.
  if (cpu.implementer() == base::CPU::kArm &&
      (cpu.part() == base::CPU::kArmCortexA5 ||
       cpu.part() == base::CPU::kArmCortexA9)) {
    dcache_line_size_ = 32;
  }
#endif

  DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
  DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));

  // Set a static value on whether SIMD is supported.
  // This variable is only used for certain archs to query
  // SupportsWasmSimd128() at runtime in builtins using an extern ref. Other
  // callers should use CpuFeatures::SupportsWasmSimd128().
  CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
}

void CpuFeatures::PrintTarget() {
  const char* arm_arch = nullptr;
  const char* arm_target_type = "";
  const char* arm_no_probe = "";
  const char* arm_fpu = "";
  const char* arm_thumb = "";
  const char* arm_float_abi = nullptr;

#if !defined __arm__
  arm_target_type = " simulator";
#endif

#if defined ARM_TEST_NO_FEATURE_PROBE
  arm_no_probe = " noprobe";
#endif

#if defined CAN_USE_ARMV8_INSTRUCTIONS
  arm_arch = "arm v8";
#elif defined CAN_USE_ARMV7_INSTRUCTIONS
  arm_arch = "arm v7";
#else
  arm_arch = "arm v6";
#endif

#if defined CAN_USE_NEON
  arm_fpu = " neon";
#elif defined CAN_USE_VFP3_INSTRUCTIONS
#if defined CAN_USE_VFP32DREGS
  arm_fpu = " vfp3";
#else
  arm_fpu = " vfp3-d16";
#endif
#else
  arm_fpu = " vfp2";
#endif

#ifdef __arm__
  arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
#elif USE_EABI_HARDFLOAT
  arm_float_abi = "hard";
#else
  arm_float_abi = "softfp";
#endif

#if defined __arm__ && (defined __thumb__ || defined __thumb2__)
  arm_thumb = " thumb";
#endif

  printf("target%s%s %s%s%s %s\n", arm_target_type, arm_no_probe, arm_arch,
         arm_fpu, arm_thumb, arm_float_abi);
}

void CpuFeatures::PrintFeatures() {
  printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
         CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
         CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
         CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
#ifdef __arm__
  bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
  bool eabi_hardfloat = true;
#else
  bool eabi_hardfloat = false;
#endif
  printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
}

// -----------------------------------------------------------------------------
// Implementation of RelocInfo

// static
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded.  Being
  // specially coded on ARM means that it is a movw/movt instruction. We don't
  // generate those for relocatable pointers.
  return false;
}

bool RelocInfo::IsInConstantPool() {
  return Assembler::is_constant_pool_load(pc_);
}

uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  return static_cast<uint32_t>(
      Assembler::target_address_at(pc_, constant_pool_));
}

// -----------------------------------------------------------------------------
// Implementation of Operand and MemOperand
// See assembler-arm-inl.h for inlined constructors

Operand::Operand(Handle<HeapObject> handle) {
  rm_ = no_reg;
  value_.immediate = static_cast<intptr_t>(handle.address());
  rmode_ = RelocInfo::FULL_EMBEDDED_OBJECT;
}

Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
  DCHECK(is_uint5(shift_imm));

  rm_ = rm;
  rs_ = no_reg;
  shift_op_ = shift_op;
  shift_imm_ = shift_imm & 31;

  if ((shift_op == ROR) && (shift_imm == 0)) {
    // ROR #0 is functionally equivalent to LSL #0, and this allows us to
    // encode RRX as ROR #0 (see below).
    shift_op_ = LSL;
  } else if (shift_op == RRX) {
    // encoded as ROR with shift_imm == 0
    DCHECK_EQ(shift_imm, 0);
    shift_op_ = ROR;
    shift_imm_ = 0;
  }
}
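// Illustrative uses of the forms handled above (not exhaustive):
//   Operand(r0, LSL, 2)  // r0 << 2
//   Operand(r0, ROR, 0)  // rewritten as LSL #0
//   Operand(r0, RRX)     // rotate right with extend, encoded as ROR #0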

Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
  DCHECK(shift_op != RRX);
  rm_ = rm;
  shift_op_ = shift_op;
  rs_ = rs;
}

Operand Operand::EmbeddedNumber(double value) {
  int32_t smi;
  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(value);
  return result;
}

Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(str);
  return result;
}

MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
    : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
  // Accesses below the stack pointer are not safe, and are prohibited by the
  // ABI. We can check obvious violations here.
  if (rn == sp) {
    if (am == Offset) DCHECK_LE(0, offset);
    if (am == NegOffset) DCHECK_GE(0, offset);
  }
}
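// For example, MemOperand(sp, 4) is accepted by the check above, while
// MemOperand(sp, -4) would address below the stack pointer and fail the
// DCHECK.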

MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
    : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}

MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
                       int shift_imm, AddrMode am)
    : rn_(rn),
      rm_(rm),
      shift_op_(shift_op),
      shift_imm_(shift_imm & 31),
      am_(am) {
  DCHECK(is_uint5(shift_imm));
}

NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
    : rn_(rn), rm_(am == Offset ? pc : sp) {
  DCHECK((am == Offset) || (am == PostIndex));
  SetAlignment(align);
}

NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
    : rn_(rn), rm_(rm) {
  SetAlignment(align);
}

void NeonMemOperand::SetAlignment(int align) {
  switch (align) {
    case 0:
      align_ = 0;
      break;
    case 64:
      align_ = 1;
      break;
    case 128:
      align_ = 2;
      break;
    case 256:
      align_ = 3;
      break;
    default:
      UNREACHABLE();
  }
}

void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
  for (auto& request : heap_object_requests_) {
    Handle<HeapObject> object;
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber:
        object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
            request.heap_number());
        break;
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
        object = str->AllocateStringConstant(isolate);
        break;
      }
    }
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
    Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
        object.address();
  }
}

// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.

// str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
// register r is not encoded.
const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
// ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
// register r is not encoded.
const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
// ldr rd, [pc, #offset]
const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
// Pc-relative call or jump to a signed imm24 offset.
// bl pc + #offset
// b  pc + #offset
const Instr kBOrBlPCImmedMask = 0xE * B24;
const Instr kBOrBlPCImmedPattern = 0xA * B24;
// vldr dd, [pc, #offset]
const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
// blxcc rm
const Instr kBlxRegMask =
    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern = B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
const Instr kBlxIp = al | kBlxRegPattern | ip.code();
const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
const Instr kMovMvnPattern = 0xD * B21;
const Instr kMovMvnFlip = B22;
const Instr kMovLeaveCCMask = 0xDFF * B16;
const Instr kMovLeaveCCPattern = 0x1A0 * B16;
const Instr kMovwPattern = 0x30 * B20;
const Instr kMovtPattern = 0x34 * B20;
const Instr kMovwLeaveCCFlip = 0x5 * B21;
const Instr kMovImmedMask = 0x7F * B21;
const Instr kMovImmedPattern = 0x1D * B21;
const Instr kOrrImmedMask = 0x7F * B21;
const Instr kOrrImmedPattern = 0x1C * B21;
const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
const Instr kCmpCmnPattern = 0x15 * B20;
const Instr kCmpCmnFlip = B21;
const Instr kAddSubFlip = 0x6 * B21;
const Instr kAndBicFlip = 0xE * B21;

// ldr/str patterns with an fp base register and an immediate offset; the Rd
// register and the offset are not encoded (kLdrStrInstrTypeMask masks them
// out).
const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
const Instr kLdrRegFpNegOffsetPattern =
    al | B26 | L | NegOffset | fp.code() * B16;
const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
const Instr kLdrStrInstrTypeMask = 0xFFFF0000;

Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
      pending_32_bit_constants_(),
      scratch_register_list_({ip}) {
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
  constant_pool_deadline_ = kMaxInt;
  const_pool_blocked_nesting_ = 0;
  no_const_pool_before_ = 0;
  first_const_pool_32_use_ = -1;
  last_bound_pos_ = 0;
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Register objects tend to be abstracted and survive between scopes, so
    // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
    // its use consistent with other features, we always enable it if we can.
    EnableCpuFeature(VFP32DREGS);
    // Make sure we pick two D registers which alias a Q register. This way, we
    // can use a Q as a scratch if NEON is supported.
    scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
  } else {
    // When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
    // cannot use it as a scratch.
    scratch_vfp_register_list_ = d14.ToVfpRegList();
  }
}
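// Note that d14 and d15 together alias q7, which is how the scratch list
// chosen above can be handed out as a whole Q register when NEON is
// available.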

Assembler::~Assembler() {
  DCHECK_EQ(const_pool_blocked_nesting_, 0);
  DCHECK_EQ(first_const_pool_32_use_, -1);
}

void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
  // As a crutch to avoid having to add manual Align calls wherever we use a
  // raw workflow to create Code objects (mostly in tests), add another Align
  // call here. It does no harm - the end of the Code object is aligned to the
  // (larger) kCodeAlignment anyways.
  // TODO(jgruber): Consider moving responsibility for proper alignment to
  // metadata table builders (safepoint, handler, constant pool, code
  // comments).
  DataAlign(Code::kMetadataAlignment);

  // Emit constant pool if necessary.
  CheckConstPool(true, false);
  DCHECK(pending_32_bit_constants_.empty());

  int code_comments_size = WriteCodeComments();

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->safepoint_table_offset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
}

void Assembler::Align(int m) {
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
  DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}

Condition Assembler::GetCondition(Instr instr) {
  return Instruction::ConditionField(instr);
}

bool Assembler::IsLdrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
}

bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
  return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
}

int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff12Mask;  // Zero extended offset.
  return positive ? offset : -offset;
}

int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff8Mask;  // Zero extended offset.
  offset <<= 2;
  return positive ? offset : -offset;
}

Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}

Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint10(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset. Its bottom 2 bits are zero.
  return (instr & ~kOff8Mask) | (offset >> 2);
}
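// For example, the maximum positive vldr offset 1020 (0x3FC) is stored by the
// function above as 1020 >> 2 == 255 in the 8-bit field.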

bool Assembler::IsStrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
}

Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsStrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}

bool Assembler::IsAddRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
}

Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsAddRegisterImmediate(instr));
  DCHECK_GE(offset, 0);
  DCHECK(is_uint12(offset));
  // Set the offset.
  return (instr & ~kOff12Mask) | offset;
}

Register Assembler::GetRd(Instr instr) {
  return Register::from_code(Instruction::RdValue(instr));
}

Register Assembler::GetRn(Instr instr) {
  return Register::from_code(Instruction::RnValue(instr));
}

Register Assembler::GetRm(Instr instr) {
  return Register::from_code(Instruction::RmValue(instr));
}

bool Assembler::IsPush(Instr instr) {
  return ((instr & ~kRdMask) == kPushRegPattern);
}

bool Assembler::IsPop(Instr instr) {
  return ((instr & ~kRdMask) == kPopRegPattern);
}

bool Assembler::IsStrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
}

bool Assembler::IsLdrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
}

bool Assembler::IsStrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
}

bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
}

bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // ldr<cond> <Rd>, [pc +/- offset_12].
  return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
}

bool Assembler::IsBOrBlPcImmediateOffset(Instr instr) {
  return (instr & kBOrBlPCImmedMask) == kBOrBlPCImmedPattern;
}

bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // vldr<cond> <Dd>, [pc +/- offset_10].
  return (instr & kVldrDPCMask) == kVldrDPCPattern;
}

bool Assembler::IsBlxReg(Instr instr) {
  // Check the instruction is indeed a
  // blxcc <Rm>
  return (instr & kBlxRegMask) == kBlxRegPattern;
}

bool Assembler::IsBlxIp(Instr instr) {
  // Check the instruction is indeed a
  // blx ip
  return instr == kBlxIp;
}

bool Assembler::IsTstImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | TST | S);
}

bool Assembler::IsCmpRegister(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
         (CMP | S);
}

bool Assembler::IsCmpImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | CMP | S);
}

Register Assembler::GetCmpImmediateRegister(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return GetRn(instr);
}

int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}

// Labels refer to positions in the (to be) generated code.
// There are bound, linked, and unused labels.
//
// Bound labels refer to known positions in the already
// generated code. pos() is the position the label refers to.
//
// Linked labels refer to unknown positions in the code
// to be generated; pos() is the position of the last
// instruction using the label.
//
// The linked labels form a link chain by making the branch offset
// in the instruction stream point to the previous branch
// instruction using the same label.
//
// The link chain is terminated by a branch offset pointing to the
// same position.
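//
// For example, if an unbound label is used by branches at offsets 8, 24 and
// 40, the chain is 40 -> 24 -> 8, and the entry at 8 links to itself to
// terminate the chain.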

int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  return pos + Instruction::kPcLoadDelta + imm26;
}
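// As an illustration of the decoding above: a b or bl instruction at pos
// whose imm24 field holds 1 targets pos + kPcLoadDelta + 4, i.e. pos + 12,
// because the pc value used by the branch reads two instructions ahead.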

void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instruction.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop for
    // ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    CHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(
          options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  CHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}

void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq:
              c = "eq";
              break;
            case ne:
              c = "ne";
              break;
            case hs:
              c = "hs";
              break;
            case lo:
              c = "lo";
              break;
            case mi:
              c = "mi";
              break;
            case pl:
              c = "pl";
              break;
            case vs:
              c = "vs";
              break;
            case vc:
              c = "vc";
              break;
            case hi:
              c = "hi";
              break;
            case ls:
              c = "ls";
              break;
            case ge:
              c = "ge";
              break;
            case lt:
              c = "lt";
              break;
            case gt:
              c = "gt";
              break;
            case le:
              c = "le";
              break;
            case al:
              c = "";
              break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}

void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_) last_bound_pos_ = pos;
}

void Assembler::bind(Label* L) {
  DCHECK(!L->is_bound());  // label can only be bound once
  bind_to(L, pc_offset());
}

void Assembler::next(Label* L) {
  DCHECK(L->is_linked());
  int link = target_at(L->pos());
  if (link == L->pos()) {
    // Branch target points to the same instruction. This is the end of the link
    // chain.
    L->Unuse();
  } else {
    DCHECK_GE(link, 0);
    L->link_to(link);
  }
}

namespace {

// Low-level code emission routines depending on the addressing mode.
// If this returns true then you have to use the rotate_imm and immed_8
// that it returns, because it may have already changed the instruction
// to match them!
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  {
    // 32-bit immediates can be encoded as:
    //   (8-bit value, 2*N bit left rotation)
    // e.g. 0xab00 can be encoded as 0xab shifted left by 8 == 2*4, i.e.
    //   (0xab, 4)
    //
    // Check three categories which cover all possible shifter fits:
    //   1. 0x000000FF: The value is already 8-bit (no shifting necessary),
    //   2. 0x000FF000: The 8-bit value is somewhere in the middle of the 32-bit
    //                  value, and
    //   3. 0xF000000F: The 8-bit value is split over the beginning and end of
    //                  the 32-bit value.
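    //
    // Illustrative fits, one per category:
    //   0x000000AB -> (0xAB, rotate_imm = 0)
    //   0x000AB000 -> (0xAB, rotate_imm = 10), since 0xAB ror 20 == 0xAB000
    //   0xF000000F -> (0xFF, rotate_imm = 2), since 0xFF ror 4 == 0xF000000F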

    // For 0x000000FF.
    if (imm32 <= 0xFF) {
      *rotate_imm = 0;
      *immed_8 = imm32;
      return true;
    }
    // For 0x000FF000, count trailing zeros and shift down to 0x000000FF. Note
    // that we have to round the trailing zeros down to the nearest multiple of
    // two, since we can only encode shifts of 2*N. Note also that we know that
    // imm32 isn't zero, since we already checked if it's less than 0xFF.
    int half_trailing_zeros = base::bits::CountTrailingZerosNonZero(imm32) / 2;
    uint32_t imm8 = imm32 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      DCHECK_GT(half_trailing_zeros, 0);
      // Rotating right by trailing_zeros is equivalent to rotating left by
      // 32 - trailing_zeros. We return rotate_right / 2, so calculate
      // (32 - trailing_zeros)/2 == 16 - trailing_zeros/2.
      *rotate_imm = (16 - half_trailing_zeros);
      *immed_8 = imm8;
      return true;
    }
    // For 0xF000000F, rotate by 16 to get 0x000FF000 and continue as if it
    // were that case.
    uint32_t imm32_rot16 = base::bits::RotateLeft32(imm32, 16);
    half_trailing_zeros =
        base::bits::CountTrailingZerosNonZero(imm32_rot16) / 2;
    imm8 = imm32_rot16 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      // We've rotated left by 2*8, so we can't have more than that many
      // trailing zeroes.
      DCHECK_LT(half_trailing_zeros, 8);
      // We've already rotated by 2*8, before calculating trailing_zeros/2,
      // so we need (32 - (16 + trailing_zeros))/2 == 8 - trailing_zeros/2.
      *rotate_imm = 8 - half_trailing_zeros;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD || alu_insn == SUB) {
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND || alu_insn == BIC) {
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}
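// For example, "mov r0, #0xFFFFFF00" has no shifter encoding, but FitsShifter
// flips it to "mvn r0, #0xFF"; similarly an add of #-4 can be rewritten as a
// sub of #4 via kAddSubFlip.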

// We have to use the temporary register for things that can be relocated even
// if they can be encoded in the ARM's 12 bits of immediate-offset instruction
// space.  There is no guarantee that the relocated location can be similarly
// encoded.
bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    if (assembler->predictable_code_size()) return true;
    return assembler->options().record_reloc_info_for_serialization;
  } else if (RelocInfo::IsNoInfo(rmode)) {
    return false;
  }
  return true;
}

bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
  DCHECK_NOT_NULL(assembler);
  if (x.MustOutputRelocInfo(assembler)) {
    // Prefer constant pool if data is likely to be patched.
    return false;
  } else {
    // Otherwise, use immediate load if movw / movt is available.
    return CpuFeatures::IsSupported(ARMv7);
  }
}

}  // namespace

bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}

int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // the constant pool is required. First account for the instructions
    // required for the constant pool or immediate load.
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition
      // code, the constant pool or immediate load is enough, otherwise we need
      // to account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}
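// For example, assuming ARMv7 and no relocation info: a flag-preserving mov
// of an unencodable immediate takes 2 instructions (movw/movt), while an add
// of the same operand takes 3 (movw/movt into a scratch plus the add itself).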

void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc && rd != sp ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}

void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch = (rd.is_valid() && rd != rn && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}
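// As an illustration of the splitting loop above: with no scratch register
// available, "add r0, r0, #0x12345678" is emitted as four adds, of #0x278,
// #0x5400, #0x2340000 and #0x10000000.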

bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}

void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled register
    // offset; the constructors make sure that both shift_imm_ and shift_op_
    // are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}

void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset separately
    // to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc && rd != sp) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}

void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK(!rl.is_empty());
  DCHECK(rn != pc);
  emit(instr | rn.code() * B16 | rl.bits());
}

void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;
  if (offset_8 < 0) {
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0) am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code() * B16 | crd.code() * B12 | offset_8);
}

int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}
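// For example, a branch to a label bound at the next instruction yields
// branch_offset == -4: the target is pc_offset() + 4, while the pc value the
// branch uses reads as pc_offset() + 8.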
1460
1461// Branch instructions.
1462void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
1463  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
1464  DCHECK_EQ(branch_offset & 3, 0);
1465  int imm24 = branch_offset >> 2;
1466  const bool b_imm_check = is_int24(imm24);
1467  CHECK(b_imm_check);
1468
1469  // Block the emission of the constant pool before the next instruction.
1470  // Otherwise the passed-in branch offset would be off.
1471  BlockConstPoolFor(1);
1472
1473  emit(cond | B27 | B25 | (imm24 & kImm24Mask));
1474
1475  if (cond == al) {
1476    // Dead code is a good location to emit the constant pool.
1477    CheckConstPool(false, false);
1478  }
1479}
1480
1481void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
1482  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
1483  DCHECK_EQ(branch_offset & 3, 0);
1484  int imm24 = branch_offset >> 2;
1485  const bool bl_imm_check = is_int24(imm24);
1486  CHECK(bl_imm_check);
1487
1488  // Block the emission of the constant pool before the next instruction.
1489  // Otherwise the passed-in branch offset would be off.
1490  BlockConstPoolFor(1);
1491
1492  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
1493}
1494
1495void Assembler::blx(int branch_offset) {
1496  DCHECK_EQ(branch_offset & 1, 0);
1497  int h = ((branch_offset & 2) >> 1) * B24;
1498  int imm24 = branch_offset >> 2;
1499  const bool blx_imm_check = is_int24(imm24);
1500  CHECK(blx_imm_check);
1501
1502  // Block the emission of the constant pool before the next instruction.
1503  // Otherwise the passed-in branch offset would be off.
1504  BlockConstPoolFor(1);
1505
1506  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
1507}
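
// Note: the immediate form of blx always switches to Thumb, so the target
// only has to be 2-byte aligned; the extra halfword of offset travels in the
// H bit (bit 24) computed above.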
1508
1509void Assembler::blx(Register target, Condition cond) {
1510  DCHECK(target != pc);
1511  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX | target.code());
1512}
1513
1514void Assembler::bx(Register target, Condition cond) {
1515  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
1516  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BX | target.code());
1517}
1518
1519void Assembler::b(Label* L, Condition cond) {
1520  CheckBuffer();
1521  b(branch_offset(L), cond);
1522}
1523
1524void Assembler::bl(Label* L, Condition cond) {
1525  CheckBuffer();
1526  bl(branch_offset(L), cond);
1527}
1528
1529void Assembler::blx(Label* L) {
1530  CheckBuffer();
1531  blx(branch_offset(L));
1532}
1533
1534// Data-processing instructions.
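//
// A flexible second operand (Operand2) is either an 8-bit immediate rotated
// right by an even amount or a (possibly shifted) register. For example,
// 0x3F8 is encodable as 0xFE rotated right by 30, while 0x3F9 is not, and
// AddrMode1 then falls back to materializing it via mov/movw or a constant
// pool load.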
1535
1536void Assembler::and_(Register dst, Register src1, const Operand& src2, SBit s,
1537                     Condition cond) {
1538  AddrMode1(cond | AND | s, dst, src1, src2);
1539}
1540
1541void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
1542                     Condition cond) {
1543  and_(dst, src1, Operand(src2), s, cond);
1544}
1545
1546void Assembler::eor(Register dst, Register src1, const Operand& src2, SBit s,
1547                    Condition cond) {
1548  AddrMode1(cond | EOR | s, dst, src1, src2);
1549}
1550
1551void Assembler::eor(Register dst, Register src1, Register src2, SBit s,
1552                    Condition cond) {
1553  AddrMode1(cond | EOR | s, dst, src1, Operand(src2));
1554}
1555
1556void Assembler::sub(Register dst, Register src1, const Operand& src2, SBit s,
1557                    Condition cond) {
1558  AddrMode1(cond | SUB | s, dst, src1, src2);
1559}
1560
1561void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
1562                    Condition cond) {
1563  sub(dst, src1, Operand(src2), s, cond);
1564}
1565
1566void Assembler::rsb(Register dst, Register src1, const Operand& src2, SBit s,
1567                    Condition cond) {
1568  AddrMode1(cond | RSB | s, dst, src1, src2);
1569}
1570
1571void Assembler::add(Register dst, Register src1, const Operand& src2, SBit s,
1572                    Condition cond) {
1573  AddrMode1(cond | ADD | s, dst, src1, src2);
1574}
1575
1576void Assembler::add(Register dst, Register src1, Register src2, SBit s,
1577                    Condition cond) {
1578  add(dst, src1, Operand(src2), s, cond);
1579}
1580
1581void Assembler::adc(Register dst, Register src1, const Operand& src2, SBit s,
1582                    Condition cond) {
1583  AddrMode1(cond | ADC | s, dst, src1, src2);
1584}
1585
1586void Assembler::sbc(Register dst, Register src1, const Operand& src2, SBit s,
1587                    Condition cond) {
1588  AddrMode1(cond | SBC | s, dst, src1, src2);
1589}
1590
1591void Assembler::rsc(Register dst, Register src1, const Operand& src2, SBit s,
1592                    Condition cond) {
1593  AddrMode1(cond | RSC | s, dst, src1, src2);
1594}
1595
1596void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
1597  AddrMode1(cond | TST | S, no_reg, src1, src2);
1598}
1599
1600void Assembler::tst(Register src1, Register src2, Condition cond) {
1601  tst(src1, Operand(src2), cond);
1602}
1603
1604void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
1605  AddrMode1(cond | TEQ | S, no_reg, src1, src2);
1606}
1607
1608void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
1609  AddrMode1(cond | CMP | S, no_reg, src1, src2);
1610}
1611
1612void Assembler::cmp(Register src1, Register src2, Condition cond) {
1613  cmp(src1, Operand(src2), cond);
1614}
1615
1616void Assembler::cmp_raw_immediate(Register src, int raw_immediate,
1617                                  Condition cond) {
1618  DCHECK(is_uint12(raw_immediate));
1619  emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
1620}
1621
1622void Assembler::cmn(Register src1, const Operand& src2, Condition cond) {
1623  AddrMode1(cond | CMN | S, no_reg, src1, src2);
1624}
1625
1626void Assembler::orr(Register dst, Register src1, const Operand& src2, SBit s,
1627                    Condition cond) {
1628  AddrMode1(cond | ORR | s, dst, src1, src2);
1629}
1630
1631void Assembler::orr(Register dst, Register src1, Register src2, SBit s,
1632                    Condition cond) {
1633  orr(dst, src1, Operand(src2), s, cond);
1634}
1635
1636void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) {
1637  // Don't allow nop instructions in the form mov rn, rn to be generated using
1638  // the mov instruction. They must be generated using nop(int/NopMarkerTypes).
1639  DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al));
1640  AddrMode1(cond | MOV | s, dst, no_reg, src);
1641}
1642
1643void Assembler::mov(Register dst, Register src, SBit s, Condition cond) {
1644  mov(dst, Operand(src), s, cond);
1645}
1646
1647void Assembler::mov_label_offset(Register dst, Label* label) {
1648  if (label->is_bound()) {
1649    mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
1650  } else {
1651    // Emit the link to the label in the code stream followed by extra nop
1652    // instructions.
1653    // If the label is not linked, then start a new link chain by linking it to
1654    // itself, emitting pc_offset().
1655    int link = label->is_linked() ? label->pos() : pc_offset();
1656    label->link_to(pc_offset());
1657
1658    // When the label is bound, these instructions will be patched with a
1659    // sequence of movw/movt or mov/orr/orr instructions. They will load the
1660    // destination register with the position of the label from the beginning
1661    // of the code.
1662    //
1663    // The link will be extracted from the first instruction and the destination
1664    // register from the second.
1665    //   For ARMv7:
1666    //      link
1667    //      mov dst, dst
1668    //   For ARMv6:
1669    //      link
1670    //      mov dst, dst
1671    //      mov dst, dst
1672    //
1673    // When the label gets bound: target_at extracts the link and target_at_put
1674    // patches the instructions.
1675    CHECK(is_uint24(link));
1676    BlockConstPoolScope block_const_pool(this);
1677    emit(link);
1678    nop(dst.code());
1679    if (!CpuFeatures::IsSupported(ARMv7)) {
1680      nop(dst.code());
1681    }
1682  }
1683}
1684
1685void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
1686  DCHECK(IsEnabled(ARMv7));
1687  emit(cond | 0x30 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
1688}
1689
1690void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
1691  DCHECK(IsEnabled(ARMv7));
1692  emit(cond | 0x34 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
1693}
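
// Together, movw/movt materialize an arbitrary 32-bit constant in two
// instructions on ARMv7, e.g. for 0xDEADBEEF:
//   movw r0, #0xBEEF   ; r0 = 0x0000BEEF (movw zeroes the upper half)
//   movt r0, #0xDEAD   ; r0 = 0xDEADBEEF (movt leaves the lower half alone)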
1694
1695void Assembler::bic(Register dst, Register src1, const Operand& src2, SBit s,
1696                    Condition cond) {
1697  AddrMode1(cond | BIC | s, dst, src1, src2);
1698}
1699
1700void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) {
1701  AddrMode1(cond | MVN | s, dst, no_reg, src);
1702}
1703
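// The three shifts below are pseudo-instructions: ARM has no separate
// asr/lsl/lsr opcodes, so each assembles to a MOV with a shifted-register
// operand, e.g. "lsl r0, r1, #2" is encoded as "mov r0, r1, lsl #2".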
1704void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s,
1705                    Condition cond) {
1706  if (src2.IsRegister()) {
1707    mov(dst, Operand(src1, ASR, src2.rm()), s, cond);
1708  } else {
1709    mov(dst, Operand(src1, ASR, src2.immediate()), s, cond);
1710  }
1711}
1712
1713void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s,
1714                    Condition cond) {
1715  if (src2.IsRegister()) {
1716    mov(dst, Operand(src1, LSL, src2.rm()), s, cond);
1717  } else {
1718    mov(dst, Operand(src1, LSL, src2.immediate()), s, cond);
1719  }
1720}
1721
1722void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s,
1723                    Condition cond) {
1724  if (src2.IsRegister()) {
1725    mov(dst, Operand(src1, LSR, src2.rm()), s, cond);
1726  } else {
1727    mov(dst, Operand(src1, LSR, src2.immediate()), s, cond);
1728  }
1729}
1730
1731// Multiply instructions.
1732void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
1733                    SBit s, Condition cond) {
1734  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1735  emit(cond | A | s | dst.code() * B16 | srcA.code() * B12 | src2.code() * B8 |
1736       B7 | B4 | src1.code());
1737}
1738
1739void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
1740                    Condition cond) {
1741  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1742  DCHECK(IsEnabled(ARMv7));
1743  emit(cond | B22 | B21 | dst.code() * B16 | srcA.code() * B12 |
1744       src2.code() * B8 | B7 | B4 | src1.code());
1745}
1746
1747void Assembler::sdiv(Register dst, Register src1, Register src2,
1748                     Condition cond) {
1749  DCHECK(dst != pc && src1 != pc && src2 != pc);
1750  DCHECK(IsEnabled(SUDIV));
1751  emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 |
1752       src2.code() * B8 | B4 | src1.code());
1753}
1754
1755void Assembler::udiv(Register dst, Register src1, Register src2,
1756                     Condition cond) {
1757  DCHECK(dst != pc && src1 != pc && src2 != pc);
1758  DCHECK(IsEnabled(SUDIV));
1759  emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 |
1760       src2.code() * B8 | B4 | src1.code());
1761}
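
// Note: ARM integer division yields 0 for a zero divisor instead of faulting
// (unless the core is configured to trap), so callers that need an explicit
// division-by-zero check must emit one themselves.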
1762
1763void Assembler::mul(Register dst, Register src1, Register src2, SBit s,
1764                    Condition cond) {
1765  DCHECK(dst != pc && src1 != pc && src2 != pc);
1766  // dst goes in bits 16-19 for this instruction!
1767  emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code());
1768}
1769
1770void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA,
1771                      Condition cond) {
1772  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1773  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 |
1774       srcA.code() * B12 | src2.code() * B8 | B4 | src1.code());
1775}
1776
1777void Assembler::smmul(Register dst, Register src1, Register src2,
1778                      Condition cond) {
1779  DCHECK(dst != pc && src1 != pc && src2 != pc);
1780  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 |
1781       src2.code() * B8 | B4 | src1.code());
1782}
1783
1784void Assembler::smlal(Register dstL, Register dstH, Register src1,
1785                      Register src2, SBit s, Condition cond) {
1786  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1787  DCHECK(dstL != dstH);
1788  emit(cond | B23 | B22 | A | s | dstH.code() * B16 | dstL.code() * B12 |
1789       src2.code() * B8 | B7 | B4 | src1.code());
1790}
1791
1792void Assembler::smull(Register dstL, Register dstH, Register src1,
1793                      Register src2, SBit s, Condition cond) {
1794  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1795  DCHECK(dstL != dstH);
1796  emit(cond | B23 | B22 | s | dstH.code() * B16 | dstL.code() * B12 |
1797       src2.code() * B8 | B7 | B4 | src1.code());
1798}
1799
1800void Assembler::umlal(Register dstL, Register dstH, Register src1,
1801                      Register src2, SBit s, Condition cond) {
1802  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1803  DCHECK(dstL != dstH);
1804  emit(cond | B23 | A | s | dstH.code() * B16 | dstL.code() * B12 |
1805       src2.code() * B8 | B7 | B4 | src1.code());
1806}
1807
1808void Assembler::umull(Register dstL, Register dstH, Register src1,
1809                      Register src2, SBit s, Condition cond) {
1810  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1811  DCHECK(dstL != dstH);
1812  emit(cond | B23 | s | dstH.code() * B16 | dstL.code() * B12 |
1813       src2.code() * B8 | B7 | B4 | src1.code());
1814}
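
// Example: "umull r0, r1, r2, r3" computes the full 64-bit product r2 * r3,
// placing the low word in r0 (dstL) and the high word in r1 (dstH). smull is
// the signed counterpart; smlal/umlal accumulate into the existing dstH:dstL.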
1815
1816// Miscellaneous arithmetic instructions.
1817void Assembler::clz(Register dst, Register src, Condition cond) {
1818  DCHECK(dst != pc && src != pc);
1819  emit(cond | B24 | B22 | B21 | 15 * B16 | dst.code() * B12 | 15 * B8 | CLZ |
1820       src.code());
1821}
1822
1823// Saturating instructions.
1824
1825// Unsigned saturate.
1826void Assembler::usat(Register dst, int satpos, const Operand& src,
1827                     Condition cond) {
1828  DCHECK(dst != pc && src.rm_ != pc);
1829  DCHECK((satpos >= 0) && (satpos <= 31));
1830  DCHECK(src.IsImmediateShiftedRegister());
1831  DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
1832
1833  int sh = 0;
1834  if (src.shift_op_ == ASR) {
1835    sh = 1;
1836  }
1837
1838  emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 |
1839       src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code());
1840}
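
// Example: "usat r0, #8, r1" clamps r1 to the unsigned 8-bit range [0, 255];
// in general the result is saturated to [0, 2^satpos - 1].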
1841
1842// Bitfield manipulation instructions.
1843
1844// Unsigned bit field extract.
1845// Extracts #width adjacent bits from position #lsb in a register, and
1846// writes them to the low bits of a destination register.
1847//   ubfx dst, src, #lsb, #width
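// e.g. "ubfx r0, r1, #4, #8" writes bits 11:4 of r1 to bits 7:0 of r0,
// zeroing the rest of r0.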
1848void Assembler::ubfx(Register dst, Register src, int lsb, int width,
1849                     Condition cond) {
1850  DCHECK(IsEnabled(ARMv7));
1851  DCHECK(dst != pc && src != pc);
1852  DCHECK((lsb >= 0) && (lsb <= 31));
1853  DCHECK((width >= 1) && (width <= (32 - lsb)));
1854  emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 |
1855       lsb * B7 | B6 | B4 | src.code());
1856}
1857
1858// Signed bit field extract.
1859// Extracts #width adjacent bits from position #lsb in a register, and
1860// writes them to the low bits of a destination register. The extracted
1861// value is sign extended to fill the destination register.
1862//   sbfx dst, src, #lsb, #width
1863void Assembler::sbfx(Register dst, Register src, int lsb, int width,
1864                     Condition cond) {
1865  DCHECK(IsEnabled(ARMv7));
1866  DCHECK(dst != pc && src != pc);
1867  DCHECK((lsb >= 0) && (lsb <= 31));
1868  DCHECK((width >= 1) && (width <= (32 - lsb)));
1869  emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 |
1870       lsb * B7 | B6 | B4 | src.code());
1871}
1872
1873// Bit field clear.
1874// Sets #width adjacent bits at position #lsb in the destination register
1875// to zero, preserving the value of the other bits.
1876//   bfc dst, #lsb, #width
1877void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
1878  DCHECK(IsEnabled(ARMv7));
1879  DCHECK(dst != pc);
1880  DCHECK((lsb >= 0) && (lsb <= 31));
1881  DCHECK((width >= 1) && (width <= (32 - lsb)));
1882  int msb = lsb + width - 1;
1883  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF);
1884}
1885
1886// Bit field insert.
1887// Inserts #width adjacent bits from the low bits of the source register
1888// into position #lsb of the destination register.
1889//   bfi dst, src, #lsb, #width
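// e.g. "bfi r0, r1, #8, #4" copies bits 3:0 of r1 into bits 11:8 of r0,
// leaving the other bits of r0 unchanged.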
1890void Assembler::bfi(Register dst, Register src, int lsb, int width,
1891                    Condition cond) {
1892  DCHECK(IsEnabled(ARMv7));
1893  DCHECK(dst != pc && src != pc);
1894  DCHECK((lsb >= 0) && (lsb <= 31));
1895  DCHECK((width >= 1) && (width <= (32 - lsb)));
1896  int msb = lsb + width - 1;
1897  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 |
1898       src.code());
1899}
1900
1901void Assembler::pkhbt(Register dst, Register src1, const Operand& src2,
1902                      Condition cond) {
1903  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
1904  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
1905  // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
1906  DCHECK(dst != pc);
1907  DCHECK(src1 != pc);
1908  DCHECK(src2.IsImmediateShiftedRegister());
1909  DCHECK(src2.rm() != pc);
1910  DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
1911  DCHECK(src2.shift_op() == LSL);
1912  emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 |
1913       src2.shift_imm_ * B7 | B4 | src2.rm().code());
1914}
1915
1916void Assembler::pkhtb(Register dst, Register src1, const Operand& src2,
1917                      Condition cond) {
1918  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
1919  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
1920  // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
1921  DCHECK(dst != pc);
1922  DCHECK(src1 != pc);
1923  DCHECK(src2.IsImmediateShiftedRegister());
1924  DCHECK(src2.rm() != pc);
1925  DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
1926  DCHECK(src2.shift_op() == ASR);
1927  int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
1928  emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 | asr * B7 |
1929       B6 | B4 | src2.rm().code());
1930}
1931
1932void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) {
1933  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
1934  // cond(31-28) | 01101010(27-20) | 1111(19-16) |
1935  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1936  DCHECK(dst != pc);
1937  DCHECK(src != pc);
1938  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1939  emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 |
1940       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1941}
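
// For all the extend instructions, the rotate argument selects which byte or
// halfword gets extended: the source is rotated right by 0, 8, 16 or 24 bits
// first, so e.g. "sxtb r0, r1, 8" sign-extends bits 15:8 of r1 into r0.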
1942
1943void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate,
1944                      Condition cond) {
1945  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
1946  // cond(31-28) | 01101010(27-20) | Rn(19-16) |
1947  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1948  DCHECK(dst != pc);
1949  DCHECK(src1 != pc);
1950  DCHECK(src2 != pc);
1951  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1952  emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 |
1953       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
1954}
1955
1956void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) {
1957  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
1958  // cond(31-28) | 01101011(27-20) | 1111(19-16) |
1959  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1960  DCHECK(dst != pc);
1961  DCHECK(src != pc);
1962  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1963  emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 |
1964       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1965}
1966
1967void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate,
1968                      Condition cond) {
1969  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
1970  // cond(31-28) | 01101011(27-20) | Rn(19-16) |
1971  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1972  DCHECK(dst != pc);
1973  DCHECK(src1 != pc);
1974  DCHECK(src2 != pc);
1975  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1976  emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 |
1977       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
1978}
1979
1980void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) {
1981  // Instruction details available in ARM DDI 0406C.b, A8.8.274.
1982  // cond(31-28) | 01101110(27-20) | 1111(19-16) |
1983  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1984  DCHECK(dst != pc);
1985  DCHECK(src != pc);
1986  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1987  emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 |
1988       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1989}
1990
1991void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate,
1992                      Condition cond) {
1993  // Instruction details available in ARM DDI 0406C.b, A8.8.271.
1994  // cond(31-28) | 01101110(27-20) | Rn(19-16) |
1995  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1996  DCHECK(dst != pc);
1997  DCHECK(src1 != pc);
1998  DCHECK(src2 != pc);
1999  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2000  emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 |
2001       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
2002}
2003
2004void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) {
2005  // Instruction details available in ARM DDI 0406C.b, A8.8.275.
2006  // cond(31-28) | 01101100(27-20) | 1111(19-16) |
2007  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2008  DCHECK(dst != pc);
2009  DCHECK(src != pc);
2010  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2011  emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 |
2012       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
2013}
2014
2015void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) {
2016  // Instruction details available in ARM DDI 0406C.b, A8.8.276.
2017  // cond(31-28) | 01101111(27-20) | 1111(19-16) |
2018  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2019  DCHECK(dst != pc);
2020  DCHECK(src != pc);
2021  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2022  emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 |
2023       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
2024}
2025
2026void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate,
2027                      Condition cond) {
2028  // Instruction details available in ARM DDI 0406C.b, A8.8.273.
2029  // cond(31-28) | 01101111(27-20) | Rn(19-16) |
2030  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2031  DCHECK(dst != pc);
2032  DCHECK(src1 != pc);
2033  DCHECK(src2 != pc);
2034  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2035  emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 |
2036       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
2037}
2038
2039void Assembler::rbit(Register dst, Register src, Condition cond) {
2040  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
2041  // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
2042  DCHECK(IsEnabled(ARMv7));
2043  DCHECK(dst != pc);
2044  DCHECK(src != pc);
2045  emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
2046}
2047
2048void Assembler::rev(Register dst, Register src, Condition cond) {
2049  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
2050  // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
2051  DCHECK(dst != pc);
2052  DCHECK(src != pc);
2053  emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
2054}
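
// Example: rev turns 0x11223344 into 0x44332211 (the usual 32-bit endianness
// swap), while rbit reverses the order of all 32 bits.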
2055
2056// Status register access instructions.
2057void Assembler::mrs(Register dst, SRegister s, Condition cond) {
2058  DCHECK(dst != pc);
2059  emit(cond | B24 | s | 15 * B16 | dst.code() * B12);
2060}
2061
2062void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
2063                    Condition cond) {
2064  DCHECK_NE(fields & 0x000F0000, 0);  // At least one field must be set.
2065  DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR));
2066  Instr instr;
2067  if (src.IsImmediate()) {
2068    // Immediate.
2069    uint32_t rotate_imm;
2070    uint32_t immed_8;
2071    if (src.MustOutputRelocInfo(this) ||
2072        !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) {
2073      UseScratchRegisterScope temps(this);
2074      Register scratch = temps.Acquire();
2075      // Immediate operand cannot be encoded, load it first to a scratch
2076      // register.
2077      Move32BitImmediate(scratch, src);
2078      msr(fields, Operand(scratch), cond);
2079      return;
2080    }
2081    instr = I | rotate_imm * B8 | immed_8;
2082  } else {
2083    DCHECK(src.IsRegister());  // Only rm is allowed.
2084    instr = src.rm_.code();
2085  }
2086  emit(cond | instr | B24 | B21 | fields | 15 * B12);
2087}
2088
2089// Load/Store instructions.
2090void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
2091  AddrMode2(cond | B26 | L, dst, src);
2092}
2093
2094void Assembler::str(Register src, const MemOperand& dst, Condition cond) {
2095  AddrMode2(cond | B26, src, dst);
2096}
2097
2098void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) {
2099  AddrMode2(cond | B26 | B | L, dst, src);
2100}
2101
2102void Assembler::strb(Register src, const MemOperand& dst, Condition cond) {
2103  AddrMode2(cond | B26 | B, src, dst);
2104}
2105
2106void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) {
2107  AddrMode3(cond | L | B7 | H | B4, dst, src);
2108}
2109
2110void Assembler::strh(Register src, const MemOperand& dst, Condition cond) {
2111  AddrMode3(cond | B7 | H | B4, src, dst);
2112}
2113
2114void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) {
2115  AddrMode3(cond | L | B7 | S6 | B4, dst, src);
2116}
2117
2118void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) {
2119  AddrMode3(cond | L | B7 | S6 | H | B4, dst, src);
2120}
2121
2122void Assembler::ldrd(Register dst1, Register dst2, const MemOperand& src,
2123                     Condition cond) {
2124  DCHECK(src.rm() == no_reg);
2125  DCHECK(dst1 != lr);  // r14.
2126  DCHECK_EQ(0, dst1.code() % 2);
2127  DCHECK_EQ(dst1.code() + 1, dst2.code());
2128  AddrMode3(cond | B7 | B6 | B4, dst1, src);
2129}
2130
2131void Assembler::strd(Register src1, Register src2, const MemOperand& dst,
2132                     Condition cond) {
2133  DCHECK(dst.rm() == no_reg);
2134  DCHECK(src1 != lr);  // r14.
2135  DCHECK_EQ(0, src1.code() % 2);
2136  DCHECK_EQ(src1.code() + 1, src2.code());
2137  AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst);
2138}
2139
2140void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) {
2141  AddrMode am = Offset;
2142  if (imm12 < 0) {
2143    imm12 = -imm12;
2144    am = NegOffset;
2145  }
2146  DCHECK(is_uint12(imm12));
2147  emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12);
2148}
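
// This pc-relative form, "ldr dst, [pc, #+/-imm12]", is how constant pool
// entries are addressed; the 12-bit offset limits the pool to roughly a 4KB
// window around the referencing load.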
2149
2150// Load/Store exclusive instructions.
2151void Assembler::ldrex(Register dst, Register src, Condition cond) {
2152  // Instruction details available in ARM DDI 0406C.b, A8.8.75.
2153  // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2154  DCHECK(dst != pc);
2155  DCHECK(src != pc);
2156  emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F);
2157}
2158
2159void Assembler::strex(Register src1, Register src2, Register dst,
2160                      Condition cond) {
2161  // Instruction details available in ARM DDI 0406C.b, A8.8.212.
2162  // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
2163  // Rt(3-0)
2164  DCHECK(dst != pc);
2165  DCHECK(src1 != pc);
2166  DCHECK(src2 != pc);
2167  DCHECK(src1 != dst);
2168  DCHECK(src1 != src2);
2169  emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
2170       src2.code());
2171}
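
// ldrex/strex form a load-linked/store-conditional pair: strex writes 0 to
// its result register if the exclusive store succeeded and 1 if it lost the
// reservation. A sketch of an atomic increment of [r0]:
//   loop: ldrex r1, [r0]
//         add   r1, r1, #1
//         strex r2, r1, [r0]
//         cmp   r2, #0
//         bne   loop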
2172
2173void Assembler::ldrexb(Register dst, Register src, Condition cond) {
2174  // Instruction details available in ARM DDI 0406C.b, A8.8.76.
2175  // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2176  DCHECK(dst != pc);
2177  DCHECK(src != pc);
2178  emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
2179       0xF9F);
2180}
2181
2182void Assembler::strexb(Register src1, Register src2, Register dst,
2183                       Condition cond) {
2184  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
2185  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
2186  // Rt(3-0)
2187  DCHECK(dst != pc);
2188  DCHECK(src1 != pc);
2189  DCHECK(src2 != pc);
2190  DCHECK(src1 != dst);
2191  DCHECK(src1 != src2);
2192  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
2193       0xF9 * B4 | src2.code());
2194}
2195
2196void Assembler::ldrexh(Register dst, Register src, Condition cond) {
2197  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
2198  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2199  DCHECK(dst != pc);
2200  DCHECK(src != pc);
2201  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
2202       dst.code() * B12 | 0xF9F);
2203}
2204
2205void Assembler::strexh(Register src1, Register src2, Register dst,
2206                       Condition cond) {
2207  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
2208  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
2209  // Rt(3-0)
2210  DCHECK(dst != pc);
2211  DCHECK(src1 != pc);
2212  DCHECK(src2 != pc);
2213  DCHECK(src1 != dst);
2214  DCHECK(src1 != src2);
2215  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
2216       0xF9 * B4 | src2.code());
2217}
2218
2219void Assembler::ldrexd(Register dst1, Register dst2, Register src,
2220                       Condition cond) {
2221  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2222  DCHECK(dst1 != lr);  // r14.
2223  // The pair of destination registers is restricted to being an even-numbered
2224  // register and the odd-numbered register that immediately follows it.
2225  DCHECK_EQ(0, dst1.code() % 2);
2226  DCHECK_EQ(dst1.code() + 1, dst2.code());
2227  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
2228       0xF9F);
2229}
2230
2231void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
2232                       Condition cond) {
2233  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) | Rt(3-0)
2234  DCHECK(src1 != lr);  // r14.
2235  // The pair of source registers is restricted to being an even-numbered
2236  // register and the odd-numbered register that immediately follows it.
2237  DCHECK_EQ(0, src1.code() % 2);
2238  DCHECK_EQ(src1.code() + 1, src2.code());
2239  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
2240       0xF9 * B4 | src1.code());
2241}
2242
2243// Preload instructions.
2244void Assembler::pld(const MemOperand& address) {
2245  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
2246  // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
2247  // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) |
2248  DCHECK(address.rm() == no_reg);
2249  DCHECK(address.am() == Offset);
2250  int U = B23;
2251  int offset = address.offset();
2252  if (offset < 0) {
2253    offset = -offset;
2254    U = 0;
2255  }
2256  DCHECK_LT(offset, 4096);
2257  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 |
2258       address.rn().code() * B16 | 0xF * B12 | offset);
2259}
2260
2261// Load/Store multiple instructions.
2262void Assembler::ldm(BlockAddrMode am, Register base, RegList dst,
2263                    Condition cond) {
2264  // ABI stack constraint: ldmxx base, {..sp..} with base != sp is not restartable.
2265  DCHECK(base == sp || !dst.has(sp));
2266
2267  AddrMode4(cond | B27 | am | L, base, dst);
2268
2269  // Emit the constant pool after a function return implemented by ldm ..{..pc}.
2270  if (cond == al && dst.has(pc)) {
2271    // There is a slight chance that the ldm instruction was actually a call,
2272    // in which case it would be wrong to return into the constant pool; we
2273    // recognize this case by checking if the emission of the pool was blocked
2274    // at the pc of the ldm instruction by a mov lr, pc instruction; if this is
2275    // the case, we emit a jump over the pool.
2276    CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize);
2277  }
2278}
2279
2280void Assembler::stm(BlockAddrMode am, Register base, RegList src,
2281                    Condition cond) {
2282  AddrMode4(cond | B27 | am, base, src);
2283}
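
// The familiar push/pop operations are special cases of these: "push {rl}"
// is "stmdb sp!, {rl}" (am == db_w) and "pop {rl}" is "ldmia sp!, {rl}"
// (am == ia_w).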
2284
2285// Exception-generating instructions and debugging support.
2286// Stops with a non-negative code less than kNumOfWatchedStops support
2287// enabling/disabling and a counter feature. See simulator-arm.h.
2288void Assembler::stop(Condition cond, int32_t code) {
2289#ifndef __arm__
2290  DCHECK_GE(code, kDefaultStopCode);
2291  {
2292    BlockConstPoolScope block_const_pool(this);
2293    if (code >= 0) {
2294      svc(kStopCode + code, cond);
2295    } else {
2296      svc(kStopCode + kMaxStopCode, cond);
2297    }
2298  }
2299#else   // def __arm__
2300  if (cond != al) {
2301    Label skip;
2302    b(&skip, NegateCondition(cond));
2303    bkpt(0);
2304    bind(&skip);
2305  } else {
2306    bkpt(0);
2307  }
2308#endif  // def __arm__
2309}
2310
2311void Assembler::bkpt(uint32_t imm16) {
2312  DCHECK(is_uint16(imm16));
2313  emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF));
2314}
2315
2316void Assembler::svc(uint32_t imm24, Condition cond) {
2317  CHECK(is_uint24(imm24));
2318  emit(cond | 15 * B24 | imm24);
2319}
2320
2321void Assembler::dmb(BarrierOption option) {
2322  if (CpuFeatures::IsSupported(ARMv7)) {
2323    // Details available in ARM DDI 0406C.b, A8-378.
2324    emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option);
2325  } else {
2326    // Details available in ARM DDI 0406C.b, B3-1750.
2327    // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored.
2328    mcr(p15, 0, r0, cr7, cr10, 5);
2329  }
2330}
2331
2332void Assembler::dsb(BarrierOption option) {
2333  if (CpuFeatures::IsSupported(ARMv7)) {
2334    // Details available in ARM DDI 0406C.b, A8-380.
2335    emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option);
2336  } else {
2337    // Details available in ARM DDI 0406C.b, B3-1750.
2338    // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored.
2339    mcr(p15, 0, r0, cr7, cr10, 4);
2340  }
2341}
2342
2343void Assembler::isb(BarrierOption option) {
2344  if (CpuFeatures::IsSupported(ARMv7)) {
2345    // Details available in ARM DDI 0406C.b, A8-389.
2346    emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option);
2347  } else {
2348    // Details available in ARM DDI 0406C.b, B3-1750.
2349    // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored.
2350    mcr(p15, 0, r0, cr7, cr5, 4);
2351  }
2352}
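
// In short: dmb orders memory accesses relative to one another, dsb also
// waits for outstanding accesses to complete, and isb flushes the pipeline
// so that subsequent instructions are refetched.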
2353
2354void Assembler::csdb() {
2355  // Details available in Arm Cache Speculation Side-channels white paper,
2356  // version 1.1, page 4.
2357  emit(0xE320F014);
2358}
2359
2360// Coprocessor instructions.
2361void Assembler::cdp(Coprocessor coproc, int opcode_1, CRegister crd,
2362                    CRegister crn, CRegister crm, int opcode_2,
2363                    Condition cond) {
2364  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
2365  emit(cond | B27 | B26 | B25 | (opcode_1 & 15) * B20 | crn.code() * B16 |
2366       crd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | crm.code());
2367}
2368
2369void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd,
2370                     CRegister crn, CRegister crm, int opcode_2) {
2371  cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition);
2372}
2373
2374void Assembler::mcr(Coprocessor coproc, int opcode_1, Register rd,
2375                    CRegister crn, CRegister crm, int opcode_2,
2376                    Condition cond) {
2377  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
2378  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | crn.code() * B16 |
2379       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
2380}
2381
2382void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd,
2383                     CRegister crn, CRegister crm, int opcode_2) {
2384  mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
2385}
2386
2387void Assembler::mrc(Coprocessor coproc, int opcode_1, Register rd,
2388                    CRegister crn, CRegister crm, int opcode_2,
2389                    Condition cond) {
2390  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
2391  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | L | crn.code() * B16 |
2392       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
2393}
2394
2395void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd,
2396                     CRegister crn, CRegister crm, int opcode_2) {
2397  mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
2398}
2399
2400void Assembler::ldc(Coprocessor coproc, CRegister crd, const MemOperand& src,
2401                    LFlag l, Condition cond) {
2402  AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src);
2403}
2404
2405void Assembler::ldc(Coprocessor coproc, CRegister crd, Register rn, int option,
2406                    LFlag l, Condition cond) {
2407  // Unindexed addressing.
2408  DCHECK(is_uint8(option));
2409  emit(cond | B27 | B26 | U | l | L | rn.code() * B16 | crd.code() * B12 |
2410       coproc * B8 | (option & 255));
2411}
2412
2413void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src,
2414                     LFlag l) {
2415  ldc(coproc, crd, src, l, kSpecialCondition);
2416}
2417
2418void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option,
2419                     LFlag l) {
2420  ldc(coproc, crd, rn, option, l, kSpecialCondition);
2421}
2422
2423// Support for VFP.
2424
2425void Assembler::vldr(const DwVfpRegister dst, const Register base, int offset,
2426                     const Condition cond) {
2427  // Ddst = MEM(Rbase + offset).
2428  // Instruction details available in ARM DDI 0406C.b, A8-924.
2429  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
2430  // Vd(15-12) | 1011(11-8) | offset
2431  DCHECK(VfpRegisterIsAvailable(dst));
2432  int u = 1;
2433  if (offset < 0) {
2434    CHECK_NE(offset, kMinInt);
2435    offset = -offset;
2436    u = 0;
2437  }
2438  int vd, d;
2439  dst.split_code(&vd, &d);
2440
2441  DCHECK_GE(offset, 0);
2442  if ((offset % 4) == 0 && (offset / 4) < 256) {
2443    emit(cond | 0xD * B24 | u * B23 | d * B22 | B20 | base.code() * B16 |
2444         vd * B12 | 0xB * B8 | ((offset / 4) & 255));
2445  } else {
2446    // Larger offsets must be handled by computing the correct address in a
2447    // scratch register.
2448    UseScratchRegisterScope temps(this);
2449    Register scratch = temps.Acquire();
2450    DCHECK(base != scratch);
2451    if (u == 1) {
2452      add(scratch, base, Operand(offset));
2453    } else {
2454      sub(scratch, base, Operand(offset));
2455    }
2456    emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 |
2457         0xB * B8);
2458  }
2459}
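
// Example: "vldr d0, [r1, #256]" encodes directly (256 / 4 = 64 fits the
// 8-bit word-offset field), whereas an offset of 1024 (1024 / 4 = 256) or
// any offset that is not a multiple of 4 takes the scratch-register path.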
2460
2461void Assembler::vldr(const DwVfpRegister dst, const MemOperand& operand,
2462                     const Condition cond) {
2463  DCHECK(VfpRegisterIsAvailable(dst));
2464  DCHECK(operand.am_ == Offset);
2465  if (operand.rm().is_valid()) {
2466    UseScratchRegisterScope temps(this);
2467    Register scratch = temps.Acquire();
2468    add(scratch, operand.rn(),
2469        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2470    vldr(dst, scratch, 0, cond);
2471  } else {
2472    vldr(dst, operand.rn(), operand.offset(), cond);
2473  }
2474}
2475
2476void Assembler::vldr(const SwVfpRegister dst, const Register base, int offset,
2477                     const Condition cond) {
2478  // Sdst = MEM(Rbase + offset).
2479  // Instruction details available in ARM DDI 0406A, A8-628.
2480  // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
2481  // Vdst(15-12) | 1010(11-8) | offset
2482  int u = 1;
2483  if (offset < 0) {
2484    offset = -offset;
2485    u = 0;
2486  }
2487  int sd, d;
2488  dst.split_code(&sd, &d);
2489  DCHECK_GE(offset, 0);
2490
2491  if ((offset % 4) == 0 && (offset / 4) < 256) {
2492    emit(cond | u * B23 | d * B22 | 0xD1 * B20 | base.code() * B16 | sd * B12 |
2493         0xA * B8 | ((offset / 4) & 255));
2494  } else {
2495    // Larger offsets must be handled by computing the correct address in a
2496    // scratch register.
2497    UseScratchRegisterScope temps(this);
2498    Register scratch = temps.Acquire();
2499    DCHECK(base != scratch);
2500    if (u == 1) {
2501      add(scratch, base, Operand(offset));
2502    } else {
2503      sub(scratch, base, Operand(offset));
2504    }
2505    emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 |
2506         0xA * B8);
2507  }
2508}
2509
2510void Assembler::vldr(const SwVfpRegister dst, const MemOperand& operand,
2511                     const Condition cond) {
2512  DCHECK(operand.am_ == Offset);
2513  if (operand.rm().is_valid()) {
2514    UseScratchRegisterScope temps(this);
2515    Register scratch = temps.Acquire();
2516    add(scratch, operand.rn(),
2517        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2518    vldr(dst, scratch, 0, cond);
2519  } else {
2520    vldr(dst, operand.rn(), operand.offset(), cond);
2521  }
2522}
2523
2524void Assembler::vstr(const DwVfpRegister src, const Register base, int offset,
2525                     const Condition cond) {
2526  // MEM(Rbase + offset) = Dsrc.
2527  // Instruction details available in ARM DDI 0406C.b, A8-1082.
2528  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
2529  // Vd(15-12) | 1011(11-8) | (offset/4)
2530  DCHECK(VfpRegisterIsAvailable(src));
2531  int u = 1;
2532  if (offset < 0) {
2533    CHECK_NE(offset, kMinInt);
2534    offset = -offset;
2535    u = 0;
2536  }
2537  DCHECK_GE(offset, 0);
2538  int vd, d;
2539  src.split_code(&vd, &d);
2540
2541  if ((offset % 4) == 0 && (offset / 4) < 256) {
2542    emit(cond | 0xD * B24 | u * B23 | d * B22 | base.code() * B16 | vd * B12 |
2543         0xB * B8 | ((offset / 4) & 255));
2544  } else {
2545    // Larger offsets must be handled by computing the correct address in a
2546    // scratch register.
2547    UseScratchRegisterScope temps(this);
2548    Register scratch = temps.Acquire();
2549    DCHECK(base != scratch);
2550    if (u == 1) {
2551      add(scratch, base, Operand(offset));
2552    } else {
2553      sub(scratch, base, Operand(offset));
2554    }
2555    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
2556         0xB * B8);
2557  }
2558}
2559
2560void Assembler::vstr(const DwVfpRegister src, const MemOperand& operand,
2561                     const Condition cond) {
2562  DCHECK(VfpRegisterIsAvailable(src));
2563  DCHECK(operand.am_ == Offset);
2564  if (operand.rm().is_valid()) {
2565    UseScratchRegisterScope temps(this);
2566    Register scratch = temps.Acquire();
2567    add(scratch, operand.rn(),
2568        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2569    vstr(src, scratch, 0, cond);
2570  } else {
2571    vstr(src, operand.rn(), operand.offset(), cond);
2572  }
2573}
2574
2575void Assembler::vstr(const SwVfpRegister src, const Register base, int offset,
2576                     const Condition cond) {
2577  // MEM(Rbase + offset) = SSrc.
2578  // Instruction details available in ARM DDI 0406A, A8-786.
2579  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
2580  // Vdst(15-12) | 1010(11-8) | (offset/4)
2581  int u = 1;
2582  if (offset < 0) {
2583    CHECK_NE(offset, kMinInt);
2584    offset = -offset;
2585    u = 0;
2586  }
2587  int sd, d;
2588  src.split_code(&sd, &d);
2589  DCHECK_GE(offset, 0);
2590  if ((offset % 4) == 0 && (offset / 4) < 256) {
2591    emit(cond | u * B23 | d * B22 | 0xD0 * B20 | base.code() * B16 | sd * B12 |
2592         0xA * B8 | ((offset / 4) & 255));
2593  } else {
2594    // Larger offsets must be handled by computing the correct address in a
2595    // scratch register.
2596    UseScratchRegisterScope temps(this);
2597    Register scratch = temps.Acquire();
2598    DCHECK(base != scratch);
2599    if (u == 1) {
2600      add(scratch, base, Operand(offset));
2601    } else {
2602      sub(scratch, base, Operand(offset));
2603    }
2604    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
2605         0xA * B8);
2606  }
2607}
2608
2609void Assembler::vstr(const SwVfpRegister src, const MemOperand& operand,
2610                     const Condition cond) {
2611  DCHECK(operand.am_ == Offset);
2612  if (operand.rm().is_valid()) {
2613    UseScratchRegisterScope temps(this);
2614    Register scratch = temps.Acquire();
2615    add(scratch, operand.rn(),
2616        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2617    vstr(src, scratch, 0, cond);
2618  } else {
2619    vstr(src, operand.rn(), operand.offset(), cond);
2620  }
2621}
2622
2623void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
2624                     DwVfpRegister last, Condition cond) {
2625  // Instruction details available in ARM DDI 0406C.b, A8-922.
2626  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
2627  // first(15-12) | 1011(11-8) | (count * 2)
2628  DCHECK_LE(first.code(), last.code());
2629  DCHECK(VfpRegisterIsAvailable(last));
2630  DCHECK(am == ia || am == ia_w || am == db_w);
2631  DCHECK(base != pc);
2632
2633  int sd, d;
2634  first.split_code(&sd, &d);
2635  int count = last.code() - first.code() + 1;
2636  DCHECK_LE(count, 16);
2637  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
2638       0xB * B8 | count * 2);
2639}
2640
2641void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
2642                     DwVfpRegister last, Condition cond) {
2643  // Instruction details available in ARM DDI 0406C.b, A8-1080.
2644  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
2645  // first(15-12) | 1011(11-8) | (count * 2)
2646  DCHECK_LE(first.code(), last.code());
2647  DCHECK(VfpRegisterIsAvailable(last));
2648  DCHECK(am == ia || am == ia_w || am == db_w);
2649  DCHECK(base != pc);
2650
2651  int sd, d;
2652  first.split_code(&sd, &d);
2653  int count = last.code() - first.code() + 1;
2654  DCHECK_LE(count, 16);
2655  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
2656       0xB * B8 | count * 2);
2657}
2658
2659void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
2660                     SwVfpRegister last, Condition cond) {
2661  // Instruction details available in ARM DDI 0406A, A8-626.
2662  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
2663  // first(15-12) | 1010(11-8) | (count)
2664  DCHECK_LE(first.code(), last.code());
2665  DCHECK(am == ia || am == ia_w || am == db_w);
2666  DCHECK(base != pc);
2667
2668  int sd, d;
2669  first.split_code(&sd, &d);
2670  int count = last.code() - first.code() + 1;
2671  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
2672       0xA * B8 | count);
2673}
2674
2675void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
2676                     SwVfpRegister last, Condition cond) {
2677  // Instruction details available in ARM DDI 0406A, A8-784.
2678  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
2679  // first(15-12) | 1010(11-8) | (count)
2680  DCHECK_LE(first.code(), last.code());
2681  DCHECK(am == ia || am == ia_w || am == db_w);
2682  DCHECK(base != pc);
2683
2684  int sd, d;
2685  first.split_code(&sd, &d);
2686  int count = last.code() - first.code() + 1;
2687  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
2688       0xA * B8 | count);
2689}
2690
2691static void DoubleAsTwoUInt32(base::Double d, uint32_t* lo, uint32_t* hi) {
2692  uint64_t i = d.AsUint64();
2693
2694  *lo = i & 0xFFFFFFFF;
2695  *hi = i >> 32;
2696}
2697
2698static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
2699  // Integer promotion from uint8_t to int makes these all okay.
2700  *encoding = ((imm & 0x80) << (24 - 7));   // a
2701  *encoding |= ((imm & 0x70) << (16 - 4));  // bcd
2702  *encoding |= (imm & 0x0f);                //  efgh
2703}
2704
2705// This checks if imm can be encoded into an immediate for vmov.
2706// See Table A7-15 in ARM DDI 0406C.d.
2707// Currently only supports the first row and op=0 && cmode=1110.
2708static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
2709  uint32_t lo = imm & 0xFFFFFFFF;
2710  uint32_t hi = imm >> 32;
2711  if (lo == hi && (lo & 0xffffff00) == 0) {
2712    WriteVmovIntImmEncoding(imm & 0xff, encoding);
2713    *cmode = 0;
2714    return true;
2715  } else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
2716             ((lo & 0xff) == (lo >> 24))) {
2717    // Check that all bytes in imm are the same.
2718    WriteVmovIntImmEncoding(imm & 0xff, encoding);
2719    *cmode = 0xe;
2720    return true;
2721  }
2722
2723  return false;
2724}
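
// Examples: 0x0000004200000042 fits the first row (each 32-bit half is a
// zero-extended byte, cmode == 0) and 0x4242424242424242 fits op == 0,
// cmode == 1110 (all eight bytes identical); any other pattern is rejected.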
2725
2726void Assembler::vmov(const DwVfpRegister dst, uint64_t imm) {
2727  uint32_t enc;
2728  uint8_t cmode;
2729  uint8_t op = 0;
2730  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
2731    CpuFeatureScope scope(this, NEON);
2732    // Instruction details available in ARM DDI 0406C.b, A8-937.
2733    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
2734    // | 0(7) | 0(6) | op(5) | 1(4) | imm4(3-0)
2735    int vd, d;
2736    dst.split_code(&vd, &d);
2737    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
2738         op * B5 | 0x1 * B4 | enc);
2739  } else {
2740    UNIMPLEMENTED();
2741  }
2742}
2743
2744void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
2745  uint32_t enc;
2746  uint8_t cmode;
2747  uint8_t op = 0;
2748  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
2749    CpuFeatureScope scope(this, NEON);
2750    // Instruction details available in ARM DDI 0406C.b, A8-937.
2751    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
2752    // | 0(7) | Q(6) | op(5) | 1(4) | imm4(3-0)
2753    int vd, d;
2754    dst.split_code(&vd, &d);
2755    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
2756         0x1 * B6 | op * B5 | 0x1 * B4 | enc);
2757  } else {
2758    UNIMPLEMENTED();
2759  }
2760}
2761
2762// Only works for little-endian floating-point formats. We don't support
2763// VFP on mixed-endian floating-point platforms.
2764static bool FitsVmovFPImmediate(base::Double d, uint32_t* encoding) {
2765  // VMOV can accept an immediate of the form:
2766  //
2767  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
2768  //
2769  // The immediate is encoded using an 8-bit quantity, comprised of two
2770  // 4-bit fields. For an 8-bit immediate of the form:
2771  //
2772  //  [abcdefgh]
2773  //
2774  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
2775  // created of the form:
2776  //
2777  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
2778  //      00000000,00000000,00000000,00000000]
2779  //
2780  // where B = ~b.
2781  //
2782
2783  uint32_t lo, hi;
2784  DoubleAsTwoUInt32(d, &lo, &hi);
2785
2786  // The most obvious constraint is the long block of zeroes.
2787  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
2788    return false;
2789  }
2790
2791  // Bits 61:54 must be all clear or all set.
2792  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
2793    return false;
2794  }
2795
2796  // Bit 62 must be NOT bit 61.
2797  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
2798    return false;
2799  }
2800
2801  // Create the encoded immediate in the form:
2802  //  [00000000,0000abcd,00000000,0000efgh]
2803  *encoding = (hi >> 16) & 0xF;       // Low nybble.
2804  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
2805  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.
2806
2807  return true;
2808}
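
// Examples: +/-1.0 (16 * 2^-4), 0.5 and 31.0 are encodable; 32.0 and 0.1 are
// not, and fall back to the non-immediate paths in the vmov() overloads
// below.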
2809
2810void Assembler::vmov(const SwVfpRegister dst, Float32 imm) {
2811  uint32_t enc;
2812  if (CpuFeatures::IsSupported(VFPv3) &&
2813      FitsVmovFPImmediate(base::Double(imm.get_scalar()), &enc)) {
2814    CpuFeatureScope scope(this, VFPv3);
2815    // The float can be encoded in the instruction.
2816    //
2817    // Sd = immediate
2818    // Instruction details available in ARM DDI 0406C.b, A8-936.
2819    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
2820    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
2821    int vd, d;
2822    dst.split_code(&vd, &d);
2823    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
2824  } else {
2825    UseScratchRegisterScope temps(this);
2826    Register scratch = temps.Acquire();
2827    mov(scratch, Operand(imm.get_bits()));
2828    vmov(dst, scratch);
2829  }
2830}
2831
2832void Assembler::vmov(const DwVfpRegister dst, base::Double imm,
2833                     const Register extra_scratch) {
2834  DCHECK(VfpRegisterIsAvailable(dst));
2835  uint32_t enc;
2836  if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) {
2837    CpuFeatureScope scope(this, VFPv3);
2838    // The double can be encoded in the instruction.
2839    //
2840    // Dd = immediate
2841    // Instruction details available in ARM DDI 0406C.b, A8-936.
2842    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
2843    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
2844    int vd, d;
2845    dst.split_code(&vd, &d);
2846    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 |
2847         enc);
2848  } else {
2849    // Synthesise the double from ARM immediates.
2850    uint32_t lo, hi;
2851    DoubleAsTwoUInt32(imm, &lo, &hi);
2852    UseScratchRegisterScope temps(this);
2853    Register scratch = temps.Acquire();
2854
2855    if (lo == hi) {
2856      // Move the low and high parts of the double to a D register in one
2857      // instruction.
2858      mov(scratch, Operand(lo));
2859      vmov(dst, scratch, scratch);
2860    } else if (extra_scratch == no_reg) {
2861      // We only have one spare scratch register.
2862      mov(scratch, Operand(lo));
2863      vmov(NeonS32, dst, 0, scratch);
      if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) {
        CpuFeatureScope scope(this, ARMv7);
        movt(scratch, hi >> 16);
      } else {
        mov(scratch, Operand(hi));
      }
      vmov(NeonS32, dst, 1, scratch);
    } else {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      mov(extra_scratch, Operand(hi));
      vmov(dst, scratch, extra_scratch);
    }
  }
}

void Assembler::vmov(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = Sm
  // Instruction details available in ARM DDI 0406B, A8-642.
  int sd, d, sm, m;
  dst.split_code(&sd, &d);
  src.split_code(&sm, &m);
  emit(cond | 0xE * B24 | d * B22 | 0xB * B20 | sd * B12 | 0xA * B8 | B6 |
       m * B5 | sm);
}

void Assembler::vmov(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Dd = Dm
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B6 |
       m * B5 | vm);
}

void Assembler::vmov(const DwVfpRegister dst, const Register src1,
                     const Register src2, const Condition cond) {
  // Dm = <Rt,Rt2>.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(src1 != pc && src2 != pc);
  int vm, m;
  dst.split_code(&vm, &m);
  emit(cond | 0xC * B24 | B22 | src2.code() * B16 | src1.code() * B12 |
       0xB * B8 | m * B5 | B4 | vm);
}

void Assembler::vmov(const Register dst1, const Register dst2,
                     const DwVfpRegister src, const Condition cond) {
  // <Rt,Rt2> = Dm.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(dst1 != pc && dst2 != pc);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0xC * B24 | B22 | B20 | dst2.code() * B16 | dst1.code() * B12 |
       0xB * B8 | m * B5 | B4 | vm);
}

void Assembler::vmov(const SwVfpRegister dst, const Register src,
                     const Condition cond) {
  // Sn = Rt.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(src != pc);
  int sn, n;
  dst.split_code(&sn, &n);
  emit(cond | 0xE * B24 | sn * B16 | src.code() * B12 | 0xA * B8 | n * B7 | B4);
}

void Assembler::vmov(const Register dst, const SwVfpRegister src,
                     const Condition cond) {
  // Rt = Sn.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(dst != pc);
  int sn, n;
  src.split_code(&sn, &n);
  emit(cond | 0xE * B24 | B20 | sn * B16 | dst.code() * B12 | 0xA * B8 |
       n * B7 | B4);
}

// Type of data to read from or write to VFP register.
// Used as specifier in generic vcvt instruction.
enum VFPType { S32, U32, F32, F64 };

static bool IsSignedVFPType(VFPType type) {
  switch (type) {
    case S32:
      return true;
    case U32:
      return false;
    default:
      UNREACHABLE();
  }
}

static bool IsIntegerVFPType(VFPType type) {
  switch (type) {
    case S32:
    case U32:
      return true;
    case F32:
    case F64:
      return false;
    default:
      UNREACHABLE();
  }
}

static bool IsDoubleVFPType(VFPType type) {
  switch (type) {
    case F32:
      return false;
    case F64:
      return true;
    default:
      UNREACHABLE();
  }
}

// Split five bit reg_code based on size of reg_type.
//  32-bit register codes are Vm:M
//  64-bit register codes are M:Vm
// where Vm is four bits, and M is a single bit.
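// For example, s3 (code 3) splits as Vm == 0b0001, M == 1, while d17
// (code 17) splits as M == 1, Vm == 0b0001.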
static void SplitRegCode(VFPType reg_type, int reg_code, int* vm, int* m) {
  DCHECK((reg_code >= 0) && (reg_code <= 31));
  if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
    SwVfpRegister::split_code(reg_code, vm, m);
  } else {
    DwVfpRegister::split_code(reg_code, vm, m);
  }
}

// Encode vcvt.dst_type.src_type instruction.
static Instr EncodeVCVT(const VFPType dst_type, const int dst_code,
                        const VFPType src_type, const int src_code,
                        VFPConversionMode mode, const Condition cond) {
  DCHECK(src_type != dst_type);
  int D, Vd, M, Vm;
  SplitRegCode(src_type, src_code, &Vm, &M);
  SplitRegCode(dst_type, dst_code, &Vd, &D);

  if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) {
    // Conversion between IEEE floating point and 32-bit integer.
    // Instruction details available in ARM DDI 0406B, A8.6.295.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));

    int sz, opc2, op;

    if (IsIntegerVFPType(dst_type)) {
      opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4;
      sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
      op = mode;
    } else {
      DCHECK(IsIntegerVFPType(src_type));
      opc2 = 0x0;
      sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
      op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
    }

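    // For example, vcvt.s32.f64 (dst_type == S32, src_type == F64) yields
    // opc2 == 0x5, sz == 1 and op == mode, while vcvt.f64.s32 yields
    // opc2 == 0x0, sz == 1 and op == 1.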
    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | B19 | opc2 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | op * B7 | B6 | M * B5 | Vm);
  } else {
    // Conversion between IEEE double and single precision.
    // Instruction details available in ARM DDI 0406B, A8.6.298.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    int sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | 0x7 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | B7 | B6 | M * B5 | Vm);
  }
}

void Assembler::vcvt_f64_s32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
}

void Assembler::vcvt_f32_s32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
}

void Assembler::vcvt_f64_u32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
}

void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond));
}

void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond));
}

void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond));
}

void Assembler::vcvt_s32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
}

void Assembler::vcvt_u32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
}

void Assembler::vcvt_f64_f32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
}

void Assembler::vcvt_f32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
}

void Assembler::vcvt_f64_s32(const DwVfpRegister dst, int fraction_bits,
                             const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-874.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
  DCHECK(IsEnabled(VFPv3));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
  int vd, d;
  dst.split_code(&vd, &d);
  int imm5 = 32 - fraction_bits;
  int i = imm5 & 1;
  int imm4 = (imm5 >> 1) & 0xF;
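  // For example, fraction_bits == 16 gives imm5 == 16 (0b10000), so
  // i == 0 and imm4 == 0b1000.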
  emit(cond | 0xE * B24 | B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B7 | B6 | i * B5 | imm4);
}

void Assembler::vneg(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | B6 | m * B5 | vm);
}

void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B6 | m * B5 | vm);
}

void Assembler::vabs(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B7 |
       B6 | m * B5 | vm);
}

void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
       m * B5 | vm);
}

void Assembler::vadd(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vadd(Dn, Dm) double precision floating point addition.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vadd(Sn, Sm) single precision floating point addition.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vsub(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vsub(Dn, Dm) double precision floating point subtraction.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vsub(Sn, Sm) single precision floating point subtraction.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vmul(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vmul(Dn, Dm) double precision floating point multiplication.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmul(Sn, Sm) single precision floating point multiplication.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vmla(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}

void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}

void Assembler::vmls(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | B6 | m * B5 | vm);
}

void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       B6 | m * B5 | vm);
}

void Assembler::vdiv(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vdiv(Dn, Dm) double precision floating point division.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}

void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vdiv(Sn, Sm) single precision floating point division.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}

void Assembler::vcmp(const DwVfpRegister src1, const DwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Dd, Dm) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Sd, Sm) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vcmp(const DwVfpRegister src1, const double src2,
                     const Condition cond) {
  // vcmp(Dd, #0.0) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6);
}

void Assembler::vcmp(const SwVfpRegister src1, const float src2,
                     const Condition cond) {
  // vcmp(Sd, #0.0) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B6);
}

void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vsel(Condition cond, const DwVfpRegister dst,
                     const DwVfpRegister src1, const DwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 1;

  // VSEL has a special (restricted) condition encoding.
  //   eq(0b0000)... -> 0b00
  //   ge(0b1010)... -> 0b10
  //   gt(0b1100)... -> 0b11
  //   vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }
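  // For example, vsel(ne, d0, d1, d2) is encoded as vsel.eq with the
  // operands swapped: the emitted instruction selects d2 when the Z flag
  // is set and d1 otherwise, which is exactly the ne semantics.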

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vsel(Condition cond, const SwVfpRegister dst,
                     const SwVfpRegister src1, const SwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 0;

  // VSEL has a special (restricted) condition encoding.
  //   eq(0b0000)... -> 0b00
  //   ge(0b1010)... -> 0b10
  //   gt(0b1100)... -> 0b11
  //   vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vsqrt(const DwVfpRegister dst, const DwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | 0x3 * B6 | m * B5 | vm);
}

void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}

void Assembler::vmsr(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}

void Assembler::vmrs(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}

void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}

void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}

void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B7 | B6 | m * B5 | vm);
}

void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
}

// Support for NEON.

void Assembler::vld1(NeonSize size, const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | 2 * B20 | src.rn().code() * B16 |
       vd * B12 | dst.type() * B8 | size * B6 | src.align() * B4 |
       src.rm().code());
}

// vld1s(ingle element to one lane).
void Assembler::vld1s(NeonSize size, const NeonListOperand& dst, uint8_t index,
                      const NeonMemOperand& src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.322.
  // 1111(31-28) | 01001(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | size(11-10) | index_align(7-4) | Rm(3-0)
  // See vld1 (single element to all lanes) if size == 0x3, implemented as
  // vld1r(eplicate).
  DCHECK_NE(size, 0x3);
  // Check for valid lane indices.
  DCHECK_GT(1 << (3 - size), index);
  // Specifying alignment not supported, use standard alignment.
  uint8_t index_align = index << (size + 1);
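  // For example, size == Neon16 (1) with index == 2 gives
  // index_align == 0b1000: the lane index in the top bits and a zero
  // alignment field below it.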

  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
       src.rn().code() * B16 | vd * B12 | size * B10 | index_align * B4 |
       src.rm().code());
}

// vld1r(eplicate)
void Assembler::vld1r(NeonSize size, const NeonListOperand& dst,
                      const NeonMemOperand& src) {
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
       src.rn().code() * B16 | vd * B12 | 0xC * B8 | size * B6 |
       dst.length() * B5 | src.rm().code());
}

void Assembler::vst1(NeonSize size, const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       src.type() * B8 | size * B6 | dst.align() * B4 | dst.rm().code());
}

void Assembler::vst1s(NeonSize size, const NeonListOperand& src, uint8_t index,
                      const NeonMemOperand& dst) {
  // Instruction details available in ARM DDI 0487F.b F6.1.236.
  // 1111(31-28) | 01001(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | size(11-10) | 00(9-8) | index_align(7-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  DCHECK_NE(size, 0x3);
  DCHECK_GT(1 << (3 - size), index);
  // Specifying alignment not supported, use standard alignment.
  uint8_t index_align = index << (size + 1);
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 9 * B23 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       size * B10 | index_align * B4 | dst.rm().code());
}

void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int U = NeonU(dt);
  int imm3 = 1 << NeonSz(dt);
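  // imm3 encodes the source element size: 0b001 for 8-bit lanes
  // (vmovl.s8/vmovl.u8), 0b010 for 16-bit and 0b100 for 32-bit.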
  emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
       0xA * B8 | m * B5 | B4 | vm);
}

void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
                       DwVfpRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
  // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
  // vqmovun.<type><size> Dd, Qm. Same as above, but produces unsigned results.
  DCHECK(IsEnabled(NEON));
  DCHECK_IMPLIES(NeonU(src_dt), NeonU(dst_dt));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int size = NeonSz(dst_dt);
  DCHECK_NE(3, size);
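  // op == 0b01 selects vqmovun (signed source, unsigned result), 0b10
  // selects vqmovn with signed saturation and 0b11 selects vqmovn with
  // unsigned saturation.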
  int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
  emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
       0x2 * B8 | op * B6 | m * B5 | vm);
}

static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
  }
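  // For example, NeonS16 with index == 3 gives opc1_opc2 == 0b0111, i.e.
  // opc1 == 0b01 and opc2 == 0b11 in the split below.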
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}

void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
                     Register src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.940.
  // vmov ARM core register to scalar.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int opc1_opc2 = EncodeScalar(dt, index);
  emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
       opc1_opc2);
}

void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.942.
  // vmov Arm scalar to core register.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vn, n;
  src.split_code(&vn, &n);
  int opc1_opc2 = EncodeScalar(dt, index);
  // NeonS32 and NeonU32 both encoded as u = 0.
  int u = NeonDataTypeToSize(dt) == Neon32 ? 0 : NeonU(dt);
  emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
       n * B7 | B4 | opc1_opc2);
}

void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // vmov is encoded as vorr.
  vorr(dst, src, src);
}

void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-886.
  int B = 0, E = 0;
  switch (size) {
    case Neon8:
      B = 1;
      break;
    case Neon16:
      E = 1;
      break;
    case Neon32:
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);

  emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
       0xB * B8 | d * B7 | E * B5 | B4);
}

enum NeonRegType { NEON_D, NEON_Q };

void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
  } else {
    DCHECK_EQ(type, NEON_Q);
    QwNeonRegister::split_code(code, vm, m);
    *encoding |= B6;
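    // Q registers are encoded via their even base D register (e.g. q7
    // occupies d14-d15, so it splits as m == 0, vm == 0b1110), with B6
    // marking the quadword form.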
  }
}

static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
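  // For example, size == Neon16 (sz == 1) with index == 2 gives
  // imm4 == 0b1010: the trailing 0b10 marks 16-bit lanes and the upper
  // bits hold the lane index.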
  int qbit = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}

void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int index) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
}

void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
}

// Encode NEON vcvt.dst_type.src_type instruction.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  int op = 0;
  if (src_type == F32) {
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }
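  // For example, vcvt.u32.f32 (to integer) gets op == 3, while
  // vcvt.f32.u32 (from integer) gets op == 1.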

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}

void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, S32, src));
}

void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, U32, src));
}

void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(S32, dst, F32, src));
}

void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(U32, dst, F32, src));
}

enum UnaryOp {
  VMVN,
  VSWP,
  VABS,
  VABSF,
  VNEG,
  VNEGF,
  VRINTM,
  VRINTN,
  VRINTP,
  VRINTZ,
  VZIP,
  VUZP,
  VREV16,
  VREV32,
  VREV64,
  VTRN,
  VRECPE,
  VRSQRTE,
  VPADAL_S,
  VPADAL_U,
  VPADDL_S,
  VPADDL_U,
  VCEQ0,
  VCLT0,
  VCNT
};

// Encoding helper for "Advanced SIMD two registers misc" decode group. See ARM
// DDI 0487F.b, F4-4228.
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    case VRINTM:
      op_encoding = B17 | 0xD * B7;
      break;
    case VRINTN:
      op_encoding = B17 | 0x8 * B7;
      break;
    case VRINTP:
      op_encoding = B17 | 0xF * B7;
      break;
    case VRINTZ:
      op_encoding = B17 | 0xB * B7;
      break;
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0.
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    case VRECPE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xA * B7;
      break;
    case VRSQRTE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xB * B7;
      break;
    case VPADAL_S:
      op_encoding = 0xC * B7;
      break;
    case VPADAL_U:
      op_encoding = 0xD * B7;
      break;
    case VPADDL_S:
      op_encoding = 0x4 * B7;
      break;
    case VPADDL_U:
      op_encoding = 0x5 * B7;
      break;
    case VCEQ0:
      // Only support integers.
      op_encoding = 0x1 * B16 | 0x2 * B7;
      break;
    case VCLT0:
      // Only support signed integers.
      op_encoding = 0x1 * B16 | 0x4 * B7;
      break;
    case VCNT:
      op_encoding = 0xA * B7;
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}

void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmvn(Qm) SIMD bitwise NOT.
  // Instruction details available in ARM DDI 0406C.b, A8-966.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
  DCHECK(IsEnabled(NEON));
  // vswp(Dd, Dm) SIMD d-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // vswp(Qd, Qm) SIMD q-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.f32(Qm) SIMD floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.s<size>(Qm) SIMD integer absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.f32(Qm) SIMD floating point negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.s<size>(Qm) SIMD integer negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
}

enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };

static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}

void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vand(Qn, Qm) SIMD AND.
  // Instruction details available in ARM DDI 0406C.b, A8.8.836.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vbic(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbic(Qn, Qm) SIMD bitwise bit clear (Qn & ~Qm).
4275  // Instruction details available in ARM DDI 0406C.b, A8-840.
4276  DCHECK(IsEnabled(NEON));
4277  emit(EncodeNeonBinaryBitwiseOp(VBIC, NEON_Q, dst.code(), src1.code(),
4278                                 src2.code()));
4279}
4280
4281void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
4282                     QwNeonRegister src2) {
4283  // Qd = vbsl(Qn, Qm) SIMD bitwise select.
4284  // Instruction details available in ARM DDI 0406C.b, A8-844.
4285  DCHECK(IsEnabled(NEON));
4286  emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
4287                                 src2.code()));
4288}
4289
4290void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
4291                     DwVfpRegister src2) {
4292  // Dd = veor(Dn, Dm) SIMD exclusive OR.
4293  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4294  DCHECK(IsEnabled(NEON));
4295  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
4296                                 src2.code()));
4297}
4298
4299void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
4300                     QwNeonRegister src2) {
4301  // Qd = veor(Qn, Qm) SIMD exclusive OR.
4302  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4303  DCHECK(IsEnabled(NEON));
4304  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
4305                                 src2.code()));
4306}
4307
4308void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
4309                     QwNeonRegister src2) {
4310  // Qd = vorr(Qn, Qm) SIMD OR.
4311  // Instruction details available in ARM DDI 0406C.b, A8.8.976.
4312  DCHECK(IsEnabled(NEON));
4313  emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
4314                                 src2.code()));
4315}
4316
4317void Assembler::vorn(QwNeonRegister dst, QwNeonRegister src1,
4318                     QwNeonRegister src2) {
4319  // Qd = vorn(Qn, Qm) SIMD OR NOT.
4320  // Instruction details available in ARM DDI 0406C.d, A8.8.359.
4321  DCHECK(IsEnabled(NEON));
4322  emit(EncodeNeonBinaryBitwiseOp(VORN, NEON_Q, dst.code(), src1.code(),
4323                                 src2.code()));
4324}
4325
4326enum FPBinOp {
4327  VADDF,
4328  VSUBF,
4329  VMULF,
4330  VMINF,
4331  VMAXF,
4332  VRECPS,
4333  VRSQRTS,
4334  VCEQF,
4335  VCGEF,
4336  VCGTF
4337};
4338
static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}
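
// Illustrative sketch (comment only): with every register field zero, e.g.
// vadd.f32 q0, q0, q0, the word above reduces to its fixed bits,
//   0x1E4 * B23 | 0xD * B8 | B6 == 0xF2000D40,
// the A32 encoding of vadd.f32 q0, q0, q0. The d/n/m bits from split_code()
// extend the four-bit register fields so that q8-q15 can be addressed.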

enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT,
  VRHADD,
  VQRDMULH
};

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    case VRHADD:
      op_encoding = B8;
      break;
    case VQRDMULH:
      op_encoding = B24 | 0xB * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}
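
// Illustrative sketch (comment only): vadd.i32 q0, q0, q0 combines
// op_encoding = 0x8 * B8 with size = NeonSz(NeonS32) = 2 and u = 0, giving
//   0x1E4 * B23 | 2 * B20 | 0x8 * B8 | B6 == 0xF2200840.
// For operations where bit 24 (U) selects a different instruction rather
// than signedness, callers must pass a signed data type; the NeonSize
// overload below guarantees this.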

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  // Map NeonSize values to the signed values in NeonDataType, so the U bit
  // will be 0.
  return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
}

void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD floating point addition.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
}

void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD integer addition.
  // Instruction details available in ARM DDI 0406C.b, A8-828.
  emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
}

void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
  // Instruction details available in ARM DDI 0406C.b, A8-996.
  emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
}

void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
}

void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD integer subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1084.
  emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}

void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1020.
  emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}

void Assembler::vmlal(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmlal(Dn, Dm) Vector Multiply Accumulate Long (integer).
  // Instruction details available in ARM DDI 0406C.b, A8-931.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  if (!u) UNIMPLEMENTED();
  DCHECK_NE(size, 3);  // See "Related encodings".
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0x8 * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}

void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}

void Assembler::vmull(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmull(Dn, Dm) Vector Multiply Long (integer).
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0xC * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD floating point MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
}

void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD integer MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
}

void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD floating point MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
}

void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD integer MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
}

enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI, VSRA };

static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                       NeonRegType reg_type, int dst_code,
                                       int src_code, int shift_code) {
  DCHECK_EQ(op, VSHL);
  int op_encoding = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, shift_code, &vn, &n, &op_encoding);
  int size = NeonSz(dt);
  int u = NeonU(dt);

  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         0x4 * B8 | n * B7 | m * B5 | vm | op_encoding;
}
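
// Illustrative sketch (comment only): in the register-shift form the shift
// counts live in the Vn field and the data in Vm, so a call like
//   vshl(NeonS32, q0, q1, q2)
// places q2 (the per-lane shift amounts) in Vn and q1 (the data) in Vm.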

static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0, imm6 = 0, L = 0;
  switch (op) {
    case VSHL: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = B24 | 0x5 * B8;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B24 | 0x4 * B8;
      break;
    }
    case VSRA: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B8;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    default:
      UNREACHABLE();
  }

  L = imm6 >> 6;
  imm6 &= 0x3F;

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | L * B7 | m * B5 | B4 |
         vm | op_encoding;
}
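
// Illustrative sketch (comment only): left shifts encode the shift amount on
// top of the lane size while right shifts encode its complement, e.g.
// vshl.i8 by 3 gives imm6 = 8 + 3 = 11 and vshr.s32 by 5 gives
// imm6 = 2 * 32 - 5 = 59. Only 64-bit lanes overflow into the L bit (B7).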

void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, bits) SIMD shift left immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1046.
  emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     QwNeonRegister shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, Qn) SIMD shift left register.
  // Instruction details available in ARM DDI 0487A.a, F8-3340.
  emit(EncodeNeonShiftRegisterOp(VSHL, dt, NEON_Q, dst.code(), src.code(),
                                 shift.code()));
}

void Assembler::vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vshr(Dm, bits) SIMD shift right immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
                         dst.code(), src.code(), shift));
}

void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshr(Qm, bits) SIMD shift right immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsli(Dm, bits) SIMD shift left and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1056.
  emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsri(Dm, bits) SIMD shift right and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1062.
  emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

void Assembler::vsra(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
                     int imm) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsra(Dm, imm) SIMD shift right and accumulate.
  // Instruction details available in ARM DDI 0487F.b, F6-5569.
  emit(EncodeNeonShiftOp(VSRA, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
                         dst.code(), src.code(), imm));
}

void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecpe(Qm) SIMD reciprocal estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1024.
  emit(EncodeNeonUnaryOp(VRECPE, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1038.
  emit(EncodeNeonUnaryOp(VRSQRTE, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1026.
  emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
}

void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
                        QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1040.
  emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}

enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };

static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}

void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}

void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-980.
  emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
}

void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
}

void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
}

void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards -Infinity.
  // See ARM DDI 0487F.b, F6-5493.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer to nearest (ties to even).
  // See ARM DDI 0487F.b, F6-5497.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards +Infinity.
  // See ARM DDI 0487F.b, F6-5501.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTP, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vrintz(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards Zero.
  // See ARM DDI 0487F.b, F6-5511.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTZ, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}

void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vtst(Qn, Qm) SIMD test integer operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1098.
  emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
}

void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD integer compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     int value) {
  DCHECK(IsEnabled(NEON));
  DCHECK_EQ(0, value);
  // Qd = vceq(Qm, #0) Vector Compare Equal to Zero.
  // Instruction details available in ARM DDI 0406C.d, A8-847.
  emit(EncodeNeonUnaryOp(VCEQ0, NEON_Q, size, dst.code(), src1.code()));
}

void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
}

void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
}

void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
}

void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
}

void Assembler::vclt(NeonSize size, QwNeonRegister dst, QwNeonRegister src,
                     int value) {
  DCHECK(IsEnabled(NEON));
  DCHECK_EQ(0, value);
  // Qd = vclt.<size>(Qm, #0) SIMD Vector Compare Less Than Zero.
  // Instruction details available in ARM DDI 0487F.b, F6-5072.
  emit(EncodeNeonUnaryOp(VCLT0, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrhadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrhadd(Qn, Qm) SIMD integer rounding halving add.
  // Instruction details available in ARM DDI 0406C.b, A8-1030.
  emit(EncodeNeonBinOp(VRHADD, dt, dst, src1, src2));
}

void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2, int bytes) {
  DCHECK(IsEnabled(NEON));
  // Qd = vext(Qn, Qm) SIMD byte extract.
  // Instruction details available in ARM DDI 0406C.b, A8-890.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  DCHECK_GT(16, bytes);
  emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
       n * B7 | B6 | m * B5 | vm);
}
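
// Illustrative sketch (comment only): vext treats Qn:Qm as one 32-byte value
// and extracts 16 bytes starting at the given byte offset, so
//   vext(q0, q1, q2, 4)
// sets q0 to bytes 4..15 of q1 followed by bytes 0..3 of q2.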

void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vzip.<size>(Dn, Dm) SIMD zip (interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1102.
    emit(EncodeNeonUnaryOp(VZIP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vzip.<size>(Qn, Qm) SIMD zip (interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1102.
  emit(EncodeNeonUnaryOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1100.
    emit(EncodeNeonUnaryOp(VUZP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1100.
  emit(EncodeNeonUnaryOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev16.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonUnaryOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev32.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonUnaryOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev64.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonUnaryOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Dn, Dm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonUnaryOp(VTRN, NEON_D, size, src1.code(), src2.code()));
}

void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Qn, Qm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonUnaryOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vpadal(NeonDataType dt, QwNeonRegister dst,
                       QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // vpadal.<dt>(Qd, Qm) SIMD Vector Pairwise Add and Accumulate Long.
  emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADAL_U : VPADAL_S, NEON_Q,
                         NeonDataTypeToSize(dt), dst.code(), src.code()));
}

void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst,
                       QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // vpaddl.<dt>(Qd, Qm) SIMD Vector Pairwise Add Long.
  emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADDL_U : VPADDL_S, NEON_Q,
                         NeonDataTypeToSize(dt), dst.code(), src.code()));
}

void Assembler::vqrdmulh(NeonDataType dt, QwNeonRegister dst,
                         QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  DCHECK(dt == NeonS16 || dt == NeonS32);
  // Qd = vqrdmulh(Qn, Qm) SIMD saturating rounding doubling multiply
  // returning high half.
  emit(EncodeNeonBinOp(VQRDMULH, dt, dst, src1, src2));
}

void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcnt(Qm) SIMD Vector Count Set Bits.
  // Instruction details available at ARM DDI 0487F.b, F6-5094.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VCNT, NEON_Q, Neon8, dst.code(), src.code()));
}

// Encode NEON vtbl / vtbx instruction.
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
         list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}
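
// Illustrative sketch (comment only, argument forms assumed): with a
// two-register table starting at d2, a call like
//   vtbl(d0, NeonListOperand(d2, 2), d4)
// selects bytes of d2:d3 using the indices in d4. Out-of-range indices
// produce zero for vtbl but leave the destination byte unchanged for vtbx.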

void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, false));
}

void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, true));
}

// Pseudo instructions.
void Assembler::nop(int type) {
  // ARMv6{K/T2} and v7 have an actual NOP instruction, but it serializes
  // some of the CPU's pipeline and has to issue. Older ARM chips simply used
  // MOV Rx, Rx as a NOP, and it performs better even on newer CPUs.
  // We therefore use MOV Rx, Rx even on newer CPUs, and use Rx to encode
  // a type.
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  emit(al | 13 * B21 | type * B12 | type);
}
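
// Illustrative sketch (comment only): type 0 assembles to mov r0, r0, i.e.
//   al | 13 * B21 == 0xE1A00000,
// the canonical ARM nop; type 2 would be mov r2, r2 (0xE1A02002).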

void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }

bool Assembler::IsMovT(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // mask out condition bits
             ((kNumRegisters - 1) * B12) |        // mask out register
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovtPattern;
}

bool Assembler::IsMovW(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // mask out condition bits
             ((kNumRegisters - 1) * B12) |        // mask out destination
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovwPattern;
}

Instr Assembler::GetMovTPattern() { return kMovtPattern; }

Instr Assembler::GetMovWPattern() { return kMovwPattern; }

Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
  DCHECK_LT(immediate, 0x10000);
  return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
}
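
// Illustrative sketch (comment only): the 16-bit immediate splits into imm4
// (bits 19:16) and imm12 (bits 11:0), so 0xABCD encodes as
//   ((0xABCD & 0xF000) << 4) | (0xABCD & 0xFFF) == 0xA0BCD.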

Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
  instruction &= ~EncodeMovwImmediate(0xFFFF);
  return instruction | EncodeMovwImmediate(immediate);
}

int Assembler::DecodeShiftImm(Instr instr) {
  int rotate = Instruction::RotateValue(instr) * 2;
  int immed8 = Instruction::Immed8Value(instr);
  return base::bits::RotateRight32(immed8, rotate);
}
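
// Illustrative sketch (comment only): an Operand2 immediate is an 8-bit
// value rotated right by twice the 4-bit rotate field, so rotate = 15 with
// immed8 = 0x3 decodes as RotateRight32(0x3, 30) == 0xC.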

Instr Assembler::PatchShiftImm(Instr instr, int immed) {
  uint32_t rotate_imm = 0;
  uint32_t immed_8 = 0;
  bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
  DCHECK(immed_fits);
  USE(immed_fits);
  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
}

bool Assembler::IsNop(Instr instr, int type) {
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  // Check for mov rx, rx where x = type.
  return instr == (al | 13 * B21 | type * B12 | type);
}

bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}

bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}

// static
bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
  uint32_t dummy1;
  uint32_t dummy2;
  return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
}

bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
  return is_uint12(abs(imm32));
}

// Debugging.
void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in
  // the code.
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}

void Assembler::GrowBuffer() {
  DCHECK_EQ(buffer_start_, buffer_->start());

  // Compute new buffer size.
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);

  // Some internal data structures overflow for very large buffers;
  // kMaximalBufferSize must be kept small enough to avoid that.
  if (new_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();

  // Copy the data.
  int pc_delta = new_start - buffer_start_;
  int rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  MemMove(new_start, buffer_start_, pc_offset());
  byte* new_reloc_start = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.pos()) + rc_delta);
  MemMove(new_reloc_start, reloc_info_writer.pos(), reloc_size);

  // Switch buffers.
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ = reinterpret_cast<byte*>(reinterpret_cast<Address>(pc_) + pc_delta);
  byte* new_last_pc = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.last_pc()) + pc_delta);
  reloc_info_writer.Reposition(new_reloc_start, new_last_pc);

  // None of our relocation types is pc-relative pointing outside the code
  // buffer, nor pc-absolute pointing inside it, so there is no need to
  // relocate any emitted relocation entries.
}

void Assembler::db(uint8_t data) {
  // db is used to write raw data. The constant pool should be emitted or
  // blocked before using db.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint8_t*>(pc_) = data;
  pc_ += sizeof(uint8_t);
}

void Assembler::dd(uint32_t data, RelocInfo::Mode rmode) {
  // dd is used to write raw data. The constant pool should be emitted or
  // blocked before using dd.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  if (!RelocInfo::IsNoInfo(rmode)) {
    DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
           RelocInfo::IsLiteralConstant(rmode));
    RecordRelocInfo(rmode);
  }
  base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), data);
  pc_ += sizeof(uint32_t);
}

void Assembler::dq(uint64_t value, RelocInfo::Mode rmode) {
  // dq is used to write raw data. The constant pool should be emitted or
  // blocked before using dq.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  CheckBuffer();
  if (!RelocInfo::IsNoInfo(rmode)) {
    DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
           RelocInfo::IsLiteralConstant(rmode));
    RecordRelocInfo(rmode);
  }
  base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), value);
  pc_ += sizeof(uint64_t);
}

void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
  if (!ShouldRecordRelocInfo(rmode)) return;
  DCHECK_GE(buffer_space(), kMaxRelocSize);  // Too late to grow buffer here.
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
  reloc_info_writer.Write(&rinfo);
}

void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs and embedded objects, but we must make sure we
  // only emit one reloc info for them (so that delta patching applies the
  // delta only once). At the moment, we do not deduplicate heap object
  // requests, which are indicated by value == 0.
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0) ||
                    (RelocInfo::IsEmbeddedObjectMode(rmode) && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (first_const_pool_32_use_ < 0) {
    DCHECK(pending_32_bit_constants_.empty());
    DCHECK_EQ(constant_pool_deadline_, kMaxInt);
    first_const_pool_32_use_ = position;
    constant_pool_deadline_ = position + kCheckPoolDeadline;
  } else {
    DCHECK(!pending_32_bit_constants_.empty());
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.emplace_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info.
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}
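
// Illustrative sketch (comment only): two pc-relative loads of the same
// shareable constant end up sharing one pool slot. The second entry records
// merged_index() here and is later patched by CheckConstPool to load from
// the first entry's offset instead of emitting its own data word.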

void Assembler::BlockConstPoolFor(int instructions) {
  int pc_limit = pc_offset() + instructions * kInstrSize;
  if (no_const_pool_before_ < pc_limit) {
    no_const_pool_before_ = pc_limit;
  }

  // If we're due a const pool check before the block finishes, move it to just
  // after the block.
  if (constant_pool_deadline_ < no_const_pool_before_) {
    // Make sure that the new deadline isn't too late (including a jump and the
    // constant pool marker).
    DCHECK_LE(no_const_pool_before_,
              first_const_pool_32_use_ + kMaxDistToIntPool);
    constant_pool_deadline_ = no_const_pool_before_;
  }
}

void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short sequences of instructions must not be broken up by constant
  // pool emission; such sequences are protected by calls to BlockConstPoolFor
  // and by BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty()) {
    // We should only fall into this case if we're either forcing emission or
    // opportunistically checking after a jump.
    DCHECK(force_emit || !require_jump);
    return;
  }

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool to
  //    the first constant pool entry will exceed its limit the next time the
  //    pool is checked.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
  if (!force_emit) {
    DCHECK_NE(first_const_pool_32_use_, -1);
    int dist32 = pc_offset() - first_const_pool_32_use_;
    if (require_jump) {
      // We should only be on this path if we've exceeded our deadline.
      DCHECK_GE(dist32, kCheckPoolDeadline);
    } else if (dist32 < kCheckPoolDeadline / 2) {
      return;
    }
  }

  int size_after_marker = pending_32_bit_constants_.size() * kPointerSize;

  // Deduplicate constants.
  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (include the jump over the pool and the constant pool marker and
  // the gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  int size = size_up_to_marker + size_after_marker;
  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    ASM_CODE_COMMENT_STRING(this, "Constant Pool");
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordConstPool(size);

    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps the disassembler know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));

    // The first entry in the constant pool should also be the first use of
    // the pool.
    CHECK_EQ(first_const_pool_32_use_, pending_32_bit_constants_[0].position());
    CHECK(!pending_32_bit_constants_[0].is_merged());

    // Make sure we're not emitting the constant too late.
    CHECK_LE(pc_offset(),
             first_const_pool_32_use_ + kMaxDistToPcRelativeConstant);

    // Check that the code buffer is large enough before emitting the constant
    // pool (this includes the gap to the relocation information).
    int needed_space = pending_32_bit_constants_.size() * kPointerSize + kGap;
    while (buffer_space() <= needed_space) {
      GrowBuffer();
    }

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        DCHECK(entry.sharing_ok());
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        DCHECK_LT(merged.position(), entry.position());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();

    first_const_pool_32_use_ = -1;

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, we don't need another check until
  // the next constant pool entry is added.
  constant_pool_deadline_ = kMaxInt;
}

PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, ExternalAssemblerBuffer(
                             address, instructions * kInstrSize + kGap)) {
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}

PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());

  // Check that the code was patched as expected.
  DCHECK_EQ(pc_, buffer_start_ + buffer_->size() - kGap);
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}

void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }

void PatchingAssembler::PadWithNops() {
  DCHECK_LE(pc_, buffer_start_ + buffer_->size() - kGap);
  while (pc_ < buffer_start_ + buffer_->size() - kGap) {
    nop();
  }
}

UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}

UseScratchRegisterScope::~UseScratchRegisterScope() {
  *assembler_->GetScratchRegisterList() = old_available_;
  *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
}

Register UseScratchRegisterScope::Acquire() {
  RegList* available = assembler_->GetScratchRegisterList();
  DCHECK_NOT_NULL(available);
  return available->PopFirst();
}

LoadStoreLaneParams::LoadStoreLaneParams(MachineRepresentation rep,
                                         uint8_t laneidx) {
  if (rep == MachineRepresentation::kWord8) {
    *this = LoadStoreLaneParams(laneidx, Neon8, 8);
  } else if (rep == MachineRepresentation::kWord16) {
    *this = LoadStoreLaneParams(laneidx, Neon16, 4);
  } else if (rep == MachineRepresentation::kWord32) {
    *this = LoadStoreLaneParams(laneidx, Neon32, 2);
  } else if (rep == MachineRepresentation::kWord64) {
    *this = LoadStoreLaneParams(laneidx, Neon64, 1);
  } else {
    UNREACHABLE();
  }
}

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM