// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_
6#define V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_
7
8#include <deque>
9#include <map>
10#include <memory>
11
12#include "src/base/optional.h"
13#include "src/codegen/arm64/constants-arm64.h"
14#include "src/codegen/arm64/instructions-arm64.h"
15#include "src/codegen/arm64/register-arm64.h"
16#include "src/codegen/assembler.h"
17#include "src/codegen/constant-pool.h"
18#include "src/common/globals.h"
19#include "src/utils/utils.h"
20
21// Windows arm64 SDK defines mvn to NEON intrinsic neon_not which will not
22// be used here.
23#if defined(V8_OS_WIN) && defined(mvn)
24#undef mvn
25#endif
26
27#if defined(V8_OS_WIN)
28#include "src/base/platform/wrappers.h"
29#include "src/diagnostics/unwinding-info-win64.h"
30#endif  // V8_OS_WIN
31
32namespace v8 {
33namespace internal {
34
35class SafepointTableBuilder;
36
// -----------------------------------------------------------------------------
// Immediates.

// Wraps a 64-bit immediate value together with the relocation mode that
// describes how the value must be recorded in the generated code.
class Immediate {
 public:
  // Constructs an immediate from a heap object handle. Explicit because a
  // handle embeds a pointer and must always carry relocation information.
  template <typename T>
  inline explicit Immediate(
      Handle<T> handle, RelocInfo::Mode mode = RelocInfo::FULL_EMBEDDED_OBJECT);

  // This is allowed to be an implicit constructor because Immediate is
  // a wrapper class that doesn't normally perform any type conversion.
  template <typename T>
  inline Immediate(T value);  // NOLINT(runtime/explicit)

  template <typename T>
  inline Immediate(T value, RelocInfo::Mode rmode);

  // The raw 64-bit immediate value.
  int64_t value() const { return value_; }
  // The relocation mode under which this immediate is recorded.
  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  int64_t value_;
  RelocInfo::Mode rmode_;
};
60
// -----------------------------------------------------------------------------
// Operands.

// Total shift applied to a Smi payload (tag bits plus shift bits), and a mask
// covering those low bits.
constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize;
constexpr uint64_t kSmiShiftMask = (1ULL << kSmiShift) - 1;

// Represents an operand in a machine instruction: an immediate, a shifted
// register, or an extended register.
class Operand {
  // TODO(all): If necessary, study more in details which methods
  // TODO(all): should be inlined or not.
 public:
  // rm, {<shift> {#<shift_amount>}}
  // where <shift> is one of {LSL, LSR, ASR, ROR}.
  //       <shift_amount> is uint6_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  inline Operand(Register reg, Shift shift = LSL,
                 unsigned shift_amount = 0);  // NOLINT(runtime/explicit)

  // rm, <extend> {#<shift_amount>}
  // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
  //       <shift_amount> is uint2_t.
  inline Operand(Register reg, Extend extend, unsigned shift_amount = 0);

  // Factories producing operands whose value (a Smi or HeapNumber, or a
  // string constant) is only materialized later as a heap object request.
  static Operand EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Operand EmbeddedStringConstant(const StringConstantBase* str);

  inline bool IsHeapObjectRequest() const;
  inline HeapObjectRequest heap_object_request() const;
  inline Immediate immediate_for_heap_object_request() const;

  // Implicit constructor for all int types, ExternalReference, and Smi.
  template <typename T>
  inline Operand(T t);  // NOLINT(runtime/explicit)

  // Implicit constructor for int types.
  template <typename T>
  inline Operand(T t, RelocInfo::Mode rmode);

  // Kind queries: exactly one of immediate / shifted register / extended
  // register applies to a given Operand.
  inline bool IsImmediate() const;
  inline bool IsShiftedRegister() const;
  inline bool IsExtendedRegister() const;
  inline bool IsZero() const;

  // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
  // which helps in the encoding of instructions that use the stack pointer.
  inline Operand ToExtendedRegister() const;

  // Returns new Operand adapted for using with W registers.
  inline Operand ToW() const;

  // Accessors; only meaningful for the corresponding operand kind.
  inline Immediate immediate() const;
  inline int64_t ImmediateValue() const;
  inline RelocInfo::Mode ImmediateRMode() const;
  inline Register reg() const;
  inline Shift shift() const;
  inline Extend extend() const;
  inline unsigned shift_amount() const;

  // Relocation information.
  bool NeedsRelocation(const Assembler* assembler) const;

 private:
  base::Optional<HeapObjectRequest> heap_object_request_;
  Immediate immediate_;
  Register reg_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
130
// MemOperand represents a memory operand in a load or store instruction:
// a base register plus either an immediate offset or a (shifted/extended)
// register offset, with an addressing mode (offset, pre- or post-index).
class MemOperand {
 public:
  inline MemOperand();
  // Base register with an immediate offset.
  inline explicit MemOperand(Register base, int64_t offset = 0,
                             AddrMode addrmode = Offset);
  // Base register with a shifted register offset.
  inline explicit MemOperand(Register base, Register regoffset,
                             Shift shift = LSL, unsigned shift_amount = 0);
  // Base register with an extended register offset.
  inline explicit MemOperand(Register base, Register regoffset, Extend extend,
                             unsigned shift_amount = 0);
  // Base register with a generic Operand offset.
  inline explicit MemOperand(Register base, const Operand& offset,
                             AddrMode addrmode = Offset);

  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
  unsigned shift_amount() const { return shift_amount_; }
  // Queries on the offset kind and addressing mode.
  inline bool IsImmediateOffset() const;
  inline bool IsRegisterOffset() const;
  inline bool IsPreIndex() const;
  inline bool IsPostIndex() const;

 private:
  Register base_;
  Register regoffset_;
  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
165
166// -----------------------------------------------------------------------------
167// Assembler.
168
169class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
170 public:
171  // Create an assembler. Instructions and relocation information are emitted
172  // into a buffer, with the instructions starting from the beginning and the
173  // relocation information starting from the end of the buffer. See CodeDesc
174  // for a detailed comment on the layout (globals.h).
175  //
176  // If the provided buffer is nullptr, the assembler allocates and grows its
177  // own buffer. Otherwise it takes ownership of the provided buffer.
178  explicit Assembler(const AssemblerOptions&,
179                     std::unique_ptr<AssemblerBuffer> = {});
180
181  ~Assembler() override;
182
183  void AbortedCodeGeneration() override;
184
185  // System functions ---------------------------------------------------------
186  // Start generating code from the beginning of the buffer, discarding any code
187  // and data that has already been emitted into the buffer.
188  //
189  // In order to avoid any accidental transfer of state, Reset DCHECKs that the
190  // constant pool is not blocked.
191  void Reset();
192
193  // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
194  static constexpr int kNoHandlerTable = 0;
195  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
196  void GetCode(Isolate* isolate, CodeDesc* desc,
197               SafepointTableBuilder* safepoint_table_builder,
198               int handler_table_offset);
199
  // Convenience wrapper for code without safepoint or handler tables.
  // Forwards to the full GetCode with the kNoSafepointTable / kNoHandlerTable
  // sentinels.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }
204
205  // Insert the smallest number of nop instructions
206  // possible to align the pc offset to a multiple
207  // of m. m must be a power of 2 (>= 4).
208  void Align(int m);
209  // Insert the smallest number of zero bytes possible to align the pc offset
210  // to a mulitple of m. m must be a power of 2 (>= 2).
211  void DataAlign(int m);
212  // Aligns code to something that's optimal for a jump target for the platform.
213  void CodeTargetAlign();
214  void LoopHeaderAlign() { CodeTargetAlign(); }
215
216  inline void Unreachable();
217
218  // Label --------------------------------------------------------------------
219  // Bind a label to the current pc. Note that labels can only be bound once,
220  // and if labels are linked to other instructions, they _must_ be bound
221  // before they go out of scope.
222  void bind(Label* label);
223
224  // RelocInfo and pools ------------------------------------------------------
225
226  // Record relocation information for current pc_.
227  enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY };
228  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0,
229                       ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY);
230
231  // Generate a B immediate instruction with the corresponding relocation info.
232  // 'offset' is the immediate to encode in the B instruction (so it is the
233  // difference between the target and the PC of the instruction, divided by
234  // the instruction size).
235  void near_jump(int offset, RelocInfo::Mode rmode);
236  // Generate a BL immediate instruction with the corresponding relocation info.
237  // As for near_jump, 'offset' is the immediate to encode in the BL
238  // instruction.
239  void near_call(int offset, RelocInfo::Mode rmode);
240  // Generate a BL immediate instruction with the corresponding relocation info
241  // for the input HeapObjectRequest.
242  void near_call(HeapObjectRequest request);
243
244  // Return the address in the constant pool of the code target address used by
245  // the branch/call instruction at pc.
246  inline static Address target_pointer_address_at(Address pc);
247
248  // Read/Modify the code target address in the branch/call instruction at pc.
249  // The isolate argument is unused (and may be nullptr) when skipping flushing.
250  inline static Address target_address_at(Address pc, Address constant_pool);
251
252  // Read/Modify the code target address in the branch/call instruction at pc.
253  inline static Tagged_t target_compressed_address_at(Address pc,
254                                                      Address constant_pool);
255  inline static void set_target_address_at(
256      Address pc, Address constant_pool, Address target,
257      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
258
259  inline static void set_target_compressed_address_at(
260      Address pc, Address constant_pool, Tagged_t target,
261      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
262
263  // Returns the handle for the code object called at 'pc'.
264  // This might need to be temporarily encoded as an offset into code_targets_.
265  inline Handle<CodeT> code_target_object_handle_at(Address pc);
266  inline EmbeddedObjectIndex embedded_object_index_referenced_from(Address pc);
267  inline void set_embedded_object_index_referenced_from(
268      Address p, EmbeddedObjectIndex index);
269  // Returns the handle for the heap object referenced at 'pc'.
270  inline Handle<HeapObject> target_object_handle_at(Address pc);
271
272  // Returns the target address for a runtime function for the call encoded
273  // at 'pc'.
274  // Runtime entries can be temporarily encoded as the offset between the
275  // runtime function entrypoint and the code range base (stored in the
276  // code_range_base field), in order to be encodable as we generate the code,
277  // before it is moved into the code space.
278  inline Address runtime_entry_at(Address pc);
279
280  // This sets the branch destination. 'location' here can be either the pc of
281  // an immediate branch or the address of an entry in the constant pool.
282  // This is for calls and branches within generated code.
283  inline static void deserialization_set_special_target_at(Address location,
284                                                           Code code,
285                                                           Address target);
286
287  // Get the size of the special target encoded at 'location'.
288  inline static int deserialization_special_target_size(Address location);
289
290  // This sets the internal reference at the pc.
291  inline static void deserialization_set_target_internal_reference_at(
292      Address pc, Address target,
293      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
294
295  // This value is used in the serialization process and must be zero for
296  // ARM64, as the code target is split across multiple instructions and does
297  // not exist separately in the code, so the serializer should not step
298  // forwards in memory after a target is resolved and written.
299  static constexpr int kSpecialTargetSize = 0;
300
301  // Size of the generated code in bytes
302  uint64_t SizeOfGeneratedCode() const {
303    DCHECK((pc_ >= buffer_start_) && (pc_ < (buffer_start_ + buffer_->size())));
304    return pc_ - buffer_start_;
305  }
306
  // Return the code size generated from label to the current position.
  uint64_t SizeOfCodeGeneratedSince(const Label* label) {
    // The label must already be bound and must lie at or before the current
    // pc, otherwise the distance is meaningless.
    DCHECK(label->is_bound());
    DCHECK_GE(pc_offset(), label->pos());
    DCHECK_LT(pc_offset(), buffer_->size());
    return pc_offset() - label->pos();
  }
314
  // Return the number of instructions generated from label to the
  // current position. Valid because ARM64 instructions are fixed-width
  // (kInstrSize bytes each).
  uint64_t InstructionsGeneratedSince(const Label* label) {
    return SizeOfCodeGeneratedSince(label) / kInstrSize;
  }
320
321  static bool IsConstantPoolAt(Instruction* instr);
322  static int ConstantPoolSizeAt(Instruction* instr);
323  // See Assembler::CheckConstPool for more info.
324  void EmitPoolGuard();
325
326  // Prevent veneer pool emission until EndBlockVeneerPool is called.
327  // Call to this function can be nested but must be followed by an equal
328  // number of calls to EndBlockConstpool.
329  void StartBlockVeneerPool();
330
331  // Resume constant pool emission. Need to be called as many time as
332  // StartBlockVeneerPool to have an effect.
333  void EndBlockVeneerPool();
334
  // True while at least one StartBlockVeneerPool call has not yet been
  // matched by an EndBlockVeneerPool call.
  bool is_veneer_pool_blocked() const {
    return veneer_pool_blocked_nesting_ > 0;
  }
338
339  // Record a deoptimization reason that can be used by a log or cpu profiler.
340  // Use --trace-deopt to enable.
341  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
342                         SourcePosition position, int id);
343
344  int buffer_space() const;
345
346  // Record the emission of a constant pool.
347  //
348  // The emission of constant and veneer pools depends on the size of the code
349  // generated and the number of RelocInfo recorded.
350  // The Debug mechanism needs to map code offsets between two versions of a
351  // function, compiled with and without debugger support (see for example
352  // Debug::PrepareForBreakPoints()).
353  // Compiling functions with debugger support generates additional code
354  // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools
355  // and cause the version of the code with debugger support to have pools
356  // generated in different places.
357  // Recording the position and size of emitted pools allows to correctly
358  // compute the offset mappings between the different versions of a function in
359  // all situations.
360  //
361  // The parameter indicates the size of the pool (in bytes), including
362  // the marker and branch over the data.
363  void RecordConstPool(int size);
364
365  // Instruction set functions ------------------------------------------------
366
367  // Branch / Jump instructions.
368  // For branches offsets are scaled, i.e. in instructions not in bytes.
369  // Branch to register.
370  void br(const Register& xn);
371
372  // Branch-link to register.
373  void blr(const Register& xn);
374
375  // Branch to register with return hint.
376  void ret(const Register& xn = lr);
377
378  // Unconditional branch to label.
379  void b(Label* label);
380
381  // Conditional branch to label.
382  void b(Label* label, Condition cond);
383
384  // Unconditional branch to PC offset.
385  void b(int imm26);
386
387  // Conditional branch to PC offset.
388  void b(int imm19, Condition cond);
389
390  // Branch-link to label / pc offset.
391  void bl(Label* label);
392  void bl(int imm26);
393
394  // Compare and branch to label / pc offset if zero.
395  void cbz(const Register& rt, Label* label);
396  void cbz(const Register& rt, int imm19);
397
398  // Compare and branch to label / pc offset if not zero.
399  void cbnz(const Register& rt, Label* label);
400  void cbnz(const Register& rt, int imm19);
401
402  // Test bit and branch to label / pc offset if zero.
403  void tbz(const Register& rt, unsigned bit_pos, Label* label);
404  void tbz(const Register& rt, unsigned bit_pos, int imm14);
405
406  // Test bit and branch to label / pc offset if not zero.
407  void tbnz(const Register& rt, unsigned bit_pos, Label* label);
408  void tbnz(const Register& rt, unsigned bit_pos, int imm14);
409
410  // Address calculation instructions.
411  // Calculate a PC-relative address. Unlike for branches the offset in adr is
412  // unscaled (i.e. the result can be unaligned).
413  void adr(const Register& rd, Label* label);
414  void adr(const Register& rd, int imm21);
415
416  // Data Processing instructions.
417  // Add.
418  void add(const Register& rd, const Register& rn, const Operand& operand);
419
420  // Add and update status flags.
421  void adds(const Register& rd, const Register& rn, const Operand& operand);
422
423  // Compare negative.
424  void cmn(const Register& rn, const Operand& operand);
425
426  // Subtract.
427  void sub(const Register& rd, const Register& rn, const Operand& operand);
428
429  // Subtract and update status flags.
430  void subs(const Register& rd, const Register& rn, const Operand& operand);
431
432  // Compare.
433  void cmp(const Register& rn, const Operand& operand);
434
435  // Negate.
436  void neg(const Register& rd, const Operand& operand);
437
438  // Negate and update status flags.
439  void negs(const Register& rd, const Operand& operand);
440
441  // Add with carry bit.
442  void adc(const Register& rd, const Register& rn, const Operand& operand);
443
444  // Add with carry bit and update status flags.
445  void adcs(const Register& rd, const Register& rn, const Operand& operand);
446
447  // Subtract with carry bit.
448  void sbc(const Register& rd, const Register& rn, const Operand& operand);
449
450  // Subtract with carry bit and update status flags.
451  void sbcs(const Register& rd, const Register& rn, const Operand& operand);
452
453  // Negate with carry bit.
454  void ngc(const Register& rd, const Operand& operand);
455
456  // Negate with carry bit and update status flags.
457  void ngcs(const Register& rd, const Operand& operand);
458
459  // Logical instructions.
460  // Bitwise and (A & B).
461  void and_(const Register& rd, const Register& rn, const Operand& operand);
462
463  // Bitwise and (A & B) and update status flags.
464  void ands(const Register& rd, const Register& rn, const Operand& operand);
465
466  // Bit test, and set flags.
467  void tst(const Register& rn, const Operand& operand);
468
469  // Bit clear (A & ~B).
470  void bic(const Register& rd, const Register& rn, const Operand& operand);
471
472  // Bit clear (A & ~B) and update status flags.
473  void bics(const Register& rd, const Register& rn, const Operand& operand);
474
475  // Bitwise and.
476  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
477
478  // Bit clear immediate.
479  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
480
481  // Bit clear.
482  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
483
484  // Bitwise insert if false.
485  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
486
487  // Bitwise insert if true.
488  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
489
490  // Bitwise select.
491  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
492
493  // Polynomial multiply.
494  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
495
496  // Vector move immediate.
497  void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL,
498            const int shift_amount = 0);
499
500  // Bitwise not.
501  void mvn(const VRegister& vd, const VRegister& vn);
502
503  // Vector move inverted immediate.
504  void mvni(const VRegister& vd, const int imm8, Shift shift = LSL,
505            const int shift_amount = 0);
506
507  // Signed saturating accumulate of unsigned value.
508  void suqadd(const VRegister& vd, const VRegister& vn);
509
510  // Unsigned saturating accumulate of signed value.
511  void usqadd(const VRegister& vd, const VRegister& vn);
512
513  // Absolute value.
514  void abs(const VRegister& vd, const VRegister& vn);
515
516  // Signed saturating absolute value.
517  void sqabs(const VRegister& vd, const VRegister& vn);
518
519  // Negate.
520  void neg(const VRegister& vd, const VRegister& vn);
521
522  // Signed saturating negate.
523  void sqneg(const VRegister& vd, const VRegister& vn);
524
525  // Bitwise not.
526  void not_(const VRegister& vd, const VRegister& vn);
527
528  // Extract narrow.
529  void xtn(const VRegister& vd, const VRegister& vn);
530
531  // Extract narrow (second part).
532  void xtn2(const VRegister& vd, const VRegister& vn);
533
534  // Signed saturating extract narrow.
535  void sqxtn(const VRegister& vd, const VRegister& vn);
536
537  // Signed saturating extract narrow (second part).
538  void sqxtn2(const VRegister& vd, const VRegister& vn);
539
540  // Unsigned saturating extract narrow.
541  void uqxtn(const VRegister& vd, const VRegister& vn);
542
543  // Unsigned saturating extract narrow (second part).
544  void uqxtn2(const VRegister& vd, const VRegister& vn);
545
546  // Signed saturating extract unsigned narrow.
547  void sqxtun(const VRegister& vd, const VRegister& vn);
548
549  // Signed saturating extract unsigned narrow (second part).
550  void sqxtun2(const VRegister& vd, const VRegister& vn);
551
552  // Move register to register.
553  void mov(const VRegister& vd, const VRegister& vn);
554
555  // Bitwise not or.
556  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
557
558  // Bitwise exclusive or.
559  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
560
561  // Bitwise or (A | B).
562  void orr(const Register& rd, const Register& rn, const Operand& operand);
563
564  // Bitwise or.
565  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
566
567  // Bitwise or immediate.
568  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
569
570  // Bitwise nor (A | ~B).
571  void orn(const Register& rd, const Register& rn, const Operand& operand);
572
573  // Bitwise eor/xor (A ^ B).
574  void eor(const Register& rd, const Register& rn, const Operand& operand);
575
576  // Bitwise enor/xnor (A ^ ~B).
577  void eon(const Register& rd, const Register& rn, const Operand& operand);
578
579  // Logical shift left variable.
580  void lslv(const Register& rd, const Register& rn, const Register& rm);
581
582  // Logical shift right variable.
583  void lsrv(const Register& rd, const Register& rn, const Register& rm);
584
585  // Arithmetic shift right variable.
586  void asrv(const Register& rd, const Register& rn, const Register& rm);
587
588  // Rotate right variable.
589  void rorv(const Register& rd, const Register& rn, const Register& rm);
590
591  // Bitfield instructions.
592  // Bitfield move.
593  void bfm(const Register& rd, const Register& rn, int immr, int imms);
594
595  // Signed bitfield move.
596  void sbfm(const Register& rd, const Register& rn, int immr, int imms);
597
598  // Unsigned bitfield move.
599  void ubfm(const Register& rd, const Register& rn, int immr, int imms);
600
601  // Bfm aliases.
602  // Bitfield insert.
603  void bfi(const Register& rd, const Register& rn, int lsb, int width) {
604    DCHECK_GE(width, 1);
605    DCHECK(lsb + width <= rn.SizeInBits());
606    bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
607  }
608
609  // Bitfield extract and insert low.
610  void bfxil(const Register& rd, const Register& rn, int lsb, int width) {
611    DCHECK_GE(width, 1);
612    DCHECK(lsb + width <= rn.SizeInBits());
613    bfm(rd, rn, lsb, lsb + width - 1);
614  }
615
616  // Sbfm aliases.
617  // Arithmetic shift right.
618  void asr(const Register& rd, const Register& rn, int shift) {
619    DCHECK(shift < rd.SizeInBits());
620    sbfm(rd, rn, shift, rd.SizeInBits() - 1);
621  }
622
623  // Signed bitfield insert in zero.
624  void sbfiz(const Register& rd, const Register& rn, int lsb, int width) {
625    DCHECK_GE(width, 1);
626    DCHECK(lsb + width <= rn.SizeInBits());
627    sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
628  }
629
630  // Signed bitfield extract.
631  void sbfx(const Register& rd, const Register& rn, int lsb, int width) {
632    DCHECK_GE(width, 1);
633    DCHECK(lsb + width <= rn.SizeInBits());
634    sbfm(rd, rn, lsb, lsb + width - 1);
635  }
636
  // Signed extend byte: sign-extend the low 8 bits of rn into rd.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword: sign-extend the low 16 bits of rn into rd.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word: sign-extend the low 32 bits of rn into rd.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
645
646  // Ubfm aliases.
647  // Logical shift left.
648  void lsl(const Register& rd, const Register& rn, int shift) {
649    int reg_size = rd.SizeInBits();
650    DCHECK(shift < reg_size);
651    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
652  }
653
654  // Logical shift right.
655  void lsr(const Register& rd, const Register& rn, int shift) {
656    DCHECK(shift < rd.SizeInBits());
657    ubfm(rd, rn, shift, rd.SizeInBits() - 1);
658  }
659
660  // Unsigned bitfield insert in zero.
661  void ubfiz(const Register& rd, const Register& rn, int lsb, int width) {
662    DCHECK_GE(width, 1);
663    DCHECK(lsb + width <= rn.SizeInBits());
664    ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
665  }
666
667  // Unsigned bitfield extract.
668  void ubfx(const Register& rd, const Register& rn, int lsb, int width) {
669    DCHECK_GE(width, 1);
670    DCHECK(lsb + width <= rn.SizeInBits());
671    ubfm(rd, rn, lsb, lsb + width - 1);
672  }
673
  // Unsigned extend byte: zero-extend the low 8 bits of rn into rd.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword: zero-extend the low 16 bits of rn into rd.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word: zero-extend the low 32 bits of rn into rd.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
682
683  // Extract.
684  void extr(const Register& rd, const Register& rn, const Register& rm,
685            int lsb);
686
687  // Conditional select: rd = cond ? rn : rm.
688  void csel(const Register& rd, const Register& rn, const Register& rm,
689            Condition cond);
690
691  // Conditional select increment: rd = cond ? rn : rm + 1.
692  void csinc(const Register& rd, const Register& rn, const Register& rm,
693             Condition cond);
694
695  // Conditional select inversion: rd = cond ? rn : ~rm.
696  void csinv(const Register& rd, const Register& rn, const Register& rm,
697             Condition cond);
698
699  // Conditional select negation: rd = cond ? rn : -rm.
700  void csneg(const Register& rd, const Register& rn, const Register& rm,
701             Condition cond);
702
703  // Conditional set: rd = cond ? 1 : 0.
704  void cset(const Register& rd, Condition cond);
705
706  // Conditional set minus: rd = cond ? -1 : 0.
707  void csetm(const Register& rd, Condition cond);
708
709  // Conditional increment: rd = cond ? rn + 1 : rn.
710  void cinc(const Register& rd, const Register& rn, Condition cond);
711
712  // Conditional invert: rd = cond ? ~rn : rn.
713  void cinv(const Register& rd, const Register& rn, Condition cond);
714
715  // Conditional negate: rd = cond ? -rn : rn.
716  void cneg(const Register& rd, const Register& rn, Condition cond);
717
  // Extr aliases.
  // Rotate right: rd = rs rotated right by 'shift' bits, implemented as an
  // extract from the rs:rs pair.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }
722
723  // Conditional comparison.
724  // Conditional compare negative.
725  void ccmn(const Register& rn, const Operand& operand, StatusFlags nzcv,
726            Condition cond);
727
728  // Conditional compare.
729  void ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv,
730            Condition cond);
731
732  // Multiplication.
733  // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply.
734  void mul(const Register& rd, const Register& rn, const Register& rm);
735
736  // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate.
737  void madd(const Register& rd, const Register& rn, const Register& rm,
738            const Register& ra);
739
740  // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply.
741  void mneg(const Register& rd, const Register& rn, const Register& rm);
742
743  // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract.
744  void msub(const Register& rd, const Register& rn, const Register& rm,
745            const Register& ra);
746
747  // 32 x 32 -> 64-bit multiply.
748  void smull(const Register& rd, const Register& rn, const Register& rm);
749
750  // Xd = bits<127:64> of Xn * Xm.
751  void smulh(const Register& rd, const Register& rn, const Register& rm);
752
753  // Signed 32 x 32 -> 64-bit multiply and accumulate.
754  void smaddl(const Register& rd, const Register& rn, const Register& rm,
755              const Register& ra);
756
757  // Unsigned 32 x 32 -> 64-bit multiply and accumulate.
758  void umaddl(const Register& rd, const Register& rn, const Register& rm,
759              const Register& ra);
760
761  // Signed 32 x 32 -> 64-bit multiply and subtract.
762  void smsubl(const Register& rd, const Register& rn, const Register& rm,
763              const Register& ra);
764
765  // Unsigned 32 x 32 -> 64-bit multiply and subtract.
766  void umsubl(const Register& rd, const Register& rn, const Register& rm,
767              const Register& ra);
768
769  // Signed integer divide.
770  void sdiv(const Register& rd, const Register& rn, const Register& rm);
771
772  // Unsigned integer divide.
773  void udiv(const Register& rd, const Register& rn, const Register& rm);
774
775  // Bit count, bit reverse and endian reverse.
776  void rbit(const Register& rd, const Register& rn);
777  void rev16(const Register& rd, const Register& rn);
778  void rev32(const Register& rd, const Register& rn);
779  void rev(const Register& rd, const Register& rn);
780  void clz(const Register& rd, const Register& rn);
781  void cls(const Register& rd, const Register& rn);
782
783  // Pointer Authentication Code for Instruction address, using key B, with
784  // address in x17 and modifier in x16 [Armv8.3].
785  void pacib1716();
786
787  // Pointer Authentication Code for Instruction address, using key B, with
788  // address in LR and modifier in SP [Armv8.3].
789  void pacibsp();
790
791  // Authenticate Instruction address, using key B, with address in x17 and
792  // modifier in x16 [Armv8.3].
793  void autib1716();
794
795  // Authenticate Instruction address, using key B, with address in LR and
796  // modifier in SP [Armv8.3].
797  void autibsp();
798
799  // Memory instructions.
800
801  // Load integer or FP register.
802  void ldr(const CPURegister& rt, const MemOperand& src);
803
804  // Store integer or FP register.
805  void str(const CPURegister& rt, const MemOperand& dst);
806
807  // Load word with sign extension.
808  void ldrsw(const Register& rt, const MemOperand& src);
809
810  // Load byte.
811  void ldrb(const Register& rt, const MemOperand& src);
812
813  // Store byte.
814  void strb(const Register& rt, const MemOperand& dst);
815
816  // Load byte with sign extension.
817  void ldrsb(const Register& rt, const MemOperand& src);
818
819  // Load half-word.
820  void ldrh(const Register& rt, const MemOperand& src);
821
822  // Store half-word.
823  void strh(const Register& rt, const MemOperand& dst);
824
825  // Load half-word with sign extension.
826  void ldrsh(const Register& rt, const MemOperand& src);
827
828  // Load integer or FP register pair.
829  void ldp(const CPURegister& rt, const CPURegister& rt2,
830           const MemOperand& src);
831
832  // Store integer or FP register pair.
833  void stp(const CPURegister& rt, const CPURegister& rt2,
834           const MemOperand& dst);
835
836  // Load word pair with sign extension.
837  void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
838
839  // Load literal to register from a pc relative address.
840  void ldr_pcrel(const CPURegister& rt, int imm19);
841
842  // Load literal to register.
843  void ldr(const CPURegister& rt, const Immediate& imm);
844  void ldr(const CPURegister& rt, const Operand& operand);
845
846  // Load-acquire word.
847  void ldar(const Register& rt, const Register& rn);
848
849  // Load-acquire exclusive word.
850  void ldaxr(const Register& rt, const Register& rn);
851
852  // Store-release word.
853  void stlr(const Register& rt, const Register& rn);
854
855  // Store-release exclusive word.
856  void stlxr(const Register& rs, const Register& rt, const Register& rn);
857
858  // Load-acquire byte.
859  void ldarb(const Register& rt, const Register& rn);
860
861  // Load-acquire exclusive byte.
862  void ldaxrb(const Register& rt, const Register& rn);
863
864  // Store-release byte.
865  void stlrb(const Register& rt, const Register& rn);
866
867  // Store-release exclusive byte.
868  void stlxrb(const Register& rs, const Register& rt, const Register& rn);
869
870  // Load-acquire half-word.
871  void ldarh(const Register& rt, const Register& rn);
872
873  // Load-acquire exclusive half-word.
874  void ldaxrh(const Register& rt, const Register& rn);
875
876  // Store-release half-word.
877  void stlrh(const Register& rt, const Register& rn);
878
879  // Store-release exclusive half-word.
880  void stlxrh(const Register& rs, const Register& rt, const Register& rn);
881
882  // Move instructions. The default shift of -1 indicates that the move
883  // instruction will calculate an appropriate 16-bit immediate and left shift
884  // that is equal to the 64-bit immediate argument. If an explicit left shift
885  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
886  //
887  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
889  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
890  // most-significant.
891
  // Move and keep (encodes MOVK): writes a 16-bit immediate into one
  // half-word of rd, preserving the remaining bits. Shift semantics (and the
  // default shift of -1) are described in the comment block above.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
896
  // Move with non-zero (encodes MOVN, i.e. "move wide with NOT"). Shift
  // semantics (and the default shift of -1) are described in the comment
  // block above.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
901
  // Move with zero (encodes MOVZ): writes a 16-bit immediate into one
  // half-word of rd and zeroes the remaining bits. Shift semantics (and the
  // default shift of -1) are described in the comment block above.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
906
907  // Misc instructions.
908  // Monitor debug-mode breakpoint.
909  void brk(int code);
910
911  // Halting debug-mode breakpoint.
912  void hlt(int code);
913
914  // Move register to register.
915  void mov(const Register& rd, const Register& rn);
916
917  // Move NOT(operand) to register.
918  void mvn(const Register& rd, const Operand& operand);
919
920  // System instructions.
921  // Move to register from system register.
922  void mrs(const Register& rt, SystemRegister sysreg);
923
924  // Move from register to system register.
925  void msr(SystemRegister sysreg, const Register& rt);
926
927  // System hint.
928  void hint(SystemHint code);
929
930  // Data memory barrier
931  void dmb(BarrierDomain domain, BarrierType type);
932
933  // Data synchronization barrier
934  void dsb(BarrierDomain domain, BarrierType type);
935
936  // Instruction synchronization barrier
937  void isb();
938
939  // Conditional speculation barrier.
940  void csdb();
941
942  // Branch target identification.
943  void bti(BranchTargetIdentifier id);
944
  // No-op, implemented as the NOP system hint.
  void nop() { hint(NOP); }
947
  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    DEBUG_BREAK_NOP,
    INTERRUPT_CODE_NOP,
    ADR_FAR_NOP,
    // Aliases delimiting the valid marker range.
    FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
    LAST_NOP_MARKER = ADR_FAR_NOP
  };

  // Emit a marker nop of the given type (one of NopMarkerTypes above).
  void nop(NopMarkerTypes n);
959
960  // Add.
961  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);
962
963  // Unsigned halving add.
964  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
965
966  // Subtract.
967  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
968
969  // Signed halving add.
970  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
971
972  // Multiply by scalar element.
973  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
974           int vm_index);
975
976  // Multiply-add by scalar element.
977  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
978           int vm_index);
979
980  // Multiply-subtract by scalar element.
981  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
982           int vm_index);
983
984  // Signed long multiply-add by scalar element.
985  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
986             int vm_index);
987
988  // Signed long multiply-add by scalar element (second part).
989  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
990              int vm_index);
991
992  // Unsigned long multiply-add by scalar element.
993  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
994             int vm_index);
995
996  // Unsigned long multiply-add by scalar element (second part).
997  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
998              int vm_index);
999
1000  // Signed long multiply-sub by scalar element.
1001  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1002             int vm_index);
1003
1004  // Signed long multiply-sub by scalar element (second part).
1005  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1006              int vm_index);
1007
1008  // Unsigned long multiply-sub by scalar element.
1009  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1010             int vm_index);
1011
1012  // Unsigned long multiply-sub by scalar element (second part).
1013  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1014              int vm_index);
1015
1016  // Signed long multiply by scalar element.
1017  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1018             int vm_index);
1019
1020  // Signed long multiply by scalar element (second part).
1021  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1022              int vm_index);
1023
1024  // Unsigned long multiply by scalar element.
1025  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1026             int vm_index);
1027
1028  // Unsigned long multiply by scalar element (second part).
1029  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1030              int vm_index);
1031
1032  // Add narrow returning high half.
1033  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1034
1035  // Add narrow returning high half (second part).
1036  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1037
  // Signed saturating doubling long multiply by element.
1039  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1040               int vm_index);
1041
  // Signed saturating doubling long multiply by element (second part).
1043  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1044                int vm_index);
1045
1046  // Signed saturating doubling long multiply-add by element.
1047  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1048               int vm_index);
1049
1050  // Signed saturating doubling long multiply-add by element (second part).
1051  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1052                int vm_index);
1053
1054  // Signed saturating doubling long multiply-sub by element.
1055  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1056               int vm_index);
1057
1058  // Signed saturating doubling long multiply-sub by element (second part).
1059  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1060                int vm_index);
1061
1062  // Compare bitwise to zero.
1063  void cmeq(const VRegister& vd, const VRegister& vn, int value);
1064
1065  // Compare signed greater than or equal to zero.
1066  void cmge(const VRegister& vd, const VRegister& vn, int value);
1067
1068  // Compare signed greater than zero.
1069  void cmgt(const VRegister& vd, const VRegister& vn, int value);
1070
1071  // Compare signed less than or equal to zero.
1072  void cmle(const VRegister& vd, const VRegister& vn, int value);
1073
1074  // Compare signed less than zero.
1075  void cmlt(const VRegister& vd, const VRegister& vn, int value);
1076
1077  // Unsigned rounding halving add.
1078  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1079
1080  // Compare equal.
1081  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1082
1083  // Compare signed greater than or equal.
1084  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1085
1086  // Compare signed greater than.
1087  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1088
1089  // Compare unsigned higher.
1090  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1091
1092  // Compare unsigned higher or same.
1093  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1094
1095  // Compare bitwise test bits nonzero.
1096  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1097
1098  // Signed shift left by register.
1099  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1100
1101  // Unsigned shift left by register.
1102  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1103
1104  // Signed saturating doubling long multiply-subtract.
1105  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1106
1107  // Signed saturating doubling long multiply-subtract (second part).
1108  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1109
1110  // Signed saturating doubling long multiply.
1111  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1112
1113  // Signed saturating doubling long multiply (second part).
1114  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1115
1116  // Signed saturating doubling multiply returning high half.
1117  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1118
1119  // Signed saturating rounding doubling multiply returning high half.
1120  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1121
1122  // Signed saturating doubling multiply element returning high half.
1123  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1124               int vm_index);
1125
1126  // Signed saturating rounding doubling multiply element returning high half.
1127  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1128                int vm_index);
1129
  // Unsigned long multiply.
1131  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1132
1133  // Unsigned long multiply (second part).
1134  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1135
1136  // Rounding add narrow returning high half.
1137  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1138
1139  // Subtract narrow returning high half.
1140  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1141
1142  // Subtract narrow returning high half (second part).
1143  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1144
1145  // Rounding add narrow returning high half (second part).
1146  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1147
1148  // Rounding subtract narrow returning high half.
1149  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1150
1151  // Rounding subtract narrow returning high half (second part).
1152  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1153
1154  // Signed saturating shift left by register.
1155  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1156
1157  // Unsigned saturating shift left by register.
1158  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1159
1160  // Signed rounding shift left by register.
1161  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1162
1163  // Unsigned rounding shift left by register.
1164  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1165
1166  // Signed saturating rounding shift left by register.
1167  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1168
1169  // Unsigned saturating rounding shift left by register.
1170  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1171
1172  // Signed absolute difference.
1173  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1174
1175  // Unsigned absolute difference and accumulate.
1176  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1177
1178  // Shift left by immediate and insert.
1179  void sli(const VRegister& vd, const VRegister& vn, int shift);
1180
1181  // Shift right by immediate and insert.
1182  void sri(const VRegister& vd, const VRegister& vn, int shift);
1183
1184  // Signed maximum.
1185  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1186
1187  // Signed pairwise maximum.
1188  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1189
1190  // Add across vector.
1191  void addv(const VRegister& vd, const VRegister& vn);
1192
1193  // Signed add long across vector.
1194  void saddlv(const VRegister& vd, const VRegister& vn);
1195
1196  // Unsigned add long across vector.
1197  void uaddlv(const VRegister& vd, const VRegister& vn);
1198
1199  // FP maximum number across vector.
1200  void fmaxnmv(const VRegister& vd, const VRegister& vn);
1201
1202  // FP maximum across vector.
1203  void fmaxv(const VRegister& vd, const VRegister& vn);
1204
1205  // FP minimum number across vector.
1206  void fminnmv(const VRegister& vd, const VRegister& vn);
1207
1208  // FP minimum across vector.
1209  void fminv(const VRegister& vd, const VRegister& vn);
1210
1211  // Signed maximum across vector.
1212  void smaxv(const VRegister& vd, const VRegister& vn);
1213
1214  // Signed minimum.
1215  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1216
1217  // Signed minimum pairwise.
1218  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1219
1220  // Signed minimum across vector.
1221  void sminv(const VRegister& vd, const VRegister& vn);
1222
1223  // One-element structure store from one register.
1224  void st1(const VRegister& vt, const MemOperand& src);
1225
1226  // One-element structure store from two registers.
1227  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
1228
1229  // One-element structure store from three registers.
1230  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1231           const MemOperand& src);
1232
1233  // One-element structure store from four registers.
1234  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1235           const VRegister& vt4, const MemOperand& src);
1236
1237  // One-element single structure store from one lane.
1238  void st1(const VRegister& vt, int lane, const MemOperand& src);
1239
1240  // Two-element structure store from two registers.
1241  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
1242
1243  // Two-element single structure store from two lanes.
1244  void st2(const VRegister& vt, const VRegister& vt2, int lane,
1245           const MemOperand& src);
1246
1247  // Three-element structure store from three registers.
1248  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1249           const MemOperand& src);
1250
1251  // Three-element single structure store from three lanes.
1252  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1253           int lane, const MemOperand& src);
1254
1255  // Four-element structure store from four registers.
1256  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1257           const VRegister& vt4, const MemOperand& src);
1258
1259  // Four-element single structure store from four lanes.
1260  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1261           const VRegister& vt4, int lane, const MemOperand& src);
1262
1263  // Unsigned add long.
1264  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1265
1266  // Unsigned add long (second part).
1267  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1268
1269  // Unsigned add wide.
1270  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1271
1272  // Unsigned add wide (second part).
1273  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1274
1275  // Signed add long.
1276  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1277
1278  // Signed add long (second part).
1279  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1280
1281  // Signed add wide.
1282  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1283
1284  // Signed add wide (second part).
1285  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1286
1287  // Unsigned subtract long.
1288  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1289
1290  // Unsigned subtract long (second part).
1291  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1292
1293  // Unsigned subtract wide.
1294  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1295
1296  // Signed subtract long.
1297  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1298
1299  // Signed subtract long (second part).
1300  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1301
1302  // Signed integer subtract wide.
1303  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1304
1305  // Signed integer subtract wide (second part).
1306  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1307
1308  // Unsigned subtract wide (second part).
1309  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1310
1311  // Unsigned maximum.
1312  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1313
1314  // Unsigned pairwise maximum.
1315  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1316
1317  // Unsigned maximum across vector.
1318  void umaxv(const VRegister& vd, const VRegister& vn);
1319
1320  // Unsigned minimum.
1321  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1322
1323  // Unsigned pairwise minimum.
1324  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1325
1326  // Unsigned minimum across vector.
1327  void uminv(const VRegister& vd, const VRegister& vn);
1328
1329  // Transpose vectors (primary).
1330  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1331
1332  // Transpose vectors (secondary).
1333  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1334
1335  // Unzip vectors (primary).
1336  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1337
1338  // Unzip vectors (secondary).
1339  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1340
1341  // Zip vectors (primary).
1342  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1343
1344  // Zip vectors (secondary).
1345  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1346
1347  // Signed shift right by immediate.
1348  void sshr(const VRegister& vd, const VRegister& vn, int shift);
1349
1350  // Unsigned shift right by immediate.
1351  void ushr(const VRegister& vd, const VRegister& vn, int shift);
1352
1353  // Signed rounding shift right by immediate.
1354  void srshr(const VRegister& vd, const VRegister& vn, int shift);
1355
1356  // Unsigned rounding shift right by immediate.
1357  void urshr(const VRegister& vd, const VRegister& vn, int shift);
1358
1359  // Signed shift right by immediate and accumulate.
1360  void ssra(const VRegister& vd, const VRegister& vn, int shift);
1361
1362  // Unsigned shift right by immediate and accumulate.
1363  void usra(const VRegister& vd, const VRegister& vn, int shift);
1364
1365  // Signed rounding shift right by immediate and accumulate.
1366  void srsra(const VRegister& vd, const VRegister& vn, int shift);
1367
1368  // Unsigned rounding shift right by immediate and accumulate.
1369  void ursra(const VRegister& vd, const VRegister& vn, int shift);
1370
1371  // Shift right narrow by immediate.
1372  void shrn(const VRegister& vd, const VRegister& vn, int shift);
1373
1374  // Shift right narrow by immediate (second part).
1375  void shrn2(const VRegister& vd, const VRegister& vn, int shift);
1376
1377  // Rounding shift right narrow by immediate.
1378  void rshrn(const VRegister& vd, const VRegister& vn, int shift);
1379
1380  // Rounding shift right narrow by immediate (second part).
1381  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
1382
1383  // Unsigned saturating shift right narrow by immediate.
1384  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
1385
1386  // Unsigned saturating shift right narrow by immediate (second part).
1387  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
1388
1389  // Unsigned saturating rounding shift right narrow by immediate.
1390  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
1391
1392  // Unsigned saturating rounding shift right narrow by immediate (second part).
1393  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
1394
1395  // Signed saturating shift right narrow by immediate.
1396  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
1397
1398  // Signed saturating shift right narrow by immediate (second part).
1399  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
1400
1401  // Signed saturating rounded shift right narrow by immediate.
1402  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
1403
1404  // Signed saturating rounded shift right narrow by immediate (second part).
1405  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
1406
1407  // Signed saturating shift right unsigned narrow by immediate.
1408  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
1409
1410  // Signed saturating shift right unsigned narrow by immediate (second part).
1411  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
1412
1413  // Signed sat rounded shift right unsigned narrow by immediate.
1414  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
1415
1416  // Signed sat rounded shift right unsigned narrow by immediate (second part).
1417  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
1418
1419  // FP reciprocal step.
1420  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1421
1422  // FP reciprocal estimate.
1423  void frecpe(const VRegister& vd, const VRegister& vn);
1424
1425  // FP reciprocal square root estimate.
1426  void frsqrte(const VRegister& vd, const VRegister& vn);
1427
1428  // FP reciprocal square root step.
1429  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1430
1431  // Signed absolute difference and accumulate long.
1432  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1433
1434  // Signed absolute difference and accumulate long (second part).
1435  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1436
1437  // Unsigned absolute difference and accumulate long.
1438  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1439
1440  // Unsigned absolute difference and accumulate long (second part).
1441  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1442
1443  // Signed absolute difference long.
1444  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1445
1446  // Signed absolute difference long (second part).
1447  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1448
1449  // Unsigned absolute difference long.
1450  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1451
1452  // Unsigned absolute difference long (second part).
1453  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1454
1455  // Polynomial multiply long.
1456  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1457
1458  // Polynomial multiply long (second part).
1459  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1460
1461  // Signed long multiply-add.
1462  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1463
1464  // Signed long multiply-add (second part).
1465  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1466
1467  // Unsigned long multiply-add.
1468  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1469
1470  // Unsigned long multiply-add (second part).
1471  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1472
1473  // Signed long multiply-sub.
1474  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1475
1476  // Signed long multiply-sub (second part).
1477  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1478
1479  // Unsigned long multiply-sub.
1480  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1481
1482  // Unsigned long multiply-sub (second part).
1483  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1484
1485  // Signed long multiply.
1486  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1487
1488  // Signed long multiply (second part).
1489  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1490
1491  // Signed saturating doubling long multiply-add.
1492  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1493
1494  // Signed saturating doubling long multiply-add (second part).
1495  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1496
1497  // Unsigned absolute difference.
1498  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1499
1500  // Signed absolute difference and accumulate.
1501  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1502
1503  // FP instructions.
1504  // Move immediate to FP register.
1505  void fmov(const VRegister& fd, double imm);
1506  void fmov(const VRegister& fd, float imm);
1507
1508  // Move FP register to register.
1509  void fmov(const Register& rd, const VRegister& fn);
1510
1511  // Move register to FP register.
1512  void fmov(const VRegister& fd, const Register& rn);
1513
1514  // Move FP register to FP register.
1515  void fmov(const VRegister& fd, const VRegister& fn);
1516
1517  // Move 64-bit register to top half of 128-bit FP register.
1518  void fmov(const VRegister& vd, int index, const Register& rn);
1519
1520  // Move top half of 128-bit FP register to 64-bit register.
1521  void fmov(const Register& rd, const VRegister& vn, int index);
1522
1523  // FP add.
1524  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1525
1526  // FP subtract.
1527  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1528
1529  // FP multiply.
1530  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1531
1532  // FP compare equal to zero.
1533  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
1534
1535  // FP greater than zero.
1536  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
1537
1538  // FP greater than or equal to zero.
1539  void fcmge(const VRegister& vd, const VRegister& vn, double imm);
1540
1541  // FP less than or equal to zero.
1542  void fcmle(const VRegister& vd, const VRegister& vn, double imm);
1543
  // FP less than zero.
1545  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
1546
1547  // FP absolute difference.
1548  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1549
1550  // FP pairwise add vector.
1551  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1552
1553  // FP pairwise add scalar.
1554  void faddp(const VRegister& vd, const VRegister& vn);
1555
1556  // FP pairwise maximum scalar.
1557  void fmaxp(const VRegister& vd, const VRegister& vn);
1558
1559  // FP pairwise maximum number scalar.
1560  void fmaxnmp(const VRegister& vd, const VRegister& vn);
1561
1562  // FP pairwise minimum number scalar.
1563  void fminnmp(const VRegister& vd, const VRegister& vn);
1564
1565  // FP vector multiply accumulate.
1566  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1567
1568  // FP vector multiply subtract.
1569  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1570
1571  // FP vector multiply extended.
1572  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1573
1574  // FP absolute greater than or equal.
1575  void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1576
1577  // FP absolute greater than.
1578  void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1579
1580  // FP multiply by element.
1581  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1582            int vm_index);
1583
1584  // FP fused multiply-add to accumulator by element.
1585  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1586            int vm_index);
1587
1588  // FP fused multiply-sub from accumulator by element.
1589  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1590            int vm_index);
1591
1592  // FP multiply extended by element.
1593  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1594             int vm_index);
1595
1596  // FP compare equal.
1597  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1598
1599  // FP greater than.
1600  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1601
1602  // FP greater than or equal.
1603  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1604
1605  // FP pairwise maximum vector.
1606  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1607
1608  // FP pairwise minimum vector.
1609  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1610
1611  // FP pairwise minimum scalar.
1612  void fminp(const VRegister& vd, const VRegister& vn);
1613
1614  // FP pairwise maximum number vector.
1615  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1616
1617  // FP pairwise minimum number vector.
1618  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1619
1620  // FP fused multiply-add.
1621  void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1622             const VRegister& va);
1623
1624  // FP fused multiply-subtract.
1625  void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1626             const VRegister& va);
1627
1628  // FP fused multiply-add and negate.
1629  void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1630              const VRegister& va);
1631
1632  // FP fused multiply-subtract and negate.
1633  void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1634              const VRegister& va);
1635
1636  // FP multiply-negate scalar.
1637  void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1638
1639  // FP reciprocal exponent scalar.
1640  void frecpx(const VRegister& vd, const VRegister& vn);
1641
1642  // FP divide.
1643  void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1644
1645  // FP maximum.
1646  void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1647
1648  // FP minimum.
1649  void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1650
  // FP maximum number.
1652  void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1653
  // FP minimum number.
1655  void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1656
1657  // FP absolute.
1658  void fabs(const VRegister& vd, const VRegister& vn);
1659
1660  // FP negate.
1661  void fneg(const VRegister& vd, const VRegister& vn);
1662
1663  // FP square root.
1664  void fsqrt(const VRegister& vd, const VRegister& vn);
1665
1666  // FP round to integer nearest with ties to away.
1667  void frinta(const VRegister& vd, const VRegister& vn);
1668
1669  // FP round to integer, implicit rounding.
1670  void frinti(const VRegister& vd, const VRegister& vn);
1671
1672  // FP round to integer toward minus infinity.
1673  void frintm(const VRegister& vd, const VRegister& vn);
1674
1675  // FP round to integer nearest with ties to even.
1676  void frintn(const VRegister& vd, const VRegister& vn);
1677
1678  // FP round to integer towards plus infinity.
1679  void frintp(const VRegister& vd, const VRegister& vn);
1680
1681  // FP round to integer, exact, implicit rounding.
1682  void frintx(const VRegister& vd, const VRegister& vn);
1683
1684  // FP round to integer towards zero.
1685  void frintz(const VRegister& vd, const VRegister& vn);
1686
1687  // FP compare registers.
1688  void fcmp(const VRegister& vn, const VRegister& vm);
1689
1690  // FP compare immediate.
1691  void fcmp(const VRegister& vn, double value);
1692
1693  // FP conditional compare.
1694  void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv,
1695             Condition cond);
1696
1697  // FP conditional select.
1698  void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1699             Condition cond);
1700
1701  // Common FP Convert functions.
1702  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
1703  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
1704
1705  // FP convert between precisions.
1706  void fcvt(const VRegister& vd, const VRegister& vn);
1707
1708  // FP convert to higher precision.
1709  void fcvtl(const VRegister& vd, const VRegister& vn);
1710
1711  // FP convert to higher precision (second part).
1712  void fcvtl2(const VRegister& vd, const VRegister& vn);
1713
1714  // FP convert to lower precision.
1715  void fcvtn(const VRegister& vd, const VRegister& vn);
1716
  // FP convert to lower precision (second part).
1718  void fcvtn2(const VRegister& vd, const VRegister& vn);
1719
1720  // FP convert to lower precision, rounding to odd.
1721  void fcvtxn(const VRegister& vd, const VRegister& vn);
1722
1723  // FP convert to lower precision, rounding to odd (second part).
1724  void fcvtxn2(const VRegister& vd, const VRegister& vn);
1725
1726  // FP convert to signed integer, nearest with ties to away.
1727  void fcvtas(const Register& rd, const VRegister& vn);
1728
1729  // FP convert to unsigned integer, nearest with ties to away.
1730  void fcvtau(const Register& rd, const VRegister& vn);
1731
1732  // FP convert to signed integer, nearest with ties to away.
1733  void fcvtas(const VRegister& vd, const VRegister& vn);
1734
1735  // FP convert to unsigned integer, nearest with ties to away.
1736  void fcvtau(const VRegister& vd, const VRegister& vn);
1737
1738  // FP convert to signed integer, round towards -infinity.
1739  void fcvtms(const Register& rd, const VRegister& vn);
1740
1741  // FP convert to unsigned integer, round towards -infinity.
1742  void fcvtmu(const Register& rd, const VRegister& vn);
1743
1744  // FP convert to signed integer, round towards -infinity.
1745  void fcvtms(const VRegister& vd, const VRegister& vn);
1746
1747  // FP convert to unsigned integer, round towards -infinity.
1748  void fcvtmu(const VRegister& vd, const VRegister& vn);
1749
1750  // FP convert to signed integer, nearest with ties to even.
1751  void fcvtns(const Register& rd, const VRegister& vn);
1752
1753  // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
1754  void fjcvtzs(const Register& rd, const VRegister& vn);
1755
1756  // FP convert to unsigned integer, nearest with ties to even.
1757  void fcvtnu(const Register& rd, const VRegister& vn);
1758
1759  // FP convert to signed integer, nearest with ties to even.
1760  void fcvtns(const VRegister& rd, const VRegister& vn);
1761
1762  // FP convert to unsigned integer, nearest with ties to even.
1763  void fcvtnu(const VRegister& rd, const VRegister& vn);
1764
1765  // FP convert to signed integer or fixed-point, round towards zero.
1766  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
1767
1768  // FP convert to unsigned integer or fixed-point, round towards zero.
1769  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
1770
1771  // FP convert to signed integer or fixed-point, round towards zero.
1772  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
1773
1774  // FP convert to unsigned integer or fixed-point, round towards zero.
1775  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
1776
1777  // FP convert to signed integer, round towards +infinity.
1778  void fcvtps(const Register& rd, const VRegister& vn);
1779
1780  // FP convert to unsigned integer, round towards +infinity.
1781  void fcvtpu(const Register& rd, const VRegister& vn);
1782
1783  // FP convert to signed integer, round towards +infinity.
1784  void fcvtps(const VRegister& vd, const VRegister& vn);
1785
1786  // FP convert to unsigned integer, round towards +infinity.
1787  void fcvtpu(const VRegister& vd, const VRegister& vn);
1788
1789  // Convert signed integer or fixed point to FP.
1790  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
1791
1792  // Convert unsigned integer or fixed point to FP.
1793  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
1794
1795  // Convert signed integer or fixed-point to FP.
1796  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
1797
1798  // Convert unsigned integer or fixed-point to FP.
1799  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
1800
1801  // Extract vector from pair of vectors.
1802  void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1803           int index);
1804
1805  // Duplicate vector element to vector or scalar.
1806  void dup(const VRegister& vd, const VRegister& vn, int vn_index);
1807
1808  // Duplicate general-purpose register to vector.
1809  void dup(const VRegister& vd, const Register& rn);
1810
1811  // Insert vector element from general-purpose register.
1812  void ins(const VRegister& vd, int vd_index, const Register& rn);
1813
1814  // Move general-purpose register to a vector element.
1815  void mov(const VRegister& vd, int vd_index, const Register& rn);
1816
1817  // Unsigned move vector element to general-purpose register.
1818  void umov(const Register& rd, const VRegister& vn, int vn_index);
1819
1820  // Move vector element to general-purpose register.
1821  void mov(const Register& rd, const VRegister& vn, int vn_index);
1822
1823  // Move vector element to scalar.
1824  void mov(const VRegister& vd, const VRegister& vn, int vn_index);
1825
1826  // Insert vector element from another vector element.
1827  void ins(const VRegister& vd, int vd_index, const VRegister& vn,
1828           int vn_index);
1829
1830  // Move vector element to another vector element.
1831  void mov(const VRegister& vd, int vd_index, const VRegister& vn,
1832           int vn_index);
1833
1834  // Signed move vector element to general-purpose register.
1835  void smov(const Register& rd, const VRegister& vn, int vn_index);
1836
1837  // One-element structure load to one register.
1838  void ld1(const VRegister& vt, const MemOperand& src);
1839
1840  // One-element structure load to two registers.
1841  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
1842
1843  // One-element structure load to three registers.
1844  void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1845           const MemOperand& src);
1846
1847  // One-element structure load to four registers.
1848  void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1849           const VRegister& vt4, const MemOperand& src);
1850
1851  // One-element single structure load to one lane.
1852  void ld1(const VRegister& vt, int lane, const MemOperand& src);
1853
1854  // One-element single structure load to all lanes.
1855  void ld1r(const VRegister& vt, const MemOperand& src);
1856
1857  // Two-element structure load.
1858  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
1859
1860  // Two-element single structure load to one lane.
1861  void ld2(const VRegister& vt, const VRegister& vt2, int lane,
1862           const MemOperand& src);
1863
1864  // Two-element single structure load to all lanes.
1865  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
1866
1867  // Three-element structure load.
1868  void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1869           const MemOperand& src);
1870
1871  // Three-element single structure load to one lane.
1872  void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1873           int lane, const MemOperand& src);
1874
1875  // Three-element single structure load to all lanes.
1876  void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1877            const MemOperand& src);
1878
1879  // Four-element structure load.
1880  void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1881           const VRegister& vt4, const MemOperand& src);
1882
1883  // Four-element single structure load to one lane.
1884  void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1885           const VRegister& vt4, int lane, const MemOperand& src);
1886
1887  // Four-element single structure load to all lanes.
1888  void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
1889            const VRegister& vt4, const MemOperand& src);
1890
1891  // Count leading sign bits.
1892  void cls(const VRegister& vd, const VRegister& vn);
1893
1894  // Count leading zero bits (vector).
1895  void clz(const VRegister& vd, const VRegister& vn);
1896
1897  // Population count per byte.
1898  void cnt(const VRegister& vd, const VRegister& vn);
1899
1900  // Reverse bit order.
1901  void rbit(const VRegister& vd, const VRegister& vn);
1902
1903  // Reverse elements in 16-bit halfwords.
1904  void rev16(const VRegister& vd, const VRegister& vn);
1905
1906  // Reverse elements in 32-bit words.
1907  void rev32(const VRegister& vd, const VRegister& vn);
1908
1909  // Reverse elements in 64-bit doublewords.
1910  void rev64(const VRegister& vd, const VRegister& vn);
1911
1912  // Unsigned reciprocal square root estimate.
1913  void ursqrte(const VRegister& vd, const VRegister& vn);
1914
1915  // Unsigned reciprocal estimate.
1916  void urecpe(const VRegister& vd, const VRegister& vn);
1917
1918  // Signed pairwise long add and accumulate.
1919  void sadalp(const VRegister& vd, const VRegister& vn);
1920
1921  // Signed pairwise long add.
1922  void saddlp(const VRegister& vd, const VRegister& vn);
1923
1924  // Unsigned pairwise long add.
1925  void uaddlp(const VRegister& vd, const VRegister& vn);
1926
1927  // Unsigned pairwise long add and accumulate.
1928  void uadalp(const VRegister& vd, const VRegister& vn);
1929
1930  // Shift left by immediate.
1931  void shl(const VRegister& vd, const VRegister& vn, int shift);
1932
1933  // Signed saturating shift left by immediate.
1934  void sqshl(const VRegister& vd, const VRegister& vn, int shift);
1935
1936  // Signed saturating shift left unsigned by immediate.
1937  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
1938
1939  // Unsigned saturating shift left by immediate.
1940  void uqshl(const VRegister& vd, const VRegister& vn, int shift);
1941
1942  // Signed shift left long by immediate.
1943  void sshll(const VRegister& vd, const VRegister& vn, int shift);
1944
1945  // Signed shift left long by immediate (second part).
1946  void sshll2(const VRegister& vd, const VRegister& vn, int shift);
1947
1948  // Signed extend long.
1949  void sxtl(const VRegister& vd, const VRegister& vn);
1950
1951  // Signed extend long (second part).
1952  void sxtl2(const VRegister& vd, const VRegister& vn);
1953
1954  // Unsigned shift left long by immediate.
1955  void ushll(const VRegister& vd, const VRegister& vn, int shift);
1956
1957  // Unsigned shift left long by immediate (second part).
1958  void ushll2(const VRegister& vd, const VRegister& vn, int shift);
1959
1960  // Shift left long by element size.
1961  void shll(const VRegister& vd, const VRegister& vn, int shift);
1962
1963  // Shift left long by element size (second part).
1964  void shll2(const VRegister& vd, const VRegister& vn, int shift);
1965
1966  // Unsigned extend long.
1967  void uxtl(const VRegister& vd, const VRegister& vn);
1968
1969  // Unsigned extend long (second part).
1970  void uxtl2(const VRegister& vd, const VRegister& vn);
1971
1972  // Signed rounding halving add.
1973  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1974
1975  // Unsigned halving sub.
1976  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1977
1978  // Signed halving sub.
1979  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1980
1981  // Unsigned saturating add.
1982  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1983
1984  // Signed saturating add.
1985  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1986
1987  // Unsigned saturating subtract.
1988  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1989
1990  // Signed saturating subtract.
1991  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1992
1993  // Add pairwise.
1994  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1995
1996  // Add pair of elements scalar.
1997  void addp(const VRegister& vd, const VRegister& vn);
1998
1999  // Multiply-add to accumulator.
2000  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2001
2002  // Multiply-subtract to accumulator.
2003  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2004
2005  // Multiply.
2006  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2007
2008  // Table lookup from one register.
2009  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2010
2011  // Table lookup from two registers.
2012  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2013           const VRegister& vm);
2014
2015  // Table lookup from three registers.
2016  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2017           const VRegister& vn3, const VRegister& vm);
2018
2019  // Table lookup from four registers.
2020  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2021           const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2022
2023  // Table lookup extension from one register.
2024  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2025
2026  // Table lookup extension from two registers.
2027  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2028           const VRegister& vm);
2029
2030  // Table lookup extension from three registers.
2031  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2032           const VRegister& vn3, const VRegister& vm);
2033
2034  // Table lookup extension from four registers.
2035  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2036           const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2037
  // Instruction functions used only for test, debug, and patching.
  // These bypass operand encoding and place the given bits verbatim into the
  // instruction stream.
  // Emit raw instructions in the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 8 bits of data in the instruction stream.
  void dc8(uint8_t data) { EmitData(&data, sizeof(data)); }

  // Emit 32 bits of data in the instruction stream.
  void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }

  // Emit 64 bits of data in the instruction stream.
  void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
2050
2051  // Emit an address in the instruction stream.
2052  void dcptr(Label* label);
2053
  // Copy a string into the instruction stream, including the terminating
  // null character. The instruction pointer (pc_) is then aligned correctly
  // for subsequent instructions.
2057  void EmitStringData(const char* string);
2058
2059  // Pseudo-instructions ------------------------------------------------------
2060
2061  // Parameters are described in arm64/instructions-arm64.h.
2062  void debug(const char* message, uint32_t code, Instr params = BREAK);
2063
  // Required by V8.
  void db(uint8_t data) { dc8(data); }
  // Emit a 32-bit data word. If {rmode} carries relocation information it
  // must describe embedded-object data or a literal constant, and it is
  // recorded immediately before the word. Pools are blocked for the duration
  // so that no pool material can be interleaved between the relocation entry
  // and the data it describes.
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    BlockPoolsScope no_pool_scope(this);
    if (!RelocInfo::IsNoInfo(rmode)) {
      DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
             RelocInfo::IsLiteralConstant(rmode));
      RecordRelocInfo(rmode);
    }
    dc32(data);
  }
  // As dd(), but emits a 64-bit data word.
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    BlockPoolsScope no_pool_scope(this);
    if (!RelocInfo::IsNoInfo(rmode)) {
      DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
             RelocInfo::IsLiteralConstant(rmode));
      RecordRelocInfo(rmode);
    }
    dc64(data);
  }
  // As dd(), but emits a pointer-sized data word (64 bits on arm64).
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    BlockPoolsScope no_pool_scope(this);
    if (!RelocInfo::IsNoInfo(rmode)) {
      DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
             RelocInfo::IsLiteralConstant(rmode));
      RecordRelocInfo(rmode);
    }
    dc64(data);
  }
2093
  // Code generation helpers --------------------------------------------------

  // Returns the current emission position as an Instruction pointer.
  Instruction* pc() const { return Instruction::Cast(pc_); }

  // Returns the instruction located {offset} bytes from the start of the
  // code buffer.
  Instruction* InstructionAt(ptrdiff_t offset) const {
    return reinterpret_cast<Instruction*>(buffer_start_ + offset);
  }

  // Returns the byte offset of {instr} from the start of the code buffer.
  // Inverse of InstructionAt().
  ptrdiff_t InstructionOffset(Instruction* instr) const {
    return reinterpret_cast<byte*>(instr) - buffer_start_;
  }
2105
  // Register encoding.
  // The plain RX() helpers shift a register code into the corresponding
  // instruction field. They reject the internal stack-pointer code, so for
  // these helpers the encoding 31 always denotes the zero register.
  static Instr Rd(CPURegister rd) {
    DCHECK_NE(rd.code(), kSPRegInternalCode);
    return rd.code() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    DCHECK_NE(rn.code(), kSPRegInternalCode);
    return rn.code() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    return rm.code() << Rm_offset;
  }

  // As Rm(), but additionally disallows the zero register, for instructions
  // where 31 in the Rm field is not a valid operand encoding.
  static Instr RmNot31(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    DCHECK(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    DCHECK_NE(ra.code(), kSPRegInternalCode);
    return ra.code() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    DCHECK_NE(rt.code(), kSPRegInternalCode);
    return rt.code() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    DCHECK_NE(rt2.code(), kSPRegInternalCode);
    return rt2.code() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    DCHECK_NE(rs.code(), kSPRegInternalCode);
    return rs.code() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  // Masking with kRegCodeMask maps the out-of-range internal SP code onto the
  // architectural field encoding.
  static Instr RdSP(Register rd) {
    DCHECK(!rd.IsZero());
    return (rd.code() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    DCHECK(!rn.IsZero());
    return (rn.code() & kRegCodeMask) << Rn_offset;
  }
2159
2160  // Flags encoding.
2161  inline static Instr Flags(FlagsUpdate S);
2162  inline static Instr Cond(Condition cond);
2163
2164  // PC-relative address encoding.
2165  inline static Instr ImmPCRelAddress(int imm21);
2166
2167  // Branch encoding.
2168  inline static Instr ImmUncondBranch(int imm26);
2169  inline static Instr ImmCondBranch(int imm19);
2170  inline static Instr ImmCmpBranch(int imm19);
2171  inline static Instr ImmTestBranch(int imm14);
2172  inline static Instr ImmTestBranchBit(unsigned bit_pos);
2173
2174  // Data Processing encoding.
2175  inline static Instr SF(Register rd);
2176  inline static Instr ImmAddSub(int imm);
2177  inline static Instr ImmS(unsigned imms, unsigned reg_size);
2178  inline static Instr ImmR(unsigned immr, unsigned reg_size);
2179  inline static Instr ImmSetBits(unsigned imms, unsigned reg_size);
2180  inline static Instr ImmRotate(unsigned immr, unsigned reg_size);
2181  inline static Instr ImmLLiteral(int imm19);
2182  inline static Instr BitN(unsigned bitn, unsigned reg_size);
2183  inline static Instr ShiftDP(Shift shift);
2184  inline static Instr ImmDPShift(unsigned amount);
2185  inline static Instr ExtendMode(Extend extend);
2186  inline static Instr ImmExtendShift(unsigned left_shift);
2187  inline static Instr ImmCondCmp(unsigned imm);
2188  inline static Instr Nzcv(StatusFlags nzcv);
2189
2190  static bool IsImmAddSub(int64_t immediate);
2191  static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n,
2192                           unsigned* imm_s, unsigned* imm_r);
2193
2194  // MemOperand offset encoding.
2195  inline static Instr ImmLSUnsigned(int imm12);
2196  inline static Instr ImmLS(int imm9);
2197  inline static Instr ImmLSPair(int imm7, unsigned size);
2198  inline static Instr ImmShiftLS(unsigned shift_amount);
2199  inline static Instr ImmException(int imm16);
2200  inline static Instr ImmSystemRegister(int imm15);
2201  inline static Instr ImmHint(int imm7);
2202  inline static Instr ImmBarrierDomain(int imm2);
2203  inline static Instr ImmBarrierType(int imm2);
2204  inline static unsigned CalcLSDataSize(LoadStoreOp op);
2205
2206  // Instruction bits for vector format in data processing operations.
2207  static Instr VFormat(VRegister vd) {
2208    if (vd.Is64Bits()) {
2209      switch (vd.LaneCount()) {
2210        case 2:
2211          return NEON_2S;
2212        case 4:
2213          return NEON_4H;
2214        case 8:
2215          return NEON_8B;
2216        default:
2217          UNREACHABLE();
2218      }
2219    } else {
2220      DCHECK(vd.Is128Bits());
2221      switch (vd.LaneCount()) {
2222        case 2:
2223          return NEON_2D;
2224        case 4:
2225          return NEON_4S;
2226        case 8:
2227          return NEON_8H;
2228        case 16:
2229          return NEON_16B;
2230        default:
2231          UNREACHABLE();
2232      }
2233    }
2234  }
2235
2236  // Instruction bits for vector format in floating point data processing
2237  // operations.
2238  static Instr FPFormat(VRegister vd) {
2239    if (vd.LaneCount() == 1) {
2240      // Floating point scalar formats.
2241      DCHECK(vd.Is32Bits() || vd.Is64Bits());
2242      return vd.Is64Bits() ? FP64 : FP32;
2243    }
2244
2245    // Two lane floating point vector formats.
2246    if (vd.LaneCount() == 2) {
2247      DCHECK(vd.Is64Bits() || vd.Is128Bits());
2248      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
2249    }
2250
2251    // Four lane floating point vector format.
2252    DCHECK((vd.LaneCount() == 4) && vd.Is128Bits());
2253    return NEON_FP_4S;
2254  }
2255
2256  // Instruction bits for vector format in load and store operations.
2257  static Instr LSVFormat(VRegister vd) {
2258    if (vd.Is64Bits()) {
2259      switch (vd.LaneCount()) {
2260        case 1:
2261          return LS_NEON_1D;
2262        case 2:
2263          return LS_NEON_2S;
2264        case 4:
2265          return LS_NEON_4H;
2266        case 8:
2267          return LS_NEON_8B;
2268        default:
2269          UNREACHABLE();
2270      }
2271    } else {
2272      DCHECK(vd.Is128Bits());
2273      switch (vd.LaneCount()) {
2274        case 2:
2275          return LS_NEON_2D;
2276        case 4:
2277          return LS_NEON_4S;
2278        case 8:
2279          return LS_NEON_8H;
2280        case 16:
2281          return LS_NEON_16B;
2282        default:
2283          UNREACHABLE();
2284      }
2285    }
2286  }
2287
  // Instruction bits for scalar format in data processing operations.
  // The scalar format is determined purely by the register's size in bytes:
  // B (1), H (2), S (4) or D (8).
  static Instr SFormat(VRegister vd) {
    DCHECK(vd.IsScalar());
    switch (vd.SizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        UNREACHABLE();
    }
  }
2304
2305  static Instr ImmNEONHLM(int index, int num_bits) {
2306    int h, l, m;
2307    if (num_bits == 3) {
2308      DCHECK(is_uint3(index));
2309      h = (index >> 2) & 1;
2310      l = (index >> 1) & 1;
2311      m = (index >> 0) & 1;
2312    } else if (num_bits == 2) {
2313      DCHECK(is_uint2(index));
2314      h = (index >> 1) & 1;
2315      l = (index >> 0) & 1;
2316      m = 0;
2317    } else {
2318      DCHECK(is_uint1(index) && (num_bits == 1));
2319      h = (index >> 0) & 1;
2320      l = 0;
2321      m = 0;
2322    }
2323    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
2324  }
2325
  // Encode the immediate of an EXT instruction.
  static Instr ImmNEONExt(int imm4) {
    DCHECK(is_uint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  // Encode a lane index into the 5-bit "imm5" field. The index sits above a
  // single set bit marking the lane size for the given format
  // (B: xxxx1, H: xxx10, S: xx100, D: x1000).
  static Instr ImmNEON5(Instr format, int index) {
    DCHECK(is_uint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  // Encode a source lane index into the 4-bit "imm4" field; the index is
  // shifted by the log2 lane size so it is expressed in bytes.
  static Instr ImmNEON4(Instr format, int index) {
    DCHECK(is_uint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }
2344
2345  static Instr ImmNEONabcdefgh(int imm8) {
2346    DCHECK(is_uint8(imm8));
2347    Instr instr;
2348    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
2349    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
2350    return instr;
2351  }
2352
  // Encode the 4-bit "cmode" field of a NEON modified-immediate instruction.
  static Instr NEONCmode(int cmode) {
    DCHECK(is_uint4(cmode));
    return cmode << NEONCmode_offset;
  }

  // Encode the single "op" bit of a NEON modified-immediate instruction.
  static Instr NEONModImmOp(int op) {
    DCHECK(is_uint1(op));
    return op << NEONModImmOp_offset;
  }
2362
2363  static bool IsImmLSUnscaled(int64_t offset);
2364  static bool IsImmLSScaled(int64_t offset, unsigned size);
2365  static bool IsImmLLiteral(int64_t offset);
2366
2367  // Move immediates encoding.
2368  inline static Instr ImmMoveWide(int imm);
2369  inline static Instr ShiftMoveWide(int shift);
2370
2371  // FP Immediates.
2372  static Instr ImmFP(double imm);
2373  static Instr ImmNEONFP(double imm);
2374  inline static Instr FPScale(unsigned scale);
2375
2376  // FP register type.
2377  inline static Instr FPType(VRegister fd);
2378
  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Unconditionally emit the constant pool, without a branch over the pool
  // data (suitable when execution cannot fall into the pool).
  void ForceConstantPoolEmissionWithoutJump() {
    constpool_.Check(Emission::kForced, Jump::kOmitted);
  }
  // Unconditionally emit the constant pool, preceded by a branch that jumps
  // over the pool data.
  void ForceConstantPoolEmissionWithJump() {
    constpool_.Check(Emission::kForced, Jump::kRequired);
  }
  // Check if the const pool needs to be emitted while pretending that {margin}
  // more bytes of instructions have already been emitted.
  void EmitConstPoolWithJumpIfNeeded(size_t margin = 0) {
    constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
  }
2393
  // Used by veneer checks below - returns the max (= overapproximated) pc
  // offset after the veneer pool, if the veneer pool were to be emitted
  // immediately.
  intptr_t MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin);
  // Returns true if we should emit a veneer as soon as possible for a branch
  // which can at most reach to specified pc.
  bool ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
    return max_reachable_pc < MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
  }
  // As ShouldEmitVeneer(), applied to the first (tightest) reachability limit
  // among the currently unresolved branches.
  bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) {
    return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
  }

  // The code size generated for a veneer. Currently one branch
  // instruction. This is for code size checking purposes, and can be extended
  // in the future for example if we decide to add nops between the veneers.
  static constexpr int kVeneerCodeSize = 1 * kInstrSize;

  // Record the emission of a veneer pool of {size} bytes at {location_offset}.
  void RecordVeneerPool(int location_offset, int size);
  // Emits veneers for branches that are approaching their maximum range.
  // If need_protection is true, the veneers are protected by a branch jumping
  // over the code.
  void EmitVeneers(bool force_emit, bool need_protection,
                   size_t margin = kVeneerDistanceMargin);
  // Emits a guard (see EmitPoolGuard) protecting the veneer pool.
  void EmitVeneersGuard() { EmitPoolGuard(); }
  // Checks whether veneers need to be emitted at this point.
  // If force_emit is set, a veneer is generated for *all* unresolved branches.
  void CheckVeneerPool(bool force_emit, bool require_jump,
                       size_t margin = kVeneerDistanceMargin);
2423
2424  using BlockConstPoolScope = ConstantPool::BlockScope;
2425
  // RAII scope that blocks both veneer pool and constant pool emission for
  // its lifetime, so that multi-instruction sequences are not split by pools.
  class V8_NODISCARD BlockPoolsScope {
   public:
    // Block veneer and constant pool. Emits pools if necessary to ensure that
    // {margin} more bytes can be emitted without triggering pool emission.
    explicit BlockPoolsScope(Assembler* assem, size_t margin = 0)
        : assem_(assem), block_const_pool_(assem, margin) {
      assem_->CheckVeneerPool(false, true, margin);
      assem_->StartBlockVeneerPool();
    }

    // As above, but with an explicit pool-emission-check policy and no
    // up-front veneer pool check.
    BlockPoolsScope(Assembler* assem, PoolEmissionCheck check)
        : assem_(assem), block_const_pool_(assem, check) {
      assem_->StartBlockVeneerPool();
    }
    // Unblocks veneer pool emission; constant pool emission is unblocked by
    // block_const_pool_'s destructor.
    ~BlockPoolsScope() { assem_->EndBlockVeneerPool(); }

   private:
    Assembler* assem_;
    BlockConstPoolScope block_const_pool_;
    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope);
  };
2447
#if defined(V8_OS_WIN)
  // Returns the encoder collecting Windows ARM64 unwind (xdata) information,
  // or nullptr if xdata_encoder_ has not been created.
  win64_unwindinfo::XdataEncoder* GetXdataEncoder() {
    return xdata_encoder_.get();
  }

  win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
#endif
2455
 protected:
  // Returns xzr/wzr (or equivalent) matching the size of {reg}.
  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const;

  // Low-level load/store and load/store-pair emission helpers.
  void LoadStore(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op);
  void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                     const MemOperand& addr, LoadStorePairOp op);
  // NEON structure load/store emission helpers (LD1/ST1 etc. variants).
  void LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt, int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr,
                             Instr op);

  // Checks whether {offset} fits the load/store-pair immediate encoding for
  // accesses of the given {size}.
  static bool IsImmLSPair(int64_t offset, unsigned size);

  // Logical (bitwise) instruction emission helpers.
  void Logical(const Register& rd, const Register& rn, const Operand& operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd, const Register& rn, unsigned n,
                        unsigned imm_s, unsigned imm_r, LogicalOp op);

  // Conditional compare emission; {nzcv} are the flags set on a failed
  // condition.
  void ConditionalCompare(const Register& rn, const Operand& operand,
                          StatusFlags nzcv, Condition cond,
                          ConditionalCompareOp op);
  static bool IsImmConditionalCompare(int64_t immediate);

  void AddSubWithCarry(const Register& rd, const Register& rn,
                       const Operand& operand, FlagsUpdate S,
                       AddSubWithCarryOp op);

  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd, const Register& rn, Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd, const Register& rn, Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd, const Register& rn, const Operand& operand,
              FlagsUpdate S, AddSubOp op);

  // Whether {imm} is representable as an FP immediate operand.
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static inline LoadStoreOp LoadOpFor(const CPURegister& rt);
  static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                              const CPURegister& rt2);
  static inline LoadStoreOp StoreOpFor(const CPURegister& rt);
  static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                               const CPURegister& rt2);
  static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Remove the specified branch from the unbound label link chain.
  // If available, a veneer for this label can be used for other branches in the
  // chain if the link chain cannot be fixed up without this branch.
  void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label,
                                      Instruction* label_veneer = nullptr);
2520
 private:
  // Encodes an FP immediate into the 8-bit imm8 instruction field.
  static uint32_t FPToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd, uint64_t imm, int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd, const Register& rn,
                               const Operand& operand, FlagsUpdate S, Instr op);
  void DataProcExtendedRegister(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
                                Instr op);
  void ConditionalSelect(const Register& rd, const Register& rn,
                         const Register& rm, Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd, const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd, const Register& rn,
                             const Register& rm, const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm,
                               FPDataProcessing2SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm, const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by instruction encoding class.
  void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd, const VRegister& vn,
                   const VRegister& vm, Instr op);
  void NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                        const VRegister& vm, NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                      NEON2RegMiscOp vop, double value = 0.0);
  void NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                    NEON2RegMiscOp vop, int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm, int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm, int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                          NEONShiftImmediateOp op, int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                              int shift, NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn,
                               int shift, NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
  void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEONTableOp op);

  // Encodes the addressing-mode field for NEON structure load/stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Label helpers.

  // Return an offset for a label-referencing instruction, typically a branch.
  int LinkAndGetByteOffsetTo(Label* label);

  // This is the same as LinkAndGetByteOffsetTo, but return an offset
  // suitable for fields that take instruction offsets.
  inline int LinkAndGetInstructionOffsetTo(Label* label);

  // Sentinel offset marking the start of a label's link chain.
  static constexpr int kStartOfLabelLinkChain = 0;

  // Verify that a label's link chain is intact.
  void CheckLabelLinkChain(Label const* label);
2614
2615  // Emit the instruction at pc_.
2616  void Emit(Instr instruction) {
2617    STATIC_ASSERT(sizeof(*pc_) == 1);
2618    STATIC_ASSERT(sizeof(instruction) == kInstrSize);
2619    DCHECK_LE(pc_ + sizeof(instruction), buffer_start_ + buffer_->size());
2620
2621    memcpy(pc_, &instruction, sizeof(instruction));
2622    pc_ += sizeof(instruction);
2623    CheckBuffer();
2624  }
2625
2626  // Emit data inline in the instruction stream.
2627  void EmitData(void const* data, unsigned size) {
2628    DCHECK_EQ(sizeof(*pc_), 1);
2629    DCHECK_LE(pc_ + size, buffer_start_ + buffer_->size());
2630
2631    // TODO(all): Somehow register we have some data here. Then we can
2632    // disassemble it correctly.
2633    memcpy(pc_, data, size);
2634    pc_ += size;
2635    CheckBuffer();
2636  }
2637
  // Grows the code buffer, copying existing contents; see kGap below for
  // when growth is triggered.
  void GrowBuffer();
  V8_INLINE void CheckBufferSpace();
  void CheckBuffer();

  // Emission of the veneer pools may be blocked in some code sequences.
  int veneer_pool_blocked_nesting_;  // Block emission if this is not zero.

  // Relocation info generation.
  // Each relocation is encoded as a variable size value.
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

 protected:
  // Code generation
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);
2664
 public:
#ifdef DEBUG
  // Functions used for testing.
  // Total byte size of the pending constant pool entries.
  size_t GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.Entry32Count() * kInt32Size +
           constpool_.Entry64Count() * kInt64Size;
  }

  // Exposes ConstantPool::kCheckInterval to tests.
  static size_t GetCheckConstPoolIntervalForTesting() {
    return ConstantPool::kCheckInterval;
  }

  // Exposes ConstantPool::kApproxDistToPool64 to tests.
  static size_t GetApproxMaxDistToConstPoolForTesting() {
    return ConstantPool::kApproxDistToPool64;
  }
#endif
2682
  // A (pc offset, label) record for a forward branch whose target is not yet
  // bound; stored in unresolved_branches_ below to drive veneer emission.
  class FarBranchInfo {
   public:
    FarBranchInfo(int offset, Label* label)
        : pc_offset_(offset), label_(label) {}
    // Offset of the branch in the code generation buffer.
    int pc_offset_;
    // The label branched to.
    Label* label_;
  };
2692
 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, allowing to
  // easily check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // Smallest maximum-reachable pc among the unresolved branches. Must not be
  // called while unresolved_branches_ is empty.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // The PC-offset at which the next veneer pool check should happen; tracking
  // it helps reduce the overhead of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;

#if defined(V8_OS_WIN)
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif
2730
2731 private:
2732  // Avoid overflows for displacements etc.
2733  static const int kMaximalBufferSize = 512 * MB;
2734
2735  // If a veneer is emitted for a branch instruction, that instruction must be
2736  // removed from the associated label's link chain so that the assembler does
2737  // not later attempt (likely unsuccessfully) to patch it to branch directly to
2738  // the label.
2739  void DeleteUnresolvedBranchInfoForLabel(Label* label);
2740  // This function deletes the information related to the label by traversing
2741  // the label chain, and for each PC-relative instruction in the chain checking
2742  // if pending unresolved information exists. Its complexity is proportional to
2743  // the length of the label chain.
2744  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);
2745
2746  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
2747
2748  int WriteCodeComments();
2749
2750  // The pending constant pool.
2751  ConstantPool constpool_;
2752
2753  friend class EnsureSpace;
2754  friend class ConstantPool;
2755};
2756
// An Assembler over a caller-provided buffer, used to patch a fixed number of
// existing instructions in place.
class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options,
                  ExternalAssemblerBuffer(start, count * kInstrSize + kGap)),
        block_constant_pool_emission_scope(this) {}

  ~PatchingAssembler() {
    // Verify we have generated the number of instructions we expected.
    DCHECK_EQ(pc_offset() + kGap, buffer_->size());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);

 private:
  // Pools must not be emitted while patching instructions in place.
  BlockPoolsScope block_constant_pool_emission_scope;
};
2788
// RAII helper constructed before emitting an instruction sequence: blocks
// constant and veneer pool emission via BlockPoolsScope for its lifetime.
// (Constructor is defined out of line.)
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler);

 private:
  Assembler::BlockPoolsScope block_pools_scope_;
};
2796
2797}  // namespace internal
2798}  // namespace v8
2799
2800#endif  // V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_
2801