// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_
#define V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_

#include <deque>
#include <map>
#include <memory>

#include "src/base/optional.h"
#include "src/codegen/arm64/constants-arm64.h"
#include "src/codegen/arm64/instructions-arm64.h"
#include "src/codegen/arm64/register-arm64.h"
#include "src/codegen/assembler.h"
#include "src/codegen/constant-pool.h"
#include "src/common/globals.h"
#include "src/utils/utils.h"

// Windows arm64 SDK defines mvn to NEON intrinsic neon_not which will not
// be used here.
#if defined(V8_OS_WIN) && defined(mvn)
#undef mvn
#endif

#if defined(V8_OS_WIN)
#include "src/base/platform/wrappers.h"
#include "src/diagnostics/unwinding-info-win64.h"
#endif  // V8_OS_WIN

namespace v8 {
namespace internal {

// Forward declaration; the full definition lives with the safepoint table
// machinery and is only needed here by pointer/reference.
class SafepointTableBuilder;

// -----------------------------------------------------------------------------
// Immediates.

// Wraps a 64-bit immediate value together with the RelocInfo::Mode describing
// how (or whether) it must be relocated when the code is moved.
class Immediate {
 public:
  // Constructs an immediate from a heap-object handle. Explicit because the
  // handle is recorded for relocation rather than converted to a plain value.
  template <typename T>
  inline explicit Immediate(
      Handle<T> handle, RelocInfo::Mode mode = RelocInfo::FULL_EMBEDDED_OBJECT);

  // This is allowed to be an implicit constructor because Immediate is
  // a wrapper class that doesn't normally perform any type conversion.
  template <typename T>
  inline Immediate(T value);  // NOLINT(runtime/explicit)

  // As above, but with an explicit relocation mode instead of a default.
  template <typename T>
  inline Immediate(T value, RelocInfo::Mode rmode);

  // The raw 64-bit immediate payload.
  int64_t value() const { return value_; }
  // The relocation mode recorded at construction time.
  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  int64_t value_;
  RelocInfo::Mode rmode_;
};

// -----------------------------------------------------------------------------
// Operands.
// Total shift applied to a value when encoding it as a Smi (tag + shift bits).
constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize;
// Mask covering the low kSmiShift bits (the non-payload bits of a Smi).
constexpr uint64_t kSmiShiftMask = (1ULL << kSmiShift) - 1;

// Represents an operand in a machine instruction.
// An Operand is exactly one of: an immediate (with optional relocation info or
// a pending heap-object request), a shifted register, or an extended register.
// The IsImmediate/IsShiftedRegister/IsExtendedRegister predicates distinguish
// the three forms.
class Operand {
  // TODO(all): If necessary, study more in details which methods
  // TODO(all): should be inlined or not.
 public:
  // rm, {<shift> {#<shift_amount>}}
  // where <shift> is one of {LSL, LSR, ASR, ROR}.
  // <shift_amount> is uint6_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  inline Operand(Register reg, Shift shift = LSL,
                 unsigned shift_amount = 0);  // NOLINT(runtime/explicit)

  // rm, <extend> {#<shift_amount>}
  // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
  // <shift_amount> is uint2_t.
  inline Operand(Register reg, Extend extend, unsigned shift_amount = 0);

  static Operand EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Operand EmbeddedStringConstant(const StringConstantBase* str);

  // True if this operand carries a HeapObjectRequest (an object whose address
  // is not yet known and must be patched in later).
  inline bool IsHeapObjectRequest() const;
  inline HeapObjectRequest heap_object_request() const;
  inline Immediate immediate_for_heap_object_request() const;

  // Implicit constructor for all int types, ExternalReference, and Smi.
  template <typename T>
  inline Operand(T t);  // NOLINT(runtime/explicit)

  // Implicit constructor for int types.
  template <typename T>
  inline Operand(T t, RelocInfo::Mode rmode);

  inline bool IsImmediate() const;
  inline bool IsShiftedRegister() const;
  inline bool IsExtendedRegister() const;
  inline bool IsZero() const;

  // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
  // which helps in the encoding of instructions that use the stack pointer.
  inline Operand ToExtendedRegister() const;

  // Returns new Operand adapted for using with W registers.
  inline Operand ToW() const;

  // Accessors for the immediate form. ImmediateValue/ImmediateRMode expose the
  // wrapped Immediate's payload and relocation mode directly.
  inline Immediate immediate() const;
  inline int64_t ImmediateValue() const;
  inline RelocInfo::Mode ImmediateRMode() const;
  // Accessors for the register forms (shifted or extended).
  inline Register reg() const;
  inline Shift shift() const;
  inline Extend extend() const;
  inline unsigned shift_amount() const;

  // Relocation information.
  bool NeedsRelocation(const Assembler* assembler) const;

 private:
  base::Optional<HeapObjectRequest> heap_object_request_;
  Immediate immediate_;
  Register reg_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};

// MemOperand represents a memory operand in a load or store instruction.
// Addressing forms: [base, #imm] (immediate offset, with pre-/post-index
// variants selected by AddrMode) or [base, regoffset {, shift/extend #amt}]
// (register offset).
class MemOperand {
 public:
  inline MemOperand();
  // [base, #offset] with the given addressing mode (Offset, PreIndex or
  // PostIndex).
  inline explicit MemOperand(Register base, int64_t offset = 0,
                             AddrMode addrmode = Offset);
  // [base, regoffset, <shift> #shift_amount].
  inline explicit MemOperand(Register base, Register regoffset,
                             Shift shift = LSL, unsigned shift_amount = 0);
  // [base, regoffset, <extend> #shift_amount].
  inline explicit MemOperand(Register base, Register regoffset, Extend extend,
                             unsigned shift_amount = 0);
  // Builds a MemOperand from a general Operand offset (immediate or register).
  inline explicit MemOperand(Register base, const Operand& offset,
                             AddrMode addrmode = Offset);

  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
  unsigned shift_amount() const { return shift_amount_; }
  // Predicates over the addressing form of this operand.
  inline bool IsImmediateOffset() const;
  inline bool IsRegisterOffset() const;
  inline bool IsPreIndex() const;
  inline bool IsPostIndex() const;

 private:
  Register base_;
  Register regoffset_;
  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};

// -----------------------------------------------------------------------------
// Assembler.
168 169class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { 170 public: 171 // Create an assembler. Instructions and relocation information are emitted 172 // into a buffer, with the instructions starting from the beginning and the 173 // relocation information starting from the end of the buffer. See CodeDesc 174 // for a detailed comment on the layout (globals.h). 175 // 176 // If the provided buffer is nullptr, the assembler allocates and grows its 177 // own buffer. Otherwise it takes ownership of the provided buffer. 178 explicit Assembler(const AssemblerOptions&, 179 std::unique_ptr<AssemblerBuffer> = {}); 180 181 ~Assembler() override; 182 183 void AbortedCodeGeneration() override; 184 185 // System functions --------------------------------------------------------- 186 // Start generating code from the beginning of the buffer, discarding any code 187 // and data that has already been emitted into the buffer. 188 // 189 // In order to avoid any accidental transfer of state, Reset DCHECKs that the 190 // constant pool is not blocked. 191 void Reset(); 192 193 // GetCode emits any pending (non-emitted) code and fills the descriptor desc. 194 static constexpr int kNoHandlerTable = 0; 195 static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; 196 void GetCode(Isolate* isolate, CodeDesc* desc, 197 SafepointTableBuilder* safepoint_table_builder, 198 int handler_table_offset); 199 200 // Convenience wrapper for code without safepoint or handler tables. 201 void GetCode(Isolate* isolate, CodeDesc* desc) { 202 GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); 203 } 204 205 // Insert the smallest number of nop instructions 206 // possible to align the pc offset to a multiple 207 // of m. m must be a power of 2 (>= 4). 208 void Align(int m); 209 // Insert the smallest number of zero bytes possible to align the pc offset 210 // to a mulitple of m. m must be a power of 2 (>= 2). 
211 void DataAlign(int m); 212 // Aligns code to something that's optimal for a jump target for the platform. 213 void CodeTargetAlign(); 214 void LoopHeaderAlign() { CodeTargetAlign(); } 215 216 inline void Unreachable(); 217 218 // Label -------------------------------------------------------------------- 219 // Bind a label to the current pc. Note that labels can only be bound once, 220 // and if labels are linked to other instructions, they _must_ be bound 221 // before they go out of scope. 222 void bind(Label* label); 223 224 // RelocInfo and pools ------------------------------------------------------ 225 226 // Record relocation information for current pc_. 227 enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY }; 228 void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0, 229 ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY); 230 231 // Generate a B immediate instruction with the corresponding relocation info. 232 // 'offset' is the immediate to encode in the B instruction (so it is the 233 // difference between the target and the PC of the instruction, divided by 234 // the instruction size). 235 void near_jump(int offset, RelocInfo::Mode rmode); 236 // Generate a BL immediate instruction with the corresponding relocation info. 237 // As for near_jump, 'offset' is the immediate to encode in the BL 238 // instruction. 239 void near_call(int offset, RelocInfo::Mode rmode); 240 // Generate a BL immediate instruction with the corresponding relocation info 241 // for the input HeapObjectRequest. 242 void near_call(HeapObjectRequest request); 243 244 // Return the address in the constant pool of the code target address used by 245 // the branch/call instruction at pc. 246 inline static Address target_pointer_address_at(Address pc); 247 248 // Read/Modify the code target address in the branch/call instruction at pc. 249 // The isolate argument is unused (and may be nullptr) when skipping flushing. 
250 inline static Address target_address_at(Address pc, Address constant_pool); 251 252 // Read/Modify the code target address in the branch/call instruction at pc. 253 inline static Tagged_t target_compressed_address_at(Address pc, 254 Address constant_pool); 255 inline static void set_target_address_at( 256 Address pc, Address constant_pool, Address target, 257 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 258 259 inline static void set_target_compressed_address_at( 260 Address pc, Address constant_pool, Tagged_t target, 261 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 262 263 // Returns the handle for the code object called at 'pc'. 264 // This might need to be temporarily encoded as an offset into code_targets_. 265 inline Handle<CodeT> code_target_object_handle_at(Address pc); 266 inline EmbeddedObjectIndex embedded_object_index_referenced_from(Address pc); 267 inline void set_embedded_object_index_referenced_from( 268 Address p, EmbeddedObjectIndex index); 269 // Returns the handle for the heap object referenced at 'pc'. 270 inline Handle<HeapObject> target_object_handle_at(Address pc); 271 272 // Returns the target address for a runtime function for the call encoded 273 // at 'pc'. 274 // Runtime entries can be temporarily encoded as the offset between the 275 // runtime function entrypoint and the code range base (stored in the 276 // code_range_base field), in order to be encodable as we generate the code, 277 // before it is moved into the code space. 278 inline Address runtime_entry_at(Address pc); 279 280 // This sets the branch destination. 'location' here can be either the pc of 281 // an immediate branch or the address of an entry in the constant pool. 282 // This is for calls and branches within generated code. 283 inline static void deserialization_set_special_target_at(Address location, 284 Code code, 285 Address target); 286 287 // Get the size of the special target encoded at 'location'. 
288 inline static int deserialization_special_target_size(Address location); 289 290 // This sets the internal reference at the pc. 291 inline static void deserialization_set_target_internal_reference_at( 292 Address pc, Address target, 293 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 294 295 // This value is used in the serialization process and must be zero for 296 // ARM64, as the code target is split across multiple instructions and does 297 // not exist separately in the code, so the serializer should not step 298 // forwards in memory after a target is resolved and written. 299 static constexpr int kSpecialTargetSize = 0; 300 301 // Size of the generated code in bytes 302 uint64_t SizeOfGeneratedCode() const { 303 DCHECK((pc_ >= buffer_start_) && (pc_ < (buffer_start_ + buffer_->size()))); 304 return pc_ - buffer_start_; 305 } 306 307 // Return the code size generated from label to the current position. 308 uint64_t SizeOfCodeGeneratedSince(const Label* label) { 309 DCHECK(label->is_bound()); 310 DCHECK_GE(pc_offset(), label->pos()); 311 DCHECK_LT(pc_offset(), buffer_->size()); 312 return pc_offset() - label->pos(); 313 } 314 315 // Return the number of instructions generated from label to the 316 // current position. 317 uint64_t InstructionsGeneratedSince(const Label* label) { 318 return SizeOfCodeGeneratedSince(label) / kInstrSize; 319 } 320 321 static bool IsConstantPoolAt(Instruction* instr); 322 static int ConstantPoolSizeAt(Instruction* instr); 323 // See Assembler::CheckConstPool for more info. 324 void EmitPoolGuard(); 325 326 // Prevent veneer pool emission until EndBlockVeneerPool is called. 327 // Call to this function can be nested but must be followed by an equal 328 // number of calls to EndBlockConstpool. 329 void StartBlockVeneerPool(); 330 331 // Resume constant pool emission. Need to be called as many time as 332 // StartBlockVeneerPool to have an effect. 
333 void EndBlockVeneerPool(); 334 335 bool is_veneer_pool_blocked() const { 336 return veneer_pool_blocked_nesting_ > 0; 337 } 338 339 // Record a deoptimization reason that can be used by a log or cpu profiler. 340 // Use --trace-deopt to enable. 341 void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id, 342 SourcePosition position, int id); 343 344 int buffer_space() const; 345 346 // Record the emission of a constant pool. 347 // 348 // The emission of constant and veneer pools depends on the size of the code 349 // generated and the number of RelocInfo recorded. 350 // The Debug mechanism needs to map code offsets between two versions of a 351 // function, compiled with and without debugger support (see for example 352 // Debug::PrepareForBreakPoints()). 353 // Compiling functions with debugger support generates additional code 354 // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools 355 // and cause the version of the code with debugger support to have pools 356 // generated in different places. 357 // Recording the position and size of emitted pools allows to correctly 358 // compute the offset mappings between the different versions of a function in 359 // all situations. 360 // 361 // The parameter indicates the size of the pool (in bytes), including 362 // the marker and branch over the data. 363 void RecordConstPool(int size); 364 365 // Instruction set functions ------------------------------------------------ 366 367 // Branch / Jump instructions. 368 // For branches offsets are scaled, i.e. in instructions not in bytes. 369 // Branch to register. 370 void br(const Register& xn); 371 372 // Branch-link to register. 373 void blr(const Register& xn); 374 375 // Branch to register with return hint. 376 void ret(const Register& xn = lr); 377 378 // Unconditional branch to label. 379 void b(Label* label); 380 381 // Conditional branch to label. 
382 void b(Label* label, Condition cond); 383 384 // Unconditional branch to PC offset. 385 void b(int imm26); 386 387 // Conditional branch to PC offset. 388 void b(int imm19, Condition cond); 389 390 // Branch-link to label / pc offset. 391 void bl(Label* label); 392 void bl(int imm26); 393 394 // Compare and branch to label / pc offset if zero. 395 void cbz(const Register& rt, Label* label); 396 void cbz(const Register& rt, int imm19); 397 398 // Compare and branch to label / pc offset if not zero. 399 void cbnz(const Register& rt, Label* label); 400 void cbnz(const Register& rt, int imm19); 401 402 // Test bit and branch to label / pc offset if zero. 403 void tbz(const Register& rt, unsigned bit_pos, Label* label); 404 void tbz(const Register& rt, unsigned bit_pos, int imm14); 405 406 // Test bit and branch to label / pc offset if not zero. 407 void tbnz(const Register& rt, unsigned bit_pos, Label* label); 408 void tbnz(const Register& rt, unsigned bit_pos, int imm14); 409 410 // Address calculation instructions. 411 // Calculate a PC-relative address. Unlike for branches the offset in adr is 412 // unscaled (i.e. the result can be unaligned). 413 void adr(const Register& rd, Label* label); 414 void adr(const Register& rd, int imm21); 415 416 // Data Processing instructions. 417 // Add. 418 void add(const Register& rd, const Register& rn, const Operand& operand); 419 420 // Add and update status flags. 421 void adds(const Register& rd, const Register& rn, const Operand& operand); 422 423 // Compare negative. 424 void cmn(const Register& rn, const Operand& operand); 425 426 // Subtract. 427 void sub(const Register& rd, const Register& rn, const Operand& operand); 428 429 // Subtract and update status flags. 430 void subs(const Register& rd, const Register& rn, const Operand& operand); 431 432 // Compare. 433 void cmp(const Register& rn, const Operand& operand); 434 435 // Negate. 
436 void neg(const Register& rd, const Operand& operand); 437 438 // Negate and update status flags. 439 void negs(const Register& rd, const Operand& operand); 440 441 // Add with carry bit. 442 void adc(const Register& rd, const Register& rn, const Operand& operand); 443 444 // Add with carry bit and update status flags. 445 void adcs(const Register& rd, const Register& rn, const Operand& operand); 446 447 // Subtract with carry bit. 448 void sbc(const Register& rd, const Register& rn, const Operand& operand); 449 450 // Subtract with carry bit and update status flags. 451 void sbcs(const Register& rd, const Register& rn, const Operand& operand); 452 453 // Negate with carry bit. 454 void ngc(const Register& rd, const Operand& operand); 455 456 // Negate with carry bit and update status flags. 457 void ngcs(const Register& rd, const Operand& operand); 458 459 // Logical instructions. 460 // Bitwise and (A & B). 461 void and_(const Register& rd, const Register& rn, const Operand& operand); 462 463 // Bitwise and (A & B) and update status flags. 464 void ands(const Register& rd, const Register& rn, const Operand& operand); 465 466 // Bit test, and set flags. 467 void tst(const Register& rn, const Operand& operand); 468 469 // Bit clear (A & ~B). 470 void bic(const Register& rd, const Register& rn, const Operand& operand); 471 472 // Bit clear (A & ~B) and update status flags. 473 void bics(const Register& rd, const Register& rn, const Operand& operand); 474 475 // Bitwise and. 476 void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); 477 478 // Bit clear immediate. 479 void bic(const VRegister& vd, const int imm8, const int left_shift = 0); 480 481 // Bit clear. 482 void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); 483 484 // Bitwise insert if false. 485 void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); 486 487 // Bitwise insert if true. 
488 void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); 489 490 // Bitwise select. 491 void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 492 493 // Polynomial multiply. 494 void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 495 496 // Vector move immediate. 497 void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL, 498 const int shift_amount = 0); 499 500 // Bitwise not. 501 void mvn(const VRegister& vd, const VRegister& vn); 502 503 // Vector move inverted immediate. 504 void mvni(const VRegister& vd, const int imm8, Shift shift = LSL, 505 const int shift_amount = 0); 506 507 // Signed saturating accumulate of unsigned value. 508 void suqadd(const VRegister& vd, const VRegister& vn); 509 510 // Unsigned saturating accumulate of signed value. 511 void usqadd(const VRegister& vd, const VRegister& vn); 512 513 // Absolute value. 514 void abs(const VRegister& vd, const VRegister& vn); 515 516 // Signed saturating absolute value. 517 void sqabs(const VRegister& vd, const VRegister& vn); 518 519 // Negate. 520 void neg(const VRegister& vd, const VRegister& vn); 521 522 // Signed saturating negate. 523 void sqneg(const VRegister& vd, const VRegister& vn); 524 525 // Bitwise not. 526 void not_(const VRegister& vd, const VRegister& vn); 527 528 // Extract narrow. 529 void xtn(const VRegister& vd, const VRegister& vn); 530 531 // Extract narrow (second part). 532 void xtn2(const VRegister& vd, const VRegister& vn); 533 534 // Signed saturating extract narrow. 535 void sqxtn(const VRegister& vd, const VRegister& vn); 536 537 // Signed saturating extract narrow (second part). 538 void sqxtn2(const VRegister& vd, const VRegister& vn); 539 540 // Unsigned saturating extract narrow. 541 void uqxtn(const VRegister& vd, const VRegister& vn); 542 543 // Unsigned saturating extract narrow (second part). 
544 void uqxtn2(const VRegister& vd, const VRegister& vn); 545 546 // Signed saturating extract unsigned narrow. 547 void sqxtun(const VRegister& vd, const VRegister& vn); 548 549 // Signed saturating extract unsigned narrow (second part). 550 void sqxtun2(const VRegister& vd, const VRegister& vn); 551 552 // Move register to register. 553 void mov(const VRegister& vd, const VRegister& vn); 554 555 // Bitwise not or. 556 void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 557 558 // Bitwise exclusive or. 559 void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); 560 561 // Bitwise or (A | B). 562 void orr(const Register& rd, const Register& rn, const Operand& operand); 563 564 // Bitwise or. 565 void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); 566 567 // Bitwise or immediate. 568 void orr(const VRegister& vd, const int imm8, const int left_shift = 0); 569 570 // Bitwise nor (A | ~B). 571 void orn(const Register& rd, const Register& rn, const Operand& operand); 572 573 // Bitwise eor/xor (A ^ B). 574 void eor(const Register& rd, const Register& rn, const Operand& operand); 575 576 // Bitwise enor/xnor (A ^ ~B). 577 void eon(const Register& rd, const Register& rn, const Operand& operand); 578 579 // Logical shift left variable. 580 void lslv(const Register& rd, const Register& rn, const Register& rm); 581 582 // Logical shift right variable. 583 void lsrv(const Register& rd, const Register& rn, const Register& rm); 584 585 // Arithmetic shift right variable. 586 void asrv(const Register& rd, const Register& rn, const Register& rm); 587 588 // Rotate right variable. 589 void rorv(const Register& rd, const Register& rn, const Register& rm); 590 591 // Bitfield instructions. 592 // Bitfield move. 593 void bfm(const Register& rd, const Register& rn, int immr, int imms); 594 595 // Signed bitfield move. 
596 void sbfm(const Register& rd, const Register& rn, int immr, int imms); 597 598 // Unsigned bitfield move. 599 void ubfm(const Register& rd, const Register& rn, int immr, int imms); 600 601 // Bfm aliases. 602 // Bitfield insert. 603 void bfi(const Register& rd, const Register& rn, int lsb, int width) { 604 DCHECK_GE(width, 1); 605 DCHECK(lsb + width <= rn.SizeInBits()); 606 bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 607 } 608 609 // Bitfield extract and insert low. 610 void bfxil(const Register& rd, const Register& rn, int lsb, int width) { 611 DCHECK_GE(width, 1); 612 DCHECK(lsb + width <= rn.SizeInBits()); 613 bfm(rd, rn, lsb, lsb + width - 1); 614 } 615 616 // Sbfm aliases. 617 // Arithmetic shift right. 618 void asr(const Register& rd, const Register& rn, int shift) { 619 DCHECK(shift < rd.SizeInBits()); 620 sbfm(rd, rn, shift, rd.SizeInBits() - 1); 621 } 622 623 // Signed bitfield insert in zero. 624 void sbfiz(const Register& rd, const Register& rn, int lsb, int width) { 625 DCHECK_GE(width, 1); 626 DCHECK(lsb + width <= rn.SizeInBits()); 627 sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 628 } 629 630 // Signed bitfield extract. 631 void sbfx(const Register& rd, const Register& rn, int lsb, int width) { 632 DCHECK_GE(width, 1); 633 DCHECK(lsb + width <= rn.SizeInBits()); 634 sbfm(rd, rn, lsb, lsb + width - 1); 635 } 636 637 // Signed extend byte. 638 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } 639 640 // Signed extend halfword. 641 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } 642 643 // Signed extend word. 644 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } 645 646 // Ubfm aliases. 647 // Logical shift left. 
648 void lsl(const Register& rd, const Register& rn, int shift) { 649 int reg_size = rd.SizeInBits(); 650 DCHECK(shift < reg_size); 651 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); 652 } 653 654 // Logical shift right. 655 void lsr(const Register& rd, const Register& rn, int shift) { 656 DCHECK(shift < rd.SizeInBits()); 657 ubfm(rd, rn, shift, rd.SizeInBits() - 1); 658 } 659 660 // Unsigned bitfield insert in zero. 661 void ubfiz(const Register& rd, const Register& rn, int lsb, int width) { 662 DCHECK_GE(width, 1); 663 DCHECK(lsb + width <= rn.SizeInBits()); 664 ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 665 } 666 667 // Unsigned bitfield extract. 668 void ubfx(const Register& rd, const Register& rn, int lsb, int width) { 669 DCHECK_GE(width, 1); 670 DCHECK(lsb + width <= rn.SizeInBits()); 671 ubfm(rd, rn, lsb, lsb + width - 1); 672 } 673 674 // Unsigned extend byte. 675 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } 676 677 // Unsigned extend halfword. 678 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } 679 680 // Unsigned extend word. 681 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } 682 683 // Extract. 684 void extr(const Register& rd, const Register& rn, const Register& rm, 685 int lsb); 686 687 // Conditional select: rd = cond ? rn : rm. 688 void csel(const Register& rd, const Register& rn, const Register& rm, 689 Condition cond); 690 691 // Conditional select increment: rd = cond ? rn : rm + 1. 692 void csinc(const Register& rd, const Register& rn, const Register& rm, 693 Condition cond); 694 695 // Conditional select inversion: rd = cond ? rn : ~rm. 696 void csinv(const Register& rd, const Register& rn, const Register& rm, 697 Condition cond); 698 699 // Conditional select negation: rd = cond ? rn : -rm. 
700 void csneg(const Register& rd, const Register& rn, const Register& rm, 701 Condition cond); 702 703 // Conditional set: rd = cond ? 1 : 0. 704 void cset(const Register& rd, Condition cond); 705 706 // Conditional set minus: rd = cond ? -1 : 0. 707 void csetm(const Register& rd, Condition cond); 708 709 // Conditional increment: rd = cond ? rn + 1 : rn. 710 void cinc(const Register& rd, const Register& rn, Condition cond); 711 712 // Conditional invert: rd = cond ? ~rn : rn. 713 void cinv(const Register& rd, const Register& rn, Condition cond); 714 715 // Conditional negate: rd = cond ? -rn : rn. 716 void cneg(const Register& rd, const Register& rn, Condition cond); 717 718 // Extr aliases. 719 void ror(const Register& rd, const Register& rs, unsigned shift) { 720 extr(rd, rs, rs, shift); 721 } 722 723 // Conditional comparison. 724 // Conditional compare negative. 725 void ccmn(const Register& rn, const Operand& operand, StatusFlags nzcv, 726 Condition cond); 727 728 // Conditional compare. 729 void ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv, 730 Condition cond); 731 732 // Multiplication. 733 // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply. 734 void mul(const Register& rd, const Register& rn, const Register& rm); 735 736 // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate. 737 void madd(const Register& rd, const Register& rn, const Register& rm, 738 const Register& ra); 739 740 // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply. 741 void mneg(const Register& rd, const Register& rn, const Register& rm); 742 743 // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract. 744 void msub(const Register& rd, const Register& rn, const Register& rm, 745 const Register& ra); 746 747 // 32 x 32 -> 64-bit multiply. 748 void smull(const Register& rd, const Register& rn, const Register& rm); 749 750 // Xd = bits<127:64> of Xn * Xm. 
751 void smulh(const Register& rd, const Register& rn, const Register& rm); 752 753 // Signed 32 x 32 -> 64-bit multiply and accumulate. 754 void smaddl(const Register& rd, const Register& rn, const Register& rm, 755 const Register& ra); 756 757 // Unsigned 32 x 32 -> 64-bit multiply and accumulate. 758 void umaddl(const Register& rd, const Register& rn, const Register& rm, 759 const Register& ra); 760 761 // Signed 32 x 32 -> 64-bit multiply and subtract. 762 void smsubl(const Register& rd, const Register& rn, const Register& rm, 763 const Register& ra); 764 765 // Unsigned 32 x 32 -> 64-bit multiply and subtract. 766 void umsubl(const Register& rd, const Register& rn, const Register& rm, 767 const Register& ra); 768 769 // Signed integer divide. 770 void sdiv(const Register& rd, const Register& rn, const Register& rm); 771 772 // Unsigned integer divide. 773 void udiv(const Register& rd, const Register& rn, const Register& rm); 774 775 // Bit count, bit reverse and endian reverse. 776 void rbit(const Register& rd, const Register& rn); 777 void rev16(const Register& rd, const Register& rn); 778 void rev32(const Register& rd, const Register& rn); 779 void rev(const Register& rd, const Register& rn); 780 void clz(const Register& rd, const Register& rn); 781 void cls(const Register& rd, const Register& rn); 782 783 // Pointer Authentication Code for Instruction address, using key B, with 784 // address in x17 and modifier in x16 [Armv8.3]. 785 void pacib1716(); 786 787 // Pointer Authentication Code for Instruction address, using key B, with 788 // address in LR and modifier in SP [Armv8.3]. 789 void pacibsp(); 790 791 // Authenticate Instruction address, using key B, with address in x17 and 792 // modifier in x16 [Armv8.3]. 793 void autib1716(); 794 795 // Authenticate Instruction address, using key B, with address in LR and 796 // modifier in SP [Armv8.3]. 797 void autibsp(); 798 799 // Memory instructions. 800 801 // Load integer or FP register. 
802 void ldr(const CPURegister& rt, const MemOperand& src); 803 804 // Store integer or FP register. 805 void str(const CPURegister& rt, const MemOperand& dst); 806 807 // Load word with sign extension. 808 void ldrsw(const Register& rt, const MemOperand& src); 809 810 // Load byte. 811 void ldrb(const Register& rt, const MemOperand& src); 812 813 // Store byte. 814 void strb(const Register& rt, const MemOperand& dst); 815 816 // Load byte with sign extension. 817 void ldrsb(const Register& rt, const MemOperand& src); 818 819 // Load half-word. 820 void ldrh(const Register& rt, const MemOperand& src); 821 822 // Store half-word. 823 void strh(const Register& rt, const MemOperand& dst); 824 825 // Load half-word with sign extension. 826 void ldrsh(const Register& rt, const MemOperand& src); 827 828 // Load integer or FP register pair. 829 void ldp(const CPURegister& rt, const CPURegister& rt2, 830 const MemOperand& src); 831 832 // Store integer or FP register pair. 833 void stp(const CPURegister& rt, const CPURegister& rt2, 834 const MemOperand& dst); 835 836 // Load word pair with sign extension. 837 void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src); 838 839 // Load literal to register from a pc relative address. 840 void ldr_pcrel(const CPURegister& rt, int imm19); 841 842 // Load literal to register. 843 void ldr(const CPURegister& rt, const Immediate& imm); 844 void ldr(const CPURegister& rt, const Operand& operand); 845 846 // Load-acquire word. 847 void ldar(const Register& rt, const Register& rn); 848 849 // Load-acquire exclusive word. 850 void ldaxr(const Register& rt, const Register& rn); 851 852 // Store-release word. 853 void stlr(const Register& rt, const Register& rn); 854 855 // Store-release exclusive word. 856 void stlxr(const Register& rs, const Register& rt, const Register& rn); 857 858 // Load-acquire byte. 859 void ldarb(const Register& rt, const Register& rn); 860 861 // Load-acquire exclusive byte. 
  void ldaxrb(const Register& rt, const Register& rn);

  // Store-release byte.
  void stlrb(const Register& rt, const Register& rn);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const Register& rn);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const Register& rn);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const Register& rn);

  // Store-release half-word.
  void stlrh(const Register& rt, const Register& rn);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const Register& rn);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.

  // Move and keep (MOVK): writes the 16-bit immediate into the half word of
  // rd selected by the shift, leaving the other bits unchanged.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move with non-zero (MOVN): writes the bitwise NOT of the shifted 16-bit
  // immediate to rd (see the Arm ARM A64 MOVN description).
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move with zero (MOVZ): writes the shifted 16-bit immediate to rd,
  // clearing all other bits.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move NOT(operand) to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& rt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Conditional speculation barrier.
  void csdb();

  // Branch target identification.
  void bti(BranchTargetIdentifier id);

  // No-op: emitted as a HINT instruction with the NOP code.
  void nop() { hint(NOP); }

  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    DEBUG_BREAK_NOP,
    INTERRUPT_CODE_NOP,
    ADR_FAR_NOP,
    FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
    LAST_NOP_MARKER = ADR_FAR_NOP
  };

  // Emit a marker nop identifying one of the NopMarkerTypes states above.
  void nop(NopMarkerTypes n);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
985 void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, 986 int vm_index); 987 988 // Signed long multiply-add by scalar element (second part). 989 void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 990 int vm_index); 991 992 // Unsigned long multiply-add by scalar element. 993 void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, 994 int vm_index); 995 996 // Unsigned long multiply-add by scalar element (second part). 997 void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 998 int vm_index); 999 1000 // Signed long multiply-sub by scalar element. 1001 void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1002 int vm_index); 1003 1004 // Signed long multiply-sub by scalar element (second part). 1005 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1006 int vm_index); 1007 1008 // Unsigned long multiply-sub by scalar element. 1009 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1010 int vm_index); 1011 1012 // Unsigned long multiply-sub by scalar element (second part). 1013 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1014 int vm_index); 1015 1016 // Signed long multiply by scalar element. 1017 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1018 int vm_index); 1019 1020 // Signed long multiply by scalar element (second part). 1021 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1022 int vm_index); 1023 1024 // Unsigned long multiply by scalar element. 1025 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1026 int vm_index); 1027 1028 // Unsigned long multiply by scalar element (second part). 1029 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1030 int vm_index); 1031 1032 // Add narrow returning high half. 
1033 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1034 1035 // Add narrow returning high half (second part). 1036 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1037 1038 // Signed saturating double long multiply by element. 1039 void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1040 int vm_index); 1041 1042 // Signed saturating double long multiply by element (second part). 1043 void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1044 int vm_index); 1045 1046 // Signed saturating doubling long multiply-add by element. 1047 void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1048 int vm_index); 1049 1050 // Signed saturating doubling long multiply-add by element (second part). 1051 void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1052 int vm_index); 1053 1054 // Signed saturating doubling long multiply-sub by element. 1055 void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1056 int vm_index); 1057 1058 // Signed saturating doubling long multiply-sub by element (second part). 1059 void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1060 int vm_index); 1061 1062 // Compare bitwise to zero. 1063 void cmeq(const VRegister& vd, const VRegister& vn, int value); 1064 1065 // Compare signed greater than or equal to zero. 1066 void cmge(const VRegister& vd, const VRegister& vn, int value); 1067 1068 // Compare signed greater than zero. 1069 void cmgt(const VRegister& vd, const VRegister& vn, int value); 1070 1071 // Compare signed less than or equal to zero. 1072 void cmle(const VRegister& vd, const VRegister& vn, int value); 1073 1074 // Compare signed less than zero. 1075 void cmlt(const VRegister& vd, const VRegister& vn, int value); 1076 1077 // Unsigned rounding halving add. 
1078 void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1079 1080 // Compare equal. 1081 void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1082 1083 // Compare signed greater than or equal. 1084 void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1085 1086 // Compare signed greater than. 1087 void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1088 1089 // Compare unsigned higher. 1090 void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1091 1092 // Compare unsigned higher or same. 1093 void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1094 1095 // Compare bitwise test bits nonzero. 1096 void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1097 1098 // Signed shift left by register. 1099 void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1100 1101 // Unsigned shift left by register. 1102 void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1103 1104 // Signed saturating doubling long multiply-subtract. 1105 void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1106 1107 // Signed saturating doubling long multiply-subtract (second part). 1108 void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1109 1110 // Signed saturating doubling long multiply. 1111 void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1112 1113 // Signed saturating doubling long multiply (second part). 1114 void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1115 1116 // Signed saturating doubling multiply returning high half. 1117 void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1118 1119 // Signed saturating rounding doubling multiply returning high half. 
1120 void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1121 1122 // Signed saturating doubling multiply element returning high half. 1123 void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1124 int vm_index); 1125 1126 // Signed saturating rounding doubling multiply element returning high half. 1127 void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1128 int vm_index); 1129 1130 // Unsigned long multiply long. 1131 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1132 1133 // Unsigned long multiply (second part). 1134 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1135 1136 // Rounding add narrow returning high half. 1137 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1138 1139 // Subtract narrow returning high half. 1140 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1141 1142 // Subtract narrow returning high half (second part). 1143 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1144 1145 // Rounding add narrow returning high half (second part). 1146 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1147 1148 // Rounding subtract narrow returning high half. 1149 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1150 1151 // Rounding subtract narrow returning high half (second part). 1152 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1153 1154 // Signed saturating shift left by register. 1155 void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1156 1157 // Unsigned saturating shift left by register. 1158 void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1159 1160 // Signed rounding shift left by register. 
1161 void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1162 1163 // Unsigned rounding shift left by register. 1164 void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1165 1166 // Signed saturating rounding shift left by register. 1167 void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1168 1169 // Unsigned saturating rounding shift left by register. 1170 void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1171 1172 // Signed absolute difference. 1173 void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1174 1175 // Unsigned absolute difference and accumulate. 1176 void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1177 1178 // Shift left by immediate and insert. 1179 void sli(const VRegister& vd, const VRegister& vn, int shift); 1180 1181 // Shift right by immediate and insert. 1182 void sri(const VRegister& vd, const VRegister& vn, int shift); 1183 1184 // Signed maximum. 1185 void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1186 1187 // Signed pairwise maximum. 1188 void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1189 1190 // Add across vector. 1191 void addv(const VRegister& vd, const VRegister& vn); 1192 1193 // Signed add long across vector. 1194 void saddlv(const VRegister& vd, const VRegister& vn); 1195 1196 // Unsigned add long across vector. 1197 void uaddlv(const VRegister& vd, const VRegister& vn); 1198 1199 // FP maximum number across vector. 1200 void fmaxnmv(const VRegister& vd, const VRegister& vn); 1201 1202 // FP maximum across vector. 1203 void fmaxv(const VRegister& vd, const VRegister& vn); 1204 1205 // FP minimum number across vector. 1206 void fminnmv(const VRegister& vd, const VRegister& vn); 1207 1208 // FP minimum across vector. 1209 void fminv(const VRegister& vd, const VRegister& vn); 1210 1211 // Signed maximum across vector. 
1212 void smaxv(const VRegister& vd, const VRegister& vn); 1213 1214 // Signed minimum. 1215 void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1216 1217 // Signed minimum pairwise. 1218 void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1219 1220 // Signed minimum across vector. 1221 void sminv(const VRegister& vd, const VRegister& vn); 1222 1223 // One-element structure store from one register. 1224 void st1(const VRegister& vt, const MemOperand& src); 1225 1226 // One-element structure store from two registers. 1227 void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1228 1229 // One-element structure store from three registers. 1230 void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1231 const MemOperand& src); 1232 1233 // One-element structure store from four registers. 1234 void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1235 const VRegister& vt4, const MemOperand& src); 1236 1237 // One-element single structure store from one lane. 1238 void st1(const VRegister& vt, int lane, const MemOperand& src); 1239 1240 // Two-element structure store from two registers. 1241 void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1242 1243 // Two-element single structure store from two lanes. 1244 void st2(const VRegister& vt, const VRegister& vt2, int lane, 1245 const MemOperand& src); 1246 1247 // Three-element structure store from three registers. 1248 void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1249 const MemOperand& src); 1250 1251 // Three-element single structure store from three lanes. 1252 void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1253 int lane, const MemOperand& src); 1254 1255 // Four-element structure store from four registers. 
1256 void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1257 const VRegister& vt4, const MemOperand& src); 1258 1259 // Four-element single structure store from four lanes. 1260 void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1261 const VRegister& vt4, int lane, const MemOperand& src); 1262 1263 // Unsigned add long. 1264 void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1265 1266 // Unsigned add long (second part). 1267 void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1268 1269 // Unsigned add wide. 1270 void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1271 1272 // Unsigned add wide (second part). 1273 void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1274 1275 // Signed add long. 1276 void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1277 1278 // Signed add long (second part). 1279 void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1280 1281 // Signed add wide. 1282 void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1283 1284 // Signed add wide (second part). 1285 void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1286 1287 // Unsigned subtract long. 1288 void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1289 1290 // Unsigned subtract long (second part). 1291 void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1292 1293 // Unsigned subtract wide. 1294 void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1295 1296 // Signed subtract long. 1297 void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1298 1299 // Signed subtract long (second part). 1300 void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1301 1302 // Signed integer subtract wide. 
1303 void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1304 1305 // Signed integer subtract wide (second part). 1306 void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1307 1308 // Unsigned subtract wide (second part). 1309 void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1310 1311 // Unsigned maximum. 1312 void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1313 1314 // Unsigned pairwise maximum. 1315 void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1316 1317 // Unsigned maximum across vector. 1318 void umaxv(const VRegister& vd, const VRegister& vn); 1319 1320 // Unsigned minimum. 1321 void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1322 1323 // Unsigned pairwise minimum. 1324 void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1325 1326 // Unsigned minimum across vector. 1327 void uminv(const VRegister& vd, const VRegister& vn); 1328 1329 // Transpose vectors (primary). 1330 void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1331 1332 // Transpose vectors (secondary). 1333 void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1334 1335 // Unzip vectors (primary). 1336 void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1337 1338 // Unzip vectors (secondary). 1339 void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1340 1341 // Zip vectors (primary). 1342 void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1343 1344 // Zip vectors (secondary). 1345 void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1346 1347 // Signed shift right by immediate. 1348 void sshr(const VRegister& vd, const VRegister& vn, int shift); 1349 1350 // Unsigned shift right by immediate. 
1351 void ushr(const VRegister& vd, const VRegister& vn, int shift); 1352 1353 // Signed rounding shift right by immediate. 1354 void srshr(const VRegister& vd, const VRegister& vn, int shift); 1355 1356 // Unsigned rounding shift right by immediate. 1357 void urshr(const VRegister& vd, const VRegister& vn, int shift); 1358 1359 // Signed shift right by immediate and accumulate. 1360 void ssra(const VRegister& vd, const VRegister& vn, int shift); 1361 1362 // Unsigned shift right by immediate and accumulate. 1363 void usra(const VRegister& vd, const VRegister& vn, int shift); 1364 1365 // Signed rounding shift right by immediate and accumulate. 1366 void srsra(const VRegister& vd, const VRegister& vn, int shift); 1367 1368 // Unsigned rounding shift right by immediate and accumulate. 1369 void ursra(const VRegister& vd, const VRegister& vn, int shift); 1370 1371 // Shift right narrow by immediate. 1372 void shrn(const VRegister& vd, const VRegister& vn, int shift); 1373 1374 // Shift right narrow by immediate (second part). 1375 void shrn2(const VRegister& vd, const VRegister& vn, int shift); 1376 1377 // Rounding shift right narrow by immediate. 1378 void rshrn(const VRegister& vd, const VRegister& vn, int shift); 1379 1380 // Rounding shift right narrow by immediate (second part). 1381 void rshrn2(const VRegister& vd, const VRegister& vn, int shift); 1382 1383 // Unsigned saturating shift right narrow by immediate. 1384 void uqshrn(const VRegister& vd, const VRegister& vn, int shift); 1385 1386 // Unsigned saturating shift right narrow by immediate (second part). 1387 void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); 1388 1389 // Unsigned saturating rounding shift right narrow by immediate. 1390 void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); 1391 1392 // Unsigned saturating rounding shift right narrow by immediate (second part). 
1393 void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 1394 1395 // Signed saturating shift right narrow by immediate. 1396 void sqshrn(const VRegister& vd, const VRegister& vn, int shift); 1397 1398 // Signed saturating shift right narrow by immediate (second part). 1399 void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); 1400 1401 // Signed saturating rounded shift right narrow by immediate. 1402 void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); 1403 1404 // Signed saturating rounded shift right narrow by immediate (second part). 1405 void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 1406 1407 // Signed saturating shift right unsigned narrow by immediate. 1408 void sqshrun(const VRegister& vd, const VRegister& vn, int shift); 1409 1410 // Signed saturating shift right unsigned narrow by immediate (second part). 1411 void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); 1412 1413 // Signed sat rounded shift right unsigned narrow by immediate. 1414 void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); 1415 1416 // Signed sat rounded shift right unsigned narrow by immediate (second part). 1417 void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); 1418 1419 // FP reciprocal step. 1420 void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1421 1422 // FP reciprocal estimate. 1423 void frecpe(const VRegister& vd, const VRegister& vn); 1424 1425 // FP reciprocal square root estimate. 1426 void frsqrte(const VRegister& vd, const VRegister& vn); 1427 1428 // FP reciprocal square root step. 1429 void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1430 1431 // Signed absolute difference and accumulate long. 1432 void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1433 1434 // Signed absolute difference and accumulate long (second part). 
1435 void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1436 1437 // Unsigned absolute difference and accumulate long. 1438 void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1439 1440 // Unsigned absolute difference and accumulate long (second part). 1441 void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1442 1443 // Signed absolute difference long. 1444 void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1445 1446 // Signed absolute difference long (second part). 1447 void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1448 1449 // Unsigned absolute difference long. 1450 void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1451 1452 // Unsigned absolute difference long (second part). 1453 void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1454 1455 // Polynomial multiply long. 1456 void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1457 1458 // Polynomial multiply long (second part). 1459 void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1460 1461 // Signed long multiply-add. 1462 void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1463 1464 // Signed long multiply-add (second part). 1465 void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1466 1467 // Unsigned long multiply-add. 1468 void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1469 1470 // Unsigned long multiply-add (second part). 1471 void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1472 1473 // Signed long multiply-sub. 1474 void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1475 1476 // Signed long multiply-sub (second part). 1477 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1478 1479 // Unsigned long multiply-sub. 
1480 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1481 1482 // Unsigned long multiply-sub (second part). 1483 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1484 1485 // Signed long multiply. 1486 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1487 1488 // Signed long multiply (second part). 1489 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1490 1491 // Signed saturating doubling long multiply-add. 1492 void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1493 1494 // Signed saturating doubling long multiply-add (second part). 1495 void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1496 1497 // Unsigned absolute difference. 1498 void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1499 1500 // Signed absolute difference and accumulate. 1501 void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1502 1503 // FP instructions. 1504 // Move immediate to FP register. 1505 void fmov(const VRegister& fd, double imm); 1506 void fmov(const VRegister& fd, float imm); 1507 1508 // Move FP register to register. 1509 void fmov(const Register& rd, const VRegister& fn); 1510 1511 // Move register to FP register. 1512 void fmov(const VRegister& fd, const Register& rn); 1513 1514 // Move FP register to FP register. 1515 void fmov(const VRegister& fd, const VRegister& fn); 1516 1517 // Move 64-bit register to top half of 128-bit FP register. 1518 void fmov(const VRegister& vd, int index, const Register& rn); 1519 1520 // Move top half of 128-bit FP register to 64-bit register. 1521 void fmov(const Register& rd, const VRegister& vn, int index); 1522 1523 // FP add. 1524 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1525 1526 // FP subtract. 1527 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1528 1529 // FP multiply. 
1530 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1531 1532 // FP compare equal to zero. 1533 void fcmeq(const VRegister& vd, const VRegister& vn, double imm); 1534 1535 // FP greater than zero. 1536 void fcmgt(const VRegister& vd, const VRegister& vn, double imm); 1537 1538 // FP greater than or equal to zero. 1539 void fcmge(const VRegister& vd, const VRegister& vn, double imm); 1540 1541 // FP less than or equal to zero. 1542 void fcmle(const VRegister& vd, const VRegister& vn, double imm); 1543 1544 // FP less than to zero. 1545 void fcmlt(const VRegister& vd, const VRegister& vn, double imm); 1546 1547 // FP absolute difference. 1548 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1549 1550 // FP pairwise add vector. 1551 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1552 1553 // FP pairwise add scalar. 1554 void faddp(const VRegister& vd, const VRegister& vn); 1555 1556 // FP pairwise maximum scalar. 1557 void fmaxp(const VRegister& vd, const VRegister& vn); 1558 1559 // FP pairwise maximum number scalar. 1560 void fmaxnmp(const VRegister& vd, const VRegister& vn); 1561 1562 // FP pairwise minimum number scalar. 1563 void fminnmp(const VRegister& vd, const VRegister& vn); 1564 1565 // FP vector multiply accumulate. 1566 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1567 1568 // FP vector multiply subtract. 1569 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1570 1571 // FP vector multiply extended. 1572 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1573 1574 // FP absolute greater than or equal. 1575 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1576 1577 // FP absolute greater than. 1578 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1579 1580 // FP multiply by element. 
1581 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1582 int vm_index); 1583 1584 // FP fused multiply-add to accumulator by element. 1585 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1586 int vm_index); 1587 1588 // FP fused multiply-sub from accumulator by element. 1589 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1590 int vm_index); 1591 1592 // FP multiply extended by element. 1593 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1594 int vm_index); 1595 1596 // FP compare equal. 1597 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1598 1599 // FP greater than. 1600 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1601 1602 // FP greater than or equal. 1603 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1604 1605 // FP pairwise maximum vector. 1606 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1607 1608 // FP pairwise minimum vector. 1609 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1610 1611 // FP pairwise minimum scalar. 1612 void fminp(const VRegister& vd, const VRegister& vn); 1613 1614 // FP pairwise maximum number vector. 1615 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1616 1617 // FP pairwise minimum number vector. 1618 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1619 1620 // FP fused multiply-add. 1621 void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1622 const VRegister& va); 1623 1624 // FP fused multiply-subtract. 1625 void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1626 const VRegister& va); 1627 1628 // FP fused multiply-add and negate. 
1629 void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1630 const VRegister& va); 1631 1632 // FP fused multiply-subtract and negate. 1633 void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1634 const VRegister& va); 1635 1636 // FP multiply-negate scalar. 1637 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1638 1639 // FP reciprocal exponent scalar. 1640 void frecpx(const VRegister& vd, const VRegister& vn); 1641 1642 // FP divide. 1643 void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1644 1645 // FP maximum. 1646 void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1647 1648 // FP minimum. 1649 void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1650 1651 // FP maximum. 1652 void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1653 1654 // FP minimum. 1655 void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1656 1657 // FP absolute. 1658 void fabs(const VRegister& vd, const VRegister& vn); 1659 1660 // FP negate. 1661 void fneg(const VRegister& vd, const VRegister& vn); 1662 1663 // FP square root. 1664 void fsqrt(const VRegister& vd, const VRegister& vn); 1665 1666 // FP round to integer nearest with ties to away. 1667 void frinta(const VRegister& vd, const VRegister& vn); 1668 1669 // FP round to integer, implicit rounding. 1670 void frinti(const VRegister& vd, const VRegister& vn); 1671 1672 // FP round to integer toward minus infinity. 1673 void frintm(const VRegister& vd, const VRegister& vn); 1674 1675 // FP round to integer nearest with ties to even. 1676 void frintn(const VRegister& vd, const VRegister& vn); 1677 1678 // FP round to integer towards plus infinity. 1679 void frintp(const VRegister& vd, const VRegister& vn); 1680 1681 // FP round to integer, exact, implicit rounding. 
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  // FP conditional compare.
  void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv,
             Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
1733 void fcvtas(const VRegister& vd, const VRegister& vn); 1734 1735 // FP convert to unsigned integer, nearest with ties to away. 1736 void fcvtau(const VRegister& vd, const VRegister& vn); 1737 1738 // FP convert to signed integer, round towards -infinity. 1739 void fcvtms(const Register& rd, const VRegister& vn); 1740 1741 // FP convert to unsigned integer, round towards -infinity. 1742 void fcvtmu(const Register& rd, const VRegister& vn); 1743 1744 // FP convert to signed integer, round towards -infinity. 1745 void fcvtms(const VRegister& vd, const VRegister& vn); 1746 1747 // FP convert to unsigned integer, round towards -infinity. 1748 void fcvtmu(const VRegister& vd, const VRegister& vn); 1749 1750 // FP convert to signed integer, nearest with ties to even. 1751 void fcvtns(const Register& rd, const VRegister& vn); 1752 1753 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3]. 1754 void fjcvtzs(const Register& rd, const VRegister& vn); 1755 1756 // FP convert to unsigned integer, nearest with ties to even. 1757 void fcvtnu(const Register& rd, const VRegister& vn); 1758 1759 // FP convert to signed integer, nearest with ties to even. 1760 void fcvtns(const VRegister& rd, const VRegister& vn); 1761 1762 // FP convert to unsigned integer, nearest with ties to even. 1763 void fcvtnu(const VRegister& rd, const VRegister& vn); 1764 1765 // FP convert to signed integer or fixed-point, round towards zero. 1766 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); 1767 1768 // FP convert to unsigned integer or fixed-point, round towards zero. 1769 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); 1770 1771 // FP convert to signed integer or fixed-point, round towards zero. 1772 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); 1773 1774 // FP convert to unsigned integer or fixed-point, round towards zero. 
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const VRegister& vd, const VRegister& vn);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
1821 void mov(const Register& rd, const VRegister& vn, int vn_index); 1822 1823 // Move vector element to scalar. 1824 void mov(const VRegister& vd, const VRegister& vn, int vn_index); 1825 1826 // Insert vector element from another vector element. 1827 void ins(const VRegister& vd, int vd_index, const VRegister& vn, 1828 int vn_index); 1829 1830 // Move vector element to another vector element. 1831 void mov(const VRegister& vd, int vd_index, const VRegister& vn, 1832 int vn_index); 1833 1834 // Signed move vector element to general-purpose register. 1835 void smov(const Register& rd, const VRegister& vn, int vn_index); 1836 1837 // One-element structure load to one register. 1838 void ld1(const VRegister& vt, const MemOperand& src); 1839 1840 // One-element structure load to two registers. 1841 void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1842 1843 // One-element structure load to three registers. 1844 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1845 const MemOperand& src); 1846 1847 // One-element structure load to four registers. 1848 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1849 const VRegister& vt4, const MemOperand& src); 1850 1851 // One-element single structure load to one lane. 1852 void ld1(const VRegister& vt, int lane, const MemOperand& src); 1853 1854 // One-element single structure load to all lanes. 1855 void ld1r(const VRegister& vt, const MemOperand& src); 1856 1857 // Two-element structure load. 1858 void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1859 1860 // Two-element single structure load to one lane. 1861 void ld2(const VRegister& vt, const VRegister& vt2, int lane, 1862 const MemOperand& src); 1863 1864 // Two-element single structure load to all lanes. 1865 void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1866 1867 // Three-element structure load. 
1868 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1869 const MemOperand& src); 1870 1871 // Three-element single structure load to one lane. 1872 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1873 int lane, const MemOperand& src); 1874 1875 // Three-element single structure load to all lanes. 1876 void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1877 const MemOperand& src); 1878 1879 // Four-element structure load. 1880 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1881 const VRegister& vt4, const MemOperand& src); 1882 1883 // Four-element single structure load to one lane. 1884 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1885 const VRegister& vt4, int lane, const MemOperand& src); 1886 1887 // Four-element single structure load to all lanes. 1888 void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1889 const VRegister& vt4, const MemOperand& src); 1890 1891 // Count leading sign bits. 1892 void cls(const VRegister& vd, const VRegister& vn); 1893 1894 // Count leading zero bits (vector). 1895 void clz(const VRegister& vd, const VRegister& vn); 1896 1897 // Population count per byte. 1898 void cnt(const VRegister& vd, const VRegister& vn); 1899 1900 // Reverse bit order. 1901 void rbit(const VRegister& vd, const VRegister& vn); 1902 1903 // Reverse elements in 16-bit halfwords. 1904 void rev16(const VRegister& vd, const VRegister& vn); 1905 1906 // Reverse elements in 32-bit words. 1907 void rev32(const VRegister& vd, const VRegister& vn); 1908 1909 // Reverse elements in 64-bit doublewords. 1910 void rev64(const VRegister& vd, const VRegister& vn); 1911 1912 // Unsigned reciprocal square root estimate. 1913 void ursqrte(const VRegister& vd, const VRegister& vn); 1914 1915 // Unsigned reciprocal estimate. 
1916 void urecpe(const VRegister& vd, const VRegister& vn); 1917 1918 // Signed pairwise long add and accumulate. 1919 void sadalp(const VRegister& vd, const VRegister& vn); 1920 1921 // Signed pairwise long add. 1922 void saddlp(const VRegister& vd, const VRegister& vn); 1923 1924 // Unsigned pairwise long add. 1925 void uaddlp(const VRegister& vd, const VRegister& vn); 1926 1927 // Unsigned pairwise long add and accumulate. 1928 void uadalp(const VRegister& vd, const VRegister& vn); 1929 1930 // Shift left by immediate. 1931 void shl(const VRegister& vd, const VRegister& vn, int shift); 1932 1933 // Signed saturating shift left by immediate. 1934 void sqshl(const VRegister& vd, const VRegister& vn, int shift); 1935 1936 // Signed saturating shift left unsigned by immediate. 1937 void sqshlu(const VRegister& vd, const VRegister& vn, int shift); 1938 1939 // Unsigned saturating shift left by immediate. 1940 void uqshl(const VRegister& vd, const VRegister& vn, int shift); 1941 1942 // Signed shift left long by immediate. 1943 void sshll(const VRegister& vd, const VRegister& vn, int shift); 1944 1945 // Signed shift left long by immediate (second part). 1946 void sshll2(const VRegister& vd, const VRegister& vn, int shift); 1947 1948 // Signed extend long. 1949 void sxtl(const VRegister& vd, const VRegister& vn); 1950 1951 // Signed extend long (second part). 1952 void sxtl2(const VRegister& vd, const VRegister& vn); 1953 1954 // Unsigned shift left long by immediate. 1955 void ushll(const VRegister& vd, const VRegister& vn, int shift); 1956 1957 // Unsigned shift left long by immediate (second part). 1958 void ushll2(const VRegister& vd, const VRegister& vn, int shift); 1959 1960 // Shift left long by element size. 1961 void shll(const VRegister& vd, const VRegister& vn, int shift); 1962 1963 // Shift left long by element size (second part). 1964 void shll2(const VRegister& vd, const VRegister& vn, int shift); 1965 1966 // Unsigned extend long. 
1967 void uxtl(const VRegister& vd, const VRegister& vn); 1968 1969 // Unsigned extend long (second part). 1970 void uxtl2(const VRegister& vd, const VRegister& vn); 1971 1972 // Signed rounding halving add. 1973 void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1974 1975 // Unsigned halving sub. 1976 void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1977 1978 // Signed halving sub. 1979 void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1980 1981 // Unsigned saturating add. 1982 void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1983 1984 // Signed saturating add. 1985 void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1986 1987 // Unsigned saturating subtract. 1988 void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1989 1990 // Signed saturating subtract. 1991 void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1992 1993 // Add pairwise. 1994 void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1995 1996 // Add pair of elements scalar. 1997 void addp(const VRegister& vd, const VRegister& vn); 1998 1999 // Multiply-add to accumulator. 2000 void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2001 2002 // Multiply-subtract to accumulator. 2003 void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2004 2005 // Multiply. 2006 void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2007 2008 // Table lookup from one register. 2009 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2010 2011 // Table lookup from two registers. 2012 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2013 const VRegister& vm); 2014 2015 // Table lookup from three registers. 
2016 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2017 const VRegister& vn3, const VRegister& vm); 2018 2019 // Table lookup from four registers. 2020 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2021 const VRegister& vn3, const VRegister& vn4, const VRegister& vm); 2022 2023 // Table lookup extension from one register. 2024 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2025 2026 // Table lookup extension from two registers. 2027 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2028 const VRegister& vm); 2029 2030 // Table lookup extension from three registers. 2031 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2032 const VRegister& vn3, const VRegister& vm); 2033 2034 // Table lookup extension from four registers. 2035 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2036 const VRegister& vn3, const VRegister& vn4, const VRegister& vm); 2037 2038 // Instruction functions used only for test, debug, and patching. 2039 // Emit raw instructions in the instruction stream. 2040 void dci(Instr raw_inst) { Emit(raw_inst); } 2041 2042 // Emit 8 bits of data in the instruction stream. 2043 void dc8(uint8_t data) { EmitData(&data, sizeof(data)); } 2044 2045 // Emit 32 bits of data in the instruction stream. 2046 void dc32(uint32_t data) { EmitData(&data, sizeof(data)); } 2047 2048 // Emit 64 bits of data in the instruction stream. 2049 void dc64(uint64_t data) { EmitData(&data, sizeof(data)); } 2050 2051 // Emit an address in the instruction stream. 2052 void dcptr(Label* label); 2053 2054 // Copy a string into the instruction stream, including the terminating 2055 // nullptr character. The instruction pointer (pc_) is then aligned correctly 2056 // for subsequent instructions. 
2057 void EmitStringData(const char* string); 2058 2059 // Pseudo-instructions ------------------------------------------------------ 2060 2061 // Parameters are described in arm64/instructions-arm64.h. 2062 void debug(const char* message, uint32_t code, Instr params = BREAK); 2063 2064 // Required by V8. 2065 void db(uint8_t data) { dc8(data); } 2066 void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) { 2067 BlockPoolsScope no_pool_scope(this); 2068 if (!RelocInfo::IsNoInfo(rmode)) { 2069 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) || 2070 RelocInfo::IsLiteralConstant(rmode)); 2071 RecordRelocInfo(rmode); 2072 } 2073 dc32(data); 2074 } 2075 void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) { 2076 BlockPoolsScope no_pool_scope(this); 2077 if (!RelocInfo::IsNoInfo(rmode)) { 2078 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) || 2079 RelocInfo::IsLiteralConstant(rmode)); 2080 RecordRelocInfo(rmode); 2081 } 2082 dc64(data); 2083 } 2084 void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) { 2085 BlockPoolsScope no_pool_scope(this); 2086 if (!RelocInfo::IsNoInfo(rmode)) { 2087 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) || 2088 RelocInfo::IsLiteralConstant(rmode)); 2089 RecordRelocInfo(rmode); 2090 } 2091 dc64(data); 2092 } 2093 2094 // Code generation helpers -------------------------------------------------- 2095 2096 Instruction* pc() const { return Instruction::Cast(pc_); } 2097 2098 Instruction* InstructionAt(ptrdiff_t offset) const { 2099 return reinterpret_cast<Instruction*>(buffer_start_ + offset); 2100 } 2101 2102 ptrdiff_t InstructionOffset(Instruction* instr) const { 2103 return reinterpret_cast<byte*>(instr) - buffer_start_; 2104 } 2105 2106 // Register encoding. 
  // The encoders below reject the stack pointer: sp is represented by
  // kSPRegInternalCode, which the DCHECKs refuse, so a raw code of 31 in
  // these instruction fields always denotes the zero register.
  static Instr Rd(CPURegister rd) {
    DCHECK_NE(rd.code(), kSPRegInternalCode);
    return rd.code() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    DCHECK_NE(rn.code(), kSPRegInternalCode);
    return rn.code() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    return rm.code() << Rm_offset;
  }

  // As Rm, but additionally disallows the zero register.
  static Instr RmNot31(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    DCHECK(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    DCHECK_NE(ra.code(), kSPRegInternalCode);
    return ra.code() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    DCHECK_NE(rt.code(), kSPRegInternalCode);
    return rt.code() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    DCHECK_NE(rt2.code(), kSPRegInternalCode);
    return rt2.code() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    DCHECK_NE(rs.code(), kSPRegInternalCode);
    return rs.code() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register. Masking with kRegCodeMask folds sp's
  // internal register code down to the architectural encoding.
  static Instr RdSP(Register rd) {
    DCHECK(!rd.IsZero());
    return (rd.code() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    DCHECK(!rn.IsZero());
    return (rn.code() & kRegCodeMask) << Rn_offset;
  }

  // Flags encoding.
  inline static Instr Flags(FlagsUpdate S);
  inline static Instr Cond(Condition cond);

  // PC-relative address encoding.
  inline static Instr ImmPCRelAddress(int imm21);

  // Branch encoding.
2168 inline static Instr ImmUncondBranch(int imm26); 2169 inline static Instr ImmCondBranch(int imm19); 2170 inline static Instr ImmCmpBranch(int imm19); 2171 inline static Instr ImmTestBranch(int imm14); 2172 inline static Instr ImmTestBranchBit(unsigned bit_pos); 2173 2174 // Data Processing encoding. 2175 inline static Instr SF(Register rd); 2176 inline static Instr ImmAddSub(int imm); 2177 inline static Instr ImmS(unsigned imms, unsigned reg_size); 2178 inline static Instr ImmR(unsigned immr, unsigned reg_size); 2179 inline static Instr ImmSetBits(unsigned imms, unsigned reg_size); 2180 inline static Instr ImmRotate(unsigned immr, unsigned reg_size); 2181 inline static Instr ImmLLiteral(int imm19); 2182 inline static Instr BitN(unsigned bitn, unsigned reg_size); 2183 inline static Instr ShiftDP(Shift shift); 2184 inline static Instr ImmDPShift(unsigned amount); 2185 inline static Instr ExtendMode(Extend extend); 2186 inline static Instr ImmExtendShift(unsigned left_shift); 2187 inline static Instr ImmCondCmp(unsigned imm); 2188 inline static Instr Nzcv(StatusFlags nzcv); 2189 2190 static bool IsImmAddSub(int64_t immediate); 2191 static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n, 2192 unsigned* imm_s, unsigned* imm_r); 2193 2194 // MemOperand offset encoding. 2195 inline static Instr ImmLSUnsigned(int imm12); 2196 inline static Instr ImmLS(int imm9); 2197 inline static Instr ImmLSPair(int imm7, unsigned size); 2198 inline static Instr ImmShiftLS(unsigned shift_amount); 2199 inline static Instr ImmException(int imm16); 2200 inline static Instr ImmSystemRegister(int imm15); 2201 inline static Instr ImmHint(int imm7); 2202 inline static Instr ImmBarrierDomain(int imm2); 2203 inline static Instr ImmBarrierType(int imm2); 2204 inline static unsigned CalcLSDataSize(LoadStoreOp op); 2205 2206 // Instruction bits for vector format in data processing operations. 
  // Maps a vector register's width (64 vs 128 bits) and lane count to the
  // NEON vector-format field. Unsupported combinations (e.g. 1D here) hit
  // UNREACHABLE.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.LaneCount()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          UNREACHABLE();
      }
    } else {
      DCHECK(vd.Is128Bits());
      switch (vd.LaneCount()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          UNREACHABLE();
      }
    }
  }

  // Instruction bits for vector format in floating point data processing
  // operations. Supports scalar S/D, vector 2S/2D and 4S forms only.
  static Instr FPFormat(VRegister vd) {
    if (vd.LaneCount() == 1) {
      // Floating point scalar formats.
      DCHECK(vd.Is32Bits() || vd.Is64Bits());
      return vd.Is64Bits() ? FP64 : FP32;
    }

    // Two lane floating point vector formats.
    if (vd.LaneCount() == 2) {
      DCHECK(vd.Is64Bits() || vd.Is128Bits());
      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
    }

    // Four lane floating point vector format.
    DCHECK((vd.LaneCount() == 4) && vd.Is128Bits());
    return NEON_FP_4S;
  }

  // Instruction bits for vector format in load and store operations.
  // Unlike VFormat, this additionally accepts the 1D arrangement.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.LaneCount()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          UNREACHABLE();
      }
    } else {
      DCHECK(vd.Is128Bits());
      switch (vd.LaneCount()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          UNREACHABLE();
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
2289 static Instr SFormat(VRegister vd) { 2290 DCHECK(vd.IsScalar()); 2291 switch (vd.SizeInBytes()) { 2292 case 1: 2293 return NEON_B; 2294 case 2: 2295 return NEON_H; 2296 case 4: 2297 return NEON_S; 2298 case 8: 2299 return NEON_D; 2300 default: 2301 UNREACHABLE(); 2302 } 2303 } 2304 2305 static Instr ImmNEONHLM(int index, int num_bits) { 2306 int h, l, m; 2307 if (num_bits == 3) { 2308 DCHECK(is_uint3(index)); 2309 h = (index >> 2) & 1; 2310 l = (index >> 1) & 1; 2311 m = (index >> 0) & 1; 2312 } else if (num_bits == 2) { 2313 DCHECK(is_uint2(index)); 2314 h = (index >> 1) & 1; 2315 l = (index >> 0) & 1; 2316 m = 0; 2317 } else { 2318 DCHECK(is_uint1(index) && (num_bits == 1)); 2319 h = (index >> 0) & 1; 2320 l = 0; 2321 m = 0; 2322 } 2323 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); 2324 } 2325 2326 static Instr ImmNEONExt(int imm4) { 2327 DCHECK(is_uint4(imm4)); 2328 return imm4 << ImmNEONExt_offset; 2329 } 2330 2331 static Instr ImmNEON5(Instr format, int index) { 2332 DCHECK(is_uint4(index)); 2333 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 2334 int imm5 = (index << (s + 1)) | (1 << s); 2335 return imm5 << ImmNEON5_offset; 2336 } 2337 2338 static Instr ImmNEON4(Instr format, int index) { 2339 DCHECK(is_uint4(index)); 2340 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 2341 int imm4 = index << s; 2342 return imm4 << ImmNEON4_offset; 2343 } 2344 2345 static Instr ImmNEONabcdefgh(int imm8) { 2346 DCHECK(is_uint8(imm8)); 2347 Instr instr; 2348 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; 2349 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; 2350 return instr; 2351 } 2352 2353 static Instr NEONCmode(int cmode) { 2354 DCHECK(is_uint4(cmode)); 2355 return cmode << NEONCmode_offset; 2356 } 2357 2358 static Instr NEONModImmOp(int op) { 2359 DCHECK(is_uint1(op)); 2360 return op << NEONModImmOp_offset; 2361 } 2362 2363 static bool IsImmLSUnscaled(int64_t offset); 2364 static bool 
IsImmLSScaled(int64_t offset, unsigned size); 2365 static bool IsImmLLiteral(int64_t offset); 2366 2367 // Move immediates encoding. 2368 inline static Instr ImmMoveWide(int imm); 2369 inline static Instr ShiftMoveWide(int shift); 2370 2371 // FP Immediates. 2372 static Instr ImmFP(double imm); 2373 static Instr ImmNEONFP(double imm); 2374 inline static Instr FPScale(unsigned scale); 2375 2376 // FP register type. 2377 inline static Instr FPType(VRegister fd); 2378 2379 // Unused on this architecture. 2380 void MaybeEmitOutOfLineConstantPool() {} 2381 2382 void ForceConstantPoolEmissionWithoutJump() { 2383 constpool_.Check(Emission::kForced, Jump::kOmitted); 2384 } 2385 void ForceConstantPoolEmissionWithJump() { 2386 constpool_.Check(Emission::kForced, Jump::kRequired); 2387 } 2388 // Check if the const pool needs to be emitted while pretending that {margin} 2389 // more bytes of instructions have already been emitted. 2390 void EmitConstPoolWithJumpIfNeeded(size_t margin = 0) { 2391 constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin); 2392 } 2393 2394 // Used by veneer checks below - returns the max (= overapproximated) pc 2395 // offset after the veneer pool, if the veneer pool were to be emitted 2396 // immediately. 2397 intptr_t MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin); 2398 // Returns true if we should emit a veneer as soon as possible for a branch 2399 // which can at most reach to specified pc. 2400 bool ShouldEmitVeneer(int max_reachable_pc, size_t margin) { 2401 return max_reachable_pc < MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin); 2402 } 2403 bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) { 2404 return ShouldEmitVeneer(unresolved_branches_first_limit(), margin); 2405 } 2406 2407 // The code size generated for a veneer. Currently one branch 2408 // instruction. This is for code size checking purposes, and can be extended 2409 // in the future for example if we decide to add nops between the veneers. 
2410 static constexpr int kVeneerCodeSize = 1 * kInstrSize; 2411 2412 void RecordVeneerPool(int location_offset, int size); 2413 // Emits veneers for branches that are approaching their maximum range. 2414 // If need_protection is true, the veneers are protected by a branch jumping 2415 // over the code. 2416 void EmitVeneers(bool force_emit, bool need_protection, 2417 size_t margin = kVeneerDistanceMargin); 2418 void EmitVeneersGuard() { EmitPoolGuard(); } 2419 // Checks whether veneers need to be emitted at this point. 2420 // If force_emit is set, a veneer is generated for *all* unresolved branches. 2421 void CheckVeneerPool(bool force_emit, bool require_jump, 2422 size_t margin = kVeneerDistanceMargin); 2423 2424 using BlockConstPoolScope = ConstantPool::BlockScope; 2425 2426 class V8_NODISCARD BlockPoolsScope { 2427 public: 2428 // Block veneer and constant pool. Emits pools if necessary to ensure that 2429 // {margin} more bytes can be emitted without triggering pool emission. 2430 explicit BlockPoolsScope(Assembler* assem, size_t margin = 0) 2431 : assem_(assem), block_const_pool_(assem, margin) { 2432 assem_->CheckVeneerPool(false, true, margin); 2433 assem_->StartBlockVeneerPool(); 2434 } 2435 2436 BlockPoolsScope(Assembler* assem, PoolEmissionCheck check) 2437 : assem_(assem), block_const_pool_(assem, check) { 2438 assem_->StartBlockVeneerPool(); 2439 } 2440 ~BlockPoolsScope() { assem_->EndBlockVeneerPool(); } 2441 2442 private: 2443 Assembler* assem_; 2444 BlockConstPoolScope block_const_pool_; 2445 DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope); 2446 }; 2447 2448#if defined(V8_OS_WIN) 2449 win64_unwindinfo::XdataEncoder* GetXdataEncoder() { 2450 return xdata_encoder_.get(); 2451 } 2452 2453 win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const; 2454#endif 2455 2456 protected: 2457 inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const; 2458 2459 void LoadStore(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op); 2460 
  // Encode and emit a load/store pair of {rt} and {rt2}.
  void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                     const MemOperand& addr, LoadStorePairOp op);
  // NEON structured load/store helpers (LD1/ST1 and friends).
  void LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt, int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  // Debug-time validation of a structured load/store's operands.
  void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr,
                             Instr op);

  // Returns true if {offset} is encodable as a load/store-pair immediate for
  // an access of the given {size}.
  static bool IsImmLSPair(int64_t offset, unsigned size);

  // Logical (AND/ORR/EOR/...) emission helpers.
  void Logical(const Register& rd, const Register& rn, const Operand& operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd, const Register& rn, unsigned n,
                        unsigned imm_s, unsigned imm_r, LogicalOp op);

  // Conditional compare (CCMP/CCMN) emission helper.
  void ConditionalCompare(const Register& rn, const Operand& operand,
                          StatusFlags nzcv, Condition cond,
                          ConditionalCompareOp op);
  static bool IsImmConditionalCompare(int64_t immediate);

  // Add/subtract with carry (ADC/SBC) emission helper; {S} selects whether
  // the flags are updated.
  void AddSubWithCarry(const Register& rd, const Register& rn,
                       const Operand& operand, FlagsUpdate S,
                       AddSubWithCarryOp op);

  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd, const Register& rn, Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd, const Register& rn, Extend extend,
                       unsigned left_shift);

  // Add/subtract emission helper; {S} selects whether the flags are updated.
  void AddSub(const Register& rd, const Register& rn, const Operand& operand,
              FlagsUpdate S, AddSubOp op);

  // Returns true if {imm} is encodable as an FMOV floating-point immediate.
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static inline LoadStoreOp LoadOpFor(const CPURegister& rt);
  static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                              const CPURegister& rt2);
  static inline LoadStoreOp StoreOpFor(const CPURegister& rt);
  static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                               const CPURegister& rt2);
  static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Remove the specified branch from the unbound label link chain.
  // If available, a veneer for this label can be used for other branches in the
  // chain if the link chain cannot be fixed up without this branch.
  void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label,
                                      Instruction* label_veneer = nullptr);

 private:
  // Encode {imm} as the 8-bit FMOV immediate field.
  static uint32_t FPToImm8(double imm);

  // Instruction helpers.
  // Move-wide immediate (MOVZ/MOVN/MOVK) emission.
  void MoveWide(const Register& rd, uint64_t imm, int shift,
                MoveWideImmediateOp mov_op);
  // Data-processing with a shifted/extended register operand.
  void DataProcShiftedRegister(const Register& rd, const Register& rn,
                               const Operand& operand, FlagsUpdate S, Instr op);
  void DataProcExtendedRegister(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
                                Instr op);
  // Conditional select (CSEL family) emission.
  void ConditionalSelect(const Register& rd, const Register& rn,
                         const Register& rm, Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd, const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd, const Register& rn,
                             const Register& rm, const Register& ra,
                             DataProcessing3SourceOp op);
  // Floating-point data-processing emission, by operand count.
  void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm,
                               FPDataProcessing2SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm, const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by instruction class. The 'L'/'W'/'HN'
  // suffixes mirror the long/wide/high-narrow instruction variants.
  void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd, const VRegister& vn,
                   const VRegister& vm, Instr op);
  void NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                        const VRegister& vm, NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                      NEON2RegMiscOp vop, double value = 0.0);
  void NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                    NEON2RegMiscOp vop, int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                NEONPermOp op);
  // By-element (indexed) NEON forms; {vm_index} selects the lane of vm.
  void NEONFPByElement(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm, int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm, int vm_index,
                      NEONByIndexedElementOp op);
  // NEON shift-by-immediate forms.
  void NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                          NEONShiftImmediateOp op, int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                              int shift, NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn,
                               int shift, NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
  void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEONTableOp op);

  // Compute the addressing-mode field for a NEON structured load/store.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Label helpers.

  // Return an offset for a label-referencing instruction, typically a branch.
  int LinkAndGetByteOffsetTo(Label* label);

  // This is the same as LinkAndGetByteOffsetTo, but return an offset
  // suitable for fields that take instruction offsets.
  inline int LinkAndGetInstructionOffsetTo(Label* label);

  // Sentinel offset marking the start (end) of a label's link chain.
  static constexpr int kStartOfLabelLinkChain = 0;

  // Verify that a label's link chain is intact.
  void CheckLabelLinkChain(Label const* label);

  // Emit the instruction at pc_.
  void Emit(Instr instruction) {
    // pc_ is byte-addressed and an Instr is exactly one instruction slot.
    STATIC_ASSERT(sizeof(*pc_) == 1);
    STATIC_ASSERT(sizeof(instruction) == kInstrSize);
    DCHECK_LE(pc_ + sizeof(instruction), buffer_start_ + buffer_->size());

    memcpy(pc_, &instruction, sizeof(instruction));
    pc_ += sizeof(instruction);
    CheckBuffer();
  }

  // Emit data inline in the instruction stream.
  // Copy {size} raw bytes into the instruction stream at pc_.
  void EmitData(void const* data, unsigned size) {
    DCHECK_EQ(sizeof(*pc_), 1);
    DCHECK_LE(pc_ + size, buffer_start_ + buffer_->size());

    // TODO(all): Somehow register we have some data here. Then we can
    // disassemble it correctly.
    memcpy(pc_, data, size);
    pc_ += size;
    CheckBuffer();
  }

  // Buffer management: grow the backing buffer, and the post-/pre-emission
  // space checks used by Emit/EmitData.
  void GrowBuffer();
  V8_INLINE void CheckBufferSpace();
  void CheckBuffer();

  // Emission of the veneer pools may be blocked in some code sequences.
  int veneer_pool_blocked_nesting_;  // Block emission if this is not zero.

  // Relocation info generation.
  // Each relocation is encoded as a variable size value.
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

 protected:
  // Code generation.
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
#ifdef DEBUG
  // Functions used for testing.
  size_t GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.Entry32Count() * kInt32Size +
           constpool_.Entry64Count() * kInt64Size;
  }

  static size_t GetCheckConstPoolIntervalForTesting() {
    return ConstantPool::kCheckInterval;
  }

  static size_t GetApproxMaxDistToConstPoolForTesting() {
    return ConstantPool::kApproxDistToPool64;
  }
#endif

  // Describes one unresolved forward branch: where it was emitted and which
  // label it targets.
  class FarBranchInfo {
   public:
    FarBranchInfo(int offset, Label* label)
        : pc_offset_(offset), label_(label) {}
    // Offset of the branch in the code generation buffer.
    int pc_offset_;
    // The label branched to.
    Label* label_;
  };

 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, which makes
  // it easy to check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // The smallest maximum-reachable-offset among all unresolved branches, i.e.
  // the first key in the (sorted) unresolved_branches_ map.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // The PC-offset of the next veneer pool check; caching it helps reduce the
  // overhead of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;

#if defined(V8_OS_WIN)
  // Collects Windows ARM64 unwind codes while assembling.
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // If a veneer is emitted for a branch instruction, that instruction must be
  // removed from the associated label's link chain so that the assembler does
  // not later attempt (likely unsuccessfully) to patch it to branch directly to
  // the label.
  void DeleteUnresolvedBranchInfoForLabel(Label* label);
  // This function deletes the information related to the label by traversing
  // the label chain, and for each PC-relative instruction in the chain checking
  // if pending unresolved information exists. Its complexity is proportional to
  // the length of the label chain.
  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);

  // Materialize and patch in heap objects requested via Operand::
  // EmbeddedNumber / EmbeddedStringConstant once an isolate is available.
  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  // The pending constant pool.
  ConstantPool constpool_;

  friend class EnsureSpace;
  friend class ConstantPool;
};

// An Assembler specialized for patching a fixed number of already-emitted
// instructions in place.
class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options,
                  ExternalAssemblerBuffer(start, count * kInstrSize + kGap)),
        block_constant_pool_emission_scope(this) {}

  ~PatchingAssembler() {
    // Verify we have generated the number of instructions we expected.
    DCHECK_EQ(pc_offset() + kGap, buffer_->size());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);

 private:
  // Pools must never be emitted into a patch region; keep them blocked for
  // the lifetime of this assembler.
  BlockPoolsScope block_constant_pool_emission_scope;
};

// RAII helper that blocks pool emission while a short, fixed-size code
// sequence is emitted (see the inline constructor's definition).
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler);

 private:
  Assembler::BlockPoolsScope block_pools_scope_;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_