1// Copyright (c) 1994-2006 Sun Microsystems Inc. 2// All Rights Reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are 6// met: 7// 8// - Redistributions of source code must retain the above copyright notice, 9// this list of conditions and the following disclaimer. 10// 11// - Redistribution in binary form must reproduce the above copyright 12// notice, this list of conditions and the following disclaimer in the 13// documentation and/or other materials provided with the distribution. 14// 15// - Neither the name of Sun Microsystems or the names of contributors may 16// be used to endorse or promote products derived from this software without 17// specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31// The original source code covered by the above license above has been 32// modified significantly by Google Inc. 33// Copyright 2011 the V8 project authors. All rights reserved. 34 35// A light-weight IA32 Assembler. 

#ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
#define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_

#include <deque>
#include <memory>

#include "src/codegen/assembler.h"
#include "src/codegen/ia32/constants-ia32.h"
#include "src/codegen/ia32/fma-instr.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/ia32/sse-instr.h"
#include "src/codegen/label.h"
#include "src/execution/isolate.h"
#include "src/objects/smi.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

class SafepointTableBuilder;

// IA-32 condition codes, numbered to match the 4-bit condition encoding of
// the Jcc/SETcc/CMOVcc instruction families (so the value can be OR'ed
// directly into an opcode byte).
enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};

// Returns the equivalent of !cc.
// Each hardware condition and its negation differ only in the lowest bit
// (e.g. equal = 4, not_equal = 5), so flipping bit 0 negates the condition.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}

// Immediate rounding-mode operand for the SSE4.1 round* instructions
// (values match the low two bits of the roundss/roundsd imm8).
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};

// -----------------------------------------------------------------------------
// Machine instruction Immediates

// A 32-bit immediate operand, optionally tagged with relocation information
// (external reference, embedded object, internal reference, ...).
class Immediate {
 public:
  // Calls where x is an Address (uintptr_t) resolve to this overload.
  inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    value_.immediate = x;
    rmode_ = rmode;
  }
  inline explicit Immediate(const ExternalReference& ext)
      : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
  inline explicit Immediate(Handle<HeapObject> handle)
      : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {}
  // Smis are stored by their raw tagged pointer value; no relocation needed.
  inline explicit Immediate(Smi value)
      : Immediate(static_cast<intptr_t>(value.ptr())) {}

  static Immediate EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Immediate EmbeddedStringConstant(const StringConstantBase* str);

  static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); }

  // True if this immediate is a placeholder for a heap object that will be
  // allocated and patched in later (see HeapObjectRequest).
  bool is_heap_object_request() const {
    DCHECK_IMPLIES(is_heap_object_request_,
                   rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
                       rmode_ == RelocInfo::CODE_TARGET);
    return is_heap_object_request_;
  }

  HeapObjectRequest heap_object_request() const {
    DCHECK(is_heap_object_request());
    return value_.heap_object_request;
  }

  // The raw 32-bit value. Only valid when this is not a heap object request.
  int immediate() const {
    DCHECK(!is_heap_object_request());
    return value_.immediate;
  }

  bool is_embedded_object() const {
    return !is_heap_object_request() &&
           rmode() == RelocInfo::FULL_EMBEDDED_OBJECT;
  }

  Handle<HeapObject> embedded_object() const {
    return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
  }

  bool is_external_reference() const {
    return rmode() == RelocInfo::EXTERNAL_REFERENCE;
  }

  ExternalReference external_reference() const {
    DCHECK(is_external_reference());
    return bit_cast<ExternalReference>(immediate());
  }

  // The is_*() range predicates below intentionally return false for any
  // relocated immediate: a value that needs relocation cannot be emitted in
  // a shortened (8/16-bit) encoding.
  bool is_zero() const {
    return RelocInfo::IsNoInfo(rmode_) && immediate() == 0;
  }
  bool is_int8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int8(immediate());
  }
  bool is_uint8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint8(immediate());
  }
  bool is_int16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int16(immediate());
  }

  bool is_uint16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint16(immediate());
  }

  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  // Label immediates store the Label's address; on IA-32 a pointer fits in
  // an int32_t. Resolved later via INTERNAL_REFERENCE relocation.
  inline explicit Immediate(Label* value) {
    value_.immediate = reinterpret_cast<int32_t>(value);
    rmode_ = RelocInfo::INTERNAL_REFERENCE;
  }

  // Either the raw immediate or a pending heap object request;
  // is_heap_object_request_ says which member is active.
  union Value {
    Value() {}
    HeapObjectRequest heap_object_request;
    int immediate;
  } value_;
  bool is_heap_object_request_ = false;
  RelocInfo::Mode rmode_;

  friend class Operand;
  friend class Assembler;
  friend class MacroAssembler;
};

// -----------------------------------------------------------------------------
// Machine instruction Operands

// Index scale for SIB addressing; the enumerator value is the 2-bit 'scale'
// field as encoded in the SIB byte.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  times_half_system_pointer_size = times_2,
  times_system_pointer_size = times_4,

  times_tagged_size = times_4,
};

// A memory or register operand, stored as the raw ModR/M (+ optional SIB and
// displacement) byte sequence it will be emitted as. The 'reg' field of the
// ModR/M byte is filled in later by the assembler's emit_operand.
class V8_EXPORT_PRIVATE Operand {
 public:
  // reg
  V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }

  // XMM reg
  V8_INLINE explicit Operand(XMMRegister xmm_reg) {
    Register reg = Register::from_code(xmm_reg.code());
    set_modrm(3, reg);
  }

  // [disp/r]
  // mod=0 with rm=ebp encodes a pure 32-bit displacement (no base register).
  V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
    set_modrm(0, ebp);
    set_dispr(disp, rmode);
  }

  // [disp/r]
  V8_INLINE explicit Operand(Immediate imm) {
    set_modrm(0, ebp);
    set_dispr(imm.immediate(), imm.rmode_);
  }

  // [base + disp/r]
  explicit Operand(Register base, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [rip + disp/r]
  explicit Operand(Label* label) {
    set_modrm(0, ebp);
    set_dispr(reinterpret_cast<intptr_t>(label), RelocInfo::INTERNAL_REFERENCE);
  }

  // [base + index*scale + disp/r]
  explicit Operand(Register base, Register index, ScaleFactor scale,
                   int32_t disp, RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [index*scale + disp/r]
  explicit Operand(Register index, ScaleFactor scale, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // Operand addressing a jump-table entry: [index*scale + &table].
  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
    return Operand(index, scale, reinterpret_cast<int32_t>(table),
                   RelocInfo::INTERNAL_REFERENCE);
  }

  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
    return Operand(base, imm.value_.immediate, imm.rmode_);
  }

  // Returns true if this Operand is a wrapper for the specified register.
  bool is_reg(Register reg) const { return is_reg(reg.code()); }
  bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }

  // Returns true if this Operand is a wrapper for one register.
  bool is_reg_only() const;

  // Asserts that this Operand is a wrapper for one register and returns the
  // register.
  Register reg() const;

  // The encoded ModR/M(+SIB+disp) bytes, ready to be copied into the
  // instruction stream.
  base::Vector<const byte> encoded_bytes() const { return {buf_, len_}; }
  RelocInfo::Mode rmode() { return rmode_; }

 private:
  // Set the ModRM byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  inline void set_modrm(int mod, Register rm) {
    DCHECK_EQ(mod & -4, 0);  // mod must fit in 2 bits.
    buf_[0] = mod << 6 | rm.code();
    len_ = 1;
  }

  inline void set_sib(ScaleFactor scale, Register index, Register base);
  inline void set_disp8(int8_t disp);
  // Append a 32-bit displacement after the ModR/M (and optional SIB) byte.
  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
    DCHECK(len_ == 1 || len_ == 2);
    Address p = reinterpret_cast<Address>(&buf_[len_]);
    WriteUnalignedValue(p, disp);
    len_ += sizeof(int32_t);
    rmode_ = rmode;
  }

  inline bool is_reg(int reg_code) const {
    return ((buf_[0] & 0xF8) == 0xC0)  // addressing mode is register only.
           && ((buf_[0] & 0x07) == reg_code);  // register codes match.
  }

  // Longest encoding: ModR/M + SIB + 4-byte displacement.
  byte buf_[6];
  // The number of bytes in buf_.
  uint8_t len_ = 0;
  // Only valid if len_ > 4.
  RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;
};
// Operand is passed by value and memcpy'd into the instruction stream, so it
// must stay trivially copyable and small.
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");

bool operator!=(Operand op, XMMRegister r);

// -----------------------------------------------------------------------------
// A Displacement describes the 32bit immediate field of an instruction which
// may be used together with a Label in order to refer to a yet unknown code
// position. Displacements stored in the instruction stream are used to describe
// the instruction and to chain a list of instructions using the same Label.
// A Displacement contains 2 different fields:
//
// next field: position of next displacement in the chain (0 = end of list)
// type field: instruction type
//
// A next value of null (0) indicates the end of a chain (note that there can
// be no displacement at position zero, because there is always at least one
// instruction byte before the displacement).
327// 328// Displacement _data field layout 329// 330// |31.....2|1......0| 331// [ next | type | 332 333class Displacement { 334 public: 335 enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE }; 336 337 int data() const { return data_; } 338 Type type() const { return TypeField::decode(data_); } 339 void next(Label* L) const { 340 int n = NextField::decode(data_); 341 n > 0 ? L->link_to(n) : L->Unuse(); 342 } 343 void link_to(Label* L) { init(L, type()); } 344 345 explicit Displacement(int data) { data_ = data; } 346 347 Displacement(Label* L, Type type) { init(L, type); } 348 349 void print() { 350 PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"), 351 NextField::decode(data_)); 352 } 353 354 private: 355 int data_; 356 357 using TypeField = base::BitField<Type, 0, 2>; 358 using NextField = base::BitField<int, 2, 32 - 2>; 359 360 void init(Label* L, Type type); 361}; 362 363class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { 364 private: 365 // We check before assembling an instruction that there is sufficient 366 // space to write an instruction and its relocation information. 367 // The relocation writer's position must be kGap bytes above the end of 368 // the generated instructions. This leaves enough space for the 369 // longest possible ia32 instruction, 15 bytes, and the longest possible 370 // relocation information encoding, RelocInfoWriter::kMaxLength == 16. 371 // (There is a 15 byte limit on ia32 instruction length that rules out some 372 // otherwise valid instructions.) 373 // This allows for a single, fast space check per instruction. 374 static constexpr int kGap = 32; 375 STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap); 376 377 public: 378 // Create an assembler. Instructions and relocation information are emitted 379 // into a buffer, with the instructions starting from the beginning and the 380 // relocation information starting from the end of the buffer. 
See CodeDesc 381 // for a detailed comment on the layout (globals.h). 382 // 383 // If the provided buffer is nullptr, the assembler allocates and grows its 384 // own buffer. Otherwise it takes ownership of the provided buffer. 385 explicit Assembler(const AssemblerOptions&, 386 std::unique_ptr<AssemblerBuffer> = {}); 387 388 // GetCode emits any pending (non-emitted) code and fills the descriptor desc. 389 static constexpr int kNoHandlerTable = 0; 390 static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; 391 void GetCode(Isolate* isolate, CodeDesc* desc, 392 SafepointTableBuilder* safepoint_table_builder, 393 int handler_table_offset); 394 395 // Convenience wrapper for code without safepoint or handler tables. 396 void GetCode(Isolate* isolate, CodeDesc* desc) { 397 GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); 398 } 399 400 void FinalizeJumpOptimizationInfo(); 401 402 // Unused on this architecture. 403 void MaybeEmitOutOfLineConstantPool() {} 404 405 // Read/Modify the code target in the branch/call instruction at pc. 406 // The isolate argument is unused (and may be nullptr) when skipping flushing. 407 inline static Address target_address_at(Address pc, Address constant_pool); 408 inline static void set_target_address_at( 409 Address pc, Address constant_pool, Address target, 410 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 411 412 // This sets the branch destination (which is in the instruction on x86). 413 // This is for calls and branches within generated code. 414 inline static void deserialization_set_special_target_at( 415 Address instruction_payload, Code code, Address target); 416 417 // Get the size of the special target encoded at 'instruction_payload'. 418 inline static int deserialization_special_target_size( 419 Address instruction_payload); 420 421 // This sets the internal reference at the pc. 
422 inline static void deserialization_set_target_internal_reference_at( 423 Address pc, Address target, 424 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 425 426 static constexpr int kSpecialTargetSize = kSystemPointerSize; 427 428 // One byte opcode for test al, 0xXX. 429 static constexpr byte kTestAlByte = 0xA8; 430 // One byte opcode for nop. 431 static constexpr byte kNopByte = 0x90; 432 433 // One byte opcode for a short unconditional jump. 434 static constexpr byte kJmpShortOpcode = 0xEB; 435 // One byte prefix for a short conditional jump. 436 static constexpr byte kJccShortPrefix = 0x70; 437 static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry; 438 static constexpr byte kJcShortOpcode = kJccShortPrefix | carry; 439 static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero; 440 static constexpr byte kJzShortOpcode = kJccShortPrefix | zero; 441 442 // --------------------------------------------------------------------------- 443 // Code generation 444 // 445 // - function names correspond one-to-one to ia32 instruction mnemonics 446 // - unless specified otherwise, instructions operate on 32bit operands 447 // - instructions on 8bit (byte) operands/registers have a trailing '_b' 448 // - instructions on 16bit (word) operands/registers have a trailing '_w' 449 // - naming conflicts with C++ keywords are resolved via a trailing '_' 450 451 // NOTE ON INTERFACE: Currently, the interface is not very consistent 452 // in the sense that some operations (e.g. mov()) can be called in more 453 // the one way to generate the same instruction: The Register argument 454 // can in some cases be replaced with an Operand(Register) argument. 455 // This should be cleaned up and made more orthogonal. The questions 456 // is: should we always use Operands instead of Registers where an 457 // Operand is possible, or should we have a Register (overloaded) form 458 // instead? 
We must be careful to make sure that the selected instruction 459 // is obvious from the parameters to avoid hard-to-find code generation 460 // bugs. 461 462 // Insert the smallest number of nop instructions 463 // possible to align the pc offset to a multiple 464 // of m. m must be a power of 2. 465 void Align(int m); 466 // Insert the smallest number of zero bytes possible to align the pc offset 467 // to a mulitple of m. m must be a power of 2 (>= 2). 468 void DataAlign(int m); 469 void Nop(int bytes = 1); 470 // Aligns code to something that's optimal for a jump target for the platform. 471 void CodeTargetAlign(); 472 void LoopHeaderAlign() { CodeTargetAlign(); } 473 474 // Stack 475 void pushad(); 476 void popad(); 477 478 void pushfd(); 479 void popfd(); 480 481 void push(const Immediate& x); 482 void push_imm32(int32_t imm32); 483 void push(Register src); 484 void push(Operand src); 485 486 void pop(Register dst); 487 void pop(Operand dst); 488 489 void leave(); 490 491 // Moves 492 void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); } 493 void mov_b(Register dst, Operand src); 494 void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); } 495 void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); } 496 void mov_b(Operand dst, const Immediate& src); 497 void mov_b(Operand dst, Register src); 498 499 void mov_w(Register dst, Operand src); 500 void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); } 501 void mov_w(Operand dst, const Immediate& src); 502 void mov_w(Operand dst, Register src); 503 504 void mov(Register dst, int32_t imm32); 505 void mov(Register dst, const Immediate& x); 506 void mov(Register dst, Handle<HeapObject> handle); 507 void mov(Register dst, Operand src); 508 void mov(Register dst, Register src); 509 void mov(Operand dst, const Immediate& x); 510 void mov(Operand dst, Handle<HeapObject> handle); 511 void mov(Operand dst, Register src); 512 void mov(Operand dst, Address src, 
RelocInfo::Mode); 513 514 void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); } 515 void movsx_b(Register dst, Operand src); 516 517 void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); } 518 void movsx_w(Register dst, Operand src); 519 520 void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); } 521 void movzx_b(Register dst, Operand src); 522 523 void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); } 524 void movzx_w(Register dst, Operand src); 525 526 void movq(XMMRegister dst, Operand src); 527 void movq(Operand dst, XMMRegister src); 528 529 // Conditional moves 530 void cmov(Condition cc, Register dst, Register src) { 531 cmov(cc, dst, Operand(src)); 532 } 533 void cmov(Condition cc, Register dst, Operand src); 534 535 // Flag management. 536 void cld(); 537 538 // Repetitive string instructions. 539 void rep_movs(); 540 void rep_stos(); 541 void stos(); 542 543 void xadd(Operand dst, Register src); 544 void xadd_b(Operand dst, Register src); 545 void xadd_w(Operand dst, Register src); 546 547 // Exchange 548 void xchg(Register dst, Register src); 549 void xchg(Register dst, Operand src); 550 void xchg_b(Register reg, Operand op); 551 void xchg_w(Register reg, Operand op); 552 553 // Lock prefix 554 void lock(); 555 556 // CompareExchange 557 void cmpxchg(Operand dst, Register src); 558 void cmpxchg_b(Operand dst, Register src); 559 void cmpxchg_w(Operand dst, Register src); 560 void cmpxchg8b(Operand dst); 561 562 // Memory Fence 563 void mfence(); 564 void lfence(); 565 566 void pause(); 567 568 // Arithmetics 569 void adc(Register dst, int32_t imm32); 570 void adc(Register dst, Register src) { adc(dst, Operand(src)); } 571 void adc(Register dst, Operand src); 572 573 void add(Register dst, Register src) { add(dst, Operand(src)); } 574 void add(Register dst, Operand src); 575 void add(Operand dst, Register src); 576 void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); } 
577 void add(Operand dst, const Immediate& x); 578 579 void and_(Register dst, int32_t imm32); 580 void and_(Register dst, const Immediate& x); 581 void and_(Register dst, Register src) { and_(dst, Operand(src)); } 582 void and_(Register dst, Operand src); 583 void and_(Operand dst, Register src); 584 void and_(Operand dst, const Immediate& x); 585 586 void cmpb(Register reg, Immediate imm8) { 587 DCHECK(reg.is_byte_register()); 588 cmpb(Operand(reg), imm8); 589 } 590 void cmpb(Operand op, Immediate imm8); 591 void cmpb(Register reg, Operand op); 592 void cmpb(Operand op, Register reg); 593 void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); } 594 void cmpb_al(Operand op); 595 void cmpw_ax(Operand op); 596 void cmpw(Operand dst, Immediate src); 597 void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); } 598 void cmpw(Register dst, Operand src); 599 void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); } 600 void cmpw(Operand dst, Register src); 601 void cmp(Register reg, int32_t imm32); 602 void cmp(Register reg, Handle<HeapObject> handle); 603 void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); } 604 void cmp(Register reg, Operand op); 605 void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); } 606 void cmp(Operand op, Register reg); 607 void cmp(Operand op, const Immediate& imm); 608 void cmp(Operand op, Handle<HeapObject> handle); 609 610 void dec_b(Register dst); 611 void dec_b(Operand dst); 612 613 void dec(Register dst); 614 void dec(Operand dst); 615 616 void cdq(); 617 618 void idiv(Register src) { idiv(Operand(src)); } 619 void idiv(Operand src); 620 void div(Register src) { div(Operand(src)); } 621 void div(Operand src); 622 623 // Signed multiply instructions. 624 void imul(Register src); // edx:eax = eax * src. 625 void imul(Register dst, Register src) { imul(dst, Operand(src)); } 626 void imul(Register dst, Operand src); // dst = dst * src. 
627 void imul(Register dst, Register src, int32_t imm32); // dst = src * imm32. 628 void imul(Register dst, Operand src, int32_t imm32); 629 630 void inc(Register dst); 631 void inc(Operand dst); 632 633 void lea(Register dst, Operand src); 634 635 // Unsigned multiply instruction. 636 void mul(Register src); // edx:eax = eax * reg. 637 638 void neg(Register dst); 639 void neg(Operand dst); 640 641 void not_(Register dst); 642 void not_(Operand dst); 643 644 void or_(Register dst, int32_t imm32); 645 void or_(Register dst, Register src) { or_(dst, Operand(src)); } 646 void or_(Register dst, Operand src); 647 void or_(Operand dst, Register src); 648 void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); } 649 void or_(Operand dst, const Immediate& x); 650 651 void rcl(Register dst, uint8_t imm8); 652 void rcr(Register dst, uint8_t imm8); 653 654 void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); } 655 void rol(Operand dst, uint8_t imm8); 656 void rol_cl(Register dst) { rol_cl(Operand(dst)); } 657 void rol_cl(Operand dst); 658 659 void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); } 660 void ror(Operand dst, uint8_t imm8); 661 void ror_cl(Register dst) { ror_cl(Operand(dst)); } 662 void ror_cl(Operand dst); 663 664 void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); } 665 void sar(Operand dst, uint8_t imm8); 666 void sar_cl(Register dst) { sar_cl(Operand(dst)); } 667 void sar_cl(Operand dst); 668 669 void sbb(Register dst, Register src) { sbb(dst, Operand(src)); } 670 void sbb(Register dst, Operand src); 671 672 void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); } 673 void shl(Operand dst, uint8_t imm8); 674 void shl_cl(Register dst) { shl_cl(Operand(dst)); } 675 void shl_cl(Operand dst); 676 void shld(Register dst, Register src, uint8_t shift); 677 void shld_cl(Register dst, Register src); 678 679 void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); } 680 void shr(Operand dst, uint8_t 
imm8); 681 void shr_cl(Register dst) { shr_cl(Operand(dst)); } 682 void shr_cl(Operand dst); 683 void shrd(Register dst, Register src, uint8_t shift); 684 void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); } 685 void shrd_cl(Operand dst, Register src); 686 687 void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); } 688 void sub(Operand dst, const Immediate& x); 689 void sub(Register dst, Register src) { sub(dst, Operand(src)); } 690 void sub(Register dst, Operand src); 691 void sub(Operand dst, Register src); 692 void sub_sp_32(uint32_t imm); 693 694 void test(Register reg, const Immediate& imm); 695 void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); } 696 void test(Register reg, Operand op); 697 void test(Operand op, const Immediate& imm); 698 void test(Operand op, Register reg) { test(reg, op); } 699 void test_b(Register reg, Operand op); 700 void test_b(Register reg, Immediate imm8); 701 void test_b(Operand op, Immediate imm8); 702 void test_b(Operand op, Register reg) { test_b(reg, op); } 703 void test_b(Register dst, Register src) { test_b(dst, Operand(src)); } 704 void test_w(Register reg, Operand op); 705 void test_w(Register reg, Immediate imm16); 706 void test_w(Operand op, Immediate imm16); 707 void test_w(Operand op, Register reg) { test_w(reg, op); } 708 void test_w(Register dst, Register src) { test_w(dst, Operand(src)); } 709 710 void xor_(Register dst, int32_t imm32); 711 void xor_(Register dst, Register src) { xor_(dst, Operand(src)); } 712 void xor_(Register dst, Operand src); 713 void xor_(Operand dst, Register src); 714 void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); } 715 void xor_(Operand dst, const Immediate& x); 716 717 // Bit operations. 
718 void bswap(Register dst); 719 void bt(Operand dst, Register src); 720 void bts(Register dst, Register src) { bts(Operand(dst), src); } 721 void bts(Operand dst, Register src); 722 void bsr(Register dst, Register src) { bsr(dst, Operand(src)); } 723 void bsr(Register dst, Operand src); 724 void bsf(Register dst, Register src) { bsf(dst, Operand(src)); } 725 void bsf(Register dst, Operand src); 726 727 // Miscellaneous 728 void hlt(); 729 void int3(); 730 void nop(); 731 void ret(int imm16); 732 void ud2(); 733 734 // Label operations & relative jumps (PPUM Appendix D) 735 // 736 // Takes a branch opcode (cc) and a label (L) and generates 737 // either a backward branch or a forward branch and links it 738 // to the label fixup chain. Usage: 739 // 740 // Label L; // unbound label 741 // j(cc, &L); // forward branch to unbound label 742 // bind(&L); // bind label to the current pc 743 // j(cc, &L); // backward branch to bound label 744 // bind(&L); // illegal: a label may be bound only once 745 // 746 // Note: The same Label can be used for forward and backward branches 747 // but it may be bound only once. 748 749 void bind(Label* L); // binds an unbound label L to the current code position 750 751 // Calls 752 void call(Label* L); 753 void call(Address entry, RelocInfo::Mode rmode); 754 void call(Register reg) { call(Operand(reg)); } 755 void call(Operand adr); 756 void call(Handle<Code> code, RelocInfo::Mode rmode); 757 void wasm_call(Address address, RelocInfo::Mode rmode); 758 759 // Jumps 760 // unconditional jump to L 761 void jmp(Label* L, Label::Distance distance = Label::kFar); 762 void jmp(Address entry, RelocInfo::Mode rmode); 763 void jmp(Register reg) { jmp(Operand(reg)); } 764 void jmp(Operand adr); 765 void jmp(Handle<Code> code, RelocInfo::Mode rmode); 766 // Unconditional jump relative to the current address. Low-level routine, 767 // use with caution! 
768 void jmp_rel(int offset); 769 770 // Conditional jumps 771 void j(Condition cc, Label* L, Label::Distance distance = Label::kFar); 772 void j(Condition cc, byte* entry, RelocInfo::Mode rmode); 773 void j(Condition cc, Handle<Code> code, 774 RelocInfo::Mode rmode = RelocInfo::CODE_TARGET); 775 776 // Floating-point operations 777 void fld(int i); 778 void fstp(int i); 779 780 void fld1(); 781 void fldz(); 782 void fldpi(); 783 void fldln2(); 784 785 void fld_s(Operand adr); 786 void fld_d(Operand adr); 787 788 void fstp_s(Operand adr); 789 void fst_s(Operand adr); 790 void fstp_d(Operand adr); 791 void fst_d(Operand adr); 792 793 void fild_s(Operand adr); 794 void fild_d(Operand adr); 795 796 void fist_s(Operand adr); 797 798 void fistp_s(Operand adr); 799 void fistp_d(Operand adr); 800 801 // The fisttp instructions require SSE3. 802 void fisttp_s(Operand adr); 803 void fisttp_d(Operand adr); 804 805 void fabs(); 806 void fchs(); 807 void fcos(); 808 void fsin(); 809 void fptan(); 810 void fyl2x(); 811 void f2xm1(); 812 void fscale(); 813 void fninit(); 814 815 void fadd(int i); 816 void fadd_i(int i); 817 void fsub(int i); 818 void fsub_i(int i); 819 void fmul(int i); 820 void fmul_i(int i); 821 void fdiv(int i); 822 void fdiv_i(int i); 823 824 void fisub_s(Operand adr); 825 826 void faddp(int i = 1); 827 void fsubp(int i = 1); 828 void fsubrp(int i = 1); 829 void fmulp(int i = 1); 830 void fdivp(int i = 1); 831 void fprem(); 832 void fprem1(); 833 834 void fxch(int i = 1); 835 void fincstp(); 836 void ffree(int i = 0); 837 838 void ftst(); 839 void fucomp(int i); 840 void fucompp(); 841 void fucomi(int i); 842 void fucomip(); 843 void fcompp(); 844 void fnstsw_ax(); 845 void fwait(); 846 void fnclex(); 847 848 void frndint(); 849 850 void sahf(); 851 void setcc(Condition cc, Register reg); 852 853 void cpuid(); 854 855 // SSE instructions 856 void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); } 857 void addss(XMMRegister dst, Operand 
src);
  // Scalar single-precision (SS) arithmetic. The register-register overloads
  // forward to the Operand overloads.
  void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
  void subss(XMMRegister dst, Operand src);
  void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
  void mulss(XMMRegister dst, Operand src);
  void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
  void divss(XMMRegister dst, Operand src);
  void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
  void sqrtss(XMMRegister dst, Operand src);

  // Single-precision compare and data-movement instructions.
  void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
  void movaps(XMMRegister dst, Operand src);
  void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
  void shufpd(XMMRegister dst, XMMRegister src, byte imm8);

  void movhlps(XMMRegister dst, XMMRegister src);
  void movlhps(XMMRegister dst, XMMRegister src);
  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);
  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
  void maxss(XMMRegister dst, Operand src);
  void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
  void minss(XMMRegister dst, Operand src);

  void haddps(XMMRegister dst, Operand src);
  void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
  // sqrtpd is encoded directly here (66 0F 51) via the generic SSE2 emitter.
  void sqrtpd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x51);
  }
  void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }

  // Packed compares take the comparison predicate as an immediate.
  void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
  void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmpps(dst, Operand(src), cmp);
  }
  void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmppd(dst, Operand(src), cmp);
  }

// Packed floating-point comparison operations.
// Each entry is (mnemonic suffix, immediate predicate for cmpps/cmppd).
#define PACKED_CMP_LIST(V) \
  V(cmpeq, 0x0)            \
  V(cmplt, 0x1)            \
  V(cmple, 0x2)            \
  V(cmpunord, 0x3)         \
  V(cmpneq, 0x4)

// Declares instr##ps / instr##pd convenience wrappers that forward to
// cmpps/cmppd with the fixed immediate predicate.
#define SSE_CMP_P(instr, imm8)                                            \
  void instr##ps(XMMRegister dst, XMMRegister src) {                      \
    cmpps(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, XMMRegister src) {                      \
    cmppd(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  PACKED_CMP_LIST(SSE_CMP_P)
#undef SSE_CMP_P

  // SSE2 instructions
  // Truncating float -> int32 conversions.
  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src) {
    cvttss2si(dst, Operand(src));
  }
  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src) {
    cvttsd2si(dst, Operand(src));
  }
  void cvtsd2si(Register dst, XMMRegister src);

  // int32 -> float and float <-> double conversions.
  void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
  void cvtsi2ss(XMMRegister dst, Operand src);
  void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
  void cvtsi2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, XMMRegister src) {
    cvtss2sd(dst, Operand(src));
  }
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void cvtpd2ps(XMMRegister dst, XMMRegister src);
  void cvttps2dq(XMMRegister dst, XMMRegister src) {
    cvttps2dq(dst, Operand(src));
  }
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  void
ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
  void ucomisd(XMMRegister dst, Operand src);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // movapd/movupd are encoded directly (66 0F 28 / 66 0F 10) via the
  // generic SSE2 emitter.
  void movapd(XMMRegister dst, XMMRegister src) { movapd(dst, Operand(src)); }
  void movapd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x28);
  }
  void movupd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x10);
  }

  void movmskpd(Register dst, XMMRegister src);
  void movmskps(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  void cmpltsd(XMMRegister dst, XMMRegister src);

  // 128-bit integer moves.
  void movdqa(XMMRegister dst, Operand src);
  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, XMMRegister src);
  // Selects the aligned (movdqa) or unaligned (movdqu) form at the call site.
  void movdq(bool aligned, XMMRegister dst, Operand src) {
    if (aligned) {
      movdqa(dst, src);
    } else {
      movdqu(dst, src);
    }
  }

  void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
  void movd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
  void movsd(XMMRegister dst, Operand src);
  void movsd(Operand dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);
  void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }

  void extractps(Operand dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);

  void pcmpgtq(XMMRegister dst, XMMRegister src);

  // Packed integer shifts by an immediate count.
  void psllw(XMMRegister reg, uint8_t shift);
  void pslld(XMMRegister reg, uint8_t shift);
  void psrlw(XMMRegister reg, uint8_t shift);
  void psrld(XMMRegister reg, uint8_t shift);
  void psraw(XMMRegister reg, uint8_t shift);
  void psrad(XMMRegister reg, uint8_t shift);
  void psllq(XMMRegister reg, uint8_t shift);
  void psrlq(XMMRegister reg, uint8_t shift);

  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufhw(dst, Operand(src), shuffle);
  }
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshuflw(dst, Operand(src), shuffle);
  }
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufd(dst, Operand(src), shuffle);
  }
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
    pblendw(dst, Operand(src), mask);
  }
  void pblendw(XMMRegister dst, Operand src, uint8_t mask);

  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
    palignr(dst, Operand(src), mask);
  }
  void palignr(XMMRegister dst, Operand src, uint8_t mask);

  // Lane extract/insert with an immediate lane offset.
  void pextrb(Register dst, XMMRegister src, uint8_t offset) {
    pextrb(Operand(dst), src, offset);
  }
  void pextrb(Operand dst, XMMRegister src, uint8_t offset);
  // SSE3 instructions
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
  void movshdup(XMMRegister dst, XMMRegister src);

  // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
  void pextrw(Register dst, XMMRegister src, uint8_t offset) {
    pextrw(Operand(dst), src, offset);
  }
  void pextrw(Operand dst, XMMRegister src, uint8_t offset);
  void pextrd(Register dst, XMMRegister src, uint8_t offset) {
    pextrd(Operand(dst), src, offset);
  }
  void pextrd(Operand dst, XMMRegister src, uint8_t offset);

  void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
    insertps(dst, Operand(src), offset);
  }
  void insertps(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
    pinsrb(dst, Operand(src), offset);
  }
  void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
    pinsrw(dst, Operand(src), offset);
  }
  void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
    pinsrd(dst, Operand(src), offset);
  }
  void pinsrd(XMMRegister dst, Operand src, uint8_t offset);

  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // AVX instructions
  // Three-operand VEX-encoded scalar single ops; all funnel into vss() with
  // the instruction's opcode byte.
  void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vaddss(dst, src1, Operand(src2));
  }
  void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x58, dst, src1, src2);
  }
  void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsubss(dst, src1, Operand(src2));
  }
  void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5c, dst, src1, src2);
  }
  void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vmulss(dst, src1, Operand(src2));
  }
  void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x59, dst, src1, src2);
  }
  void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vdivss(dst, src1, Operand(src2));
  }
  void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5e, dst, src1, src2);
  }
  void vmaxss(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
    vmaxss(dst, src1, Operand(src2));
  }
  void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5f, dst, src1, src2);
  }
  void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vminss(dst, src1, Operand(src2));
  }
  void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5d, dst, src1, src2);
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsqrtss(dst, src1, Operand(src2));
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x51, dst, src1, src2);
  }
  // Common emitter for the VEX scalar-single instructions above.
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vhaddps(dst, src1, Operand(src2));
  }
  void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
  void vsqrtpd(XMMRegister dst, Operand src) {
    vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
  }
  // VEX-encoded scalar moves; 0x10 is the load form, 0x11 the store form.
  void vmovss(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vmovsd(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF2, k0F, kWIG);
  }

  void vextractps(Operand dst, XMMRegister src, byte imm8);

  void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2);

  // VEX-encoded packed moves, implemented on the vps/vpd helpers.
  void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
  void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufps(dst, src1, Operand(src2), imm8);
  }
  void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
  void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufpd(dst, src1, Operand(src2), imm8);
  }
  void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);

  void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);
  void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovhps(Operand dst, XMMRegister src);

  // VEX packed integer shifts by an immediate count.
  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsllq(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlq(XMMRegister dst, XMMRegister src, uint8_t imm8);

void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufhw(dst, Operand(src), shuffle);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshuflw(dst, Operand(src), shuffle);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufd(dst, Operand(src), shuffle);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  // Variable blends: lane selection is controlled by the mask register.
  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);

  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpblendw(dst, src1, Operand(src2), mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpalignr(dst, src1, Operand(src2), mask);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  // VEX lane extract/insert; register forms forward to the Operand forms.
  void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
    vpextrb(Operand(dst), src, offset);
  }
  void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
    vpextrw(Operand(dst), src, offset);
  }
  void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
    vpextrd(Operand(dst), src, offset);
  }
  void vpextrd(Operand dst, XMMRegister src, uint8_t offset);

  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 uint8_t offset) {
    vinsertps(dst, src1, Operand(src2), offset);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
                 uint8_t offset);
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrb(dst, src1, Operand(src2), offset);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrw(dst, src1, Operand(src2), offset);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrd(dst, src1, Operand(src2), offset);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);

  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // VEX-encoded packed conversions, emitted via vinstr.
  void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtpd2ps(XMMRegister dst, XMMRegister src) {
    vinstr(0x5A, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vcvttps2dq(dst, Operand(src));
  }
  void vcvttps2dq(XMMRegister dst, Operand src) {
    vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvttpd2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, k66, k0F, kWIG);
  }
  // vinstr takes XMM operands, so the GPR destination's register code is
  // wrapped in an XMMRegister of the same code for encoding purposes.
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
1282 vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0); 1283 } 1284 void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1285 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG); 1286 } 1287 void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) { 1288 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG); 1289 } 1290 void vcvttss2si(Register dst, XMMRegister src) { 1291 XMMRegister idst = XMMRegister::from_code(dst.code()); 1292 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0); 1293 } 1294 void vcvttss2si(Register dst, Operand src) { 1295 XMMRegister idst = XMMRegister::from_code(dst.code()); 1296 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0); 1297 } 1298 1299 void vmovddup(XMMRegister dst, Operand src) { 1300 vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG); 1301 } 1302 void vmovddup(XMMRegister dst, XMMRegister src) { 1303 vmovddup(dst, Operand(src)); 1304 } 1305 void vmovshdup(XMMRegister dst, XMMRegister src) { 1306 vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG); 1307 } 1308 void vbroadcastss(XMMRegister dst, XMMRegister src) { 1309 vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2); 1310 } 1311 void vbroadcastss(XMMRegister dst, Operand src) { 1312 vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0); 1313 } 1314 void vmovdqa(XMMRegister dst, Operand src) { 1315 vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG); 1316 } 1317 void vmovdqa(XMMRegister dst, XMMRegister src) { 1318 vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG); 1319 } 1320 void vmovdqu(XMMRegister dst, Operand src) { 1321 vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG); 1322 } 1323 void vmovdqu(Operand dst, XMMRegister src) { 1324 vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG); 1325 } 1326 void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); } 1327 void vmovd(XMMRegister dst, Operand src) { 1328 vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG); 1329 } 1330 void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); } 1331 void vmovd(Operand dst, XMMRegister src) { 1332 vinstr(0x7E, src, xmm0, 
dst, k66, k0F, kWIG);
  }

  void vmovmskpd(Register dst, XMMRegister src);
  void vmovmskps(Register dst, XMMRegister src);

  void vpmovmskb(Register dst, XMMRegister src);

  // VEX-encoded unordered compares (opcode 0x2E; 66 prefix selects the
  // double-precision form, no prefix the single-precision form).
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, Operand src) {
    vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomiss(XMMRegister dst, XMMRegister src) {
    vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
  }
  void vucomiss(XMMRegister dst, Operand src) {
    vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
  }

  // BMI1 instructions; register forms forward to the Operand forms, which
  // emit through the bmi1() helper. For blsi/blsmsk/blsr the fixed GPR
  // argument (ebx/edx/ecx) supplies the opcode-extension register code.
  void andn(Register dst, Register src1, Register src2) {
    andn(dst, src1, Operand(src2));
  }
  void andn(Register dst, Register src1, Operand src2) {
    bmi1(0xf2, dst, src1, src2);
  }
  void bextr(Register dst, Register src1, Register src2) {
    bextr(dst, Operand(src1), src2);
  }
  void bextr(Register dst, Operand src1, Register src2) {
    bmi1(0xf7, dst, src2, src1);
  }
  void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
  void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
  void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
  void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
  void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
  void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
  void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
  void tzcnt(Register dst, Operand src);

  void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
  void lzcnt(Register dst, Operand src);

  void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
  void popcnt(Register dst, Operand src);

  // BMI2 instructions, emitted through the bmi2() helper.
  void bzhi(Register dst, Register src1, Register src2) {
    bzhi(dst, Operand(src1), src2);
  }
  void bzhi(Register dst, Operand src1, Register src2) {
    bmi2(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulx(Register dst1, Register dst2, Register src) {
    mulx(dst1, dst2, Operand(src));
  }
  void mulx(Register dst1, Register dst2, Operand src) {
    bmi2(kF2, 0xf6, dst1, dst2, src);
  }
  void pdep(Register dst, Register src1, Register src2) {
    pdep(dst, src1, Operand(src2));
  }
  void pdep(Register dst, Register src1, Operand src2) {
    bmi2(kF2, 0xf5, dst, src1, src2);
  }
  void pext(Register dst, Register src1, Register src2) {
    pext(dst, src1, Operand(src2));
  }
  void pext(Register dst, Register src1, Operand src2) {
    bmi2(kF3, 0xf5, dst, src1, src2);
  }
  void sarx(Register dst, Register src1, Register src2) {
    sarx(dst, Operand(src1), src2);
  }
  void sarx(Register dst, Operand src1, Register src2) {
    bmi2(kF3, 0xf7, dst, src2, src1);
  }
  void shlx(Register dst, Register src1, Register src2) {
    shlx(dst, Operand(src1), src2);
  }
  void shlx(Register dst, Operand src1, Register src2) {
    bmi2(k66, 0xf7, dst, src2, src1);
  }
  void shrx(Register dst, Register src1, Register src2) {
    shrx(dst, Operand(src1), src2);
  }
  void shrx(Register dst, Operand src1, Register src2) {
    bmi2(kF2, 0xf7, dst, src2, src1);
  }
  void rorx(Register dst, Register src, byte imm8) {
    rorx(dst, Operand(src), imm8);
  }
  void rorx(Register dst, Operand src, byte imm8);

  // Implementation of packed single-precision floating-point SSE instructions.
  void ps(byte op, XMMRegister dst, Operand src);
  // Implementation of packed double-precision floating-point SSE instructions.
  void pd(byte op, XMMRegister dst, Operand src);

// Packed FP operations shared by the SSE and AVX declaration macros below.
// Each entry is (mnemonic stem, opcode byte).
#define PACKED_OP_LIST(V) \
  V(unpckl, 0x14)         \
  V(and, 0x54)            \
  V(andn, 0x55)           \
  V(or, 0x56)             \
  V(xor, 0x57)            \
  V(add, 0x58)            \
  V(mul, 0x59)            \
  V(sub, 0x5c)            \
  V(min, 0x5d)            \
  V(div, 0x5e)            \
  V(max, 0x5f)

// Declares name##ps / name##pd pairs implemented on ps()/pd().
#define SSE_PACKED_OP_DECLARE(name, opcode)                             \
  void name##ps(XMMRegister dst, XMMRegister src) {                     \
    ps(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##ps(XMMRegister dst, Operand src) { ps(opcode, dst, src); } \
  void name##pd(XMMRegister dst, XMMRegister src) {                     \
    pd(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##pd(XMMRegister dst, Operand src) { pd(opcode, dst, src); }

  PACKED_OP_LIST(SSE_PACKED_OP_DECLARE)
#undef SSE_PACKED_OP_DECLARE

// Declares the three-operand VEX counterparts on vps()/vpd().
#define AVX_PACKED_OP_DECLARE(name, opcode)                               \
  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(opcode, dst, src1, src2);                                         \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vpd(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vpd(opcode, dst, src1, src2);                                         \
  }

  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
#undef AVX_PACKED_OP_DECLARE
#undef PACKED_OP_LIST

  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);

// Declares v##instr##{ps,pd} wrappers that forward to vcmpps/vcmppd with a
// fixed immediate predicate.
#define AVX_CMP_P(instr, imm8)                                             \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmpps(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmppd(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  PACKED_CMP_LIST(AVX_CMP_P)
  // vcmpgeps/vcmpgepd only in AVX.
  AVX_CMP_P(cmpge, 0xd)
#undef AVX_CMP_P
#undef PACKED_CMP_LIST

// Other SSE and AVX instructions
#define DECLARE_SSE_UNOP_AND_AVX(instruction, escape, opcode)       \
  void instruction(XMMRegister dst, XMMRegister src) {              \
    instruction(dst, Operand(src));                                 \
  }                                                                 \
  void instruction(XMMRegister dst, Operand src) {                  \
    sse_instr(dst, src, 0x##escape, 0x##opcode);                    \
  }                                                                 \
  void v##instruction(XMMRegister dst, XMMRegister src) {           \
    v##instruction(dst, Operand(src));                              \
  }                                                                 \
  void v##instruction(XMMRegister dst, Operand src) {               \
    vinstr(0x##opcode, dst, xmm0, src, kNoPrefix, k##escape, kWIG); \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_UNOP_AND_AVX)
#undef DECLARE_SSE_UNOP_AND_AVX

#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    instruction(dst, Operand(src));                                   \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    v##instruction(dst, src1, Operand(src2));                                \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    instruction(dst, Operand(src));                                          \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    instruction(dst, Operand(src));                                         \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
#undef DECLARE_SSE4_INSTRUCTION

#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    v##instruction(dst, src1, Operand(src2));                                 \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
                                        opcode)                                \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    v##instruction(dst, Operand(src));                                         \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0);   \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION

  // AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)          \
  void instr(XMMRegister dst, XMMRegister src) {                           \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2,     \
           kW0, AVX2);                                                     \
  }                                                                        \
  void instr(XMMRegister dst, Operand src) {                               \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2,     \
           kW0, AVX2);                                                     \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION

#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA

  // Prefetch src position into cache level.
  // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
  // non-temporal prefetch.
  void prefetch(Operand src, int level);
  // TODO(lrn): Need SFENCE for movnt?

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Record a deoptimization reason that can be used by a log or cpu profiler.
1637 // Use --trace-deopt to enable. 1638 void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id, 1639 SourcePosition position, int id); 1640 1641 // Writes a single byte or word of data in the code stream. Used for 1642 // inline tables, e.g., jump-tables. 1643 void db(uint8_t data); 1644 void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO); 1645 void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO); 1646 void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) { 1647 dd(data, rmode); 1648 } 1649 void dd(Label* label); 1650 1651 // Check if there is less than kGap bytes available in the buffer. 1652 // If this is the case, we need to grow the buffer before emitting 1653 // an instruction or relocation information. 1654 inline bool buffer_overflow() const { 1655 return pc_ >= reloc_info_writer.pos() - kGap; 1656 } 1657 1658 // Get the number of bytes available in the buffer. 1659 inline int available_space() const { return reloc_info_writer.pos() - pc_; } 1660 1661 static bool IsNop(Address addr); 1662 1663 int relocation_writer_size() { 1664 return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos(); 1665 } 1666 1667 // Avoid overflows for displacements etc. 
1668 static constexpr int kMaximalBufferSize = 512 * MB; 1669 1670 byte byte_at(int pos) { return buffer_start_[pos]; } 1671 void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; } 1672 1673 protected: 1674 void emit_sse_operand(XMMRegister reg, Operand adr); 1675 void emit_sse_operand(XMMRegister dst, XMMRegister src); 1676 void emit_sse_operand(Register dst, XMMRegister src); 1677 void emit_sse_operand(XMMRegister dst, Register src); 1678 1679 Address addr_at(int pos) { 1680 return reinterpret_cast<Address>(buffer_start_ + pos); 1681 } 1682 1683 private: 1684 uint32_t long_at(int pos) { 1685 return ReadUnalignedValue<uint32_t>(addr_at(pos)); 1686 } 1687 void long_at_put(int pos, uint32_t x) { 1688 WriteUnalignedValue(addr_at(pos), x); 1689 } 1690 1691 // code emission 1692 void GrowBuffer(); 1693 inline void emit(uint32_t x); 1694 inline void emit(Handle<HeapObject> handle); 1695 inline void emit(uint32_t x, RelocInfo::Mode rmode); 1696 inline void emit(Handle<Code> code, RelocInfo::Mode rmode); 1697 inline void emit(const Immediate& x); 1698 inline void emit_b(Immediate x); 1699 inline void emit_w(const Immediate& x); 1700 inline void emit_q(uint64_t x); 1701 1702 // Emit the code-object-relative offset of the label's position 1703 inline void emit_code_relative_offset(Label* label); 1704 1705 // instruction generation 1706 void emit_arith_b(int op1, int op2, Register dst, int imm8); 1707 1708 // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81) 1709 // with a given destination expression and an immediate operand. It attempts 1710 // to use the shortest encoding possible. 1711 // sel specifies the /n in the modrm byte (see the Intel PRM). 
  void emit_arith(int sel, Operand dst, const Immediate& x);

  void emit_operand(int code, Operand adr);
  void emit_operand(Register reg, Operand adr);
  void emit_operand(XMMRegister reg, Operand adr);

  void emit_label(Label* label);

  void emit_farith(int b1, int b2, int i);

  // Emit vex prefix. The enum values below encode the corresponding VEX
  // prefix fields (pp = mandatory SIMD prefix, L = vector length,
  // W = operand-size/opcode-extension bit, mmmmm = leading opcode bytes).
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);

  // labels
  void print(const Label* L);
  void bind_to(Label* L, int pos);

  // displacements
  inline Displacement disp_at(Label* L);
  inline void disp_at_put(Label* L, Displacement disp);
  inline void emit_disp(Label* L, Displacement::Type type);
  inline void emit_near_disp(Label* L);

  // Shared encoders for the legacy SSE instruction families; each takes the
  // raw prefix/escape/opcode bytes of the instruction to emit.
  void sse_instr(XMMRegister dst, Operand src, byte prefix, byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  // Shared encoders for VEX-prefixed (AVX) instructions.
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  // Most BMI instructions are similar.
  void bmi1(byte op, Register reg, Register vreg, Operand rm);
  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // record the position of jmp/jcc instruction
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class EnsureSpace;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Variables for this instance of assembler
  // Bookkeeping for far-jump optimization: count and positions of emitted
  // jmp/jcc instructions, and the per-label positions that reference them.
  // NOTE(review): std::map and std::vector are used here but <map>/<vector>
  // are not included directly in this header — presumably they arrive
  // transitively; verify with include-what-you-use.
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;
};

// Helper class that ensures that there is enough space for generating
// instructions and relocation information. The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
1799class EnsureSpace { 1800 public: 1801 explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) { 1802 if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer(); 1803#ifdef DEBUG 1804 space_before_ = assembler->available_space(); 1805#endif 1806 } 1807 1808#ifdef DEBUG 1809 ~EnsureSpace() { 1810 int bytes_generated = space_before_ - assembler_->available_space(); 1811 DCHECK(bytes_generated < assembler_->kGap); 1812 } 1813#endif 1814 1815 private: 1816 Assembler* const assembler_; 1817#ifdef DEBUG 1818 int space_before_; 1819#endif 1820}; 1821 1822} // namespace internal 1823} // namespace v8 1824 1825#endif // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_ 1826