1// Copyright (c) 1994-2006 Sun Microsystems Inc.
2// All Rights Reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// - Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// - Redistribution in binary form must reproduce the above copyright
12// notice, this list of conditions and the following disclaimer in the
13// documentation and/or other materials provided with the distribution.
14//
15// - Neither the name of Sun Microsystems or the names of contributors may
16// be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
// The original source code covered by the above license has been
32// modified significantly by Google Inc.
33// Copyright 2011 the V8 project authors. All rights reserved.
34
35// A light-weight IA32 Assembler.
36
37#ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
38#define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
39
40#include <deque>
41#include <memory>
42
43#include "src/codegen/assembler.h"
44#include "src/codegen/ia32/constants-ia32.h"
45#include "src/codegen/ia32/fma-instr.h"
46#include "src/codegen/ia32/register-ia32.h"
47#include "src/codegen/ia32/sse-instr.h"
48#include "src/codegen/label.h"
49#include "src/execution/isolate.h"
50#include "src/objects/smi.h"
51#include "src/utils/utils.h"
52
53namespace v8 {
54namespace internal {
55
class SafepointTableBuilder;

// Condition codes for conditional instructions (Jcc, SETcc, CMOVcc).
// The numeric values are the x86 condition-code encodings; each even/odd
// pair (2k, 2k + 1) encodes a condition and its logical complement.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  // Complementary x86 condition codes differ only in their lowest bit,
  // so flipping bit 0 negates the condition.
  const int negated = static_cast<int>(cc) ^ 1;
  return static_cast<Condition>(negated);
}
95
// Rounding modes, used as the 2-bit rounding-control immediate of the
// SSE4.1 round instructions. NOTE(review): values match the x86
// rounding-control encoding -- confirm against the round* emitters.
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
102
103// -----------------------------------------------------------------------------
104// Machine instruction Immediates
105
// An Immediate wraps a 32-bit value together with its RelocInfo mode, or
// a pending HeapObjectRequest whose actual address is patched in later.
class Immediate {
 public:
  // Calls where x is an Address (uintptr_t) resolve to this overload.
  inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    value_.immediate = x;
    rmode_ = rmode;
  }
  inline explicit Immediate(const ExternalReference& ext)
      : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
  inline explicit Immediate(Handle<HeapObject> handle)
      : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {}
  inline explicit Immediate(Smi value)
      : Immediate(static_cast<intptr_t>(value.ptr())) {}

  static Immediate EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Immediate EmbeddedStringConstant(const StringConstantBase* str);

  // Wraps a label so the immediate becomes an internal reference that is
  // resolved once the label's position is known.
  static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); }

  // True if this immediate is a placeholder for a heap object that has
  // not been allocated yet (only legal for embedded-object and
  // code-target reloc modes, as checked below).
  bool is_heap_object_request() const {
    DCHECK_IMPLIES(is_heap_object_request_,
                   rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
                       rmode_ == RelocInfo::CODE_TARGET);
    return is_heap_object_request_;
  }

  HeapObjectRequest heap_object_request() const {
    DCHECK(is_heap_object_request());
    return value_.heap_object_request;
  }

  // The raw 32-bit payload; invalid while a heap object request is
  // pending (the union then holds the request instead).
  int immediate() const {
    DCHECK(!is_heap_object_request());
    return value_.immediate;
  }

  bool is_embedded_object() const {
    return !is_heap_object_request() &&
           rmode() == RelocInfo::FULL_EMBEDDED_OBJECT;
  }

  // Reinterprets the stored 32-bit value as the location of a handle.
  Handle<HeapObject> embedded_object() const {
    return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
  }

  bool is_external_reference() const {
    return rmode() == RelocInfo::EXTERNAL_REFERENCE;
  }

  ExternalReference external_reference() const {
    DCHECK(is_external_reference());
    return bit_cast<ExternalReference>(immediate());
  }

  // The size predicates below additionally require NO_INFO: a
  // relocatable immediate may be patched later, so its current bits
  // cannot be trusted to stay within range.
  bool is_zero() const {
    return RelocInfo::IsNoInfo(rmode_) && immediate() == 0;
  }
  bool is_int8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int8(immediate());
  }
  bool is_uint8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint8(immediate());
  }
  bool is_int16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int16(immediate());
  }

  bool is_uint16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint16(immediate());
  }

  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  // Stores the label pointer itself as an internal reference; presumably
  // resolved when the label is bound -- see the Assembler for details.
  inline explicit Immediate(Label* value) {
    value_.immediate = reinterpret_cast<int32_t>(value);
    rmode_ = RelocInfo::INTERNAL_REFERENCE;
  }

  // Either the raw value or a pending heap object request; which member
  // is active is tracked by is_heap_object_request_.
  union Value {
    Value() {}
    HeapObjectRequest heap_object_request;
    int immediate;
  } value_;
  bool is_heap_object_request_ = false;
  RelocInfo::Mode rmode_;

  friend class Operand;
  friend class Assembler;
  friend class MacroAssembler;
};
197
198// -----------------------------------------------------------------------------
199// Machine instruction Operands
200
// Scale factors for the index register of a [base + index*scale + disp]
// operand: the index is multiplied by 1 << value.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  // On ia32, system pointers and tagged values are 4 bytes wide.
  times_half_system_pointer_size = times_2,
  times_system_pointer_size = times_4,

  times_tagged_size = times_4,
};
213
214class V8_EXPORT_PRIVATE Operand {
215 public:
216  // reg
217  V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }
218
219  // XMM reg
220  V8_INLINE explicit Operand(XMMRegister xmm_reg) {
221    Register reg = Register::from_code(xmm_reg.code());
222    set_modrm(3, reg);
223  }
224
225  // [disp/r]
226  V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
227    set_modrm(0, ebp);
228    set_dispr(disp, rmode);
229  }
230
231  // [disp/r]
232  V8_INLINE explicit Operand(Immediate imm) {
233    set_modrm(0, ebp);
234    set_dispr(imm.immediate(), imm.rmode_);
235  }
236
237  // [base + disp/r]
238  explicit Operand(Register base, int32_t disp,
239                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);
240
241  // [rip + disp/r]
242  explicit Operand(Label* label) {
243    set_modrm(0, ebp);
244    set_dispr(reinterpret_cast<intptr_t>(label), RelocInfo::INTERNAL_REFERENCE);
245  }
246
247  // [base + index*scale + disp/r]
248  explicit Operand(Register base, Register index, ScaleFactor scale,
249                   int32_t disp, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
250
251  // [index*scale + disp/r]
252  explicit Operand(Register index, ScaleFactor scale, int32_t disp,
253                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);
254
255  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
256    return Operand(index, scale, reinterpret_cast<int32_t>(table),
257                   RelocInfo::INTERNAL_REFERENCE);
258  }
259
260  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
261    return Operand(base, imm.value_.immediate, imm.rmode_);
262  }
263
264  // Returns true if this Operand is a wrapper for the specified register.
265  bool is_reg(Register reg) const { return is_reg(reg.code()); }
266  bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }
267
268  // Returns true if this Operand is a wrapper for one register.
269  bool is_reg_only() const;
270
271  // Asserts that this Operand is a wrapper for one register and returns the
272  // register.
273  Register reg() const;
274
275  base::Vector<const byte> encoded_bytes() const { return {buf_, len_}; }
276  RelocInfo::Mode rmode() { return rmode_; }
277
278 private:
279  // Set the ModRM byte without an encoded 'reg' register. The
280  // register is encoded later as part of the emit_operand operation.
281  inline void set_modrm(int mod, Register rm) {
282    DCHECK_EQ(mod & -4, 0);
283    buf_[0] = mod << 6 | rm.code();
284    len_ = 1;
285  }
286
287  inline void set_sib(ScaleFactor scale, Register index, Register base);
288  inline void set_disp8(int8_t disp);
289  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
290    DCHECK(len_ == 1 || len_ == 2);
291    Address p = reinterpret_cast<Address>(&buf_[len_]);
292    WriteUnalignedValue(p, disp);
293    len_ += sizeof(int32_t);
294    rmode_ = rmode;
295  }
296
297  inline bool is_reg(int reg_code) const {
298    return ((buf_[0] & 0xF8) == 0xC0)  // addressing mode is register only.
299           && ((buf_[0] & 0x07) == reg_code);  // register codes match.
300  }
301
302  byte buf_[6];
303  // The number of bytes in buf_.
304  uint8_t len_ = 0;
305  // Only valid if len_ > 4.
306  RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;
307};
308ASSERT_TRIVIALLY_COPYABLE(Operand);
309static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
310              "Operand must be small enough to pass it by value");
311
// Inequality between an Operand and an XMM register; defined in the .cc
// file -- presumably true when op does not wrap r (confirm there).
bool operator!=(Operand op, XMMRegister r);
313
314// -----------------------------------------------------------------------------
315// A Displacement describes the 32bit immediate field of an instruction which
316// may be used together with a Label in order to refer to a yet unknown code
317// position. Displacements stored in the instruction stream are used to describe
318// the instruction and to chain a list of instructions using the same Label.
319// A Displacement contains 2 different fields:
320//
321// next field: position of next displacement in the chain (0 = end of list)
322// type field: instruction type
323//
324// A next value of null (0) indicates the end of a chain (note that there can
325// be no displacement at position zero, because there is always at least one
326// instruction byte before the displacement).
327//
328// Displacement _data field layout
329//
330// |31.....2|1......0|
// |  next  |  type  |
332
333class Displacement {
334 public:
335  enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };
336
337  int data() const { return data_; }
338  Type type() const { return TypeField::decode(data_); }
339  void next(Label* L) const {
340    int n = NextField::decode(data_);
341    n > 0 ? L->link_to(n) : L->Unuse();
342  }
343  void link_to(Label* L) { init(L, type()); }
344
345  explicit Displacement(int data) { data_ = data; }
346
347  Displacement(Label* L, Type type) { init(L, type); }
348
349  void print() {
350    PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
351           NextField::decode(data_));
352  }
353
354 private:
355  int data_;
356
357  using TypeField = base::BitField<Type, 0, 2>;
358  using NextField = base::BitField<int, 2, 32 - 2>;
359
360  void init(Label* L, Type type);
361};
362
363class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
364 private:
365  // We check before assembling an instruction that there is sufficient
366  // space to write an instruction and its relocation information.
367  // The relocation writer's position must be kGap bytes above the end of
368  // the generated instructions. This leaves enough space for the
369  // longest possible ia32 instruction, 15 bytes, and the longest possible
370  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
371  // (There is a 15 byte limit on ia32 instruction length that rules out some
372  // otherwise valid instructions.)
373  // This allows for a single, fast space check per instruction.
374  static constexpr int kGap = 32;
375  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);
376
377 public:
378  // Create an assembler. Instructions and relocation information are emitted
379  // into a buffer, with the instructions starting from the beginning and the
380  // relocation information starting from the end of the buffer. See CodeDesc
381  // for a detailed comment on the layout (globals.h).
382  //
383  // If the provided buffer is nullptr, the assembler allocates and grows its
384  // own buffer. Otherwise it takes ownership of the provided buffer.
385  explicit Assembler(const AssemblerOptions&,
386                     std::unique_ptr<AssemblerBuffer> = {});
387
388  // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
389  static constexpr int kNoHandlerTable = 0;
390  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
391  void GetCode(Isolate* isolate, CodeDesc* desc,
392               SafepointTableBuilder* safepoint_table_builder,
393               int handler_table_offset);
394
395  // Convenience wrapper for code without safepoint or handler tables.
396  void GetCode(Isolate* isolate, CodeDesc* desc) {
397    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
398  }
399
400  void FinalizeJumpOptimizationInfo();
401
402  // Unused on this architecture.
403  void MaybeEmitOutOfLineConstantPool() {}
404
405  // Read/Modify the code target in the branch/call instruction at pc.
406  // The isolate argument is unused (and may be nullptr) when skipping flushing.
407  inline static Address target_address_at(Address pc, Address constant_pool);
408  inline static void set_target_address_at(
409      Address pc, Address constant_pool, Address target,
410      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
411
412  // This sets the branch destination (which is in the instruction on x86).
413  // This is for calls and branches within generated code.
414  inline static void deserialization_set_special_target_at(
415      Address instruction_payload, Code code, Address target);
416
417  // Get the size of the special target encoded at 'instruction_payload'.
418  inline static int deserialization_special_target_size(
419      Address instruction_payload);
420
421  // This sets the internal reference at the pc.
422  inline static void deserialization_set_target_internal_reference_at(
423      Address pc, Address target,
424      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
425
426  static constexpr int kSpecialTargetSize = kSystemPointerSize;
427
428  // One byte opcode for test al, 0xXX.
429  static constexpr byte kTestAlByte = 0xA8;
430  // One byte opcode for nop.
431  static constexpr byte kNopByte = 0x90;
432
433  // One byte opcode for a short unconditional jump.
434  static constexpr byte kJmpShortOpcode = 0xEB;
435  // One byte prefix for a short conditional jump.
436  static constexpr byte kJccShortPrefix = 0x70;
437  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
438  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
439  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
440  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
441
442  // ---------------------------------------------------------------------------
443  // Code generation
444  //
445  // - function names correspond one-to-one to ia32 instruction mnemonics
446  // - unless specified otherwise, instructions operate on 32bit operands
447  // - instructions on 8bit (byte) operands/registers have a trailing '_b'
448  // - instructions on 16bit (word) operands/registers have a trailing '_w'
449  // - naming conflicts with C++ keywords are resolved via a trailing '_'
450
451  // NOTE ON INTERFACE: Currently, the interface is not very consistent
452  // in the sense that some operations (e.g. mov()) can be called in more
  // the same way to generate the same instruction: The Register argument
454  // can in some cases be replaced with an Operand(Register) argument.
  // This should be cleaned up and made more orthogonal. The question
456  // is: should we always use Operands instead of Registers where an
457  // Operand is possible, or should we have a Register (overloaded) form
458  // instead? We must be careful to make sure that the selected instruction
459  // is obvious from the parameters to avoid hard-to-find code generation
460  // bugs.
461
462  // Insert the smallest number of nop instructions
463  // possible to align the pc offset to a multiple
464  // of m. m must be a power of 2.
465  void Align(int m);
466  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
468  void DataAlign(int m);
469  void Nop(int bytes = 1);
470  // Aligns code to something that's optimal for a jump target for the platform.
471  void CodeTargetAlign();
472  void LoopHeaderAlign() { CodeTargetAlign(); }
473
474  // Stack
475  void pushad();
476  void popad();
477
478  void pushfd();
479  void popfd();
480
481  void push(const Immediate& x);
482  void push_imm32(int32_t imm32);
483  void push(Register src);
484  void push(Operand src);
485
486  void pop(Register dst);
487  void pop(Operand dst);
488
489  void leave();
490
491  // Moves
492  void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
493  void mov_b(Register dst, Operand src);
494  void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
495  void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); }
496  void mov_b(Operand dst, const Immediate& src);
497  void mov_b(Operand dst, Register src);
498
499  void mov_w(Register dst, Operand src);
500  void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); }
501  void mov_w(Operand dst, const Immediate& src);
502  void mov_w(Operand dst, Register src);
503
504  void mov(Register dst, int32_t imm32);
505  void mov(Register dst, const Immediate& x);
506  void mov(Register dst, Handle<HeapObject> handle);
507  void mov(Register dst, Operand src);
508  void mov(Register dst, Register src);
509  void mov(Operand dst, const Immediate& x);
510  void mov(Operand dst, Handle<HeapObject> handle);
511  void mov(Operand dst, Register src);
512  void mov(Operand dst, Address src, RelocInfo::Mode);
513
514  void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
515  void movsx_b(Register dst, Operand src);
516
517  void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
518  void movsx_w(Register dst, Operand src);
519
520  void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
521  void movzx_b(Register dst, Operand src);
522
523  void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
524  void movzx_w(Register dst, Operand src);
525
526  void movq(XMMRegister dst, Operand src);
527  void movq(Operand dst, XMMRegister src);
528
529  // Conditional moves
530  void cmov(Condition cc, Register dst, Register src) {
531    cmov(cc, dst, Operand(src));
532  }
533  void cmov(Condition cc, Register dst, Operand src);
534
535  // Flag management.
536  void cld();
537
538  // Repetitive string instructions.
539  void rep_movs();
540  void rep_stos();
541  void stos();
542
543  void xadd(Operand dst, Register src);
544  void xadd_b(Operand dst, Register src);
545  void xadd_w(Operand dst, Register src);
546
547  // Exchange
548  void xchg(Register dst, Register src);
549  void xchg(Register dst, Operand src);
550  void xchg_b(Register reg, Operand op);
551  void xchg_w(Register reg, Operand op);
552
553  // Lock prefix
554  void lock();
555
556  // CompareExchange
557  void cmpxchg(Operand dst, Register src);
558  void cmpxchg_b(Operand dst, Register src);
559  void cmpxchg_w(Operand dst, Register src);
560  void cmpxchg8b(Operand dst);
561
562  // Memory Fence
563  void mfence();
564  void lfence();
565
566  void pause();
567
568  // Arithmetics
569  void adc(Register dst, int32_t imm32);
570  void adc(Register dst, Register src) { adc(dst, Operand(src)); }
571  void adc(Register dst, Operand src);
572
573  void add(Register dst, Register src) { add(dst, Operand(src)); }
574  void add(Register dst, Operand src);
575  void add(Operand dst, Register src);
576  void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
577  void add(Operand dst, const Immediate& x);
578
579  void and_(Register dst, int32_t imm32);
580  void and_(Register dst, const Immediate& x);
581  void and_(Register dst, Register src) { and_(dst, Operand(src)); }
582  void and_(Register dst, Operand src);
583  void and_(Operand dst, Register src);
584  void and_(Operand dst, const Immediate& x);
585
586  void cmpb(Register reg, Immediate imm8) {
587    DCHECK(reg.is_byte_register());
588    cmpb(Operand(reg), imm8);
589  }
590  void cmpb(Operand op, Immediate imm8);
591  void cmpb(Register reg, Operand op);
592  void cmpb(Operand op, Register reg);
593  void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
594  void cmpb_al(Operand op);
595  void cmpw_ax(Operand op);
596  void cmpw(Operand dst, Immediate src);
597  void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
598  void cmpw(Register dst, Operand src);
599  void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
600  void cmpw(Operand dst, Register src);
601  void cmp(Register reg, int32_t imm32);
602  void cmp(Register reg, Handle<HeapObject> handle);
603  void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
604  void cmp(Register reg, Operand op);
605  void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
606  void cmp(Operand op, Register reg);
607  void cmp(Operand op, const Immediate& imm);
608  void cmp(Operand op, Handle<HeapObject> handle);
609
610  void dec_b(Register dst);
611  void dec_b(Operand dst);
612
613  void dec(Register dst);
614  void dec(Operand dst);
615
616  void cdq();
617
618  void idiv(Register src) { idiv(Operand(src)); }
619  void idiv(Operand src);
620  void div(Register src) { div(Operand(src)); }
621  void div(Operand src);
622
623  // Signed multiply instructions.
624  void imul(Register src);  // edx:eax = eax * src.
625  void imul(Register dst, Register src) { imul(dst, Operand(src)); }
626  void imul(Register dst, Operand src);                  // dst = dst * src.
627  void imul(Register dst, Register src, int32_t imm32);  // dst = src * imm32.
628  void imul(Register dst, Operand src, int32_t imm32);
629
630  void inc(Register dst);
631  void inc(Operand dst);
632
633  void lea(Register dst, Operand src);
634
635  // Unsigned multiply instruction.
636  void mul(Register src);  // edx:eax = eax * reg.
637
638  void neg(Register dst);
639  void neg(Operand dst);
640
641  void not_(Register dst);
642  void not_(Operand dst);
643
644  void or_(Register dst, int32_t imm32);
645  void or_(Register dst, Register src) { or_(dst, Operand(src)); }
646  void or_(Register dst, Operand src);
647  void or_(Operand dst, Register src);
648  void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
649  void or_(Operand dst, const Immediate& x);
650
651  void rcl(Register dst, uint8_t imm8);
652  void rcr(Register dst, uint8_t imm8);
653
654  void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); }
655  void rol(Operand dst, uint8_t imm8);
656  void rol_cl(Register dst) { rol_cl(Operand(dst)); }
657  void rol_cl(Operand dst);
658
659  void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
660  void ror(Operand dst, uint8_t imm8);
661  void ror_cl(Register dst) { ror_cl(Operand(dst)); }
662  void ror_cl(Operand dst);
663
664  void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
665  void sar(Operand dst, uint8_t imm8);
666  void sar_cl(Register dst) { sar_cl(Operand(dst)); }
667  void sar_cl(Operand dst);
668
669  void sbb(Register dst, Register src) { sbb(dst, Operand(src)); }
670  void sbb(Register dst, Operand src);
671
672  void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
673  void shl(Operand dst, uint8_t imm8);
674  void shl_cl(Register dst) { shl_cl(Operand(dst)); }
675  void shl_cl(Operand dst);
676  void shld(Register dst, Register src, uint8_t shift);
677  void shld_cl(Register dst, Register src);
678
679  void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
680  void shr(Operand dst, uint8_t imm8);
681  void shr_cl(Register dst) { shr_cl(Operand(dst)); }
682  void shr_cl(Operand dst);
683  void shrd(Register dst, Register src, uint8_t shift);
684  void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
685  void shrd_cl(Operand dst, Register src);
686
687  void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
688  void sub(Operand dst, const Immediate& x);
689  void sub(Register dst, Register src) { sub(dst, Operand(src)); }
690  void sub(Register dst, Operand src);
691  void sub(Operand dst, Register src);
692  void sub_sp_32(uint32_t imm);
693
694  void test(Register reg, const Immediate& imm);
695  void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
696  void test(Register reg, Operand op);
697  void test(Operand op, const Immediate& imm);
698  void test(Operand op, Register reg) { test(reg, op); }
699  void test_b(Register reg, Operand op);
700  void test_b(Register reg, Immediate imm8);
701  void test_b(Operand op, Immediate imm8);
702  void test_b(Operand op, Register reg) { test_b(reg, op); }
703  void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
704  void test_w(Register reg, Operand op);
705  void test_w(Register reg, Immediate imm16);
706  void test_w(Operand op, Immediate imm16);
707  void test_w(Operand op, Register reg) { test_w(reg, op); }
708  void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }
709
710  void xor_(Register dst, int32_t imm32);
711  void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
712  void xor_(Register dst, Operand src);
713  void xor_(Operand dst, Register src);
714  void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
715  void xor_(Operand dst, const Immediate& x);
716
717  // Bit operations.
718  void bswap(Register dst);
719  void bt(Operand dst, Register src);
720  void bts(Register dst, Register src) { bts(Operand(dst), src); }
721  void bts(Operand dst, Register src);
722  void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
723  void bsr(Register dst, Operand src);
724  void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
725  void bsf(Register dst, Operand src);
726
727  // Miscellaneous
728  void hlt();
729  void int3();
730  void nop();
731  void ret(int imm16);
732  void ud2();
733
734  // Label operations & relative jumps (PPUM Appendix D)
735  //
736  // Takes a branch opcode (cc) and a label (L) and generates
737  // either a backward branch or a forward branch and links it
738  // to the label fixup chain. Usage:
739  //
740  // Label L;    // unbound label
741  // j(cc, &L);  // forward branch to unbound label
742  // bind(&L);   // bind label to the current pc
743  // j(cc, &L);  // backward branch to bound label
744  // bind(&L);   // illegal: a label may be bound only once
745  //
746  // Note: The same Label can be used for forward and backward branches
747  // but it may be bound only once.
748
749  void bind(Label* L);  // binds an unbound label L to the current code position
750
751  // Calls
752  void call(Label* L);
753  void call(Address entry, RelocInfo::Mode rmode);
754  void call(Register reg) { call(Operand(reg)); }
755  void call(Operand adr);
756  void call(Handle<Code> code, RelocInfo::Mode rmode);
757  void wasm_call(Address address, RelocInfo::Mode rmode);
758
759  // Jumps
760  // unconditional jump to L
761  void jmp(Label* L, Label::Distance distance = Label::kFar);
762  void jmp(Address entry, RelocInfo::Mode rmode);
763  void jmp(Register reg) { jmp(Operand(reg)); }
764  void jmp(Operand adr);
765  void jmp(Handle<Code> code, RelocInfo::Mode rmode);
766  // Unconditional jump relative to the current address. Low-level routine,
767  // use with caution!
768  void jmp_rel(int offset);
769
770  // Conditional jumps
771  void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
772  void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
773  void j(Condition cc, Handle<Code> code,
774         RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
775
776  // Floating-point operations
777  void fld(int i);
778  void fstp(int i);
779
780  void fld1();
781  void fldz();
782  void fldpi();
783  void fldln2();
784
785  void fld_s(Operand adr);
786  void fld_d(Operand adr);
787
788  void fstp_s(Operand adr);
789  void fst_s(Operand adr);
790  void fstp_d(Operand adr);
791  void fst_d(Operand adr);
792
793  void fild_s(Operand adr);
794  void fild_d(Operand adr);
795
796  void fist_s(Operand adr);
797
798  void fistp_s(Operand adr);
799  void fistp_d(Operand adr);
800
801  // The fisttp instructions require SSE3.
802  void fisttp_s(Operand adr);
803  void fisttp_d(Operand adr);
804
805  void fabs();
806  void fchs();
807  void fcos();
808  void fsin();
809  void fptan();
810  void fyl2x();
811  void f2xm1();
812  void fscale();
813  void fninit();
814
815  void fadd(int i);
816  void fadd_i(int i);
817  void fsub(int i);
818  void fsub_i(int i);
819  void fmul(int i);
820  void fmul_i(int i);
821  void fdiv(int i);
822  void fdiv_i(int i);
823
824  void fisub_s(Operand adr);
825
826  void faddp(int i = 1);
827  void fsubp(int i = 1);
828  void fsubrp(int i = 1);
829  void fmulp(int i = 1);
830  void fdivp(int i = 1);
831  void fprem();
832  void fprem1();
833
834  void fxch(int i = 1);
835  void fincstp();
836  void ffree(int i = 0);
837
838  void ftst();
839  void fucomp(int i);
840  void fucompp();
841  void fucomi(int i);
842  void fucomip();
843  void fcompp();
844  void fnstsw_ax();
845  void fwait();
846  void fnclex();
847
848  void frndint();
849
850  void sahf();
851  void setcc(Condition cc, Register reg);
852
853  void cpuid();
854
  // SSE instructions.
  // Register-register overloads forward to the Operand overload so each
  // opcode is encoded in exactly one place.
  void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); }
  void addss(XMMRegister dst, Operand src);
  void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
  void subss(XMMRegister dst, Operand src);
  void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
  void mulss(XMMRegister dst, Operand src);
  void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
  void divss(XMMRegister dst, Operand src);
  void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
  void sqrtss(XMMRegister dst, Operand src);

  void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
  void movaps(XMMRegister dst, Operand src);
  void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
  void shufpd(XMMRegister dst, XMMRegister src, byte imm8);

  void movhlps(XMMRegister dst, XMMRegister src);
  void movlhps(XMMRegister dst, XMMRegister src);
  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);
  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
  void maxss(XMMRegister dst, Operand src);
  void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
  void minss(XMMRegister dst, Operand src);

  void haddps(XMMRegister dst, Operand src);
  void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
  void sqrtpd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x51);
  }
  void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }

  // Packed compares: 'cmp' is the immediate predicate byte selecting the
  // comparison (eq, lt, le, ...).
  void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
  void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmpps(dst, Operand(src), cmp);
  }
  void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmppd(dst, Operand(src), cmp);
  }
904
// Packed floating-point comparison operations.
// Each entry pairs a mnemonic suffix with the immediate predicate byte passed
// to cmpps/cmppd. The list is reused further down for the AVX variants.
#define PACKED_CMP_LIST(V) \
  V(cmpeq, 0x0)            \
  V(cmplt, 0x1)            \
  V(cmple, 0x2)            \
  V(cmpunord, 0x3)         \
  V(cmpneq, 0x4)

// Expands to the SSE cmp*{ps,pd} convenience wrappers with the predicate
// baked into the call to cmpps/cmppd.
#define SSE_CMP_P(instr, imm8)                                            \
  void instr##ps(XMMRegister dst, XMMRegister src) {                      \
    cmpps(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, XMMRegister src) {                      \
    cmppd(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  PACKED_CMP_LIST(SSE_CMP_P)
#undef SSE_CMP_P
925
  // SSE2 instructions
  // Truncating (round-toward-zero) float -> integer conversions.
  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src) {
    cvttss2si(dst, Operand(src));
  }
  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src) {
    cvttsd2si(dst, Operand(src));
  }
  void cvtsd2si(Register dst, XMMRegister src);

  // Integer <-> floating-point and single <-> double conversions.
  void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
  void cvtsi2ss(XMMRegister dst, Operand src);
  void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
  void cvtsi2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, XMMRegister src) {
    cvtss2sd(dst, Operand(src));
  }
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void cvtpd2ps(XMMRegister dst, XMMRegister src);
  void cvttps2dq(XMMRegister dst, XMMRegister src) {
    cvttps2dq(dst, Operand(src));
  }
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
  void ucomisd(XMMRegister dst, Operand src);

  // NOTE(review): roundss/roundsd are SSE4.1 instructions despite appearing
  // in this SSE2 section.
  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void movapd(XMMRegister dst, XMMRegister src) { movapd(dst, Operand(src)); }
  void movapd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x28);
  }
  void movupd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x10);
  }

  // Extract per-lane sign bits into a general-purpose register.
  void movmskpd(Register dst, XMMRegister src);
  void movmskps(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  void cmpltsd(XMMRegister dst, XMMRegister src);

  // 128-bit integer moves: movdqa requires 16-byte alignment, movdqu does not.
  void movdqa(XMMRegister dst, Operand src);
  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, XMMRegister src);
  // Chooses the aligned or unaligned form based on the 'aligned' flag.
  void movdq(bool aligned, XMMRegister dst, Operand src) {
    if (aligned) {
      movdqa(dst, src);
    } else {
      movdqu(dst, src);
    }
  }

  void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
  void movd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
  void movsd(XMMRegister dst, Operand src);
  void movsd(Operand dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);
  void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }

  void extractps(Operand dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);

  void pcmpgtq(XMMRegister dst, XMMRegister src);

  // Packed shifts by an immediate count.
  void psllw(XMMRegister reg, uint8_t shift);
  void pslld(XMMRegister reg, uint8_t shift);
  void psrlw(XMMRegister reg, uint8_t shift);
  void psrld(XMMRegister reg, uint8_t shift);
  void psraw(XMMRegister reg, uint8_t shift);
  void psrad(XMMRegister reg, uint8_t shift);
  void psllq(XMMRegister reg, uint8_t shift);
  void psrlq(XMMRegister reg, uint8_t shift);

  // Word/dword shuffles controlled by the 'shuffle' immediate.
  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufhw(dst, Operand(src), shuffle);
  }
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshuflw(dst, Operand(src), shuffle);
  }
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufd(dst, Operand(src), shuffle);
  }
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
    pblendw(dst, Operand(src), mask);
  }
  void pblendw(XMMRegister dst, Operand src, uint8_t mask);

  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
    palignr(dst, Operand(src), mask);
  }
  void palignr(XMMRegister dst, Operand src, uint8_t mask);

  void pextrb(Register dst, XMMRegister src, uint8_t offset) {
    pextrb(Operand(dst), src, offset);
  }
  void pextrb(Operand dst, XMMRegister src, uint8_t offset);
  // SSE3 instructions
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
  void movshdup(XMMRegister dst, XMMRegister src);

  // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
  void pextrw(Register dst, XMMRegister src, uint8_t offset) {
    pextrw(Operand(dst), src, offset);
  }
  void pextrw(Operand dst, XMMRegister src, uint8_t offset);
  void pextrd(Register dst, XMMRegister src, uint8_t offset) {
    pextrd(Operand(dst), src, offset);
  }
  void pextrd(Operand dst, XMMRegister src, uint8_t offset);

  // Lane insertion; 'offset' is the instruction's immediate byte selecting
  // the destination lane.
  void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
    insertps(dst, Operand(src), offset);
  }
  void insertps(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
    pinsrb(dst, Operand(src), offset);
  }
  void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
    pinsrw(dst, Operand(src), offset);
  }
  void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
    pinsrd(dst, Operand(src), offset);
  }
  void pinsrd(XMMRegister dst, Operand src, uint8_t offset);

  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1075
  // AVX instructions
  // Three-operand scalar single-precision arithmetic. Each register-register
  // overload forwards to the Operand overload, which encodes via vss() with
  // the opcode byte given below.
  void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vaddss(dst, src1, Operand(src2));
  }
  void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x58, dst, src1, src2);
  }
  void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsubss(dst, src1, Operand(src2));
  }
  void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5c, dst, src1, src2);
  }
  void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vmulss(dst, src1, Operand(src2));
  }
  void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x59, dst, src1, src2);
  }
  void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vdivss(dst, src1, Operand(src2));
  }
  void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5e, dst, src1, src2);
  }
  void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vmaxss(dst, src1, Operand(src2));
  }
  void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5f, dst, src1, src2);
  }
  void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vminss(dst, src1, Operand(src2));
  }
  void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5d, dst, src1, src2);
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsqrtss(dst, src1, Operand(src2));
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x51, dst, src1, src2);
  }
  // Emits a VEX-encoded scalar-single instruction with opcode 'op'.
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vhaddps(dst, src1, Operand(src2));
  }
  void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
  }
  // One-source forms below pass xmm0 as a placeholder for vinstr's unused
  // src1 slot.
  void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
  void vsqrtpd(XMMRegister dst, Operand src) {
    vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovss(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vmovsd(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF2, k0F, kWIG);
  }

  void vextractps(Operand dst, XMMRegister src, byte imm8);

  void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2);

  // AVX packed moves (aligned/unaligned, single/double precision).
  void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
  void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufps(dst, src1, Operand(src2), imm8);
  }
  void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
  void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufpd(dst, src1, Operand(src2), imm8);
  }
  void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);

  void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);
  void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovhps(Operand dst, XMMRegister src);
1177
  // AVX packed shifts by an immediate count.
  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsllq(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlq(XMMRegister dst, XMMRegister src, uint8_t imm8);

  // AVX word/dword shuffles controlled by the 'shuffle' immediate.
  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufhw(dst, Operand(src), shuffle);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshuflw(dst, Operand(src), shuffle);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufd(dst, Operand(src), shuffle);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  // Variable blends: 'mask' is a register whose lanes select src1/src2.
  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);

  // Immediate blend/align: 'mask' is the instruction's immediate byte.
  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpblendw(dst, src1, Operand(src2), mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpalignr(dst, src1, Operand(src2), mask);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  // AVX lane extraction into a GP register or memory.
  void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
    vpextrb(Operand(dst), src, offset);
  }
  void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
    vpextrw(Operand(dst), src, offset);
  }
  void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
    vpextrd(Operand(dst), src, offset);
  }
  void vpextrd(Operand dst, XMMRegister src, uint8_t offset);

  // AVX lane insertion; 'offset' is the immediate lane selector.
  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 uint8_t offset) {
    vinsertps(dst, src1, Operand(src2), offset);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
                 uint8_t offset);
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrb(dst, src1, Operand(src2), offset);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrw(dst, src1, Operand(src2), offset);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrd(dst, src1, Operand(src2), offset);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);

  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1260
  // AVX conversions.
  void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtpd2ps(XMMRegister dst, XMMRegister src) {
    vinstr(0x5A, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vcvttps2dq(dst, Operand(src));
  }
  void vcvttps2dq(XMMRegister dst, Operand src) {
    vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvttpd2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, k66, k0F, kWIG);
  }
  // The GP destination is re-typed as an XMMRegister only so vinstr can
  // place its code in the ModRM reg field.
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }

  void vmovddup(XMMRegister dst, Operand src) {
    vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vmovddup(XMMRegister dst, XMMRegister src) {
    vmovddup(dst, Operand(src));
  }
  void vmovshdup(XMMRegister dst, XMMRegister src) {
    vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG);
  }
  // The register-source form requires AVX2 (note the extra feature argument);
  // the memory-source form is plain AVX.
  void vbroadcastss(XMMRegister dst, XMMRegister src) {
    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2);
  }
  void vbroadcastss(XMMRegister dst, Operand src) {
    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
  }
  void vmovdqa(XMMRegister dst, Operand src) {
    vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovdqa(XMMRegister dst, XMMRegister src) {
    vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovdqu(XMMRegister dst, Operand src) {
    vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vmovdqu(Operand dst, XMMRegister src) {
    vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
  }
1326  void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
1327  void vmovd(XMMRegister dst, Operand src) {
1328    vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
1329  }
1330  void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
1331  void vmovd(Operand dst, XMMRegister src) {
1332    vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
1333  }
1334
  // AVX sign-bit extraction into a general-purpose register.
  void vmovmskpd(Register dst, XMMRegister src);
  void vmovmskps(Register dst, XMMRegister src);

  void vpmovmskb(Register dst, XMMRegister src);

  // AVX unordered scalar compares (set EFLAGS); double uses the 66 prefix,
  // single uses no prefix.
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, Operand src) {
    vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomiss(XMMRegister dst, XMMRegister src) {
    vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
  }
  void vucomiss(XMMRegister dst, Operand src) {
    vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
  }
1352
  // BMI instruction
  void andn(Register dst, Register src1, Register src2) {
    andn(dst, src1, Operand(src2));
  }
  void andn(Register dst, Register src1, Operand src2) {
    bmi1(0xf2, dst, src1, src2);
  }
  void bextr(Register dst, Register src1, Register src2) {
    bextr(dst, Operand(src1), src2);
  }
  void bextr(Register dst, Operand src1, Register src2) {
    bmi1(0xf7, dst, src2, src1);
  }
  // For blsi/blsmsk/blsr the fixed register passed to bmi1 supplies the
  // ModRM reg-field opcode extension (/3, /2 and /1 respectively).
  void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
  void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
  void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
  void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
  void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
  void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
  void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
  void tzcnt(Register dst, Operand src);

  void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
  void lzcnt(Register dst, Operand src);

  void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
  void popcnt(Register dst, Operand src);

  // BMI2 instructions; the first argument to bmi2 selects the SIMD prefix.
  void bzhi(Register dst, Register src1, Register src2) {
    bzhi(dst, Operand(src1), src2);
  }
  void bzhi(Register dst, Operand src1, Register src2) {
    bmi2(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulx(Register dst1, Register dst2, Register src) {
    mulx(dst1, dst2, Operand(src));
  }
  void mulx(Register dst1, Register dst2, Operand src) {
    bmi2(kF2, 0xf6, dst1, dst2, src);
  }
  void pdep(Register dst, Register src1, Register src2) {
    pdep(dst, src1, Operand(src2));
  }
  void pdep(Register dst, Register src1, Operand src2) {
    bmi2(kF2, 0xf5, dst, src1, src2);
  }
  void pext(Register dst, Register src1, Register src2) {
    pext(dst, src1, Operand(src2));
  }
  void pext(Register dst, Register src1, Operand src2) {
    bmi2(kF3, 0xf5, dst, src1, src2);
  }
  void sarx(Register dst, Register src1, Register src2) {
    sarx(dst, Operand(src1), src2);
  }
  void sarx(Register dst, Operand src1, Register src2) {
    bmi2(kF3, 0xf7, dst, src2, src1);
  }
  void shlx(Register dst, Register src1, Register src2) {
    shlx(dst, Operand(src1), src2);
  }
  void shlx(Register dst, Operand src1, Register src2) {
    bmi2(k66, 0xf7, dst, src2, src1);
  }
  void shrx(Register dst, Register src1, Register src2) {
    shrx(dst, Operand(src1), src2);
  }
  void shrx(Register dst, Operand src1, Register src2) {
    bmi2(kF2, 0xf7, dst, src2, src1);
  }
  void rorx(Register dst, Register src, byte imm8) {
    rorx(dst, Operand(src), imm8);
  }
  void rorx(Register dst, Operand src, byte imm8);
1427
  // Implementation of packed single-precision floating-point SSE instructions.
  void ps(byte op, XMMRegister dst, Operand src);
  // Implementation of packed double-precision floating-point SSE instructions.
  void pd(byte op, XMMRegister dst, Operand src);

// Packed arithmetic/logical ops shared by the SSE and AVX declarations below;
// each entry pairs a mnemonic stem with its opcode byte.
#define PACKED_OP_LIST(V) \
  V(unpckl, 0x14)         \
  V(and, 0x54)            \
  V(andn, 0x55)           \
  V(or, 0x56)             \
  V(xor, 0x57)            \
  V(add, 0x58)            \
  V(mul, 0x59)            \
  V(sub, 0x5c)            \
  V(min, 0x5d)            \
  V(div, 0x5e)            \
  V(max, 0x5f)

// Declares the two-operand SSE {ps,pd} forms, encoded via ps()/pd().
#define SSE_PACKED_OP_DECLARE(name, opcode)                             \
  void name##ps(XMMRegister dst, XMMRegister src) {                     \
    ps(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##ps(XMMRegister dst, Operand src) { ps(opcode, dst, src); } \
  void name##pd(XMMRegister dst, XMMRegister src) {                     \
    pd(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##pd(XMMRegister dst, Operand src) { pd(opcode, dst, src); }

  PACKED_OP_LIST(SSE_PACKED_OP_DECLARE)
#undef SSE_PACKED_OP_DECLARE

// Declares the three-operand AVX v*{ps,pd} forms, encoded via vps()/vpd().
#define AVX_PACKED_OP_DECLARE(name, opcode)                               \
  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(opcode, dst, src1, src2);                                         \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vpd(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vpd(opcode, dst, src1, src2);                                         \
  }

  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
#undef AVX_PACKED_OP_DECLARE
#undef PACKED_OP_LIST

  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);

// Declares the AVX vcmp*{ps,pd} wrappers with the comparison predicate baked
// into the immediate passed to vcmpps/vcmppd.
#define AVX_CMP_P(instr, imm8)                                             \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmpps(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmppd(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  PACKED_CMP_LIST(AVX_CMP_P)
  // vcmpgeps/vcmpgepd only in AVX.
  AVX_CMP_P(cmpge, 0xd)
#undef AVX_CMP_P
#undef PACKED_CMP_LIST
1502
1503// Other SSE and AVX instructions
1504#define DECLARE_SSE_UNOP_AND_AVX(instruction, escape, opcode)       \
1505  void instruction(XMMRegister dst, XMMRegister src) {              \
1506    instruction(dst, Operand(src));                                 \
1507  }                                                                 \
1508  void instruction(XMMRegister dst, Operand src) {                  \
1509    sse_instr(dst, src, 0x##escape, 0x##opcode);                    \
1510  }                                                                 \
1511  void v##instruction(XMMRegister dst, XMMRegister src) {           \
1512    v##instruction(dst, Operand(src));                              \
1513  }                                                                 \
1514  void v##instruction(XMMRegister dst, Operand src) {               \
1515    vinstr(0x##opcode, dst, xmm0, src, kNoPrefix, k##escape, kWIG); \
1516  }
1517
1518  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_UNOP_AND_AVX)
1519#undef DECLARE_SSE_UNOP_AND_AVX
1520
1521#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1522  void instruction(XMMRegister dst, XMMRegister src) {                \
1523    instruction(dst, Operand(src));                                   \
1524  }                                                                   \
1525  void instruction(XMMRegister dst, Operand src) {                    \
1526    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
1527  }
1528
1529  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
1530  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
1531#undef DECLARE_SSE2_INSTRUCTION
1532
1533#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
1534  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1535    v##instruction(dst, src1, Operand(src2));                                \
1536  }                                                                          \
1537  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1538    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
1539  }
1540
1541  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
1542  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_AVX_INSTRUCTION)
1543#undef DECLARE_SSE2_AVX_INSTRUCTION
1544
1545#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
1546                                  opcode)                                    \
1547  void instruction(XMMRegister dst, XMMRegister src) {                       \
1548    instruction(dst, Operand(src));                                          \
1549  }                                                                          \
1550  void instruction(XMMRegister dst, Operand src) {                           \
1551    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1552  }
1553
1554  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1555  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1556#undef DECLARE_SSSE3_INSTRUCTION
1557
1558#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
1559                                 opcode)                                    \
1560  void instruction(XMMRegister dst, XMMRegister src) {                      \
1561    instruction(dst, Operand(src));                                         \
1562  }                                                                         \
1563  void instruction(XMMRegister dst, Operand src) {                          \
1564    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1565  }
1566
1567  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1568  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1569  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
1570  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
1571  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
1572#undef DECLARE_SSE4_INSTRUCTION
1573
// Declares the AVX (VEX-encoded, three-operand) forms of the SSSE3/SSE4
// instructions: v##instruction overloads that emit through vinstr(). The
// two escape bytes are pasted into a single LeadingOpcode constant
// (e.g. 0F,38 -> k0F38).
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    v##instruction(dst, src1, Operand(src2));                                 \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
1586
// Declares AVX forms of the reg,mem (RM) unary instructions. These have no
// second source register, so xmm0 is passed to vinstr() to fill the unused
// source-register slot of the VEX encoding.
#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
                                        opcode)                                \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    v##instruction(dst, Operand(src));                                         \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0);   \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION
1599
  // AVX2 instructions. Declares the broadcast instructions; these require
  // the AVX2 CPU feature (passed as the CpuFeature argument to vinstr()),
  // and use xmm0 for the unused source-register slot as above.
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)           \
  void instr(XMMRegister dst, XMMRegister src) {                            \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }                                                                         \
  void instr(XMMRegister dst, Operand src) {                                \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION
1612
// Declares fused multiply-add instructions (FMA3 CPU feature). Unlike the
// macros above, the list also supplies an explicit vector length and VEX.W
// extension for each entry.
#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA
1624
  // Prefetch the data at src into the given cache level.
  // Levels 1, 2 or 3 select the corresponding CPU cache; level 0 requests a
  // non-temporal prefetch (data not expected to be reused).
  void prefetch(Operand src, int level);
  // TODO(lrn): Need SFENCE for movnt?
1630
1631  // Check the code size generated from label to here.
1632  int SizeOfCodeGeneratedSince(Label* label) {
1633    return pc_offset() - label->pos();
1634  }
1635
  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  // Writes raw data into the code stream.  Used for inline tables,
  // e.g., jump-tables.  db = byte, dd = 32-bit, dq = 64-bit.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  // dp = pointer-sized datum; on ia32 that is 32 bits, so forward to dd().
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    dd(data, rmode);
  }
  // Writes a 32-bit datum referring to |label|'s position.
  void dd(Label* label);
1650
1651  // Check if there is less than kGap bytes available in the buffer.
1652  // If this is the case, we need to grow the buffer before emitting
1653  // an instruction or relocation information.
1654  inline bool buffer_overflow() const {
1655    return pc_ >= reloc_info_writer.pos() - kGap;
1656  }
1657
1658  // Get the number of bytes available in the buffer.
1659  inline int available_space() const { return reloc_info_writer.pos() - pc_; }
1660
1661  static bool IsNop(Address addr);
1662
1663  int relocation_writer_size() {
1664    return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos();
1665  }
1666
  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  // Raw byte access into the code buffer; |pos| is an offset from
  // buffer_start_.
  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }
1672
 protected:
  // Emit the operand bytes of an SSE instruction for the given
  // register/operand combination.
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst, Register src);

  // Address of the byte at offset |pos| within the code buffer.
  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }
1682
 private:
  // Unaligned 32-bit load/store at buffer offset |pos| (x86 immediates and
  // displacements within instructions need not be naturally aligned).
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }
1690
  // code emission
  void GrowBuffer();
  // emit() writes 32 bits by default; emit_b/emit_w/emit_q write
  // 8/16/64 bits respectively.  The Handle overloads additionally record
  // relocation information for the embedded object/code reference.
  inline void emit(uint32_t x);
  inline void emit(Handle<HeapObject> handle);
  inline void emit(uint32_t x, RelocInfo::Mode rmode);
  inline void emit(Handle<Code> code, RelocInfo::Mode rmode);
  inline void emit(const Immediate& x);
  inline void emit_b(Immediate x);
  inline void emit_w(const Immediate& x);
  inline void emit_q(uint64_t x);

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);
1704
  // instruction generation
  // Emit a byte-sized arithmetic instruction: op1/op2 opcode bytes, register
  // destination and an 8-bit immediate.
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
  // with a given destination expression and an immediate operand.  It attempts
  // to use the shortest encoding possible.
  // sel specifies the /n in the modrm byte (see the Intel PRM).
  void emit_arith(int sel, Operand dst, const Immediate& x);

  // Emit the ModR/M + SIB + displacement bytes for |adr|; the first argument
  // supplies the reg/opcode field of the ModR/M byte.
  void emit_operand(int code, Operand adr);
  void emit_operand(Register reg, Operand adr);
  void emit_operand(XMMRegister reg, Operand adr);

  void emit_label(Label* label);

  // Emit an x87 instruction: two opcode bytes, with |i| added to b2.
  void emit_farith(int b1, int b2, int i);

  // Emit vex prefix.  The enum values are chosen to match the bit layout of
  // the VEX prefix fields they select (note kL256 = bit 2, kW1 = bit 7):
  //   SIMDPrefix    -> pp field (implied 66/F3/F2 prefix)
  //   VectorLength  -> L bit (128 vs 256 bit; LIG/LZ = length-ignored/zero)
  //   VexW          -> W bit (WIG = W-ignored)
  //   LeadingOpcode -> mmmmm field (implied 0F/0F38/0F3A escape)
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
1731
  // labels
  void print(const Label* L);
  void bind_to(Label* L, int pos);

  // displacements
  inline Displacement disp_at(Label* L);
  inline void disp_at_put(Label* L, Displacement disp);
  inline void emit_disp(Label* L, Displacement::Type type);
  inline void emit_near_disp(Label* L);

  // Shared emitters behind the instruction-list macros above.  The sse*
  // variants differ only in how many prefix/escape bytes precede the opcode.
  void sse_instr(XMMRegister dst, Operand src, byte prefix, byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  // VEX-encoded emitters; the overloads without an explicit VectorLength
  // and the ones with it differ only in that parameter.  CpuFeature
  // identifies the feature that must be enabled to use the instruction.
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  // Most BMI instructions are similar.
  void bmi1(byte op, Register reg, Register vreg, Operand rm);
  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
1766
  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // record the position of jmp/jcc instruction
  void record_farjmp_position(Label* L, int pos);

  // Whether the far jump recorded at index |idx| can be shortened.
  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  // Writes the accumulated code comments; returns the number of bytes
  // written.
  int WriteCodeComments();

  // EnsureSpace needs access to buffer_overflow()/GrowBuffer().
  friend class EnsureSpace;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Variables for this instance of assembler
  // Bookkeeping for the far-jump shortening pass: a running count, the
  // recorded positions, and the per-label jump positions.
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;
1793};
1794
1795// Helper class that ensures that there is enough space for generating
1796// instructions and relocation information.  The constructor makes
1797// sure that there is enough space and (in debug mode) the destructor
1798// checks that we did not generate too much.
1799class EnsureSpace {
1800 public:
1801  explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1802    if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
1803#ifdef DEBUG
1804    space_before_ = assembler->available_space();
1805#endif
1806  }
1807
1808#ifdef DEBUG
1809  ~EnsureSpace() {
1810    int bytes_generated = space_before_ - assembler_->available_space();
1811    DCHECK(bytes_generated < assembler_->kGap);
1812  }
1813#endif
1814
1815 private:
1816  Assembler* const assembler_;
1817#ifdef DEBUG
1818  int space_before_;
1819#endif
1820};
1821
1822}  // namespace internal
1823}  // namespace v8
1824
1825#endif  // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
1826