diff options
Diffstat (limited to 'deps/v8/src/codegen/ia32/assembler-ia32.h')
-rw-r--r-- | deps/v8/src/codegen/ia32/assembler-ia32.h | 1795 |
1 files changed, 1795 insertions, 0 deletions
diff --git a/deps/v8/src/codegen/ia32/assembler-ia32.h b/deps/v8/src/codegen/ia32/assembler-ia32.h new file mode 100644 index 0000000000..d2dcb0f348 --- /dev/null +++ b/deps/v8/src/codegen/ia32/assembler-ia32.h @@ -0,0 +1,1795 @@ +// Copyright (c) 1994-2006 Sun Microsystems Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistribution in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of Sun Microsystems or the names of contributors may +// be used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The original source code covered by the above license above has been +// modified significantly by Google Inc. +// Copyright 2011 the V8 project authors. 
All rights reserved. + +// A light-weight IA32 Assembler. + +#ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_ +#define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_ + +#include <deque> + +#include "src/codegen/assembler.h" +#include "src/codegen/ia32/constants-ia32.h" +#include "src/codegen/ia32/register-ia32.h" +#include "src/codegen/ia32/sse-instr.h" +#include "src/codegen/label.h" +#include "src/execution/isolate.h" +#include "src/objects/smi.h" +#include "src/utils/utils.h" + +namespace v8 { +namespace internal { + +class SafepointTableBuilder; + +enum Condition { + // any value < 0 is considered no_condition + no_condition = -1, + + overflow = 0, + no_overflow = 1, + below = 2, + above_equal = 3, + equal = 4, + not_equal = 5, + below_equal = 6, + above = 7, + negative = 8, + positive = 9, + parity_even = 10, + parity_odd = 11, + less = 12, + greater_equal = 13, + less_equal = 14, + greater = 15, + + // aliases + carry = below, + not_carry = above_equal, + zero = equal, + not_zero = not_equal, + sign = negative, + not_sign = positive +}; + +// Returns the equivalent of !cc. +// Negation of the default no_condition (-1) results in a non-default +// no_condition value (-2). As long as tests for no_condition check +// for condition < 0, this will work as expected. +inline Condition NegateCondition(Condition cc) { + return static_cast<Condition>(cc ^ 1); +} + +enum RoundingMode { + kRoundToNearest = 0x0, + kRoundDown = 0x1, + kRoundUp = 0x2, + kRoundToZero = 0x3 +}; + +// ----------------------------------------------------------------------------- +// Machine instruction Immediates + +class Immediate { + public: + // Calls where x is an Address (uintptr_t) resolve to this overload. 
+ inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NONE) { + value_.immediate = x; + rmode_ = rmode; + } + inline explicit Immediate(const ExternalReference& ext) + : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {} + inline explicit Immediate(Handle<HeapObject> handle) + : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {} + inline explicit Immediate(Smi value) + : Immediate(static_cast<intptr_t>(value.ptr())) {} + + static Immediate EmbeddedNumber(double number); // Smi or HeapNumber. + static Immediate EmbeddedStringConstant(const StringConstantBase* str); + + static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); } + + bool is_heap_object_request() const { + DCHECK_IMPLIES(is_heap_object_request_, + rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT || + rmode_ == RelocInfo::CODE_TARGET); + return is_heap_object_request_; + } + + HeapObjectRequest heap_object_request() const { + DCHECK(is_heap_object_request()); + return value_.heap_object_request; + } + + int immediate() const { + DCHECK(!is_heap_object_request()); + return value_.immediate; + } + + bool is_embedded_object() const { + return !is_heap_object_request() && + rmode() == RelocInfo::FULL_EMBEDDED_OBJECT; + } + + Handle<HeapObject> embedded_object() const { + return Handle<HeapObject>(reinterpret_cast<Address*>(immediate())); + } + + bool is_external_reference() const { + return rmode() == RelocInfo::EXTERNAL_REFERENCE; + } + + ExternalReference external_reference() const { + DCHECK(is_external_reference()); + return bit_cast<ExternalReference>(immediate()); + } + + bool is_zero() const { return RelocInfo::IsNone(rmode_) && immediate() == 0; } + bool is_int8() const { + return RelocInfo::IsNone(rmode_) && i::is_int8(immediate()); + } + bool is_uint8() const { + return RelocInfo::IsNone(rmode_) && i::is_uint8(immediate()); + } + bool is_int16() const { + return RelocInfo::IsNone(rmode_) && i::is_int16(immediate()); + } + + bool is_uint16() const { 
+ return RelocInfo::IsNone(rmode_) && i::is_uint16(immediate()); + } + + RelocInfo::Mode rmode() const { return rmode_; } + + private: + inline explicit Immediate(Label* value) { + value_.immediate = reinterpret_cast<int32_t>(value); + rmode_ = RelocInfo::INTERNAL_REFERENCE; + } + + union Value { + Value() {} + HeapObjectRequest heap_object_request; + int immediate; + } value_; + bool is_heap_object_request_ = false; + RelocInfo::Mode rmode_; + + friend class Operand; + friend class Assembler; + friend class MacroAssembler; +}; + +// ----------------------------------------------------------------------------- +// Machine instruction Operands + +enum ScaleFactor { + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_int_size = times_4, + + times_half_system_pointer_size = times_2, + times_system_pointer_size = times_4, + + times_tagged_size = times_4, +}; + +class V8_EXPORT_PRIVATE Operand { + public: + // reg + V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); } + + // XMM reg + V8_INLINE explicit Operand(XMMRegister xmm_reg) { + Register reg = Register::from_code(xmm_reg.code()); + set_modrm(3, reg); + } + + // [disp/r] + V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) { + set_modrm(0, ebp); + set_dispr(disp, rmode); + } + + // [disp/r] + V8_INLINE explicit Operand(Immediate imm) { + set_modrm(0, ebp); + set_dispr(imm.immediate(), imm.rmode_); + } + + // [base + disp/r] + explicit Operand(Register base, int32_t disp, + RelocInfo::Mode rmode = RelocInfo::NONE); + + // [base + index*scale + disp/r] + explicit Operand(Register base, Register index, ScaleFactor scale, + int32_t disp, RelocInfo::Mode rmode = RelocInfo::NONE); + + // [index*scale + disp/r] + explicit Operand(Register index, ScaleFactor scale, int32_t disp, + RelocInfo::Mode rmode = RelocInfo::NONE); + + static Operand JumpTable(Register index, ScaleFactor scale, Label* table) { + return Operand(index, scale, reinterpret_cast<int32_t>(table), + 
RelocInfo::INTERNAL_REFERENCE); + } + + static Operand ForRegisterPlusImmediate(Register base, Immediate imm) { + return Operand(base, imm.value_.immediate, imm.rmode_); + } + + // Returns true if this Operand is a wrapper for the specified register. + bool is_reg(Register reg) const { return is_reg(reg.code()); } + bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); } + + // Returns true if this Operand is a wrapper for one register. + bool is_reg_only() const; + + // Asserts that this Operand is a wrapper for one register and returns the + // register. + Register reg() const; + + private: + // Set the ModRM byte without an encoded 'reg' register. The + // register is encoded later as part of the emit_operand operation. + inline void set_modrm(int mod, Register rm) { + DCHECK_EQ(mod & -4, 0); + buf_[0] = mod << 6 | rm.code(); + len_ = 1; + } + + inline void set_sib(ScaleFactor scale, Register index, Register base); + inline void set_disp8(int8_t disp); + inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) { + DCHECK(len_ == 1 || len_ == 2); + Address p = reinterpret_cast<Address>(&buf_[len_]); + WriteUnalignedValue(p, disp); + len_ += sizeof(int32_t); + rmode_ = rmode; + } + + inline bool is_reg(int reg_code) const { + return ((buf_[0] & 0xF8) == 0xC0) // addressing mode is register only. + && ((buf_[0] & 0x07) == reg_code); // register codes match. + } + + byte buf_[6]; + // The number of bytes in buf_. + uint8_t len_ = 0; + // Only valid if len_ > 4. + RelocInfo::Mode rmode_ = RelocInfo::NONE; + + // TODO(clemensh): Get rid of this friendship, or make Operand immutable. 
+ friend class Assembler; +}; +ASSERT_TRIVIALLY_COPYABLE(Operand); +static_assert(sizeof(Operand) <= 2 * kSystemPointerSize, + "Operand must be small enough to pass it by value"); + +// ----------------------------------------------------------------------------- +// A Displacement describes the 32bit immediate field of an instruction which +// may be used together with a Label in order to refer to a yet unknown code +// position. Displacements stored in the instruction stream are used to describe +// the instruction and to chain a list of instructions using the same Label. +// A Displacement contains 2 different fields: +// +// next field: position of next displacement in the chain (0 = end of list) +// type field: instruction type +// +// A next value of null (0) indicates the end of a chain (note that there can +// be no displacement at position zero, because there is always at least one +// instruction byte before the displacement). +// +// Displacement _data field layout +// +// |31.....2|1......0| +// [ next | type | + +class Displacement { + public: + enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE }; + + int data() const { return data_; } + Type type() const { return TypeField::decode(data_); } + void next(Label* L) const { + int n = NextField::decode(data_); + n > 0 ? L->link_to(n) : L->Unuse(); + } + void link_to(Label* L) { init(L, type()); } + + explicit Displacement(int data) { data_ = data; } + + Displacement(Label* L, Type type) { init(L, type); } + + void print() { + PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? 
"jmp" : "[other]"), + NextField::decode(data_)); + } + + private: + int data_; + + class TypeField : public BitField<Type, 0, 2> {}; + class NextField : public BitField<int, 2, 32 - 2> {}; + + void init(Label* L, Type type); +}; + +class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { + private: + // We check before assembling an instruction that there is sufficient + // space to write an instruction and its relocation information. + // The relocation writer's position must be kGap bytes above the end of + // the generated instructions. This leaves enough space for the + // longest possible ia32 instruction, 15 bytes, and the longest possible + // relocation information encoding, RelocInfoWriter::kMaxLength == 16. + // (There is a 15 byte limit on ia32 instruction length that rules out some + // otherwise valid instructions.) + // This allows for a single, fast space check per instruction. + static constexpr int kGap = 32; + + public: + // Create an assembler. Instructions and relocation information are emitted + // into a buffer, with the instructions starting from the beginning and the + // relocation information starting from the end of the buffer. See CodeDesc + // for a detailed comment on the layout (globals.h). + // + // If the provided buffer is nullptr, the assembler allocates and grows its + // own buffer. Otherwise it takes ownership of the provided buffer. + explicit Assembler(const AssemblerOptions&, + std::unique_ptr<AssemblerBuffer> = {}); + virtual ~Assembler() {} + + // GetCode emits any pending (non-emitted) code and fills the descriptor desc. + static constexpr int kNoHandlerTable = 0; + static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; + void GetCode(Isolate* isolate, CodeDesc* desc, + SafepointTableBuilder* safepoint_table_builder, + int handler_table_offset); + + // Convenience wrapper for code without safepoint or handler tables. 
+ void GetCode(Isolate* isolate, CodeDesc* desc) { + GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); + } + + void FinalizeJumpOptimizationInfo(); + + // Unused on this architecture. + void MaybeEmitOutOfLineConstantPool() {} + + // Read/Modify the code target in the branch/call instruction at pc. + // NOTE(review): neither declaration below takes an isolate argument; cache + // flushing on the setter is presumably controlled by icache_flush_mode + // (default FLUSH_ICACHE_IF_NEEDED) - confirm against the inline definition. + inline static Address target_address_at(Address pc, Address constant_pool); + inline static void set_target_address_at( + Address pc, Address constant_pool, Address target, + ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); + + // This sets the branch destination (which is in the instruction on x86). + // This is for calls and branches within generated code. + inline static void deserialization_set_special_target_at( + Address instruction_payload, Code code, Address target); + + // Get the size of the special target encoded at 'instruction_payload'. + inline static int deserialization_special_target_size( + Address instruction_payload); + + // This sets the internal reference at the pc. + inline static void deserialization_set_target_internal_reference_at( + Address pc, Address target, + RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); + + static constexpr int kSpecialTargetSize = kSystemPointerSize; + + // One byte opcode for test al, 0xXX. + static constexpr byte kTestAlByte = 0xA8; + // One byte opcode for nop. + static constexpr byte kNopByte = 0x90; + + // One byte opcode for a short unconditional jump. + static constexpr byte kJmpShortOpcode = 0xEB; + // One byte prefix for a short conditional jump. 
+ static constexpr byte kJccShortPrefix = 0x70; + static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry; + static constexpr byte kJcShortOpcode = kJccShortPrefix | carry; + static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero; + static constexpr byte kJzShortOpcode = kJccShortPrefix | zero; + + // --------------------------------------------------------------------------- + // Code generation + // + // - function names correspond one-to-one to ia32 instruction mnemonics + // - unless specified otherwise, instructions operate on 32bit operands + // - instructions on 8bit (byte) operands/registers have a trailing '_b' + // - instructions on 16bit (word) operands/registers have a trailing '_w' + // - naming conflicts with C++ keywords are resolved via a trailing '_' + + // NOTE ON INTERFACE: Currently, the interface is not very consistent + // in the sense that some operations (e.g. mov()) can be called in more + // than one way to generate the same instruction: The Register argument + // can in some cases be replaced with an Operand(Register) argument. + // This should be cleaned up and made more orthogonal. The question + // is: should we always use Operands instead of Registers where an + // Operand is possible, or should we have a Register (overloaded) form + // instead? We must be careful to make sure that the selected instruction + // is obvious from the parameters to avoid hard-to-find code generation + // bugs. + + // Insert the smallest number of nop instructions + // possible to align the pc offset to a multiple + // of m. m must be a power of 2. + void Align(int m); + // Insert the smallest number of zero bytes possible to align the pc offset + // to a multiple of m. m must be a power of 2 (>= 2). + void DataAlign(int m); + void Nop(int bytes = 1); + // Aligns code to something that's optimal for a jump target for the platform. 
+ void CodeTargetAlign(); + + // Stack + void pushad(); + void popad(); + + void pushfd(); + void popfd(); + + void push(const Immediate& x); + void push_imm32(int32_t imm32); + void push(Register src); + void push(Operand src); + + void pop(Register dst); + void pop(Operand dst); + + void enter(const Immediate& size); + void leave(); + + // Moves + void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); } + void mov_b(Register dst, Operand src); + void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); } + void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); } + void mov_b(Operand dst, const Immediate& src); + void mov_b(Operand dst, Register src); + + void mov_w(Register dst, Operand src); + void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); } + void mov_w(Operand dst, const Immediate& src); + void mov_w(Operand dst, Register src); + + void mov(Register dst, int32_t imm32); + void mov(Register dst, const Immediate& x); + void mov(Register dst, Handle<HeapObject> handle); + void mov(Register dst, Operand src); + void mov(Register dst, Register src); + void mov(Operand dst, const Immediate& x); + void mov(Operand dst, Handle<HeapObject> handle); + void mov(Operand dst, Register src); + void mov(Operand dst, Address src, RelocInfo::Mode); + + void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); } + void movsx_b(Register dst, Operand src); + + void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); } + void movsx_w(Register dst, Operand src); + + void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); } + void movzx_b(Register dst, Operand src); + + void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); } + void movzx_w(Register dst, Operand src); + + void movq(XMMRegister dst, Operand src); + // Conditional moves + void cmov(Condition cc, Register dst, Register src) { + cmov(cc, dst, Operand(src)); + } + void cmov(Condition cc, Register dst, Operand 
src); + + // Flag management. + void cld(); + + // Repetitive string instructions. + void rep_movs(); + void rep_stos(); + void stos(); + + // Exchange + void xchg(Register dst, Register src); + void xchg(Register dst, Operand src); + void xchg_b(Register reg, Operand op); + void xchg_w(Register reg, Operand op); + + // Lock prefix + void lock(); + + // CompareExchange + void cmpxchg(Operand dst, Register src); + void cmpxchg_b(Operand dst, Register src); + void cmpxchg_w(Operand dst, Register src); + void cmpxchg8b(Operand dst); + + // Memory Fence + void lfence(); + + void pause(); + + // Arithmetics + void adc(Register dst, int32_t imm32); + void adc(Register dst, Register src) { adc(dst, Operand(src)); } + void adc(Register dst, Operand src); + + void add(Register dst, Register src) { add(dst, Operand(src)); } + void add(Register dst, Operand src); + void add(Operand dst, Register src); + void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); } + void add(Operand dst, const Immediate& x); + + void and_(Register dst, int32_t imm32); + void and_(Register dst, const Immediate& x); + void and_(Register dst, Register src) { and_(dst, Operand(src)); } + void and_(Register dst, Operand src); + void and_(Operand dst, Register src); + void and_(Operand dst, const Immediate& x); + + void cmpb(Register reg, Immediate imm8) { + DCHECK(reg.is_byte_register()); + cmpb(Operand(reg), imm8); + } + void cmpb(Operand op, Immediate imm8); + void cmpb(Register reg, Operand op); + void cmpb(Operand op, Register reg); + void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); } + void cmpb_al(Operand op); + void cmpw_ax(Operand op); + void cmpw(Operand dst, Immediate src); + void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); } + void cmpw(Register dst, Operand src); + void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); } + void cmpw(Operand dst, Register src); + void cmp(Register reg, int32_t imm32); + void cmp(Register reg, 
Handle<HeapObject> handle); + void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); } + void cmp(Register reg, Operand op); + void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); } + void cmp(Operand op, Register reg); + void cmp(Operand op, const Immediate& imm); + void cmp(Operand op, Handle<HeapObject> handle); + + void dec_b(Register dst); + void dec_b(Operand dst); + + void dec(Register dst); + void dec(Operand dst); + + void cdq(); + + void idiv(Register src) { idiv(Operand(src)); } + void idiv(Operand src); + void div(Register src) { div(Operand(src)); } + void div(Operand src); + + // Signed multiply instructions. + void imul(Register src); // edx:eax = eax * src. + void imul(Register dst, Register src) { imul(dst, Operand(src)); } + void imul(Register dst, Operand src); // dst = dst * src. + void imul(Register dst, Register src, int32_t imm32); // dst = src * imm32. + void imul(Register dst, Operand src, int32_t imm32); + + void inc(Register dst); + void inc(Operand dst); + + void lea(Register dst, Operand src); + + // Unsigned multiply instruction. + void mul(Register src); // edx:eax = eax * reg. 
+ + void neg(Register dst); + void neg(Operand dst); + + void not_(Register dst); + void not_(Operand dst); + + void or_(Register dst, int32_t imm32); + void or_(Register dst, Register src) { or_(dst, Operand(src)); } + void or_(Register dst, Operand src); + void or_(Operand dst, Register src); + void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); } + void or_(Operand dst, const Immediate& x); + + void rcl(Register dst, uint8_t imm8); + void rcr(Register dst, uint8_t imm8); + + void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); } + void ror(Operand dst, uint8_t imm8); + void ror_cl(Register dst) { ror_cl(Operand(dst)); } + void ror_cl(Operand dst); + + void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); } + void sar(Operand dst, uint8_t imm8); + void sar_cl(Register dst) { sar_cl(Operand(dst)); } + void sar_cl(Operand dst); + + void sbb(Register dst, Register src) { sbb(dst, Operand(src)); } + void sbb(Register dst, Operand src); + + void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); } + void shl(Operand dst, uint8_t imm8); + void shl_cl(Register dst) { shl_cl(Operand(dst)); } + void shl_cl(Operand dst); + void shld(Register dst, Register src, uint8_t shift); + void shld_cl(Register dst, Register src); + + void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); } + void shr(Operand dst, uint8_t imm8); + void shr_cl(Register dst) { shr_cl(Operand(dst)); } + void shr_cl(Operand dst); + void shrd(Register dst, Register src, uint8_t shift); + void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); } + void shrd_cl(Operand dst, Register src); + + void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); } + void sub(Operand dst, const Immediate& x); + void sub(Register dst, Register src) { sub(dst, Operand(src)); } + void sub(Register dst, Operand src); + void sub(Operand dst, Register src); + void sub_sp_32(uint32_t imm); + + void test(Register reg, const Immediate& imm); + 
void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); } + void test(Register reg, Operand op); + void test(Operand op, const Immediate& imm); + void test(Operand op, Register reg) { test(reg, op); } + void test_b(Register reg, Operand op); + void test_b(Register reg, Immediate imm8); + void test_b(Operand op, Immediate imm8); + void test_b(Operand op, Register reg) { test_b(reg, op); } + void test_b(Register dst, Register src) { test_b(dst, Operand(src)); } + void test_w(Register reg, Operand op); + void test_w(Register reg, Immediate imm16); + void test_w(Operand op, Immediate imm16); + void test_w(Operand op, Register reg) { test_w(reg, op); } + void test_w(Register dst, Register src) { test_w(dst, Operand(src)); } + + void xor_(Register dst, int32_t imm32); + void xor_(Register dst, Register src) { xor_(dst, Operand(src)); } + void xor_(Register dst, Operand src); + void xor_(Operand dst, Register src); + void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); } + void xor_(Operand dst, const Immediate& x); + + // Bit operations. + void bswap(Register dst); + void bt(Operand dst, Register src); + void bts(Register dst, Register src) { bts(Operand(dst), src); } + void bts(Operand dst, Register src); + void bsr(Register dst, Register src) { bsr(dst, Operand(src)); } + void bsr(Register dst, Operand src); + void bsf(Register dst, Register src) { bsf(dst, Operand(src)); } + void bsf(Register dst, Operand src); + + // Miscellaneous + void hlt(); + void int3(); + void nop(); + void ret(int imm16); + void ud2(); + + // Label operations & relative jumps (PPUM Appendix D) + // + // Takes a branch opcode (cc) and a label (L) and generates + // either a backward branch or a forward branch and links it + // to the label fixup chain. 
Usage: + // + // Label L; // unbound label + // j(cc, &L); // forward branch to unbound label + // bind(&L); // bind label to the current pc + // j(cc, &L); // backward branch to bound label + // bind(&L); // illegal: a label may be bound only once + // + // Note: The same Label can be used for forward and backward branches + // but it may be bound only once. + + void bind(Label* L); // binds an unbound label L to the current code position + + // Calls + void call(Label* L); + void call(Address entry, RelocInfo::Mode rmode); + void call(Register reg) { call(Operand(reg)); } + void call(Operand adr); + void call(Handle<Code> code, RelocInfo::Mode rmode); + void wasm_call(Address address, RelocInfo::Mode rmode); + + // Jumps + // unconditional jump to L + void jmp(Label* L, Label::Distance distance = Label::kFar); + void jmp(Address entry, RelocInfo::Mode rmode); + void jmp(Register reg) { jmp(Operand(reg)); } + void jmp(Operand adr); + void jmp(Handle<Code> code, RelocInfo::Mode rmode); + // Unconditional jump relative to the current address. Low-level routine, + // use with caution! + void jmp_rel(int offset); + + // Conditional jumps + void j(Condition cc, Label* L, Label::Distance distance = Label::kFar); + void j(Condition cc, byte* entry, RelocInfo::Mode rmode); + void j(Condition cc, Handle<Code> code, + RelocInfo::Mode rmode = RelocInfo::CODE_TARGET); + + // Floating-point operations + void fld(int i); + void fstp(int i); + + void fld1(); + void fldz(); + void fldpi(); + void fldln2(); + + void fld_s(Operand adr); + void fld_d(Operand adr); + + void fstp_s(Operand adr); + void fst_s(Operand adr); + void fstp_d(Operand adr); + void fst_d(Operand adr); + + void fild_s(Operand adr); + void fild_d(Operand adr); + + void fist_s(Operand adr); + + void fistp_s(Operand adr); + void fistp_d(Operand adr); + + // The fisttp instructions require SSE3. 
+ void fisttp_s(Operand adr); + void fisttp_d(Operand adr); + + void fabs(); + void fchs(); + void fcos(); + void fsin(); + void fptan(); + void fyl2x(); + void f2xm1(); + void fscale(); + void fninit(); + + void fadd(int i); + void fadd_i(int i); + void fsub(int i); + void fsub_i(int i); + void fmul(int i); + void fmul_i(int i); + void fdiv(int i); + void fdiv_i(int i); + + void fisub_s(Operand adr); + + void faddp(int i = 1); + void fsubp(int i = 1); + void fsubrp(int i = 1); + void fmulp(int i = 1); + void fdivp(int i = 1); + void fprem(); + void fprem1(); + + void fxch(int i = 1); + void fincstp(); + void ffree(int i = 0); + + void ftst(); + void fucomp(int i); + void fucompp(); + void fucomi(int i); + void fucomip(); + void fcompp(); + void fnstsw_ax(); + void fwait(); + void fnclex(); + + void frndint(); + + void sahf(); + void setcc(Condition cc, Register reg); + + void cpuid(); + + // SSE instructions + void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); } + void addss(XMMRegister dst, Operand src); + void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); } + void subss(XMMRegister dst, Operand src); + void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); } + void mulss(XMMRegister dst, Operand src); + void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); } + void divss(XMMRegister dst, Operand src); + void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); } + void sqrtss(XMMRegister dst, Operand src); + + void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); } + void ucomiss(XMMRegister dst, Operand src); + void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); } + void movaps(XMMRegister dst, Operand src); + void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); } + void movups(XMMRegister dst, Operand src); + void movups(Operand dst, XMMRegister src); + void shufps(XMMRegister dst, XMMRegister 
src, byte imm8); + + void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); } + void maxss(XMMRegister dst, Operand src); + void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); } + void minss(XMMRegister dst, Operand src); + + void andps(XMMRegister dst, Operand src); + void andps(XMMRegister dst, XMMRegister src) { andps(dst, Operand(src)); } + void andnps(XMMRegister dst, Operand src); + void andnps(XMMRegister dst, XMMRegister src) { andnps(dst, Operand(src)); } + void xorps(XMMRegister dst, Operand src); + void xorps(XMMRegister dst, XMMRegister src) { xorps(dst, Operand(src)); } + void orps(XMMRegister dst, Operand src); + void orps(XMMRegister dst, XMMRegister src) { orps(dst, Operand(src)); } + + void addps(XMMRegister dst, Operand src); + void addps(XMMRegister dst, XMMRegister src) { addps(dst, Operand(src)); } + void subps(XMMRegister dst, Operand src); + void subps(XMMRegister dst, XMMRegister src) { subps(dst, Operand(src)); } + void mulps(XMMRegister dst, Operand src); + void mulps(XMMRegister dst, XMMRegister src) { mulps(dst, Operand(src)); } + void divps(XMMRegister dst, Operand src); + void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); } + void rcpps(XMMRegister dst, Operand src); + void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); } + void rsqrtps(XMMRegister dst, Operand src); + void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); } + void haddps(XMMRegister dst, Operand src); + void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); } + + void minps(XMMRegister dst, Operand src); + void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); } + void maxps(XMMRegister dst, Operand src); + void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); } + + void cmpps(XMMRegister dst, Operand src, uint8_t cmp); + void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) { + cmpps(dst, Operand(src), 
cmp); + } +#define SSE_CMP_P(instr, imm8) \ + void instr##ps(XMMRegister dst, XMMRegister src) { \ + cmpps(dst, Operand(src), imm8); \ + } \ + void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } + + SSE_CMP_P(cmpeq, 0x0) + SSE_CMP_P(cmplt, 0x1) + SSE_CMP_P(cmple, 0x2) + SSE_CMP_P(cmpneq, 0x4) + +#undef SSE_CMP_P + + // SSE2 instructions + void cvttss2si(Register dst, Operand src); + void cvttss2si(Register dst, XMMRegister src) { + cvttss2si(dst, Operand(src)); + } + void cvttsd2si(Register dst, Operand src); + void cvttsd2si(Register dst, XMMRegister src) { + cvttsd2si(dst, Operand(src)); + } + void cvtsd2si(Register dst, XMMRegister src); + + void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); } + void cvtsi2ss(XMMRegister dst, Operand src); + void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); } + void cvtsi2sd(XMMRegister dst, Operand src); + void cvtss2sd(XMMRegister dst, Operand src); + void cvtss2sd(XMMRegister dst, XMMRegister src) { + cvtss2sd(dst, Operand(src)); + } + void cvtsd2ss(XMMRegister dst, Operand src); + void cvtsd2ss(XMMRegister dst, XMMRegister src) { + cvtsd2ss(dst, Operand(src)); + } + void cvtdq2ps(XMMRegister dst, XMMRegister src) { + cvtdq2ps(dst, Operand(src)); + } + void cvtdq2ps(XMMRegister dst, Operand src); + void cvttps2dq(XMMRegister dst, XMMRegister src) { + cvttps2dq(dst, Operand(src)); + } + void cvttps2dq(XMMRegister dst, Operand src); + + void addsd(XMMRegister dst, XMMRegister src) { addsd(dst, Operand(src)); } + void addsd(XMMRegister dst, Operand src); + void subsd(XMMRegister dst, XMMRegister src) { subsd(dst, Operand(src)); } + void subsd(XMMRegister dst, Operand src); + void mulsd(XMMRegister dst, XMMRegister src) { mulsd(dst, Operand(src)); } + void mulsd(XMMRegister dst, Operand src); + void divsd(XMMRegister dst, XMMRegister src) { divsd(dst, Operand(src)); } + void divsd(XMMRegister dst, Operand src); + void xorpd(XMMRegister dst, XMMRegister src) { 
xorpd(dst, Operand(src)); } + void xorpd(XMMRegister dst, Operand src); + void sqrtsd(XMMRegister dst, XMMRegister src) { sqrtsd(dst, Operand(src)); } + void sqrtsd(XMMRegister dst, Operand src); + + void andpd(XMMRegister dst, XMMRegister src) { andpd(dst, Operand(src)); } + void andpd(XMMRegister dst, Operand src); + void orpd(XMMRegister dst, XMMRegister src) { orpd(dst, Operand(src)); } + void orpd(XMMRegister dst, Operand src); + + void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); } + void ucomisd(XMMRegister dst, Operand src); + + void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode); + void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode); + + void movmskpd(Register dst, XMMRegister src); + void movmskps(Register dst, XMMRegister src); + + void cmpltsd(XMMRegister dst, XMMRegister src); + + void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); } + void maxsd(XMMRegister dst, Operand src); + void minsd(XMMRegister dst, XMMRegister src) { minsd(dst, Operand(src)); } + void minsd(XMMRegister dst, Operand src); + + void movdqa(XMMRegister dst, Operand src); + void movdqa(Operand dst, XMMRegister src); + void movdqu(XMMRegister dst, Operand src); + void movdqu(Operand dst, XMMRegister src); + void movdq(bool aligned, XMMRegister dst, Operand src) { + if (aligned) { + movdqa(dst, src); + } else { + movdqu(dst, src); + } + } + + void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); } + void movd(XMMRegister dst, Operand src); + void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); } + void movd(Operand dst, XMMRegister src); + void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); } + void movsd(XMMRegister dst, Operand src); + void movsd(Operand dst, XMMRegister src); + + void movss(XMMRegister dst, Operand src); + void movss(Operand dst, XMMRegister src); + void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); } + void 
extractps(Register dst, XMMRegister src, byte imm8); + + void psllw(XMMRegister reg, uint8_t shift); + void pslld(XMMRegister reg, uint8_t shift); + void psrlw(XMMRegister reg, uint8_t shift); + void psrld(XMMRegister reg, uint8_t shift); + void psraw(XMMRegister reg, uint8_t shift); + void psrad(XMMRegister reg, uint8_t shift); + void psllq(XMMRegister reg, uint8_t shift); + void psllq(XMMRegister dst, XMMRegister src); + void psrlq(XMMRegister reg, uint8_t shift); + void psrlq(XMMRegister dst, XMMRegister src); + + void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + pshufhw(dst, Operand(src), shuffle); + } + void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle); + void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + pshuflw(dst, Operand(src), shuffle); + } + void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle); + void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + pshufd(dst, Operand(src), shuffle); + } + void pshufd(XMMRegister dst, Operand src, uint8_t shuffle); + + void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) { + pblendw(dst, Operand(src), mask); + } + void pblendw(XMMRegister dst, Operand src, uint8_t mask); + + void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) { + palignr(dst, Operand(src), mask); + } + void palignr(XMMRegister dst, Operand src, uint8_t mask); + + void pextrb(Register dst, XMMRegister src, uint8_t offset) { + pextrb(Operand(dst), src, offset); + } + void pextrb(Operand dst, XMMRegister src, uint8_t offset); + // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency + void pextrw(Register dst, XMMRegister src, uint8_t offset) { + pextrw(Operand(dst), src, offset); + } + void pextrw(Operand dst, XMMRegister src, uint8_t offset); + void pextrd(Register dst, XMMRegister src, uint8_t offset) { + pextrd(Operand(dst), src, offset); + } + void pextrd(Operand dst, XMMRegister src, uint8_t offset); + + void insertps(XMMRegister dst, XMMRegister src, uint8_t 
offset) { + insertps(dst, Operand(src), offset); + } + void insertps(XMMRegister dst, Operand src, uint8_t offset); + void pinsrb(XMMRegister dst, Register src, uint8_t offset) { + pinsrb(dst, Operand(src), offset); + } + void pinsrb(XMMRegister dst, Operand src, uint8_t offset); + void pinsrw(XMMRegister dst, Register src, uint8_t offset) { + pinsrw(dst, Operand(src), offset); + } + void pinsrw(XMMRegister dst, Operand src, uint8_t offset); + void pinsrd(XMMRegister dst, Register src, uint8_t offset) { + pinsrd(dst, Operand(src), offset); + } + void pinsrd(XMMRegister dst, Operand src, uint8_t offset); + + // AVX instructions + void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd132sd(dst, src1, Operand(src2)); + } + void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd213sd(dst, src1, Operand(src2)); + } + void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd231sd(dst, src1, Operand(src2)); + } + void vfmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0x99, dst, src1, src2); + } + void vfmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xa9, dst, src1, src2); + } + void vfmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xb9, dst, src1, src2); + } + void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub132sd(dst, src1, Operand(src2)); + } + void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub213sd(dst, src1, Operand(src2)); + } + void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub231sd(dst, src1, Operand(src2)); + } + void vfmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0x9b, dst, src1, src2); + } + void vfmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xab, dst, src1, src2); + } + void vfmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xbb, dst, src1, src2); + } + void 
vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd132sd(dst, src1, Operand(src2)); + } + void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd213sd(dst, src1, Operand(src2)); + } + void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd231sd(dst, src1, Operand(src2)); + } + void vfnmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0x9d, dst, src1, src2); + } + void vfnmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xad, dst, src1, src2); + } + void vfnmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xbd, dst, src1, src2); + } + void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub132sd(dst, src1, Operand(src2)); + } + void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub213sd(dst, src1, Operand(src2)); + } + void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub231sd(dst, src1, Operand(src2)); + } + void vfnmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0x9f, dst, src1, src2); + } + void vfnmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xaf, dst, src1, src2); + } + void vfnmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmasd(0xbf, dst, src1, src2); + } + void vfmasd(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + + void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd132ss(dst, src1, Operand(src2)); + } + void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd213ss(dst, src1, Operand(src2)); + } + void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmadd231ss(dst, src1, Operand(src2)); + } + void vfmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0x99, dst, src1, src2); + } + void vfmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xa9, dst, 
src1, src2); + } + void vfmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xb9, dst, src1, src2); + } + void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub132ss(dst, src1, Operand(src2)); + } + void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub213ss(dst, src1, Operand(src2)); + } + void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfmsub231ss(dst, src1, Operand(src2)); + } + void vfmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0x9b, dst, src1, src2); + } + void vfmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xab, dst, src1, src2); + } + void vfmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xbb, dst, src1, src2); + } + void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd132ss(dst, src1, Operand(src2)); + } + void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd213ss(dst, src1, Operand(src2)); + } + void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmadd231ss(dst, src1, Operand(src2)); + } + void vfnmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0x9d, dst, src1, src2); + } + void vfnmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xad, dst, src1, src2); + } + void vfnmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xbd, dst, src1, src2); + } + void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub132ss(dst, src1, Operand(src2)); + } + void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub213ss(dst, src1, Operand(src2)); + } + void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vfnmsub231ss(dst, src1, Operand(src2)); + } + void vfnmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0x9f, dst, src1, src2); + } + void vfnmsub213ss(XMMRegister dst, 
XMMRegister src1, Operand src2) { + vfmass(0xaf, dst, src1, src2); + } + void vfnmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) { + vfmass(0xbf, dst, src1, src2); + } + void vfmass(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + + void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vaddsd(dst, src1, Operand(src2)); + } + void vaddsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x58, dst, src1, src2); + } + void vsubsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vsubsd(dst, src1, Operand(src2)); + } + void vsubsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x5c, dst, src1, src2); + } + void vmulsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vmulsd(dst, src1, Operand(src2)); + } + void vmulsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x59, dst, src1, src2); + } + void vdivsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vdivsd(dst, src1, Operand(src2)); + } + void vdivsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x5e, dst, src1, src2); + } + void vmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vmaxsd(dst, src1, Operand(src2)); + } + void vmaxsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x5f, dst, src1, src2); + } + void vminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vminsd(dst, src1, Operand(src2)); + } + void vminsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x5d, dst, src1, src2); + } + void vsqrtsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vsqrtsd(dst, src1, Operand(src2)); + } + void vsqrtsd(XMMRegister dst, XMMRegister src1, Operand src2) { + vsd(0x51, dst, src1, src2); + } + void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + + void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vaddss(dst, src1, Operand(src2)); + } + void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x58, dst, src1, src2); + } + void 
vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vsubss(dst, src1, Operand(src2)); + } + void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x5c, dst, src1, src2); + } + void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vmulss(dst, src1, Operand(src2)); + } + void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x59, dst, src1, src2); + } + void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vdivss(dst, src1, Operand(src2)); + } + void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x5e, dst, src1, src2); + } + void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vmaxss(dst, src1, Operand(src2)); + } + void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x5f, dst, src1, src2); + } + void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vminss(dst, src1, Operand(src2)); + } + void vminss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x5d, dst, src1, src2); + } + void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vsqrtss(dst, src1, Operand(src2)); + } + void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) { + vss(0x51, dst, src1, src2); + } + void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + + void vrcpps(XMMRegister dst, XMMRegister src) { vrcpps(dst, Operand(src)); } + void vrcpps(XMMRegister dst, Operand src) { + vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG); + } + void vrsqrtps(XMMRegister dst, XMMRegister src) { + vrsqrtps(dst, Operand(src)); + } + void vrsqrtps(XMMRegister dst, Operand src) { + vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG); + } + void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vhaddps(dst, src1, Operand(src2)); + } + void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) { + vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG); + } + void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); } + 
void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); } + void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); } + void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); } + void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) { + vshufps(dst, src1, Operand(src2), imm8); + } + void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8); + + void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8); + + void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + vpshufhw(dst, Operand(src), shuffle); + } + void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle); + void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + vpshuflw(dst, Operand(src), shuffle); + } + void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle); + void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + vpshufd(dst, Operand(src), shuffle); + } + void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle); + + void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2, + uint8_t mask) { + vpblendw(dst, src1, Operand(src2), mask); + } + void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask); + + void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, + uint8_t mask) { + vpalignr(dst, src1, Operand(src2), mask); + } + void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask); + + void vpextrb(Register dst, XMMRegister src, uint8_t offset) { + vpextrb(Operand(dst), src, offset); + } + void vpextrb(Operand dst, XMMRegister src, uint8_t offset); + void vpextrw(Register dst, XMMRegister src, 
uint8_t offset) { + vpextrw(Operand(dst), src, offset); + } + void vpextrw(Operand dst, XMMRegister src, uint8_t offset); + void vpextrd(Register dst, XMMRegister src, uint8_t offset) { + vpextrd(Operand(dst), src, offset); + } + void vpextrd(Operand dst, XMMRegister src, uint8_t offset); + + void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2, + uint8_t offset) { + vinsertps(dst, src1, Operand(src2), offset); + } + void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2, + uint8_t offset); + void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, + uint8_t offset) { + vpinsrb(dst, src1, Operand(src2), offset); + } + void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset); + void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, + uint8_t offset) { + vpinsrw(dst, src1, Operand(src2), offset); + } + void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset); + void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, + uint8_t offset) { + vpinsrd(dst, src1, Operand(src2), offset); + } + void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset); + + void vcvtdq2ps(XMMRegister dst, XMMRegister src) { + vcvtdq2ps(dst, Operand(src)); + } + void vcvtdq2ps(XMMRegister dst, Operand src) { + vinstr(0x5B, dst, xmm0, src, kNone, k0F, kWIG); + } + void vcvttps2dq(XMMRegister dst, XMMRegister src) { + vcvttps2dq(dst, Operand(src)); + } + void vcvttps2dq(XMMRegister dst, Operand src) { + vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG); + } + + void vmovdqu(XMMRegister dst, Operand src) { + vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG); + } + void vmovdqu(Operand dst, XMMRegister src) { + vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG); + } + void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); } + void vmovd(XMMRegister dst, Operand src) { + vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG); + } + void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); } + 
void vmovd(Operand dst, XMMRegister src) { + vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG); + } + + // BMI instruction + void andn(Register dst, Register src1, Register src2) { + andn(dst, src1, Operand(src2)); + } + void andn(Register dst, Register src1, Operand src2) { + bmi1(0xf2, dst, src1, src2); + } + void bextr(Register dst, Register src1, Register src2) { + bextr(dst, Operand(src1), src2); + } + void bextr(Register dst, Operand src1, Register src2) { + bmi1(0xf7, dst, src2, src1); + } + void blsi(Register dst, Register src) { blsi(dst, Operand(src)); } + void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); } + void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); } + void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); } + void blsr(Register dst, Register src) { blsr(dst, Operand(src)); } + void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); } + void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); } + void tzcnt(Register dst, Operand src); + + void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); } + void lzcnt(Register dst, Operand src); + + void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); } + void popcnt(Register dst, Operand src); + + void bzhi(Register dst, Register src1, Register src2) { + bzhi(dst, Operand(src1), src2); + } + void bzhi(Register dst, Operand src1, Register src2) { + bmi2(kNone, 0xf5, dst, src2, src1); + } + void mulx(Register dst1, Register dst2, Register src) { + mulx(dst1, dst2, Operand(src)); + } + void mulx(Register dst1, Register dst2, Operand src) { + bmi2(kF2, 0xf6, dst1, dst2, src); + } + void pdep(Register dst, Register src1, Register src2) { + pdep(dst, src1, Operand(src2)); + } + void pdep(Register dst, Register src1, Operand src2) { + bmi2(kF2, 0xf5, dst, src1, src2); + } + void pext(Register dst, Register src1, Register src2) { + pext(dst, src1, Operand(src2)); + } + void pext(Register dst, Register src1, Operand src2) { 
+ bmi2(kF3, 0xf5, dst, src1, src2); + } + void sarx(Register dst, Register src1, Register src2) { + sarx(dst, Operand(src1), src2); + } + void sarx(Register dst, Operand src1, Register src2) { + bmi2(kF3, 0xf7, dst, src2, src1); + } + void shlx(Register dst, Register src1, Register src2) { + shlx(dst, Operand(src1), src2); + } + void shlx(Register dst, Operand src1, Register src2) { + bmi2(k66, 0xf7, dst, src2, src1); + } + void shrx(Register dst, Register src1, Register src2) { + shrx(dst, Operand(src1), src2); + } + void shrx(Register dst, Operand src1, Register src2) { + bmi2(kF2, 0xf7, dst, src2, src1); + } + void rorx(Register dst, Register src, byte imm8) { + rorx(dst, Operand(src), imm8); + } + void rorx(Register dst, Operand src, byte imm8); + +#define PACKED_OP_LIST(V) \ + V(and, 0x54) \ + V(andn, 0x55) \ + V(or, 0x56) \ + V(xor, 0x57) \ + V(add, 0x58) \ + V(mul, 0x59) \ + V(sub, 0x5c) \ + V(min, 0x5d) \ + V(div, 0x5e) \ + V(max, 0x5f) + +#define AVX_PACKED_OP_DECLARE(name, opcode) \ + void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ + vps(opcode, dst, src1, Operand(src2)); \ + } \ + void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \ + vps(opcode, dst, src1, src2); \ + } \ + void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ + vpd(opcode, dst, src1, Operand(src2)); \ + } \ + void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) { \ + vpd(opcode, dst, src1, src2); \ + } + + PACKED_OP_LIST(AVX_PACKED_OP_DECLARE) + void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2); + + void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp); +#define AVX_CMP_P(instr, imm8) \ + void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ + vcmpps(dst, src1, Operand(src2), imm8); \ + } \ + void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \ + vcmpps(dst, src1, src2, 
imm8); \ + } + + AVX_CMP_P(vcmpeq, 0x0) + AVX_CMP_P(vcmplt, 0x1) + AVX_CMP_P(vcmple, 0x2) + AVX_CMP_P(vcmpneq, 0x4) + +#undef AVX_CMP_P + +// Other SSE and AVX instructions +#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \ + void instruction(XMMRegister dst, XMMRegister src) { \ + instruction(dst, Operand(src)); \ + } \ + void instruction(XMMRegister dst, Operand src) { \ + sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \ + } + + SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION) +#undef DECLARE_SSE2_INSTRUCTION + +#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \ + void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ + v##instruction(dst, src1, Operand(src2)); \ + } \ + void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \ + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \ + } + + SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION) +#undef DECLARE_SSE2_AVX_INSTRUCTION + +#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \ + opcode) \ + void instruction(XMMRegister dst, XMMRegister src) { \ + instruction(dst, Operand(src)); \ + } \ + void instruction(XMMRegister dst, Operand src) { \ + ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \ + } + + SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION) +#undef DECLARE_SSSE3_INSTRUCTION + +#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \ + opcode) \ + void instruction(XMMRegister dst, XMMRegister src) { \ + instruction(dst, Operand(src)); \ + } \ + void instruction(XMMRegister dst, Operand src) { \ + sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \ + } + + SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) + SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) +#undef DECLARE_SSE4_INSTRUCTION + +#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \ + opcode) \ + void v##instruction(XMMRegister 
dst, XMMRegister src1, XMMRegister src2) { \ + v##instruction(dst, src1, Operand(src2)); \ + } \ + void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \ + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \ + } + + SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION) + SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION) +#undef DECLARE_SSE34_AVX_INSTRUCTION + +#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \ + opcode) \ + void v##instruction(XMMRegister dst, XMMRegister src) { \ + v##instruction(dst, Operand(src)); \ + } \ + void v##instruction(XMMRegister dst, Operand src) { \ + vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \ + } + + SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION) +#undef DECLARE_SSE4_AVX_RM_INSTRUCTION + + // Prefetch src position into cache level. + // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a + // non-temporal + void prefetch(Operand src, int level); + // TODO(lrn): Need SFENCE for movnt? + + // Check the code size generated from label to here. + int SizeOfCodeGeneratedSince(Label* label) { + return pc_offset() - label->pos(); + } + + // Record a deoptimization reason that can be used by a log or cpu profiler. + // Use --trace-deopt to enable. + void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position, + int id); + + // Writes a single byte or word of data in the code stream. Used for + // inline tables, e.g., jump-tables. + void db(uint8_t data); + void dd(uint32_t data); + void dq(uint64_t data); + void dp(uintptr_t data) { dd(data); } + void dd(Label* label); + + // Check if there is less than kGap bytes available in the buffer. + // If this is the case, we need to grow the buffer before emitting + // an instruction or relocation information. + inline bool buffer_overflow() const { + return pc_ >= reloc_info_writer.pos() - kGap; + } + + // Get the number of bytes available in the buffer. 
+ inline int available_space() const { return reloc_info_writer.pos() - pc_; } + + static bool IsNop(Address addr); + + int relocation_writer_size() { + return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos(); + } + + // Avoid overflows for displacements etc. + static constexpr int kMaximalBufferSize = 512 * MB; + + byte byte_at(int pos) { return buffer_start_[pos]; } + void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; } + + protected: + void emit_sse_operand(XMMRegister reg, Operand adr); + void emit_sse_operand(XMMRegister dst, XMMRegister src); + void emit_sse_operand(Register dst, XMMRegister src); + void emit_sse_operand(XMMRegister dst, Register src); + + Address addr_at(int pos) { + return reinterpret_cast<Address>(buffer_start_ + pos); + } + + private: + uint32_t long_at(int pos) { + return ReadUnalignedValue<uint32_t>(addr_at(pos)); + } + void long_at_put(int pos, uint32_t x) { + WriteUnalignedValue(addr_at(pos), x); + } + + // code emission + void GrowBuffer(); + inline void emit(uint32_t x); + inline void emit(Handle<HeapObject> handle); + inline void emit(uint32_t x, RelocInfo::Mode rmode); + inline void emit(Handle<Code> code, RelocInfo::Mode rmode); + inline void emit(const Immediate& x); + inline void emit_b(Immediate x); + inline void emit_w(const Immediate& x); + inline void emit_q(uint64_t x); + + // Emit the code-object-relative offset of the label's position + inline void emit_code_relative_offset(Label* label); + + // instruction generation + void emit_arith_b(int op1, int op2, Register dst, int imm8); + + // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81) + // with a given destination expression and an immediate operand. It attempts + // to use the shortest encoding possible. + // sel specifies the /n in the modrm byte (see the Intel PRM). 
+ void emit_arith(int sel, Operand dst, const Immediate& x); + + void emit_operand(int code, Operand adr); + void emit_operand(Register reg, Operand adr); + void emit_operand(XMMRegister reg, Operand adr); + + void emit_label(Label* label); + + void emit_farith(int b1, int b2, int i); + + // Emit vex prefix + enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 }; + enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 }; + enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 }; + enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 }; + inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp, + LeadingOpcode m, VexW w); + inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp, + LeadingOpcode m, VexW w); + + // labels + void print(const Label* L); + void bind_to(Label* L, int pos); + + // displacements + inline Displacement disp_at(Label* L); + inline void disp_at_put(Label* L, Displacement disp); + inline void emit_disp(Label* L, Displacement::Type type); + inline void emit_near_disp(Label* L); + + void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape, + byte opcode); + void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1, + byte escape2, byte opcode); + void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1, + byte escape2, byte opcode); + void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2, + SIMDPrefix pp, LeadingOpcode m, VexW w); + // Most BMI instructions are similar. 
+ void bmi1(byte op, Register reg, Register vreg, Operand rm); + void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm); + + // record reloc info for current pc_ + void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0); + + // record the position of jmp/jcc instruction + void record_farjmp_position(Label* L, int pos); + + bool is_optimizable_farjmp(int idx); + + void AllocateAndInstallRequestedHeapObjects(Isolate* isolate); + + int WriteCodeComments(); + + friend class EnsureSpace; + + // Internal reference positions, required for (potential) patching in + // GrowBuffer(); contains only those internal references whose labels + // are already bound. + std::deque<int> internal_reference_positions_; + + // code generation + RelocInfoWriter reloc_info_writer; + + // Variables for this instance of assembler + int farjmp_num_ = 0; + std::deque<int> farjmp_positions_; + std::map<Label*, std::vector<int>> label_farjmp_maps_; +}; + +// Helper class that ensures that there is enough space for generating +// instructions and relocation information. The constructor makes +// sure that there is enough space and (in debug mode) the destructor +// checks that we did not generate too much. +class EnsureSpace { + public: + explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) { + if (assembler_->buffer_overflow()) assembler_->GrowBuffer(); +#ifdef DEBUG + space_before_ = assembler_->available_space(); +#endif + } + +#ifdef DEBUG + ~EnsureSpace() { + int bytes_generated = space_before_ - assembler_->available_space(); + DCHECK(bytes_generated < assembler_->kGap); + } +#endif + + private: + Assembler* assembler_; +#ifdef DEBUG + int space_before_; +#endif +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_ |