summary refs log tree commit diff
path: root/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/compiler/backend/x64/code-generator-x64.cc')
-rw-r--r-- deps/v8/src/compiler/backend/x64/code-generator-x64.cc 4119
1 files changed, 4119 insertions, 0 deletions
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
new file mode 100644
index 0000000000..bcb37e1b46
--- /dev/null
+++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
@@ -0,0 +1,4119 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/code-generator.h"
+
+#include <limits>
+
+#include "src/base/overflowing-math.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/heap-inl.h" // crbug.com/v8/8499
+#include "src/macro-assembler.h"
+#include "src/objects/smi.h"
+#include "src/optimized-compilation-info.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+#include "src/x64/assembler-x64.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// X64-specific operand decoding layered on InstructionOperandConverter:
+// immediates, stack-slot operands, and memory operands assembled from the
+// addressing mode encoded in the instruction's opcode.
+class X64OperandConverter : public InstructionOperandConverter {
+ public:
+  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  // Decodes input |index| as an immediate.
+  Immediate InputImmediate(size_t index) {
+    InstructionOperand* const input = instr_->InputAt(index);
+    return ToImmediate(input);
+  }
+
+  // Decodes input |index| as a stack-slot operand, displaced by |extra|.
+  Operand InputOperand(size_t index, int extra = 0) {
+    InstructionOperand* const input = instr_->InputAt(index);
+    return ToOperand(input, extra);
+  }
+
+  // Decodes the instruction's output as a stack-slot operand.
+  Operand OutputOperand() {
+    InstructionOperand* const output = instr_->Output();
+    return ToOperand(output);
+  }
+
+  // Converts a constant operand to an Immediate. A float64 constant is only
+  // accepted when all of its bits are zero; wasm references keep their
+  // relocation mode.
+  Immediate ToImmediate(InstructionOperand* operand) {
+    Constant value = ToConstant(operand);
+    if (value.type() == Constant::kFloat64) {
+      DCHECK_EQ(0, value.ToFloat64().AsUint64());
+      return Immediate(0);
+    }
+    const auto rmode = value.rmode();
+    return RelocInfo::IsWasmReference(rmode)
+               ? Immediate(value.ToInt32(), rmode)
+               : Immediate(value.ToInt32());
+  }
+
+  // Converts a (FP) stack-slot operand to a memory operand; |extra| is added
+  // to the slot's frame offset.
+  Operand ToOperand(InstructionOperand* op, int extra = 0) {
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    const int slot = AllocatedOperand::cast(op)->index();
+    return SlotToOperand(slot, extra);
+  }
+
+  // Addresses frame slot |slot_index| relative to rsp or rbp, whichever the
+  // current frame access state reports for that slot.
+  Operand SlotToOperand(int slot_index, int extra = 0) {
+    FrameOffset where = frame_access_state()->GetFrameOffset(slot_index);
+    Register base = where.from_stack_pointer() ? rsp : rbp;
+    return Operand(base, where.offset() + extra);
+  }
+
+  // Returns the current value of |*offset| and then advances it by one.
+  static size_t NextOffset(size_t* offset) { return (*offset)++; }
+
+  // Maps |mode| to a ScaleFactor, given |one|, the times-1 member of the
+  // same addressing-mode family.
+  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
+    STATIC_ASSERT(0 == static_cast<int>(times_1));
+    STATIC_ASSERT(1 == static_cast<int>(times_2));
+    STATIC_ASSERT(2 == static_cast<int>(times_4));
+    STATIC_ASSERT(3 == static_cast<int>(times_8));
+    const int log2_scale = static_cast<int>(mode - one);
+    DCHECK(log2_scale >= 0 && log2_scale < 4);
+    return static_cast<ScaleFactor>(log2_scale);
+  }
+
+  // Builds the memory operand described by the instruction's addressing
+  // mode, consuming inputs starting at |*offset| and leaving |*offset| at
+  // the first input that was not consumed.
+  Operand MemoryOperand(size_t* offset) {
+    constexpr int32_t kNoDisplacement = 0;
+    const AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+    switch (mode) {
+      case kMode_MR:
+      case kMode_M1: {
+        // A single register and no displacement.
+        Register reg = InputRegister(NextOffset(offset));
+        return Operand(reg, kNoDisplacement);
+      }
+      case kMode_MRI: {
+        Register base = InputRegister(NextOffset(offset));
+        const int32_t displacement = InputInt32(NextOffset(offset));
+        return Operand(base, displacement);
+      }
+      case kMode_MR1:
+      case kMode_MR2:
+      case kMode_MR4:
+      case kMode_MR8: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        return Operand(base, index, ScaleFor(kMode_MR1, mode),
+                       kNoDisplacement);
+      }
+      case kMode_MR1I:
+      case kMode_MR2I:
+      case kMode_MR4I:
+      case kMode_MR8I: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        const ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
+        const int32_t displacement = InputInt32(NextOffset(offset));
+        return Operand(base, index, scale, displacement);
+      }
+      case kMode_M2:
+        UNREACHABLE();  // Should use kModeMR with more compact encoding instead
+        return Operand(no_reg, 0);
+      case kMode_M4:
+      case kMode_M8: {
+        Register index = InputRegister(NextOffset(offset));
+        return Operand(index, ScaleFor(kMode_M1, mode), kNoDisplacement);
+      }
+      case kMode_M1I:
+      case kMode_M2I:
+      case kMode_M4I:
+      case kMode_M8I: {
+        Register index = InputRegister(NextOffset(offset));
+        const ScaleFactor scale = ScaleFor(kMode_M1I, mode);
+        const int32_t displacement = InputInt32(NextOffset(offset));
+        return Operand(index, scale, displacement);
+      }
+      case kMode_Root: {
+        const int32_t displacement = InputInt32(NextOffset(offset));
+        return Operand(kRootRegister, displacement);
+      }
+      case kMode_None:
+        UNREACHABLE();
+    }
+    UNREACHABLE();
+  }
+
+  // Convenience overload: decodes the memory operand starting at input
+  // |first_input| without reporting how many inputs were consumed.
+  Operand MemoryOperand(size_t first_input = 0) {
+    return MemoryOperand(&first_input);
+  }
+};
+
+namespace {
+
+// Returns true when input |index| of |instr| is an immediate operand.
+bool HasImmediateInput(Instruction* instr, size_t index) {
+  const InstructionOperand* const input = instr->InputAt(index);
+  return input->IsImmediate();
+}
+
+// Out-of-line code that fills an XMM register with a float32 NaN by zeroing
+// the register and then dividing it by itself.
+class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), nan_reg_(result) {}
+
+  void Generate() final {
+    __ Xorps(nan_reg_, nan_reg_);  // x ^ x == all zero bits.
+    __ Divss(nan_reg_, nan_reg_);  // 0 / 0 yields NaN.
+  }
+
+ private:
+  XMMRegister const nan_reg_;
+};
+
+// Out-of-line code that fills an XMM register with a float64 NaN by zeroing
+// the register and then dividing it by itself.
+class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), nan_reg_(result) {}
+
+  void Generate() final {
+    __ Xorpd(nan_reg_, nan_reg_);  // x ^ x == all zero bits.
+    __ Divsd(nan_reg_, nan_reg_);  // 0 / 0 yields NaN.
+  }
+
+ private:
+  XMMRegister const nan_reg_;
+};
+
+class OutOfLineTruncateDoubleToI final : public OutOfLineCode {  // Out-of-line path that truncates a double to a 32-bit integer by calling the DoubleToI stub/builtin.
+ public:
+ OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
+ XMMRegister input, StubCallMode stub_mode,
+ UnwindingInfoWriter* unwinding_info_writer)
+ : OutOfLineCode(gen),
+ result_(result),
+ input_(input),
+ stub_mode_(stub_mode),
+ unwinding_info_writer_(unwinding_info_writer),
+ isolate_(gen->isolate()),
+ zone_(gen->zone()) {}
+
+ void Generate() final {
+ __ subp(rsp, Immediate(kDoubleSize));  // Reserve one double-sized stack slot.
+ unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),  // Report the rsp adjustment to the unwinding-info writer.
+ kDoubleSize);
+ __ Movsd(MemOperand(rsp, 0), input_);  // Spill the input double into that slot before the call.
+ if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
+ } else {
+ __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
+ }
+ __ movl(result_, MemOperand(rsp, 0));  // Read the 32-bit result back from the same slot.
+ __ addp(rsp, Immediate(kDoubleSize));  // Release the slot.
+ unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),  // Undo the earlier base-offset adjustment.
+ -kDoubleSize);
+ }
+
+ private:
+ Register const result_;  // Receives the truncated value.
+ XMMRegister const input_;  // Holds the double to truncate.
+ StubCallMode stub_mode_;  // Selects the wasm runtime stub call or the builtin call.
+ UnwindingInfoWriter* const unwinding_info_writer_;
+ Isolate* isolate_;  // Used to look up the DoubleToI builtin.
+ Zone* zone_;  // Not referenced by Generate().
+};
+
+class OutOfLineRecordWrite final : public OutOfLineCode {  // Out-of-line part of a store with write barrier: filters on the stored value, then calls the record-write stub.
+ public:
+ OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
+ Register value, Register scratch0, Register scratch1,
+ RecordWriteMode mode, StubCallMode stub_mode)
+ : OutOfLineCode(gen),
+ object_(object),
+ operand_(operand),
+ value_(value),
+ scratch0_(scratch0),
+ scratch1_(scratch1),
+ mode_(mode),
+ stub_mode_(stub_mode),
+ zone_(gen->zone()) {}
+
+ void Generate() final {
+ if (mode_ > RecordWriteMode::kValueIsPointer) {  // Only modes above kValueIsPointer emit the Smi check.
+ __ JumpIfSmi(value_, exit());  // A Smi value leaves without calling the stub.
+ }
+ __ CheckPageFlag(value_, scratch0_,  // Leave early when the pointers-to-here-are-interesting flag test on the value's page gives zero.
+ MemoryChunk::kPointersToHereAreInterestingMask, zero,
+ exit());
+ __ leap(scratch1_, operand_);  // scratch1_ = address of the slot that was written.
+
+ RememberedSetAction const remembered_set_action =  // The remembered set is emitted only for modes above kValueIsMap.
+ mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+ : OMIT_REMEMBERED_SET;
+ SaveFPRegsMode const save_fp_mode =  // FP registers are saved only if the frame allocated double registers.
+ frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+
+ if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+ save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
+ } else {
+ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+ save_fp_mode);
+ }
+ }
+
+ private:
+ Register const object_;  // Object that was stored into.
+ Operand const operand_;  // Memory operand of the written slot.
+ Register const value_;  // Value that was stored.
+ Register const scratch0_;  // Scratch for the page-flag check.
+ Register const scratch1_;  // Receives the slot address passed to the stub.
+ RecordWriteMode const mode_;
+ StubCallMode const stub_mode_;
+ Zone* zone_;  // Not referenced by Generate().
+};
+
+class WasmOutOfLineTrap : public OutOfLineCode {  // Out-of-line code that raises a wasm trap; the trap id is taken from the instruction's last input.
+ public:
+ WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+ : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
+
+ void Generate() override {
+ X64OperandConverter i(gen_, instr_);
+ TrapId trap_id =  // The trap id is encoded as the last input of instr_.
+ static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+ GenerateWithTrapId(trap_id);
+ }
+
+ protected:
+ CodeGenerator* gen_;  // Protected so subclasses can reach the code generator.
+
+ void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }  // Entry point for subclasses that supply their own trap id.
+
+ private:
+ void GenerateCallToTrap(TrapId trap_id) {  // Emits either the testing C callback followed by a return, or a call to the wasm trap stub.
+ if (!gen_->wasm_runtime_exception_support()) {
+ // We cannot test calls to the runtime in cctest/test-run-wasm.
+ // Therefore we emit a call to C here instead of a call to the runtime.
+ __ PrepareCallCFunction(0);
+ __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
+ 0);
+ __ LeaveFrame(StackFrame::WASM_COMPILED);
+ auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+ size_t pop_size =  // Bytes of incoming stack parameters to pop on return.
+ call_descriptor->StackParameterCount() * kSystemPointerSize;
+ // Use rcx as a scratch register, we return anyways immediately.
+ __ Ret(static_cast<int>(pop_size), rcx);
+ } else {
+ gen_->AssembleSourcePosition(instr_);
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+ ReferenceMap* reference_map =  // Fresh, zone-allocated reference map for the safepoint recorded after the call.
+ new (gen_->zone()) ReferenceMap(gen_->zone());
+ gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
+ Safepoint::kNoLazyDeopt);
+ __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);  // The trap stub is not expected to return here.
+ }
+ }
+
+ Instruction* instr_;  // Instruction whose last input carries the trap id.
+};
+
+class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {  // Out-of-line landing code for a protected memory access located at code offset pc.
+ public:
+ WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
+ : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
+
+ void Generate() final {
+ gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());  // Pair the protected instruction's offset with the offset of this landing code.
+ GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);  // Always uses the out-of-bounds trap id rather than reading one from the instruction.
+ }
+
+ private:
+ int pc_;  // Code offset of the protected instruction.
+};
+
+// Allocates a WasmProtectedInstructionTrap in |zone| for the instruction at
+// code offset |pc| when |opcode| encodes kMemoryAccessProtected; does nothing
+// for any other access mode. |i| is not used.
+void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
+                         InstructionCode opcode, Instruction* instr,
+                         X64OperandConverter& i, int pc) {
+  const auto access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode != kMemoryAccessProtected) return;
+  new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
+}
+
+// When |opcode| encodes kMemoryAccessPoisoned, masks the instruction's output
+// register with kSpeculationPoisonRegister; does nothing for any other access
+// mode. |instr| is not used.
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode, Instruction* instr,
+                                   X64OperandConverter& i) {
+  const auto access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode != kMemoryAccessPoisoned) return;
+  Register loaded = i.OutputRegister();
+  codegen->tasm()->andq(loaded, kSpeculationPoisonRegister);
+}
+
+} // namespace
+
+#define ASSEMBLE_UNOP(asm_instr) /* Emits asm_instr on the instruction's output; expects instr and i in scope. */ \
+ do { \
+ if (instr->Output()->IsRegister()) { \
+ __ asm_instr(i.OutputRegister()); \
+ } else { /* Output is not a register: operate on its stack slot. */ \
+ __ asm_instr(i.OutputOperand()); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_BINOP(asm_instr) /* Emits asm_instr with input 0 on the left; expects instr and i in scope. */ \
+ do { \
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
+ size_t index = 1; /* The memory operand's inputs start at input 1. */ \
+ Operand right = i.MemoryOperand(&index); \
+ __ asm_instr(i.InputRegister(0), right); \
+ } else { \
+ if (HasImmediateInput(instr, 1)) { /* Immediate right side; left is a register or a stack slot. */ \
+ if (instr->InputAt(0)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+ } else { \
+ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
+ } \
+ } else { /* Register left side; right is a register or a stack slot. */ \
+ if (instr->InputAt(1)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
+ } else { \
+ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
+ } \
+ } \
+ } \
+ } while (false)
+
+#define ASSEMBLE_COMPARE(asm_instr) /* Emits the comparison asm_instr; expects instr and i in scope. */ \
+ do { \
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
+ size_t index = 0; /* The memory operand is on the left and starts at input 0. */ \
+ Operand left = i.MemoryOperand(&index); \
+ if (HasImmediateInput(instr, index)) { /* index now names the first input after the memory operand. */ \
+ __ asm_instr(left, i.InputImmediate(index)); \
+ } else { \
+ __ asm_instr(left, i.InputRegister(index)); \
+ } \
+ } else { \
+ if (HasImmediateInput(instr, 1)) { /* Immediate right side; left is a register or a stack slot. */ \
+ if (instr->InputAt(0)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+ } else { \
+ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
+ } \
+ } else { /* Register left side; right is a register or a stack slot. */ \
+ if (instr->InputAt(1)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
+ } else { \
+ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
+ } \
+ } \
+ } \
+ } while (false)
+
+#define ASSEMBLE_MULT(asm_instr) /* Emits the multiply asm_instr into the output register; expects instr and i in scope. */ \
+ do { \
+ if (HasImmediateInput(instr, 1)) { /* Three-operand form: output, input 0 (register or stack slot), immediate. */ \
+ if (instr->InputAt(0)->IsRegister()) { \
+ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
+ i.InputImmediate(1)); \
+ } else { \
+ __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
+ i.InputImmediate(1)); \
+ } \
+ } else { /* Two-operand form: input 0 is not passed, so the output register presumably already holds it - TODO confirm. */ \
+ if (instr->InputAt(1)->IsRegister()) { \
+ __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
+ } else { \
+ __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
+ } \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SHIFT(asm_instr, width) /* Emits the shift asm_instr on the output in place; width selects the InputInt<width> accessor for the count. */ \
+ do { \
+ if (HasImmediateInput(instr, 1)) { /* Immediate shift count taken from input 1. */ \
+ if (instr->Output()->IsRegister()) { \
+ __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
+ } else { \
+ __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
+ } \
+ } else { /* No immediate: use the asm_instr##_cl variant (count presumably in cl, per the suffix). */ \
+ if (instr->Output()->IsRegister()) { \
+ __ asm_instr##_cl(i.OutputRegister()); \
+ } else { \
+ __ asm_instr##_cl(i.OutputOperand()); \
+ } \
+ } \
+ } while (false)
+
+#define ASSEMBLE_MOVX(asm_instr) /* Emits asm_instr into the output register from a memory operand, a register, or a stack slot. */ \
+ do { \
+ if (instr->addressing_mode() != kMode_None) { /* Source is a memory operand decoded from input 0 onwards. */ \
+ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
+ } else if (instr->InputAt(0)->IsRegister()) { \
+ __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
+ } else { /* Source is input 0's stack slot. */ \
+ __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SSE_BINOP(asm_instr) /* Emits two-operand asm_instr: input 0 (FP register) with input 1 (FP register or stack slot). */ \
+ do { \
+ if (instr->InputAt(1)->IsFPRegister()) { \
+ __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
+ } else { /* Input 1 is not an FP register: use its stack slot. */ \
+ __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SSE_UNOP(asm_instr) /* Emits asm_instr from input 0 (FP register or stack slot) into the output FP register. */ \
+ do { \
+ if (instr->InputAt(0)->IsFPRegister()) { \
+ __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
+ } else { /* Input 0 is not an FP register: use its stack slot. */ \
+ __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_AVX_BINOP(asm_instr) /* Emits three-operand asm_instr (output, input 0, input 1) inside an AVX CpuFeatureScope. */ \
+ do { \
+ CpuFeatureScope avx_scope(tasm(), AVX); \
+ if (instr->InputAt(1)->IsFPRegister()) { \
+ __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
+ i.InputDoubleRegister(1)); \
+ } else { /* Input 1 is not an FP register: use its stack slot. */ \
+ __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
+ i.InputOperand(1)); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_IEEE754_BINOP(name) /* Calls the C function ieee754_<name>_function with two parameters; no argument moves are emitted here. */ \
+ do { \
+ __ PrepareCallCFunction(2); \
+ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
+ } while (false)
+
+#define ASSEMBLE_IEEE754_UNOP(name) /* Calls the C function ieee754_<name>_function with one parameter; no argument moves are emitted here. */ \
+ do { \
+ __ PrepareCallCFunction(1); \
+ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
+ } while (false)
+
+#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) /* Atomic read-modify-write as a compare-exchange retry loop; uses rax and temp register 0. */ \
+ do { \
+ Label binop; \
+ __ bind(&binop); /* Retry target. */ \
+ __ mov_inst(rax, i.MemoryOperand(1)); /* Load the current memory value into rax. */ \
+ __ movl(i.TempRegister(0), rax); /* 32-bit copy of the loaded value into the temp. */ \
+ __ bin_inst(i.TempRegister(0), i.InputRegister(0)); /* temp = loaded value <op> input 0. */ \
+ __ lock(); \
+ __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); /* Try to publish temp (x86 cmpxchg compares memory against rax). */ \
+ __ j(not_equal, &binop); /* Memory changed since the load: retry. */ \
+ } while (false)
+
+#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) /* Same retry loop as ASSEMBLE_ATOMIC_BINOP, but copies the loaded value with a 64-bit movq. */ \
+ do { \
+ Label binop; \
+ __ bind(&binop); /* Retry target. */ \
+ __ mov_inst(rax, i.MemoryOperand(1)); /* Load the current memory value into rax. */ \
+ __ movq(i.TempRegister(0), rax); /* 64-bit copy of the loaded value into the temp. */ \
+ __ bin_inst(i.TempRegister(0), i.InputRegister(0)); /* temp = loaded value <op> input 0. */ \
+ __ lock(); \
+ __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); /* Try to publish temp (x86 cmpxchg compares memory against rax). */ \
+ __ j(not_equal, &binop); /* Memory changed since the load: retry. */ \
+ } while (false)
+
+#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) /* Emits opcode(dst_operand, src), where src is input index as a SIMD register or a stack slot. */ \
+ do { \
+ if (instr->InputAt(index)->IsSimd128Register()) { \
+ __ opcode(dst_operand, i.InputSimd128Register(index)); \
+ } else { /* Input is not a SIMD register: use its stack slot. */ \
+ __ opcode(dst_operand, i.InputOperand(index)); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) /* Like ASSEMBLE_SIMD_INSTR, with the extra trailing argument imm passed to opcode. */ \
+ do { \
+ if (instr->InputAt(index)->IsSimd128Register()) { \
+ __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
+ } else { /* Input is not a SIMD register: use its stack slot. */ \
+ __ opcode(dst_operand, i.InputOperand(index), imm); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) /* Emits opcode(dst, src) where dst is the output register, which must equal input 0. */ \
+ do { \
+ XMMRegister dst = i.OutputSimd128Register(); \
+ DCHECK_EQ(dst, i.InputSimd128Register(0)); \
+ byte input_index = instr->InputCount() == 2 ? 1 : 0; /* Source is input 1 when there are two inputs, otherwise input 0 itself. */ \
+ ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
+ } while (false)
+
+#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) /* Emits opcode(output, input 1, imm) inside a CpuFeatureScope for SSELevel. */ \
+ do { \
+ CpuFeatureScope sse_scope(tasm(), SSELevel); \
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); /* The output register must be the same as input 0. */ \
+ __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
+ } while (false)
+
+void CodeGenerator::AssembleDeconstructFrame() {  // Emits the frame teardown and reports its position to the unwinding-info writer.
+ unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());  // Recorded before the teardown instructions are emitted.
+ __ movq(rsp, rbp);  // Reset rsp to the frame pointer.
+ __ popq(rbp);  // Pop the value at the top of the stack into rbp.
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {  // Reloads rbp when a frame exists, then switches frame-slot addressing to rsp.
+ if (frame_access_state()->has_frame()) {
+ __ movq(rbp, MemOperand(rbp, 0));  // rbp = the word at [rbp] (presumably the caller's saved frame pointer - TODO confirm).
+ }
+ frame_access_state()->SetFrameAccessToSP();  // Done whether or not a frame exists.
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,  // Calls PrepareForTailCall only when the current frame is an arguments adaptor frame; args_reg holds the callee argument count.
+ Register scratch1,
+ Register scratch2,
+ Register scratch3) {
+ DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));  // All four registers must be distinct.
+ Label done;
+
+ // Check if current frame is an arguments adaptor frame.
+ __ cmpp(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
+ Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+ __ j(not_equal, &done, Label::kNear);  // Not an adaptor frame: skip the rest.
+
+ // Load arguments count from current arguments adaptor frame (note, it
+ // does not include receiver).
+ Register caller_args_count_reg = scratch1;  // scratch1 is reused to hold the caller's argument count.
+ __ SmiUntag(caller_args_count_reg,
+ Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+
+ ParameterCount callee_args_count(args_reg);
+ __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
+ scratch3);
+ __ bind(&done);
+}
+
+namespace {
+
+// Adjusts rsp so that the slot count above it becomes |new_slot_above_sp|,
+// and records the same delta in |state|. Growing the stack is always
+// performed; shrinking is performed only when |allow_shrinkage| is true.
+void AdjustStackPointerForTailCall(Assembler* assembler,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  const int slots_in_use = state->GetSPToFPSlotCount() +
+                           StandardFrameConstants::kFixedSlotCountAboveFp;
+  const int delta = new_slot_above_sp - slots_in_use;
+  if (delta == 0) return;
+  if (delta > 0) {
+    assembler->subq(rsp, Immediate(delta * kSystemPointerSize));
+  } else {
+    if (!allow_shrinkage) return;
+    assembler->addq(rsp, Immediate(-delta * kSystemPointerSize));
+  }
+  state->IncreaseSPDelta(delta);
+}
+
+// Pushes the four 32-bit words of |mask| as two 64-bit values, going through
+// kScratchRegister: words 2 and 3 are pushed first, then words 0 and 1. In
+// each pair the even-indexed word occupies the low 32 bits.
+void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
+  for (int low = 2; low >= 0; low -= 2) {
+    const int64_t packed =
+        (mask[low]) | (static_cast<uint64_t>(mask[low + 1]) << 32);
+    assembler->movq(kScratchRegister, packed);
+    assembler->Push(kScratchRegister);
+  }
+}
+
+} // namespace
+
+// Runs before the gap moves of a tail call. Push-compatible moves (immediate
+// and scalar) are emitted as pushes and eliminated, but only when the last of
+// them targets the slot just below |first_unused_stack_slot|. Afterwards the
+// stack is grown (never shrunk) up to |first_unused_stack_slot|.
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, flags, &pushes);
+
+  const bool use_pushes =
+      !pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot);
+  if (use_pushes) {
+    X64OperandConverter g(this, instr);
+    for (MoveOperands* move : pushes) {
+      LocationOperand dst(LocationOperand::cast(move->destination()));
+      InstructionOperand src(move->source());
+      // Line rsp up with the destination slot before pushing into it.
+      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                    dst.index());
+      if (src.IsStackSlot()) {
+        __ Push(g.SlotToOperand(LocationOperand::cast(src).index()));
+      } else if (src.IsRegister()) {
+        __ Push(LocationOperand::cast(src).GetRegister());
+      } else if (src.IsImmediate()) {
+        __ Push(Immediate(ImmediateOperand::cast(src).inline_value()));
+      } else {
+        // Pushes of non-scalar data types is not supported.
+        UNIMPLEMENTED();
+      }
+      // Each push moves rsp by one slot; the move itself is now done.
+      frame_access_state()->IncreaseSPDelta(1);
+      move->Eliminate();
+    }
+  }
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,  // Adjusts rsp to first_unused_stack_slot, with shrinking allowed (the helper's default); instr is unused.
+ int first_unused_stack_slot) {
+ AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+ first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct; emits an Assert that aborts with kWrongFunctionCodeStart on mismatch.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+ __ ComputeCodeStartAddress(rbx);  // rbx = computed code start address; rbx is overwritten.
+ __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
+ __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+// 1. read from memory the word that contains that bit, which can be found in
+// the flags in the referenced {CodeDataContainer} object;
+// 2. test kMarkedForDeoptimizationBit in those flags; and
+// 3. if it is not zero then it jumps to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+ int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;  // Field offset relative to the code start address rather than to the Code object.
+ __ LoadTaggedPointerField(rbx,  // rbx = the CodeDataContainer; rbx is overwritten.
+ Operand(kJavaScriptCallCodeStartRegister, offset));
+ __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
+ Immediate(1 << Code::kMarkedForDeoptimizationBit));
+ __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),  // Conditional jump, taken only when the tested bit is set.
+ RelocInfo::CODE_TARGET, not_zero);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {  // Computes kSpeculationPoisonRegister from a code-start comparison; rbx is overwritten.
+ // Set a mask which has all bits set in the normal case, but has all
+ // bits cleared if we are speculatively executing the wrong PC.
+ __ ComputeCodeStartAddress(rbx);
+ __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);  // Start with all bits cleared.
+ __ cmpp(kJavaScriptCallCodeStartRegister, rbx);  // Compare the code start register with the computed address.
+ __ movp(rbx, Immediate(-1));  // Emitted between the compare and the cmov, so it must leave the flags intact.
+ __ cmovq(equal, kSpeculationPoisonRegister, rbx);  // Take the all-ones mask only if the compare found equality.
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {  // Masks the JS function register, the context register and rsp with kSpeculationPoisonRegister.
+ __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
+ __ andq(kContextRegister, kSpeculationPoisonRegister);
+ __ andq(rsp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+ Instruction* instr) {
+ X64OperandConverter i(this, instr);
+ InstructionCode opcode = instr->opcode();
+ ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+ switch (arch_opcode) {
+ case kArchCallCodeObject: {
+ if (HasImmediateInput(instr, 0)) {
+ Handle<Code> code = i.InputCode(0);
+ __ Call(code, RelocInfo::CODE_TARGET);
+ } else {
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ __ LoadCodeObjectEntry(reg, reg);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(reg);
+ } else {
+ __ call(reg);
+ }
+ }
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchCallBuiltinPointer: {
+ DCHECK(!HasImmediateInput(instr, 0));
+ Register builtin_pointer = i.InputRegister(0);
+ __ CallBuiltinPointer(builtin_pointer);
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchCallWasmFunction: {
+ if (HasImmediateInput(instr, 0)) {
+ Constant constant = i.ToConstant(instr->InputAt(0));
+ Address wasm_code = static_cast<Address>(constant.ToInt64());
+ if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+ __ near_call(wasm_code, constant.rmode());
+ } else {
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(wasm_code, constant.rmode());
+ } else {
+ __ Call(wasm_code, constant.rmode());
+ }
+ }
+ } else {
+ Register reg = i.InputRegister(0);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(reg);
+ } else {
+ __ call(reg);
+ }
+ }
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchTailCallCodeObjectFromJSFunction:
+ case kArchTailCallCodeObject: {
+ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+ i.TempRegister(0), i.TempRegister(1),
+ i.TempRegister(2));
+ }
+ if (HasImmediateInput(instr, 0)) {
+ Handle<Code> code = i.InputCode(0);
+ __ Jump(code, RelocInfo::CODE_TARGET);
+ } else {
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ __ LoadCodeObjectEntry(reg, reg);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ }
+ unwinding_info_writer_.MarkBlockWillExit();
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchTailCallWasm: {
+ if (HasImmediateInput(instr, 0)) {
+ Constant constant = i.ToConstant(instr->InputAt(0));
+ Address wasm_code = static_cast<Address>(constant.ToInt64());
+ if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+ __ near_jmp(wasm_code, constant.rmode());
+ } else {
+ __ Move(kScratchRegister, wasm_code, constant.rmode());
+ __ jmp(kScratchRegister);
+ }
+ } else {
+ Register reg = i.InputRegister(0);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ }
+ unwinding_info_writer_.MarkBlockWillExit();
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchTailCallAddress: {
+ CHECK(!HasImmediateInput(instr, 0));
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ unwinding_info_writer_.MarkBlockWillExit();
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchCallJSFunction: {
+ Register func = i.InputRegister(0);
+ if (FLAG_debug_code) {
+ // Check the function's context matches the context argument.
+ __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
+ __ Assert(equal, AbortReason::kWrongFunctionContext);
+ }
+ static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
+ __ LoadTaggedPointerField(rcx,
+ FieldOperand(func, JSFunction::kCodeOffset));
+ __ CallCodeObject(rcx);
+ frame_access_state()->ClearSPDelta();
+ RecordCallPosition(instr);
+ break;
+ }
+ case kArchPrepareCallCFunction: {
+ // Frame alignment requires using FP-relative frame addressing.
+ frame_access_state()->SetFrameAccessToFP();
+ int const num_parameters = MiscField::decode(instr->opcode());
+ __ PrepareCallCFunction(num_parameters);
+ break;
+ }
+ case kArchSaveCallerRegisters: {
+ fp_mode_ =
+ static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+ // kReturnRegister0 should have been saved before entering the stub.
+ int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+ DCHECK(IsAligned(bytes, kSystemPointerSize));
+ DCHECK_EQ(0, frame_access_state()->sp_delta());
+ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+ DCHECK(!caller_registers_saved_);
+ caller_registers_saved_ = true;
+ break;
+ }
+ case kArchRestoreCallerRegisters: {
+ DCHECK(fp_mode_ ==
+ static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+ // Don't overwrite the returned value.
+ int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+ frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+ DCHECK_EQ(0, frame_access_state()->sp_delta());
+ DCHECK(caller_registers_saved_);
+ caller_registers_saved_ = false;
+ break;
+ }
+ case kArchPrepareTailCall:
+ AssemblePrepareTailCall();
+ break;
+ case kArchCallCFunction: {
+ int const num_parameters = MiscField::decode(instr->opcode());
+ if (HasImmediateInput(instr, 0)) {
+ ExternalReference ref = i.InputExternalReference(0);
+ __ CallCFunction(ref, num_parameters);
+ } else {
+ Register func = i.InputRegister(0);
+ __ CallCFunction(func, num_parameters);
+ }
+ frame_access_state()->SetFrameAccessToDefault();
+ // Ideally, we should decrement SP delta to match the change of stack
+ // pointer in CallCFunction. However, for certain architectures (e.g.
+ // ARM), there may be more strict alignment requirement, causing old SP
+ // to be saved on the stack. In those cases, we can not calculate the SP
+ // delta statically.
+ frame_access_state()->ClearSPDelta();
+ if (caller_registers_saved_) {
+ // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+ // Here, we assume the sequence to be:
+ // kArchSaveCallerRegisters;
+ // kArchCallCFunction;
+ // kArchRestoreCallerRegisters;
+ int bytes =
+ __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+ }
+ // TODO(tebbi): Do we need an lfence here?
+ break;
+ }
+ case kArchJmp:
+ AssembleArchJump(i.InputRpo(0));
+ break;
+ case kArchBinarySearchSwitch:
+ AssembleArchBinarySearchSwitch(instr);
+ break;
+ case kArchLookupSwitch:
+ AssembleArchLookupSwitch(instr);
+ break;
+ case kArchTableSwitch:
+ AssembleArchTableSwitch(instr);
+ break;
+ case kArchComment:
+ __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+ break;
+ case kArchDebugAbort:
+ DCHECK(i.InputRegister(0) == rdx);
+ if (!frame_access_state()->has_frame()) {
+ // We don't actually want to generate a pile of code for this, so just
+ // claim there is a stack frame, without generating one.
+ FrameScope scope(tasm(), StackFrame::NONE);
+ __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
+ RelocInfo::CODE_TARGET);
+ } else {
+ __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
+ RelocInfo::CODE_TARGET);
+ }
+ __ int3();
+ unwinding_info_writer_.MarkBlockWillExit();
+ break;
+ case kArchDebugBreak:
+ __ int3();
+ break;
+ case kArchThrowTerminator:
+ unwinding_info_writer_.MarkBlockWillExit();
+ break;
+ case kArchNop:
+ // don't emit code for nops.
+ break;
+ case kArchDeoptimize: {
+ int deopt_state_id =
+ BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+ CodeGenResult result =
+ AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
+ if (result != kSuccess) return result;
+ unwinding_info_writer_.MarkBlockWillExit();
+ break;
+ }
+ case kArchRet:
+ AssembleReturn(instr->InputAt(0));
+ break;
+ case kArchStackPointer:
+ __ movq(i.OutputRegister(), rsp);
+ break;
+ case kArchFramePointer:
+ __ movq(i.OutputRegister(), rbp);
+ break;
+ case kArchParentFramePointer:
+ if (frame_access_state()->has_frame()) {
+ __ movq(i.OutputRegister(), Operand(rbp, 0));
+ } else {
+ __ movq(i.OutputRegister(), rbp);
+ }
+ break;
+ case kArchTruncateDoubleToI: {
+ auto result = i.OutputRegister();
+ auto input = i.InputDoubleRegister(0);
+ auto ool = new (zone()) OutOfLineTruncateDoubleToI(
+ this, result, input, DetermineStubCallMode(),
+ &unwinding_info_writer_);
+ // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
+ // use of Cvttsd2siq requires the movl below to avoid sign extension.
+ __ Cvttsd2siq(result, input);
+ __ cmpq(result, Immediate(1));
+ __ j(overflow, ool->entry());
+ __ bind(ool->exit());
+ __ movl(result, result);
+ break;
+ }
+ case kArchStoreWithWriteBarrier: {
+ RecordWriteMode mode =
+ static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+ Register object = i.InputRegister(0);
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ Register value = i.InputRegister(index);
+ Register scratch0 = i.TempRegister(0);
+ Register scratch1 = i.TempRegister(1);
+ auto ool = new (zone())
+ OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
+ mode, DetermineStubCallMode());
+ __ movp(operand, value);
+ __ CheckPageFlag(object, scratch0,
+ MemoryChunk::kPointersFromHereAreInterestingMask,
+ not_zero, ool->entry());
+ __ bind(ool->exit());
+ break;
+ }
+ case kArchWordPoisonOnSpeculation:
+ DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
+ __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
+ break;
+ case kLFence:
+ __ lfence();
+ break;
+ case kArchStackSlot: {
+ FrameOffset offset =
+ frame_access_state()->GetFrameOffset(i.InputInt32(0));
+ Register base = offset.from_stack_pointer() ? rsp : rbp;
+ __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
+ break;
+ }
+ case kIeee754Float64Acos:
+ ASSEMBLE_IEEE754_UNOP(acos);
+ break;
+ case kIeee754Float64Acosh:
+ ASSEMBLE_IEEE754_UNOP(acosh);
+ break;
+ case kIeee754Float64Asin:
+ ASSEMBLE_IEEE754_UNOP(asin);
+ break;
+ case kIeee754Float64Asinh:
+ ASSEMBLE_IEEE754_UNOP(asinh);
+ break;
+ case kIeee754Float64Atan:
+ ASSEMBLE_IEEE754_UNOP(atan);
+ break;
+ case kIeee754Float64Atanh:
+ ASSEMBLE_IEEE754_UNOP(atanh);
+ break;
+ case kIeee754Float64Atan2:
+ ASSEMBLE_IEEE754_BINOP(atan2);
+ break;
+ case kIeee754Float64Cbrt:
+ ASSEMBLE_IEEE754_UNOP(cbrt);
+ break;
+ case kIeee754Float64Cos:
+ ASSEMBLE_IEEE754_UNOP(cos);
+ break;
+ case kIeee754Float64Cosh:
+ ASSEMBLE_IEEE754_UNOP(cosh);
+ break;
+ case kIeee754Float64Exp:
+ ASSEMBLE_IEEE754_UNOP(exp);
+ break;
+ case kIeee754Float64Expm1:
+ ASSEMBLE_IEEE754_UNOP(expm1);
+ break;
+ case kIeee754Float64Log:
+ ASSEMBLE_IEEE754_UNOP(log);
+ break;
+ case kIeee754Float64Log1p:
+ ASSEMBLE_IEEE754_UNOP(log1p);
+ break;
+ case kIeee754Float64Log2:
+ ASSEMBLE_IEEE754_UNOP(log2);
+ break;
+ case kIeee754Float64Log10:
+ ASSEMBLE_IEEE754_UNOP(log10);
+ break;
+ case kIeee754Float64Pow: {
+ // TODO(bmeurer): Improve integration of the stub.
+ __ Movsd(xmm2, xmm0);
+ __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
+ __ Movsd(xmm0, xmm3);
+ break;
+ }
+ case kIeee754Float64Sin:
+ ASSEMBLE_IEEE754_UNOP(sin);
+ break;
+ case kIeee754Float64Sinh:
+ ASSEMBLE_IEEE754_UNOP(sinh);
+ break;
+ case kIeee754Float64Tan:
+ ASSEMBLE_IEEE754_UNOP(tan);
+ break;
+ case kIeee754Float64Tanh:
+ ASSEMBLE_IEEE754_UNOP(tanh);
+ break;
+ case kX64Add32:
+ ASSEMBLE_BINOP(addl);
+ break;
+ case kX64Add:
+ ASSEMBLE_BINOP(addq);
+ break;
+ case kX64Sub32:
+ ASSEMBLE_BINOP(subl);
+ break;
+ case kX64Sub:
+ ASSEMBLE_BINOP(subq);
+ break;
+ case kX64And32:
+ ASSEMBLE_BINOP(andl);
+ break;
+ case kX64And:
+ ASSEMBLE_BINOP(andq);
+ break;
+ case kX64Cmp8:
+ ASSEMBLE_COMPARE(cmpb);
+ break;
+ case kX64Cmp16:
+ ASSEMBLE_COMPARE(cmpw);
+ break;
+ case kX64Cmp32:
+ ASSEMBLE_COMPARE(cmpl);
+ break;
+ case kX64Cmp:
+ ASSEMBLE_COMPARE(cmpq);
+ break;
+ case kX64Test8:
+ ASSEMBLE_COMPARE(testb);
+ break;
+ case kX64Test16:
+ ASSEMBLE_COMPARE(testw);
+ break;
+ case kX64Test32:
+ ASSEMBLE_COMPARE(testl);
+ break;
+ case kX64Test:
+ ASSEMBLE_COMPARE(testq);
+ break;
+ case kX64Imul32:
+ ASSEMBLE_MULT(imull);
+ break;
+ case kX64Imul:
+ ASSEMBLE_MULT(imulq);
+ break;
+ case kX64ImulHigh32:
+ if (instr->InputAt(1)->IsRegister()) {
+ __ imull(i.InputRegister(1));
+ } else {
+ __ imull(i.InputOperand(1));
+ }
+ break;
+ case kX64UmulHigh32:
+ if (instr->InputAt(1)->IsRegister()) {
+ __ mull(i.InputRegister(1));
+ } else {
+ __ mull(i.InputOperand(1));
+ }
+ break;
+ case kX64Idiv32:
+ __ cdq();
+ __ idivl(i.InputRegister(1));
+ break;
+ case kX64Idiv:
+ __ cqo();
+ __ idivq(i.InputRegister(1));
+ break;
+ case kX64Udiv32:
+ __ xorl(rdx, rdx);
+ __ divl(i.InputRegister(1));
+ break;
+ case kX64Udiv:
+ __ xorq(rdx, rdx);
+ __ divq(i.InputRegister(1));
+ break;
+ case kX64Not:
+ ASSEMBLE_UNOP(notq);
+ break;
+ case kX64Not32:
+ ASSEMBLE_UNOP(notl);
+ break;
+ case kX64Neg:
+ ASSEMBLE_UNOP(negq);
+ break;
+ case kX64Neg32:
+ ASSEMBLE_UNOP(negl);
+ break;
+ case kX64Or32:
+ ASSEMBLE_BINOP(orl);
+ break;
+ case kX64Or:
+ ASSEMBLE_BINOP(orq);
+ break;
+ case kX64Xor32:
+ ASSEMBLE_BINOP(xorl);
+ break;
+ case kX64Xor:
+ ASSEMBLE_BINOP(xorq);
+ break;
+ case kX64Shl32:
+ ASSEMBLE_SHIFT(shll, 5);
+ break;
+ case kX64Shl:
+ ASSEMBLE_SHIFT(shlq, 6);
+ break;
+ case kX64Shr32:
+ ASSEMBLE_SHIFT(shrl, 5);
+ break;
+ case kX64Shr:
+ ASSEMBLE_SHIFT(shrq, 6);
+ break;
+ case kX64Sar32:
+ ASSEMBLE_SHIFT(sarl, 5);
+ break;
+ case kX64Sar:
+ ASSEMBLE_SHIFT(sarq, 6);
+ break;
+ case kX64Ror32:
+ ASSEMBLE_SHIFT(rorl, 5);
+ break;
+ case kX64Ror:
+ ASSEMBLE_SHIFT(rorq, 6);
+ break;
+ case kX64Lzcnt:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Lzcnt32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Tzcnt:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Tzcnt32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Popcnt:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Popcntq(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Popcntq(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Popcnt32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Popcntl(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ Popcntl(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Bswap:
+ __ bswapq(i.OutputRegister());
+ break;
+ case kX64Bswap32:
+ __ bswapl(i.OutputRegister());
+ break;
+ case kSSEFloat32Cmp:
+ ASSEMBLE_SSE_BINOP(Ucomiss);
+ break;
+ case kSSEFloat32Add:
+ ASSEMBLE_SSE_BINOP(addss);
+ break;
+ case kSSEFloat32Sub:
+ ASSEMBLE_SSE_BINOP(subss);
+ break;
+ case kSSEFloat32Mul:
+ ASSEMBLE_SSE_BINOP(mulss);
+ break;
+ case kSSEFloat32Div:
+ ASSEMBLE_SSE_BINOP(divss);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kSSEFloat32Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 33);
+ __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat32Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 31);
+ __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat32Sqrt:
+ ASSEMBLE_SSE_UNOP(sqrtss);
+ break;
+ case kSSEFloat32ToFloat64:
+ ASSEMBLE_SSE_UNOP(Cvtss2sd);
+ break;
+ case kSSEFloat32Round: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+ break;
+ }
+ case kSSEFloat32ToInt32:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEFloat32ToUint32: {
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ }
+ case kSSEFloat64Cmp:
+ ASSEMBLE_SSE_BINOP(Ucomisd);
+ break;
+ case kSSEFloat64Add:
+ ASSEMBLE_SSE_BINOP(addsd);
+ break;
+ case kSSEFloat64Sub:
+ ASSEMBLE_SSE_BINOP(subsd);
+ break;
+ case kSSEFloat64Mul:
+ ASSEMBLE_SSE_BINOP(mulsd);
+ break;
+ case kSSEFloat64Div:
+ ASSEMBLE_SSE_BINOP(divsd);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kSSEFloat64Mod: {
+ __ subq(rsp, Immediate(kDoubleSize));
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kDoubleSize);
+ // Move values to st(0) and st(1).
+ __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
+ __ fld_d(Operand(rsp, 0));
+ __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
+ __ fld_d(Operand(rsp, 0));
+ // Loop while fprem isn't done.
+ Label mod_loop;
+ __ bind(&mod_loop);
+ // This instruction traps on all kinds of inputs, but we are assuming the
+ // floating point control word is set to ignore them all.
+ __ fprem();
+ // The following 2 instructions implicitly use rax.
+ __ fnstsw_ax();
+ if (CpuFeatures::IsSupported(SAHF)) {
+ CpuFeatureScope sahf_scope(tasm(), SAHF);
+ __ sahf();
+ } else {
+ __ shrl(rax, Immediate(8));
+ __ andl(rax, Immediate(0xFF));
+ __ pushq(rax);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ __ popfq();
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kSystemPointerSize);
+ }
+ __ j(parity_even, &mod_loop);
+ // Move output to stack and clean up.
+ __ fstp(1);
+ __ fstp_d(Operand(rsp, 0));
+ __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
+ __ addq(rsp, Immediate(kDoubleSize));
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kDoubleSize);
+ break;
+ }
+ case kSSEFloat32Max: {
+ Label compare_nan, compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(above, &done_compare, Label::kNear);
+ __ j(below, &compare_swap, Label::kNear);
+ __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
+ __ testl(kScratchRegister, Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat32Min: {
+ Label compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(below, &done_compare, Label::kNear);
+ __ j(above, &compare_swap, Label::kNear);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
+ } else {
+ __ Movss(kScratchDoubleReg, i.InputOperand(1));
+ __ Movmskps(kScratchRegister, kScratchDoubleReg);
+ }
+ __ testl(kScratchRegister, Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat64Max: {
+ Label compare_nan, compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(above, &done_compare, Label::kNear);
+ __ j(below, &compare_swap, Label::kNear);
+ __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
+ __ testl(kScratchRegister, Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat64Min: {
+ Label compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(below, &done_compare, Label::kNear);
+ __ j(above, &compare_swap, Label::kNear);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
+ } else {
+ __ Movsd(kScratchDoubleReg, i.InputOperand(1));
+ __ Movmskpd(kScratchRegister, kScratchDoubleReg);
+ }
+ __ testl(kScratchRegister, Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat64Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 1);
+ __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat64Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 63);
+ __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat64Sqrt:
+ ASSEMBLE_SSE_UNOP(Sqrtsd);
+ break;
+ case kSSEFloat64Round: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+ break;
+ }
+ case kSSEFloat64ToFloat32:
+ ASSEMBLE_SSE_UNOP(Cvtsd2ss);
+ break;
+ case kSSEFloat64ToInt32:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEFloat64ToUint32: {
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
+ }
+ if (MiscField::decode(instr->opcode())) {
+ __ AssertZeroExtended(i.OutputRegister());
+ }
+ break;
+ }
+ case kSSEFloat32ToInt64:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+ }
+ if (instr->OutputCount() > 1) {
+ __ Set(i.OutputRegister(1), 1);
+ Label done;
+ Label fail;
+ __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
+ } else {
+ __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
+ }
+ // If the input is NaN, then the conversion fails.
+ __ j(parity_even, &fail);
+ // If the input is INT64_MIN, then the conversion succeeds.
+ __ j(equal, &done);
+ __ cmpq(i.OutputRegister(0), Immediate(1));
+ // If the conversion results in INT64_MIN, but the input was not
+ // INT64_MIN, then the conversion fails.
+ __ j(no_overflow, &done);
+ __ bind(&fail);
+ __ Set(i.OutputRegister(1), 0);
+ __ bind(&done);
+ }
+ break;
+ case kSSEFloat64ToInt64:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
+ } else {
+ __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
+ }
+ if (instr->OutputCount() > 1) {
+ __ Set(i.OutputRegister(1), 1);
+ Label done;
+ Label fail;
+ __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
+ } else {
+ __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
+ }
+ // If the input is NaN, then the conversion fails.
+ __ j(parity_even, &fail);
+ // If the input is INT64_MIN, then the conversion succeeds.
+ __ j(equal, &done);
+ __ cmpq(i.OutputRegister(0), Immediate(1));
+ // If the conversion results in INT64_MIN, but the input was not
+ // INT64_MIN, then the conversion fails.
+ __ j(no_overflow, &done);
+ __ bind(&fail);
+ __ Set(i.OutputRegister(1), 0);
+ __ bind(&done);
+ }
+ break;
+ case kSSEFloat32ToUint64: {
+ Label fail;
+ if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
+ } else {
+ __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
+ }
+ if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
+ __ bind(&fail);
+ break;
+ }
+ case kSSEFloat64ToUint64: {
+ Label fail;
+ if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
+ } else {
+ __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
+ }
+ if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
+ __ bind(&fail);
+ break;
+ }
+ case kSSEInt32ToFloat64:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEInt32ToFloat32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEInt64ToFloat32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEInt64ToFloat64:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEUint64ToFloat32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEUint64ToFloat64:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEUint32ToFloat64:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEUint32ToFloat32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kSSEFloat64ExtractLowWord32:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ movl(i.OutputRegister(), i.InputOperand(0));
+ } else {
+ __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ }
+ break;
+ case kSSEFloat64ExtractHighWord32:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
+ } else {
+ __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
+ }
+ break;
+ case kSSEFloat64InsertLowWord32:
+ if (instr->InputAt(1)->IsRegister()) {
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
+ } else {
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
+ }
+ break;
+ case kSSEFloat64InsertHighWord32:
+ if (instr->InputAt(1)->IsRegister()) {
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
+ } else {
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
+ }
+ break;
+ case kSSEFloat64LoadLowWord32:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kAVXFloat32Cmp: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ break;
+ }
+ case kAVXFloat32Add:
+ ASSEMBLE_AVX_BINOP(vaddss);
+ break;
+ case kAVXFloat32Sub:
+ ASSEMBLE_AVX_BINOP(vsubss);
+ break;
+ case kAVXFloat32Mul:
+ ASSEMBLE_AVX_BINOP(vmulss);
+ break;
+ case kAVXFloat32Div:
+ ASSEMBLE_AVX_BINOP(vdivss);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kAVXFloat64Cmp: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ break;
+ }
+ case kAVXFloat64Add:
+ ASSEMBLE_AVX_BINOP(vaddsd);
+ break;
+ case kAVXFloat64Sub:
+ ASSEMBLE_AVX_BINOP(vsubsd);
+ break;
+ case kAVXFloat64Mul:
+ ASSEMBLE_AVX_BINOP(vmulsd);
+ break;
+ case kAVXFloat64Div:
+ ASSEMBLE_AVX_BINOP(vdivsd);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kAVXFloat32Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputDoubleRegister(0));
+ } else {
+ __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputOperand(0));
+ }
+ break;
+ }
+ case kAVXFloat32Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputDoubleRegister(0));
+ } else {
+ __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputOperand(0));
+ }
+ break;
+ }
+ case kAVXFloat64Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputDoubleRegister(0));
+ } else {
+ __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputOperand(0));
+ }
+ break;
+ }
+ case kAVXFloat64Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputDoubleRegister(0));
+ } else {
+ __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+ i.InputOperand(0));
+ }
+ break;
+ }
+ case kSSEFloat64SilenceNaN:
+ __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
+ break;
+ case kX64Movsxbl:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movsxbl);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movzxbl:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movzxbl);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movsxbq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movsxbq);
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movzxbq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movzxbq);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movb: {
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ movb(operand, Immediate(i.InputInt8(index)));
+ } else {
+ __ movb(operand, i.InputRegister(index));
+ }
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ }
+ case kX64Movsxwl:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movsxwl);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movzxwl:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movzxwl);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movsxwq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movsxwq);
+ break;
+ case kX64Movzxwq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movzxwq);
+ __ AssertZeroExtended(i.OutputRegister());
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movw: {
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ movw(operand, Immediate(i.InputInt16(index)));
+ } else {
+ __ movw(operand, i.InputRegister(index));
+ }
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ }
+ case kX64Movl:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ if (instr->HasOutput()) {
+ if (instr->addressing_mode() == kMode_None) {
+ if (instr->InputAt(0)->IsRegister()) {
+ __ movl(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ movl(i.OutputRegister(), i.InputOperand(0));
+ }
+ } else {
+ __ movl(i.OutputRegister(), i.MemoryOperand());
+ }
+ __ AssertZeroExtended(i.OutputRegister());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ movl(operand, i.InputImmediate(index));
+ } else {
+ __ movl(operand, i.InputRegister(index));
+ }
+ }
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movsxlq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ ASSEMBLE_MOVX(movsxlq);
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64MovqDecompressTaggedSigned: {
+ CHECK(instr->HasOutput());
+ __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand(),
+ DEBUG_BOOL ? i.TempRegister(0) : no_reg);
+ break;
+ }
+ case kX64MovqDecompressTaggedPointer: {
+ CHECK(instr->HasOutput());
+ __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand(),
+ DEBUG_BOOL ? i.TempRegister(0) : no_reg);
+ break;
+ }
+ case kX64MovqDecompressAnyTagged: {
+ CHECK(instr->HasOutput());
+ __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
+ i.TempRegister(0),
+ DEBUG_BOOL ? i.TempRegister(1) : no_reg);
+ break;
+ }
+ case kX64Movq:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ if (instr->HasOutput()) {
+ __ movq(i.OutputRegister(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ movq(operand, i.InputImmediate(index));
+ } else {
+ __ movq(operand, i.InputRegister(index));
+ }
+ }
+ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+ break;
+ case kX64Movss:
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ if (instr->HasOutput()) {
+ __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ movss(operand, i.InputDoubleRegister(index));
+ }
+ break;
+ case kX64Movsd: {
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ if (instr->HasOutput()) {
+ const MemoryAccessMode access_mode =
+ static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+ if (access_mode == kMemoryAccessPoisoned) {
+ // If we have to poison the loaded value, we load into a general
+ // purpose register first, mask it with the poison, and move the
+ // value from the general purpose register into the double register.
+ __ movq(kScratchRegister, i.MemoryOperand());
+ __ andq(kScratchRegister, kSpeculationPoisonRegister);
+ __ Movq(i.OutputDoubleRegister(), kScratchRegister);
+ } else {
+ __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
+ }
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ Movsd(operand, i.InputDoubleRegister(index));
+ }
+ break;
+ }
+ case kX64Movdqu: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ if (instr->HasOutput()) {
+ __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ movdqu(operand, i.InputSimd128Register(index));
+ }
+ break;
+ }
+ case kX64BitcastFI:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ movl(i.OutputRegister(), i.InputOperand(0));
+ } else {
+ __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ }
+ break;
+ case kX64BitcastDL:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ movq(i.OutputRegister(), i.InputOperand(0));
+ } else {
+ __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
+ }
+ break;
+ case kX64BitcastIF:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64BitcastLD:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kX64Lea32: {
+ AddressingMode mode = AddressingModeField::decode(instr->opcode());
+ // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
+ // and addressing mode just happens to work out. The "addl"/"subl" forms
+ // in these cases are faster based on measurements.
+ if (i.InputRegister(0) == i.OutputRegister()) {
+ if (mode == kMode_MRI) {
+ int32_t constant_summand = i.InputInt32(1);
+ DCHECK_NE(0, constant_summand);
+ if (constant_summand > 0) {
+ __ addl(i.OutputRegister(), Immediate(constant_summand));
+ } else {
+ __ subl(i.OutputRegister(),
+ Immediate(base::NegateWithWraparound(constant_summand)));
+ }
+ } else if (mode == kMode_MR1) {
+ if (i.InputRegister(1) == i.OutputRegister()) {
+ __ shll(i.OutputRegister(), Immediate(1));
+ } else {
+ __ addl(i.OutputRegister(), i.InputRegister(1));
+ }
+ } else if (mode == kMode_M2) {
+ __ shll(i.OutputRegister(), Immediate(1));
+ } else if (mode == kMode_M4) {
+ __ shll(i.OutputRegister(), Immediate(2));
+ } else if (mode == kMode_M8) {
+ __ shll(i.OutputRegister(), Immediate(3));
+ } else {
+ __ leal(i.OutputRegister(), i.MemoryOperand());
+ }
+ } else if (mode == kMode_MR1 &&
+ i.InputRegister(1) == i.OutputRegister()) {
+ __ addl(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ leal(i.OutputRegister(), i.MemoryOperand());
+ }
+ __ AssertZeroExtended(i.OutputRegister());
+ break;
+ }
+ case kX64Lea: {
+ AddressingMode mode = AddressingModeField::decode(instr->opcode());
+ // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
+ // and addressing mode just happens to work out. The "addq"/"subq" forms
+ // in these cases are faster based on measurements.
+ if (i.InputRegister(0) == i.OutputRegister()) {
+ if (mode == kMode_MRI) {
+ // Output aliases the base register, so the lea is "reg += constant".
+ // A zero summand needs no instruction at all.
+ int32_t constant_summand = i.InputInt32(1);
+ if (constant_summand > 0) {
+ __ addq(i.OutputRegister(), Immediate(constant_summand));
+ } else if (constant_summand == std::numeric_limits<int32_t>::min()) {
+ // -INT32_MIN is not representable as int32_t (signed overflow), and a
+ // wrapped negation would be sign-extended by subq, adding 2^31 instead
+ // of subtracting it. Add the negative immediate directly instead.
+ __ addq(i.OutputRegister(), Immediate(constant_summand));
+ } else if (constant_summand < 0) {
+ __ subq(i.OutputRegister(), Immediate(-constant_summand));
+ }
+ } else if (mode == kMode_MR1) {
+ if (i.InputRegister(1) == i.OutputRegister()) {
+ // Base, index and output are all the same register: reg + reg.
+ __ shlq(i.OutputRegister(), Immediate(1));
+ } else {
+ __ addq(i.OutputRegister(), i.InputRegister(1));
+ }
+ } else if (mode == kMode_M2) {
+ __ shlq(i.OutputRegister(), Immediate(1));
+ } else if (mode == kMode_M4) {
+ __ shlq(i.OutputRegister(), Immediate(2));
+ } else if (mode == kMode_M8) {
+ __ shlq(i.OutputRegister(), Immediate(3));
+ } else {
+ __ leaq(i.OutputRegister(), i.MemoryOperand());
+ }
+ } else if (mode == kMode_MR1 &&
+ i.InputRegister(1) == i.OutputRegister()) {
+ // Output aliases the index register: add the base into it.
+ __ addq(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ leaq(i.OutputRegister(), i.MemoryOperand());
+ }
+ break;
+ }
+ case kX64Dec32:
+ __ decl(i.OutputRegister());
+ break;
+ case kX64Inc32:
+ __ incl(i.OutputRegister());
+ break;
+ // Pushes input 0 onto the machine stack. Every branch below also records
+ // the stack growth in frame_access_state() (in pointer-sized slots) and in
+ // the unwinding info writer (in bytes).
+ case kX64Push:
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
+ // Input is a memory operand described by the addressing mode.
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ pushq(operand);
+ frame_access_state()->IncreaseSPDelta(1);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ } else if (HasImmediateInput(instr, 0)) {
+ // Immediate input.
+ __ pushq(i.InputImmediate(0));
+ frame_access_state()->IncreaseSPDelta(1);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ } else if (instr->InputAt(0)->IsRegister()) {
+ // General purpose register input.
+ __ pushq(i.InputRegister(0));
+ frame_access_state()->IncreaseSPDelta(1);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ } else if (instr->InputAt(0)->IsFloatRegister() ||
+ instr->InputAt(0)->IsDoubleRegister()) {
+ // Float/double register input: reserve kDoubleSize bytes by hand and
+ // store the register into the new top of stack.
+ // TODO(titzer): use another machine instruction?
+ __ subq(rsp, Immediate(kDoubleSize));
+ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kDoubleSize);
+ __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
+ } else if (instr->InputAt(0)->IsSimd128Register()) {
+ // SIMD register input: reserve kSimd128Size bytes and store with an
+ // unaligned move.
+ // TODO(titzer): use another machine instruction?
+ __ subq(rsp, Immediate(kSimd128Size));
+ frame_access_state()->IncreaseSPDelta(kSimd128Size /
+ kSystemPointerSize);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSimd128Size);
+ __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
+ } else if (instr->InputAt(0)->IsStackSlot() ||
+ instr->InputAt(0)->IsFloatStackSlot() ||
+ instr->InputAt(0)->IsDoubleStackSlot()) {
+ // Stack slot input (word, float or double): pushed as one
+ // pointer-sized slot straight from memory.
+ __ pushq(i.InputOperand(0));
+ frame_access_state()->IncreaseSPDelta(1);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ } else {
+ // SIMD stack slot input. The value is loaded into the scratch register
+ // before rsp is adjusted, then stored into the newly reserved space.
+ DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
+ __ Movups(kScratchDoubleReg, i.InputOperand(0));
+ // TODO(titzer): use another machine instruction?
+ __ subq(rsp, Immediate(kSimd128Size));
+ frame_access_state()->IncreaseSPDelta(kSimd128Size /
+ kSystemPointerSize);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSimd128Size);
+ __ Movups(Operand(rsp, 0), kScratchDoubleReg);
+ }
+ break;
+ case kX64Poke: {
+ int slot = MiscField::decode(instr->opcode());
+ if (HasImmediateInput(instr, 0)) {
+ __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
+ } else {
+ __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
+ }
+ break;
+ }
+ case kX64Peek: {
+ int reverse_slot = i.InputInt32(0);
+ int offset =
+ FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+ if (instr->OutputAt(0)->IsFPRegister()) {
+ LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+ if (op->representation() == MachineRepresentation::kFloat64) {
+ __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
+ } else {
+ DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+ __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
+ }
+ } else {
+ __ movq(i.OutputRegister(), Operand(rbp, offset));
+ }
+ break;
+ }
+ // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
+ case kX64F32x4Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ movss(dst, i.InputDoubleRegister(0));
+ } else {
+ __ movss(dst, i.InputOperand(0));
+ }
+ __ shufps(dst, dst, 0x0);
+ break;
+ }
+ case kX64F32x4ExtractLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
+ __ movd(i.OutputDoubleRegister(), kScratchRegister);
+ break;
+ }
+ case kX64F32x4ReplaceLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ // insertps selects the destination lane through imm8[5:4]; move the lane
+ // index (input 1) into that field and clear every other bit.
+ const byte lane_select = (i.InputInt8(1) << 4) & 0x30;
+ __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
+ lane_select);
+ break;
+ }
+ case kX64F32x4SConvertI32x4: {
+ __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64F32x4UConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
+ __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
+ __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
+ __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
+ __ psrld(dst, 1); // divide by 2 to get in unsigned range
+ __ cvtdq2ps(dst, dst); // convert hi exactly
+ __ addps(dst, dst); // double hi, exactly
+ __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
+ break;
+ }
+ case kX64F32x4Abs: {
+ const XMMRegister dst = i.OutputSimd128Register();
+ const XMMRegister src = i.InputSimd128Register(0);
+ // Build the 0x7FFFFFFF-per-lane mask in the scratch register when the
+ // operation is in place, otherwise directly in dst.
+ const bool in_place = dst == src;
+ const XMMRegister mask = in_place ? kScratchDoubleReg : dst;
+ __ pcmpeqd(mask, mask);
+ __ psrld(mask, 1);
+ // Clear the sign bit of every lane.
+ __ andps(dst, in_place ? mask : src);
+ break;
+ }
+ case kX64F32x4Neg: {
+ const XMMRegister dst = i.OutputSimd128Register();
+ const XMMRegister src = i.InputSimd128Register(0);
+ // Build the 0x80000000-per-lane mask in the scratch register when the
+ // operation is in place, otherwise directly in dst.
+ const bool in_place = dst == src;
+ const XMMRegister mask = in_place ? kScratchDoubleReg : dst;
+ __ pcmpeqd(mask, mask);
+ __ pslld(mask, 31);
+ // Flip the sign bit of every lane.
+ __ xorps(dst, in_place ? mask : src);
+ break;
+ }
+ case kX64F32x4RecipApprox: {
+ __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64F32x4RecipSqrtApprox: {
+ __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64F32x4Add: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4AddHoriz: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE3);
+ __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Sub: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Mul: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Min: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Max: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Eq: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
+ break;
+ }
+ case kX64F32x4Ne: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
+ break;
+ }
+ case kX64F32x4Lt: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F32x4Le: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ movd(dst, i.InputRegister(0));
+ __ pshufd(dst, dst, 0x0);
+ break;
+ }
+ case kX64I32x4ExtractLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
+ break;
+ }
+ case kX64I32x4ReplaceLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ if (instr->InputAt(2)->IsRegister()) {
+ __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
+ i.InputInt8(1));
+ } else {
+ __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ }
+ break;
+ }
+ case kX64I32x4SConvertF32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ // NAN->0
+ __ movaps(kScratchDoubleReg, dst);
+ __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
+ __ pand(dst, kScratchDoubleReg);
+ // Set top bit if >= 0 (but not -0.0!)
+ __ pxor(kScratchDoubleReg, dst);
+ // Convert
+ __ cvttps2dq(dst, dst);
+ // Set top bit if >=0 is now < 0
+ __ pand(kScratchDoubleReg, dst);
+ __ psrad(kScratchDoubleReg, 31);
+ // Set positive overflow lanes to 0x7FFFFFFF
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I32x4SConvertI16x8Low: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64I32x4SConvertI16x8High: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ palignr(dst, i.InputSimd128Register(0), 8);
+ __ pmovsxwd(dst, dst);
+ break;
+ }
+ case kX64I32x4Neg: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ if (dst == src) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psignd(dst, kScratchDoubleReg);
+ } else {
+ __ pxor(dst, dst);
+ __ psubd(dst, src);
+ }
+ break;
+ }
+ case kX64I32x4Shl: {
+ __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I32x4ShrS: {
+ __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I32x4Add: {
+ __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4AddHoriz: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4Sub: {
+ __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4Mul: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4MinS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4MaxS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4Eq: {
+ __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4Ne: {
+ __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kX64I32x4GtS: {
+ __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4GeS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminsd(dst, src);
+ __ pcmpeqd(dst, src);
+ break;
+ }
+ case kX64I32x4UConvertF32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
+ // NAN->0, negative->0
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ __ maxps(dst, kScratchDoubleReg);
+ // scratch: float representation of max_signed
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
+ __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
+ // tmp: convert (src-max_signed).
+ // Positive overflow lanes -> 0x7FFFFFFF
+ // Negative lanes -> 0
+ __ movaps(tmp, dst);
+ __ subps(tmp, kScratchDoubleReg);
+ __ cmpleps(kScratchDoubleReg, tmp);
+ __ cvttps2dq(tmp, tmp);
+ __ pxor(tmp, kScratchDoubleReg);
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ __ pmaxsd(tmp, kScratchDoubleReg);
+ // convert. Overflow lanes above max_signed will be 0x80000000
+ __ cvttps2dq(dst, dst);
+ // Add (src-max_signed) for overflow lanes.
+ __ paddd(dst, tmp);
+ break;
+ }
+ case kX64I32x4UConvertI16x8Low: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64I32x4UConvertI16x8High: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ palignr(dst, i.InputSimd128Register(0), 8);
+ __ pmovzxwd(dst, dst);
+ break;
+ }
+ case kX64I32x4ShrU: {
+ __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I32x4MinU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4MaxU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I32x4GtU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pmaxud(dst, src);
+ __ pcmpeqd(dst, src);
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I32x4GeU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminud(dst, src);
+ __ pcmpeqd(dst, src);
+ break;
+ }
+ case kX64S128Zero: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ xorps(dst, dst);
+ break;
+ }
+ case kX64I16x8Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ movd(dst, i.InputRegister(0));
+ __ pshuflw(dst, dst, 0x0);
+ __ pshufd(dst, dst, 0x0);
+ break;
+ }
+ case kX64I16x8ExtractLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ Register dst = i.OutputRegister();
+ __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
+ __ movsxwl(dst, dst);
+ break;
+ }
+ case kX64I16x8ReplaceLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ if (instr->InputAt(2)->IsRegister()) {
+ __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
+ i.InputInt8(1));
+ } else {
+ __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ }
+ break;
+ }
+ case kX64I16x8SConvertI8x16Low: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64I16x8SConvertI8x16High: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ palignr(dst, i.InputSimd128Register(0), 8);
+ __ pmovsxbw(dst, dst);
+ break;
+ }
+ case kX64I16x8Neg: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ if (dst == src) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psignw(dst, kScratchDoubleReg);
+ } else {
+ __ pxor(dst, dst);
+ __ psubw(dst, src);
+ }
+ break;
+ }
+ case kX64I16x8Shl: {
+ __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I16x8ShrS: {
+ __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I16x8SConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8Add: {
+ __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8AddSaturateS: {
+ __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8AddHoriz: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8Sub: {
+ __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8SubSaturateS: {
+ __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8Mul: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8MinS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8MaxS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8Eq: {
+ __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8Ne: {
+ __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kX64I16x8GtS: {
+ __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8GeS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminsw(dst, src);
+ __ pcmpeqw(dst, src);
+ break;
+ }
+ case kX64I16x8UConvertI8x16Low: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kX64I16x8UConvertI8x16High: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ palignr(dst, i.InputSimd128Register(0), 8);
+ __ pmovzxbw(dst, dst);
+ break;
+ }
+ case kX64I16x8ShrU: {
+ __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kX64I16x8UConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Change negative lanes to 0x7FFFFFFF
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrld(kScratchDoubleReg, 1);
+ __ pminud(dst, kScratchDoubleReg);
+ __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ packusdw(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I16x8AddSaturateU: {
+ __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8SubSaturateU: {
+ __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8MinU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8MaxU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I16x8GtU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pmaxuw(dst, src);
+ __ pcmpeqw(dst, src);
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I16x8GeU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminuw(dst, src);
+ __ pcmpeqw(dst, src);
+ break;
+ }
+ case kX64I8x16Splat: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ movd(dst, i.InputRegister(0));
+ __ xorps(kScratchDoubleReg, kScratchDoubleReg);
+ __ pshufb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I8x16ExtractLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ Register dst = i.OutputRegister();
+ __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
+ __ movsxbl(dst, dst);
+ break;
+ }
+ case kX64I8x16ReplaceLane: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ if (instr->InputAt(2)->IsRegister()) {
+ __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
+ i.InputInt8(1));
+ } else {
+ __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ }
+ break;
+ }
+ case kX64I8x16SConvertI16x8: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16Neg: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ if (dst == src) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psignb(dst, kScratchDoubleReg);
+ } else {
+ __ pxor(dst, dst);
+ __ psubb(dst, src);
+ }
+ break;
+ }
+ case kX64I8x16Shl: {
+ // Byte-wise left shift, done in place (dst must alias input 0). Only the
+ // low three bits of the shift count are used.
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ int8_t shift = i.InputInt8(1) & 0x7;
+ if (shift < 4) {
+ // For small shifts, doubling is faster.
+ // The counter is deliberately not named "i" so that it does not shadow
+ // the operand converter.
+ for (int doublings = 0; doublings < shift; ++doublings) {
+ __ paddb(dst, dst);
+ }
+ } else {
+ // Mask off the unwanted bits before word-shifting.
+ // All-ones words shifted right by 8 + shift leave the low (8 - shift)
+ // bits set; packing them to bytes gives a per-byte mask that clears the
+ // bits which would otherwise spill into the neighbouring byte.
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlw(kScratchDoubleReg, 8 + shift);
+ __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pand(dst, kScratchDoubleReg);
+ __ psllw(dst, shift);
+ }
+ break;
+ }
+ // Byte-wise arithmetic right shift. Only the low three bits of the shift
+ // count are used.
+ case kX64I8x16ShrS: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t shift = i.InputInt8(1) & 0x7;
+ // Unpack the bytes into words, do arithmetic shifts, and repack.
+ // The unpacks put each source byte in the high byte of a word; the low
+ // byte is whatever the destination register held before. Shifting by
+ // 8 + shift discards that low byte and leaves the sign-extended result.
+ __ punpckhbw(kScratchDoubleReg, src);
+ __ punpcklbw(dst, src);
+ __ psraw(kScratchDoubleReg, 8 + shift);
+ __ psraw(dst, 8 + shift);
+ // dst holds the low eight lanes, scratch the high eight.
+ __ packsswb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I8x16Add: {
+ __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16AddSaturateS: {
+ __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16Sub: {
+ __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16SubSaturateS: {
+ __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16Mul: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ XMMRegister right = i.InputSimd128Register(1);
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
+ // I16x8 view of I8x16
+ // left = AAaa AAaa ... AAaa AAaa
+ // right= BBbb BBbb ... BBbb BBbb
+ // t = 00AA 00AA ... 00AA 00AA
+ // s = 00BB 00BB ... 00BB 00BB
+ __ movaps(tmp, dst);
+ __ movaps(kScratchDoubleReg, right);
+ __ psrlw(tmp, 8);
+ __ psrlw(kScratchDoubleReg, 8);
+ // dst = left * 256
+ __ psllw(dst, 8);
+ // t = I16x8Mul(t, s)
+ // => __PP __PP ... __PP __PP
+ __ pmullw(tmp, kScratchDoubleReg);
+ // dst = I16x8Mul(left * 256, right)
+ // => pp__ pp__ ... pp__ pp__
+ __ pmullw(dst, right);
+ // t = I16x8Shl(t, 8)
+ // => PP00 PP00 ... PP00 PP00
+ __ psllw(tmp, 8);
+ // dst = I16x8Shr(dst, 8)
+ // => 00pp 00pp ... 00pp 00pp
+ __ psrlw(dst, 8);
+ // dst = I16x8Or(dst, t)
+ // => PPpp PPpp ... PPpp PPpp
+ __ por(dst, tmp);
+ break;
+ }
+ case kX64I8x16MinS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16MaxS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16Eq: {
+ __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16Ne: {
+ __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kX64I8x16GtS: {
+ __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16GeS: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminsb(dst, src);
+ __ pcmpeqb(dst, src);
+ break;
+ }
+ case kX64I8x16UConvertI16x8: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Change negative lanes to 0x7FFF
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlw(kScratchDoubleReg, 1);
+ __ pminuw(dst, kScratchDoubleReg);
+ __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ packuswb(dst, kScratchDoubleReg);
+ break;
+ }
+ // Byte-wise logical right shift. Only the low three bits of the shift
+ // count are used.
+ case kX64I8x16ShrU: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t shift = i.InputInt8(1) & 0x7;
+ // Unpack the bytes into words, do logical shifts, and repack.
+ // The unpacks put each source byte in the high byte of a word; the low
+ // byte is whatever the destination register held before. Shifting by
+ // 8 + shift discards that low byte and leaves the zero-extended result.
+ __ punpckhbw(kScratchDoubleReg, src);
+ __ punpcklbw(dst, src);
+ __ psrlw(kScratchDoubleReg, 8 + shift);
+ __ psrlw(dst, 8 + shift);
+ // dst holds the low eight lanes, scratch the high eight.
+ __ packuswb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I8x16AddSaturateU: {
+ __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16SubSaturateU: {
+ __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16MinU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16MaxU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64I8x16GtU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pmaxub(dst, src);
+ __ pcmpeqb(dst, src);
+ __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64I8x16GeU: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(1);
+ __ pminub(dst, src);
+ __ pcmpeqb(dst, src);
+ break;
+ }
+ case kX64S128And: {
+ __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64S128Or: {
+ __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64S128Xor: {
+ __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64S128Not: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ if (dst == src) {
+ __ movaps(kScratchDoubleReg, dst);
+ __ pcmpeqd(dst, dst);
+ __ pxor(dst, kScratchDoubleReg);
+ } else {
+ __ pcmpeqd(dst, dst);
+ __ pxor(dst, src);
+ }
+
+ break;
+ }
+ case kX64S128Select: {
+ // Mask used here is stored in dst.
+ XMMRegister dst = i.OutputSimd128Register();
+ __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+ __ andps(dst, kScratchDoubleReg);
+ __ xorps(dst, i.InputSimd128Register(2));
+ break;
+ }
+ // General byte shuffle built on pshufb with a control mask read from the
+ // stack. rsp is saved in a temp register, aligned down to 16 bytes for the
+ // mask, and restored at the end.
+ case kX64S8x16Shuffle: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Register tmp = i.TempRegister(0);
+ // Prepare 16 byte aligned buffer for shuffle control mask
+ __ movq(tmp, rsp);
+ __ andq(rsp, Immediate(-16));
+ if (instr->InputCount() == 5) { // only one input operand
+ // Inputs 1..4 are the four 32-bit words of the control mask and are used
+ // unmodified.
+ uint32_t mask[4] = {};
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ for (int j = 4; j > 0; j--) {
+ mask[j - 1] = i.InputUint32(j);
+ }
+
+ // NOTE(review): SetupShuffleMaskOnStack is defined elsewhere; presumably
+ // it leaves the mask at Operand(rsp, 0), which is where it is read below.
+ SetupShuffleMaskOnStack(tasm(), mask);
+ __ pshufb(dst, Operand(rsp, 0));
+ } else { // two input operands
+ // Inputs 2..5 each pack four 8-bit lane indices. The shuffle runs as two
+ // pshufb passes, one per source, whose results are OR-ed together. A
+ // control byte of 0x80 makes pshufb write zero to that lane.
+ DCHECK_EQ(6, instr->InputCount());
+ // NOTE(review): macro defined elsewhere; presumably loads input 0 into
+ // the scratch register.
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
+ uint32_t mask[4] = {};
+ // First mask: keep indices below kSimd128Size, zero every other lane.
+ for (int j = 5; j > 1; j--) {
+ uint32_t lanes = i.InputUint32(j);
+ for (int k = 0; k < 32; k += 8) {
+ uint8_t lane = lanes >> k;
+ mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
+ }
+ }
+ SetupShuffleMaskOnStack(tasm(), mask);
+ __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
+ uint32_t mask1[4] = {};
+ // Bring input 1 into dst (register or memory operand).
+ if (instr->InputAt(1)->IsSimd128Register()) {
+ XMMRegister src1 = i.InputSimd128Register(1);
+ if (src1 != dst) __ movups(dst, src1);
+ } else {
+ __ movups(dst, i.InputOperand(1));
+ }
+ // Second mask: keep indices at or above kSimd128Size (reduced to their
+ // low four bits), zero every other lane.
+ for (int j = 5; j > 1; j--) {
+ uint32_t lanes = i.InputUint32(j);
+ for (int k = 0; k < 32; k += 8) {
+ uint8_t lane = lanes >> k;
+ mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
+ }
+ }
+ SetupShuffleMaskOnStack(tasm(), mask1);
+ __ pshufb(dst, Operand(rsp, 0));
+ // Merge the two partial results.
+ __ por(dst, kScratchDoubleReg);
+ }
+ // Restore the original stack pointer.
+ __ movq(rsp, tmp);
+ break;
+ }
+ case kX64S32x4Swizzle: {
+ DCHECK_EQ(2, instr->InputCount());
+ ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
+ i.InputInt8(1));
+ break;
+ }
+ case kX64S32x4Shuffle: {
+ // Two-source 32x4 shuffle: apply the same pshufd pattern (input 2) to both
+ // sources, then blend the two results with pblendw using input 3.
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
+ int8_t shuffle = i.InputInt8(2);
+ // A simple blend should be handled below.
+ // Compare the low 8 bits: 0xe4 does not fit in int8_t, so comparing the
+ // signed value directly could never fail.
+ DCHECK_NE(0xe4, shuffle & 0xff);
+ ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
+ ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
+ __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
+ break;
+ }
+ case kX64S16x8Blend: {
+ ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
+ break;
+ }
+ case kX64S16x8HalfShuffle1: {
+ XMMRegister dst = i.OutputSimd128Register();
+ ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
+ __ pshufhw(dst, dst, i.InputInt8(2));
+ break;
+ }
+ case kX64S16x8HalfShuffle2: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
+ __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
+ ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
+ __ pshufhw(dst, dst, i.InputInt8(3));
+ __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
+ break;
+ }
+ case kX64S8x16Alignr: {
+ ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
+ break;
+ }
+ case kX64S16x8Dup: {
+ XMMRegister dst = i.OutputSimd128Register();
+ int8_t lane = i.InputInt8(1) & 0x7;
+ int8_t lane4 = lane & 0x3;
+ int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+ if (lane < 4) {
+ ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
+ __ pshufd(dst, dst, 0);
+ } else {
+ ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
+ __ pshufd(dst, dst, 0xaa);
+ }
+ break;
+ }
+ case kX64S8x16Dup: {
+ XMMRegister dst = i.OutputSimd128Register();
+ int8_t lane = i.InputInt8(1) & 0xf;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (lane < 8) {
+ __ punpcklbw(dst, dst);
+ } else {
+ __ punpckhbw(dst, dst);
+ }
+ lane &= 0x7;
+ int8_t lane4 = lane & 0x3;
+ int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+ if (lane < 4) {
+ __ pshuflw(dst, dst, half_dup);
+ __ pshufd(dst, dst, 0);
+ } else {
+ __ pshufhw(dst, dst, half_dup);
+ __ pshufd(dst, dst, 0xaa);
+ }
+ break;
+ }
+ case kX64S64x2UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
+ break;
+ case kX64S32x4UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
+ break;
+ case kX64S16x8UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
+ break;
+ case kX64S8x16UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
+ break;
+ case kX64S64x2UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
+ break;
+ case kX64S32x4UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
+ break;
+ case kX64S16x8UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
+ break;
+ case kX64S8x16UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
+ break;
+ case kX64S16x8UnzipHigh: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
+ __ psrld(kScratchDoubleReg, 16);
+ src2 = kScratchDoubleReg;
+ }
+ __ psrld(dst, 16);
+ __ packusdw(dst, src2);
+ break;
+ }
+ case kX64S16x8UnzipLow: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ if (instr->InputCount() == 2) {
+ ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
+ src2 = kScratchDoubleReg;
+ }
+ __ pblendw(dst, kScratchDoubleReg, 0xaa);
+ __ packusdw(dst, src2);
+ break;
+ }
+ case kX64S8x16UnzipHigh: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
+ __ psrlw(kScratchDoubleReg, 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ psrlw(dst, 8);
+ __ packuswb(dst, src2);
+ break;
+ }
+ case kX64S8x16UnzipLow: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
+ __ psllw(kScratchDoubleReg, 8);
+ __ psrlw(kScratchDoubleReg, 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ psllw(dst, 8);
+ __ psrlw(dst, 8);
+ __ packuswb(dst, src2);
+ break;
+ }
+ case kX64S8x16TransposeLow: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ psllw(dst, 8);
+ if (instr->InputCount() == 1) {
+ __ movups(kScratchDoubleReg, dst);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
+ __ psllw(kScratchDoubleReg, 8);
+ }
+ __ psrlw(dst, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64S8x16TransposeHigh: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ psrlw(dst, 8);
+ if (instr->InputCount() == 1) {
+ __ movups(kScratchDoubleReg, dst);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
+ __ psrlw(kScratchDoubleReg, 8);
+ }
+ __ psllw(kScratchDoubleReg, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64S8x8Reverse:
+ case kX64S8x4Reverse:
+ case kX64S8x2Reverse: {
+ DCHECK_EQ(1, instr->InputCount());
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (arch_opcode != kX64S8x2Reverse) {
+ // First shuffle words into position.
+ int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
+ __ pshuflw(dst, dst, shuffle_mask);
+ __ pshufhw(dst, dst, shuffle_mask);
+ }
+ __ movaps(kScratchDoubleReg, dst);
+ __ psrlw(kScratchDoubleReg, 8);
+ __ psllw(dst, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kX64S1x4AnyTrue:
+ case kX64S1x8AnyTrue:
+ case kX64S1x16AnyTrue: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ Register dst = i.OutputRegister();
+ XMMRegister src = i.InputSimd128Register(0);
+ Register tmp = i.TempRegister(0);
+ __ xorq(tmp, tmp);
+ __ movq(dst, Immediate(1));
+ __ ptest(src, src);
+ __ cmovq(zero, dst, tmp);
+ break;
+ }
+ case kX64S1x4AllTrue:
+ case kX64S1x8AllTrue:
+ case kX64S1x16AllTrue: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ Register dst = i.OutputRegister();
+ XMMRegister src = i.InputSimd128Register(0);
+ Register tmp = i.TempRegister(0);
+ __ movq(tmp, Immediate(1));
+ __ xorq(dst, dst);
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(kScratchDoubleReg, src);
+ __ ptest(kScratchDoubleReg, kScratchDoubleReg);
+ __ cmovq(zero, dst, tmp);
+ break;
+ }
+ case kX64StackCheck:
+ __ CompareRoot(rsp, RootIndex::kStackLimit);
+ break;
+ case kWord32AtomicExchangeInt8: {
+ __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+ __ movsxbl(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeUint8: {
+ __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzxbl(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeInt16: {
+ __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+ __ movsxwl(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeUint16: {
+ __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzxwl(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeWord32: {
+ __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
+ break;
+ }
+ case kWord32AtomicCompareExchangeInt8: {
+ __ lock();
+ __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+ __ movsxbl(rax, rax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeUint8: {
+ __ lock();
+ __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzxbl(rax, rax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeInt16: {
+ __ lock();
+ __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+ __ movsxwl(rax, rax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeUint16: {
+ __ lock();
+ __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzxwl(rax, rax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeWord32: {
+ __ lock();
+ __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
+ break;
+ }
+#define ATOMIC_BINOP_CASE(op, inst) \
+ case kWord32Atomic##op##Int8: \
+ ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
+ __ movsxbl(rax, rax); \
+ break; \
+ case kWord32Atomic##op##Uint8: \
+ ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
+ __ movzxbl(rax, rax); \
+ break; \
+ case kWord32Atomic##op##Int16: \
+ ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
+ __ movsxwl(rax, rax); \
+ break; \
+ case kWord32Atomic##op##Uint16: \
+ ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
+ __ movzxwl(rax, rax); \
+ break; \
+ case kWord32Atomic##op##Word32: \
+ ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
+ break;
+ ATOMIC_BINOP_CASE(Add, addl)
+ ATOMIC_BINOP_CASE(Sub, subl)
+ ATOMIC_BINOP_CASE(And, andl)
+ ATOMIC_BINOP_CASE(Or, orl)
+ ATOMIC_BINOP_CASE(Xor, xorl)
+#undef ATOMIC_BINOP_CASE
+ case kX64Word64AtomicExchangeUint8: {
+ __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzxbq(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kX64Word64AtomicExchangeUint16: {
+ __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzxwq(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kX64Word64AtomicExchangeUint32: {
+ __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
+ break;
+ }
+ case kX64Word64AtomicExchangeUint64: {
+ __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
+ break;
+ }
+ case kX64Word64AtomicCompareExchangeUint8: {
+ __ lock();
+ __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzxbq(rax, rax);
+ break;
+ }
+ case kX64Word64AtomicCompareExchangeUint16: {
+ __ lock();
+ __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzxwq(rax, rax);
+ break;
+ }
+ case kX64Word64AtomicCompareExchangeUint32: {
+ __ lock();
+ __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
+ break;
+ }
+ case kX64Word64AtomicCompareExchangeUint64: {
+ __ lock();
+ __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
+ break;
+ }
+#define ATOMIC64_BINOP_CASE(op, inst) \
+ case kX64Word64Atomic##op##Uint8: \
+ ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
+ __ movzxbq(rax, rax); \
+ break; \
+ case kX64Word64Atomic##op##Uint16: \
+ ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
+ __ movzxwq(rax, rax); \
+ break; \
+ case kX64Word64Atomic##op##Uint32: \
+ ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
+ break; \
+ case kX64Word64Atomic##op##Uint64: \
+ ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
+ break;
+ ATOMIC64_BINOP_CASE(Add, addq)
+ ATOMIC64_BINOP_CASE(Sub, subq)
+ ATOMIC64_BINOP_CASE(And, andq)
+ ATOMIC64_BINOP_CASE(Or, orq)
+ ATOMIC64_BINOP_CASE(Xor, xorq)
+#undef ATOMIC64_BINOP_CASE
+ case kWord32AtomicLoadInt8:
+ case kWord32AtomicLoadUint8:
+ case kWord32AtomicLoadInt16:
+ case kWord32AtomicLoadUint16:
+ case kWord32AtomicLoadWord32:
+ case kWord32AtomicStoreWord8:
+ case kWord32AtomicStoreWord16:
+ case kWord32AtomicStoreWord32:
+ case kX64Word64AtomicLoadUint8:
+ case kX64Word64AtomicLoadUint16:
+ case kX64Word64AtomicLoadUint32:
+ case kX64Word64AtomicLoadUint64:
+ case kX64Word64AtomicStoreWord8:
+ case kX64Word64AtomicStoreWord16:
+ case kX64Word64AtomicStoreWord32:
+ case kX64Word64AtomicStoreWord64:
+ UNREACHABLE(); // Won't be generated by instruction selector.
+ break;
+ }
+ return kSuccess;
+} // NOLINT(readability/fn_size)
+
+#undef ASSEMBLE_UNOP
+#undef ASSEMBLE_BINOP
+#undef ASSEMBLE_COMPARE
+#undef ASSEMBLE_MULT
+#undef ASSEMBLE_SHIFT
+#undef ASSEMBLE_MOVX
+#undef ASSEMBLE_SSE_BINOP
+#undef ASSEMBLE_SSE_UNOP
+#undef ASSEMBLE_AVX_BINOP
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_ATOMIC64_BINOP
+#undef ASSEMBLE_SIMD_INSTR
+#undef ASSEMBLE_SIMD_IMM_INSTR
+#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
+#undef ASSEMBLE_SIMD_IMM_SHUFFLE
+
+namespace {
+
+// Translates a FlagsCondition into the x64 assembler Condition that the
+// emitted jcc/setcc/cmovcc instruction tests. The unordered equality variants
+// share the mapping of their ordered counterparts; the branch, trap and
+// boolean emitters in this file issue a separate parity test for them. A
+// condition without a mapping reaches UNREACHABLE().
+Condition FlagsConditionToCondition(FlagsCondition condition) {
+  switch (condition) {
+    // Equality, with or without the unordered qualifier.
+    case kEqual:
+    case kUnorderedEqual:
+      return equal;
+    case kNotEqual:
+    case kUnorderedNotEqual:
+      return not_equal;
+
+    // Signed orderings.
+    case kSignedLessThan:
+      return less;
+    case kSignedLessThanOrEqual:
+      return less_equal;
+    case kSignedGreaterThan:
+      return greater;
+    case kSignedGreaterThanOrEqual:
+      return greater_equal;
+
+    // Unsigned orderings.
+    case kUnsignedLessThan:
+      return below;
+    case kUnsignedLessThanOrEqual:
+      return below_equal;
+    case kUnsignedGreaterThan:
+      return above;
+    case kUnsignedGreaterThanOrEqual:
+      return above_equal;
+
+    // Overflow flag.
+    case kOverflow:
+      return overflow;
+    case kNotOverflow:
+      return no_overflow;
+
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+}  // namespace
+
+// Assembles branches after this instruction.
+//
+// Jumps to branch->true_label when branch->condition holds. Unless the false
+// block is the fall-through (branch->fallthru), an unconditional jump to
+// branch->false_label follows. For the unordered equality conditions a parity
+// test is emitted first (NOTE(review): presumably the parity flag marks an
+// unordered float compare - confirm): parity set goes to the false label for
+// kUnorderedEqual and to the true label for kUnorderedNotEqual.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Label* const on_true = branch->true_label;
+  Label* const on_false = branch->false_label;
+  const Label::Distance false_distance =
+      branch->fallthru ? Label::kNear : Label::kFar;
+
+  switch (branch->condition) {
+    case kUnorderedEqual:
+      __ j(parity_even, on_false, false_distance);
+      break;
+    case kUnorderedNotEqual:
+      __ j(parity_even, on_true);
+      break;
+    default:
+      break;
+  }
+  __ j(FlagsConditionToCondition(branch->condition), on_true);
+
+  if (branch->fallthru) return;
+  __ jmp(on_false, false_distance);
+}
+
+// Emits code that zeroes kSpeculationPoisonRegister when the negation of
+// |condition| holds in the current flags. Nothing is emitted for the
+// unordered (float) equality conditions.
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  const bool is_float_compare =
+      condition == kUnorderedEqual || condition == kUnorderedNotEqual;
+  if (is_float_compare) return;
+
+  const FlagsCondition negated = NegateFlagsCondition(condition);
+  // NOTE(review): zero is loaded with movl rather than xorl, presumably so
+  // the flags consumed by the cmov below are left untouched - confirm.
+  __ movl(kScratchRegister, Immediate(0));
+  __ cmovq(FlagsConditionToCondition(negated), kSpeculationPoisonRegister,
+           kScratchRegister);
+}
+
+// Assembles the branch for a deoptimization check. The emitted sequence is
+// the same as in AssembleArchBranch, with one addition: when
+// FLAG_deopt_every_n_times is positive, extra code decrements the stress
+// deopt counter and forces a jump to the true label each time it hits zero.
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+ BranchInfo* branch) {
+ Label::Distance flabel_distance =
+ branch->fallthru ? Label::kNear : Label::kFar;
+ Label* tlabel = branch->true_label;
+ Label* flabel = branch->false_label;
+ Label nodeopt;
+ // Unordered equality conditions test the parity flag before the main
+ // condition: parity set goes to the false label for kUnorderedEqual and to
+ // the true label for kUnorderedNotEqual.
+ if (branch->condition == kUnorderedEqual) {
+ __ j(parity_even, flabel, flabel_distance);
+ } else if (branch->condition == kUnorderedNotEqual) {
+ __ j(parity_even, tlabel);
+ }
+ __ j(FlagsConditionToCondition(branch->condition), tlabel);
+
+ if (FLAG_deopt_every_n_times > 0) {
+ ExternalReference counter =
+ ExternalReference::stress_deopt_count(isolate());
+
+ // Save the flags and rax; both are clobbered by the counter update and
+ // are restored on each of the two paths below.
+ __ pushfq();
+ __ pushq(rax);
+ __ load_rax(counter);
+ __ decl(rax);
+ __ j(not_zero, &nodeopt);
+
+ // Counter reached zero: reset it to FLAG_deopt_every_n_times, restore the
+ // saved state and jump to the true label.
+ __ Set(rax, FLAG_deopt_every_n_times);
+ __ store_rax(counter);
+ __ popq(rax);
+ __ popfq();
+ __ jmp(tlabel);
+
+ // Counter still non-zero: write back the decremented value and continue.
+ __ bind(&nodeopt);
+ __ store_rax(counter);
+ __ popq(rax);
+ __ popfq();
+ }
+
+ if (!branch->fallthru) {
+ __ jmp(flabel, flabel_distance);
+ }
+}
+
+// Emits an unconditional jump to the label of block |target|. No instruction
+// is emitted when |target| is the next block in assembly order.
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (IsNextInAssemblyOrder(target)) return;
+  __ jmp(GetLabel(target));
+}
+
+// Emits a conditional jump to an out-of-line wasm trap: control transfers to
+// the trap entry when |condition| holds and otherwise continues after the
+// emitted code. For the unordered equality conditions a parity test comes
+// first: parity set skips the trap for kUnorderedEqual and takes it for
+// kUnorderedNotEqual.
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
+  Label* const trap_entry = ool->entry();
+  Label no_trap;
+
+  switch (condition) {
+    case kUnorderedEqual:
+      __ j(parity_even, &no_trap);
+      break;
+    case kUnorderedNotEqual:
+      __ j(parity_even, trap_entry);
+      break;
+    default:
+      break;
+  }
+  __ j(FlagsConditionToCondition(condition), trap_entry);
+  __ bind(&no_trap);
+}
+
+// Assembles boolean materializations after this instruction.
+//
+// Writes 1 to the instruction's last output register when |condition| holds
+// in the current flags and 0 otherwise.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  X64OperandConverter i(this, instr);
+  Label ordered;
+  Label finished;
+
+  // Materialize a full 64-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  const size_t last_output = instr->OutputCount() - 1;
+  Register result = i.OutputRegister(last_output);
+
+  const bool is_unordered_equal = condition == kUnorderedEqual;
+  if (is_unordered_equal || condition == kUnorderedNotEqual) {
+    // Parity clear: take the regular setcc path. Parity set: the result is
+    // fixed, 0 for kUnorderedEqual and 1 for kUnorderedNotEqual.
+    __ j(parity_odd, &ordered, Label::kNear);
+    __ movl(result, Immediate(is_unordered_equal ? 0 : 1));
+    __ jmp(&finished, Label::kNear);
+  }
+  __ bind(&ordered);
+  __ setcc(FlagsConditionToCondition(condition), result);
+  __ movzxbl(result, result);
+  __ bind(&finished);
+}
+
+// Assembles a switch as a binary search over its case values. Input 0 is the
+// register holding the switched-on value, input 1 is the default target
+// block, and the remaining inputs are (case value, target block) pairs. The
+// collected pairs are handed to AssembleArchBinarySearchSwitchRange.
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register value = i.InputRegister(0);
+
+  std::vector<std::pair<int32_t, Label*>> table;
+  const size_t input_count = instr->InputCount();
+  for (size_t pos = 2; pos < input_count; pos += 2) {
+    const int32_t case_value = i.InputInt32(pos);
+    Label* const case_target = GetLabel(i.InputRpo(pos + 1));
+    table.emplace_back(case_value, case_target);
+  }
+
+  std::pair<int32_t, Label*>* const first = table.data();
+  AssembleArchBinarySearchSwitchRange(value, i.InputRpo(1), first,
+                                      first + table.size());
+}
+
+// Assembles a switch as a linear chain of compare-and-jump pairs. Input 0 is
+// the register holding the switched-on value, input 1 is the default target
+// block, and the remaining inputs are (case value, target block) pairs. If no
+// case matches, control goes to the default block via AssembleArchJump.
+void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register value = i.InputRegister(0);
+  const size_t input_count = instr->InputCount();
+  for (size_t pos = 2; pos < input_count; pos += 2) {
+    const int32_t case_value = i.InputInt32(pos);
+    __ cmpl(value, Immediate(case_value));
+    Label* const case_target = GetLabel(i.InputRpo(pos + 1));
+    __ j(equal, case_target);
+  }
+  AssembleArchJump(i.InputRpo(1));
+}
+
+// Assembles a switch as an indexed jump through a table. Input 0 is the
+// register holding the index, input 1 is the default target block, and
+// inputs 2.. are the target blocks for indices 0, 1, 2, ...
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register selector = i.InputRegister(0);
+
+  // Collect the case labels into a zone-allocated array and register it as a
+  // jump table.
+  const int32_t num_cases = static_cast<int32_t>(instr->InputCount() - 2);
+  Label** targets = zone()->NewArray<Label*>(num_cases);
+  for (int32_t n = 0; n < num_cases; ++n) {
+    targets[n] = GetLabel(i.InputRpo(n + 2));
+  }
+  Label* const jump_table = AddJumpTable(targets, num_cases);
+
+  // Unsigned range check: any index not below num_cases takes the default.
+  __ cmpl(selector, Immediate(num_cases));
+  __ j(above_equal, GetLabel(i.InputRpo(1)));
+
+  // Jump through the table entry at jump_table + selector * 8.
+  __ leaq(kScratchRegister, Operand(jump_table));
+  __ jmp(Operand(kScratchRegister, selector, times_8, 0));
+}
+
+namespace {
+
+// Number of stack bytes used to save one callee-saved XMM register; it is the
+// stride of the movdqu save/restore loops in this file and is divided by
+// kSystemPointerSize to get the number of frame slots per register.
+// NOTE(review): 16 bytes is a double quadword in x86 terminology, so the name
+// is slightly misleading.
+static const int kQuadWordSize = 16;
+
+} // namespace
+
+// Reserves frame slots for the callee-saved registers named by the incoming
+// call descriptor. XMM registers come first: the saved-register area is
+// aligned and each register receives kQuadWordSize / kSystemPointerSize
+// slots. Every callee-saved general-purpose register then receives one slot.
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {  // Save callee-saved XMM registers.
+    frame->AlignSavedCalleeRegisterSlots();
+    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+    frame->AllocateSavedCalleeRegisterSlots(
+        saves_fp_count * (kQuadWordSize / kSystemPointerSize));
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {  // Save callee-saved registers.
+    // One slot per register in the set. AssembleConstructFrame uses the same
+    // population count when it deducts these slots from the frame size.
+    frame->AllocateSavedCalleeRegisterSlots(
+        static_cast<int>(base::bits::CountPopulation(saves)));
+  }
+}
+
+// Emits the function prologue. In order: builds the frame header for the
+// kind of call described by the incoming call descriptor, records the OSR
+// entry point when compiling for OSR, emits an early stack-overflow check for
+// wasm functions with large frames, reserves the remaining stack slots, saves
+// callee-saved XMM and general-purpose registers, and allocates return slots.
+void CodeGenerator::AssembleConstructFrame() {
+ auto call_descriptor = linkage()->GetIncomingDescriptor();
+ if (frame_access_state()->has_frame()) {
+ int pc_base = __ pc_offset();
+
+ if (call_descriptor->IsCFunctionCall()) {
+ __ pushq(rbp);
+ __ movq(rbp, rsp);
+ } else if (call_descriptor->IsJSFunctionCall()) {
+ __ Prologue();
+ if (call_descriptor->PushArgumentCount()) {
+ __ pushq(kJavaScriptCallArgCountRegister);
+ }
+ } else {
+ __ StubPrologue(info()->GetOutputStackFrameType());
+ if (call_descriptor->IsWasmFunctionCall()) {
+ __ pushq(kWasmInstanceRegister);
+ } else if (call_descriptor->IsWasmImportWrapper()) {
+ // WASM import wrappers are passed a tuple in the place of the instance.
+ // Unpack the tuple into the instance and the target callable.
+ // This must be done here in the codegen because it cannot be expressed
+ // properly in the graph.
+ __ LoadTaggedPointerField(
+ kJSFunctionRegister,
+ FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+ __ LoadTaggedPointerField(
+ kWasmInstanceRegister,
+ FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+ __ pushq(kWasmInstanceRegister);
+ }
+ }
+
+ unwinding_info_writer_.MarkFrameConstructed(pc_base);
+ }
+ // Slots still to be allocated: the total frame size minus the fixed part
+ // given by the call descriptor.
+ int shrink_slots = frame()->GetTotalFrameSlotCount() -
+ call_descriptor->CalculateFixedFrameSize();
+
+ if (info()->is_osr()) {
+ // TurboFan OSR-compiled functions cannot be entered directly.
+ __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+ // Unoptimized code jumps directly to this entrypoint while the unoptimized
+ // frame is still on the stack. Optimized code uses OSR values directly from
+ // the unoptimized frame. Thus, all that needs to be done is to allocate the
+ // remaining stack slots.
+ if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+ osr_pc_offset_ = __ pc_offset();
+ shrink_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
+ ResetSpeculationPoison();
+ }
+
+ const RegList saves = call_descriptor->CalleeSavedRegisters();
+ const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+
+ if (shrink_slots > 0) {
+ DCHECK(frame_access_state()->has_frame());
+ if (info()->IsWasm() && shrink_slots > 128) {
+ // For WebAssembly functions with big frames we have to do the stack
+ // overflow check before we construct the frame. Otherwise we may not
+ // have enough space on the stack to call the runtime for the stack
+ // overflow.
+ Label done;
+
+ // If the frame is bigger than the stack, we throw the stack overflow
+ // exception unconditionally. Thereby we can avoid the integer overflow
+ // check in the condition code.
+ if (shrink_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
+ // kScratchRegister = real stack limit + frame size in bytes; the throw
+ // below is skipped when rsp is at or above that value.
+ __ movq(kScratchRegister,
+ FieldOperand(kWasmInstanceRegister,
+ WasmInstanceObject::kRealStackLimitAddressOffset));
+ __ movq(kScratchRegister, Operand(kScratchRegister, 0));
+ __ addq(kScratchRegister, Immediate(shrink_slots * kSystemPointerSize));
+ __ cmpq(rsp, kScratchRegister);
+ __ j(above_equal, &done);
+ }
+ __ LoadTaggedPointerField(
+ rcx, FieldOperand(kWasmInstanceRegister,
+ WasmInstanceObject::kCEntryStubOffset));
+ __ Move(rsi, Smi::zero());
+ __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
+ ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
+ RecordSafepoint(reference_map, Safepoint::kSimple,
+ Safepoint::kNoLazyDeopt);
+ __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+ __ bind(&done);
+ }
+
+ // Skip callee-saved and return slots, which are created below.
+ shrink_slots -= base::bits::CountPopulation(saves);
+ shrink_slots -= base::bits::CountPopulation(saves_fp) *
+ (kQuadWordSize / kSystemPointerSize);
+ shrink_slots -= frame()->GetReturnSlotCount();
+ if (shrink_slots > 0) {
+ __ subq(rsp, Immediate(shrink_slots * kSystemPointerSize));
+ }
+ }
+
+ if (saves_fp != 0) { // Save callee-saved XMM registers.
+ const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+ const int stack_size = saves_fp_count * kQuadWordSize;
+ // Adjust the stack pointer.
+ __ subp(rsp, Immediate(stack_size));
+ // Store the registers on the stack.
+ // Registers are stored in ascending code order at consecutive
+ // kQuadWordSize-byte offsets from rsp.
+ int slot_idx = 0;
+ for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
+ if (!((1 << i) & saves_fp)) continue;
+ __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
+ XMMRegister::from_code(i));
+ slot_idx++;
+ }
+ }
+
+ if (saves != 0) { // Save callee-saved registers.
+ // Pushed from the highest register code down; AssembleReturn pops them in
+ // ascending code order.
+ for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+ if (!((1 << i) & saves)) continue;
+ __ pushq(Register::from_code(i));
+ }
+ }
+
+ // Allocate return slots (located after callee-saved).
+ if (frame()->GetReturnSlotCount() > 0) {
+ __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
+ }
+}
+
+// Emits the function epilogue and the return. Callee-saved general-purpose
+// and XMM registers are restored, the frame is deconstructed when one exists,
+// and the stack parameters are popped together with the extra slot count
+// given by |pop|, which is either an immediate or a register operand.
+void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
+ auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+ // Restore registers.
+ const RegList saves = call_descriptor->CalleeSavedRegisters();
+ if (saves != 0) {
+ // Drop the return slots first.
+ const int returns = frame()->GetReturnSlotCount();
+ if (returns != 0) {
+ __ addq(rsp, Immediate(returns * kSystemPointerSize));
+ }
+ // Pop in ascending register-code order (the prologue pushes in descending
+ // order).
+ for (int i = 0; i < Register::kNumRegisters; i++) {
+ if (!((1 << i) & saves)) continue;
+ __ popq(Register::from_code(i));
+ }
+ }
+ const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+ if (saves_fp != 0) {
+ const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+ const int stack_size = saves_fp_count * kQuadWordSize;
+ // Load the registers from the stack.
+ int slot_idx = 0;
+ for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
+ if (!((1 << i) & saves_fp)) continue;
+ __ movdqu(XMMRegister::from_code(i),
+ Operand(rsp, kQuadWordSize * slot_idx));
+ slot_idx++;
+ }
+ // Adjust the stack pointer.
+ __ addp(rsp, Immediate(stack_size));
+ }
+
+ unwinding_info_writer_.MarkBlockWillExit();
+
+ // Might need rcx for scratch if pop_size is too big or if there is a variable
+ // pop count.
+ DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
+ DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
+ size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
+ X64OperandConverter g(this, nullptr);
+ if (call_descriptor->IsCFunctionCall()) {
+ AssembleDeconstructFrame();
+ } else if (frame_access_state()->has_frame()) {
+ if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
+ // Canonicalize JSFunction return sites for now.
+ // The first such return binds return_label_ and emits the epilogue;
+ // later ones just jump to it.
+ if (return_label_.is_bound()) {
+ __ jmp(&return_label_);
+ return;
+ } else {
+ __ bind(&return_label_);
+ AssembleDeconstructFrame();
+ }
+ } else {
+ AssembleDeconstructFrame();
+ }
+ }
+
+ if (pop->IsImmediate()) {
+ // Constant pop count: fold it into the byte count passed to Ret.
+ pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
+ CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
+ __ Ret(static_cast<int>(pop_size), rcx);
+ } else {
+ // Variable pop count: pop the return address into a scratch register
+ // (rdx if the count is in rcx, otherwise rcx), advance rsp by
+ // pop_reg * 8 + pop_size, then jump to the return address.
+ Register pop_reg = g.ToRegister(pop);
+ Register scratch_reg = pop_reg == rcx ? rdx : rcx;
+ __ popq(scratch_reg);
+ __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
+ __ jmp(scratch_reg);
+ }
+}
+
+// Final code-generation hook; delegates to the macro assembler's
+// PatchConstPool().
+void CodeGenerator::FinishCode() {
+  __ PatchConstPool();
+}
+
+// Emits code that copies |source| into |destination|. The move kind inferred
+// by MoveType::InferMove selects the sequence: register/stack moves for
+// general-purpose and FP/SIMD values, and constant materialization into a
+// register or a stack slot. Stack-to-stack moves and some constant stores go
+// through kScratchRegister or kScratchDoubleReg.
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+ InstructionOperand* destination) {
+ X64OperandConverter g(this, nullptr);
+ // Helper function to write the given constant to the dst register.
+ auto MoveConstantToRegister = [&](Register dst, Constant src) {
+ switch (src.type()) {
+ case Constant::kInt32: {
+ if (RelocInfo::IsWasmReference(src.rmode())) {
+ __ movq(dst, src.ToInt64(), src.rmode());
+ } else {
+ int32_t value = src.ToInt32();
+ // Zero is produced with xorl; any other value with a 32-bit move.
+ if (value == 0) {
+ __ xorl(dst, dst);
+ } else {
+ __ movl(dst, Immediate(value));
+ }
+ }
+ break;
+ }
+ case Constant::kInt64:
+ if (RelocInfo::IsWasmReference(src.rmode())) {
+ __ movq(dst, src.ToInt64(), src.rmode());
+ } else {
+ __ Set(dst, src.ToInt64());
+ }
+ break;
+ case Constant::kFloat32:
+ __ MoveNumber(dst, src.ToFloat32());
+ break;
+ case Constant::kFloat64:
+ __ MoveNumber(dst, src.ToFloat64().value());
+ break;
+ case Constant::kExternalReference:
+ __ Move(dst, src.ToExternalReference());
+ break;
+ case Constant::kHeapObject: {
+ Handle<HeapObject> src_object = src.ToHeapObject();
+ RootIndex index;
+ // Use a root-table load when the object is materializable from a root.
+ if (IsMaterializableFromRoot(src_object, &index)) {
+ __ LoadRoot(dst, index);
+ } else {
+ __ Move(dst, src_object);
+ }
+ break;
+ }
+ case Constant::kDelayedStringConstant: {
+ const StringConstantBase* src_constant = src.ToDelayedStringConstant();
+ __ MoveStringConstant(dst, src_constant);
+ break;
+ }
+ case Constant::kRpoNumber:
+ UNREACHABLE(); // TODO(dcarney): load of labels on x64.
+ break;
+ }
+ };
+ // Helper function to write the given constant to the stack.
+ auto MoveConstantToSlot = [&](Operand dst, Constant src) {
+ // Int32/Int64 constants that are not wasm references are stored directly;
+ // everything else is materialized in kScratchRegister and then stored.
+ if (!RelocInfo::IsWasmReference(src.rmode())) {
+ switch (src.type()) {
+ case Constant::kInt32:
+ __ movq(dst, Immediate(src.ToInt32()));
+ return;
+ case Constant::kInt64:
+ __ Set(dst, src.ToInt64());
+ return;
+ default:
+ break;
+ }
+ }
+ MoveConstantToRegister(kScratchRegister, src);
+ __ movq(dst, kScratchRegister);
+ };
+ // Dispatch on the source and destination operand kinds.
+ switch (MoveType::InferMove(source, destination)) {
+ case MoveType::kRegisterToRegister:
+ if (source->IsRegister()) {
+ __ movq(g.ToRegister(destination), g.ToRegister(source));
+ } else {
+ DCHECK(source->IsFPRegister());
+ __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+ }
+ return;
+ case MoveType::kRegisterToStack: {
+ Operand dst = g.ToOperand(destination);
+ if (source->IsRegister()) {
+ __ movq(dst, g.ToRegister(source));
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ // Non-SIMD FP values are stored with Movsd, kSimd128 with Movups.
+ if (rep != MachineRepresentation::kSimd128) {
+ __ Movsd(dst, src);
+ } else {
+ __ Movups(dst, src);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToRegister: {
+ Operand src = g.ToOperand(source);
+ if (source->IsStackSlot()) {
+ __ movq(g.ToRegister(destination), src);
+ } else {
+ DCHECK(source->IsFPStackSlot());
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep != MachineRepresentation::kSimd128) {
+ __ Movsd(dst, src);
+ } else {
+ __ Movups(dst, src);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToStack: {
+ Operand src = g.ToOperand(source);
+ Operand dst = g.ToOperand(destination);
+ if (source->IsStackSlot()) {
+ // Spill on demand to use a temporary register for memory-to-memory
+ // moves.
+ __ movq(kScratchRegister, src);
+ __ movq(dst, kScratchRegister);
+ } else {
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep != MachineRepresentation::kSimd128) {
+ __ Movsd(kScratchDoubleReg, src);
+ __ Movsd(dst, kScratchDoubleReg);
+ } else {
+ DCHECK(source->IsSimd128StackSlot());
+ __ Movups(kScratchDoubleReg, src);
+ __ Movups(dst, kScratchDoubleReg);
+ }
+ }
+ return;
+ }
+ case MoveType::kConstantToRegister: {
+ Constant src = g.ToConstant(source);
+ if (destination->IsRegister()) {
+ MoveConstantToRegister(g.ToRegister(destination), src);
+ } else {
+ DCHECK(destination->IsFPRegister());
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ // FP constants are moved as their raw bit patterns.
+ if (src.type() == Constant::kFloat32) {
+ // TODO(turbofan): Can we do better here?
+ __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
+ } else {
+ DCHECK_EQ(src.type(), Constant::kFloat64);
+ __ Move(dst, src.ToFloat64().AsUint64());
+ }
+ }
+ return;
+ }
+ case MoveType::kConstantToStack: {
+ Constant src = g.ToConstant(source);
+ Operand dst = g.ToOperand(destination);
+ if (destination->IsStackSlot()) {
+ MoveConstantToSlot(dst, src);
+ } else {
+ DCHECK(destination->IsFPStackSlot());
+ // Float32 bits are stored as a 32-bit immediate; Float64 bits are
+ // loaded into kScratchRegister first and then stored.
+ if (src.type() == Constant::kFloat32) {
+ __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
+ } else {
+ DCHECK_EQ(src.type(), Constant::kFloat64);
+ __ movq(kScratchRegister, src.ToFloat64().AsUint64());
+ __ movq(dst, kScratchRegister);
+ }
+ }
+ return;
+ }
+ }
+ UNREACHABLE();
+}
+
+// Emits code that exchanges the contents of |source| and |destination|.
+// Handles register<->register, register<->stack and stack<->stack swaps as
+// classified by MoveType::InferSwap; any other kind reaches UNREACHABLE().
+// kScratchRegister, kScratchDoubleReg and the machine stack serve as
+// temporaries.
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+ InstructionOperand* destination) {
+ X64OperandConverter g(this, nullptr);
+ // Dispatch on the source and destination operand kinds. Not all
+ // combinations are possible.
+ switch (MoveType::InferSwap(source, destination)) {
+ case MoveType::kRegisterToRegister: {
+ if (source->IsRegister()) {
+ Register src = g.ToRegister(source);
+ Register dst = g.ToRegister(destination);
+ __ movq(kScratchRegister, src);
+ __ movq(src, dst);
+ __ movq(dst, kScratchRegister);
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ __ Movapd(kScratchDoubleReg, src);
+ __ Movapd(src, dst);
+ __ Movapd(dst, kScratchDoubleReg);
+ }
+ return;
+ }
+ case MoveType::kRegisterToStack: {
+ if (source->IsRegister()) {
+ // Push the register, load the slot into the register, then pop the
+ // pushed value into the slot. The SP delta is raised while the extra
+ // word is on the stack (NOTE(review): presumably so the slot operand
+ // computed in between accounts for it - confirm).
+ Register src = g.ToRegister(source);
+ __ pushq(src);
+ frame_access_state()->IncreaseSPDelta(1);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ __ movq(src, g.ToOperand(destination));
+ frame_access_state()->IncreaseSPDelta(-1);
+ __ popq(g.ToOperand(destination));
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kSystemPointerSize);
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ Operand dst = g.ToOperand(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ // Three-move swap through kScratchDoubleReg; Movsd for non-SIMD
+ // values, Movups for kSimd128.
+ if (rep != MachineRepresentation::kSimd128) {
+ __ Movsd(kScratchDoubleReg, src);
+ __ Movsd(src, dst);
+ __ Movsd(dst, kScratchDoubleReg);
+ } else {
+ __ Movups(kScratchDoubleReg, src);
+ __ Movups(src, dst);
+ __ Movups(dst, kScratchDoubleReg);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToStack: {
+ Operand src = g.ToOperand(source);
+ Operand dst = g.ToOperand(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ // NOTE(review): the push/pop pairs below do not adjust the SP delta;
+ // presumably each operand is still addressed correctly because push reads
+ // its source before rsp changes and pop writes after rsp is restored -
+ // confirm against the instruction reference.
+ if (rep != MachineRepresentation::kSimd128) {
+ // Save dst in kScratchRegister, copy src to dst via the stack, then
+ // write the saved value to src.
+ Register tmp = kScratchRegister;
+ __ movq(tmp, dst);
+ __ pushq(src); // Then use stack to copy src to destination.
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ __ popq(dst);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kSystemPointerSize);
+ __ movq(src, tmp);
+ } else {
+ // Without AVX, misaligned reads and writes will trap. Move using the
+ // stack, in two parts.
+ __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
+ __ pushq(src); // Then use stack to copy src to destination.
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ __ popq(dst);
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kSystemPointerSize);
+ // Second half: the words at offset kSystemPointerSize in each slot.
+ __ pushq(g.ToOperand(source, kSystemPointerSize));
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ kSystemPointerSize);
+ __ popq(g.ToOperand(destination, kSystemPointerSize));
+ unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+ -kSystemPointerSize);
+ __ movups(src, kScratchDoubleReg);
+ }
+ return;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
+// Emits a jump table: one dq entry per label in |targets|, in array order.
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  Label** const end = targets + target_count;
+  for (Label** entry = targets; entry != end; ++entry) {
+    __ dq(*entry);
+  }
+}
+
+#undef __
+
+} // namespace compiler
+} // namespace internal
+} // namespace v8