Diffstat (limited to 'deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc')
-rw-r--r--  deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc | 4580
1 file changed, 4580 insertions(+), 0 deletions(-)
diff --git a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc
new file mode 100644
index 0000000000..9dc6e50e4e
--- /dev/null
+++ b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -0,0 +1,4580 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/code-generator.h"
+
+#include "src/assembler-inl.h"
+#include "src/base/overflowing-math.h"
+#include "src/callable.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/frame-constants.h"
+#include "src/frames.h"
+#include "src/heap/heap-inl.h" // crbug.com/v8/8499
+#include "src/ia32/assembler-ia32.h"
+#include "src/macro-assembler.h"
+#include "src/objects/smi.h"
+#include "src/optimized-compilation-info.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
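+// The "__" shorthand lets the emission code below read like an assembly
+// listing: "__ mov(eax, ...)" expands to "tasm()->mov(eax, ...)".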
+#define __ tasm()->
+
+#define kScratchDoubleReg xmm0
+
+// Adds IA-32 specific methods for decoding operands.
+class IA32OperandConverter : public InstructionOperandConverter {
+ public:
+ IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
+ : InstructionOperandConverter(gen, instr) {}
+
+ Operand InputOperand(size_t index, int extra = 0) {
+ return ToOperand(instr_->InputAt(index), extra);
+ }
+
+ Immediate InputImmediate(size_t index) {
+ return ToImmediate(instr_->InputAt(index));
+ }
+
+ Operand OutputOperand() { return ToOperand(instr_->Output()); }
+
+ Operand ToOperand(InstructionOperand* op, int extra = 0) {
+ if (op->IsRegister()) {
+ DCHECK_EQ(0, extra);
+ return Operand(ToRegister(op));
+ } else if (op->IsFPRegister()) {
+ DCHECK_EQ(0, extra);
+ return Operand(ToDoubleRegister(op));
+ }
+ DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+ return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
+ }
+
+ Operand SlotToOperand(int slot, int extra = 0) {
+ FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+ return Operand(offset.from_stack_pointer() ? esp : ebp,
+ offset.offset() + extra);
+ }
+
+ Immediate ToImmediate(InstructionOperand* operand) {
+ Constant constant = ToConstant(operand);
+ if (constant.type() == Constant::kInt32 &&
+ RelocInfo::IsWasmReference(constant.rmode())) {
+ return Immediate(static_cast<Address>(constant.ToInt32()),
+ constant.rmode());
+ }
+ switch (constant.type()) {
+ case Constant::kInt32:
+ return Immediate(constant.ToInt32());
+ case Constant::kFloat32:
+ return Immediate::EmbeddedNumber(constant.ToFloat32());
+ case Constant::kFloat64:
+ return Immediate::EmbeddedNumber(constant.ToFloat64().value());
+ case Constant::kExternalReference:
+ return Immediate(constant.ToExternalReference());
+ case Constant::kHeapObject:
+ return Immediate(constant.ToHeapObject());
+ case Constant::kDelayedStringConstant:
+ return Immediate::EmbeddedStringConstant(
+ constant.ToDelayedStringConstant());
+ case Constant::kInt64:
+ break;
+ case Constant::kRpoNumber:
+ return Immediate::CodeRelativeOffset(ToLabel(operand));
+ }
+ UNREACHABLE();
+ }
+
+ static size_t NextOffset(size_t* offset) {
+ size_t i = *offset;
+ (*offset)++;
+ return i;
+ }
+
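+  // Derives the scale from the distance between addressing-mode enumerators;
+  // e.g. ScaleFor(kMode_MR1, kMode_MR4) == times_4. This relies on the scaled
+  // modes being declared consecutively (MR1, MR2, MR4, MR8), which the
+  // STATIC_ASSERTs below spot-check against the ScaleFactor values.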
+ static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
+ STATIC_ASSERT(0 == static_cast<int>(times_1));
+ STATIC_ASSERT(1 == static_cast<int>(times_2));
+ STATIC_ASSERT(2 == static_cast<int>(times_4));
+ STATIC_ASSERT(3 == static_cast<int>(times_8));
+ int scale = static_cast<int>(mode - one);
+ DCHECK(scale >= 0 && scale < 4);
+ return static_cast<ScaleFactor>(scale);
+ }
+
+ Operand MemoryOperand(size_t* offset) {
+ AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+ switch (mode) {
+ case kMode_MR: {
+ Register base = InputRegister(NextOffset(offset));
+ int32_t disp = 0;
+ return Operand(base, disp);
+ }
+ case kMode_MRI: {
+ Register base = InputRegister(NextOffset(offset));
+ Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+ return Operand(base, ctant.ToInt32(), ctant.rmode());
+ }
+ case kMode_MR1:
+ case kMode_MR2:
+ case kMode_MR4:
+ case kMode_MR8: {
+ Register base = InputRegister(NextOffset(offset));
+ Register index = InputRegister(NextOffset(offset));
+ ScaleFactor scale = ScaleFor(kMode_MR1, mode);
+ int32_t disp = 0;
+ return Operand(base, index, scale, disp);
+ }
+ case kMode_MR1I:
+ case kMode_MR2I:
+ case kMode_MR4I:
+ case kMode_MR8I: {
+ Register base = InputRegister(NextOffset(offset));
+ Register index = InputRegister(NextOffset(offset));
+ ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
+ Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+ return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
+ }
+ case kMode_M1:
+ case kMode_M2:
+ case kMode_M4:
+ case kMode_M8: {
+ Register index = InputRegister(NextOffset(offset));
+ ScaleFactor scale = ScaleFor(kMode_M1, mode);
+ int32_t disp = 0;
+ return Operand(index, scale, disp);
+ }
+ case kMode_M1I:
+ case kMode_M2I:
+ case kMode_M4I:
+ case kMode_M8I: {
+ Register index = InputRegister(NextOffset(offset));
+ ScaleFactor scale = ScaleFor(kMode_M1I, mode);
+ Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+ return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
+ }
+ case kMode_MI: {
+ Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+ return Operand(ctant.ToInt32(), ctant.rmode());
+ }
+ case kMode_None:
+ UNREACHABLE();
+ }
+ UNREACHABLE();
+ }
+
+ Operand MemoryOperand(size_t first_input = 0) {
+ return MemoryOperand(&first_input);
+ }
+
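+  // Like MemoryOperand, but with a fixed +4 displacement: it addresses the
+  // high word of a 64-bit memory location (used by the i64 atomics below).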
+ Operand NextMemoryOperand(size_t offset = 0) {
+ AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+ Register base = InputRegister(NextOffset(&offset));
+ const int32_t disp = 4;
+ if (mode == kMode_MR1) {
+ Register index = InputRegister(NextOffset(&offset));
+ ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
+ return Operand(base, index, scale, disp);
+ } else if (mode == kMode_MRI) {
+ Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
+ return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
+ } else {
+ UNREACHABLE();
+ }
+ }
+
+ void MoveInstructionOperandToRegister(Register destination,
+ InstructionOperand* op) {
+ if (op->IsImmediate() || op->IsConstant()) {
+ gen_->tasm()->mov(destination, ToImmediate(op));
+ } else if (op->IsRegister()) {
+ gen_->tasm()->Move(destination, ToRegister(op));
+ } else {
+ gen_->tasm()->mov(destination, ToOperand(op));
+ }
+ }
+};
+
+namespace {
+
+bool HasImmediateInput(Instruction* instr, size_t index) {
+ return instr->InputAt(index)->IsImmediate();
+}
+
+class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
+ public:
+ OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
+ : OutOfLineCode(gen), result_(result) {}
+
+ void Generate() final {
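+    // 0.0f / 0.0f produces the default quiet NaN, per IEEE 754.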
+ __ xorps(result_, result_);
+ __ divss(result_, result_);
+ }
+
+ private:
+ XMMRegister const result_;
+};
+
+class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
+ public:
+ OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
+ : OutOfLineCode(gen), result_(result) {}
+
+ void Generate() final {
+ __ xorpd(result_, result_);
+ __ divsd(result_, result_);
+ }
+
+ private:
+ XMMRegister const result_;
+};
+
+class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
+ public:
+ OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
+ XMMRegister input, StubCallMode stub_mode)
+ : OutOfLineCode(gen),
+ result_(result),
+ input_(input),
+ stub_mode_(stub_mode),
+ isolate_(gen->isolate()),
+ zone_(gen->zone()) {}
+
+ void Generate() final {
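+    // The conversion stubs read the input double from the stack slot spilled
+    // below and write the truncated int32 result back to the same slot.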
+ __ sub(esp, Immediate(kDoubleSize));
+ __ movsd(MemOperand(esp, 0), input_);
+ if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
+ } else {
+ __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
+ }
+ __ mov(result_, MemOperand(esp, 0));
+ __ add(esp, Immediate(kDoubleSize));
+ }
+
+ private:
+ Register const result_;
+ XMMRegister const input_;
+ StubCallMode stub_mode_;
+ Isolate* isolate_;
+ Zone* zone_;
+};
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+ OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
+ Register value, Register scratch0, Register scratch1,
+ RecordWriteMode mode, StubCallMode stub_mode)
+ : OutOfLineCode(gen),
+ object_(object),
+ operand_(operand),
+ value_(value),
+ scratch0_(scratch0),
+ scratch1_(scratch1),
+ mode_(mode),
+ stub_mode_(stub_mode),
+ zone_(gen->zone()) {}
+
+ void Generate() final {
+ if (mode_ > RecordWriteMode::kValueIsPointer) {
+ __ JumpIfSmi(value_, exit());
+ }
+ __ CheckPageFlag(value_, scratch0_,
+ MemoryChunk::kPointersToHereAreInterestingMask, zero,
+ exit());
+ __ lea(scratch1_, operand_);
+ RememberedSetAction const remembered_set_action =
+ mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+ : OMIT_REMEMBERED_SET;
+ SaveFPRegsMode const save_fp_mode =
+ frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+ if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+ save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
+ } else {
+ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+ save_fp_mode);
+ }
+ }
+
+ private:
+ Register const object_;
+ Operand const operand_;
+ Register const value_;
+ Register const scratch0_;
+ Register const scratch1_;
+ RecordWriteMode const mode_;
+ StubCallMode const stub_mode_;
+ Zone* zone_;
+};
+
+} // namespace
+
+#define ASSEMBLE_COMPARE(asm_instr) \
+ do { \
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
+ size_t index = 0; \
+ Operand left = i.MemoryOperand(&index); \
+ if (HasImmediateInput(instr, index)) { \
+ __ asm_instr(left, i.InputImmediate(index)); \
+ } else { \
+ __ asm_instr(left, i.InputRegister(index)); \
+ } \
+ } else { \
+ if (HasImmediateInput(instr, 1)) { \
+ if (instr->InputAt(0)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+ } else { \
+ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
+ } \
+ } else { \
+ if (instr->InputAt(1)->IsRegister()) { \
+ __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
+ } else { \
+ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
+ } \
+ } \
+ } \
+ } while (0)
+
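+// The ia32 C ABI returns doubles in x87 st(0), while TurboFan keeps floats in
+// XMM registers; hence the fstp_d/movsd bounce through the stack in the two
+// macros below.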
+#define ASSEMBLE_IEEE754_BINOP(name) \
+ do { \
+ /* Pass two doubles as arguments on the stack. */ \
+ __ PrepareCallCFunction(4, eax); \
+ __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \
+ __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); \
+ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
+ /* Return value is in st(0) on ia32. */ \
+ /* Store it into the result register. */ \
+ __ sub(esp, Immediate(kDoubleSize)); \
+ __ fstp_d(Operand(esp, 0)); \
+ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \
+ __ add(esp, Immediate(kDoubleSize)); \
+ } while (false)
+
+#define ASSEMBLE_IEEE754_UNOP(name) \
+ do { \
+ /* Pass one double as argument on the stack. */ \
+ __ PrepareCallCFunction(2, eax); \
+ __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \
+ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
+ /* Return value is in st(0) on ia32. */ \
+ /* Store it into the result register. */ \
+ __ sub(esp, Immediate(kDoubleSize)); \
+ __ fstp_d(Operand(esp, 0)); \
+ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \
+ __ add(esp, Immediate(kDoubleSize)); \
+ } while (false)
+
+#define ASSEMBLE_BINOP(asm_instr) \
+ do { \
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
+ size_t index = 1; \
+ Operand right = i.MemoryOperand(&index); \
+ __ asm_instr(i.InputRegister(0), right); \
+ } else { \
+ if (HasImmediateInput(instr, 1)) { \
+ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
+ } else { \
+ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
+ } \
+ } \
+ } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
+ do { \
+ Label binop; \
+ __ bind(&binop); \
+ __ mov_inst(eax, i.MemoryOperand(1)); \
+ __ Move(i.TempRegister(0), eax); \
+ __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
+ __ lock(); \
+ __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
+ __ j(not_equal, &binop); \
+ } while (false)
+
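+// 64-bit atomic read-modify-write as a cmpxchg8b retry loop: edx:eax holds
+// the value last read, ecx:ebx the replacement, and cmpxchg8b only commits
+// when memory still matches edx:eax. This presumes the register allocator
+// fixes InputRegister(1) to ecx; ebx is saved and restored by hand because it
+// is reserved as the root register on ia32.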
+#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
+ do { \
+ Label binop; \
+ __ bind(&binop); \
+ __ mov(eax, i.MemoryOperand(2)); \
+ __ mov(edx, i.NextMemoryOperand(2)); \
+ __ push(ebx); \
+ frame_access_state()->IncreaseSPDelta(1); \
+ i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
+ __ push(i.InputRegister(1)); \
+ __ instr1(ebx, eax); \
+ __ instr2(i.InputRegister(1), edx); \
+ __ lock(); \
+ __ cmpxchg8b(i.MemoryOperand(2)); \
+ __ pop(i.InputRegister(1)); \
+ __ pop(ebx); \
+ frame_access_state()->IncreaseSPDelta(-1); \
+ __ j(not_equal, &binop); \
+  } while (false)
+
+#define ASSEMBLE_MOVX(mov_instr) \
+ do { \
+ if (instr->addressing_mode() != kMode_None) { \
+ __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \
+ } else if (instr->InputAt(0)->IsRegister()) { \
+ __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
+ } else { \
+ __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \
+ } \
+ } while (0)
+
+#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
+ do { \
+ XMMRegister src0 = i.InputSimd128Register(0); \
+ Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
+ if (CpuFeatures::IsSupported(AVX)) { \
+ CpuFeatureScope avx_scope(tasm(), AVX); \
+ __ v##opcode(i.OutputSimd128Register(), src0, src1); \
+ } else { \
+ DCHECK_EQ(i.OutputSimd128Register(), src0); \
+ __ opcode(i.OutputSimd128Register(), src1); \
+ } \
+ } while (false)
+
+#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
+ if (CpuFeatures::IsSupported(AVX)) { \
+ CpuFeatureScope avx_scope(tasm(), AVX); \
+ __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+ i.InputOperand(1), imm); \
+ } else { \
+ CpuFeatureScope sse_scope(tasm(), SSELevel); \
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
+ __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \
+ }
+
+void CodeGenerator::AssembleDeconstructFrame() {
+ __ mov(esp, ebp);
+ __ pop(ebp);
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+ if (frame_access_state()->has_frame()) {
+ __ mov(ebp, MemOperand(ebp, 0));
+ }
+ frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+ Register, Register,
+ Register) {
+  // There are not enough temp registers left on ia32 for a call instruction,
+  // so we pick some scratch registers and save/restore them manually here.
+ int scratch_count = 3;
+ Register scratch1 = esi;
+ Register scratch2 = ecx;
+ Register scratch3 = edx;
+ DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+ Label done;
+
+ // Check if current frame is an arguments adaptor frame.
+ __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
+ Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+ __ j(not_equal, &done, Label::kNear);
+
+ __ push(scratch1);
+ __ push(scratch2);
+ __ push(scratch3);
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+ Register caller_args_count_reg = scratch1;
+ __ mov(caller_args_count_reg,
+ Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+ __ SmiUntag(caller_args_count_reg);
+
+ ParameterCount callee_args_count(args_reg);
+ __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
+ scratch3, scratch_count);
+ __ pop(scratch3);
+ __ pop(scratch2);
+ __ pop(scratch1);
+
+ __ bind(&done);
+}
+
+namespace {
+
+void AdjustStackPointerForTailCall(TurboAssembler* tasm,
+ FrameAccessState* state,
+ int new_slot_above_sp,
+ bool allow_shrinkage = true) {
+ int current_sp_offset = state->GetSPToFPSlotCount() +
+ StandardFrameConstants::kFixedSlotCountAboveFp;
+ int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+ if (stack_slot_delta > 0) {
+ tasm->sub(esp, Immediate(stack_slot_delta * kSystemPointerSize));
+ state->IncreaseSPDelta(stack_slot_delta);
+ } else if (allow_shrinkage && stack_slot_delta < 0) {
+ tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
+ state->IncreaseSPDelta(stack_slot_delta);
+ }
+}
+
+#ifdef DEBUG
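+// Atomic pair instructions leave their result in edx:eax (the old-value pair
+// of cmpxchg8b), so any outputs must have been constrained to exactly those.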
+bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
+ const Instruction* instr) {
+ if (instr->OutputCount() > 0) {
+ if (converter->OutputRegister(0) != eax) return false;
+ if (instr->OutputCount() == 2 && converter->OutputRegister(1) != edx)
+ return false;
+ }
+ return true;
+}
+#endif
+
+} // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+ int first_unused_stack_slot) {
+ CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
+ ZoneVector<MoveOperands*> pushes(zone());
+ GetPushCompatibleMoves(instr, flags, &pushes);
+
+ if (!pushes.empty() &&
+ (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+ first_unused_stack_slot)) {
+ IA32OperandConverter g(this, instr);
+ for (auto move : pushes) {
+ LocationOperand destination_location(
+ LocationOperand::cast(move->destination()));
+ InstructionOperand source(move->source());
+ AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+ destination_location.index());
+ if (source.IsStackSlot()) {
+ LocationOperand source_location(LocationOperand::cast(source));
+ __ push(g.SlotToOperand(source_location.index()));
+ } else if (source.IsRegister()) {
+ LocationOperand source_location(LocationOperand::cast(source));
+ __ push(source_location.GetRegister());
+ } else if (source.IsImmediate()) {
+ __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
+ } else {
+        // Pushes of non-scalar data types are not supported.
+ UNIMPLEMENTED();
+ }
+ frame_access_state()->IncreaseSPDelta(1);
+ move->Eliminate();
+ }
+ }
+ AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+ first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+ int first_unused_stack_slot) {
+ AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+ first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+ __ push(eax); // Push eax so we can use it as a scratch register.
+ __ ComputeCodeStartAddress(eax);
+ __ cmp(eax, kJavaScriptCallCodeStartRegister);
+ __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
+ __ pop(eax); // Restore eax.
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+// 1. read from memory the word that contains that bit, which can be found in
+// the flags in the referenced {CodeDataContainer} object;
+// 2. test kMarkedForDeoptimizationBit in those flags; and
+// 3. if it is not zero then it jumps to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+ int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+ __ push(eax); // Push eax so we can use it as a scratch register.
+ __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
+ __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
+ Immediate(1 << Code::kMarkedForDeoptimizationBit));
+ __ pop(eax); // Restore eax.
+
+ Label skip;
+ __ j(zero, &skip);
+ __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+ RelocInfo::CODE_TARGET);
+ __ bind(&skip);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+ // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+ UNREACHABLE();
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+ // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+ UNREACHABLE();
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+ Instruction* instr) {
+ IA32OperandConverter i(this, instr);
+ InstructionCode opcode = instr->opcode();
+ ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+ switch (arch_opcode) {
+ case kArchCallCodeObject: {
+ InstructionOperand* op = instr->InputAt(0);
+ if (op->IsImmediate()) {
+ Handle<Code> code = i.InputCode(0);
+ __ Call(code, RelocInfo::CODE_TARGET);
+ } else if (op->IsRegister()) {
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ __ LoadCodeObjectEntry(reg, reg);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(reg);
+ } else {
+ __ call(reg);
+ }
+ } else {
+ CHECK(tasm()->root_array_available());
+ // This is used to allow calls to the arguments adaptor trampoline from
+ // code that only has 5 gp registers available and cannot call through
+ // an immediate. This happens when the arguments adaptor trampoline is
+ // not an embedded builtin.
+ // TODO(v8:6666): Remove once only embedded builtins are supported.
+ __ push(eax);
+ frame_access_state()->IncreaseSPDelta(1);
+ Operand virtual_call_target_register(
+ kRootRegister, IsolateData::virtual_call_target_register_offset());
+ __ mov(eax, i.InputOperand(0));
+ __ LoadCodeObjectEntry(eax, eax);
+ __ mov(virtual_call_target_register, eax);
+ __ pop(eax);
+ frame_access_state()->IncreaseSPDelta(-1);
+ __ call(virtual_call_target_register);
+ }
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchCallBuiltinPointer: {
+ DCHECK(!HasImmediateInput(instr, 0));
+ Register builtin_pointer = i.InputRegister(0);
+ __ CallBuiltinPointer(builtin_pointer);
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchCallWasmFunction: {
+ if (HasImmediateInput(instr, 0)) {
+ Constant constant = i.ToConstant(instr->InputAt(0));
+ Address wasm_code = static_cast<Address>(constant.ToInt32());
+ if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+ __ wasm_call(wasm_code, constant.rmode());
+ } else {
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(wasm_code, constant.rmode());
+ } else {
+ __ call(wasm_code, constant.rmode());
+ }
+ }
+ } else {
+ Register reg = i.InputRegister(0);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineCall(reg);
+ } else {
+ __ call(reg);
+ }
+ }
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchTailCallCodeObjectFromJSFunction:
+ case kArchTailCallCodeObject: {
+ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+ no_reg, no_reg, no_reg);
+ }
+ if (HasImmediateInput(instr, 0)) {
+ Handle<Code> code = i.InputCode(0);
+ __ Jump(code, RelocInfo::CODE_TARGET);
+ } else {
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ __ LoadCodeObjectEntry(reg, reg);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ }
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchTailCallWasm: {
+ if (HasImmediateInput(instr, 0)) {
+ Constant constant = i.ToConstant(instr->InputAt(0));
+ Address wasm_code = static_cast<Address>(constant.ToInt32());
+ __ jmp(wasm_code, constant.rmode());
+ } else {
+ Register reg = i.InputRegister(0);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ }
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchTailCallAddress: {
+ CHECK(!HasImmediateInput(instr, 0));
+ Register reg = i.InputRegister(0);
+ DCHECK_IMPLIES(
+ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
+ reg == kJavaScriptCallCodeStartRegister);
+ if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
+ __ RetpolineJump(reg);
+ } else {
+ __ jmp(reg);
+ }
+ frame_access_state()->ClearSPDelta();
+ frame_access_state()->SetFrameAccessToDefault();
+ break;
+ }
+ case kArchCallJSFunction: {
+ Register func = i.InputRegister(0);
+ if (FLAG_debug_code) {
+ // Check the function's context matches the context argument.
+ __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
+ __ Assert(equal, AbortReason::kWrongFunctionContext);
+ }
+ static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
+ __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
+ __ CallCodeObject(ecx);
+ RecordCallPosition(instr);
+ frame_access_state()->ClearSPDelta();
+ break;
+ }
+ case kArchPrepareCallCFunction: {
+ // Frame alignment requires using FP-relative frame addressing.
+ frame_access_state()->SetFrameAccessToFP();
+ int const num_parameters = MiscField::decode(instr->opcode());
+ __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
+ break;
+ }
+ case kArchSaveCallerRegisters: {
+ fp_mode_ =
+ static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+ // kReturnRegister0 should have been saved before entering the stub.
+ int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+ DCHECK(IsAligned(bytes, kSystemPointerSize));
+ DCHECK_EQ(0, frame_access_state()->sp_delta());
+ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+ DCHECK(!caller_registers_saved_);
+ caller_registers_saved_ = true;
+ break;
+ }
+ case kArchRestoreCallerRegisters: {
+ DCHECK(fp_mode_ ==
+ static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+ // Don't overwrite the returned value.
+ int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+ frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+ DCHECK_EQ(0, frame_access_state()->sp_delta());
+ DCHECK(caller_registers_saved_);
+ caller_registers_saved_ = false;
+ break;
+ }
+ case kArchPrepareTailCall:
+ AssemblePrepareTailCall();
+ break;
+ case kArchCallCFunction: {
+ int const num_parameters = MiscField::decode(instr->opcode());
+ if (HasImmediateInput(instr, 0)) {
+ ExternalReference ref = i.InputExternalReference(0);
+ __ CallCFunction(ref, num_parameters);
+ } else {
+ Register func = i.InputRegister(0);
+ __ CallCFunction(func, num_parameters);
+ }
+ frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement SP delta to match the change of stack
+      // pointer in CallCFunction. However, for certain architectures (e.g.
+      // ARM), there may be a stricter alignment requirement, causing the old
+      // SP to be saved on the stack. In those cases, we cannot calculate the
+      // SP delta statically.
+ frame_access_state()->ClearSPDelta();
+ if (caller_registers_saved_) {
+ // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+ // Here, we assume the sequence to be:
+ // kArchSaveCallerRegisters;
+ // kArchCallCFunction;
+ // kArchRestoreCallerRegisters;
+ int bytes =
+ __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+ }
+ break;
+ }
+ case kArchJmp:
+ AssembleArchJump(i.InputRpo(0));
+ break;
+ case kArchBinarySearchSwitch:
+ AssembleArchBinarySearchSwitch(instr);
+ break;
+ case kArchLookupSwitch:
+ AssembleArchLookupSwitch(instr);
+ break;
+ case kArchTableSwitch:
+ AssembleArchTableSwitch(instr);
+ break;
+ case kArchComment:
+ __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+ break;
+ case kArchDebugAbort:
+ DCHECK(i.InputRegister(0) == edx);
+ if (!frame_access_state()->has_frame()) {
+ // We don't actually want to generate a pile of code for this, so just
+ // claim there is a stack frame, without generating one.
+ FrameScope scope(tasm(), StackFrame::NONE);
+ __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
+ RelocInfo::CODE_TARGET);
+ } else {
+ __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
+ RelocInfo::CODE_TARGET);
+ }
+ __ int3();
+ break;
+ case kArchDebugBreak:
+ __ int3();
+ break;
+ case kArchNop:
+ case kArchThrowTerminator:
+      // Don't emit code for nops.
+ break;
+ case kArchDeoptimize: {
+ int deopt_state_id =
+ BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+ CodeGenResult result =
+ AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
+ if (result != kSuccess) return result;
+ break;
+ }
+ case kArchRet:
+ AssembleReturn(instr->InputAt(0));
+ break;
+ case kArchStackPointer:
+ __ mov(i.OutputRegister(), esp);
+ break;
+ case kArchFramePointer:
+ __ mov(i.OutputRegister(), ebp);
+ break;
+ case kArchParentFramePointer:
+ if (frame_access_state()->has_frame()) {
+ __ mov(i.OutputRegister(), Operand(ebp, 0));
+ } else {
+ __ mov(i.OutputRegister(), ebp);
+ }
+ break;
+ case kArchTruncateDoubleToI: {
+ auto result = i.OutputRegister();
+ auto input = i.InputDoubleRegister(0);
+ auto ool = new (zone()) OutOfLineTruncateDoubleToI(
+ this, result, input, DetermineStubCallMode());
+ __ cvttsd2si(result, Operand(input));
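+      // cvttsd2si yields 0x80000000 (INT_MIN) on overflow or NaN. "cmp r, 1"
+      // computes r - 1, which signed-overflows only for INT_MIN, so the
+      // overflow jump below catches exactly the failed conversions.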
+ __ cmp(result, 1);
+ __ j(overflow, ool->entry());
+ __ bind(ool->exit());
+ break;
+ }
+ case kArchStoreWithWriteBarrier: {
+ RecordWriteMode mode =
+ static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+ Register object = i.InputRegister(0);
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ Register value = i.InputRegister(index);
+ Register scratch0 = i.TempRegister(0);
+ Register scratch1 = i.TempRegister(1);
+ auto ool = new (zone())
+ OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
+ mode, DetermineStubCallMode());
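+      // Emit the store, then take the out-of-line write barrier only if the
+      // host object's page flags say stores into it interest the GC.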
+ __ mov(operand, value);
+ __ CheckPageFlag(object, scratch0,
+ MemoryChunk::kPointersFromHereAreInterestingMask,
+ not_zero, ool->entry());
+ __ bind(ool->exit());
+ break;
+ }
+ case kArchStackSlot: {
+ FrameOffset offset =
+ frame_access_state()->GetFrameOffset(i.InputInt32(0));
+ Register base = offset.from_stack_pointer() ? esp : ebp;
+ __ lea(i.OutputRegister(), Operand(base, offset.offset()));
+ break;
+ }
+ case kIeee754Float64Acos:
+ ASSEMBLE_IEEE754_UNOP(acos);
+ break;
+ case kIeee754Float64Acosh:
+ ASSEMBLE_IEEE754_UNOP(acosh);
+ break;
+ case kIeee754Float64Asin:
+ ASSEMBLE_IEEE754_UNOP(asin);
+ break;
+ case kIeee754Float64Asinh:
+ ASSEMBLE_IEEE754_UNOP(asinh);
+ break;
+ case kIeee754Float64Atan:
+ ASSEMBLE_IEEE754_UNOP(atan);
+ break;
+ case kIeee754Float64Atanh:
+ ASSEMBLE_IEEE754_UNOP(atanh);
+ break;
+ case kIeee754Float64Atan2:
+ ASSEMBLE_IEEE754_BINOP(atan2);
+ break;
+ case kIeee754Float64Cbrt:
+ ASSEMBLE_IEEE754_UNOP(cbrt);
+ break;
+ case kIeee754Float64Cos:
+ ASSEMBLE_IEEE754_UNOP(cos);
+ break;
+ case kIeee754Float64Cosh:
+ ASSEMBLE_IEEE754_UNOP(cosh);
+ break;
+ case kIeee754Float64Expm1:
+ ASSEMBLE_IEEE754_UNOP(expm1);
+ break;
+ case kIeee754Float64Exp:
+ ASSEMBLE_IEEE754_UNOP(exp);
+ break;
+ case kIeee754Float64Log:
+ ASSEMBLE_IEEE754_UNOP(log);
+ break;
+ case kIeee754Float64Log1p:
+ ASSEMBLE_IEEE754_UNOP(log1p);
+ break;
+ case kIeee754Float64Log2:
+ ASSEMBLE_IEEE754_UNOP(log2);
+ break;
+ case kIeee754Float64Log10:
+ ASSEMBLE_IEEE754_UNOP(log10);
+ break;
+ case kIeee754Float64Pow: {
+ // TODO(bmeurer): Improve integration of the stub.
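+      // As the code below implies, the stub takes the base in xmm2 and the
+      // exponent in xmm1 and leaves the result in xmm3; the else branch swaps
+      // the two through xmm0 when the exponent already lives in xmm2.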
+ if (i.InputDoubleRegister(1) != xmm2) {
+ __ movaps(xmm2, i.InputDoubleRegister(0));
+ __ movaps(xmm1, i.InputDoubleRegister(1));
+ } else {
+ __ movaps(xmm0, i.InputDoubleRegister(0));
+ __ movaps(xmm1, xmm2);
+ __ movaps(xmm2, xmm0);
+ }
+ __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
+ __ movaps(i.OutputDoubleRegister(), xmm3);
+ break;
+ }
+ case kIeee754Float64Sin:
+ ASSEMBLE_IEEE754_UNOP(sin);
+ break;
+ case kIeee754Float64Sinh:
+ ASSEMBLE_IEEE754_UNOP(sinh);
+ break;
+ case kIeee754Float64Tan:
+ ASSEMBLE_IEEE754_UNOP(tan);
+ break;
+ case kIeee754Float64Tanh:
+ ASSEMBLE_IEEE754_UNOP(tanh);
+ break;
+ case kIA32Add:
+ ASSEMBLE_BINOP(add);
+ break;
+ case kIA32And:
+ ASSEMBLE_BINOP(and_);
+ break;
+ case kIA32Cmp:
+ ASSEMBLE_COMPARE(cmp);
+ break;
+ case kIA32Cmp16:
+ ASSEMBLE_COMPARE(cmpw);
+ break;
+ case kIA32Cmp8:
+ ASSEMBLE_COMPARE(cmpb);
+ break;
+ case kIA32Test:
+ ASSEMBLE_COMPARE(test);
+ break;
+ case kIA32Test16:
+ ASSEMBLE_COMPARE(test_w);
+ break;
+ case kIA32Test8:
+ ASSEMBLE_COMPARE(test_b);
+ break;
+ case kIA32Imul:
+ if (HasImmediateInput(instr, 1)) {
+ __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
+ } else {
+ __ imul(i.OutputRegister(), i.InputOperand(1));
+ }
+ break;
+ case kIA32ImulHigh:
+ __ imul(i.InputRegister(1));
+ break;
+ case kIA32UmulHigh:
+ __ mul(i.InputRegister(1));
+ break;
+ case kIA32Idiv:
+ __ cdq();
+ __ idiv(i.InputOperand(1));
+ break;
+ case kIA32Udiv:
+ __ Move(edx, Immediate(0));
+ __ div(i.InputOperand(1));
+ break;
+ case kIA32Not:
+ __ not_(i.OutputOperand());
+ break;
+ case kIA32Neg:
+ __ neg(i.OutputOperand());
+ break;
+ case kIA32Or:
+ ASSEMBLE_BINOP(or_);
+ break;
+ case kIA32Xor:
+ ASSEMBLE_BINOP(xor_);
+ break;
+ case kIA32Sub:
+ ASSEMBLE_BINOP(sub);
+ break;
+ case kIA32Shl:
+ if (HasImmediateInput(instr, 1)) {
+ __ shl(i.OutputOperand(), i.InputInt5(1));
+ } else {
+ __ shl_cl(i.OutputOperand());
+ }
+ break;
+ case kIA32Shr:
+ if (HasImmediateInput(instr, 1)) {
+ __ shr(i.OutputOperand(), i.InputInt5(1));
+ } else {
+ __ shr_cl(i.OutputOperand());
+ }
+ break;
+ case kIA32Sar:
+ if (HasImmediateInput(instr, 1)) {
+ __ sar(i.OutputOperand(), i.InputInt5(1));
+ } else {
+ __ sar_cl(i.OutputOperand());
+ }
+ break;
+ case kIA32AddPair: {
+ // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
+ // i.InputRegister(1) ... left high word.
+ // i.InputRegister(2) ... right low word.
+ // i.InputRegister(3) ... right high word.
+ bool use_temp = false;
+ if ((instr->InputAt(1)->IsRegister() &&
+ i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
+ i.OutputRegister(0).code() == i.InputRegister(3).code()) {
+ // We cannot write to the output register directly, because it would
+ // overwrite an input for adc. We have to use the temp register.
+ use_temp = true;
+ __ Move(i.TempRegister(0), i.InputRegister(0));
+ __ add(i.TempRegister(0), i.InputRegister(2));
+ } else {
+ __ add(i.OutputRegister(0), i.InputRegister(2));
+ }
+ i.MoveInstructionOperandToRegister(i.OutputRegister(1),
+ instr->InputAt(1));
+ __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
+ if (use_temp) {
+ __ Move(i.OutputRegister(0), i.TempRegister(0));
+ }
+ break;
+ }
+ case kIA32SubPair: {
+ // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
+ // i.InputRegister(1) ... left high word.
+ // i.InputRegister(2) ... right low word.
+ // i.InputRegister(3) ... right high word.
+ bool use_temp = false;
+ if ((instr->InputAt(1)->IsRegister() &&
+ i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
+ i.OutputRegister(0).code() == i.InputRegister(3).code()) {
+        // We cannot write to the output register directly, because it would
+        // overwrite an input for sbb. We have to use the temp register.
+ use_temp = true;
+ __ Move(i.TempRegister(0), i.InputRegister(0));
+ __ sub(i.TempRegister(0), i.InputRegister(2));
+ } else {
+ __ sub(i.OutputRegister(0), i.InputRegister(2));
+ }
+ i.MoveInstructionOperandToRegister(i.OutputRegister(1),
+ instr->InputAt(1));
+ __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
+ if (use_temp) {
+ __ Move(i.OutputRegister(0), i.TempRegister(0));
+ }
+ break;
+ }
+ case kIA32MulPair: {
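+      // Schoolbook 64x64->64 multiply: high = left_hi * right_lo + left_lo *
+      // right_hi + high32(left_lo * right_lo); low = low32(left_lo *
+      // right_lo). For this sequence to work, the instruction selector
+      // presumably fixes OutputRegister(0) to eax and TempRegister(0) to edx
+      // and aliases OutputRegister(1) with the right high word.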
+ __ imul(i.OutputRegister(1), i.InputOperand(0));
+ i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
+ __ imul(i.TempRegister(0), i.InputOperand(2));
+ __ add(i.OutputRegister(1), i.TempRegister(0));
+ __ mov(i.OutputRegister(0), i.InputOperand(0));
+ // Multiplies the low words and stores them in eax and edx.
+ __ mul(i.InputRegister(2));
+ __ add(i.OutputRegister(1), i.TempRegister(0));
+
+ break;
+ }
+ case kIA32ShlPair:
+ if (HasImmediateInput(instr, 2)) {
+ __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+ } else {
+ // Shift has been loaded into CL by the register allocator.
+ __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
+ }
+ break;
+ case kIA32ShrPair:
+ if (HasImmediateInput(instr, 2)) {
+ __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+ } else {
+ // Shift has been loaded into CL by the register allocator.
+ __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
+ }
+ break;
+ case kIA32SarPair:
+ if (HasImmediateInput(instr, 2)) {
+ __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+ } else {
+ // Shift has been loaded into CL by the register allocator.
+ __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
+ }
+ break;
+ case kIA32Ror:
+ if (HasImmediateInput(instr, 1)) {
+ __ ror(i.OutputOperand(), i.InputInt5(1));
+ } else {
+ __ ror_cl(i.OutputOperand());
+ }
+ break;
+ case kIA32Lzcnt:
+ __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
+ break;
+ case kIA32Tzcnt:
+ __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
+ break;
+ case kIA32Popcnt:
+ __ Popcnt(i.OutputRegister(), i.InputOperand(0));
+ break;
+ case kIA32Bswap:
+ __ bswap(i.OutputRegister());
+ break;
+ case kArchWordPoisonOnSpeculation:
+ // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+ UNREACHABLE();
+ break;
+ case kLFence:
+ __ lfence();
+ break;
+ case kSSEFloat32Cmp:
+ __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat32Add:
+ __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat32Sub:
+ __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat32Mul:
+ __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat32Div:
+ __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs when
+      // there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kSSEFloat32Sqrt:
+ __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat32Abs: {
+ // TODO(bmeurer): Use 128-bit constants.
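+      // All-ones (pcmpeqd x, x) shifted right by 33 leaves 0x7FFFFFFF in each
+      // qword's low word; ANDing with it clears the float sign bit. The Neg
+      // and Float64 variants below derive their masks the same way, with
+      // shift counts 31, 1 and 63.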
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 33);
+ __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat32Neg: {
+ // TODO(bmeurer): Use 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 31);
+ __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat32Round: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+ break;
+ }
+ case kSSEFloat64Cmp:
+ __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat64Add:
+ __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat64Sub:
+ __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat64Mul:
+ __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ break;
+ case kSSEFloat64Div:
+ __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs when
+      // there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ case kSSEFloat32Max: {
+ Label compare_nan, compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(above, &done_compare, Label::kNear);
+ __ j(below, &compare_swap, Label::kNear);
+ __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
+ __ test(i.TempRegister(0), Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+
+ case kSSEFloat64Max: {
+ Label compare_nan, compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(above, &done_compare, Label::kNear);
+ __ j(below, &compare_swap, Label::kNear);
+ __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
+ __ test(i.TempRegister(0), Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat32Min: {
+ Label compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(below, &done_compare, Label::kNear);
+ __ j(above, &compare_swap, Label::kNear);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movss(kScratchDoubleReg, i.InputOperand(1));
+ __ movmskps(i.TempRegister(0), kScratchDoubleReg);
+ }
+ __ test(i.TempRegister(0), Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat64Min: {
+ Label compare_swap, done_compare;
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ auto ool =
+ new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
+ __ j(parity_even, ool->entry());
+ __ j(below, &done_compare, Label::kNear);
+ __ j(above, &compare_swap, Label::kNear);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movsd(kScratchDoubleReg, i.InputOperand(1));
+ __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
+ }
+ __ test(i.TempRegister(0), Immediate(1));
+ __ j(zero, &done_compare, Label::kNear);
+ __ bind(&compare_swap);
+ if (instr->InputAt(1)->IsFPRegister()) {
+ __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+ } else {
+ __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ }
+ __ bind(&done_compare);
+ __ bind(ool->exit());
+ break;
+ }
+ case kSSEFloat64Mod: {
+ Register tmp = i.TempRegister(1);
+ __ mov(tmp, esp);
+ __ sub(esp, Immediate(kDoubleSize));
+ __ and_(esp, -8); // align to 8 byte boundary.
+ // Move values to st(0) and st(1).
+ __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
+ __ fld_d(Operand(esp, 0));
+ __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+ __ fld_d(Operand(esp, 0));
+ // Loop while fprem isn't done.
+ Label mod_loop;
+ __ bind(&mod_loop);
+      // This instruction traps on all kinds of inputs, but we are assuming
+      // the floating-point control word is set to ignore them all.
+ __ fprem();
+ // fnstsw_ax clobbers eax.
+ DCHECK_EQ(eax, i.TempRegister(0));
+ __ fnstsw_ax();
+ __ sahf();
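+      // fnstsw/sahf copies the x87 C2 flag into PF; C2 set means fprem only
+      // performed a partial reduction, so run it again.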
+ __ j(parity_even, &mod_loop);
+ // Move output to stack and clean up.
+ __ fstp(1);
+ __ fstp_d(Operand(esp, 0));
+ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
+ __ mov(esp, tmp);
+ break;
+ }
+ case kSSEFloat64Abs: {
+ // TODO(bmeurer): Use 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 1);
+ __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat64Neg: {
+ // TODO(bmeurer): Use 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 63);
+ __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ break;
+ }
+ case kSSEFloat64Sqrt:
+ __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat64Round: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+ break;
+ }
+ case kSSEFloat32ToFloat64:
+ __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat64ToFloat32:
+ __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat32ToInt32:
+ __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat32ToUint32:
+ __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
+ break;
+ case kSSEFloat64ToInt32:
+ __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
+ break;
+ case kSSEFloat64ToUint32:
+ __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
+ break;
+ case kSSEInt32ToFloat32:
+ __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEUint32ToFloat32:
+ __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
+ i.TempRegister(0));
+ break;
+ case kSSEInt32ToFloat64:
+ __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kSSEUint32ToFloat64:
+ __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
+ i.TempRegister(0));
+ break;
+ case kSSEFloat64ExtractLowWord32:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ mov(i.OutputRegister(), i.InputOperand(0));
+ } else {
+ __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ }
+ break;
+ case kSSEFloat64ExtractHighWord32:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
+ } else {
+ __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
+ }
+ break;
+ case kSSEFloat64InsertLowWord32:
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
+ break;
+ case kSSEFloat64InsertHighWord32:
+ __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
+ break;
+ case kSSEFloat64LoadLowWord32:
+ __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
+ break;
+ case kAVXFloat32Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat32Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat32Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat32Div: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs when
+      // there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ }
+ case kAVXFloat64Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat64Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat64Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kAVXFloat64Div: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs when
+      // there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+ break;
+ }
+ case kAVXFloat32Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 33);
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ break;
+ }
+ case kAVXFloat32Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 31);
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ break;
+ }
+ case kAVXFloat64Abs: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlq(kScratchDoubleReg, 1);
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ break;
+ }
+ case kAVXFloat64Neg: {
+ // TODO(bmeurer): Use RIP relative 128-bit constants.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psllq(kScratchDoubleReg, 63);
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ break;
+ }
+ case kSSEFloat64SilenceNaN:
+ __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
+ __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
+ break;
+ case kIA32Movsxbl:
+ ASSEMBLE_MOVX(movsx_b);
+ break;
+ case kIA32Movzxbl:
+ ASSEMBLE_MOVX(movzx_b);
+ break;
+ case kIA32Movb: {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ mov_b(operand, i.InputInt8(index));
+ } else {
+ __ mov_b(operand, i.InputRegister(index));
+ }
+ break;
+ }
+ case kIA32Movsxwl:
+ ASSEMBLE_MOVX(movsx_w);
+ break;
+ case kIA32Movzxwl:
+ ASSEMBLE_MOVX(movzx_w);
+ break;
+ case kIA32Movw: {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ mov_w(operand, i.InputInt16(index));
+ } else {
+ __ mov_w(operand, i.InputRegister(index));
+ }
+ break;
+ }
+ case kIA32Movl:
+ if (instr->HasOutput()) {
+ __ mov(i.OutputRegister(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ if (HasImmediateInput(instr, index)) {
+ __ mov(operand, i.InputImmediate(index));
+ } else {
+ __ mov(operand, i.InputRegister(index));
+ }
+ }
+ break;
+ case kIA32Movsd:
+ if (instr->HasOutput()) {
+ __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ movsd(operand, i.InputDoubleRegister(index));
+ }
+ break;
+ case kIA32Movss:
+ if (instr->HasOutput()) {
+ __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ movss(operand, i.InputDoubleRegister(index));
+ }
+ break;
+ case kIA32Movdqu:
+ if (instr->HasOutput()) {
+ __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+ } else {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ Movdqu(operand, i.InputSimd128Register(index));
+ }
+ break;
+ case kIA32BitcastFI:
+ if (instr->InputAt(0)->IsFPStackSlot()) {
+ __ mov(i.OutputRegister(), i.InputOperand(0));
+ } else {
+ __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ }
+ break;
+ case kIA32BitcastIF:
+ if (instr->InputAt(0)->IsRegister()) {
+ __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+ } else {
+ __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
+ }
+ break;
+ case kIA32Lea: {
+ AddressingMode mode = AddressingModeField::decode(instr->opcode());
+ // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
+ // and addressing mode just happens to work out. The "addl"/"subl" forms
+ // in these cases are faster based on measurements.
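+      // e.g. "lea eax, [eax + ebx]" becomes "add eax, ebx", and
+      // "lea eax, [eax*4]" becomes "shl eax, 2".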
+ if (mode == kMode_MI) {
+ __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
+ } else if (i.InputRegister(0) == i.OutputRegister()) {
+ if (mode == kMode_MRI) {
+ int32_t constant_summand = i.InputInt32(1);
+ if (constant_summand > 0) {
+ __ add(i.OutputRegister(), Immediate(constant_summand));
+ } else if (constant_summand < 0) {
+ __ sub(i.OutputRegister(),
+ Immediate(base::NegateWithWraparound(constant_summand)));
+ }
+ } else if (mode == kMode_MR1) {
+ if (i.InputRegister(1) == i.OutputRegister()) {
+ __ shl(i.OutputRegister(), 1);
+ } else {
+ __ add(i.OutputRegister(), i.InputRegister(1));
+ }
+ } else if (mode == kMode_M2) {
+ __ shl(i.OutputRegister(), 1);
+ } else if (mode == kMode_M4) {
+ __ shl(i.OutputRegister(), 2);
+ } else if (mode == kMode_M8) {
+ __ shl(i.OutputRegister(), 3);
+ } else {
+ __ lea(i.OutputRegister(), i.MemoryOperand());
+ }
+ } else if (mode == kMode_MR1 &&
+ i.InputRegister(1) == i.OutputRegister()) {
+ __ add(i.OutputRegister(), i.InputRegister(0));
+ } else {
+ __ lea(i.OutputRegister(), i.MemoryOperand());
+ }
+ break;
+ }
+ case kIA32PushFloat32:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ sub(esp, Immediate(kFloatSize));
+ __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
+ frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+ } else if (HasImmediateInput(instr, 0)) {
+ __ Move(kScratchDoubleReg, i.InputFloat32(0));
+ __ sub(esp, Immediate(kFloatSize));
+ __ movss(Operand(esp, 0), kScratchDoubleReg);
+ frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+ } else {
+ __ movss(kScratchDoubleReg, i.InputOperand(0));
+ __ sub(esp, Immediate(kFloatSize));
+ __ movss(Operand(esp, 0), kScratchDoubleReg);
+ frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+ }
+ break;
+ case kIA32PushFloat64:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ sub(esp, Immediate(kDoubleSize));
+ __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+ } else if (HasImmediateInput(instr, 0)) {
+ __ Move(kScratchDoubleReg, i.InputDouble(0));
+ __ sub(esp, Immediate(kDoubleSize));
+ __ movsd(Operand(esp, 0), kScratchDoubleReg);
+ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+ } else {
+ __ movsd(kScratchDoubleReg, i.InputOperand(0));
+ __ sub(esp, Immediate(kDoubleSize));
+ __ movsd(Operand(esp, 0), kScratchDoubleReg);
+ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+ }
+ break;
+ case kIA32PushSimd128:
+ if (instr->InputAt(0)->IsFPRegister()) {
+ __ sub(esp, Immediate(kSimd128Size));
+ __ movups(Operand(esp, 0), i.InputSimd128Register(0));
+ } else {
+ __ movups(kScratchDoubleReg, i.InputOperand(0));
+ __ sub(esp, Immediate(kSimd128Size));
+ __ movups(Operand(esp, 0), kScratchDoubleReg);
+ }
+ frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize);
+ break;
+ case kIA32Push:
+ if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
+ size_t index = 0;
+ Operand operand = i.MemoryOperand(&index);
+ __ push(operand);
+ frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+ } else if (instr->InputAt(0)->IsFPRegister()) {
+ __ sub(esp, Immediate(kFloatSize));
+ __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+ frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+ } else if (HasImmediateInput(instr, 0)) {
+ __ push(i.InputImmediate(0));
+ frame_access_state()->IncreaseSPDelta(1);
+ } else {
+ __ push(i.InputOperand(0));
+ frame_access_state()->IncreaseSPDelta(1);
+ }
+ break;
+ case kIA32Poke: {
+ int slot = MiscField::decode(instr->opcode());
+ if (HasImmediateInput(instr, 0)) {
+ __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
+ } else {
+ __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
+ }
+ break;
+ }
+ case kIA32Peek: {
+ int reverse_slot = i.InputInt32(0) + 1;
+ int offset =
+ FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+ if (instr->OutputAt(0)->IsFPRegister()) {
+ LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+ if (op->representation() == MachineRepresentation::kFloat64) {
+ __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
+ } else {
+ DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+ __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
+ }
+ } else {
+ __ mov(i.OutputRegister(), Operand(ebp, offset));
+ }
+ break;
+ }
+ case kSSEF32x4Splat: {
+ DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ __ shufps(dst, dst, 0x0);
+ break;
+ }
+ case kAVXF32x4Splat: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src = i.InputFloatRegister(0);
+ __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
+ break;
+ }
+ case kSSEF32x4ExtractLane: {
+ DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+ XMMRegister dst = i.OutputFloatRegister();
+ int8_t lane = i.InputInt8(1);
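+ // A shuffle control equal to the lane index moves that lane into
+ // slot 0; the upper slots are don't-cares for a float lane extract.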
+ if (lane != 0) {
+ DCHECK_LT(lane, 4);
+ __ shufps(dst, dst, lane);
+ }
+ break;
+ }
+ case kAVXF32x4ExtractLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputFloatRegister();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t lane = i.InputInt8(1);
+ if (lane == 0) {
+ if (dst != src) __ vmovaps(dst, src);
+ } else {
+ DCHECK_LT(lane, 4);
+ __ vshufps(dst, src, src, lane);
+ }
+ break;
+ }
+ case kSSEF32x4ReplaceLane: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
+ i.InputInt8(1) << 4);
+ break;
+ }
+ case kAVXF32x4ReplaceLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(2), i.InputInt8(1) << 4);
+ break;
+ }
+ case kIA32F32x4SConvertI32x4: {
+ __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kSSEF32x4UConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
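+ // SSE has no unsigned-int-to-float conversion, so split each lane:
+ // the low 16 bits convert exactly, while the high bits are halved
+ // into signed range, converted, doubled, and added back in.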
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
+ __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
+ __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
+ __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
+ __ psrld(dst, 1); // divide by 2 to get in unsigned range
+ __ cvtdq2ps(dst, dst); // convert hi exactly
+ __ addps(dst, dst); // double hi, exactly
+ __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
+ break;
+ }
+ case kAVXF32x4UConvertI32x4: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
+ kScratchDoubleReg); // zeros
+ __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
+ 0x55); // get lo 16 bits
+ __ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits
+ __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
+ __ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range
+ __ vcvtdq2ps(dst, dst); // convert hi exactly
+ __ vaddps(dst, dst, dst); // double hi, exactly
+ __ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
+ break;
+ }
+ case kSSEF32x4Abs: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
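+ // Clear each lane's sign bit with a 0x7FFFFFFF mask, built by
+ // shifting all-ones right by one.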
+ if (src.is_reg(dst)) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrld(kScratchDoubleReg, 1);
+ __ andps(dst, kScratchDoubleReg);
+ } else {
+ __ pcmpeqd(dst, dst);
+ __ psrld(dst, 1);
+ __ andps(dst, src);
+ }
+ break;
+ }
+ case kAVXF32x4Abs: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
+ __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
+ i.InputOperand(0));
+ break;
+ }
+ case kSSEF32x4Neg: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
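+ // Flip each lane's sign bit with a 0x80000000 mask, built by
+ // shifting all-ones left by 31.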
+ if (src.is_reg(dst)) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pslld(kScratchDoubleReg, 31);
+ __ xorps(dst, kScratchDoubleReg);
+ } else {
+ __ pcmpeqd(dst, dst);
+ __ pslld(dst, 31);
+ __ xorps(dst, src);
+ }
+ break;
+ }
+ case kAVXF32x4Neg: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
+ __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
+ i.InputOperand(0));
+ break;
+ }
+ case kIA32F32x4RecipApprox: {
+ __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32F32x4RecipSqrtApprox: {
+ __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kSSEF32x4Add: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ addps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4AddHoriz: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE3);
+ __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4AddHoriz: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Sub: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ subps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Mul: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Min: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ minps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Min: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Max: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ maxps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Max: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Eq: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Eq: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Ne: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Ne: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Lt: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Lt: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEF32x4Le: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXF32x4Le: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32I32x4Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Movd(dst, i.InputOperand(0));
+ __ Pshufd(dst, dst, 0x0);
+ break;
+ }
+ case kIA32I32x4ExtractLane: {
+ __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
+ break;
+ }
+ case kSSEI32x4ReplaceLane: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kAVXI32x4ReplaceLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kSSEI32x4SConvertF32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
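+ // cvttps2dq returns 0x80000000 for NaN and out-of-range inputs; zero
+ // out NaN lanes first, then flip lanes that overflowed positively to
+ // 0x7FFFFFFF.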
+ // NAN->0
+ __ movaps(kScratchDoubleReg, dst);
+ __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
+ __ pand(dst, kScratchDoubleReg);
+ // Set top bit if >= 0 (but not -0.0!)
+ __ pxor(kScratchDoubleReg, dst);
+ // Convert
+ __ cvttps2dq(dst, dst);
+ // Set top bit if >=0 is now < 0
+ __ pand(kScratchDoubleReg, dst);
+ __ psrad(kScratchDoubleReg, 31);
+ // Set positive overflow lanes to 0x7FFFFFFF
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI32x4SConvertF32x4: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ // NAN->0
+ __ vcmpeqps(kScratchDoubleReg, src, src);
+ __ vpand(dst, src, kScratchDoubleReg);
+ // Set top bit if >= 0 (but not -0.0!)
+ __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
+ // Convert
+ __ vcvttps2dq(dst, dst);
+ // Set top bit if >=0 is now < 0
+ __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
+ __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
+ // Set positive overflow lanes to 0x7FFFFFFF
+ __ vpxor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kIA32I32x4SConvertI16x8Low: {
+ __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32I32x4SConvertI16x8High: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Palignr(dst, i.InputOperand(0), 8);
+ __ Pmovsxwd(dst, dst);
+ break;
+ }
+ case kIA32I32x4Neg: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
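+ // Psignd with an all-ones operand negates each lane in place;
+ // otherwise compute 0 - src.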
+ if (src.is_reg(dst)) {
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Psignd(dst, kScratchDoubleReg);
+ } else {
+ __ Pxor(dst, dst);
+ __ Psubd(dst, src);
+ }
+ break;
+ }
+ case kSSEI32x4Shl: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI32x4Shl: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI32x4ShrS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI32x4ShrS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI32x4Add: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4AddHoriz: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4AddHoriz: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4Sub: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4Mul: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4MinS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4MinS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4MaxS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4MaxS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4Eq: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4Eq: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4Ne: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kAVXI32x4Ne: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kSSEI32x4GtS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4GtS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4GeS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
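+ // Signed a >= b is computed as min(a, b) == b.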
+ __ pminsd(dst, src);
+ __ pcmpeqd(dst, src);
+ break;
+ }
+ case kAVXI32x4GeS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminsd(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kSSEI32x4UConvertF32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
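+ // cvttps2dq only produces signed int32, so lanes at or above 2^31
+ // are converted as (src - 2^31) and the difference is added back;
+ // NaN and negative lanes are clamped to zero first.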
+ // NAN->0, negative->0
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ __ maxps(dst, kScratchDoubleReg);
+ // scratch: float representation of max_signed
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
+ __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
+ // tmp: convert (src-max_signed).
+ // Positive overflow lanes -> 0x7FFFFFFF
+ // Negative lanes -> 0
+ __ movaps(tmp, dst);
+ __ subps(tmp, kScratchDoubleReg);
+ __ cmpleps(kScratchDoubleReg, tmp);
+ __ cvttps2dq(tmp, tmp);
+ __ pxor(tmp, kScratchDoubleReg);
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ __ pmaxsd(tmp, kScratchDoubleReg);
+ // convert. Overflow lanes above max_signed will be 0x80000000
+ __ cvttps2dq(dst, dst);
+ // Add (src-max_signed) for overflow lanes.
+ __ paddd(dst, tmp);
+ break;
+ }
+ case kAVXI32x4UConvertF32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
+ // NAN->0, negative->0
+ __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vmaxps(dst, dst, kScratchDoubleReg);
+ // scratch: float representation of max_signed
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff
+ __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
+ // tmp: convert (src-max_signed).
+ // Positive overflow lanes -> 0x7FFFFFFF
+ // Negative lanes -> 0
+ __ vsubps(tmp, dst, kScratchDoubleReg);
+ __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
+ __ vcvttps2dq(tmp, tmp);
+ __ vpxor(tmp, tmp, kScratchDoubleReg);
+ __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
+ // convert. Overflow lanes above max_signed will be 0x80000000
+ __ vcvttps2dq(dst, dst);
+ // Add (src-max_signed) for overflow lanes.
+ __ vpaddd(dst, dst, tmp);
+ break;
+ }
+ case kIA32I32x4UConvertI16x8Low: {
+ __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32I32x4UConvertI16x8High: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Palignr(dst, i.InputOperand(0), 8);
+ __ Pmovzxwd(dst, dst);
+ break;
+ }
+ case kSSEI32x4ShrU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI32x4ShrU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI32x4MinU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4MinU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4MaxU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI32x4MaxU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI32x4GtU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
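+ // Unsigned a > b is computed as !(max(a, b) == b).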
+ __ pmaxud(dst, src);
+ __ pcmpeqd(dst, src);
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI32x4GtU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpmaxud(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqd(dst, kScratchDoubleReg, src2);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI32x4GeU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pminud(dst, src);
+ __ pcmpeqd(dst, src);
+ break;
+ }
+ case kAVXI32x4GeU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminud(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kIA32I16x8Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Movd(dst, i.InputOperand(0));
+ __ Pshuflw(dst, dst, 0x0);
+ __ Pshufd(dst, dst, 0x0);
+ break;
+ }
+ case kIA32I16x8ExtractLane: {
+ Register dst = i.OutputRegister();
+ __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
+ __ movsx_w(dst, dst);
+ break;
+ }
+ case kSSEI16x8ReplaceLane: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kAVXI16x8ReplaceLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kIA32I16x8SConvertI8x16Low: {
+ __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32I16x8SConvertI8x16High: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Palignr(dst, i.InputOperand(0), 8);
+ __ Pmovsxbw(dst, dst);
+ break;
+ }
+ case kIA32I16x8Neg: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
+ if (src.is_reg(dst)) {
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Psignw(dst, kScratchDoubleReg);
+ } else {
+ __ Pxor(dst, dst);
+ __ Psubw(dst, src);
+ }
+ break;
+ }
+ case kSSEI16x8Shl: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI16x8Shl: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI16x8ShrS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI16x8ShrS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI16x8SConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ packssdw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8SConvertI32x4: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8Add: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8AddSaturateS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8AddSaturateS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8AddHoriz: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8AddHoriz: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8Sub: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8SubSaturateS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8SubSaturateS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8Mul: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8MinS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8MinS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8MaxS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8MaxS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8Eq: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8Eq: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8Ne: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kAVXI16x8Ne: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kSSEI16x8GtS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8GtS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8GeS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pminsw(dst, src);
+ __ pcmpeqw(dst, src);
+ break;
+ }
+ case kAVXI16x8GeS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminsw(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kIA32I16x8UConvertI8x16Low: {
+ __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32I16x8UConvertI8x16High: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Palignr(dst, i.InputOperand(0), 8);
+ __ Pmovzxbw(dst, dst);
+ break;
+ }
+ case kSSEI16x8ShrU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
+ break;
+ }
+ case kAVXI16x8ShrU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputInt8(1));
+ break;
+ }
+ case kSSEI16x8UConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Lanes are unsigned: clamp anything at or above 0x80000000 down to
+ // 0x7FFFFFFF so packusdw, which saturates from signed int32, still
+ // produces 0xFFFF for them.
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrld(kScratchDoubleReg, 1);
+ __ pminud(dst, kScratchDoubleReg);
+ __ pminud(kScratchDoubleReg, i.InputOperand(1));
+ __ packusdw(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI16x8UConvertI32x4: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Change negative lanes to 0x7FFFFFFF
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
+ __ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
+ __ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
+ __ vpackusdw(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI16x8AddSaturateU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8AddSaturateU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8SubSaturateU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8SubSaturateU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8MinU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8MinU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8MaxU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI16x8MaxU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI16x8GtU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pmaxuw(dst, src);
+ __ pcmpeqw(dst, src);
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI16x8GtU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpmaxuw(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqw(dst, kScratchDoubleReg, src2);
+ __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI16x8GeU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pminuw(dst, src);
+ __ pcmpeqw(dst, src);
+ break;
+ }
+ case kAVXI16x8GeU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminuw(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kIA32I8x16Splat: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Movd(dst, i.InputOperand(0));
+ __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
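+ // Pshufb with an all-zero control broadcasts the lowest byte into
+ // every lane.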
+ __ Pshufb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kIA32I8x16ExtractLane: {
+ Register dst = i.OutputRegister();
+ __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
+ __ movsx_b(dst, dst);
+ break;
+ }
+ case kSSEI8x16ReplaceLane: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kAVXI8x16ReplaceLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(2), i.InputInt8(1));
+ break;
+ }
+ case kSSEI8x16SConvertI16x8: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16SConvertI16x8: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32I8x16Neg: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
+ if (src.is_reg(dst)) {
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Psignb(dst, kScratchDoubleReg);
+ } else {
+ __ Pxor(dst, dst);
+ __ Psubb(dst, src);
+ }
+ break;
+ }
+ case kSSEI8x16Shl: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ int8_t shift = i.InputInt8(1) & 0x7;
+ if (shift < 4) {
+ // For small shifts, doubling is faster.
+ for (int i = 0; i < shift; ++i) {
+ __ paddb(dst, dst);
+ }
+ } else {
+ // SSE has no byte-granular shift; clear the bits that would cross
+ // byte boundaries, then shift whole words.
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlw(kScratchDoubleReg, 8 + shift);
+ __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pand(dst, kScratchDoubleReg);
+ __ psllw(dst, shift);
+ }
+ break;
+ }
+ case kAVXI8x16Shl: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t shift = i.InputInt8(1) & 0x7;
+ if (shift < 4) {
+ // For small shifts, doubling is faster.
+ for (int i = 0; i < shift; ++i) {
+ __ vpaddb(dst, src, src);
+ src = dst;
+ }
+ } else {
+ // Mask off the unwanted bits before word-shifting.
+ __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift);
+ __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpand(dst, src, kScratchDoubleReg);
+ __ vpsllw(dst, dst, shift);
+ }
+ break;
+ }
+ case kIA32I8x16ShrS: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t shift = i.InputInt8(1) & 0x7;
+ // There is no per-byte arithmetic shift; unpack the bytes into
+ // words, shift with sign extension, and repack.
+ __ Punpckhbw(kScratchDoubleReg, src);
+ __ Punpcklbw(dst, src);
+ __ Psraw(kScratchDoubleReg, 8 + shift);
+ __ Psraw(dst, 8 + shift);
+ __ Packsswb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI8x16Add: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16Add: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16AddSaturateS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16AddSaturateS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16Sub: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16Sub: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16SubSaturateS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16SubSaturateS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16Mul: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ XMMRegister right = i.InputSimd128Register(1);
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
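+ // SSE has no byte-wise multiply; multiply the high and low bytes of
+ // each 16-bit lane separately and recombine the products.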
+
+ // I16x8 view of I8x16
+ // left = AAaa AAaa ... AAaa AAaa
+ // right= BBbb BBbb ... BBbb BBbb
+
+ // t = 00AA 00AA ... 00AA 00AA
+ // s = 00BB 00BB ... 00BB 00BB
+ __ movaps(tmp, dst);
+ __ movaps(kScratchDoubleReg, right);
+ __ psrlw(tmp, 8);
+ __ psrlw(kScratchDoubleReg, 8);
+ // dst = left * 256
+ __ psllw(dst, 8);
+
+ // t = I16x8Mul(t, s)
+ // => __PP __PP ... __PP __PP
+ __ pmullw(tmp, kScratchDoubleReg);
+ // dst = I16x8Mul(left * 256, right)
+ // => pp__ pp__ ... pp__ pp__
+ __ pmullw(dst, right);
+
+ // t = I16x8Shl(t, 8)
+ // => PP00 PP00 ... PP00 PP00
+ __ psllw(tmp, 8);
+
+ // dst = I16x8Shr(dst, 8)
+ // => 00pp 00pp ... 00pp 00pp
+ __ psrlw(dst, 8);
+
+ // dst = I16x8Or(dst, t)
+ // => PPpp PPpp ... PPpp PPpp
+ __ por(dst, tmp);
+ break;
+ }
+ case kAVXI8x16Mul: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister left = i.InputSimd128Register(0);
+ XMMRegister right = i.InputSimd128Register(1);
+ XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
+
+ // I16x8 view of I8x16
+ // left = AAaa AAaa ... AAaa AAaa
+ // right= BBbb BBbb ... BBbb BBbb
+
+ // t = 00AA 00AA ... 00AA 00AA
+ // s = 00BB 00BB ... 00BB 00BB
+ __ vpsrlw(tmp, left, 8);
+ __ vpsrlw(kScratchDoubleReg, right, 8);
+
+ // t = I16x8Mul(t, s)
+ // => __PP __PP ... __PP __PP
+ __ vpmullw(tmp, tmp, kScratchDoubleReg);
+
+ // s = left * 256
+ __ vpsllw(kScratchDoubleReg, left, 8);
+
+ // dst = I16x8Mul(left * 256, right)
+ // => pp__ pp__ ... pp__ pp__
+ __ vpmullw(dst, kScratchDoubleReg, right);
+
+ // dst = I16x8Shr(dst, 8)
+ // => 00pp 00pp ... 00pp 00pp
+ __ vpsrlw(dst, dst, 8);
+
+ // t = I16x8Shl(t, 8)
+ // => PP00 PP00 ... PP00 PP00
+ __ vpsllw(tmp, tmp, 8);
+
+ // dst = I16x8Or(dst, t)
+ // => PPpp PPpp ... PPpp PPpp
+ __ vpor(dst, dst, tmp);
+ break;
+ }
+ case kSSEI8x16MinS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16MinS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16MaxS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16MaxS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16Eq: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16Eq: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16Ne: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
+ __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+ break;
+ }
+ case kAVXI8x16Ne: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kSSEI8x16GtS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16GtS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16GeS: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pminsb(dst, src);
+ __ pcmpeqb(dst, src);
+ break;
+ }
+ case kAVXI8x16GeS: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminsb(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kSSEI8x16UConvertI16x8: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Lanes are unsigned: clamp anything at or above 0x8000 down to
+ // 0x7FFF so packuswb, which saturates from signed int16, still
+ // produces 0xFF for them.
+ __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ __ psrlw(kScratchDoubleReg, 1);
+ __ pminuw(dst, kScratchDoubleReg);
+ __ pminuw(kScratchDoubleReg, i.InputOperand(1));
+ __ packuswb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI8x16UConvertI16x8: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ // Change negative lanes to 0x7FFF
+ __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
+ __ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
+ __ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
+ __ vpackuswb(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI8x16AddSaturateU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16AddSaturateU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16SubSaturateU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16SubSaturateU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32I8x16ShrU: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t shift = i.InputInt8(1) & 0x7;
+ // Unpack the bytes into words, do logical shifts, and repack.
+ __ Punpckhbw(kScratchDoubleReg, src);
+ __ Punpcklbw(dst, src);
+ __ Psrlw(kScratchDoubleReg, 8 + shift);
+ __ Psrlw(dst, 8 + shift);
+ __ Packuswb(dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI8x16MinU: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ pminub(dst, i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16MinU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16MaxU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXI8x16MaxU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSEI8x16GtU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pmaxub(dst, src);
+ __ pcmpeqb(dst, src);
+ __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXI8x16GtU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpmaxub(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqb(dst, kScratchDoubleReg, src2);
+ __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSEI8x16GeU: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(1);
+ __ pminub(dst, src);
+ __ pcmpeqb(dst, src);
+ break;
+ }
+ case kAVXI8x16GeU: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src1 = i.InputSimd128Register(0);
+ Operand src2 = i.InputOperand(1);
+ __ vpminub(kScratchDoubleReg, src1, src2);
+ __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+ break;
+ }
+ case kIA32S128Zero: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Pxor(dst, dst);
+ break;
+ }
+ case kSSES128Not: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
+ if (src.is_reg(dst)) {
+ __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ pxor(dst, kScratchDoubleReg);
+ } else {
+ __ pcmpeqd(dst, dst);
+ __ pxor(dst, src);
+ }
+ break;
+ }
+ case kAVXS128Not: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
+ break;
+ }
+ case kSSES128And: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pand(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXS128And: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSES128Or: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ por(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXS128Or: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSES128Xor: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
+ break;
+ }
+ case kAVXS128Xor: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kSSES128Select: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ // Mask used here is stored in dst.
+ XMMRegister dst = i.OutputSimd128Register();
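+ // Bitwise select: dst = (mask & (src1 ^ src2)) ^ src2, i.e. take
+ // src1 bits where the mask is set and src2 bits where it is clear.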
+ __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+ __ andps(dst, kScratchDoubleReg);
+ __ xorps(dst, i.InputSimd128Register(2));
+ break;
+ }
+ case kAVXS128Select: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
+ i.InputOperand(1));
+ __ vandps(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(0));
+ __ vxorps(dst, kScratchDoubleReg, i.InputSimd128Register(2));
+ break;
+ }
+ case kIA32S8x16Shuffle: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src0 = i.InputOperand(0);
+ Register tmp = i.TempRegister(0);
+ // Prepare 16 byte aligned buffer for shuffle control mask
+ __ mov(tmp, esp);
+ __ and_(esp, -16);
+ if (instr->InputCount() == 5) { // only one input operand
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ for (int j = 4; j > 0; j--) {
+ uint32_t mask = i.InputUint32(j);
+ __ push(Immediate(mask));
+ }
+ __ Pshufb(dst, Operand(esp, 0));
+ } else { // two input operands
+ DCHECK_EQ(6, instr->InputCount());
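+ // Shuffle each source with a control that zeroes the lanes taken
+ // from the other source (pshufb writes zero when a control byte's
+ // top bit is set), then OR the two halves together.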
+ __ movups(kScratchDoubleReg, src0);
+ for (int j = 5; j > 1; j--) {
+ uint32_t lanes = i.InputUint32(j);
+ uint32_t mask = 0;
+ for (int k = 0; k < 32; k += 8) {
+ uint8_t lane = lanes >> k;
+ mask |= (lane < kSimd128Size ? lane : 0x80) << k;
+ }
+ __ push(Immediate(mask));
+ }
+ __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
+ Operand src1 = i.InputOperand(1);
+ if (!src1.is_reg(dst)) __ movups(dst, src1);
+ for (int j = 5; j > 1; j--) {
+ uint32_t lanes = i.InputUint32(j);
+ uint32_t mask = 0;
+ for (int k = 0; k < 32; k += 8) {
+ uint8_t lane = lanes >> k;
+ mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
+ }
+ __ push(Immediate(mask));
+ }
+ __ Pshufb(dst, Operand(esp, 0));
+ __ por(dst, kScratchDoubleReg);
+ }
+ __ mov(esp, tmp);
+ break;
+ }
+ case kIA32S32x4Swizzle: {
+ DCHECK_EQ(2, instr->InputCount());
+ __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
+ break;
+ }
+ case kIA32S32x4Shuffle: {
+ DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
+ int8_t shuffle = i.InputInt8(2);
+ DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
+ __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
+ __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
+ __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
+ break;
+ }
+ case kIA32S16x8Blend:
+ ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
+ break;
+ case kIA32S16x8HalfShuffle1: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
+ __ Pshufhw(dst, dst, i.InputInt8(2));
+ break;
+ }
+ case kIA32S16x8HalfShuffle2: {
+ XMMRegister dst = i.OutputSimd128Register();
+ __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
+ __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
+ __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
+ __ Pshufhw(dst, dst, i.InputInt8(3));
+ __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
+ break;
+ }
+ case kIA32S8x16Alignr:
+ ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
+ break;
+ case kIA32S16x8Dup: {
+ XMMRegister dst = i.OutputSimd128Register();
+ Operand src = i.InputOperand(0);
+ int8_t lane = i.InputInt8(1) & 0x7;
+ int8_t lane4 = lane & 0x3;
+ int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
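+ // Replicate the 16-bit lane across its 64-bit half, then broadcast
+ // the duplicated dword to all lanes (0 picks dword 0, 0xaa dword 2).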
+ if (lane < 4) {
+ __ Pshuflw(dst, src, half_dup);
+ __ Pshufd(dst, dst, 0);
+ } else {
+ __ Pshufhw(dst, src, half_dup);
+ __ Pshufd(dst, dst, 0xaa);
+ }
+ break;
+ }
+ case kIA32S8x16Dup: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t lane = i.InputInt8(1) & 0xf;
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ if (lane < 8) {
+ __ vpunpcklbw(dst, src, src);
+ } else {
+ __ vpunpckhbw(dst, src, src);
+ }
+ } else {
+ DCHECK_EQ(dst, src);
+ if (lane < 8) {
+ __ punpcklbw(dst, dst);
+ } else {
+ __ punpckhbw(dst, dst);
+ }
+ }
+ lane &= 0x7;
+ int8_t lane4 = lane & 0x3;
+ int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+ if (lane < 4) {
+ __ Pshuflw(dst, dst, half_dup);
+ __ Pshufd(dst, dst, 0);
+ } else {
+ __ Pshufhw(dst, dst, half_dup);
+ __ Pshufd(dst, dst, 0xaa);
+ }
+ break;
+ }
+ case kIA32S64x2UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
+ break;
+ case kIA32S32x4UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
+ break;
+ case kIA32S16x8UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
+ break;
+ case kIA32S8x16UnpackHigh:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
+ break;
+ case kIA32S64x2UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
+ break;
+ case kIA32S32x4UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
+ break;
+ case kIA32S16x8UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
+ break;
+ case kIA32S8x16UnpackLow:
+ ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
+ break;
+ case kSSES16x8UnzipHigh: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ __ movups(kScratchDoubleReg, i.InputOperand(1));
+ __ psrld(kScratchDoubleReg, 16);
+ src2 = kScratchDoubleReg;
+ }
+ __ psrld(dst, 16);
+ __ packusdw(dst, src2);
+ break;
+ }
+ case kAVXS16x8UnzipHigh: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ if (instr->InputCount() == 2) {
+ __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
+ src2 = kScratchDoubleReg;
+ }
+ __ vpsrld(dst, i.InputSimd128Register(0), 16);
+ __ vpackusdw(dst, dst, src2);
+ break;
+ }
+ case kSSES16x8UnzipLow: {
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+ if (instr->InputCount() == 2) {
+ __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
+ src2 = kScratchDoubleReg;
+ }
+ __ pblendw(dst, kScratchDoubleReg, 0xaa);
+ __ packusdw(dst, src2);
+ break;
+ }
+ case kAVXS16x8UnzipLow: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ if (instr->InputCount() == 2) {
+ __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
+ 0x55);
+ src2 = kScratchDoubleReg;
+ }
+ __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
+ __ vpackusdw(dst, dst, src2);
+ break;
+ }
+ case kSSES8x16UnzipHigh: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ __ movups(kScratchDoubleReg, i.InputOperand(1));
+ __ psrlw(kScratchDoubleReg, 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ psrlw(dst, 8);
+ __ packuswb(dst, src2);
+ break;
+ }
+ case kAVXS8x16UnzipHigh: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ if (instr->InputCount() == 2) {
+ __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+ __ vpackuswb(dst, dst, src2);
+ break;
+ }
+ case kSSES8x16UnzipLow: {
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (instr->InputCount() == 2) {
+ __ movups(kScratchDoubleReg, i.InputOperand(1));
+ __ psllw(kScratchDoubleReg, 8);
+ __ psrlw(kScratchDoubleReg, 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ psllw(dst, 8);
+ __ psrlw(dst, 8);
+ __ packuswb(dst, src2);
+ break;
+ }
+ case kAVXS8x16UnzipLow: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src2 = dst;
+ if (instr->InputCount() == 2) {
+ __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+ __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
+ src2 = kScratchDoubleReg;
+ }
+ __ vpsllw(dst, i.InputSimd128Register(0), 8);
+ __ vpsrlw(dst, dst, 8);
+ __ vpackuswb(dst, dst, src2);
+ break;
+ }
+ case kSSES8x16TransposeLow: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ psllw(dst, 8);
+ if (instr->InputCount() == 1) {
+ __ movups(kScratchDoubleReg, dst);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ __ movups(kScratchDoubleReg, i.InputOperand(1));
+ __ psllw(kScratchDoubleReg, 8);
+ }
+ __ psrlw(dst, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXS8x16TransposeLow: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ if (instr->InputCount() == 1) {
+ __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
+ __ vpsrlw(dst, kScratchDoubleReg, 8);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+ __ vpsllw(dst, i.InputSimd128Register(0), 8);
+ __ vpsrlw(dst, dst, 8);
+ }
+ __ vpor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSES8x16TransposeHigh: {
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ __ psrlw(dst, 8);
+ if (instr->InputCount() == 1) {
+ __ movups(kScratchDoubleReg, dst);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ __ movups(kScratchDoubleReg, i.InputOperand(1));
+ __ psrlw(kScratchDoubleReg, 8);
+ }
+ __ psllw(kScratchDoubleReg, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXS8x16TransposeHigh: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ if (instr->InputCount() == 1) {
+ __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+ __ vpsllw(kScratchDoubleReg, dst, 8);
+ } else {
+ DCHECK_EQ(2, instr->InputCount());
+ __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+ __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+ __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
+ }
+ __ vpor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kSSES8x8Reverse:
+ case kSSES8x4Reverse:
+ case kSSES8x2Reverse: {
+ DCHECK_EQ(1, instr->InputCount());
+ XMMRegister dst = i.OutputSimd128Register();
+ DCHECK_EQ(dst, i.InputSimd128Register(0));
+ if (arch_opcode != kSSES8x2Reverse) {
+ // First shuffle words into position.
+ int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
+ __ pshuflw(dst, dst, shuffle_mask);
+ __ pshufhw(dst, dst, shuffle_mask);
+ }
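+ // Reverse the two bytes within each 16-bit lane via shifts and OR.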
+ __ movaps(kScratchDoubleReg, dst);
+ __ psrlw(kScratchDoubleReg, 8);
+ __ psllw(dst, 8);
+ __ por(dst, kScratchDoubleReg);
+ break;
+ }
+ case kAVXS8x2Reverse:
+ case kAVXS8x4Reverse:
+ case kAVXS8x8Reverse: {
+ DCHECK_EQ(1, instr->InputCount());
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = dst;
+ if (arch_opcode != kAVXS8x2Reverse) {
+ // First shuffle words into position.
+ int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
+ __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
+ __ vpshufhw(dst, dst, shuffle_mask);
+ } else {
+ src = i.InputSimd128Register(0);
+ }
+ // Reverse the two bytes within each 16-bit lane.
+ __ vpsrlw(kScratchDoubleReg, src, 8);
+ __ vpsllw(dst, src, 8);
+ __ vpor(dst, dst, kScratchDoubleReg);
+ break;
+ }
+ case kIA32S1x4AnyTrue:
+ case kIA32S1x8AnyTrue:
+ case kIA32S1x16AnyTrue: {
+ Register dst = i.OutputRegister();
+ XMMRegister src = i.InputSimd128Register(0);
+ Register tmp = i.TempRegister(0);
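+ // dst starts as 1; ptest sets ZF iff src is all zeros, in which case the
+ // cmov resets dst to 0.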
+ __ xor_(tmp, tmp);
+ __ mov(dst, Immediate(1));
+ __ Ptest(src, src);
+ __ cmov(zero, dst, tmp);
+ break;
+ }
+ case kIA32S1x4AllTrue:
+ case kIA32S1x8AllTrue:
+ case kIA32S1x16AllTrue: {
+ Register dst = i.OutputRegister();
+ Operand src = i.InputOperand(0);
+ Register tmp = i.TempRegister(0);
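+ // XOR src with an all-ones mask; the result is zero iff every bit of src
+ // was set, and only then does the cmov materialize 1 in dst.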
+ __ mov(tmp, Immediate(1));
+ __ xor_(dst, dst);
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Pxor(kScratchDoubleReg, src);
+ __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
+ __ cmov(zero, dst, tmp);
+ break;
+ }
+ case kIA32StackCheck: {
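+ // Only sets the flags; the branch itself comes from the flags
+ // continuation of this instruction.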
+ __ CompareStackLimit(esp);
+ break;
+ }
+ case kIA32Word32AtomicPairLoad: {
+ XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
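+ // Load both halves with a single 8-byte movq so the pair is read in one
+ // access, then extract the two words into the outputs.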
+ __ movq(tmp, i.MemoryOperand());
+ if (instr->OutputCount() == 2) {
+ __ Pextrd(i.OutputRegister(0), tmp, 0);
+ __ Pextrd(i.OutputRegister(1), tmp, 1);
+ } else if (instr->OutputCount() == 1) {
+ __ Pextrd(i.OutputRegister(0), tmp, 0);
+ __ Pextrd(i.TempRegister(1), tmp, 1);
+ }
+ break;
+ }
+ case kIA32Word32AtomicPairStore: {
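+ // There is no plain atomic 64-bit store on ia32, so loop on lock
+ // cmpxchg8b: it compares edx:eax with the memory operand and, if equal,
+ // stores ecx:ebx. ebx is preserved around the operation because it must
+ // hold the low word of the value being stored.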
+ Label store;
+ __ bind(&store);
+ __ mov(i.TempRegister(0), i.MemoryOperand(2));
+ __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
+ __ push(ebx);
+ frame_access_state()->IncreaseSPDelta(1);
+ i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+ __ lock();
+ __ cmpxchg8b(i.MemoryOperand(2));
+ __ pop(ebx);
+ frame_access_state()->IncreaseSPDelta(-1);
+ __ j(not_equal, &store);
+ break;
+ }
+ case kWord32AtomicExchangeInt8: {
+ __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
+ __ movsx_b(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeUint8: {
+ __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzx_b(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeInt16: {
+ __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
+ __ movsx_w(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeUint16: {
+ __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
+ __ movzx_w(i.InputRegister(0), i.InputRegister(0));
+ break;
+ }
+ case kWord32AtomicExchangeWord32: {
+ __ xchg(i.InputRegister(0), i.MemoryOperand(1));
+ break;
+ }
+ case kIA32Word32AtomicPairExchange: {
+ DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
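+ // Exchange via a cmpxchg8b loop: retry with the freshly observed value
+ // in edx:eax until no other writer intervenes.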
+ Label exchange;
+ __ bind(&exchange);
+ __ mov(eax, i.MemoryOperand(2));
+ __ mov(edx, i.NextMemoryOperand(2));
+ __ push(ebx);
+ frame_access_state()->IncreaseSPDelta(1);
+ i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+ __ lock();
+ __ cmpxchg8b(i.MemoryOperand(2));
+ __ pop(ebx);
+ frame_access_state()->IncreaseSPDelta(-1);
+ __ j(not_equal, &exchange);
+ break;
+ }
+ case kWord32AtomicCompareExchangeInt8: {
+ __ lock();
+ __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
+ __ movsx_b(eax, eax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeUint8: {
+ __ lock();
+ __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzx_b(eax, eax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeInt16: {
+ __ lock();
+ __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
+ __ movsx_w(eax, eax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeUint16: {
+ __ lock();
+ __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
+ __ movzx_w(eax, eax);
+ break;
+ }
+ case kWord32AtomicCompareExchangeWord32: {
+ __ lock();
+ __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
+ break;
+ }
+ case kIA32Word32AtomicPairCompareExchange: {
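+ // cmpxchg8b takes the expected value in edx:eax and the new value in
+ // ecx:ebx; only ebx has to be populated (and preserved) manually here.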
+ __ push(ebx);
+ frame_access_state()->IncreaseSPDelta(1);
+ i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
+ __ lock();
+ __ cmpxchg8b(i.MemoryOperand(4));
+ __ pop(ebx);
+ frame_access_state()->IncreaseSPDelta(-1);
+ break;
+ }
+#define ATOMIC_BINOP_CASE(op, inst) \
+ case kWord32Atomic##op##Int8: { \
+ ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
+ __ movsx_b(eax, eax); \
+ break; \
+ } \
+ case kWord32Atomic##op##Uint8: { \
+ ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
+ __ movzx_b(eax, eax); \
+ break; \
+ } \
+ case kWord32Atomic##op##Int16: { \
+ ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
+ __ movsx_w(eax, eax); \
+ break; \
+ } \
+ case kWord32Atomic##op##Uint16: { \
+ ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
+ __ movzx_w(eax, eax); \
+ break; \
+ } \
+ case kWord32Atomic##op##Word32: { \
+ ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
+ break; \
+ }
+ ATOMIC_BINOP_CASE(Add, add)
+ ATOMIC_BINOP_CASE(Sub, sub)
+ ATOMIC_BINOP_CASE(And, and_)
+ ATOMIC_BINOP_CASE(Or, or_)
+ ATOMIC_BINOP_CASE(Xor, xor_)
+#undef ATOMIC_BINOP_CASE
+#define ATOMIC_BINOP_CASE(op, instr1, instr2) \
+ case kIA32Word32AtomicPair##op: { \
+ DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
+ ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
+ break; \
+ }
+ ATOMIC_BINOP_CASE(Add, add, adc)
+ ATOMIC_BINOP_CASE(And, and_, and_)
+ ATOMIC_BINOP_CASE(Or, or_, or_)
+ ATOMIC_BINOP_CASE(Xor, xor_, xor_)
+#undef ATOMIC_BINOP_CASE
+ case kIA32Word32AtomicPairSub: {
+ DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
+ Label binop;
+ __ bind(&binop);
+ // Load the 64-bit memory operand into edx:eax.
+ __ mov(eax, i.MemoryOperand(2));
+ __ mov(edx, i.NextMemoryOperand(2));
+ // Save input registers temporarily on the stack.
+ __ push(ebx);
+ frame_access_state()->IncreaseSPDelta(1);
+ i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+ __ push(i.InputRegister(1));
+ // Negate the 64-bit input in place: negate the low word, propagate the
+ // borrow into the high word, then negate the high word.
+ __ neg(ebx);
+ __ adc(i.InputRegister(1), 0);
+ __ neg(i.InputRegister(1));
+ // Add the negated input to the value loaded from memory.
+ __ add(ebx, eax);
+ __ adc(i.InputRegister(1), edx);
+ __ lock();
+ __ cmpxchg8b(i.MemoryOperand(2));
+ // Restore the input registers.
+ __ pop(i.InputRegister(1));
+ __ pop(ebx);
+ frame_access_state()->IncreaseSPDelta(-1);
+ __ j(not_equal, &binop);
+ break;
+ }
+ case kWord32AtomicLoadInt8:
+ case kWord32AtomicLoadUint8:
+ case kWord32AtomicLoadInt16:
+ case kWord32AtomicLoadUint16:
+ case kWord32AtomicLoadWord32:
+ case kWord32AtomicStoreWord8:
+ case kWord32AtomicStoreWord16:
+ case kWord32AtomicStoreWord32:
+ UNREACHABLE(); // Won't be generated by instruction selector.
+ break;
+ }
+ return kSuccess;
+} // NOLINT(readability/fn_size)
+
+static Condition FlagsConditionToCondition(FlagsCondition condition) {
+ switch (condition) {
+ case kUnorderedEqual:
+ case kEqual:
+ return equal;
+ case kUnorderedNotEqual:
+ case kNotEqual:
+ return not_equal;
+ case kSignedLessThan:
+ return less;
+ case kSignedGreaterThanOrEqual:
+ return greater_equal;
+ case kSignedLessThanOrEqual:
+ return less_equal;
+ case kSignedGreaterThan:
+ return greater;
+ case kUnsignedLessThan:
+ return below;
+ case kUnsignedGreaterThanOrEqual:
+ return above_equal;
+ case kUnsignedLessThanOrEqual:
+ return below_equal;
+ case kUnsignedGreaterThan:
+ return above;
+ case kOverflow:
+ return overflow;
+ case kNotOverflow:
+ return no_overflow;
+ default:
+ UNREACHABLE();
+ }
+}
+
+// Assembles a branch after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+ Label::Distance flabel_distance =
+ branch->fallthru ? Label::kNear : Label::kFar;
+ Label* tlabel = branch->true_label;
+ Label* flabel = branch->false_label;
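+ // NaN operands set the parity flag, so unordered comparisons are routed
+ // through parity_even before the main condition is tested.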
+ if (branch->condition == kUnorderedEqual) {
+ __ j(parity_even, flabel, flabel_distance);
+ } else if (branch->condition == kUnorderedNotEqual) {
+ __ j(parity_even, tlabel);
+ }
+ __ j(FlagsConditionToCondition(branch->condition), tlabel);
+
+ // Add a jump if not falling through to the next block.
+ if (!branch->fallthru) __ jmp(flabel);
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+ Instruction* instr) {
+ // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+ UNREACHABLE();
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+ BranchInfo* branch) {
+ AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+ if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+ FlagsCondition condition) {
+ class OutOfLineTrap final : public OutOfLineCode {
+ public:
+ OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+ : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+ void Generate() final {
+ IA32OperandConverter i(gen_, instr_);
+ TrapId trap_id =
+ static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+ GenerateCallToTrap(trap_id);
+ }
+
+ private:
+ void GenerateCallToTrap(TrapId trap_id) {
+ if (trap_id == TrapId::kInvalid) {
+ // We cannot test calls to the runtime in cctest/test-run-wasm.
+ // Therefore we emit a call to C here instead of a call to the runtime.
+ __ PrepareCallCFunction(0, esi);
+ __ CallCFunction(
+ ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+ __ LeaveFrame(StackFrame::WASM_COMPILED);
+ auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+ size_t pop_size =
+ call_descriptor->StackParameterCount() * kSystemPointerSize;
+ // Use ecx as a scratch register; we return immediately anyway.
+ __ Ret(static_cast<int>(pop_size), ecx);
+ } else {
+ gen_->AssembleSourcePosition(instr_);
+ // A direct call to a wasm runtime stub defined in this module.
+ // Just encode the stub index. This will be patched when the code
+ // is added to the native module and copied into wasm code space.
+ __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+ ReferenceMap* reference_map =
+ new (gen_->zone()) ReferenceMap(gen_->zone());
+ gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
+ Safepoint::kNoLazyDeopt);
+ __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+ }
+ }
+
+ Instruction* instr_;
+ CodeGenerator* gen_;
+ };
+ auto ool = new (zone()) OutOfLineTrap(this, instr);
+ Label* tlabel = ool->entry();
+ Label end;
+ if (condition == kUnorderedEqual) {
+ __ j(parity_even, &end);
+ } else if (condition == kUnorderedNotEqual) {
+ __ j(parity_even, tlabel);
+ }
+ __ j(FlagsConditionToCondition(condition), tlabel);
+ __ bind(&end);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+ FlagsCondition condition) {
+ IA32OperandConverter i(this, instr);
+ Label done;
+
+ // Materialize a full 32-bit 1 or 0 value. The result register is always the
+ // last output of the instruction.
+ Label check;
+ DCHECK_NE(0u, instr->OutputCount());
+ Register reg = i.OutputRegister(instr->OutputCount() - 1);
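+ // NaN operands set the parity flag; give unordered comparisons their
+ // fixed result up front.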
+ if (condition == kUnorderedEqual) {
+ __ j(parity_odd, &check, Label::kNear);
+ __ Move(reg, Immediate(0));
+ __ jmp(&done, Label::kNear);
+ } else if (condition == kUnorderedNotEqual) {
+ __ j(parity_odd, &check, Label::kNear);
+ __ mov(reg, Immediate(1));
+ __ jmp(&done, Label::kNear);
+ }
+ Condition cc = FlagsConditionToCondition(condition);
+
+ __ bind(&check);
+ if (reg.is_byte_register()) {
+ // setcc for byte registers (al, bl, cl, dl).
+ __ setcc(cc, reg);
+ __ movzx_b(reg, reg);
+ } else {
+ // Emit a branch to set a register to either 1 or 0.
+ Label set;
+ __ j(cc, &set, Label::kNear);
+ __ Move(reg, Immediate(0));
+ __ jmp(&done, Label::kNear);
+ __ bind(&set);
+ __ mov(reg, Immediate(1));
+ }
+ __ bind(&done);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+ IA32OperandConverter i(this, instr);
+ Register input = i.InputRegister(0);
+ std::vector<std::pair<int32_t, Label*>> cases;
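+ // Inputs: the switch value, the default block, then (case value, target
+ // block) pairs.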
+ for (size_t index = 2; index < instr->InputCount(); index += 2) {
+ cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+ }
+ AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+ cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
+ IA32OperandConverter i(this, instr);
+ Register input = i.InputRegister(0);
+ for (size_t index = 2; index < instr->InputCount(); index += 2) {
+ __ cmp(input, Immediate(i.InputInt32(index + 0)));
+ __ j(equal, GetLabel(i.InputRpo(index + 1)));
+ }
+ AssembleArchJump(i.InputRpo(1));
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+ IA32OperandConverter i(this, instr);
+ Register input = i.InputRegister(0);
+ size_t const case_count = instr->InputCount() - 2;
+ Label** cases = zone()->NewArray<Label*>(case_count);
+ for (size_t index = 0; index < case_count; ++index) {
+ cases[index] = GetLabel(i.InputRpo(index + 2));
+ }
+ Label* const table = AddJumpTable(cases, case_count);
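+ // Bounds-check against the case count, falling back to the default
+ // block, then jump indirectly through the jump table.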
+ __ cmp(input, Immediate(case_count));
+ __ j(above_equal, GetLabel(i.InputRpo(1)));
+ __ jmp(Operand::JumpTable(input, times_4, table));
+}
+
+// The calling convention for JSFunctions on IA32 passes arguments on the
+// stack and the JSFunction and context in EDI and ESI, respectively. The
+// steps of a call thus look as follows:
+
+// --{ before the call instruction }--------------------------------------------
+// | caller frame |
+// ^ esp ^ ebp
+
+// --{ push arguments and set up ESI, EDI }-------------------------------------
+// | args + receiver | caller frame |
+// ^ esp ^ ebp
+// [edi = JSFunction, esi = context]
+
+// --{ call [edi + kCodeEntryOffset] }------------------------------------------
+// | RET | args + receiver | caller frame |
+// ^ esp ^ ebp
+
+// =={ prologue of called function }============================================
+// --{ push ebp }---------------------------------------------------------------
+// | FP | RET | args + receiver | caller frame |
+// ^ esp ^ ebp
+
+// --{ mov ebp, esp }-----------------------------------------------------------
+// | FP | RET | args + receiver | caller frame |
+// ^ ebp,esp
+
+// --{ push esi }---------------------------------------------------------------
+// | CTX | FP | RET | args + receiver | caller frame |
+// ^esp ^ ebp
+
+// --{ push edi }---------------------------------------------------------------
+// | FNC | CTX | FP | RET | args + receiver | caller frame |
+// ^esp ^ ebp
+
+// --{ subi esp, #N }-----------------------------------------------------------
+// | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
+// ^esp ^ ebp
+
+// =={ body of called function }================================================
+
+// =={ epilogue of called function }============================================
+// --{ mov esp, ebp }-----------------------------------------------------------
+// | FP | RET | args + receiver | caller frame |
+// ^ esp,ebp
+
+// --{ pop ebp }-----------------------------------------------------------
+// | | RET | args + receiver | caller frame |
+// ^ esp ^ ebp
+
+// --{ ret #A+1 }-----------------------------------------------------------
+// | | caller frame |
+// ^ esp ^ ebp
+
+// Runtime function calls are accomplished by doing a stub call to the
+// CEntry (a real code object). On IA32, arguments are passed on the
+// stack, the number of arguments in EAX, the address of the runtime function
+// in EBX, and the context in ESI.
+
+// --{ before the call instruction }--------------------------------------------
+// | caller frame |
+// ^ esp ^ ebp
+
+// --{ push arguments and set up EAX, EBX, and ESI }----------------------------
+// | args + receiver | caller frame |
+// ^ esp ^ ebp
+// [eax = #args, ebx = runtime function, esi = context]
+
+// --{ call #CEntry }-----------------------------------------------------------
+// | RET | args + receiver | caller frame |
+// ^ esp ^ ebp
+
+// =={ body of runtime function }===============================================
+
+// --{ runtime returns }--------------------------------------------------------
+// | caller frame |
+// ^ esp ^ ebp
+
+// Other custom linkages (e.g. for calling directly into and out of C++) may
+// need to save callee-saved registers on the stack, which is done in the
+// function prologue of generated code.
+
+// --{ before the call instruction }--------------------------------------------
+// | caller frame |
+// ^ esp ^ ebp
+
+// --{ set up arguments in registers and on the stack }-------------------------
+// | args | caller frame |
+// ^ esp ^ ebp
+// [r0 = arg0, r1 = arg1, ...]
+
+// --{ call code }--------------------------------------------------------------
+// | RET | args | caller frame |
+// ^ esp ^ ebp
+
+// =={ prologue of called function }============================================
+// --{ push ebp }---------------------------------------------------------------
+// | FP | RET | args | caller frame |
+// ^ esp ^ ebp
+
+// --{ mov ebp, esp }-----------------------------------------------------------
+// | FP | RET | args | caller frame |
+// ^ ebp,esp
+
+// --{ save registers }---------------------------------------------------------
+// | regs | FP | RET | args | caller frame |
+// ^ esp ^ ebp
+
+// --{ subi esp, #N }-----------------------------------------------------------
+// | callee frame | regs | FP | RET | args | caller frame |
+// ^esp ^ ebp
+
+// =={ body of called function }================================================
+
+// =={ epilogue of called function }============================================
+// --{ restore registers }------------------------------------------------------
+// | regs | FP | RET | args | caller frame |
+// ^ esp ^ ebp
+
+// --{ mov esp, ebp }-----------------------------------------------------------
+// | FP | RET | args | caller frame |
+// ^ esp,ebp
+
+// --{ pop ebp }----------------------------------------------------------------
+// | RET | args | caller frame |
+// ^ esp ^ ebp
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+ auto call_descriptor = linkage()->GetIncomingDescriptor();
+ const RegList saves = call_descriptor->CalleeSavedRegisters();
+ if (saves != 0) { // Save callee-saved registers.
+ DCHECK(!info()->is_osr());
+ int pushed = 0;
+ for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+ if (!((1 << i) & saves)) continue;
+ ++pushed;
+ }
+ frame->AllocateSavedCalleeRegisterSlots(pushed);
+ }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+ auto call_descriptor = linkage()->GetIncomingDescriptor();
+ if (frame_access_state()->has_frame()) {
+ if (call_descriptor->IsCFunctionCall()) {
+ __ push(ebp);
+ __ mov(ebp, esp);
+ } else if (call_descriptor->IsJSFunctionCall()) {
+ __ Prologue();
+ if (call_descriptor->PushArgumentCount()) {
+ __ push(kJavaScriptCallArgCountRegister);
+ }
+ } else {
+ __ StubPrologue(info()->GetOutputStackFrameType());
+ if (call_descriptor->IsWasmFunctionCall()) {
+ __ push(kWasmInstanceRegister);
+ } else if (call_descriptor->IsWasmImportWrapper()) {
+ // WASM import wrappers are passed a tuple in the place of the instance.
+ // Unpack the tuple into the instance and the target callable.
+ // This must be done here in the codegen because it cannot be expressed
+ // properly in the graph.
+ __ mov(kJSFunctionRegister,
+ Operand(kWasmInstanceRegister,
+ Tuple2::kValue2Offset - kHeapObjectTag));
+ __ mov(kWasmInstanceRegister,
+ Operand(kWasmInstanceRegister,
+ Tuple2::kValue1Offset - kHeapObjectTag));
+ __ push(kWasmInstanceRegister);
+ }
+ }
+ }
+
+ int shrink_slots = frame()->GetTotalFrameSlotCount() -
+ call_descriptor->CalculateFixedFrameSize();
+
+ if (info()->is_osr()) {
+ // TurboFan OSR-compiled functions cannot be entered directly.
+ __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+ // Unoptimized code jumps directly to this entrypoint while the unoptimized
+ // frame is still on the stack. Optimized code uses OSR values directly from
+ // the unoptimized frame. Thus, all that needs to be done is to allocate the
+ // remaining stack slots.
+ if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+ osr_pc_offset_ = __ pc_offset();
+ shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
+ }
+
+ const RegList saves = call_descriptor->CalleeSavedRegisters();
+ if (shrink_slots > 0) {
+ DCHECK(frame_access_state()->has_frame());
+ if (info()->IsWasm() && shrink_slots > 128) {
+ // For WebAssembly functions with big frames we have to do the stack
+ // overflow check before we construct the frame. Otherwise we may not
+ // have enough space on the stack to call the runtime for the stack
+ // overflow.
+ Label done;
+
+ // If the frame is bigger than the stack, we throw the stack overflow
+ // exception unconditionally. Thereby we can avoid the integer overflow
+ // check in the condition code.
+ if (shrink_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
+ Register scratch = esi;
+ __ push(scratch);
+ __ mov(scratch,
+ FieldOperand(kWasmInstanceRegister,
+ WasmInstanceObject::kRealStackLimitAddressOffset));
+ __ mov(scratch, Operand(scratch, 0));
+ __ add(scratch, Immediate(shrink_slots * kSystemPointerSize));
+ __ cmp(esp, scratch);
+ __ pop(scratch);
+ __ j(above_equal, &done);
+ }
+ __ mov(ecx, FieldOperand(kWasmInstanceRegister,
+ WasmInstanceObject::kCEntryStubOffset));
+ __ Move(esi, Smi::zero());
+ __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
+ ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
+ RecordSafepoint(reference_map, Safepoint::kSimple,
+ Safepoint::kNoLazyDeopt);
+ __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+ __ bind(&done);
+ }
+
+ // Skip callee-saved and return slots, which are created below.
+ shrink_slots -= base::bits::CountPopulation(saves);
+ shrink_slots -= frame()->GetReturnSlotCount();
+ if (shrink_slots > 0) {
+ __ sub(esp, Immediate(shrink_slots * kSystemPointerSize));
+ }
+ }
+
+ if (saves != 0) { // Save callee-saved registers.
+ DCHECK(!info()->is_osr());
+ for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+ if (((1 << i) & saves)) __ push(Register::from_code(i));
+ }
+ }
+
+ // Allocate return slots (located after callee-saved).
+ if (frame()->GetReturnSlotCount() > 0) {
+ __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
+ }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
+ auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+ const RegList saves = call_descriptor->CalleeSavedRegisters();
+ // Restore registers.
+ if (saves != 0) {
+ const int returns = frame()->GetReturnSlotCount();
+ if (returns != 0) {
+ __ add(esp, Immediate(returns * kSystemPointerSize));
+ }
+ for (int i = 0; i < Register::kNumRegisters; i++) {
+ if (!((1 << i) & saves)) continue;
+ __ pop(Register::from_code(i));
+ }
+ }
+
+ // Might need ecx for scratch if pop_size is too big or if there is a variable
+ // pop count.
+ DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
+ size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
+ IA32OperandConverter g(this, nullptr);
+ if (call_descriptor->IsCFunctionCall()) {
+ AssembleDeconstructFrame();
+ } else if (frame_access_state()->has_frame()) {
+ // Canonicalize JSFunction return sites for now: when they all pop the
+ // same number of arguments, they can share a single return sequence.
+ if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
+ if (return_label_.is_bound()) {
+ __ jmp(&return_label_);
+ return;
+ } else {
+ __ bind(&return_label_);
+ AssembleDeconstructFrame();
+ }
+ } else {
+ AssembleDeconstructFrame();
+ }
+ }
+ DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
+ DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
+ if (pop->IsImmediate()) {
+ DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
+ pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
+ __ Ret(static_cast<int>(pop_size), ecx);
+ } else {
+ Register pop_reg = g.ToRegister(pop);
+ Register scratch_reg = pop_reg == ecx ? edx : ecx;
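+ // Pop the return address into the scratch register, drop both the static
+ // and the dynamic stack arguments, then jump to the return address.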
+ __ pop(scratch_reg);
+ __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
+ __ jmp(scratch_reg);
+ }
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+ InstructionOperand* destination) {
+ IA32OperandConverter g(this, nullptr);
+ // Dispatch on the source and destination operand kinds.
+ switch (MoveType::InferMove(source, destination)) {
+ case MoveType::kRegisterToRegister:
+ if (source->IsRegister()) {
+ __ mov(g.ToRegister(destination), g.ToRegister(source));
+ } else {
+ DCHECK(source->IsFPRegister());
+ __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+ }
+ return;
+ case MoveType::kRegisterToStack: {
+ Operand dst = g.ToOperand(destination);
+ if (source->IsRegister()) {
+ __ mov(dst, g.ToRegister(source));
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep == MachineRepresentation::kFloat32) {
+ __ movss(dst, src);
+ } else if (rep == MachineRepresentation::kFloat64) {
+ __ movsd(dst, src);
+ } else {
+ DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+ __ movups(dst, src);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToRegister: {
+ Operand src = g.ToOperand(source);
+ if (source->IsStackSlot()) {
+ __ mov(g.ToRegister(destination), src);
+ } else {
+ DCHECK(source->IsFPStackSlot());
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep == MachineRepresentation::kFloat32) {
+ __ movss(dst, src);
+ } else if (rep == MachineRepresentation::kFloat64) {
+ __ movsd(dst, src);
+ } else {
+ DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+ __ movups(dst, src);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToStack: {
+ Operand src = g.ToOperand(source);
+ Operand dst = g.ToOperand(destination);
+ if (source->IsStackSlot()) {
+ __ push(src);
+ __ pop(dst);
+ } else {
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep == MachineRepresentation::kFloat32) {
+ __ movss(kScratchDoubleReg, src);
+ __ movss(dst, kScratchDoubleReg);
+ } else if (rep == MachineRepresentation::kFloat64) {
+ __ movsd(kScratchDoubleReg, src);
+ __ movsd(dst, kScratchDoubleReg);
+ } else {
+ DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+ __ movups(kScratchDoubleReg, src);
+ __ movups(dst, kScratchDoubleReg);
+ }
+ }
+ return;
+ }
+ case MoveType::kConstantToRegister: {
+ Constant src = g.ToConstant(source);
+ if (destination->IsRegister()) {
+ Register dst = g.ToRegister(destination);
+ if (src.type() == Constant::kHeapObject) {
+ __ Move(dst, src.ToHeapObject());
+ } else {
+ __ Move(dst, g.ToImmediate(source));
+ }
+ } else {
+ DCHECK(destination->IsFPRegister());
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ if (src.type() == Constant::kFloat32) {
+ // TODO(turbofan): Can we do better here?
+ __ Move(dst, src.ToFloat32AsInt());
+ } else {
+ DCHECK_EQ(src.type(), Constant::kFloat64);
+ __ Move(dst, src.ToFloat64().AsUint64());
+ }
+ }
+ return;
+ }
+ case MoveType::kConstantToStack: {
+ Constant src = g.ToConstant(source);
+ Operand dst = g.ToOperand(destination);
+ if (destination->IsStackSlot()) {
+ __ Move(dst, g.ToImmediate(source));
+ } else {
+ DCHECK(destination->IsFPStackSlot());
+ if (src.type() == Constant::kFloat32) {
+ __ Move(dst, Immediate(src.ToFloat32AsInt()));
+ } else {
+ DCHECK_EQ(src.type(), Constant::kFloat64);
+ uint64_t constant_value = src.ToFloat64().AsUint64();
+ uint32_t lower = static_cast<uint32_t>(constant_value);
+ uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
+ Operand dst0 = dst;
+ Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
+ __ Move(dst0, Immediate(lower));
+ __ Move(dst1, Immediate(upper));
+ }
+ }
+ return;
+ }
+ }
+ UNREACHABLE();
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+ InstructionOperand* destination) {
+ IA32OperandConverter g(this, nullptr);
+ // Dispatch on the source and destination operand kinds. Not all
+ // combinations are possible.
+ switch (MoveType::InferSwap(source, destination)) {
+ case MoveType::kRegisterToRegister: {
+ if (source->IsRegister()) {
+ Register src = g.ToRegister(source);
+ Register dst = g.ToRegister(destination);
+ __ push(src);
+ __ mov(src, dst);
+ __ pop(dst);
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ XMMRegister dst = g.ToDoubleRegister(destination);
+ __ movaps(kScratchDoubleReg, src);
+ __ movaps(src, dst);
+ __ movaps(dst, kScratchDoubleReg);
+ }
+ return;
+ }
+ case MoveType::kRegisterToStack: {
+ if (source->IsRegister()) {
+ Register src = g.ToRegister(source);
+ __ push(src);
+ frame_access_state()->IncreaseSPDelta(1);
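+ // Recompute the slot operand after each SP adjustment; the slot may be
+ // addressed relative to esp.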
+ Operand dst = g.ToOperand(destination);
+ __ mov(src, dst);
+ frame_access_state()->IncreaseSPDelta(-1);
+ dst = g.ToOperand(destination);
+ __ pop(dst);
+ } else {
+ DCHECK(source->IsFPRegister());
+ XMMRegister src = g.ToDoubleRegister(source);
+ Operand dst = g.ToOperand(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep == MachineRepresentation::kFloat32) {
+ __ movss(kScratchDoubleReg, dst);
+ __ movss(dst, src);
+ __ movaps(src, kScratchDoubleReg);
+ } else if (rep == MachineRepresentation::kFloat64) {
+ __ movsd(kScratchDoubleReg, dst);
+ __ movsd(dst, src);
+ __ movaps(src, kScratchDoubleReg);
+ } else {
+ DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+ __ movups(kScratchDoubleReg, dst);
+ __ movups(dst, src);
+ __ movups(src, kScratchDoubleReg);
+ }
+ }
+ return;
+ }
+ case MoveType::kStackToStack: {
+ if (source->IsStackSlot()) {
+ Operand dst1 = g.ToOperand(destination);
+ __ push(dst1);
+ frame_access_state()->IncreaseSPDelta(1);
+ Operand src1 = g.ToOperand(source);
+ __ push(src1);
+ Operand dst2 = g.ToOperand(destination);
+ __ pop(dst2);
+ frame_access_state()->IncreaseSPDelta(-1);
+ Operand src2 = g.ToOperand(source);
+ __ pop(src2);
+ } else {
+ DCHECK(source->IsFPStackSlot());
+ Operand src0 = g.ToOperand(source);
+ Operand dst0 = g.ToOperand(destination);
+ MachineRepresentation rep =
+ LocationOperand::cast(source)->representation();
+ if (rep == MachineRepresentation::kFloat32) {
+ __ movss(kScratchDoubleReg, dst0); // Save dst in scratch register.
+ __ push(src0); // Then use stack to copy src to destination.
+ __ pop(dst0);
+ __ movss(src0, kScratchDoubleReg);
+ } else if (rep == MachineRepresentation::kFloat64) {
+ __ movsd(kScratchDoubleReg, dst0); // Save dst in scratch register.
+ __ push(src0); // Then use stack to copy src to destination.
+ __ pop(dst0);
+ __ push(g.ToOperand(source, kSystemPointerSize));
+ __ pop(g.ToOperand(destination, kSystemPointerSize));
+ __ movsd(src0, kScratchDoubleReg);
+ } else {
+ DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+ __ movups(kScratchDoubleReg, dst0); // Save dst in scratch register.
+ __ push(src0); // Then use stack to copy src to destination.
+ __ pop(dst0);
+ __ push(g.ToOperand(source, kSystemPointerSize));
+ __ pop(g.ToOperand(destination, kSystemPointerSize));
+ __ push(g.ToOperand(source, 2 * kSystemPointerSize));
+ __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
+ __ push(g.ToOperand(source, 3 * kSystemPointerSize));
+ __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
+ __ movups(src0, kScratchDoubleReg);
+ }
+ }
+ return;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+ for (size_t index = 0; index < target_count; ++index) {
+ __ dd(targets[index]);
+ }
+}
+
+#undef __
+#undef kScratchDoubleReg
+#undef ASSEMBLE_COMPARE
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+#undef ASSEMBLE_BINOP
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_I64ATOMIC_BINOP
+#undef ASSEMBLE_MOVX
+#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
+#undef ASSEMBLE_SIMD_IMM_SHUFFLE
+
+} // namespace compiler
+} // namespace internal
+} // namespace v8