Diffstat (limited to 'deps/v8/src/compiler/backend/x64')
4 files changed, 630 insertions, 308 deletions
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index a108edeff0..a4f82b153b 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -155,10 +155,18 @@ class X64OperandConverter : public InstructionOperandConverter { namespace { +bool HasAddressingMode(Instruction* instr) { + return instr->addressing_mode() != kMode_None; +} + bool HasImmediateInput(Instruction* instr, size_t index) { return instr->InputAt(index)->IsImmediate(); } +bool HasRegisterInput(Instruction* instr, size_t index) { + return instr->InputAt(index)->IsRegister(); +} + class OutOfLineLoadFloat32NaN final : public OutOfLineCode { public: OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) @@ -210,6 +218,10 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { // Just encode the stub index. This will be patched when the code // is added to the native module and copied into wasm code space. __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); + } else if (tasm()->options().inline_offheap_trampolines) { + // With embedded builtins we do not need the isolate here. This allows + // the call to be generated asynchronously. + __ CallBuiltin(Builtins::kDoubleToI); } else { __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); } @@ -380,60 +392,60 @@ void EmitWordLoadPoisoningIfNeeded( } \ } while (false) -#define ASSEMBLE_BINOP(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 1; \ - Operand right = i.MemoryOperand(&index); \ - __ asm_instr(i.InputRegister(0), right); \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ - } \ } while (false) +#define ASSEMBLE_BINOP(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 1; \ + Operand right = i.MemoryOperand(&index); \ + __ asm_instr(i.InputRegister(0), right); \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) -#define ASSEMBLE_COMPARE(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 0; \ - Operand left = i.MemoryOperand(&index); \ - if (HasImmediateInput(instr, index)) { \ - __ asm_instr(left, i.InputImmediate(index)); \ - } else { \ - __ asm_instr(left, i.InputRegister(index)); \ - } \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ -
} \ - } \ +#define ASSEMBLE_COMPARE(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 0; \ + Operand left = i.MemoryOperand(&index); \ + if (HasImmediateInput(instr, index)) { \ + __ asm_instr(left, i.InputImmediate(index)); \ + } else { \ + __ asm_instr(left, i.InputRegister(index)); \ + } \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) #define ASSEMBLE_MULT(asm_instr) \ do { \ if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ + if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \ i.InputImmediate(1)); \ } else { \ @@ -441,7 +453,7 @@ void EmitWordLoadPoisoningIfNeeded( i.InputImmediate(1)); \ } \ } else { \ - if (instr->InputAt(1)->IsRegister()) { \ + if (HasRegisterInput(instr, 1)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \ @@ -468,9 +480,9 @@ void EmitWordLoadPoisoningIfNeeded( #define ASSEMBLE_MOVX(asm_instr) \ do { \ - if (instr->addressing_mode() != kMode_None) { \ + if (HasAddressingMode(instr)) { \ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ - } else if (instr->InputAt(0)->IsRegister()) { \ + } else if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \ @@ -576,17 +588,18 @@ void EmitWordLoadPoisoningIfNeeded( __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \ } while (false) -#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ - do { \ - CpuFeatureScope sse_scope(tasm(), SSE4_1); \ - Register dst = i.OutputRegister(); \ - Register tmp = i.TempRegister(0); \ - __ movq(tmp, Immediate(1)); \ - __ xorq(dst, dst); \ - __ pxor(kScratchDoubleReg, kScratchDoubleReg); \ - __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \ - __ ptest(kScratchDoubleReg, kScratchDoubleReg); \ - __ cmovq(zero, dst, tmp); \ +#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ + do { \ + CpuFeatureScope sse_scope(tasm(), SSE4_1); \ + Register dst = i.OutputRegister(); \ + Register tmp1 = i.TempRegister(0); \ + XMMRegister tmp2 = i.TempSimd128Register(1); \ + __ movq(tmp1, Immediate(1)); \ + __ xorq(dst, dst); \ + __ pxor(tmp2, tmp2); \ + __ opcode(tmp2, i.InputSimd128Register(0)); \ + __ ptest(tmp2, tmp2); \ + __ cmovq(zero, dst, tmp1); \ } while (false) void CodeGenerator::AssembleDeconstructFrame() { @@ -989,10 +1002,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. 
break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; unwinding_info_writer_.MarkBlockWillExit(); break; @@ -1000,9 +1012,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ movq(i.OutputRegister(), rsp); - break; case kArchFramePointer: __ movq(i.OutputRegister(), rbp); break; @@ -1013,6 +1022,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ movq(i.OutputRegister(), rbp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + if (HasAddressingMode(instr)) { + __ cmpq(rsp, i.MemoryOperand(kValueIndex)); + } else { + __ cmpq(rsp, i.InputRegister(kValueIndex)); + } + break; + } case kArchTruncateDoubleToI: { auto result = i.OutputRegister(); auto input = i.InputDoubleRegister(0); @@ -1176,14 +1194,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_MULT(imulq); break; case kX64ImulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ imull(i.InputRegister(1)); } else { __ imull(i.InputOperand(1)); } break; case kX64UmulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ mull(i.InputRegister(1)); } else { __ mull(i.InputOperand(1)); @@ -1254,42 +1272,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SHIFT(rorq, 6); break; case kX64Lzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Lzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntl(i.OutputRegister(), i.InputOperand(0)); @@ -1321,16 +1339,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kSSEFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
- __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 33); - __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 33); + __ Andps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 31); - __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 31); + __ Xorps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Sqrt: @@ -1532,17 +1552,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64F64x2Abs: case kSSEFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 1); - __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 1); + __ Andpd(i.OutputDoubleRegister(), tmp); break; } case kX64F64x2Neg: case kSSEFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 63); - __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 63); + __ Xorpd(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat64Sqrt: @@ -1659,56 +1681,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kSSEInt32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ 
Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1729,21 +1751,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kSSEFloat64InsertLowWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); } break; case kSSEFloat64InsertHighWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); } break; case kSSEFloat64LoadLowWord32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1800,56 +1822,52 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kAVXFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 33); if (instr->InputAt(0)->IsFPRegister()) { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 31); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 1); if (instr->InputAt(0)->IsFPRegister()) { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 63); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } @@ -1929,14 +1947,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64Movl: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { - if (instr->addressing_mode() == kMode_None) { - if (instr->InputAt(0)->IsRegister()) { + if (HasAddressingMode(instr)) { + __ movl(i.OutputRegister(), i.MemoryOperand()); + } else { + if (HasRegisterInput(instr, 0)) { __ movl(i.OutputRegister(), i.InputRegister(0)); } else { __ movl(i.OutputRegister(), i.InputOperand(0)); } - } else { - __ movl(i.OutputRegister(), i.MemoryOperand()); } __ AssertZeroExtended(i.OutputRegister()); } else { @@ -2002,12 +2020,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; } - case kX64CompressSigned: // Fall through. - case kX64CompressPointer: // Fall through. - case kX64CompressAny: { - ASSEMBLE_MOVX(movl); - break; - } case kX64Movq: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { @@ -2082,14 +2094,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kX64BitcastIF: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kX64BitcastLD: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movq(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -2177,7 +2189,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ incl(i.OutputRegister()); break; case kX64Push: - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { + if (HasAddressingMode(instr)) { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ pushq(operand); @@ -2189,7 +2201,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), kSystemPointerSize); - } else if (instr->InputAt(0)->IsRegister()) { + } else if (HasRegisterInput(instr, 0)) { __ pushq(i.InputRegister(0)); frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), @@ -2256,11 +2268,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64F64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); if (instr->InputAt(0)->IsFPRegister()) { - __ pshufd(dst, i.InputDoubleRegister(0), 0x44); + __ movddup(dst, i.InputDoubleRegister(0)); } else { - __ pshufd(dst, i.InputOperand(0), 0x44); + __ movddup(dst, i.InputOperand(0)); } break; } @@ -2280,6 +2293,61 @@ CodeGenerator::CodeGenResult 
CodeGenerator::AssembleArchInstruction( __ movq(i.OutputDoubleRegister(), kScratchRegister); break; } + case kX64F64x2Add: { + ASSEMBLE_SSE_BINOP(addpd); + break; + } + case kX64F64x2Sub: { + ASSEMBLE_SSE_BINOP(subpd); + break; + } + case kX64F64x2Mul: { + ASSEMBLE_SSE_BINOP(mulpd); + break; + } + case kX64F64x2Div: { + ASSEMBLE_SSE_BINOP(divpd); + break; + } + case kX64F64x2Min: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The minpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform minpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ minpd(kScratchDoubleReg, dst); + __ minpd(dst, src1); + // Propagate -0's and NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by quieting and clearing the payload. + __ cmppd(dst, kScratchDoubleReg, 3); + __ orpd(kScratchDoubleReg, dst); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } + case kX64F64x2Max: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The maxpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform maxpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ maxpd(kScratchDoubleReg, dst); + __ maxpd(dst, src1); + // Find discrepancies. + __ xorpd(dst, kScratchDoubleReg); + // Propagate NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Propagate sign discrepancy and (subtle) quiet NaNs. + __ subpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ cmppd(dst, kScratchDoubleReg, 3); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } case kX64F64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); @@ -2406,6 +2474,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } + case kX64F32x4Div: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ divps(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } case kX64F32x4Min: { XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); @@ -2466,13 +2539,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movq(dst, i.InputRegister(0)); } else { __ movq(dst, i.InputOperand(0)); } - __ pshufd(dst, dst, 0x44); + __ movddup(dst, dst); break; } case kX64I64x2ExtractLane: { @@ -2482,7 +2556,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrq(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2502,7 +2576,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Shl: { - __ psllq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllq(i.OutputSimd128Register(), tmp); break; }
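The kX64I64x2Shl hunk just above shows the new pattern this patch uses for all variable SIMD shifts: the shift count arrives in a general-purpose register and is moved into a temporary XMM register, because the SSE shift instructions accept a variable count only from the low 64 bits of an XMM operand. A minimal standalone sketch of that pattern with SSE2 intrinsics follows; it is illustrative only, not code from the patch, and note that psllq produces zero for counts of 64 or more, so a caller wanting wasm's mod-64 shift semantics must mask the count first.

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdint>

// Shift both 64-bit lanes of v left by a count held in a scalar register,
// mirroring the emitted "movq tmp, reg / psllq dst, tmp" sequence above.
__m128i I64x2Shl(__m128i v, int64_t count) {
  __m128i tmp = _mm_cvtsi64_si128(count);  // movq tmp, reg
  return _mm_sll_epi64(v, tmp);            // psllq dst, tmp
}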
case kX64I64x2ShrS: { @@ -2511,16 +2587,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // ShrS on each quadword one at a time XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); + Register tmp = i.ToRegister(instr->TempAt(0)); // lower quadword - __ pextrq(kScratchRegister, src, 0x0); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x0); + __ pextrq(tmp, src, 0x0); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x0); // upper quadword - __ pextrq(kScratchRegister, src, 0x1); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x1); + __ pextrq(tmp, src, 0x1); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x1); break; } case kX64I64x2Add: { @@ -2538,8 +2615,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister left = i.InputSimd128Register(0); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp1 = i.ToSimd128Register(instr->TempAt(0)); - XMMRegister tmp2 = i.ToSimd128Register(instr->TempAt(1)); + XMMRegister tmp1 = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); __ movaps(tmp1, left); __ movaps(tmp2, right); @@ -2559,6 +2636,66 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ paddq(left, tmp2); // left == dst break; } + case kX64I64x2MinS: { + if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, tmp); + } else { + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + Register tmp1 = i.TempRegister(1); + Register tmp2 = i.TempRegister(2); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // backup src since we cannot change it + __ movaps(tmp, src); + + // compare the lower quadwords + __ movq(tmp1, dst); + __ movq(tmp2, tmp); + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of lower quadwords + __ cmovq(less_equal, tmp2, tmp1); + // tmp1 now has the higher quadword + // must do this before movq, movq clears top quadword + __ pextrq(tmp1, dst, 1); + // save tmp2 into dst + __ movq(dst, tmp2); + // tmp2 now has the higher quadword + __ pextrq(tmp2, tmp, 1); + // compare higher quadwords + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of higher quadwords + __ cmovq(less_equal, tmp2, tmp1); + __ movq(tmp, tmp2); + // dst = [tmp[0], dst[0]] + __ punpcklqdq(dst, tmp); + } + break; + } + case kX64I64x2MaxS: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(dst, tmp); // implicit use of xmm0 as mask + break; + } case kX64I64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); @@ -2568,9 +2705,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqq(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I64x2GtS: { @@ -2584,7 +2722,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ movaps(tmp, src); __ pcmpgtq(tmp, dst); @@ -2593,7 +2731,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2ShrU: { - __ psrlq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlq(i.OutputSimd128Register(), tmp); + break; + } + case kX64I64x2MinU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, src_tmp); + break; + } + case kX64I64x2MaxU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask break; } case kX64I64x2GtU: { @@ -2601,7 +2788,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2617,7 +2804,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2632,7 +2819,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ 
movd(dst, i.InputOperand(0)); @@ -2647,7 +2834,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2658,19 +2845,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I32x4SConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); + XMMRegister tmp = i.TempSimd128Register(0); // NAN->0 - __ movaps(kScratchDoubleReg, dst); - __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); + __ movaps(tmp, dst); + __ cmpeqps(tmp, tmp); + __ pand(dst, tmp); // Set top bit if >= 0 (but not -0.0!) - __ pxor(kScratchDoubleReg, dst); + __ pxor(tmp, dst); // Convert __ cvttps2dq(dst, dst); // Set top bit if >=0 is now < 0 - __ pand(kScratchDoubleReg, dst); - __ psrad(kScratchDoubleReg, 31); + __ pand(tmp, dst); + __ psrad(tmp, 31); // Set positive overflow lanes to 0x7FFFFFFF - __ pxor(dst, kScratchDoubleReg); + __ pxor(dst, tmp); break; } case kX64I32x4SConvertI16x8Low: { @@ -2699,11 +2887,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Shl: { - __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ pslld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4ShrS: { - __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrad(i.OutputSimd128Register(), tmp); break; } case kX64I32x4Add: { @@ -2739,9 +2931,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I32x4GtS: { @@ -2760,24 +2953,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); // NAN->0, negative->0 - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ maxps(dst, kScratchDoubleReg); + __ pxor(tmp2, tmp2); + __ maxps(dst, tmp2); // scratch: float representation of max_signed - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrld(kScratchDoubleReg, 1); // 0x7fffffff - __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 + __ pcmpeqd(tmp2, tmp2); + __ psrld(tmp2, 1); // 0x7fffffff + __ cvtdq2ps(tmp2, tmp2); // 0x4f000000 // tmp: convert (src-max_signed). 
// Positive overflow lanes -> 0x7FFFFFFF // Negative lanes -> 0 __ movaps(tmp, dst); - __ subps(tmp, kScratchDoubleReg); - __ cmpleps(kScratchDoubleReg, tmp); + __ subps(tmp, tmp2); + __ cmpleps(tmp2, tmp); __ cvttps2dq(tmp, tmp); - __ pxor(tmp, kScratchDoubleReg); - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ pmaxsd(tmp, kScratchDoubleReg); + __ pxor(tmp, tmp2); + __ pxor(tmp2, tmp2); + __ pmaxsd(tmp, tmp2); // convert. Overflow lanes above max_signed will be 0x80000000 __ cvttps2dq(dst, dst); // Add (src-max_signed) for overflow lanes. @@ -2797,7 +2991,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4ShrU: { - __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4MinU: { @@ -2814,10 +3010,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxud(dst, src); __ pcmpeqd(dst, src); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I32x4GeU: { @@ -2835,7 +3032,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -2853,7 +3050,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2887,11 +3084,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Shl: { - __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8ShrS: { - __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psraw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8SConvertI32x4: { @@ -2940,9 +3141,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I16x8GtS: { @@ -2970,7 +3172,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8ShrU: { - __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8UConvertI32x4: { @@ -3007,10 +3211,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = 
i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxuw(dst, src); __ pcmpeqw(dst, src); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I16x8GeU: { @@ -3024,7 +3229,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Splat: { CpuFeatureScope sse_scope(tasm(), SSSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -3042,7 +3247,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -3071,31 +3276,36 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - int8_t shift = i.InputInt8(1) & 0x7; - if (shift < 4) { - // For small shifts, doubling is faster. - for (int i = 0; i < shift; ++i) { - __ paddb(dst, dst); - } - } else { - // Mask off the unwanted bits before word-shifting. - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ packuswb(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); - __ psllw(dst, shift); - } + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Mask off the unwanted bits before word-shifting. + __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ packuswb(kScratchDoubleReg, kScratchDoubleReg); + __ pand(dst, kScratchDoubleReg); + __ movq(tmp_simd, i.InputRegister(1)); + __ psllw(dst, tmp_simd); break; } case kX64I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); // Unpack the bytes into words, do arithmetic shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psraw(kScratchDoubleReg, 8 + shift); - __ psraw(dst, 8 + shift); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psraw(kScratchDoubleReg, tmp_simd); + __ psraw(dst, tmp_simd); __ packsswb(dst, kScratchDoubleReg); break; } @@ -3119,7 +3329,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa // right= BBbb BBbb ...
BBbb BBbb @@ -3163,9 +3373,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I8x16Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I8x16GtS: { @@ -3194,13 +3405,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ShrU: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; // Unpack the bytes into words, do logical shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ psrlw(dst, 8 + shift); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ psrlw(dst, tmp_simd); __ packuswb(dst, kScratchDoubleReg); break; } @@ -3226,10 +3443,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxub(dst, src); __ pcmpeqb(dst, src); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I8x16GeU: { @@ -3561,9 +3779,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } - case kX64StackCheck: - __ CompareRoot(rsp, RootIndex::kStackLimit); - break; case kWord32AtomicExchangeInt8: { __ xchgb(i.InputRegister(0), i.MemoryOperand(1)); __ movsxbl(i.InputRegister(0), i.InputRegister(0)); @@ -4167,6 +4382,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); } +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { X64OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h index d6ac3f43df..8a0a45a916 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -140,9 +140,6 @@ namespace compiler { V(X64DecompressSigned) \ V(X64DecompressPointer) \ V(X64DecompressAny) \ - V(X64CompressSigned) \ - V(X64CompressPointer) \ - V(X64CompressAny) \ V(X64Movq) \ V(X64Movsd) \ V(X64Movss) \ @@ -158,12 +155,17 @@ namespace compiler { V(X64Push) \ V(X64Poke) \ V(X64Peek) \ - V(X64StackCheck) \ V(X64F64x2Splat) \ V(X64F64x2ExtractLane) \ V(X64F64x2ReplaceLane) \ V(X64F64x2Abs) \ V(X64F64x2Neg) \ + V(X64F64x2Add) \ + V(X64F64x2Sub) \ + V(X64F64x2Mul) \ + V(X64F64x2Div) \ + V(X64F64x2Min) \ + V(X64F64x2Max) \ V(X64F64x2Eq) \ V(X64F64x2Ne) \ V(X64F64x2Lt) \ @@ -181,6 +183,7 @@ namespace compiler { V(X64F32x4AddHoriz) \ V(X64F32x4Sub)
\ V(X64F32x4Mul) \ + V(X64F32x4Div) \ V(X64F32x4Min) \ V(X64F32x4Max) \ V(X64F32x4Eq) \ @@ -196,11 +199,15 @@ namespace compiler { V(X64I64x2Add) \ V(X64I64x2Sub) \ V(X64I64x2Mul) \ + V(X64I64x2MinS) \ + V(X64I64x2MaxS) \ V(X64I64x2Eq) \ V(X64I64x2Ne) \ V(X64I64x2GtS) \ V(X64I64x2GeS) \ V(X64I64x2ShrU) \ + V(X64I64x2MinU) \ + V(X64I64x2MaxU) \ V(X64I64x2GtU) \ V(X64I64x2GeU) \ V(X64I32x4Splat) \ diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index 6389ef2e50..e9fa450c38 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -129,6 +129,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F64x2ReplaceLane: case kX64F64x2Abs: case kX64F64x2Neg: + case kX64F64x2Add: + case kX64F64x2Sub: + case kX64F64x2Mul: + case kX64F64x2Div: + case kX64F64x2Min: + case kX64F64x2Max: case kX64F64x2Eq: case kX64F64x2Ne: case kX64F64x2Lt: @@ -146,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F32x4AddHoriz: case kX64F32x4Sub: case kX64F32x4Mul: + case kX64F32x4Div: case kX64F32x4Min: case kX64F32x4Max: case kX64F32x4Eq: @@ -161,11 +168,15 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64I64x2Add: case kX64I64x2Sub: case kX64I64x2Mul: + case kX64I64x2MinS: + case kX64I64x2MaxS: case kX64I64x2Eq: case kX64I64x2Ne: case kX64I64x2GtS: case kX64I64x2GeS: case kX64I64x2ShrU: + case kX64I64x2MinU: + case kX64I64x2MaxU: case kX64I64x2GtU: case kX64I64x2GeU: case kX64I32x4Splat: @@ -295,9 +306,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64DecompressSigned: case kX64DecompressPointer: case kX64DecompressAny: - case kX64CompressSigned: - case kX64CompressPointer: - case kX64CompressAny: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; @@ -346,7 +354,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64Movdqu: return instr->HasOutput() ? 
kIsLoadOperation : kHasSideEffect; - case kX64StackCheck: case kX64Peek: return kIsLoadOperation; diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc index a4908fb846..5379074bac 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator { AddressingMode GetEffectiveAddressMemoryOperand(Node* operand, InstructionOperand inputs[], size_t* input_count) { - if (selector()->CanAddressRelativeToRootsRegister()) { + { LoadMatcher<ExternalReferenceMatcher> m(operand); - if (m.index().HasValue() && m.object().HasValue()) { + if (m.index().HasValue() && m.object().HasValue() && + selector()->CanAddressRelativeToRootsRegister(m.object().Value())) { ptrdiff_t const delta = m.index().Value() + TurboAssemblerBase::RootRegisterOffsetForExternalReference( @@ -350,7 +351,8 @@ void InstructionSelector::VisitStore(Node* node) { StoreRepresentation store_rep = StoreRepresentationOf(node->op()); WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedOrCompressedPointer(store_rep.representation())); AddressingMode addressing_mode; InstructionOperand inputs[] = { @@ -528,6 +530,35 @@ void InstructionSelector::VisitWord64Xor(Node* node) { } } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + DCHECK(cont->IsBranch()); + const int effect_level = + GetEffectLevel(cont->true_block()->PredecessorAt(0)->control_input()); + + X64OperandGenerator g(this); + if (g.CanBeMemoryOperand(kX64Cmp, node, value, effect_level)) { + DCHECK_EQ(IrOpcode::kLoad, value->opcode()); + + // GetEffectiveAddressMemoryOperand can create at most 3 inputs. + static constexpr int kMaxInputCount = 3; + + size_t input_count = 0; + InstructionOperand inputs[kMaxInputCount]; + AddressingMode addressing_mode = + g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count); + opcode |= AddressingModeField::encode(addressing_mode); + DCHECK_LE(input_count, kMaxInputCount); + + EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont); + } else { + EmitWithContinuation(opcode, g.UseRegister(value), cont); + } +} + namespace { bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector, @@ -1238,23 +1269,23 @@ void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { } void InstructionSelector::VisitChangeTaggedToCompressed(Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressAny, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedPointerToCompressedPointer( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressPointer, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. 
+ return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedSignedToCompressedSigned( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressSigned, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeCompressedToTagged(Node* node) { @@ -1338,10 +1369,13 @@ void VisitFloatBinop(InstructionSelector* selector, Node* node, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, ArchOpcode avx_opcode, ArchOpcode sse_opcode) { X64OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempDoubleRegister()}; if (selector->IsSupported(AVX)) { - selector->Emit(avx_opcode, g.DefineAsRegister(node), g.Use(input)); + selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input), + arraysize(temps), temps); } else { - selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input)); + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input), + arraysize(temps), temps); } } @@ -1838,30 +1872,6 @@ void VisitWord64Compare(InstructionSelector* selector, Node* node, g.UseRegister(m.right().node()), cont); } } - if (selector->isolate() != nullptr) { - StackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> m( - selector->isolate(), node); - if (m.Matched()) { - // Compare(Load(js_stack_limit), LoadStackPointer) - if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute(); - InstructionCode opcode = cont->Encode(kX64StackCheck); - CHECK(cont->IsBranch()); - selector->EmitWithContinuation(opcode, cont); - return; - } - } - WasmStackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> wasm_m( - node); - if (wasm_m.Matched()) { - // This is a wasm stack check. By structure, we know that we can use the - // stack pointer directly, as wasm code does not modify the stack at points - // where stack checks are performed. 
- Node* left = node->InputAt(0); - LocationOperand rsp(InstructionOperand::EXPLICIT, LocationOperand::REGISTER, - InstructionSequence::DefaultRepresentation(), - RegisterCode::kRegCode_rsp); - return VisitCompareWithMemoryOperand(selector, kX64Cmp, left, rsp, cont); - } VisitWordCompare(selector, node, kX64Cmp, cont); } @@ -2157,6 +2167,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return VisitWordCompare(this, value, kX64Cmp32, cont); case IrOpcode::kWord32And: return VisitWordCompare(this, value, kX64Test32, cont); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2586,6 +2599,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16) #define SIMD_BINOP_LIST(V) \ + V(F64x2Add) \ + V(F64x2Sub) \ + V(F64x2Mul) \ + V(F64x2Div) \ + V(F64x2Min) \ + V(F64x2Max) \ V(F64x2Eq) \ V(F64x2Ne) \ V(F64x2Lt) \ @@ -2594,6 +2613,7 @@ VISIT_ATOMIC_BINOP(Xor) V(F32x4AddHoriz) \ V(F32x4Sub) \ V(F32x4Mul) \ + V(F32x4Div) \ V(F32x4Min) \ V(F32x4Max) \ V(F32x4Eq) \ @@ -2603,7 +2623,6 @@ VISIT_ATOMIC_BINOP(Xor) V(I64x2Add) \ V(I64x2Sub) \ V(I64x2Eq) \ - V(I64x2Ne) \ V(I64x2GtS) \ V(I32x4Add) \ V(I32x4AddHoriz) \ @@ -2612,12 +2631,10 @@ VISIT_ATOMIC_BINOP(Xor) V(I32x4MinS) \ V(I32x4MaxS) \ V(I32x4Eq) \ - V(I32x4Ne) \ V(I32x4GtS) \ V(I32x4GeS) \ V(I32x4MinU) \ V(I32x4MaxU) \ - V(I32x4GtU) \ V(I32x4GeU) \ V(I16x8SConvertI32x4) \ V(I16x8Add) \ @@ -2629,14 +2646,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I16x8MinS) \ V(I16x8MaxS) \ V(I16x8Eq) \ - V(I16x8Ne) \ V(I16x8GtS) \ V(I16x8GeS) \ V(I16x8AddSaturateU) \ V(I16x8SubSaturateU) \ V(I16x8MinU) \ V(I16x8MaxU) \ - V(I16x8GtU) \ V(I16x8GeU) \ V(I8x16SConvertI16x8) \ V(I8x16Add) \ @@ -2646,23 +2661,28 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16MinS) \ V(I8x16MaxS) \ V(I8x16Eq) \ - V(I8x16Ne) \ V(I8x16GtS) \ V(I8x16GeS) \ V(I8x16AddSaturateU) \ V(I8x16SubSaturateU) \ V(I8x16MinU) \ V(I8x16MaxU) \ - V(I8x16GtU) \ V(I8x16GeU) \ V(S128And) \ V(S128Or) \ V(S128Xor) #define SIMD_BINOP_ONE_TEMP_LIST(V) \ + V(I64x2Ne) \ V(I64x2GeS) \ V(I64x2GtU) \ - V(I64x2GeU) + V(I64x2GeU) \ + V(I32x4Ne) \ + V(I32x4GtU) \ + V(I16x8Ne) \ + V(I16x8GtU) \ + V(I8x16Ne) \ + V(I8x16GtU) #define SIMD_UNOP_LIST(V) \ V(F32x4SConvertI32x4) \ @@ -2686,16 +2706,17 @@ VISIT_ATOMIC_BINOP(Xor) #define SIMD_SHIFT_OPCODES(V) \ V(I64x2Shl) \ - V(I64x2ShrS) \ V(I64x2ShrU) \ V(I32x4Shl) \ V(I32x4ShrS) \ V(I32x4ShrU) \ V(I16x8Shl) \ V(I16x8ShrS) \ - V(I16x8ShrU) \ - V(I8x16Shl) \ - V(I8x16ShrS) \ + V(I16x8ShrU) + +#define SIMD_NARROW_SHIFT_OPCODES(V) \ + V(I8x16Shl) \ + V(I8x16ShrS) \ V(I8x16ShrU) #define SIMD_ANYTRUE_LIST(V) \ @@ -2745,17 +2766,30 @@ SIMD_TYPES(VISIT_SIMD_EXTRACT_LANE) SIMD_TYPES(VISIT_SIMD_REPLACE_LANE) #undef VISIT_SIMD_REPLACE_LANE -#define VISIT_SIMD_SHIFT(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - int32_t value = OpParameter<int32_t>(node->op()); \ - Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ - g.UseRegister(node->InputAt(0)), g.UseImmediate(value)); \ +#define VISIT_SIMD_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ } SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT) #undef VISIT_SIMD_SHIFT #undef SIMD_SHIFT_OPCODES +#define 
VISIT_SIMD_NARROW_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ + } +SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT) +#undef VISIT_SIMD_NARROW_SHIFT +#undef SIMD_NARROW_SHIFT_OPCODES + #define VISIT_SIMD_UNOP(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ X64OperandGenerator g(this); \ @@ -2799,12 +2833,12 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) #undef VISIT_SIMD_ANYTRUE #undef SIMD_ANYTRUE_LIST -#define VISIT_SIMD_ALLTRUE(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - InstructionOperand temps[] = {g.TempRegister()}; \ - Emit(kX64##Opcode, g.DefineAsRegister(node), \ - g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ +#define VISIT_SIMD_ALLTRUE(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE @@ -2820,14 +2854,16 @@ void InstructionSelector::VisitS128Select(Node* node) { void InstructionSelector::VisitF64x2Abs(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF64x2Neg(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { @@ -2836,6 +2872,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitI64x2ShrS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempRegister()}; + // Use fixed to rcx, to use sarq_cl in codegen. 
+ Emit(kX64I64x2ShrS, g.DefineSameAsFirst(node), + g.UseUniqueRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), rcx), + arraysize(temps), temps); +} + void InstructionSelector::VisitI64x2Mul(Node* node) { X64OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register(), @@ -2845,15 +2890,59 @@ void InstructionSelector::VisitI64x2Mul(Node* node) { g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } +void InstructionSelector::VisitI64x2MinS(Node* node) { + X64OperandGenerator g(this); + if (this->IsSupported(SSE4_2)) { + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); + } else { + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister(), + g.TempRegister()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + arraysize(temps), temps); + } +} + +void InstructionSelector::VisitI64x2MaxS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MaxS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MinU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MinU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MaxU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MaxU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) { X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; Emit(kX64I32x4SConvertF32x4, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) { X64OperandGenerator g(this); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } @@ -2997,12 +3086,12 @@ static const ShuffleEntry arch_shuffles[] = { true}, {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kX64S8x8Reverse, - false, - false}, + true, + true}, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kX64S8x4Reverse, - false, - false}, + true, + true}, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kX64S8x2Reverse, true, @@ -3060,6 +3149,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { SwapShuffleInputs(node); is_swizzle = false; // It's simpler to just handle the general case. no_same_as_first = false; // SSE requires same-as-first. + // TODO(v8:9608): also see v8:9083 + src1_needs_reg = true; opcode = kX64S8x16Alignr; // palignr takes a single imm8 offset. imms[imm_count++] = offset;
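For reference, the NaN- and minus-zero-aware minimum emitted by the new kX64F64x2Min case in the code-generator hunk earlier in this diff can be written with SSE2 intrinsics as below. This is an illustrative sketch of the emitted instruction sequence, not code from the patch: minpd is performed in both operand orders because it returns its second operand for NaN inputs and for min(+0, -0), the two results are or-merged to propagate -0.0 and NaNs, and the final unordered compare, shift, and andnot canonicalize any NaN lane by clearing its low 51 payload bits.

#include <emmintrin.h>  // SSE2 intrinsics

// Per-lane f64x2 minimum with wasm semantics, mirroring kX64F64x2Min.
__m128d F64x2Min(__m128d dst, __m128d src1) {
  __m128d scratch = src1;               // movapd kScratchDoubleReg, src1
  scratch = _mm_min_pd(scratch, dst);   // minpd (src1-first order)
  dst = _mm_min_pd(dst, src1);          // minpd (dst-first order)
  scratch = _mm_or_pd(scratch, dst);    // orpd: propagate -0.0 and NaNs
  dst = _mm_cmpunord_pd(dst, scratch);  // cmppd ..., 3: all-ones in NaN lanes
  scratch = _mm_or_pd(scratch, dst);    // orpd: force NaN lanes to all-ones
  // psrlq 13 / andnpd: keep only sign, exponent and quiet bit in NaN lanes,
  // producing a canonical quiet NaN; non-NaN lanes pass through unchanged.
  __m128i mask = _mm_srli_epi64(_mm_castpd_si128(dst), 13);
  return _mm_andnot_pd(_mm_castsi128_pd(mask), scratch);
}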