diff options
Diffstat (limited to 'deps/v8/src/compiler/backend/x64/code-generator-x64.cc')
-rw-r--r-- | deps/v8/src/compiler/backend/x64/code-generator-x64.cc | 343 |
1 files changed, 222 insertions, 121 deletions
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index a4f82b153b..44da872f26 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -361,7 +361,6 @@ class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap { void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - X64OperandConverter& i, // NOLINT(runtime/references) int pc) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); @@ -370,9 +369,9 @@ void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen, } } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - X64OperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + X64OperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -1876,30 +1875,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg); break; case kX64Movsxbl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxbl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxbl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxbl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxbq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + 
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxbq); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxbq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxbq); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movb: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { @@ -1911,29 +1910,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movsxwl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxwl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxwl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxwl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxwq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxwq); break; case kX64Movzxwq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxwq); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movw: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); 
size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { @@ -1945,7 +1944,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { if (HasAddressingMode(instr)) { __ movl(i.OutputRegister(), i.MemoryOperand()); @@ -1969,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxlq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxlq); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; @@ -2021,7 +2020,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ movq(i.OutputRegister(), i.MemoryOperand()); } else { @@ -2036,7 +2035,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movss: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ Movss(i.OutputDoubleRegister(), i.MemoryOperand()); } else { @@ -2046,7 +2045,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kX64Movsd: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); @@ -2069,7 +2068,7 @@ 
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64Movdqu: { CpuFeatureScope sse_scope(tasm(), SSSE3); - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand()); } else { @@ -2293,6 +2292,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ movq(i.OutputDoubleRegister(), kScratchRegister); break; } + case kX64F64x2Sqrt: { + __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kX64F64x2Add: { ASSEMBLE_SSE_BINOP(addpd); break; @@ -2350,22 +2353,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64F64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Lt: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Le: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } + case kX64F64x2Qfma: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movapd(tmp, 
i.InputSimd128Register(2)); + __ mulpd(tmp, i.InputSimd128Register(1)); + __ addpd(i.OutputSimd128Register(), tmp); + } + break; + } + case kX64F64x2Qfms: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movapd(tmp, i.InputSimd128Register(2)); + __ mulpd(tmp, i.InputSimd128Register(1)); + __ subpd(i.OutputSimd128Register(), tmp); + } break; } // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below @@ -2445,6 +2474,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; } + case kX64F32x4Sqrt: { + __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kX64F32x4RecipApprox: { __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; @@ -2538,6 +2571,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } + case kX64F32x4Qfma: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movaps(tmp, i.InputSimd128Register(2)); + __ mulps(tmp, i.InputSimd128Register(1)); + __ addps(i.OutputSimd128Register(), tmp); + } + break; + } + case kX64F32x4Qfms: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movaps(tmp, i.InputSimd128Register(2)); + __ mulps(tmp, i.InputSimd128Register(1)); + __ subps(i.OutputSimd128Register(), tmp); + } + break; + } case kX64I64x2Splat: { CpuFeatureScope sse_scope(tasm(), SSE3); 
XMMRegister dst = i.OutputSimd128Register(); @@ -2577,7 +2636,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2Shl: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 64. + __ andq(shift, Immediate(63)); + __ movq(tmp, shift); __ psllq(i.OutputSimd128Register(), tmp); break; } @@ -2588,6 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); Register tmp = i.ToRegister(instr->TempAt(0)); + // Modulo 64 not required as sarq_cl will mask cl to 6 bits. // lower quadword __ pextrq(tmp, src, 0x0); @@ -2640,15 +2703,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( if (CpuFeatures::IsSupported(SSE4_2)) { CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(1); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); XMMRegister tmp = i.TempSimd128Register(0); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); - __ movaps(tmp, src); - __ pcmpgtq(src, dst); - __ blendvpd(tmp, dst); // implicit use of xmm0 as mask - __ movaps(dst, tmp); + __ movaps(tmp, src1); + __ pcmpgtq(tmp, src0); + __ movaps(dst, src1); + __ blendvpd(dst, src0); // implicit use of xmm0 as mask } else { CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); @@ -2689,11 +2752,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src = i.InputSimd128Register(1); XMMRegister tmp = i.TempSimd128Register(0); DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); __ movaps(tmp, src); - __ pcmpgtq(src, dst); - __ blendvpd(dst, tmp); // implicit use of xmm0 as mask + __ 
pcmpgtq(tmp, dst); + __ blendvpd(dst, src); // implicit use of xmm0 as mask break; } case kX64I64x2Eq: { @@ -2732,7 +2795,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2ShrU: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 64. + __ andq(shift, Immediate(63)); + __ movq(tmp, shift); __ psrlq(i.OutputSimd128Register(), tmp); break; } @@ -2740,24 +2806,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(1); - XMMRegister src_tmp = i.TempSimd128Register(0); - XMMRegister dst_tmp = i.TempSimd128Register(1); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); + XMMRegister tmp0 = i.TempSimd128Register(0); + XMMRegister tmp1 = i.TempSimd128Register(1); + DCHECK_EQ(tmp1, xmm0); - __ movaps(src_tmp, src); - __ movaps(dst_tmp, dst); + __ movaps(dst, src1); + __ movaps(tmp0, src0); - __ pcmpeqd(src, src); - __ psllq(src, 63); + __ pcmpeqd(tmp1, tmp1); + __ psllq(tmp1, 63); - __ pxor(dst_tmp, src); - __ pxor(src, src_tmp); + __ pxor(tmp0, tmp1); + __ pxor(tmp1, dst); - __ pcmpgtq(src, dst_tmp); - __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask - __ movaps(dst, src_tmp); + __ pcmpgtq(tmp1, tmp0); + __ blendvpd(dst, src0); // implicit use of xmm0 as mask break; } case kX64I64x2MaxU: { @@ -2765,22 +2830,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister src_tmp = i.TempSimd128Register(0); - XMMRegister dst_tmp = i.TempSimd128Register(1); + 
XMMRegister dst_tmp = i.TempSimd128Register(0); + XMMRegister tmp = i.TempSimd128Register(1); DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); - __ movaps(src_tmp, src); __ movaps(dst_tmp, dst); - __ pcmpeqd(src, src); - __ psllq(src, 63); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 63); - __ pxor(dst_tmp, src); - __ pxor(src, src_tmp); + __ pxor(dst_tmp, tmp); + __ pxor(tmp, src); - __ pcmpgtq(src, dst_tmp); - __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask + __ pcmpgtq(tmp, dst_tmp); + __ blendvpd(dst, src); // implicit use of xmm0 as mask break; } case kX64I64x2GtU: { @@ -2820,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); if (HasRegisterInput(instr, 0)) { - __ movd(dst, i.InputRegister(0)); + __ Movd(dst, i.InputRegister(0)); } else { - __ movd(dst, i.InputOperand(0)); + __ Movd(dst, i.InputOperand(0)); } - __ pshufd(dst, dst, 0x0); + __ Pshufd(dst, dst, 0x0); break; } case kX64I32x4ExtractLane: { @@ -2878,28 +2942,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); if (dst == src) { - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psignd(dst, kScratchDoubleReg); + __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ Psignd(dst, kScratchDoubleReg); } else { - __ pxor(dst, dst); - __ psubd(dst, src); + __ Pxor(dst, dst); + __ Psubd(dst, src); } break; } case kX64I32x4Shl: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ pslld(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. 
+ __ andq(shift, Immediate(31)); + __ Movq(tmp, shift); + __ Pslld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4ShrS: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ psrad(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ andq(shift, Immediate(31)); + __ Movq(tmp, shift); + __ Psrad(i.OutputSimd128Register(), tmp); break; } case kX64I32x4Add: { - __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4AddHoriz: { @@ -2908,45 +2978,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Sub: { - __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Mul: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4MinS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4MaxS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Eq: { - __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Ne: { XMMRegister tmp = i.TempSimd128Register(0); - __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqd(tmp, tmp); - __ pxor(i.OutputSimd128Register(), tmp); + __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpeqd(tmp, tmp); + __ 
Pxor(i.OutputSimd128Register(), tmp); break; } case kX64I32x4GtS: { - __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4GeS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - __ pminsd(dst, src); - __ pcmpeqd(dst, src); + __ Pminsd(dst, src); + __ Pcmpeqd(dst, src); break; } case kX64I32x4UConvertF32x4: { @@ -2992,18 +3062,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4ShrU: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ psrld(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ andq(shift, Immediate(31)); + __ Movq(tmp, shift); + __ Psrld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4MinU: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4MaxU: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4GtU: { @@ -3011,18 +3084,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); XMMRegister tmp = i.TempSimd128Register(0); - __ pmaxud(dst, src); - __ pcmpeqd(dst, src); - __ pcmpeqd(tmp, tmp); - __ pxor(dst, tmp); + __ Pmaxud(dst, src); + __ Pcmpeqd(dst, src); + __ Pcmpeqd(tmp, tmp); + __ Pxor(dst, tmp); break; } case kX64I32x4GeU: { CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - __ pminud(dst, src); - __ pcmpeqd(dst, src); + __ Pminud(dst, 
src); + __ Pcmpeqd(dst, src); break; } case kX64S128Zero: { @@ -3044,17 +3117,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I16x8ExtractLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); Register dst = i.OutputRegister(); - __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1)); - __ movsxwl(dst, dst); + __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1)); break; } case kX64I16x8ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); if (HasRegisterInput(instr, 2)) { - __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), + __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { - __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); + __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); } break; } @@ -3085,13 +3157,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8Shl: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ andq(shift, Immediate(15)); + __ movq(tmp, shift); __ psllw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8ShrS: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ andq(shift, Immediate(15)); + __ movq(tmp, shift); __ psraw(i.OutputSimd128Register(), tmp); break; } @@ -3173,7 +3251,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8ShrU: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. 
+ __ andq(shift, Immediate(15)); + __ movq(tmp, shift); __ psrlw(i.OutputSimd128Register(), tmp); break; } @@ -3230,28 +3311,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSSE3); XMMRegister dst = i.OutputSimd128Register(); if (HasRegisterInput(instr, 0)) { - __ movd(dst, i.InputRegister(0)); + __ Movd(dst, i.InputRegister(0)); } else { - __ movd(dst, i.InputOperand(0)); + __ Movd(dst, i.InputOperand(0)); } - __ xorps(kScratchDoubleReg, kScratchDoubleReg); - __ pshufb(dst, kScratchDoubleReg); + __ Xorps(kScratchDoubleReg, kScratchDoubleReg); + __ Pshufb(dst, kScratchDoubleReg); break; } case kX64I8x16ExtractLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); Register dst = i.OutputRegister(); - __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); - __ movsxbl(dst, dst); + __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); break; } case kX64I8x16ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); if (HasRegisterInput(instr, 2)) { - __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), + __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { - __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); + __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); } break; } @@ -3279,15 +3359,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // Temp registers for shift mask andadditional moves to XMM registers. Register tmp = i.ToRegister(instr->TempAt(0)); XMMRegister tmp_simd = i.TempSimd128Register(1); + Register shift = i.InputRegister(1); // Mask off the unwanted bits before word-shifting. __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ movq(tmp, i.InputRegister(1)); + // Take shift value modulo 8. 
+ __ andq(shift, Immediate(7)); + __ movq(tmp, shift); __ addq(tmp, Immediate(8)); __ movq(tmp_simd, tmp); __ psrlw(kScratchDoubleReg, tmp_simd); __ packuswb(kScratchDoubleReg, kScratchDoubleReg); __ pand(dst, kScratchDoubleReg); - __ movq(tmp_simd, i.InputRegister(1)); + __ movq(tmp_simd, shift); __ psllw(dst, tmp_simd); break; } @@ -3302,6 +3385,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ punpcklbw(dst, dst); // Prepare shift value __ movq(tmp, i.InputRegister(1)); + // Take shift value modulo 8. + __ andq(tmp, Immediate(7)); __ addq(tmp, Immediate(8)); __ movq(tmp_simd, tmp); __ psraw(kScratchDoubleReg, tmp_simd); @@ -3414,6 +3499,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ punpcklbw(dst, dst); // Prepare shift value __ movq(tmp, i.InputRegister(1)); + // Take shift value modulo 8. + __ andq(tmp, Immediate(7)); __ addq(tmp, Immediate(8)); __ movq(tmp_simd, tmp); __ psrlw(kScratchDoubleReg, tmp_simd); @@ -3422,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I8x16AddSaturateU: { - __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I8x16SubSaturateU: { @@ -3487,10 +3574,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64S128Select: { // Mask used here is stored in dst. 
XMMRegister dst = i.OutputSimd128Register(); - __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); - __ xorps(kScratchDoubleReg, i.InputSimd128Register(2)); - __ andps(dst, kScratchDoubleReg); - __ xorps(dst, i.InputSimd128Register(2)); + __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1)); + __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2)); + __ Andps(dst, kScratchDoubleReg); + __ Xorps(dst, i.InputSimd128Register(2)); + break; + } + case kX64S8x16Swizzle: { + CpuFeatureScope sse_scope(tasm(), SSSE3); + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister mask = i.TempSimd128Register(0); + + // Out-of-range indices should return 0, add 112 so that any value > 15 + // saturates to 128 (top bit set), so pshufb will zero that lane. + __ Move(mask, static_cast<uint32_t>(0x70707070)); + __ Pshufd(mask, mask, 0x0); + __ Paddusb(mask, i.InputSimd128Register(1)); + __ Pshufb(dst, mask); break; } case kX64S8x16Shuffle: { @@ -3507,10 +3608,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SetupShuffleMaskOnStack(tasm(), mask); - __ pshufb(dst, Operand(rsp, 0)); + __ Pshufb(dst, Operand(rsp, 0)); } else { // two input operands DCHECK_EQ(6, instr->InputCount()); - ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0); + ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0); uint32_t mask[4] = {}; for (int j = 5; j > 1; j--) { uint32_t lanes = i.InputUint32(j); @@ -3520,13 +3621,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } } SetupShuffleMaskOnStack(tasm(), mask); - __ pshufb(kScratchDoubleReg, Operand(rsp, 0)); + __ Pshufb(kScratchDoubleReg, Operand(rsp, 0)); uint32_t mask1[4] = {}; if (instr->InputAt(1)->IsSimd128Register()) { XMMRegister src1 = i.InputSimd128Register(1); if (src1 != dst) __ movups(dst, src1); } else { - __ movups(dst, i.InputOperand(1)); + __ Movups(dst, i.InputOperand(1)); } for (int j = 5; j > 1; j--) { uint32_t lanes = 
i.InputUint32(j); @@ -3536,8 +3637,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } } SetupShuffleMaskOnStack(tasm(), mask1); - __ pshufb(dst, Operand(rsp, 0)); - __ por(dst, kScratchDoubleReg); + __ Pshufb(dst, Operand(rsp, 0)); + __ Por(dst, kScratchDoubleReg); } __ movq(rsp, tmp); break; |