Diffstat (limited to 'deps/v8/src/compiler/backend/x64/code-generator-x64.cc')
-rw-r--r--  deps/v8/src/compiler/backend/x64/code-generator-x64.cc  343
1 file changed, 222 insertions, 121 deletions
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
index a4f82b153b..44da872f26 100644
--- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
@@ -361,7 +361,6 @@ class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
InstructionCode opcode, Instruction* instr,
- X64OperandConverter& i, // NOLINT(runtime/references)
int pc) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
@@ -370,9 +369,9 @@ void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
}
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
- X64OperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode, Instruction* instr,
+ X64OperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -1876,30 +1875,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
break;
case kX64Movsxbl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxbq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxbq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxbq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movb: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
@@ -1911,29 +1910,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movsxwl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxwl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxwq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxwq);
break;
case kX64Movzxwq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxwq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movw: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
@@ -1945,7 +1944,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
if (HasAddressingMode(instr)) {
__ movl(i.OutputRegister(), i.MemoryOperand());
@@ -1969,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxlq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxlq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
@@ -2021,7 +2020,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ movq(i.OutputRegister(), i.MemoryOperand());
} else {
@@ -2036,7 +2035,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movss:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
} else {
@@ -2046,7 +2045,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kX64Movsd: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
@@ -2069,7 +2068,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64Movdqu: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
} else {
@@ -2293,6 +2292,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movq(i.OutputDoubleRegister(), kScratchRegister);
break;
}
+ case kX64F64x2Sqrt: {
+ __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kX64F64x2Add: {
ASSEMBLE_SSE_BINOP(addpd);
break;
@@ -2350,22 +2353,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F64x2Qfma: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movapd(tmp, i.InputSimd128Register(2));
+ __ mulpd(tmp, i.InputSimd128Register(1));
+ __ addpd(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
+ case kX64F64x2Qfms: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movapd(tmp, i.InputSimd128Register(2));
+ __ mulpd(tmp, i.InputSimd128Register(1));
+ __ subpd(i.OutputSimd128Register(), tmp);
+ }
break;
}
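
A note on the two paths in the kX64F64x2Qfma/Qfms cases above: vfmadd231pd computes dst = src1 * src2 + dst in one rounding step, and vfnmadd231pd computes dst = dst - src1 * src2, while the SSE fallback (mulpd, then addpd/subpd) rounds twice. The lanes can therefore differ in the last bit between the two paths, which the wasm QFMA ("quasi-fused" multiply-add) proposal permits, as I understand it. A minimal scalar illustration of the single- versus double-rounding difference (illustrative only, not V8 code):

    #include <cmath>
    #include <cstdio>

    int main() {
      // x*x needs ~106 bits; the fused path still sees the low bits,
      // while the mul-then-add path has already rounded them away.
      double x = 1.0 + std::ldexp(1.0, -27);  // 1 + 2^-27, exactly representable
      double prod = x * x;                    // rounded product, like mulpd
      double fused = std::fma(x, x, -prod);   // exact residual 2^-54, like FMA3
      double split = x * x - prod;            // two roundings: always 0.0
      std::printf("fused residual = %g, split residual = %g\n", fused, split);
    }
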
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
@@ -2445,6 +2474,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
+ case kX64F32x4Sqrt: {
+ __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
@@ -2538,6 +2571,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
+ case kX64F32x4Qfma: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movaps(tmp, i.InputSimd128Register(2));
+ __ mulps(tmp, i.InputSimd128Register(1));
+ __ addps(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
+ case kX64F32x4Qfms: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movaps(tmp, i.InputSimd128Register(2));
+ __ mulps(tmp, i.InputSimd128Register(1));
+ __ subps(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
case kX64I64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
@@ -2577,7 +2636,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 64.
+ __ andq(shift, Immediate(63));
+ __ movq(tmp, shift);
__ psllq(i.OutputSimd128Register(), tmp);
break;
}
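
The andq above implements WebAssembly's shift semantics: the shift count is taken modulo the lane width (64 here), whereas the hardware psllq zeroes the whole lane for any count of 64 or more. A scalar sketch of the required per-lane behavior (hypothetical helper, not V8 code):

    #include <cstdint>

    // Wasm i64x2.shl: the count wraps mod 64, so shifting by 68 behaves
    // like shifting by 4 instead of zeroing the lane as psllq would.
    uint64_t wasm_i64_shl(uint64_t lane, uint64_t count) {
      return lane << (count & 63);
    }
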
@@ -2588,6 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.ToRegister(instr->TempAt(0));
+ // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
// lower quadword
__ pextrq(tmp, src, 0x0);
@@ -2640,15 +2703,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
- XMMRegister src = i.InputSimd128Register(1);
+ XMMRegister src0 = i.InputSimd128Register(0);
+ XMMRegister src1 = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
- DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
- __ movaps(tmp, src);
- __ pcmpgtq(src, dst);
- __ blendvpd(tmp, dst); // implicit use of xmm0 as mask
- __ movaps(dst, tmp);
+ __ movaps(tmp, src1);
+ __ pcmpgtq(tmp, src0);
+ __ movaps(dst, src1);
+ __ blendvpd(dst, src0); // implicit use of xmm0 as mask
} else {
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
@@ -2689,11 +2752,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
__ movaps(tmp, src);
- __ pcmpgtq(src, dst);
- __ blendvpd(dst, tmp); // implicit use of xmm0 as mask
+ __ pcmpgtq(tmp, dst);
+ __ blendvpd(dst, src); // implicit use of xmm0 as mask
break;
}
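
The non-VEX encoding of blendvpd reads its selector implicitly from xmm0 (hence the DCHECK_EQ(tmp, xmm0) above) and, per 64-bit lane, takes the source operand's lane when the top bit of xmm0's lane is set. Feeding it the all-ones/all-zeros result of pcmpgtq turns it into a branch-free lane select, which is how kX64I64x2MinS/MaxS get signed min/max without a native instruction. A per-lane scalar model of the MinS sequence (illustrative only):

    #include <cstdint>

    // "movaps tmp, b; pcmpgtq tmp, a; movaps dst, b; blendvpd dst, a":
    // tmp (= xmm0) is all-ones where b > a, and blendvpd picks a's lane
    // wherever the selector's top bit is set, yielding min(a, b).
    int64_t i64x2_min_s(int64_t a, int64_t b) {
      uint64_t selector = (b > a) ? ~UINT64_C(0) : 0;  // pcmpgtq
      return (selector >> 63) ? a : b;                 // blendvpd keys on bit 63
    }
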
case kX64I64x2Eq: {
@@ -2732,7 +2795,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 64.
+ __ andq(shift, Immediate(63));
+ __ movq(tmp, shift);
__ psrlq(i.OutputSimd128Register(), tmp);
break;
}
@@ -2740,24 +2806,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
- XMMRegister src = i.InputSimd128Register(1);
- XMMRegister src_tmp = i.TempSimd128Register(0);
- XMMRegister dst_tmp = i.TempSimd128Register(1);
- DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ XMMRegister src0 = i.InputSimd128Register(0);
+ XMMRegister src1 = i.InputSimd128Register(1);
+ XMMRegister tmp0 = i.TempSimd128Register(0);
+ XMMRegister tmp1 = i.TempSimd128Register(1);
+ DCHECK_EQ(tmp1, xmm0);
- __ movaps(src_tmp, src);
- __ movaps(dst_tmp, dst);
+ __ movaps(dst, src1);
+ __ movaps(tmp0, src0);
- __ pcmpeqd(src, src);
- __ psllq(src, 63);
+ __ pcmpeqd(tmp1, tmp1);
+ __ psllq(tmp1, 63);
- __ pxor(dst_tmp, src);
- __ pxor(src, src_tmp);
+ __ pxor(tmp0, tmp1);
+ __ pxor(tmp1, dst);
- __ pcmpgtq(src, dst_tmp);
- __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask
- __ movaps(dst, src_tmp);
+ __ pcmpgtq(tmp1, tmp0);
+ __ blendvpd(dst, src0); // implicit use of xmm0 as mask
break;
}
case kX64I64x2MaxU: {
@@ -2765,22 +2830,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- XMMRegister src_tmp = i.TempSimd128Register(0);
- XMMRegister dst_tmp = i.TempSimd128Register(1);
+ XMMRegister dst_tmp = i.TempSimd128Register(0);
+ XMMRegister tmp = i.TempSimd128Register(1);
DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
- __ movaps(src_tmp, src);
__ movaps(dst_tmp, dst);
- __ pcmpeqd(src, src);
- __ psllq(src, 63);
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 63);
- __ pxor(dst_tmp, src);
- __ pxor(src, src_tmp);
+ __ pxor(dst_tmp, tmp);
+ __ pxor(tmp, src);
- __ pcmpgtq(src, dst_tmp);
- __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask
+ __ pcmpgtq(tmp, dst_tmp);
+ __ blendvpd(dst, src); // implicit use of xmm0 as mask
break;
}
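
SSE4.2 provides the signed pcmpgtq but no unsigned 64-bit compare, so the MinU/MaxU cases above flip the sign bit of both operands first: pcmpeqd/psllq 63 materializes 0x8000000000000000 in every lane, and the pxor pair applies it to each input. The identity is that a <u b holds exactly when (a ^ 2^63) <s (b ^ 2^63). Scalar sketch (illustrative only):

    #include <cstdint>

    // Unsigned order via sign-bit flip: biasing both operands by 2^63
    // maps the unsigned comparison onto the signed one.
    bool u64_less(uint64_t a, uint64_t b) {
      const uint64_t bias = UINT64_C(1) << 63;  // pcmpeqd + psllq 63
      return static_cast<int64_t>(a ^ bias) < static_cast<int64_t>(b ^ bias);
    }
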
case kX64I64x2GtU: {
@@ -2820,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
- __ movd(dst, i.InputRegister(0));
+ __ Movd(dst, i.InputRegister(0));
} else {
- __ movd(dst, i.InputOperand(0));
+ __ Movd(dst, i.InputOperand(0));
}
- __ pshufd(dst, dst, 0x0);
+ __ Pshufd(dst, dst, 0x0);
break;
}
case kX64I32x4ExtractLane: {
@@ -2878,28 +2942,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psignd(dst, kScratchDoubleReg);
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Psignd(dst, kScratchDoubleReg);
} else {
- __ pxor(dst, dst);
- __ psubd(dst, src);
+ __ Pxor(dst, dst);
+ __ Psubd(dst, src);
}
break;
}
case kX64I32x4Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ pslld(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Pslld(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ psrad(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Psrad(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4Add: {
- __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4AddHoriz: {
@@ -2908,45 +2978,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4Sub: {
- __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Mul: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MinS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MaxS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Eq: {
- __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Ne: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
- __ pcmpeqd(tmp, tmp);
- __ pxor(i.OutputSimd128Register(), tmp);
+ __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpeqd(tmp, tmp);
+ __ Pxor(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4GtS: {
- __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4GeS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- __ pminsd(dst, src);
- __ pcmpeqd(dst, src);
+ __ Pminsd(dst, src);
+ __ Pcmpeqd(dst, src);
break;
}
case kX64I32x4UConvertF32x4: {
@@ -2992,18 +3062,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I32x4ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ psrld(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Psrld(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4MinU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MaxU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4GtU: {
@@ -3011,18 +3084,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
- __ pmaxud(dst, src);
- __ pcmpeqd(dst, src);
- __ pcmpeqd(tmp, tmp);
- __ pxor(dst, tmp);
+ __ Pmaxud(dst, src);
+ __ Pcmpeqd(dst, src);
+ __ Pcmpeqd(tmp, tmp);
+ __ Pxor(dst, tmp);
break;
}
case kX64I32x4GeU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- __ pminud(dst, src);
- __ pcmpeqd(dst, src);
+ __ Pminud(dst, src);
+ __ Pcmpeqd(dst, src);
break;
}
case kX64S128Zero: {
@@ -3044,17 +3117,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I16x8ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
- __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsxwl(dst, dst);
+ __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I16x8ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
- __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
+ __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
- __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@@ -3085,13 +3157,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kX64I16x8ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
break;
}
@@ -3173,7 +3251,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
@@ -3230,28 +3311,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSSE3);
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
- __ movd(dst, i.InputRegister(0));
+ __ Movd(dst, i.InputRegister(0));
} else {
- __ movd(dst, i.InputOperand(0));
+ __ Movd(dst, i.InputOperand(0));
}
- __ xorps(kScratchDoubleReg, kScratchDoubleReg);
- __ pshufb(dst, kScratchDoubleReg);
+ __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
+ __ Pshufb(dst, kScratchDoubleReg);
break;
}
case kX64I8x16ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
- __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsxbl(dst, dst);
+ __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I8x16ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
- __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
+ __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
- __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@@ -3279,15 +3359,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Temp registers for shift mask and additional moves to XMM registers.
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
+ Register shift = i.InputRegister(1);
// Mask off the unwanted bits before word-shifting.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
- __ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(shift, Immediate(7));
+ __ movq(tmp, shift);
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
__ packuswb(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
- __ movq(tmp_simd, i.InputRegister(1));
+ __ movq(tmp_simd, shift);
__ psllw(dst, tmp_simd);
break;
}
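
x64 has no per-byte SIMD shift, so kX64I8x16Shl synthesizes one from the 16-bit psllw: it first clears the top n bits of every byte so the word shift cannot carry bits into the neighbouring byte. The per-byte mask 0xFF >> n is built by shifting an all-ones register right by n + 8 and re-packing with packuswb. Per-byte scalar sketch (illustrative only):

    #include <cstdint>

    // i8x16.shl lane semantics, built the same way as the code above.
    uint8_t i8_shl(uint8_t lane, unsigned count) {
      count &= 7;                              // wasm: count mod 8
      uint8_t keep = uint8_t(0xFFu >> count);  // mask from psrlw + packuswb
      return uint8_t((lane & keep) << count);  // psllw can no longer carry
                                               // across byte boundaries
    }
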
@@ -3302,6 +3385,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psraw(kScratchDoubleReg, tmp_simd);
@@ -3414,6 +3499,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
@@ -3422,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16AddSaturateU: {
- __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I8x16SubSaturateU: {
@@ -3487,10 +3574,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64S128Select: {
// Mask used here is stored in dst.
XMMRegister dst = i.OutputSimd128Register();
- __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
- __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
- __ andps(dst, kScratchDoubleReg);
- __ xorps(dst, i.InputSimd128Register(2));
+ __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+ __ Andps(dst, kScratchDoubleReg);
+ __ Xorps(dst, i.InputSimd128Register(2));
+ break;
+ }
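
The four instructions in kX64S128Select compute the classic three-operation bitwise select: with the mask held in dst, (mask & (a ^ b)) ^ b equals (a & mask) | (b & ~mask) bit for bit, saving an instruction and a register over the naive and/andnot/or form. One-line scalar check (illustrative only):

    #include <cstdint>

    // v128.select per chunk: a mask bit of 1 picks a, 0 picks b.
    uint64_t select_bits(uint64_t mask, uint64_t a, uint64_t b) {
      return ((a ^ b) & mask) ^ b;
    }
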
+ case kX64S8x16Swizzle: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister mask = i.TempSimd128Register(0);
+
+ // Out-of-range indices should return 0; adding 112 with unsigned saturation
+ // pushes any index > 15 to >= 128 (top bit set), so pshufb zeroes that lane.
+ __ Move(mask, static_cast<uint32_t>(0x70707070));
+ __ Pshufd(mask, mask, 0x0);
+ __ Paddusb(mask, i.InputSimd128Register(1));
+ __ Pshufb(dst, mask);
break;
}
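
pshufb zeroes a destination byte whenever the corresponding mask byte has its top bit set, and otherwise selects the source byte indexed by the mask's low four bits. Wasm's swizzle requires indices 16..255 to produce 0, and the saturating add of 0x70 arranges exactly that: indices 0..15 keep their low nibble while anything larger lands at 0x80 or above. Per-byte scalar model (illustrative only):

    #include <cstdint>

    // paddusb(0x70) followed by pshufb, one destination byte at a time.
    uint8_t swizzle_byte(const uint8_t src[16], uint8_t index) {
      unsigned sum = index + 0x70u;
      uint8_t m = sum > 0xFF ? 0xFF : uint8_t(sum);  // paddusb saturates
      return (m & 0x80) ? 0 : src[m & 0x0F];         // pshufb selection rule
    }
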
case kX64S8x16Shuffle: {
@@ -3507,10 +3608,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SetupShuffleMaskOnStack(tasm(), mask);
- __ pshufb(dst, Operand(rsp, 0));
+ __ Pshufb(dst, Operand(rsp, 0));
} else { // two input operands
DCHECK_EQ(6, instr->InputCount());
- ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
+ ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
uint32_t mask[4] = {};
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@@ -3520,13 +3621,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask);
- __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
+ __ Pshufb(kScratchDoubleReg, Operand(rsp, 0));
uint32_t mask1[4] = {};
if (instr->InputAt(1)->IsSimd128Register()) {
XMMRegister src1 = i.InputSimd128Register(1);
if (src1 != dst) __ movups(dst, src1);
} else {
- __ movups(dst, i.InputOperand(1));
+ __ Movups(dst, i.InputOperand(1));
}
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@@ -3536,8 +3637,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask1);
- __ pshufb(dst, Operand(rsp, 0));
- __ por(dst, kScratchDoubleReg);
+ __ Pshufb(dst, Operand(rsp, 0));
+ __ Por(dst, kScratchDoubleReg);
}
__ movq(rsp, tmp);
break;
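
The two-operand branch of kX64S8x16Shuffle merges two single-operand pshufbs: each stack mask maps the lanes that belong to the other input to a byte with the top bit set, so pshufb zeroes them, and por combines the two half-results. Scalar sketch of the whole 16-byte operation (illustrative only; indices 0..15 address the first input, 16..31 the second):

    #include <cstdint>

    void shuffle2(const uint8_t a[16], const uint8_t b[16],
                  const uint8_t idx[16], uint8_t out[16]) {
      for (int j = 0; j < 16; j++) {
        uint8_t ma = idx[j] < 16 ? idx[j] : 0x80;        // mask for pshufb(a)
        uint8_t mb = idx[j] >= 16 ? idx[j] - 16 : 0x80;  // mask for pshufb(b)
        uint8_t la = (ma & 0x80) ? 0 : a[ma & 0x0F];     // pshufb on a
        uint8_t lb = (mb & 0x80) ? 0 : b[mb & 0x0F];     // pshufb on b
        out[j] = la | lb;                                // por merges the halves
      }
    }
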