Diffstat (limited to 'deps/v8/src/compiler/backend')
44 files changed, 1735 insertions, 746 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc index 65a569d755..3fe5361083 100644 --- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -44,7 +44,7 @@ class ArmOperandConverter final : public InstructionOperandConverter { UNREACHABLE(); } - Operand InputImmediate(size_t index) { + Operand InputImmediate(size_t index) const { return ToImmediate(instr_->InputAt(index)); } @@ -111,7 +111,7 @@ class ArmOperandConverter final : public InstructionOperandConverter { return InputOffset(&first_index); } - Operand ToImmediate(InstructionOperand* operand) { + Operand ToImmediate(InstructionOperand* operand) const { Constant constant = ToConstant(operand); switch (constant.type()) { case Constant::kInt32: @@ -153,9 +153,6 @@ class ArmOperandConverter final : public InstructionOperandConverter { NeonMemOperand NeonInputOperand(size_t first_index) { const size_t index = first_index; switch (AddressingModeField::decode(instr_->opcode())) { - case kMode_Offset_RR: - return NeonMemOperand(InputRegister(index + 0), - InputRegister(index + 1)); case kMode_Operand2_R: return NeonMemOperand(InputRegister(index + 0)); default: @@ -309,9 +306,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { UNREACHABLE(); } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, - ArmOperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, + ArmOperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -320,10 +317,10 @@ void EmitWordLoadPoisoningIfNeeded( } } -void ComputePoisonedAddressForLoad( - CodeGenerator* codegen, InstructionCode opcode, - ArmOperandConverter& i, // NOLINT(runtime/references) - Register address) { +void ComputePoisonedAddressForLoad(CodeGenerator* codegen, + InstructionCode opcode, + ArmOperandConverter const& i, + Register address) { DCHECK_EQ(kMemoryAccessPoisoned, static_cast<MemoryAccessMode>(MiscField::decode(opcode))); switch (AddressingModeField::decode(opcode)) { @@ -1798,6 +1795,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } + case kArmF32x4Sqrt: { + QwNeonRegister dst = i.OutputSimd128Register(); + QwNeonRegister src1 = i.InputSimd128Register(0); + DCHECK_EQ(dst, q0); + DCHECK_EQ(src1, q0); +#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane) + __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0)); + __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1)); + __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2)); + __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3)); +#undef S_FROM_Q + break; + } case kArmF32x4RecipApprox: { __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; @@ -1919,14 +1929,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI32x4Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. 
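
Context for the shift hunks in this file and the arm64/ia32 files further down: wasm-simd defines vector shifts to take their count modulo the lane width, so the selector now reserves an extra general-purpose temp and the code generator masks the count before splatting it into a vector register. A scalar sketch of the semantics the masking has to guarantee (names here are illustrative, not from the commit):

    #include <cstdint>
    // Per-lane behaviour of a masked i32x4 shift: any count is reduced
    // modulo the 32-bit lane width, so the shift amount never reaches 32.
    uint32_t i32x4_shl_lane(uint32_t lane, uint32_t count) {
      return lane << (count & 31);
    }

The __ and_(shift, ..., Operand(31)) that follows is the in-register form of count & 31; the 16- and 8-lane cases mask with 15 and 7. (Note that the kArmI8x16Shl hunk further down still splats i.InputRegister(1) rather than the masked shift register, unlike its ShrS/ShrU siblings.)
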
+ __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kArmI32x4ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. + __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vneg(Neon32, tmp, tmp); __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -1998,7 +2014,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI32x4ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. + __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vneg(Neon32, tmp, tmp); __ vshl(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2029,7 +2048,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI16x8ExtractLane: { - __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16, + __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16, i.InputInt8(1)); break; } @@ -2054,14 +2073,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI16x8Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kArmI16x8ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vneg(Neon16, tmp, tmp); __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2142,7 +2167,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI16x8ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vneg(Neon16, tmp, tmp); __ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2186,7 +2214,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI8x16ExtractLane: { - __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8, + __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8, i.InputInt8(1)); break; } @@ -2201,6 +2229,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. 
+ __ and_(shift, i.InputRegister(1), Operand(7)); __ vdup(Neon8, tmp, i.InputRegister(1)); __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2208,7 +2239,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon8, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. + __ and_(shift, i.InputRegister(1), Operand(7)); + __ vdup(Neon8, tmp, shift); __ vneg(Neon8, tmp, tmp); __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2275,7 +2309,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon8, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. + __ and_(shift, i.InputRegister(1), Operand(7)); + __ vdup(Neon8, tmp, shift); __ vneg(Neon8, tmp, tmp); __ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h index 3551e26aea..d398ec0ed6 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -135,6 +135,7 @@ namespace compiler { V(ArmF32x4UConvertI32x4) \ V(ArmF32x4Abs) \ V(ArmF32x4Neg) \ + V(ArmF32x4Sqrt) \ V(ArmF32x4RecipApprox) \ V(ArmF32x4RecipSqrtApprox) \ V(ArmF32x4Add) \ diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index 1d7cf61dfe..92be55dcc3 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -115,6 +115,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4UConvertI32x4: case kArmF32x4Abs: case kArmF32x4Neg: + case kArmF32x4Sqrt: case kArmF32x4RecipApprox: case kArmF32x4RecipSqrtApprox: case kArmF32x4Add: diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc index ce74faa4a6..303648051f 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "src/base/adapters.h" #include "src/base/bits.h" #include "src/base/enum-set.h" +#include "src/base/iterator.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -94,7 +94,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ArmOperandGenerator g(selector); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), arraysize(temps), temps); @@ -352,6 +352,26 @@ void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode div_opcode, } } +// Adds the base and offset into a register, then change the addressing +// mode of opcode_return to use this register. Certain instructions, e.g. 
+// vld1 and vst1, when given two registers, will post-increment the offset, i.e. +// perform the operation at base, then add offset to base. What we intend is to +// access at (base+offset). +void EmitAddBeforeS128LoadStore(InstructionSelector* selector, + InstructionCode* opcode_return, + size_t* input_count_return, + InstructionOperand* inputs) { + DCHECK(*opcode_return == kArmVld1S128 || *opcode_return == kArmVst1S128); + ArmOperandGenerator g(selector); + InstructionOperand addr = g.TempRegister(); + InstructionCode op = kArmAdd; + op |= AddressingModeField::encode(kMode_Operand2_R); + selector->Emit(op, 1, &addr, 2, inputs); + *opcode_return |= AddressingModeField::encode(kMode_Operand2_R); + *input_count_return -= 1; + inputs[0] = addr; +} + void EmitLoad(InstructionSelector* selector, InstructionCode opcode, InstructionOperand* output, Node* base, Node* index) { ArmOperandGenerator g(selector); @@ -368,7 +388,11 @@ void EmitLoad(InstructionSelector* selector, InstructionCode opcode, input_count = 3; } else { inputs[1] = g.UseRegister(index); - opcode |= AddressingModeField::encode(kMode_Offset_RR); + if (opcode == kArmVld1S128) { + EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[0]); + } else { + opcode |= AddressingModeField::encode(kMode_Offset_RR); + } } selector->Emit(opcode, 1, output, input_count, inputs); } @@ -386,7 +410,12 @@ void EmitStore(InstructionSelector* selector, InstructionCode opcode, input_count = 4; } else { inputs[input_count++] = g.UseRegister(index); - opcode |= AddressingModeField::encode(kMode_Offset_RR); + if (opcode == kArmVst1S128) { + // Inputs are value, base, index, only care about base and index. + EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[1]); + } else { + opcode |= AddressingModeField::encode(kMode_Offset_RR); + } } selector->Emit(opcode, 0, nullptr, input_count, inputs); } @@ -596,8 +625,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); return; } - case MachineRepresentation::kFloat64: - case MachineRepresentation::kSimd128: { + case MachineRepresentation::kFloat64: { // Compute the address of the least-significant byte of the FP value. // We assume that the base node is unlikely to be an encodable immediate // or the result of a shift operation, so only consider the addressing @@ -623,13 +651,10 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { if (CpuFeatures::IsSupported(NEON)) { // With NEON we can load directly from the calculated address. - InstructionCode op = load_rep == MachineRepresentation::kFloat64 - ? kArmVld1F64 - : kArmVld1S128; + InstructionCode op = kArmVld1F64; op |= AddressingModeField::encode(kMode_Operand2_R); Emit(op, g.DefineAsRegister(node), addr); } else { - DCHECK_NE(MachineRepresentation::kSimd128, load_rep); // Load both halves and move to an FP register. 
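
An aside on EmitAddBeforeS128LoadStore above: vld1/vst1 in their two-register form treat the second register as a post-increment, not as part of the effective address, so the selector pre-adds base and index into a temp and downgrades the opcode to the one-register kMode_Operand2_R mode. Roughly, with illustrative mnemonics:

    // Instead of the two-register form, which accesses [base] and then
    // bumps base by index:
    //     vld1.8 {q0}, [base], index
    // the selector now emits:
    //     add ip, base, index
    //     vld1.8 {q0}, [ip]
    // so the access really lands at (base + index).
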
InstructionOperand fp_lo = g.TempRegister(); InstructionOperand fp_hi = g.TempRegister(); @@ -670,8 +695,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { EmitStore(this, kArmStr, input_count, inputs, index); return; } - case MachineRepresentation::kFloat64: - case MachineRepresentation::kSimd128: { + case MachineRepresentation::kFloat64: { if (CpuFeatures::IsSupported(NEON)) { InstructionOperand address = g.TempRegister(); { @@ -697,13 +721,10 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { inputs[input_count++] = g.UseRegister(value); inputs[input_count++] = address; - InstructionCode op = store_rep == MachineRepresentation::kFloat64 - ? kArmVst1F64 - : kArmVst1S128; + InstructionCode op = kArmVst1F64; op |= AddressingModeField::encode(kMode_Operand2_R); Emit(op, 0, nullptr, input_count, inputs); } else { - DCHECK_NE(MachineRepresentation::kSimd128, store_rep); // Store a 64-bit floating point value using two 32-bit integer stores. // Computing the store address here would require three live temporary // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after @@ -942,7 +963,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) { uint32_t lsb = m.right().Value(); Int32BinopMatcher mleft(m.left().node()); if (mleft.right().HasValue()) { - uint32_t value = (mleft.right().Value() >> lsb) << lsb; + uint32_t value = static_cast<uint32_t>(mleft.right().Value() >> lsb) + << lsb; uint32_t width = base::bits::CountPopulation(value); uint32_t msb = base::bits::CountLeadingZeros32(value); if ((width != 0) && (msb + width + lsb == 32)) { @@ -1119,6 +1141,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { VisitRR(this, kArmRev, node); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); } void InstructionSelector::VisitInt32Add(Node* node) { @@ -2513,6 +2539,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP) #undef SIMD_VISIT_BINOP #undef SIMD_BINOP_LIST +void InstructionSelector::VisitF32x4Sqrt(Node* node) { + ArmOperandGenerator g(this); + // Use fixed registers in the lower 8 Q-registers so we can directly access + // mapped registers S0-S31. 
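
On the fixed-q0 constraint above: ARM has no quad-register sqrt, so kArmF32x4Sqrt is generated as four scalar vsqrt ops on the S registers that alias the Q register (see S_FROM_Q in code-generator-arm.cc earlier in this diff). Only q0-q7 have S-register views, hence the fixed register. The aliasing rule as a small checked helper (illustrative, not part of the commit):

    #include <cassert>
    // S-register index aliasing lane `lane` of quad register q`q_code`:
    // q0 = {s0..s3}, q1 = {s4..s7}, ..., q7 = {s28..s31}.
    int s_from_q(int q_code, int lane) {
      assert(q_code < 8 && lane < 4);
      return q_code * 4 + lane;
    }
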
+ Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0), + g.UseFixed(node->InputAt(0), q0)); +} + void InstructionSelector::VisitF32x4Div(Node* node) { ArmOperandGenerator g(this); // Use fixed registers in the lower 8 Q-registers so we can directly access diff --git a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc index 66ca7f6cf0..6f65c905dd 100644 --- a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc @@ -376,9 +376,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { UNREACHABLE(); } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - Arm64OperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + Arm64OperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -389,6 +389,36 @@ void EmitWordLoadPoisoningIfNeeded( } } +void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode, + Arm64OperandConverter* i, VRegister output_reg) { + const MemoryAccessMode access_mode = + static_cast<MemoryAccessMode>(MiscField::decode(opcode)); + AddressingMode address_mode = AddressingModeField::decode(opcode); + if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) { + UseScratchRegisterScope temps(codegen->tasm()); + Register address = temps.AcquireX(); + switch (address_mode) { + case kMode_MRI: // Fall through. + case kMode_MRR: + codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1)); + break; + case kMode_Operand2_R_LSL_I: + codegen->tasm()->Add(address, i->InputRegister(0), + i->InputOperand2_64(1)); + break; + default: + // Note: we don't need poisoning for kMode_Root loads as those loads + // target a fixed offset from root register which is set once when + // initializing the vm. 
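
For the modes EmitMaybePoisonedFPLoad does handle (kMode_MRI, kMode_MRR, kMode_Operand2_R_LSL_I), the pattern is: fold the address into a scratch register, AND it with the speculation-poison register, then load, so a misspeculated path dereferences a neutralized address. The scalar shape of the mask (illustrative):

    #include <cstdint>
    // kSpeculationPoisonRegister holds all-ones on correctly predicted
    // paths and zero after a misprediction, so the AND nulls the address
    // exactly when the load should not be observable.
    uintptr_t poisoned(uintptr_t base, uintptr_t offset, uintptr_t poison) {
      return (base + offset) & poison;
    }
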
+ UNREACHABLE(); + } + codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister)); + codegen->tasm()->Ldr(output_reg, MemOperand(address)); + } else { + codegen->tasm()->Ldr(output_reg, i->MemoryOperand()); + } +} + } // namespace #define ASSEMBLE_SHIFT(asm_instr, width) \ @@ -1198,6 +1228,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArm64Sxtw: __ Sxtw(i.OutputRegister(), i.InputRegister32(0)); break; + case kArm64Sbfx: + __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1), + i.InputInt6(2)); + break; case kArm64Sbfx32: __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1), i.InputInt5(2)); @@ -1586,6 +1620,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArm64Str: __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1)); break; + case kArm64StrCompressTagged: + __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1)); + break; case kArm64DecompressSigned: { __ DecompressTaggedSigned(i.OutputRegister(), i.InputRegister(0)); break; @@ -1599,13 +1636,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64LdrS: - __ Ldr(i.OutputDoubleRegister().S(), i.MemoryOperand()); + EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S()); break; case kArm64StrS: __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1)); break; case kArm64LdrD: - __ Ldr(i.OutputDoubleRegister(), i.MemoryOperand()); + EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister()); break; case kArm64StrD: __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1)); @@ -1616,9 +1653,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArm64StrQ: __ Str(i.InputSimd128Register(0), i.MemoryOperand(1)); break; - case kArm64StrCompressTagged: - __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1)); - break; case kArm64DmbIsh: __ Dmb(InnerShareable, BarrierAll); break; @@ -1794,6 +1828,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D); SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D); + SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D); SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D); SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D); SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D); @@ -1818,6 +1853,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0).V2D()); break; } + case kArm64F64x2Qfma: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ Fmla(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(), + i.InputSimd128Register(2).V2D()); + break; + } + case kArm64F64x2Qfms: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ Fmls(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(), + i.InputSimd128Register(2).V2D()); + break; + } case kArm64F32x4Splat: { __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0); break; @@ -1840,6 +1887,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S); SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S); SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S); + SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S); SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S); SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S); SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S); @@ -1867,6 +1915,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( 
i.InputSimd128Register(0).V4S()); break; } + case kArm64F32x4Qfma: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ Fmla(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(), + i.InputSimd128Register(2).V4S()); + break; + } + case kArm64F32x4Qfms: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ Fmls(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(), + i.InputSimd128Register(2).V4S()); + break; + } case kArm64I64x2Splat: { __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0)); break; @@ -1888,14 +1948,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D); case kArm64I64x2Shl: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V2D(), i.InputRegister64(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 64. + __ And(shift, i.InputRegister64(1), 63); + __ Dup(tmp.V2D(), shift); __ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), tmp.V2D()); break; } case kArm64I64x2ShrS: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V2D(), i.InputRegister64(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 64. + __ And(shift, i.InputRegister64(1), 63); + __ Dup(tmp.V2D(), shift); __ Neg(tmp.V2D(), tmp.V2D()); __ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), tmp.V2D()); @@ -1903,6 +1969,65 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D); SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D); + case kArm64I64x2Mul: { + UseScratchRegisterScope scope(tasm()); + VRegister dst = i.OutputSimd128Register(); + VRegister src1 = i.InputSimd128Register(0); + VRegister src2 = i.InputSimd128Register(1); + VRegister tmp1 = scope.AcquireSameSizeAs(dst); + VRegister tmp2 = scope.AcquireSameSizeAs(dst); + VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0)); + + // This 2x64-bit multiplication is performed with several 32-bit + // multiplications. + + // 64-bit numbers x and y, can be represented as: + // x = a + 2^32(b) + // y = c + 2^32(d) + + // A 64-bit multiplication is: + // x * y = ac + 2^32(ad + bc) + 2^64(bd) + // note: `2^64(bd)` can be ignored, the value is too large to fit in + // 64-bits. + + // This sequence implements a 2x64bit multiply, where the registers + // `src1` and `src2` are split up into 32-bit components: + // src1 = |d|c|b|a| + // src2 = |h|g|f|e| + // + // src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)| + + // Reverse the 32-bit elements in the 64-bit words. + // tmp2 = |g|h|e|f| + __ Rev64(tmp2.V4S(), src2.V4S()); + + // Calculate the high half components. + // tmp2 = |dg|ch|be|af| + __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S()); + + // Extract the low half components of src1. + // tmp1 = |c|a| + __ Xtn(tmp1.V2S(), src1.V2D()); + + // Sum the respective high half components. + // tmp2 = |dg+ch|be+af||dg+ch|be+af| + __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S()); + + // Extract the low half components of src2. + // tmp3 = |g|e| + __ Xtn(tmp3.V2S(), src2.V2D()); + + // Shift the high half components, into the high half. + // dst = |dg+ch << 32|be+af << 32| + __ Shll(dst.V2D(), tmp2.V2S(), 32); + + // Multiply the low components together, and accumulate with the high + // half. 
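
(The decomposition this sequence implements can be checked against a plain scalar model; names below are illustrative:

    #include <cstdint>
    // Low 64 bits of a 64x64 multiply from 32-bit halves, mirroring the
    // comments above: x = a + 2^32*b, y = c + 2^32*d,
    // x*y mod 2^64 = a*c + 2^32*(a*d + b*c).
    uint64_t mul64_from_halves(uint64_t x, uint64_t y) {
      uint64_t a = x & 0xFFFFFFFFu, b = x >> 32;
      uint64_t c = y & 0xFFFFFFFFu, d = y >> 32;
      return a * c + ((a * d + b * c) << 32);  // the 2^64*(b*d) term wraps away
    }

The Umlal below is the vector form of that final multiply-accumulate.)
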
+ // dst = |dst[1] + cg|dst[0] + ae| + __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S()); + + break; + } SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D); case kArm64I64x2Ne: { VRegister dst = i.OutputSimd128Register().V2D(); @@ -1915,7 +2040,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D); case kArm64I64x2ShrU: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V2D(), i.InputRegister64(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 64. + __ And(shift, i.InputRegister64(1), 63); + __ Dup(tmp.V2D(), shift); __ Neg(tmp.V2D(), tmp.V2D()); __ Ushl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), tmp.V2D()); @@ -1947,14 +2075,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S); case kArm64I32x4Shl: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V4S(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 32. + __ And(shift, i.InputRegister32(1), 31); + __ Dup(tmp.V4S(), shift); __ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), tmp.V4S()); break; } case kArm64I32x4ShrS: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V4S(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 32. + __ And(shift, i.InputRegister32(1), 31); + __ Dup(tmp.V4S(), shift); __ Neg(tmp.V4S(), tmp.V4S()); __ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), tmp.V4S()); @@ -1981,7 +2115,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H); case kArm64I32x4ShrU: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V4S(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 32. + __ And(shift, i.InputRegister32(1), 31); + __ Dup(tmp.V4S(), shift); __ Neg(tmp.V4S(), tmp.V4S()); __ Ushl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), tmp.V4S()); @@ -1996,7 +2133,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64I16x8ExtractLane: { - __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(), + __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(), i.InputInt8(1)); break; } @@ -2014,14 +2151,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H); case kArm64I16x8Shl: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V8H(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 16. + __ And(shift, i.InputRegister32(1), 15); + __ Dup(tmp.V8H(), shift); __ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), tmp.V8H()); break; } case kArm64I16x8ShrS: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V8H(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 16. + __ And(shift, i.InputRegister32(1), 15); + __ Dup(tmp.V8H(), shift); __ Neg(tmp.V8H(), tmp.V8H()); __ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), tmp.V8H()); @@ -2070,7 +2213,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArm64I16x8ShrU: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V8H(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 16. 
+ __ And(shift, i.InputRegister32(1), 15); + __ Dup(tmp.V8H(), shift); __ Neg(tmp.V8H(), tmp.V8H()); __ Ushl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), tmp.V8H()); @@ -2101,7 +2247,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64I8x16ExtractLane: { - __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(), + __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(), i.InputInt8(1)); break; } @@ -2117,14 +2263,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B); case kArm64I8x16Shl: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V16B(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 8. + __ And(shift, i.InputRegister32(1), 7); + __ Dup(tmp.V16B(), shift); __ Sshl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), tmp.V16B()); break; } case kArm64I8x16ShrS: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V16B(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 8. + __ And(shift, i.InputRegister32(1), 7); + __ Dup(tmp.V16B(), shift); __ Neg(tmp.V16B(), tmp.V16B()); __ Sshl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), tmp.V16B()); @@ -2163,7 +2315,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B); case kArm64I8x16ShrU: { VRegister tmp = i.TempSimd128Register(0); - __ Dup(tmp.V16B(), i.InputRegister32(1)); + Register shift = i.TempRegister32(1); + // Take shift value modulo 8. + __ And(shift, i.InputRegister32(1), 7); + __ Dup(tmp.V16B(), shift); __ Neg(tmp.V16B(), tmp.V16B()); __ Ushl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), tmp.V16B()); @@ -2277,6 +2432,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1).V16B(), i.InputInt4(2)); break; } + case kArm64S8x16Swizzle: { + __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), + i.InputSimd128Register(1).V16B()); + break; + } case kArm64S8x16Shuffle: { Simd128Register dst = i.OutputSimd128Register().V16B(), src0 = i.InputSimd128Register(0).V16B(), diff --git a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h index 4b56e402c1..880a3fbf9e 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -70,6 +70,7 @@ namespace compiler { V(Arm64Sxtb) \ V(Arm64Sxth) \ V(Arm64Sxtw) \ + V(Arm64Sbfx) \ V(Arm64Sbfx32) \ V(Arm64Ubfx) \ V(Arm64Ubfx32) \ @@ -175,6 +176,7 @@ namespace compiler { V(Arm64F64x2ReplaceLane) \ V(Arm64F64x2Abs) \ V(Arm64F64x2Neg) \ + V(Arm64F64x2Sqrt) \ V(Arm64F64x2Add) \ V(Arm64F64x2Sub) \ V(Arm64F64x2Mul) \ @@ -185,6 +187,8 @@ namespace compiler { V(Arm64F64x2Ne) \ V(Arm64F64x2Lt) \ V(Arm64F64x2Le) \ + V(Arm64F64x2Qfma) \ + V(Arm64F64x2Qfms) \ V(Arm64F32x4Splat) \ V(Arm64F32x4ExtractLane) \ V(Arm64F32x4ReplaceLane) \ @@ -192,6 +196,7 @@ namespace compiler { V(Arm64F32x4UConvertI32x4) \ V(Arm64F32x4Abs) \ V(Arm64F32x4Neg) \ + V(Arm64F32x4Sqrt) \ V(Arm64F32x4RecipApprox) \ V(Arm64F32x4RecipSqrtApprox) \ V(Arm64F32x4Add) \ @@ -205,6 +210,8 @@ namespace compiler { V(Arm64F32x4Ne) \ V(Arm64F32x4Lt) \ V(Arm64F32x4Le) \ + V(Arm64F32x4Qfma) \ + V(Arm64F32x4Qfms) \ V(Arm64I64x2Splat) \ V(Arm64I64x2ExtractLane) \ V(Arm64I64x2ReplaceLane) \ 
@@ -213,6 +220,7 @@ namespace compiler { V(Arm64I64x2ShrS) \ V(Arm64I64x2Add) \ V(Arm64I64x2Sub) \ + V(Arm64I64x2Mul) \ V(Arm64I64x2Eq) \ V(Arm64I64x2Ne) \ V(Arm64I64x2GtS) \ @@ -331,6 +339,7 @@ namespace compiler { V(Arm64S8x16TransposeLeft) \ V(Arm64S8x16TransposeRight) \ V(Arm64S8x16Concat) \ + V(Arm64S8x16Swizzle) \ V(Arm64S8x16Shuffle) \ V(Arm64S32x2Reverse) \ V(Arm64S16x4Reverse) \ diff --git a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 7cba2d50ea..b0f9202968 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -71,6 +71,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64Sxth: case kArm64Sxth32: case kArm64Sxtw: + case kArm64Sbfx: case kArm64Sbfx32: case kArm64Ubfx: case kArm64Ubfx32: @@ -142,6 +143,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F64x2ReplaceLane: case kArm64F64x2Abs: case kArm64F64x2Neg: + case kArm64F64x2Sqrt: case kArm64F64x2Add: case kArm64F64x2Sub: case kArm64F64x2Mul: @@ -152,6 +154,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F64x2Ne: case kArm64F64x2Lt: case kArm64F64x2Le: + case kArm64F64x2Qfma: + case kArm64F64x2Qfms: case kArm64F32x4Splat: case kArm64F32x4ExtractLane: case kArm64F32x4ReplaceLane: @@ -159,6 +163,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F32x4UConvertI32x4: case kArm64F32x4Abs: case kArm64F32x4Neg: + case kArm64F32x4Sqrt: case kArm64F32x4RecipApprox: case kArm64F32x4RecipSqrtApprox: case kArm64F32x4Add: @@ -172,6 +177,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F32x4Ne: case kArm64F32x4Lt: case kArm64F32x4Le: + case kArm64F32x4Qfma: + case kArm64F32x4Qfms: case kArm64I64x2Splat: case kArm64I64x2ExtractLane: case kArm64I64x2ReplaceLane: @@ -180,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64I64x2ShrS: case kArm64I64x2Add: case kArm64I64x2Sub: + case kArm64I64x2Mul: case kArm64I64x2Eq: case kArm64I64x2Ne: case kArm64I64x2GtS: @@ -298,6 +306,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64S8x16TransposeLeft: case kArm64S8x16TransposeRight: case kArm64S8x16Concat: + case kArm64S8x16Swizzle: case kArm64S8x16Shuffle: case kArm64S32x2Reverse: case kArm64S16x4Reverse: @@ -439,6 +448,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { case kArm64Clz: case kArm64Clz32: + case kArm64Sbfx: case kArm64Sbfx32: case kArm64Sxtb32: case kArm64Sxth32: diff --git a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc index 4abbd68c49..53a289fe6a 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -153,7 +153,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { Arm64OperandGenerator g(selector); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), arraysize(temps), temps); @@ -499,6 +499,7 @@ void VisitAddSub(InstructionSelector* selector, Node* node, ArchOpcode opcode, Arm64OperandGenerator 
g(selector); Matcher m(node); if (m.right().HasValue() && (m.right().Value() < 0) && + (m.right().Value() > std::numeric_limits<int>::min()) && g.CanBeImmediate(-m.right().Value(), kArithmeticImm)) { selector->Emit(negate_opcode, g.DefineAsRegister(node), g.UseRegister(m.left().node()), @@ -627,9 +628,24 @@ void InstructionSelector::VisitLoad(Node* node) { #else UNREACHABLE(); #endif +#ifdef V8_COMPRESS_POINTERS + case MachineRepresentation::kTaggedSigned: + opcode = kArm64LdrDecompressTaggedSigned; + immediate_mode = kLoadStoreImm32; + break; + case MachineRepresentation::kTaggedPointer: + opcode = kArm64LdrDecompressTaggedPointer; + immediate_mode = kLoadStoreImm32; + break; + case MachineRepresentation::kTagged: + opcode = kArm64LdrDecompressAnyTagged; + immediate_mode = kLoadStoreImm32; + break; +#else case MachineRepresentation::kTaggedSigned: // Fall through. case MachineRepresentation::kTaggedPointer: // Fall through. case MachineRepresentation::kTagged: // Fall through. +#endif case MachineRepresentation::kWord64: opcode = kArm64Ldr; immediate_mode = kLoadStoreImm64; @@ -723,7 +739,7 @@ void InstructionSelector::VisitStore(Node* node) { case MachineRepresentation::kCompressedPointer: // Fall through. case MachineRepresentation::kCompressed: #ifdef V8_COMPRESS_POINTERS - opcode = kArm64StrW; + opcode = kArm64StrCompressTagged; immediate_mode = kLoadStoreImm32; break; #else @@ -731,7 +747,11 @@ void InstructionSelector::VisitStore(Node* node) { #endif case MachineRepresentation::kTaggedSigned: // Fall through. case MachineRepresentation::kTaggedPointer: // Fall through. - case MachineRepresentation::kTagged: // Fall through. + case MachineRepresentation::kTagged: + opcode = kArm64StrCompressTagged; + immediate_mode = + COMPRESS_POINTERS_BOOL ? kLoadStoreImm32 : kLoadStoreImm64; + break; case MachineRepresentation::kWord64: opcode = kArm64Str; immediate_mode = kLoadStoreImm64; @@ -770,6 +790,10 @@ void InstructionSelector::VisitProtectedStore(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + // Architecture supports unaligned access, therefore VisitLoad is used instead void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); } @@ -1048,7 +1072,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) { if (mleft.right().HasValue() && mleft.right().Value() != 0) { // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is // shifted into the least-significant bits. - uint32_t mask = (mleft.right().Value() >> lsb) << lsb; + uint32_t mask = static_cast<uint32_t>(mleft.right().Value() >> lsb) + << lsb; unsigned mask_width = base::bits::CountPopulation(mask); unsigned mask_msb = base::bits::CountLeadingZeros32(mask); if ((mask_msb + mask_width + lsb) == 32) { @@ -1091,7 +1116,8 @@ void InstructionSelector::VisitWord64Shr(Node* node) { if (mleft.right().HasValue() && mleft.right().Value() != 0) { // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is // shifted into the least-significant bits. 
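
The static_cast added in this hunk (and the matching 32-bit ones earlier in the arm and arm64 selectors) sidesteps left-shifting a signed value, which is undefined behavior before C++20 when the result overflows: the arithmetic right shift still happens on the signed constant, but the re-shift left is done on an unsigned copy. The pattern in isolation (illustrative):

    #include <cstdint>
    // Clear the low `lsb` bits of a signed constant without signed-shift UB.
    uint64_t clear_low_bits(int64_t value, unsigned lsb) {
      return static_cast<uint64_t>(value >> lsb) << lsb;
    }
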
- uint64_t mask = (mleft.right().Value() >> lsb) << lsb; + uint64_t mask = static_cast<uint64_t>(mleft.right().Value() >> lsb) + << lsb; unsigned mask_width = base::bits::CountPopulation(mask); unsigned mask_msb = base::bits::CountLeadingZeros64(mask); if ((mask_msb + mask_width + lsb) == 64) { @@ -1240,7 +1266,8 @@ void InstructionSelector::VisitWord64Ror(Node* node) { V(Float32Max, kArm64Float32Max) \ V(Float64Max, kArm64Float64Max) \ V(Float32Min, kArm64Float32Min) \ - V(Float64Min, kArm64Float64Min) + V(Float64Min, kArm64Float64Min) \ + V(S8x16Swizzle, kArm64S8x16Swizzle) #define RR_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ @@ -1572,9 +1599,22 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) { return; } EmitLoad(this, value, opcode, immediate_mode, rep, node); - } else { - VisitRR(this, kArm64Sxtw, node); + return; + } + + if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) { + Int32BinopMatcher m(value); + if (m.right().HasValue()) { + Arm64OperandGenerator g(this); + // Mask the shift amount, to keep the same semantics as Word32Sar. + int right = m.right().Value() & 0x1F; + Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()), + g.TempImmediate(right), g.TempImmediate(32 - right)); + return; + } } + + VisitRR(this, kArm64Sxtw, node); } void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { @@ -1830,31 +1870,6 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode, selector->EmitWithContinuation(opcode, left, right, cont); } -// Shared routine for multiple word compare operations. -void VisitWordCompare(InstructionSelector* selector, Node* node, - InstructionCode opcode, FlagsContinuation* cont, - ImmediateMode immediate_mode) { - Arm64OperandGenerator g(selector); - - Node* left = node->InputAt(0); - Node* right = node->InputAt(1); - - // If one of the two inputs is an immediate, make sure it's on the right. 
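
On the VisitChangeInt32ToInt64 change above: sign-extending the result of a 32-bit arithmetic shift right by k is the same as extracting bits [k, 31] with sign extension, which is exactly Sbfx(x, k, 32 - k), so the separate shift-then-sxtw pair collapses into one instruction. A scalar model of the bitfield extract (illustrative):

    #include <cstdint>
    // Model of Sbfx(src, lsb, width): take `width` bits at `lsb` and
    // sign-extend them to 64 bits.
    int64_t sbfx(uint64_t src, unsigned lsb, unsigned width) {
      uint64_t field = (src >> lsb) & ((uint64_t{1} << width) - 1);
      uint64_t sign = uint64_t{1} << (width - 1);
      return static_cast<int64_t>((field ^ sign) - sign);
    }
    // For a 32-bit payload: sbfx(x, k, 32 - k) == int64_t(int32_t(x) >> k).
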
- if (!g.CanBeImmediate(right, immediate_mode) && - g.CanBeImmediate(left, immediate_mode)) { - cont->Commute(); - std::swap(left, right); - } - - if (g.CanBeImmediate(right, immediate_mode)) { - VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), - cont); - } else { - VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), - cont); - } -} - // This function checks whether we can convert: // ((a <op> b) cmp 0), b.<cond> // to: @@ -1986,9 +2001,35 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector, selector->EmitWithContinuation(opcode, value, cont); } +template <int N> +struct CbzOrTbzMatchTrait {}; + +template <> +struct CbzOrTbzMatchTrait<32> { + using IntegralType = uint32_t; + using BinopMatcher = Int32BinopMatcher; + static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And; + static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32; + static constexpr ArchOpcode kCompareAndBranchOpcode = + kArm64CompareAndBranch32; + static constexpr unsigned kSignBit = kWSignBit; +}; + +template <> +struct CbzOrTbzMatchTrait<64> { + using IntegralType = uint64_t; + using BinopMatcher = Int64BinopMatcher; + static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And; + static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch; + static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch; + static constexpr unsigned kSignBit = kXSignBit; +}; + // Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node} // against {value}, depending on the condition. -bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, +template <int N> +bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, + typename CbzOrTbzMatchTrait<N>::IntegralType value, Node* user, FlagsCondition cond, FlagsContinuation* cont) { // Branch poisoning requires flags to be set, so when it's enabled for // a particular branch, we shouldn't be applying the cbz/tbz optimization. @@ -2007,28 +2048,33 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, if (cont->IsDeoptimize()) return false; Arm64OperandGenerator g(selector); cont->Overwrite(MapForTbz(cond)); - Int32Matcher m(node); - if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) { - // SignedLessThan(Float64ExtractHighWord32(x), 0) and - // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) essentially - // check the sign bit of a 64-bit floating point value. - InstructionOperand temp = g.TempRegister(); - selector->Emit(kArm64U64MoveFloat64, temp, - g.UseRegister(node->InputAt(0))); - selector->EmitWithContinuation(kArm64TestAndBranch, temp, - g.TempImmediate(63), cont); - return true; + + if (N == 32) { + Int32Matcher m(node); + if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) { + // SignedLessThan(Float64ExtractHighWord32(x), 0) and + // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) + // essentially check the sign bit of a 64-bit floating point value. 
+ InstructionOperand temp = g.TempRegister(); + selector->Emit(kArm64U64MoveFloat64, temp, + g.UseRegister(node->InputAt(0))); + selector->EmitWithContinuation(kArm64TestAndBranch, temp, + g.TempImmediate(kDSignBit), cont); + return true; + } } - selector->EmitWithContinuation(kArm64TestAndBranch32, g.UseRegister(node), - g.TempImmediate(31), cont); + + selector->EmitWithContinuation( + CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node), + g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont); return true; } case kEqual: case kNotEqual: { - if (node->opcode() == IrOpcode::kWord32And) { + if (node->opcode() == CbzOrTbzMatchTrait<N>::kAndOpcode) { // Emit a tbz/tbnz if we are comparing with a single-bit mask: - // Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false) - Int32BinopMatcher m_and(node); + // Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false) + typename CbzOrTbzMatchTrait<N>::BinopMatcher m_and(node); if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) && m_and.right().Is(value) && selector->CanCover(user, node)) { Arm64OperandGenerator g(selector); @@ -2036,7 +2082,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, // the opposite here so negate the condition. cont->Negate(); selector->EmitWithContinuation( - kArm64TestAndBranch32, g.UseRegister(m_and.left().node()), + CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, + g.UseRegister(m_and.left().node()), g.TempImmediate(base::bits::CountTrailingZeros(value)), cont); return true; } @@ -2048,7 +2095,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, if (value != 0) return false; Arm64OperandGenerator g(selector); cont->Overwrite(MapForCbz(cond)); - EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32, + EmitBranchOrDeoptimize(selector, + CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode, g.UseRegister(node), cont); return true; } @@ -2057,20 +2105,50 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, } } +// Shared routine for multiple word compare operations. +void VisitWordCompare(InstructionSelector* selector, Node* node, + InstructionCode opcode, FlagsContinuation* cont, + ImmediateMode immediate_mode) { + Arm64OperandGenerator g(selector); + + Node* left = node->InputAt(0); + Node* right = node->InputAt(1); + + // If one of the two inputs is an immediate, make sure it's on the right. 
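
The CbzOrTbzMatchTrait introduced above is a width-dispatch trait: one templated TryEmitCbzOrTbz body picks the 32- or 64-bit opcodes, matchers, and sign-bit positions at compile time. The idiom in miniature (illustrative names):

    #include <cstdint>
    template <int N> struct WidthTrait;
    template <> struct WidthTrait<32> {
      using Int = uint32_t;
      static constexpr unsigned kSignBit = 31;
    };
    template <> struct WidthTrait<64> {
      using Int = uint64_t;
      static constexpr unsigned kSignBit = 63;
    };
    // What a tbz/tbnz on the sign bit tests, for either width.
    template <int N> bool sign_bit_set(typename WidthTrait<N>::Int v) {
      return (v >> WidthTrait<N>::kSignBit) & 1u;
    }

With the 64-bit instantiation wired into VisitWordCompare below, Word64 comparisons against constant zero reach the same cbz/tbz shortcuts that 32-bit comparisons already had, which is why the hand-written Word64Equal(x, 0) special case is deleted further down.
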
+ if (!g.CanBeImmediate(right, immediate_mode) && + g.CanBeImmediate(left, immediate_mode)) { + cont->Commute(); + std::swap(left, right); + } + + if (opcode == kArm64Cmp && !cont->IsPoisoned()) { + Int64Matcher m(right); + if (m.HasValue()) { + if (TryEmitCbzOrTbz<64>(selector, left, m.Value(), node, + cont->condition(), cont)) { + return; + } + } + } + + VisitCompare(selector, opcode, g.UseRegister(left), + g.UseOperand(right, immediate_mode), cont); +} + void VisitWord32Compare(InstructionSelector* selector, Node* node, FlagsContinuation* cont) { Int32BinopMatcher m(node); FlagsCondition cond = cont->condition(); if (!cont->IsPoisoned()) { if (m.right().HasValue()) { - if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node, - cond, cont)) { + if (TryEmitCbzOrTbz<32>(selector, m.left().node(), m.right().Value(), + node, cond, cont)) { return; } } else if (m.left().HasValue()) { FlagsCondition commuted_cond = CommuteFlagsCondition(cond); - if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node, - commuted_cond, cont)) { + if (TryEmitCbzOrTbz<32>(selector, m.right().node(), m.left().Value(), + node, commuted_cond, cont)) { return; } } @@ -2378,13 +2456,6 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) { return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm); } - // Merge the Word64Equal(x, 0) comparison into a cbz instruction. - if ((cont->IsBranch() || cont->IsDeoptimize()) && - !cont->IsPoisoned()) { - EmitBranchOrDeoptimize(this, kArm64CompareAndBranch, - g.UseRegister(left), cont); - return; - } } return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); } @@ -3054,10 +3125,12 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { #define SIMD_UNOP_LIST(V) \ V(F64x2Abs, kArm64F64x2Abs) \ V(F64x2Neg, kArm64F64x2Neg) \ + V(F64x2Sqrt, kArm64F64x2Sqrt) \ V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \ V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \ V(F32x4Abs, kArm64F32x4Abs) \ V(F32x4Neg, kArm64F32x4Neg) \ + V(F32x4Sqrt, kArm64F32x4Sqrt) \ V(F32x4RecipApprox, kArm64F32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \ V(I64x2Neg, kArm64I64x2Neg) \ @@ -3236,6 +3309,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP) #undef SIMD_VISIT_BINOP #undef SIMD_BINOP_LIST +void InstructionSelector::VisitI64x2Mul(Node* node) { + Arm64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kArm64I64x2Mul, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + arraysize(temps), temps); +} + void InstructionSelector::VisitS128Select(Node* node) { Arm64OperandGenerator g(this); Emit(kArm64S128Select, g.DefineSameAsFirst(node), @@ -3243,6 +3324,19 @@ void InstructionSelector::VisitS128Select(Node* node) { g.UseRegister(node->InputAt(2))); } +#define VISIT_SIMD_QFMOP(op) \ + void InstructionSelector::Visit##op(Node* node) { \ + Arm64OperandGenerator g(this); \ + Emit(kArm64##op, g.DefineSameAsFirst(node), \ + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \ + g.UseRegister(node->InputAt(2))); \ + } +VISIT_SIMD_QFMOP(F64x2Qfma) +VISIT_SIMD_QFMOP(F64x2Qfms) +VISIT_SIMD_QFMOP(F32x4Qfma) +VISIT_SIMD_QFMOP(F32x4Qfms) +#undef VISIT_SIMD_QFMOP + namespace { struct ShuffleEntry { diff --git a/deps/v8/src/compiler/backend/code-generator-impl.h b/deps/v8/src/compiler/backend/code-generator-impl.h index 2bfb009980..530dc0a813 100644 --- 
a/deps/v8/src/compiler/backend/code-generator-impl.h +++ b/deps/v8/src/compiler/backend/code-generator-impl.h @@ -26,7 +26,7 @@ class InstructionOperandConverter { // -- Instruction operand accesses with conversions -------------------------- - Register InputRegister(size_t index) { + Register InputRegister(size_t index) const { return ToRegister(instr_->InputAt(index)); } @@ -96,7 +96,7 @@ class InstructionOperandConverter { return ToRpoNumber(instr_->InputAt(index)); } - Register OutputRegister(size_t index = 0) { + Register OutputRegister(size_t index = 0) const { return ToRegister(instr_->OutputAt(index)); } @@ -130,7 +130,7 @@ class InstructionOperandConverter { return ToConstant(op).ToRpoNumber(); } - Register ToRegister(InstructionOperand* op) { + Register ToRegister(InstructionOperand* op) const { return LocationOperand::cast(op)->GetRegister(); } @@ -146,7 +146,7 @@ class InstructionOperandConverter { return LocationOperand::cast(op)->GetSimd128Register(); } - Constant ToConstant(InstructionOperand* op) { + Constant ToConstant(InstructionOperand* op) const { if (op->IsImmediate()) { return gen_->instructions()->GetImmediate(ImmediateOperand::cast(op)); } diff --git a/deps/v8/src/compiler/backend/code-generator.cc b/deps/v8/src/compiler/backend/code-generator.cc index e7702bcdf6..43eb4a1f15 100644 --- a/deps/v8/src/compiler/backend/code-generator.cc +++ b/deps/v8/src/compiler/backend/code-generator.cc @@ -4,7 +4,7 @@ #include "src/compiler/backend/code-generator.h" -#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/codegen/assembler-inl.h" #include "src/codegen/macro-assembler-inl.h" #include "src/codegen/optimized-compilation-info.h" diff --git a/deps/v8/src/compiler/backend/code-generator.h b/deps/v8/src/compiler/backend/code-generator.h index e9ebf67590..d56b1edae0 100644 --- a/deps/v8/src/compiler/backend/code-generator.h +++ b/deps/v8/src/compiler/backend/code-generator.h @@ -5,6 +5,8 @@ #ifndef V8_COMPILER_BACKEND_CODE_GENERATOR_H_ #define V8_COMPILER_BACKEND_CODE_GENERATOR_H_ +#include <memory> + #include "src/base/optional.h" #include "src/codegen/macro-assembler.h" #include "src/codegen/safepoint-table.h" diff --git a/deps/v8/src/compiler/backend/frame-elider.cc b/deps/v8/src/compiler/backend/frame-elider.cc index 064501b097..293fc9352c 100644 --- a/deps/v8/src/compiler/backend/frame-elider.cc +++ b/deps/v8/src/compiler/backend/frame-elider.cc @@ -4,7 +4,7 @@ #include "src/compiler/backend/frame-elider.h" -#include "src/base/adapters.h" +#include "src/base/iterator.h" namespace v8 { namespace internal { diff --git a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc index 4542da643b..068268a3da 100644 --- a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc @@ -479,17 +479,18 @@ class OutOfLineRecordWrite final : public OutOfLineCode { __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \ } -#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ - do { \ - Register dst = i.OutputRegister(); \ - Operand src = i.InputOperand(0); \ - Register tmp = i.TempRegister(0); \ - __ mov(tmp, Immediate(1)); \ - __ xor_(dst, dst); \ - __ Pxor(kScratchDoubleReg, kScratchDoubleReg); \ - __ opcode(kScratchDoubleReg, src); \ - __ Ptest(kScratchDoubleReg, kScratchDoubleReg); \ - __ cmov(zero, dst, tmp); \ +#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ + do { \ + Register dst = i.OutputRegister(); \ + Operand src = i.InputOperand(0); \ + 
Register tmp = i.TempRegister(0); \ + XMMRegister tmp_simd = i.TempSimd128Register(1); \ + __ mov(tmp, Immediate(1)); \ + __ xor_(dst, dst); \ + __ Pxor(tmp_simd, tmp_simd); \ + __ opcode(tmp_simd, src); \ + __ Ptest(tmp_simd, tmp_simd); \ + __ cmov(zero, dst, tmp); \ } while (false) void CodeGenerator::AssembleDeconstructFrame() { @@ -1266,16 +1267,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kSSEFloat32Abs: { // TODO(bmeurer): Use 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrlq(kScratchDoubleReg, 33); - __ andps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psrlq(tmp, 33); + __ andps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Neg: { // TODO(bmeurer): Use 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psllq(kScratchDoubleReg, 31); - __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 31); + __ xorps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Round: { @@ -1444,16 +1447,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kSSEFloat64Abs: { // TODO(bmeurer): Use 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrlq(kScratchDoubleReg, 1); - __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psrlq(tmp, 1); + __ andpd(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat64Neg: { // TODO(bmeurer): Use 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psllq(kScratchDoubleReg, 63); - __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 63); + __ xorpd(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat64Sqrt: @@ -1476,13 +1481,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ cvttss2si(i.OutputRegister(), i.InputOperand(0)); break; case kSSEFloat32ToUint32: - __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); + __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), + i.TempSimd128Register(0)); break; case kSSEFloat64ToInt32: __ cvttsd2si(i.OutputRegister(), i.InputOperand(0)); break; case kSSEFloat64ToUint32: - __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); + __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), + i.TempSimd128Register(0)); break; case kSSEInt32ToFloat32: __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1577,34 +1584,38 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kAVXFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrlq(kScratchDoubleReg, 33); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psrlq(tmp, 33); CpuFeatureScope avx_scope(tasm(), AVX); - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); break; } case kAVXFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
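
These scalar float Abs/Neg cases synthesize their constants instead of loading them: pcmpeqd sets a register to all-ones, then psrlq by 33 leaves 0x7FFFFFFF in the low word of each qword (the clear-sign mask for the low float) and psllq by 31 leaves the sign bit, after which andps/xorps apply the mask. The hunks here only stop clobbering kScratchDoubleReg in favor of an explicitly allocated temp. Bit-level equivalents (illustrative):

    #include <cstdint>
    // Float32 abs and neg on the raw bit pattern, matching the masks
    // built by psrlq(all_ones, 33) and psllq(all_ones, 31).
    uint32_t f32_abs_bits(uint32_t bits) { return bits & 0x7FFFFFFFu; }
    uint32_t f32_neg_bits(uint32_t bits) { return bits ^ 0x80000000u; }
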
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psllq(kScratchDoubleReg, 31); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 31); CpuFeatureScope avx_scope(tasm(), AVX); - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); break; } case kAVXFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrlq(kScratchDoubleReg, 1); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psrlq(tmp, 1); CpuFeatureScope avx_scope(tasm(), AVX); - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); break; } case kAVXFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psllq(kScratchDoubleReg, 63); + XMMRegister tmp = i.TempSimd128Register(0); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 63); CpuFeatureScope avx_scope(tasm(), AVX); - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); break; } case kSSEFloat64SilenceNaN: @@ -1825,6 +1836,164 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; } + case kSSEF64x2Splat: { + DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); + XMMRegister dst = i.OutputSimd128Register(); + __ shufpd(dst, dst, 0x0); + break; + } + case kAVXF64x2Splat: { + CpuFeatureScope avx_scope(tasm(), AVX); + XMMRegister src = i.InputDoubleRegister(0); + __ vshufpd(i.OutputSimd128Register(), src, src, 0x0); + break; + } + case kSSEF64x2ExtractLane: { + DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); + XMMRegister dst = i.OutputDoubleRegister(); + int8_t lane = i.InputInt8(1); + if (lane != 0) { + DCHECK_LT(lane, 4); + __ shufpd(dst, dst, lane); + } + break; + } + case kAVXF64x2ExtractLane: { + CpuFeatureScope avx_scope(tasm(), AVX); + XMMRegister dst = i.OutputDoubleRegister(); + XMMRegister src = i.InputSimd128Register(0); + int8_t lane = i.InputInt8(1); + if (lane == 0) { + if (dst != src) __ vmovapd(dst, src); + } else { + DCHECK_LT(lane, 4); + __ vshufpd(dst, src, src, lane); + } + break; + } + case kSSEF64x2ReplaceLane: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + CpuFeatureScope sse_scope(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + int8_t lane = i.InputInt8(1); + DoubleRegister rep = i.InputDoubleRegister(2); + + // insertps takes a mask which contains (high to low): + // - 2 bit specifying source float element to copy + // - 2 bit specifying destination float element to write to + // - 4 bits specifying which elements of the destination to zero + DCHECK_LT(lane, 2); + if (lane == 0) { + __ insertps(dst, rep, 0b00000000); + __ insertps(dst, rep, 0b01010000); + } else { + __ insertps(dst, rep, 0b00100000); + __ insertps(dst, rep, 0b01110000); + } + break; + } + case kAVXF64x2ReplaceLane: { + CpuFeatureScope avx_scope(tasm(), AVX); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(0); + int8_t lane = i.InputInt8(1); + DoubleRegister rep = i.InputDoubleRegister(2); + + DCHECK_LT(lane, 2); + if (lane == 0) { + __ vinsertps(dst, src, rep, 0b00000000); + __ vinsertps(dst, src, rep, 0b01010000); + } else { + __ vinsertps(dst, src, rep, 0b10100000); + __ vinsertps(dst, src, rep, 0b11110000); + } 
+ break; + } + case kIA32F64x2Sqrt: { + __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0)); + break; + } + case kIA32F64x2Add: { + __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Sub: { + __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Mul: { + __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Div: { + __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Min: { + Operand src1 = i.InputOperand(1); + XMMRegister dst = i.OutputSimd128Register(), + src = i.InputSimd128Register(0), + tmp = i.TempSimd128Register(0); + // The minpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform minpd in both orders, merge the results, and adjust. + __ Movapd(tmp, src1); + __ Minpd(tmp, tmp, src); + __ Minpd(dst, src, src1); + // Propagate -0's and NaNs, which may be non-canonical. + __ Orpd(tmp, dst); + // Canonicalize NaNs by quieting and clearing the payload. + __ Cmpunordpd(dst, dst, tmp); + __ Orpd(tmp, dst); + __ Psrlq(dst, 13); + __ Andnpd(dst, tmp); + break; + } + case kIA32F64x2Max: { + Operand src1 = i.InputOperand(1); + XMMRegister dst = i.OutputSimd128Register(), + src = i.InputSimd128Register(0), + tmp = i.TempSimd128Register(0); + // The maxpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform maxpd in both orders, merge the results, and adjust. + __ Movapd(tmp, src1); + __ Maxpd(tmp, tmp, src); + __ Maxpd(dst, src, src1); + // Find discrepancies. + __ Xorpd(dst, tmp); + // Propagate NaNs, which may be non-canonical. + __ Orpd(tmp, dst); + // Propagate sign discrepancy and (subtle) quiet NaNs. + __ Subpd(tmp, tmp, dst); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ Cmpunordpd(dst, dst, tmp); + __ Psrlq(dst, 13); + __ Andnpd(dst, tmp); + break; + } + case kIA32F64x2Eq: { + __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Ne: { + __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Lt: { + __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputOperand(1)); + break; + } + case kIA32F64x2Le: { + __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputOperand(1)); + break; + } case kSSEF32x4Splat: { DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); XMMRegister dst = i.OutputSimd128Register(); @@ -1951,6 +2120,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputOperand(0)); break; } + case kSSEF32x4Sqrt: { + __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kAVXF32x4Sqrt: { + CpuFeatureScope avx_scope(tasm(), AVX); + __ vsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kIA32F32x4RecipApprox: { __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0)); break; } @@ -2212,28 +2390,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEI32x4Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32.
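+ // (pslld and friends treat a count >= 32 as shifting all bits out,
+ // while Wasm defines shift counts modulo the lane width, hence the
+ // explicit mask to the low five bits.)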
+ __ and_(shift, 31); + __ movd(tmp, shift); __ pslld(i.OutputSimd128Register(), tmp); break; } case kAVXI32x4Shl: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ and_(shift, 31); + __ movd(tmp, shift); __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI32x4ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ and_(shift, 31); + __ movd(tmp, shift); __ psrad(i.OutputSimd128Register(), tmp); break; } case kAVXI32x4ShrS: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ and_(shift, 31); + __ movd(tmp, shift); __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } @@ -2430,14 +2620,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEI32x4ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ and_(shift, 31); + __ movd(tmp, shift); __ psrld(i.OutputSimd128Register(), tmp); break; } case kAVXI32x4ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ and_(shift, 31); + __ movd(tmp, shift); __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } @@ -2514,7 +2710,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kIA32I16x8ExtractLane: { Register dst = i.OutputRegister(); __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1)); - __ movsx_w(dst, dst); break; } case kSSEI16x8ReplaceLane: { @@ -2553,28 +2748,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEI16x8Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ and_(shift, 15); + __ movd(tmp, shift); __ psllw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8Shl: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ and_(shift, 15); + __ movd(tmp, shift); __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI16x8ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ and_(shift, 15); + __ movd(tmp, shift); __ psraw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8ShrS: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. 
+ __ and_(shift, 15); + __ movd(tmp, shift); __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } @@ -2745,14 +2952,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEI16x8ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ and_(shift, 15); + __ movd(tmp, shift); __ psrlw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister tmp = i.TempSimd128Register(0); - __ movd(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 16. + __ and_(shift, 15); + __ movd(tmp, shift); __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } @@ -2875,7 +3088,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kIA32I8x16ExtractLane: { Register dst = i.OutputRegister(); __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); - __ movsx_b(dst, dst); break; } case kSSEI8x16ReplaceLane: { @@ -2919,6 +3131,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register shift = i.InputRegister(1); Register tmp = i.ToRegister(instr->TempAt(0)); XMMRegister tmp_simd = i.TempSimd128Register(1); + // Take shift value modulo 8. + __ and_(shift, 7); // Mask off the unwanted bits before word-shifting. __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ mov(tmp, shift); @@ -2938,6 +3152,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register shift = i.InputRegister(1); Register tmp = i.ToRegister(instr->TempAt(0)); XMMRegister tmp_simd = i.TempSimd128Register(1); + // Take shift value modulo 8. + __ and_(shift, 7); // Mask off the unwanted bits before word-shifting. __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ mov(tmp, shift); @@ -2959,6 +3175,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ punpckhbw(kScratchDoubleReg, dst); __ punpcklbw(dst, dst); __ mov(tmp, i.InputRegister(1)); + // Take shift value modulo 8. + __ and_(tmp, 7); __ add(tmp, Immediate(8)); __ movd(tmp_simd, tmp); __ psraw(kScratchDoubleReg, tmp_simd); @@ -3223,6 +3441,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ punpckhbw(kScratchDoubleReg, dst); __ punpcklbw(dst, dst); __ mov(tmp, i.InputRegister(1)); + // Take shift value modulo 8. + __ and_(tmp, 7); __ add(tmp, Immediate(8)); __ movd(tmp_simd, tmp); __ psrlw(kScratchDoubleReg, tmp_simd); @@ -3365,6 +3585,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vxorps(dst, kScratchDoubleReg, i.InputSimd128Register(2)); break; } + case kIA32S8x16Swizzle: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister mask = i.TempSimd128Register(0); + + // Out-of-range indices should return 0, add 112 so that any value > 15 + // saturates to 128 (top bit set), so pshufb will zero that lane. 
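+ // E.g. index 15 becomes 15 + 112 = 127: bit 7 is clear and the low four
+ // bits still select byte 15. Index 16 becomes exactly 128, and larger
+ // indices saturate toward 255, so bit 7 is set and the lane is zeroed.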
+ __ Move(mask, (uint32_t)0x70707070); + __ Pshufd(mask, mask, 0x0); + __ Paddusb(mask, i.InputSimd128Register(1)); + __ Pshufb(dst, mask); + break; + } case kIA32S8x16Shuffle: { XMMRegister dst = i.OutputSimd128Register(); Operand src0 = i.InputOperand(0); diff --git a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h index 7530c716b8..a77fb8cd37 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -116,6 +116,23 @@ namespace compiler { V(IA32PushSimd128) \ V(IA32Poke) \ V(IA32Peek) \ + V(SSEF64x2Splat) \ + V(AVXF64x2Splat) \ + V(SSEF64x2ExtractLane) \ + V(AVXF64x2ExtractLane) \ + V(SSEF64x2ReplaceLane) \ + V(AVXF64x2ReplaceLane) \ + V(IA32F64x2Sqrt) \ + V(IA32F64x2Add) \ + V(IA32F64x2Sub) \ + V(IA32F64x2Mul) \ + V(IA32F64x2Div) \ + V(IA32F64x2Min) \ + V(IA32F64x2Max) \ + V(IA32F64x2Eq) \ + V(IA32F64x2Ne) \ + V(IA32F64x2Lt) \ + V(IA32F64x2Le) \ V(SSEF32x4Splat) \ V(AVXF32x4Splat) \ V(SSEF32x4ExtractLane) \ @@ -129,6 +146,8 @@ namespace compiler { V(AVXF32x4Abs) \ V(SSEF32x4Neg) \ V(AVXF32x4Neg) \ + V(SSEF32x4Sqrt) \ + V(AVXF32x4Sqrt) \ V(IA32F32x4RecipApprox) \ V(IA32F32x4RecipSqrtApprox) \ V(SSEF32x4Add) \ @@ -313,6 +332,7 @@ namespace compiler { V(AVXS128Xor) \ V(SSES128Select) \ V(AVXS128Select) \ + V(IA32S8x16Swizzle) \ V(IA32S8x16Shuffle) \ V(IA32S32x4Swizzle) \ V(IA32S32x4Shuffle) \ diff --git a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index c2097a6691..287eb49a48 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -97,6 +97,23 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXFloat32Neg: case kIA32BitcastFI: case kIA32BitcastIF: + case kSSEF64x2Splat: + case kAVXF64x2Splat: + case kSSEF64x2ExtractLane: + case kAVXF64x2ExtractLane: + case kSSEF64x2ReplaceLane: + case kAVXF64x2ReplaceLane: + case kIA32F64x2Sqrt: + case kIA32F64x2Add: + case kIA32F64x2Sub: + case kIA32F64x2Mul: + case kIA32F64x2Div: + case kIA32F64x2Min: + case kIA32F64x2Max: + case kIA32F64x2Eq: + case kIA32F64x2Ne: + case kIA32F64x2Lt: + case kIA32F64x2Le: case kSSEF32x4Splat: case kAVXF32x4Splat: case kSSEF32x4ExtractLane: @@ -110,6 +127,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXF32x4Abs: case kSSEF32x4Neg: case kAVXF32x4Neg: + case kSSEF32x4Sqrt: + case kAVXF32x4Sqrt: case kIA32F32x4RecipApprox: case kIA32F32x4RecipSqrtApprox: case kSSEF32x4Add: @@ -294,6 +313,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXS128Xor: case kSSES128Select: case kAVXS128Select: + case kIA32S8x16Swizzle: case kIA32S8x16Shuffle: case kIA32S32x4Swizzle: case kIA32S32x4Shuffle: diff --git a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc index ebef39a93a..a24727aba2 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -200,12 +200,27 @@ namespace { void VisitRO(InstructionSelector* selector, Node* node, ArchOpcode opcode) { IA32OperandGenerator g(selector); - InstructionOperand temps[] = {g.TempRegister()}; Node* input = node->InputAt(0); // We have to use a byte register as input to movsxb. InstructionOperand input_op = opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input); - selector->Emit(opcode, g.DefineAsRegister(node), input_op, arraysize(temps), + selector->Emit(opcode, g.DefineAsRegister(node), input_op); +} + +void VisitROWithTemp(InstructionSelector* selector, Node* node, + ArchOpcode opcode) { + IA32OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempRegister()}; + selector->Emit(opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), + arraysize(temps), temps); +} + +void VisitROWithTempSimd(InstructionSelector* selector, Node* node, + ArchOpcode opcode) { + IA32OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempSimd128Register()}; + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); } @@ -231,10 +246,13 @@ void VisitRROFloat(InstructionSelector* selector, Node* node, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, ArchOpcode avx_opcode, ArchOpcode sse_opcode) { IA32OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempSimd128Register()}; if (selector->IsSupported(AVX)) { - selector->Emit(avx_opcode, g.DefineAsRegister(node), g.Use(input)); + selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input), + arraysize(temps), temps); } else { - selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input)); + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), + g.UseUniqueRegister(input), arraysize(temps), temps); } } @@ -804,12 +822,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) { V(ChangeFloat32ToFloat64, kSSEFloat32ToFloat64) \ V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \ V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \ - V(ChangeUint32ToFloat64, kSSEUint32ToFloat64) \ V(TruncateFloat32ToInt32, kSSEFloat32ToInt32) \ - V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \ V(ChangeFloat64ToInt32, kSSEFloat64ToInt32) \ - V(ChangeFloat64ToUint32, kSSEFloat64ToUint32) \ - V(TruncateFloat64ToUint32, kSSEFloat64ToUint32) \ V(TruncateFloat64ToFloat32, kSSEFloat64ToFloat32) \ V(RoundFloat64ToInt32, kSSEFloat64ToInt32) \ V(BitcastFloat32ToInt32, kIA32BitcastFI) \ @@ -819,7 +833,15 @@ void InstructionSelector::VisitWord32Ror(Node* node) { V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32) \ V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \ V(SignExtendWord8ToInt32, kIA32Movsxbl) \ - V(SignExtendWord16ToInt32, kIA32Movsxwl) + V(SignExtendWord16ToInt32, kIA32Movsxwl) \ + V(F64x2Sqrt, kIA32F64x2Sqrt) + +#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64) + +#define RO_WITH_TEMP_SIMD_OP_LIST(V) \ + V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \ + V(ChangeFloat64ToUint32, kSSEFloat64ToUint32) \ + V(TruncateFloat64ToUint32, kSSEFloat64ToUint32) #define RR_OP_LIST(V) \ V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \ @@ -841,13 +863,23 @@ void InstructionSelector::VisitWord32Ror(Node* node) { V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \ V(Float64Mul, kAVXFloat64Mul, 
kSSEFloat64Mul) \ V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \ - V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) + V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \ + V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add) \ + V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub) \ + V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul) \ + V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div) \ + V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq) \ + V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne) \ + V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt) \ + V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le) #define FLOAT_UNOP_LIST(V) \ V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \ V(Float64Abs, kAVXFloat64Abs, kSSEFloat64Abs) \ V(Float32Neg, kAVXFloat32Neg, kSSEFloat32Neg) \ - V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) + V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) \ + V(F64x2Abs, kAVXFloat64Abs, kSSEFloat64Abs) \ + V(F64x2Neg, kAVXFloat64Neg, kSSEFloat64Neg) #define RO_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ @@ -857,6 +889,22 @@ RO_OP_LIST(RO_VISITOR) #undef RO_VISITOR #undef RO_OP_LIST +#define RO_WITH_TEMP_VISITOR(Name, opcode) \ + void InstructionSelector::Visit##Name(Node* node) { \ + VisitROWithTemp(this, node, opcode); \ + } +RO_WITH_TEMP_OP_LIST(RO_WITH_TEMP_VISITOR) +#undef RO_WITH_TEMP_VISITOR +#undef RO_WITH_TEMP_OP_LIST + +#define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode) \ + void InstructionSelector::Visit##Name(Node* node) { \ + VisitROWithTempSimd(this, node, opcode); \ + } +RO_WITH_TEMP_SIMD_OP_LIST(RO_WITH_TEMP_SIMD_VISITOR) +#undef RO_WITH_TEMP_SIMD_VISITOR +#undef RO_WITH_TEMP_SIMD_OP_LIST + #define RR_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ VisitRR(this, node, opcode); \ @@ -890,6 +938,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + void InstructionSelector::VisitInt32Add(Node* node) { IA32OperandGenerator g(this); @@ -1971,6 +2023,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { #define SIMD_UNOP_PREFIX_LIST(V) \ V(F32x4Abs) \ V(F32x4Neg) \ + V(F32x4Sqrt) \ V(S128Not) #define SIMD_ANYTRUE_LIST(V) \ @@ -1995,6 +2048,43 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I8x16ShrS) \ V(I8x16ShrU) +void InstructionSelector::VisitF64x2Min(Node* node) { + IA32OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); + InstructionOperand operand1 = g.UseUnique(node->InputAt(1)); + + if (IsSupported(AVX)) { + Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1, + arraysize(temps), temps); + } else { + Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1, + arraysize(temps), temps); + } +} + +void InstructionSelector::VisitF64x2Max(Node* node) { + IA32OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); + InstructionOperand operand1 = g.UseUnique(node->InputAt(1)); + if (IsSupported(AVX)) { + Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1, + arraysize(temps), temps); + } else { + Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1, + arraysize(temps), temps); + } +} + +void InstructionSelector::VisitF64x2Splat(Node* node) { + VisitRRSimd(this, node, kAVXF64x2Splat, kSSEF64x2Splat); +} + +void 
InstructionSelector::VisitF64x2ExtractLane(Node* node) { + VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane); +} + void InstructionSelector::VisitF32x4Splat(Node* node) { VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat); } @@ -2086,6 +2176,28 @@ VISIT_SIMD_REPLACE_LANE(F32x4) #undef VISIT_SIMD_REPLACE_LANE #undef SIMD_INT_TYPES +// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces +// operand2 to be UseRegister, because the codegen relies on insertps using +// registers. +// TODO(v8:9764) Remove this UseRegister requirement +#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type) \ + void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \ + IA32OperandGenerator g(this); \ + InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \ + InstructionOperand operand1 = \ + g.UseImmediate(OpParameter<int32_t>(node->op())); \ + InstructionOperand operand2 = g.UseRegister(node->InputAt(1)); \ + if (IsSupported(AVX)) { \ + Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \ + operand1, operand2); \ + } else { \ + Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \ + operand1, operand2); \ + } \ + } +VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2) +#undef VISIT_SIMD_REPLACE_LANE_USE_REG + #define VISIT_SIMD_SHIFT(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \ @@ -2132,12 +2244,12 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) #undef VISIT_SIMD_ANYTRUE #undef SIMD_ANYTRUE_LIST -#define VISIT_SIMD_ALLTRUE(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - IA32OperandGenerator g(this); \ - InstructionOperand temps[] = {g.TempRegister()}; \ - Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), \ - arraysize(temps), temps); \ +#define VISIT_SIMD_ALLTRUE(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + IA32OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kIA32##Opcode, g.DefineAsRegister(node), \ + g.UseUnique(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE @@ -2489,6 +2601,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps); } +void InstructionSelector::VisitS8x16Swizzle(Node* node) { + IA32OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kIA32S8x16Swizzle, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)), + arraysize(temps), temps); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/deps/v8/src/compiler/backend/instruction-scheduler.cc b/deps/v8/src/compiler/backend/instruction-scheduler.cc index dc66813740..d4920cd575 100644 --- a/deps/v8/src/compiler/backend/instruction-scheduler.cc +++ b/deps/v8/src/compiler/backend/instruction-scheduler.cc @@ -4,7 +4,7 @@ #include "src/compiler/backend/instruction-scheduler.h" -#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/base/utils/random-number-generator.h" #include "src/execution/isolate.h" diff --git a/deps/v8/src/compiler/backend/instruction-selector-impl.h b/deps/v8/src/compiler/backend/instruction-selector-impl.h index a3f62e7ba4..13ea049eba 100644 --- a/deps/v8/src/compiler/backend/instruction-selector-impl.h +++ 
b/deps/v8/src/compiler/backend/instruction-selector-impl.h @@ -29,8 +29,8 @@ inline bool operator<(const CaseInfo& l, const CaseInfo& r) { // Helper struct containing data about a table or lookup switch. class SwitchInfo { public: - SwitchInfo(ZoneVector<CaseInfo>& cases, // NOLINT(runtime/references) - int32_t min_value, int32_t max_value, BasicBlock* default_branch) + SwitchInfo(ZoneVector<CaseInfo> const& cases, int32_t min_value, + int32_t max_value, BasicBlock* default_branch) : cases_(cases), min_value_(min_value), max_value_(max_value), @@ -193,17 +193,6 @@ class OperandGenerator { reg.code(), GetVReg(node))); } - InstructionOperand UseExplicit(LinkageLocation location) { - MachineRepresentation rep = InstructionSequence::DefaultRepresentation(); - if (location.IsRegister()) { - return ExplicitOperand(LocationOperand::REGISTER, rep, - location.AsRegister()); - } else { - return ExplicitOperand(LocationOperand::STACK_SLOT, rep, - location.GetLocation()); - } - } - InstructionOperand UseImmediate(int immediate) { return sequence()->AddImmediate(Constant(immediate)); } @@ -275,6 +264,16 @@ class OperandGenerator { InstructionOperand::kInvalidVirtualRegister); } + template <typename FPRegType> + InstructionOperand TempFpRegister(FPRegType reg) { + UnallocatedOperand op = + UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER, reg.code(), + sequence()->NextVirtualRegister()); + sequence()->MarkAsRepresentation(MachineRepresentation::kSimd128, + op.virtual_register()); + return op; + } + InstructionOperand TempImmediate(int32_t imm) { return sequence()->AddImmediate(Constant(imm)); } diff --git a/deps/v8/src/compiler/backend/instruction-selector.cc b/deps/v8/src/compiler/backend/instruction-selector.cc index 43193ec2b1..22d81c0c55 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.cc +++ b/deps/v8/src/compiler/backend/instruction-selector.cc @@ -6,7 +6,7 @@ #include <limits> -#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/codegen/assembler-inl.h" #include "src/codegen/tick-counter.h" #include "src/compiler/backend/instruction-selector-impl.h" @@ -1439,6 +1439,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsWord64(node), VisitWord64ReverseBits(node); case IrOpcode::kWord64ReverseBytes: return MarkAsWord64(node), VisitWord64ReverseBytes(node); + case IrOpcode::kSimd128ReverseBytes: + return MarkAsSimd128(node), VisitSimd128ReverseBytes(node); case IrOpcode::kInt64AbsWithOverflow: return MarkAsWord64(node), VisitInt64AbsWithOverflow(node); case IrOpcode::kWord64Equal: @@ -1502,7 +1504,7 @@ void InstructionSelector::VisitNode(Node* node) { case IrOpcode::kUint64Mod: return MarkAsWord64(node), VisitUint64Mod(node); case IrOpcode::kBitcastTaggedToWord: - case IrOpcode::kBitcastTaggedSignedToWord: + case IrOpcode::kBitcastTaggedToWordForTagAndSmiBits: return MarkAsRepresentation(MachineType::PointerRepresentation(), node), VisitBitcastTaggedToWord(node); case IrOpcode::kBitcastWordToTagged: @@ -1857,6 +1859,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF64x2Abs(node); case IrOpcode::kF64x2Neg: return MarkAsSimd128(node), VisitF64x2Neg(node); + case IrOpcode::kF64x2Sqrt: + return MarkAsSimd128(node), VisitF64x2Sqrt(node); case IrOpcode::kF64x2Add: return MarkAsSimd128(node), VisitF64x2Add(node); case IrOpcode::kF64x2Sub: @@ -1877,6 +1881,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF64x2Lt(node); case IrOpcode::kF64x2Le: return 
MarkAsSimd128(node), VisitF64x2Le(node); + case IrOpcode::kF64x2Qfma: + return MarkAsSimd128(node), VisitF64x2Qfma(node); + case IrOpcode::kF64x2Qfms: + return MarkAsSimd128(node), VisitF64x2Qfms(node); case IrOpcode::kF32x4Splat: return MarkAsSimd128(node), VisitF32x4Splat(node); case IrOpcode::kF32x4ExtractLane: @@ -1891,6 +1899,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF32x4Abs(node); case IrOpcode::kF32x4Neg: return MarkAsSimd128(node), VisitF32x4Neg(node); + case IrOpcode::kF32x4Sqrt: + return MarkAsSimd128(node), VisitF32x4Sqrt(node); case IrOpcode::kF32x4RecipApprox: return MarkAsSimd128(node), VisitF32x4RecipApprox(node); case IrOpcode::kF32x4RecipSqrtApprox: @@ -1917,6 +1927,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF32x4Lt(node); case IrOpcode::kF32x4Le: return MarkAsSimd128(node), VisitF32x4Le(node); + case IrOpcode::kF32x4Qfma: + return MarkAsSimd128(node), VisitF32x4Qfma(node); + case IrOpcode::kF32x4Qfms: + return MarkAsSimd128(node), VisitF32x4Qfms(node); case IrOpcode::kI64x2Splat: return MarkAsSimd128(node), VisitI64x2Splat(node); case IrOpcode::kI64x2ExtractLane: @@ -2137,6 +2151,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitS128Not(node); case IrOpcode::kS128Select: return MarkAsSimd128(node), VisitS128Select(node); + case IrOpcode::kS8x16Swizzle: + return MarkAsSimd128(node), VisitS8x16Swizzle(node); case IrOpcode::kS8x16Shuffle: return MarkAsSimd128(node), VisitS8x16Shuffle(node); case IrOpcode::kS1x2AnyTrue: @@ -2286,8 +2302,8 @@ void InstructionSelector::VisitFloat64Tanh(Node* node) { VisitFloat64Ieee754Unop(node, kIeee754Float64Tanh); } -void InstructionSelector::EmitTableSwitch(const SwitchInfo& sw, - InstructionOperand& index_operand) { +void InstructionSelector::EmitTableSwitch( + const SwitchInfo& sw, InstructionOperand const& index_operand) { OperandGenerator g(this); size_t input_count = 2 + sw.value_range(); DCHECK_LE(sw.value_range(), std::numeric_limits<size_t>::max() - 2); @@ -2304,8 +2320,8 @@ void InstructionSelector::EmitTableSwitch(const SwitchInfo& sw, Emit(kArchTableSwitch, 0, nullptr, input_count, inputs, 0, nullptr); } -void InstructionSelector::EmitLookupSwitch(const SwitchInfo& sw, - InstructionOperand& value_operand) { +void InstructionSelector::EmitLookupSwitch( + const SwitchInfo& sw, InstructionOperand const& value_operand) { OperandGenerator g(this); std::vector<CaseInfo> cases = sw.CasesSortedByOriginalOrder(); size_t input_count = 2 + sw.case_count() * 2; @@ -2322,7 +2338,7 @@ void InstructionSelector::EmitLookupSwitch(const SwitchInfo& sw, } void InstructionSelector::EmitBinarySearchSwitch( - const SwitchInfo& sw, InstructionOperand& value_operand) { + const SwitchInfo& sw, InstructionOperand const& value_operand) { OperandGenerator g(this); size_t input_count = 2 + sw.case_count() * 2; DCHECK_LE(sw.case_count(), (std::numeric_limits<size_t>::max() - 2) / 2); @@ -2607,21 +2623,25 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { #if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_ARM64 +#if !V8_TARGET_ARCH_IA32 void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } 
+void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_IA32 void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } @@ -2630,6 +2650,7 @@ void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); } @@ -2639,8 +2660,11 @@ void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_ARM64 -void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); } diff --git a/deps/v8/src/compiler/backend/instruction-selector.h b/deps/v8/src/compiler/backend/instruction-selector.h index eb3e098427..e951c90f95 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.h +++ b/deps/v8/src/compiler/backend/instruction-selector.h @@ -502,15 +502,12 @@ class V8_EXPORT_PRIVATE InstructionSelector final { FeedbackSource const& feedback, Node* frame_state); - void EmitTableSwitch( - const SwitchInfo& sw, - InstructionOperand& index_operand); // NOLINT(runtime/references) - void EmitLookupSwitch( - const SwitchInfo& sw, - InstructionOperand& value_operand); // NOLINT(runtime/references) - void EmitBinarySearchSwitch( - const SwitchInfo& sw, - 
InstructionOperand& value_operand); // NOLINT(runtime/references) + void EmitTableSwitch(const SwitchInfo& sw, + InstructionOperand const& index_operand); + void EmitLookupSwitch(const SwitchInfo& sw, + InstructionOperand const& value_operand); + void EmitBinarySearchSwitch(const SwitchInfo& sw, + InstructionOperand const& value_operand); void TryRename(InstructionOperand* op); int GetRename(int virtual_register); diff --git a/deps/v8/src/compiler/backend/instruction.cc b/deps/v8/src/compiler/backend/instruction.cc index 06158b0c72..076f1b596e 100644 --- a/deps/v8/src/compiler/backend/instruction.cc +++ b/deps/v8/src/compiler/backend/instruction.cc @@ -168,7 +168,6 @@ std::ostream& operator<<(std::ostream& os, const InstructionOperand& op) { return os << "[immediate:" << imm.indexed_value() << "]"; } } - case InstructionOperand::EXPLICIT: case InstructionOperand::ALLOCATED: { LocationOperand allocated = LocationOperand::cast(op); if (op.IsStackSlot()) { @@ -192,9 +191,6 @@ std::ostream& operator<<(std::ostream& os, const InstructionOperand& op) { os << "[" << Simd128Register::from_code(allocated.register_code()) << "|R"; } - if (allocated.IsExplicit()) { - os << "|E"; - } switch (allocated.representation()) { case MachineRepresentation::kNone: os << "|-"; @@ -294,17 +290,6 @@ void ParallelMove::PrepareInsertAfter( if (replacement != nullptr) move->set_source(replacement->source()); } -ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep, - int index) - : LocationOperand(EXPLICIT, kind, rep, index) { - DCHECK_IMPLIES(kind == REGISTER && !IsFloatingPoint(rep), - GetRegConfig()->IsAllocatableGeneralCode(index)); - DCHECK_IMPLIES(kind == REGISTER && rep == MachineRepresentation::kFloat32, - GetRegConfig()->IsAllocatableFloatCode(index)); - DCHECK_IMPLIES(kind == REGISTER && (rep == MachineRepresentation::kFloat64), - GetRegConfig()->IsAllocatableDoubleCode(index)); -} - Instruction::Instruction(InstructionCode opcode) : opcode_(opcode), bit_field_(OutputCountField::encode(0) | InputCountField::encode(0) | diff --git a/deps/v8/src/compiler/backend/instruction.h b/deps/v8/src/compiler/backend/instruction.h index f5f7f64c51..321f069531 100644 --- a/deps/v8/src/compiler/backend/instruction.h +++ b/deps/v8/src/compiler/backend/instruction.h @@ -43,9 +43,8 @@ class V8_EXPORT_PRIVATE InstructionOperand { CONSTANT, IMMEDIATE, // Location operand kinds. - EXPLICIT, ALLOCATED, - FIRST_LOCATION_OPERAND_KIND = EXPLICIT + FIRST_LOCATION_OPERAND_KIND = ALLOCATED // Location operand kinds must be last. }; @@ -68,11 +67,6 @@ class V8_EXPORT_PRIVATE InstructionOperand { // embedded directly in instructions, e.g. small integers and on some // platforms Objects. INSTRUCTION_OPERAND_PREDICATE(Immediate, IMMEDIATE) - // ExplicitOperands do not participate in register allocation. They are - // created by the instruction selector for direct access to registers and - // stack slots, completely bypassing the register allocator. They are never - // associated with a virtual register - INSTRUCTION_OPERAND_PREDICATE(Explicit, EXPLICIT) // AllocatedOperands are registers or stack slots that are assigned by the // register allocator and are always associated with a virtual register. 
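+ // (Fixed-register access, formerly the role of explicit operands, is now
+ // expressed as UnallocatedOperand constraints such as FIXED_FP_REGISTER;
+ // see OperandGenerator::TempFpRegister.)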
INSTRUCTION_OPERAND_PREDICATE(Allocated, ALLOCATED) @@ -515,19 +509,6 @@ class LocationOperand : public InstructionOperand { using IndexField = BitField64<int32_t, 35, 29>; }; -class V8_EXPORT_PRIVATE ExplicitOperand - : public NON_EXPORTED_BASE(LocationOperand) { - public: - ExplicitOperand(LocationKind kind, MachineRepresentation rep, int index); - - static ExplicitOperand* New(Zone* zone, LocationKind kind, - MachineRepresentation rep, int index) { - return InstructionOperand::New(zone, ExplicitOperand(kind, rep, index)); - } - - INSTRUCTION_OPERAND_CASTS(ExplicitOperand, EXPLICIT) -}; - class AllocatedOperand : public LocationOperand { public: AllocatedOperand(LocationKind kind, MachineRepresentation rep, int index) @@ -643,7 +624,7 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const { } return InstructionOperand::KindField::update( LocationOperand::RepresentationField::update(this->value_, canonical), - LocationOperand::EXPLICIT); + LocationOperand::ALLOCATED); } return this->value_; } @@ -776,11 +757,11 @@ class V8_EXPORT_PRIVATE Instruction final { public: size_t OutputCount() const { return OutputCountField::decode(bit_field_); } const InstructionOperand* OutputAt(size_t i) const { - DCHECK(i < OutputCount()); + DCHECK_LT(i, OutputCount()); return &operands_[i]; } InstructionOperand* OutputAt(size_t i) { - DCHECK(i < OutputCount()); + DCHECK_LT(i, OutputCount()); return &operands_[i]; } @@ -790,21 +771,21 @@ class V8_EXPORT_PRIVATE Instruction final { size_t InputCount() const { return InputCountField::decode(bit_field_); } const InstructionOperand* InputAt(size_t i) const { - DCHECK(i < InputCount()); + DCHECK_LT(i, InputCount()); return &operands_[OutputCount() + i]; } InstructionOperand* InputAt(size_t i) { - DCHECK(i < InputCount()); + DCHECK_LT(i, InputCount()); return &operands_[OutputCount() + i]; } size_t TempCount() const { return TempCountField::decode(bit_field_); } const InstructionOperand* TempAt(size_t i) const { - DCHECK(i < TempCount()); + DCHECK_LT(i, TempCount()); return &operands_[OutputCount() + InputCount() + i]; } InstructionOperand* TempAt(size_t i) { - DCHECK(i < TempCount()); + DCHECK_LT(i, TempCount()); return &operands_[OutputCount() + InputCount() + i]; } diff --git a/deps/v8/src/compiler/backend/jump-threading.cc b/deps/v8/src/compiler/backend/jump-threading.cc index dfb917a58c..ee195bf51e 100644 --- a/deps/v8/src/compiler/backend/jump-threading.cc +++ b/deps/v8/src/compiler/backend/jump-threading.cc @@ -69,11 +69,11 @@ bool IsBlockWithBranchPoisoning(InstructionSequence* code, } // namespace bool JumpThreading::ComputeForwarding(Zone* local_zone, - ZoneVector<RpoNumber>& result, + ZoneVector<RpoNumber>* result, InstructionSequence* code, bool frame_at_start) { ZoneStack<RpoNumber> stack(local_zone); - JumpThreadingState state = {false, result, stack}; + JumpThreadingState state = {false, *result, stack}; state.Clear(code->InstructionBlockCount()); // Iterate over the blocks forward, pushing the blocks onto the stack. 
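The ComputeForwarding rewrite above illustrates the cleanup applied throughout this patch wherever NOLINT(runtime/references) appeared: parameters that are only read become const references, and parameters that are mutated become pointers so the side effect is visible at the call site. A minimal standalone sketch of the convention (plain C++, not V8 code):

#include <vector>

// Read-only input: const reference. Mutated output: pointer, which forces
// callers to write &result and makes the out-parameter explicit.
void ComputeForwarding(const std::vector<int>& code, std::vector<int>* result) {
  result->assign(code.rbegin(), code.rend());
}

int main() {
  std::vector<int> code = {1, 2, 3}, result;
  ComputeForwarding(code, &result);  // mutation is obvious at the call site
  return result.front() == 3 ? 0 : 1;
}

Hence the function body now reads through the pointer ((*result)[i], result->size()) and the DEBUG loop below iterates over *result.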
@@ -135,15 +135,15 @@ bool JumpThreading::ComputeForwarding(Zone* local_zone, } #ifdef DEBUG - for (RpoNumber num : result) { + for (RpoNumber num : *result) { DCHECK(num.IsValid()); } #endif if (FLAG_trace_turbo_jt) { - for (int i = 0; i < static_cast<int>(result.size()); i++) { + for (int i = 0; i < static_cast<int>(result->size()); i++) { TRACE("B%d ", i); - int to = result[i].ToInt(); + int to = (*result)[i].ToInt(); if (i != to) { TRACE("-> B%d\n", to); } else { @@ -156,7 +156,7 @@ bool JumpThreading::ComputeForwarding(Zone* local_zone, } void JumpThreading::ApplyForwarding(Zone* local_zone, - ZoneVector<RpoNumber>& result, + ZoneVector<RpoNumber> const& result, InstructionSequence* code) { if (!FLAG_turbo_jt) return; diff --git a/deps/v8/src/compiler/backend/jump-threading.h b/deps/v8/src/compiler/backend/jump-threading.h index ce60ebcb2e..ce9e394924 100644 --- a/deps/v8/src/compiler/backend/jump-threading.h +++ b/deps/v8/src/compiler/backend/jump-threading.h @@ -17,17 +17,14 @@ class V8_EXPORT_PRIVATE JumpThreading { public: // Compute the forwarding map of basic blocks to their ultimate destination. // Returns {true} if there is at least one block that is forwarded. - static bool ComputeForwarding( - Zone* local_zone, - ZoneVector<RpoNumber>& result, // NOLINT(runtime/references) - InstructionSequence* code, bool frame_at_start); + static bool ComputeForwarding(Zone* local_zone, ZoneVector<RpoNumber>* result, + InstructionSequence* code, bool frame_at_start); // Rewrite the instructions to forward jumps and branches. // May also negate some branches. - static void ApplyForwarding( - Zone* local_zone, - ZoneVector<RpoNumber>& forwarding, // NOLINT(runtime/references) - InstructionSequence* code); + static void ApplyForwarding(Zone* local_zone, + ZoneVector<RpoNumber> const& forwarding, + InstructionSequence* code); }; } // namespace compiler diff --git a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc index 239075392a..ee23402e69 100644 --- a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc +++ b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc @@ -265,34 +265,33 @@ Condition FlagsConditionToConditionTst(FlagsCondition condition) { UNREACHABLE(); } -FPUCondition FlagsConditionToConditionCmpFPU( - bool& predicate, // NOLINT(runtime/references) - FlagsCondition condition) { +FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate, + FlagsCondition condition) { switch (condition) { case kEqual: - predicate = true; + *predicate = true; return EQ; case kNotEqual: - predicate = false; + *predicate = false; return EQ; case kUnsignedLessThan: - predicate = true; + *predicate = true; return OLT; case kUnsignedGreaterThanOrEqual: - predicate = false; + *predicate = false; return OLT; case kUnsignedLessThanOrEqual: - predicate = true; + *predicate = true; return OLE; case kUnsignedGreaterThan: - predicate = false; + *predicate = false; return OLE; case kUnorderedEqual: case kUnorderedNotEqual: - predicate = true; + *predicate = true; break; default: - predicate = true; + *predicate = true; break; } UNREACHABLE(); @@ -303,9 +302,9 @@ FPUCondition FlagsConditionToConditionCmpFPU( << "\""; \ UNIMPLEMENTED(); -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - MipsOperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + MipsOperandConverter const& i) { 
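+ // Under kMemoryAccessPoisoned, the just-loaded value is ANDed with
+ // kSpeculationPoisonRegister, which is all-ones on correctly predicted
+ // paths and zero after a misprediction, so speculative loads read zero.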
const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -780,12 +779,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArchCallCFunction: { int const num_parameters = MiscField::decode(instr->opcode()); - Label return_location; - if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { + Label start_call; + bool isWasmCapiFunction = + linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); + // from start_call to return address. + int offset = 40; +#if V8_HOST_ARCH_MIPS + if (__ emit_debug_code()) { + offset += 16; + } +#endif + if (isWasmCapiFunction) { // Put the return address in a stack slot. - __ LoadAddress(kScratchReg, &return_location); - __ sw(kScratchReg, - MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); + __ mov(kScratchReg, ra); + __ bind(&start_call); + __ nal(); + __ nop(); + __ Addu(ra, ra, offset - 8); // 8 = nop + nal + __ sw(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); + __ mov(ra, kScratchReg); } if (instr->InputAt(0)->IsImmediate()) { ExternalReference ref = i.InputExternalReference(0); @@ -794,7 +806,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register func = i.InputRegister(0); __ CallCFunction(func, num_parameters); } - __ bind(&return_location); + if (isWasmCapiFunction) { + CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); + } + RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); frame_access_state()->SetFrameAccessToDefault(); // Ideally, we should decrement SP delta to match the change of stack @@ -1179,7 +1194,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( FPURegister right = i.InputOrZeroSingleRegister(1); bool predicate; FPUCondition cc = - FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition()); + FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); if ((left == kDoubleRegZero || right == kDoubleRegZero) && !__ IsDoubleZeroRegSet()) { @@ -1239,7 +1254,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( FPURegister right = i.InputOrZeroDoubleRegister(1); bool predicate; FPUCondition cc = - FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition()); + FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); if ((left == kDoubleRegZero || right == kDoubleRegZero) && !__ IsDoubleZeroRegSet()) { __ Move(kDoubleRegZero, 0.0); @@ -2038,6 +2053,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31); break; } + case kMipsF32x4Sqrt: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kMipsF32x4RecipApprox: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); @@ -3026,7 +3046,7 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, } else if (instr->arch_opcode() == kMipsCmpS || instr->arch_opcode() == kMipsCmpD) { bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + FlagsConditionToConditionCmpFPU(&predicate, condition); if (predicate) { __ BranchTrueF(tlabel); } else { @@ -3116,7 +3136,7 @@ void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, case kMipsCmpS: case kMipsCmpD: { bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + 
FlagsConditionToConditionCmpFPU(&predicate, condition); if (predicate) { __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister); } else { @@ -3314,7 +3334,7 @@ void CodeGenerator::AssembleArchBoolean(Instruction* instr, __ Move(kDoubleRegZero, 0.0); } bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + FlagsConditionToConditionCmpFPU(&predicate, condition); if (!IsMipsArchVariant(kMips32r6)) { __ li(result, Operand(1)); if (predicate) { diff --git a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h index e8020d9e89..af0774f468 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h +++ b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h @@ -159,6 +159,7 @@ namespace compiler { V(MipsI32x4MinU) \ V(MipsF32x4Abs) \ V(MipsF32x4Neg) \ + V(MipsF32x4Sqrt) \ V(MipsF32x4RecipApprox) \ V(MipsF32x4RecipSqrtApprox) \ V(MipsF32x4Add) \ diff --git a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc index 4e6aef52f4..ba17ad2581 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc @@ -54,6 +54,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsF32x4Div: case kMipsF32x4Ne: case kMipsF32x4Neg: + case kMipsF32x4Sqrt: case kMipsF32x4RecipApprox: case kMipsF32x4RecipSqrtApprox: case kMipsF32x4ReplaceLane: diff --git a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc index bb47262c6c..7ee5c7c2c7 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "src/base/adapters.h" #include "src/base/bits.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" @@ -781,6 +780,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + void InstructionSelector::VisitWord32Ctz(Node* node) { MipsOperandGenerator g(this); Emit(kMipsCtz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); @@ -2015,6 +2018,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \ V(F32x4Abs, kMipsF32x4Abs) \ V(F32x4Neg, kMipsF32x4Neg) \ + V(F32x4Sqrt, kMipsF32x4Sqrt) \ V(F32x4RecipApprox, kMipsF32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \ V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \ diff --git a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc index 5682bed71a..9cec463e87 100644 --- a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc @@ -278,42 +278,41 @@ Condition FlagsConditionToConditionOvf(FlagsCondition condition) { UNREACHABLE(); } -FPUCondition FlagsConditionToConditionCmpFPU( - bool& predicate, // NOLINT(runtime/references) - FlagsCondition condition) { +FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate, + FlagsCondition condition) { switch (condition) { case kEqual: - predicate = true; + *predicate = true; return EQ; case kNotEqual: - predicate = false; + *predicate = false; return EQ; case kUnsignedLessThan: - predicate = true; + *predicate = true; return OLT; case kUnsignedGreaterThanOrEqual: - predicate = false; + *predicate = false; return OLT; case kUnsignedLessThanOrEqual: - predicate = true; + *predicate = true; return OLE; case kUnsignedGreaterThan: - predicate = false; + *predicate = false; return OLE; case kUnorderedEqual: case kUnorderedNotEqual: - predicate = true; + *predicate = true; break; default: - predicate = true; + *predicate = true; break; } UNREACHABLE(); } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - MipsOperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + MipsOperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -758,12 +757,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArchCallCFunction: { int const num_parameters = MiscField::decode(instr->opcode()); - Label return_location; - if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { + Label start_call; + bool isWasmCapiFunction = + linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); + // from start_call to return address. + int offset = 48; +#if V8_HOST_ARCH_MIPS64 + if (__ emit_debug_code()) { + offset += 16; + } +#endif + if (isWasmCapiFunction) { // Put the return address in a stack slot. 
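+ // MIPS has no PC-relative addressing, so nal (a branch-and-link that
+ // never branches) captures PC + 8 in ra; adding offset - 8 then yields
+ // the return address of this call sequence, which the CHECK_EQ against
+ // SizeOfCodeGeneratedSince(&start_call) below verifies.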
- __ LoadAddress(kScratchReg, &return_location); - __ sd(kScratchReg, - MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); + __ mov(kScratchReg, ra); + __ bind(&start_call); + __ nal(); + __ nop(); + __ Daddu(ra, ra, offset - 8); // 8 = nop + nal + __ sd(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); + __ mov(ra, kScratchReg); } if (instr->InputAt(0)->IsImmediate()) { ExternalReference ref = i.InputExternalReference(0); @@ -772,7 +784,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register func = i.InputRegister(0); __ CallCFunction(func, num_parameters); } - __ bind(&return_location); + if (isWasmCapiFunction) { + CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); + } + RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); frame_access_state()->SetFrameAccessToDefault(); // Ideally, we should decrement SP delta to match the change of stack @@ -1276,7 +1291,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( FPURegister right = i.InputOrZeroSingleRegister(1); bool predicate; FPUCondition cc = - FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition()); + FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); if ((left == kDoubleRegZero || right == kDoubleRegZero) && !__ IsDoubleZeroRegSet()) { @@ -1339,7 +1354,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( FPURegister right = i.InputOrZeroDoubleRegister(1); bool predicate; FPUCondition cc = - FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition()); + FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); if ((left == kDoubleRegZero || right == kDoubleRegZero) && !__ IsDoubleZeroRegSet()) { __ Move(kDoubleRegZero, 0.0); @@ -2233,6 +2248,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } + case kMips64F32x4Sqrt: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kMips64I32x4Neg: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); @@ -3151,7 +3171,7 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, } else if (instr->arch_opcode() == kMips64CmpS || instr->arch_opcode() == kMips64CmpD) { bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + FlagsConditionToConditionCmpFPU(&predicate, condition); if (predicate) { __ BranchTrueF(tlabel); } else { @@ -3261,7 +3281,7 @@ void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, case kMips64CmpS: case kMips64CmpD: { bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + FlagsConditionToConditionCmpFPU(&predicate, condition); if (predicate) { __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister); } else { @@ -3470,7 +3490,7 @@ void CodeGenerator::AssembleArchBoolean(Instruction* instr, __ Move(kDoubleRegZero, 0.0); } bool predicate; - FlagsConditionToConditionCmpFPU(predicate, condition); + FlagsConditionToConditionCmpFPU(&predicate, condition); if (kArchVariant != kMips64r6) { __ li(result, Operand(1)); if (predicate) { diff --git a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h index edc8924757..bcf3532b57 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h +++ 
b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h @@ -189,6 +189,7 @@ namespace compiler { V(Mips64I32x4MinU) \ V(Mips64F32x4Abs) \ V(Mips64F32x4Neg) \ + V(Mips64F32x4Sqrt) \ V(Mips64F32x4RecipApprox) \ V(Mips64F32x4RecipSqrtApprox) \ V(Mips64F32x4Add) \ diff --git a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc index 880b424c41..fe2d33d1db 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc @@ -82,6 +82,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64F32x4Div: case kMips64F32x4Ne: case kMips64F32x4Neg: + case kMips64F32x4Sqrt: case kMips64F32x4RecipApprox: case kMips64F32x4RecipSqrtApprox: case kMips64F32x4ReplaceLane: diff --git a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc index 9c717ab1e9..dfc0ff5bad 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "src/base/adapters.h" #include "src/base/bits.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" @@ -823,6 +822,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + void InstructionSelector::VisitWord32Ctz(Node* node) { Mips64OperandGenerator g(this); Emit(kMips64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); @@ -2678,6 +2681,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4) \ V(F32x4Abs, kMips64F32x4Abs) \ V(F32x4Neg, kMips64F32x4Neg) \ + V(F32x4Sqrt, kMips64F32x4Sqrt) \ V(F32x4RecipApprox, kMips64F32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox) \ V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4) \ diff --git a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc index 5c69bc34a1..dde1804adb 100644 --- a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc @@ -263,9 +263,8 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) { UNREACHABLE(); } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, Instruction* instr, - PPCOperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr, + PPCOperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode())); if (access_mode == kMemoryAccessPoisoned) { @@ -1024,7 +1023,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Label start_call; bool isWasmCapiFunction = linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); +#if defined(_AIX) + // AIX/PPC64BE Linux uses a function descriptor + // and emits 2 extra Load instructions under CallCFunctionHelper.
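+ // (The descriptor holds the entry address, TOC, and environment pointer;
+ // loading the actual entry point and the TOC adds those two instructions,
+ // so the distance below is 11 * kInstrSize rather than 9.)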
+ constexpr int offset = 11 * kInstrSize; +#else constexpr int offset = 9 * kInstrSize; +#endif if (isWasmCapiFunction) { __ mflr(r0); __ bind(&start_call); @@ -1043,9 +1048,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } // TODO(miladfar): In the above block, kScratchReg must be populated with // the strictly-correct PC, which is the return address at this spot. The - // offset is set to 36 (9 * kInstrSize) right now, which is counted from - // where we are binding to the label and ends at this spot. If failed, - // replace it with the correct offset suggested. More info on f5ab7d3. + // offset is set to 36 (9 * kInstrSize) on pLinux and 44 on AIX; it is + // counted from where the label is bound up to this spot. + // If the check fails, replace it with the suggested correct offset. More + // info in f5ab7d3. if (isWasmCapiFunction) CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); diff --git a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc index ef8490a726..2ffd6495d7 100644 --- a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -926,6 +926,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + // TODO(miladfar): Implement the ppc selector for reversing SIMD bytes. + // Check if the input node is a Load and do a Load Reverse at once.
+ UNIMPLEMENTED(); +} + void InstructionSelector::VisitInt32Add(Node* node) { VisitBinop<Int32BinopMatcher>(this, node, kPPC_Add32, kInt16Imm); } @@ -2283,6 +2289,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); } + void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } diff --git a/deps/v8/src/compiler/backend/register-allocator-verifier.cc b/deps/v8/src/compiler/backend/register-allocator-verifier.cc index 53349c9c2b..17e0b8ca75 100644 --- a/deps/v8/src/compiler/backend/register-allocator-verifier.cc +++ b/deps/v8/src/compiler/backend/register-allocator-verifier.cc @@ -92,7 +92,7 @@ RegisterAllocatorVerifier::RegisterAllocatorVerifier( void RegisterAllocatorVerifier::VerifyInput( const OperandConstraint& constraint) { CHECK_NE(kSameAsFirst, constraint.type_); - if (constraint.type_ != kImmediate && constraint.type_ != kExplicit) { + if (constraint.type_ != kImmediate) { CHECK_NE(InstructionOperand::kInvalidVirtualRegister, constraint.virtual_register_); } @@ -102,14 +102,12 @@ void RegisterAllocatorVerifier::VerifyTemp( const OperandConstraint& constraint) { CHECK_NE(kSameAsFirst, constraint.type_); CHECK_NE(kImmediate, constraint.type_); - CHECK_NE(kExplicit, constraint.type_); CHECK_NE(kConstant, constraint.type_); } void RegisterAllocatorVerifier::VerifyOutput( const OperandConstraint& constraint) { CHECK_NE(kImmediate, constraint.type_); - CHECK_NE(kExplicit, constraint.type_); CHECK_NE(InstructionOperand::kInvalidVirtualRegister, constraint.virtual_register_); } @@ -149,8 +147,6 @@ void RegisterAllocatorVerifier::BuildConstraint(const InstructionOperand* op, constraint->type_ = kConstant; constraint->value_ = ConstantOperand::cast(op)->virtual_register(); constraint->virtual_register_ = constraint->value_; - } else if (op->IsExplicit()) { - constraint->type_ = kExplicit; } else if (op->IsImmediate()) { const ImmediateOperand* imm = ImmediateOperand::cast(op); int value = imm->type() == ImmediateOperand::INLINE ? 
imm->inline_value() @@ -235,9 +231,6 @@ void RegisterAllocatorVerifier::CheckConstraint( case kFPRegister: CHECK_WITH_MSG(op->IsFPRegister(), caller_info_); return; - case kExplicit: - CHECK_WITH_MSG(op->IsExplicit(), caller_info_); - return; case kFixedRegister: case kRegisterAndSlot: CHECK_WITH_MSG(op->IsRegister(), caller_info_); @@ -503,8 +496,7 @@ void RegisterAllocatorVerifier::VerifyGapMoves() { instr_constraint.operand_constraints_; size_t count = 0; for (size_t i = 0; i < instr->InputCount(); ++i, ++count) { - if (op_constraints[count].type_ == kImmediate || - op_constraints[count].type_ == kExplicit) { + if (op_constraints[count].type_ == kImmediate) { continue; } int virtual_register = op_constraints[count].virtual_register_; diff --git a/deps/v8/src/compiler/backend/register-allocator-verifier.h b/deps/v8/src/compiler/backend/register-allocator-verifier.h index 68e69c0d16..7110c2eb42 100644 --- a/deps/v8/src/compiler/backend/register-allocator-verifier.h +++ b/deps/v8/src/compiler/backend/register-allocator-verifier.h @@ -188,7 +188,6 @@ class RegisterAllocatorVerifier final : public ZoneObject { kRegisterOrSlot, kRegisterOrSlotFP, kRegisterOrSlotOrConstant, - kExplicit, kSameAsFirst, kRegisterAndSlot }; diff --git a/deps/v8/src/compiler/backend/register-allocator.cc b/deps/v8/src/compiler/backend/register-allocator.cc index 21eef0485c..945554eb32 100644 --- a/deps/v8/src/compiler/backend/register-allocator.cc +++ b/deps/v8/src/compiler/backend/register-allocator.cc @@ -6,7 +6,7 @@ #include <iomanip> -#include "src/base/adapters.h" +#include "src/base/iterator.h" #include "src/base/small-vector.h" #include "src/codegen/assembler-inl.h" #include "src/codegen/tick-counter.h" @@ -317,7 +317,6 @@ UsePositionHintType UsePosition::HintTypeForOperand( switch (op.kind()) { case InstructionOperand::CONSTANT: case InstructionOperand::IMMEDIATE: - case InstructionOperand::EXPLICIT: return UsePositionHintType::kNone; case InstructionOperand::UNALLOCATED: return UsePositionHintType::kUnresolved; @@ -797,12 +796,13 @@ LifetimePosition LiveRange::NextEndAfter(LifetimePosition position) const { return start_search->end(); } -LifetimePosition LiveRange::NextStartAfter(LifetimePosition position) const { +LifetimePosition LiveRange::NextStartAfter(LifetimePosition position) { UseInterval* start_search = FirstSearchIntervalForPosition(position); while (start_search->start() < position) { start_search = start_search->next(); } - return start_search->start(); + next_start_ = start_search->start(); + return next_start_; } LifetimePosition LiveRange::FirstIntersection(LiveRange* other) const { @@ -1940,8 +1940,8 @@ void ConstraintBuilder::MeetConstraintsBefore(int instr_index) { // Handle fixed input operands of second instruction. for (size_t i = 0; i < second->InputCount(); i++) { InstructionOperand* input = second->InputAt(i); - if (input->IsImmediate() || input->IsExplicit()) { - continue; // Ignore immediates and explicitly reserved registers. + if (input->IsImmediate()) { + continue; // Ignore immediates. } UnallocatedOperand* cur_input = UnallocatedOperand::cast(input); if (cur_input->HasFixedPolicy()) { @@ -2323,8 +2323,8 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block, for (size_t i = 0; i < instr->InputCount(); i++) { InstructionOperand* input = instr->InputAt(i); - if (input->IsImmediate() || input->IsExplicit()) { - continue; // Ignore immediates and explicitly reserved registers. + if (input->IsImmediate()) { + continue; // Ignore immediates. 
} LifetimePosition use_pos; if (input->IsUnallocated() && @@ -2504,10 +2504,10 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block, predecessor_hint_preference |= kNotDeferredBlockPreference; } - // - Prefer hints from allocated (or explicit) operands. + // - Prefer hints from allocated operands. // - // Already-allocated or explicit operands are typically assigned using - // the parallel moves on the last instruction. For example: + // Already-allocated operands are typically assigned using the parallel + // moves on the last instruction. For example: // // gap (v101 = [x0|R|w32]) (v100 = v101) // ArchJmp @@ -2515,7 +2515,7 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block, // phi: v100 = v101 v102 // // We have already found the END move, so look for a matching START move - // from an allocated (or explicit) operand. + // from an allocated operand. // // Note that we cannot simply look up data()->live_ranges()[vreg] here // because the live ranges are still being built when this function is @@ -2527,7 +2527,7 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block, for (MoveOperands* move : *moves) { InstructionOperand& to = move->destination(); if (predecessor_hint->Equals(to)) { - if (move->source().IsAllocated() || move->source().IsExplicit()) { + if (move->source().IsAllocated()) { predecessor_hint_preference |= kMoveIsAllocatedPreference; } break; @@ -3095,11 +3095,11 @@ LinearScanAllocator::LinearScanAllocator(RegisterAllocationData* data, : RegisterAllocator(data, kind), unhandled_live_ranges_(local_zone), active_live_ranges_(local_zone), - inactive_live_ranges_(local_zone), + inactive_live_ranges_(num_registers(), InactiveLiveRangeQueue(local_zone), + local_zone), next_active_ranges_change_(LifetimePosition::Invalid()), next_inactive_ranges_change_(LifetimePosition::Invalid()) { active_live_ranges().reserve(8); - inactive_live_ranges().reserve(8); } void LinearScanAllocator::MaybeSpillPreviousRanges(LiveRange* begin_range, @@ -3143,15 +3143,15 @@ void LinearScanAllocator::MaybeUndoPreviousSplit(LiveRange* range) { } } -void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet& to_be_live, +void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet* to_be_live, LifetimePosition position, SpillMode spill_mode) { for (auto it = active_live_ranges().begin(); it != active_live_ranges().end();) { LiveRange* active_range = *it; TopLevelLiveRange* toplevel = (*it)->TopLevel(); - auto found = to_be_live.find({toplevel, kUnassignedRegister}); - if (found == to_be_live.end()) { + auto found = to_be_live->find({toplevel, kUnassignedRegister}); + if (found == to_be_live->end()) { // Is not contained in {to_be_live}, spill it. // Fixed registers are exempt from this. They might have been // added from inactive at the block boundary but we know that @@ -3207,7 +3207,7 @@ void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet& to_be_live, } else { // This range is contained in {to_be_live}, so we can keep it. int expected_register = (*found).expected_register; - to_be_live.erase(found); + to_be_live->erase(found); if (expected_register == active_range->assigned_register()) { // Was live and in correct register, simply pass through. TRACE("Keeping %d:%d in %s\n", toplevel->vreg(), @@ -3238,31 +3238,22 @@ LiveRange* LinearScanAllocator::AssignRegisterOnReload(LiveRange* range, // give reloading registers precedence. That way we would compute the // intersection for the entire future.
LifetimePosition new_end = range->End(); - for (const auto inactive : inactive_live_ranges()) { - if (kSimpleFPAliasing || !check_fp_aliasing()) { - if (inactive->assigned_register() != reg) continue; - } else { - bool conflict = inactive->assigned_register() == reg; - if (!conflict) { - int alias_base_index = -1; - int aliases = data()->config()->GetAliases(range->representation(), reg, - inactive->representation(), - &alias_base_index); - DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); - while (aliases-- && !conflict) { - int aliased_reg = alias_base_index + aliases; - if (aliased_reg == reg) { - conflict = true; - } - } - } - if (!conflict) continue; + for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) { + if ((kSimpleFPAliasing || !check_fp_aliasing()) && cur_reg != reg) { + continue; } - for (auto interval = inactive->first_interval(); interval != nullptr; - interval = interval->next()) { - if (interval->start() > new_end) break; - if (interval->end() <= range->Start()) continue; - if (new_end > interval->start()) new_end = interval->start(); + for (const auto cur_inactive : inactive_live_ranges(cur_reg)) { + if (!kSimpleFPAliasing && check_fp_aliasing() && + !data()->config()->AreAliases(cur_inactive->representation(), cur_reg, + range->representation(), reg)) { + continue; + } + for (auto interval = cur_inactive->first_interval(); interval != nullptr; + interval = interval->next()) { + if (interval->start() > new_end) break; + if (interval->end() <= range->Start()) continue; + if (new_end > interval->start()) new_end = interval->start(); + } } } if (new_end != range->End()) { @@ -3275,8 +3266,8 @@ LiveRange* LinearScanAllocator::AssignRegisterOnReload(LiveRange* range, return range; } -void LinearScanAllocator::ReloadLiveRanges(RangeWithRegisterSet& to_be_live, - LifetimePosition position) { +void LinearScanAllocator::ReloadLiveRanges( + RangeWithRegisterSet const& to_be_live, LifetimePosition position) { // Assumption: All ranges in {to_be_live} are currently spilled and there are // no conflicting registers in the active ranges. // The former is ensured by SpillNotLiveRanges, the latter is by construction @@ -3558,11 +3549,17 @@ void LinearScanAllocator::UpdateDeferredFixedRanges(SpillMode spill_mode, Min(updated->End(), next_active_ranges_change_); }); } - for (auto inactive : inactive_live_ranges()) { - split_conflicting(range, inactive, [this](LiveRange* updated) { - next_inactive_ranges_change_ = - Min(updated->End(), next_inactive_ranges_change_); - }); + for (int reg = 0; reg < num_registers(); ++reg) { + if ((kSimpleFPAliasing || !check_fp_aliasing()) && + reg != range->assigned_register()) { + continue; + } + for (auto inactive : inactive_live_ranges(reg)) { + split_conflicting(range, inactive, [this](LiveRange* updated) { + next_inactive_ranges_change_ = + Min(updated->End(), next_inactive_ranges_change_); + }); + } } }; if (mode() == GENERAL_REGISTERS) { @@ -3600,12 +3597,14 @@ void LinearScanAllocator::UpdateDeferredFixedRanges(SpillMode spill_mode, } } else { // Remove all ranges. 
- for (auto it = inactive_live_ranges().begin(); - it != inactive_live_ranges().end();) { - if ((*it)->TopLevel()->IsDeferredFixed()) { - it = inactive_live_ranges().erase(it); - } else { - ++it; + for (int reg = 0; reg < num_registers(); ++reg) { + for (auto it = inactive_live_ranges(reg).begin(); + it != inactive_live_ranges(reg).end();) { + if ((*it)->TopLevel()->IsDeferredFixed()) { + it = inactive_live_ranges(reg).erase(it); + } else { + ++it; + } } } } @@ -3636,7 +3635,9 @@ bool LinearScanAllocator::HasNonDeferredPredecessor(InstructionBlock* block) { void LinearScanAllocator::AllocateRegisters() { DCHECK(unhandled_live_ranges().empty()); DCHECK(active_live_ranges().empty()); - DCHECK(inactive_live_ranges().empty()); + for (int reg = 0; reg < num_registers(); ++reg) { + DCHECK(inactive_live_ranges(reg).empty()); + } SplitAndSpillRangesDefinedByMemoryOperand(); data()->ResetSpillState(); @@ -3853,7 +3854,7 @@ void LinearScanAllocator::AllocateRegisters() { } if (!no_change_required) { - SpillNotLiveRanges(to_be_live, next_block_boundary, spill_mode); + SpillNotLiveRanges(&to_be_live, next_block_boundary, spill_mode); ReloadLiveRanges(to_be_live, next_block_boundary); } @@ -3941,9 +3942,10 @@ void LinearScanAllocator::AddToActive(LiveRange* range) { void LinearScanAllocator::AddToInactive(LiveRange* range) { TRACE("Add live range %d:%d to inactive\n", range->TopLevel()->vreg(), range->relative_id()); - inactive_live_ranges().push_back(range); next_inactive_ranges_change_ = std::min( next_inactive_ranges_change_, range->NextStartAfter(range->Start())); + DCHECK(range->HasRegisterAssigned()); + inactive_live_ranges(range->assigned_register()).insert(range); } void LinearScanAllocator::AddToUnhandled(LiveRange* range) { @@ -3966,30 +3968,36 @@ ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToHandled( ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToInactive( const ZoneVector<LiveRange*>::iterator it, LifetimePosition position) { LiveRange* range = *it; - inactive_live_ranges().push_back(range); TRACE("Moving live range %d:%d from active to inactive\n", (range)->TopLevel()->vreg(), range->relative_id()); + LifetimePosition next_active = range->NextStartAfter(position); next_inactive_ranges_change_ = - std::min(next_inactive_ranges_change_, range->NextStartAfter(position)); + std::min(next_inactive_ranges_change_, next_active); + DCHECK(range->HasRegisterAssigned()); + inactive_live_ranges(range->assigned_register()).insert(range); return active_live_ranges().erase(it); } -ZoneVector<LiveRange*>::iterator LinearScanAllocator::InactiveToHandled( - ZoneVector<LiveRange*>::iterator it) { +LinearScanAllocator::InactiveLiveRangeQueue::iterator +LinearScanAllocator::InactiveToHandled(InactiveLiveRangeQueue::iterator it) { + LiveRange* range = *it; TRACE("Moving live range %d:%d from inactive to handled\n", - (*it)->TopLevel()->vreg(), (*it)->relative_id()); - return inactive_live_ranges().erase(it); + range->TopLevel()->vreg(), range->relative_id()); + int reg = range->assigned_register(); + return inactive_live_ranges(reg).erase(it); } -ZoneVector<LiveRange*>::iterator LinearScanAllocator::InactiveToActive( - ZoneVector<LiveRange*>::iterator it, LifetimePosition position) { +LinearScanAllocator::InactiveLiveRangeQueue::iterator +LinearScanAllocator::InactiveToActive(InactiveLiveRangeQueue::iterator it, + LifetimePosition position) { LiveRange* range = *it; active_live_ranges().push_back(range); TRACE("Moving live range %d:%d from inactive to active\n", 
range->TopLevel()->vreg(), range->relative_id()); next_active_ranges_change_ = std::min(next_active_ranges_change_, range->NextEndAfter(position)); - return inactive_live_ranges().erase(it); + int reg = range->assigned_register(); + return inactive_live_ranges(reg).erase(it); } void LinearScanAllocator::ForwardStateTo(LifetimePosition position) { @@ -4012,18 +4020,25 @@ void LinearScanAllocator::ForwardStateTo(LifetimePosition position) { if (position >= next_inactive_ranges_change_) { next_inactive_ranges_change_ = LifetimePosition::MaxPosition(); - for (auto it = inactive_live_ranges().begin(); - it != inactive_live_ranges().end();) { - LiveRange* cur_inactive = *it; - if (cur_inactive->End() <= position) { - it = InactiveToHandled(it); - } else if (cur_inactive->Covers(position)) { - it = InactiveToActive(it, position); - } else { - next_inactive_ranges_change_ = - std::min(next_inactive_ranges_change_, - cur_inactive->NextStartAfter(position)); - ++it; + for (int reg = 0; reg < num_registers(); ++reg) { + ZoneVector<LiveRange*> reorder(data()->allocation_zone()); + for (auto it = inactive_live_ranges(reg).begin(); + it != inactive_live_ranges(reg).end();) { + LiveRange* cur_inactive = *it; + if (cur_inactive->End() <= position) { + it = InactiveToHandled(it); + } else if (cur_inactive->Covers(position)) { + it = InactiveToActive(it, position); + } else { + next_inactive_ranges_change_ = + std::min(next_inactive_ranges_change_, + cur_inactive->NextStartAfter(position)); + it = inactive_live_ranges(reg).erase(it); + reorder.push_back(cur_inactive); + } + } + for (LiveRange* range : reorder) { + inactive_live_ranges(reg).insert(range); } } } @@ -4094,31 +4109,34 @@ void LinearScanAllocator::FindFreeRegistersForRange( } } - for (LiveRange* cur_inactive : inactive_live_ranges()) { - DCHECK(cur_inactive->End() > range->Start()); - int cur_reg = cur_inactive->assigned_register(); - // No need to carry out intersections, when this register won't be - // interesting to this range anyway. - // TODO(mtrofin): extend to aliased ranges, too. - if ((kSimpleFPAliasing || !check_fp_aliasing()) && - positions[cur_reg] < range->Start()) { - continue; - } - - LifetimePosition next_intersection = cur_inactive->FirstIntersection(range); - if (!next_intersection.IsValid()) continue; - if (kSimpleFPAliasing || !check_fp_aliasing()) { - positions[cur_reg] = Min(positions[cur_reg], next_intersection); - TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg), - Min(positions[cur_reg], next_intersection).value()); - } else { - int alias_base_index = -1; - int aliases = data()->config()->GetAliases( - cur_inactive->representation(), cur_reg, rep, &alias_base_index); - DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); - while (aliases--) { - int aliased_reg = alias_base_index + aliases; - positions[aliased_reg] = Min(positions[aliased_reg], next_intersection); + for (int cur_reg = 0; cur_reg < num_regs; ++cur_reg) { + for (LiveRange* cur_inactive : inactive_live_ranges(cur_reg)) { + DCHECK_GT(cur_inactive->End(), range->Start()); + CHECK_EQ(cur_inactive->assigned_register(), cur_reg); + // No need to carry out intersections, when this register won't be + // interesting to this range anyway. + // TODO(mtrofin): extend to aliased ranges, too. 
+ if ((kSimpleFPAliasing || !check_fp_aliasing()) && + positions[cur_reg] <= cur_inactive->NextStart()) { + break; + } + LifetimePosition next_intersection = + cur_inactive->FirstIntersection(range); + if (!next_intersection.IsValid()) continue; + if (kSimpleFPAliasing || !check_fp_aliasing()) { + positions[cur_reg] = std::min(positions[cur_reg], next_intersection); + TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg), + positions[cur_reg].value()); + } else { + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + cur_inactive->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + positions[aliased_reg] = + std::min(positions[aliased_reg], next_intersection); + } } } } @@ -4337,46 +4355,46 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current, } } - for (LiveRange* range : inactive_live_ranges()) { - DCHECK(range->End() > current->Start()); - int cur_reg = range->assigned_register(); - bool is_fixed = range->TopLevel()->IsFixed(); - - // Don't perform costly intersections if they are guaranteed to not update - // block_pos or use_pos. - // TODO(mtrofin): extend to aliased ranges, too. - if ((kSimpleFPAliasing || !check_fp_aliasing())) { - if (is_fixed) { - if (block_pos[cur_reg] < range->Start()) continue; - } else { - if (use_pos[cur_reg] < range->Start()) continue; + for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) { + for (LiveRange* range : inactive_live_ranges(cur_reg)) { + DCHECK(range->End() > current->Start()); + DCHECK_EQ(range->assigned_register(), cur_reg); + bool is_fixed = range->TopLevel()->IsFixed(); + + // Don't perform costly intersections if they are guaranteed to not update + // block_pos or use_pos. + // TODO(mtrofin): extend to aliased ranges, too. 
+ if ((kSimpleFPAliasing || !check_fp_aliasing())) { + DCHECK_LE(use_pos[cur_reg], block_pos[cur_reg]); + if (block_pos[cur_reg] <= range->NextStart()) break; + if (!is_fixed && use_pos[cur_reg] <= range->NextStart()) continue; } - } - LifetimePosition next_intersection = range->FirstIntersection(current); - if (!next_intersection.IsValid()) continue; + LifetimePosition next_intersection = range->FirstIntersection(current); + if (!next_intersection.IsValid()) continue; - if (kSimpleFPAliasing || !check_fp_aliasing()) { - if (is_fixed) { - block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection); - use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]); - } else { - use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection); - } - } else { - int alias_base_index = -1; - int aliases = data()->config()->GetAliases( - range->representation(), cur_reg, rep, &alias_base_index); - DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); - while (aliases--) { - int aliased_reg = alias_base_index + aliases; + if (kSimpleFPAliasing || !check_fp_aliasing()) { if (is_fixed) { - block_pos[aliased_reg] = - Min(block_pos[aliased_reg], next_intersection); - use_pos[aliased_reg] = - Min(block_pos[aliased_reg], use_pos[aliased_reg]); + block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection); + use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]); } else { - use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection); + use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection); + } + } else { + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + range->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + if (is_fixed) { + block_pos[aliased_reg] = + Min(block_pos[aliased_reg], next_intersection); + use_pos[aliased_reg] = + Min(block_pos[aliased_reg], use_pos[aliased_reg]); + } else { + use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection); + } } } } @@ -4490,40 +4508,38 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current, it = ActiveToHandled(it); } - for (auto it = inactive_live_ranges().begin(); - it != inactive_live_ranges().end();) { - LiveRange* range = *it; - DCHECK(range->End() > current->Start()); - if (range->TopLevel()->IsFixed()) { - ++it; - continue; - } + for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) { if (kSimpleFPAliasing || !check_fp_aliasing()) { - if (range->assigned_register() != reg) { + if (cur_reg != reg) continue; + } + for (auto it = inactive_live_ranges(cur_reg).begin(); + it != inactive_live_ranges(cur_reg).end();) { + LiveRange* range = *it; + if (!kSimpleFPAliasing && check_fp_aliasing() && + !data()->config()->AreAliases(current->representation(), reg, + range->representation(), cur_reg)) { ++it; continue; } - } else { - if (!data()->config()->AreAliases(current->representation(), reg, - range->representation(), - range->assigned_register())) { + DCHECK(range->End() > current->Start()); + if (range->TopLevel()->IsFixed()) { ++it; continue; } - } - LifetimePosition next_intersection = range->FirstIntersection(current); - if (next_intersection.IsValid()) { - UsePosition* next_pos = range->NextRegisterPosition(current->Start()); - if (next_pos == nullptr) { - SpillAfter(range, split_pos, spill_mode); + LifetimePosition next_intersection = range->FirstIntersection(current); + if (next_intersection.IsValid()) { + UsePosition* 
next_pos = range->NextRegisterPosition(current->Start()); + if (next_pos == nullptr) { + SpillAfter(range, split_pos, spill_mode); + } else { + next_intersection = Min(next_intersection, next_pos->pos()); + SpillBetween(range, split_pos, next_intersection, spill_mode); + } + it = InactiveToHandled(it); } else { - next_intersection = Min(next_intersection, next_pos->pos()); - SpillBetween(range, split_pos, next_intersection, spill_mode); + ++it; } - it = InactiveToHandled(it); - } else { - ++it; } } } diff --git a/deps/v8/src/compiler/backend/register-allocator.h b/deps/v8/src/compiler/backend/register-allocator.h index bc7b09d147..17d664e507 100644 --- a/deps/v8/src/compiler/backend/register-allocator.h +++ b/deps/v8/src/compiler/backend/register-allocator.h @@ -335,7 +335,11 @@ class RegisterAllocationData final : public ZoneObject { return result; } - void ResetSpillState() { spill_state_.clear(); } + void ResetSpillState() { + for (auto& state : spill_state_) { + state.clear(); + } + } TickCounter* tick_counter() { return tick_counter_; } @@ -626,9 +630,10 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { bool ShouldBeAllocatedBefore(const LiveRange* other) const; bool CanCover(LifetimePosition position) const; bool Covers(LifetimePosition position) const; - LifetimePosition NextStartAfter(LifetimePosition position) const; + LifetimePosition NextStartAfter(LifetimePosition position); LifetimePosition NextEndAfter(LifetimePosition position) const; LifetimePosition FirstIntersection(LiveRange* other) const; + LifetimePosition NextStart() const { return next_start_; } void VerifyChildStructure() const { VerifyIntervals(); @@ -689,6 +694,8 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { // Cache the last position splintering stopped at. mutable UsePosition* splitting_pointer_; LiveRangeBundle* bundle_ = nullptr; + // Next interval start, relative to the current linear scan position. 
+ LifetimePosition next_start_; DISALLOW_COPY_AND_ASSIGN(LiveRange); }; @@ -1298,29 +1305,39 @@ class LinearScanAllocator final : public RegisterAllocator { LifetimePosition begin_pos, LiveRange* end_range); void MaybeUndoPreviousSplit(LiveRange* range); - void SpillNotLiveRanges( - RangeWithRegisterSet& to_be_live, // NOLINT(runtime/references) - LifetimePosition position, SpillMode spill_mode); + void SpillNotLiveRanges(RangeWithRegisterSet* to_be_live, + LifetimePosition position, SpillMode spill_mode); LiveRange* AssignRegisterOnReload(LiveRange* range, int reg); - void ReloadLiveRanges( - RangeWithRegisterSet& to_be_live, // NOLINT(runtime/references) - LifetimePosition position); + void ReloadLiveRanges(RangeWithRegisterSet const& to_be_live, + LifetimePosition position); void UpdateDeferredFixedRanges(SpillMode spill_mode, InstructionBlock* block); bool BlockIsDeferredOrImmediatePredecessorIsNotDeferred( const InstructionBlock* block); bool HasNonDeferredPredecessor(InstructionBlock* block); - struct LiveRangeOrdering { + struct UnhandledLiveRangeOrdering { bool operator()(const LiveRange* a, const LiveRange* b) const { return a->ShouldBeAllocatedBefore(b); } }; - using LiveRangeQueue = ZoneMultiset<LiveRange*, LiveRangeOrdering>; - LiveRangeQueue& unhandled_live_ranges() { return unhandled_live_ranges_; } + + struct InactiveLiveRangeOrdering { + bool operator()(const LiveRange* a, const LiveRange* b) const { + return a->NextStart() < b->NextStart(); + } + }; + + using UnhandledLiveRangeQueue = + ZoneMultiset<LiveRange*, UnhandledLiveRangeOrdering>; + using InactiveLiveRangeQueue = + ZoneMultiset<LiveRange*, InactiveLiveRangeOrdering>; + UnhandledLiveRangeQueue& unhandled_live_ranges() { + return unhandled_live_ranges_; + } ZoneVector<LiveRange*>& active_live_ranges() { return active_live_ranges_; } - ZoneVector<LiveRange*>& inactive_live_ranges() { - return inactive_live_ranges_; + InactiveLiveRangeQueue& inactive_live_ranges(int reg) { + return inactive_live_ranges_[reg]; } void SetLiveRangeAssignedRegister(LiveRange* range, int reg); @@ -1333,10 +1350,10 @@ class LinearScanAllocator final : public RegisterAllocator { ZoneVector<LiveRange*>::iterator it); ZoneVector<LiveRange*>::iterator ActiveToInactive( ZoneVector<LiveRange*>::iterator it, LifetimePosition position); - ZoneVector<LiveRange*>::iterator InactiveToHandled( - ZoneVector<LiveRange*>::iterator it); - ZoneVector<LiveRange*>::iterator InactiveToActive( - ZoneVector<LiveRange*>::iterator it, LifetimePosition position); + InactiveLiveRangeQueue::iterator InactiveToHandled( + InactiveLiveRangeQueue::iterator it); + InactiveLiveRangeQueue::iterator InactiveToActive( + InactiveLiveRangeQueue::iterator it, LifetimePosition position); void ForwardStateTo(LifetimePosition position); @@ -1386,9 +1403,9 @@ class LinearScanAllocator final : public RegisterAllocator { void PrintRangeOverview(std::ostream& os); - LiveRangeQueue unhandled_live_ranges_; + UnhandledLiveRangeQueue unhandled_live_ranges_; ZoneVector<LiveRange*> active_live_ranges_; - ZoneVector<LiveRange*> inactive_live_ranges_; + ZoneVector<InactiveLiveRangeQueue> inactive_live_ranges_; // Approximate at what position the set of ranges will change next. // Used to avoid scanning for updates even if none are present. 
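The register-allocator.cc/.h hunks above replace the single flat inactive_live_ranges_ vector with one InactiveLiveRangeQueue per register, a multiset ordered by each range's cached NextStart(). Because every queue is sorted, the scans in FindFreeRegistersForRange and AllocateBlockedReg can stop at the first inactive range whose next start already lies beyond the current bound, instead of visiting every inactive range. The following is a minimal sketch of that idea, not V8 code: Range, ByNextStart, UpdateFreeUntil and free_until are illustrative stand-ins, and plain ints stand in for LifetimePosition.

// Sketch only: simplified stand-ins for V8's types. `Range::next_start`
// plays the role of LiveRange::NextStart().
#include <cstdio>
#include <set>
#include <vector>

struct Range {
  int next_start;  // next position at which the range wants its register
};

struct ByNextStart {
  bool operator()(const Range* a, const Range* b) const {
    return a->next_start < b->next_start;
  }
};
using InactiveQueue = std::multiset<Range*, ByNextStart>;

// Lower free_until[reg] using the inactive ranges parked on reg. Because
// each queue is sorted by next_start, the first range starting at or after
// the current bound ends the scan for that register -- the early `break`
// that the old flat vector could not support.
void UpdateFreeUntil(const std::vector<InactiveQueue>& inactive,
                     std::vector<int>* free_until) {
  for (size_t reg = 0; reg < inactive.size(); ++reg) {
    for (const Range* r : inactive[reg]) {
      if ((*free_until)[reg] <= r->next_start) break;
      (*free_until)[reg] = r->next_start;  // proxy for first intersection
    }
  }
}

int main() {
  Range a{4}, b{10};
  std::vector<InactiveQueue> inactive(2);
  inactive[1].insert(&b);
  inactive[1].insert(&a);  // queue keeps {a, b} sorted by next_start
  std::vector<int> free_until = {100, 100};
  UpdateFreeUntil(inactive, &free_until);
  std::printf("r0=%d r1=%d\n", free_until[0], free_until[1]);  // r0=100 r1=4
}

The sketch also suggests why ForwardStateTo above erases and re-inserts ranges whose next start moved: NextStart() is the multiset key, so mutating it in place would silently break the queue's ordering.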
diff --git a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc index 4c2d862fc4..d0f97eca57 100644 --- a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc +++ b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc @@ -1246,9 +1246,8 @@ void AdjustStackPointerForTailCall( } } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, Instruction* instr, - S390OperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr, + S390OperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode())); if (access_mode == kMemoryAccessPoisoned) { diff --git a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc index 7f3277fc68..7b002fe6d3 100644 --- a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc +++ b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "src/base/adapters.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -436,68 +435,64 @@ void VisitTryTruncateDouble(InstructionSelector* selector, ArchOpcode opcode, #endif template <class CanCombineWithLoad> -void GenerateRightOperands( - InstructionSelector* selector, Node* node, Node* right, - InstructionCode& opcode, // NOLINT(runtime/references) - OperandModes& operand_mode, // NOLINT(runtime/references) - InstructionOperand* inputs, - size_t& input_count, // NOLINT(runtime/references) - CanCombineWithLoad canCombineWithLoad) { +void GenerateRightOperands(InstructionSelector* selector, Node* node, + Node* right, InstructionCode* opcode, + OperandModes* operand_mode, + InstructionOperand* inputs, size_t* input_count, + CanCombineWithLoad canCombineWithLoad) { S390OperandGenerator g(selector); - if ((operand_mode & OperandMode::kAllowImmediate) && - g.CanBeImmediate(right, operand_mode)) { - inputs[input_count++] = g.UseImmediate(right); + if ((*operand_mode & OperandMode::kAllowImmediate) && + g.CanBeImmediate(right, *operand_mode)) { + inputs[(*input_count)++] = g.UseImmediate(right); // Can only be RI or RRI - operand_mode &= OperandMode::kAllowImmediate; - } else if (operand_mode & OperandMode::kAllowMemoryOperand) { + *operand_mode &= OperandMode::kAllowImmediate; + } else if (*operand_mode & OperandMode::kAllowMemoryOperand) { NodeMatcher mright(right); if (mright.IsLoad() && selector->CanCover(node, right) && canCombineWithLoad(SelectLoadOpcode(right))) { AddressingMode mode = g.GetEffectiveAddressMemoryOperand( - right, inputs, &input_count, OpcodeImmMode(opcode)); - opcode |= AddressingModeField::encode(mode); - operand_mode &= ~OperandMode::kAllowImmediate; - if (operand_mode & OperandMode::kAllowRM) - operand_mode &= ~OperandMode::kAllowDistinctOps; - } else if (operand_mode & OperandMode::kAllowRM) { - DCHECK(!(operand_mode & OperandMode::kAllowRRM)); - inputs[input_count++] = g.UseAnyExceptImmediate(right); + right, inputs, input_count, OpcodeImmMode(*opcode)); + *opcode |= AddressingModeField::encode(mode); + *operand_mode &= ~OperandMode::kAllowImmediate; + if (*operand_mode & OperandMode::kAllowRM) + *operand_mode &= ~OperandMode::kAllowDistinctOps; + } else if (*operand_mode & 
OperandMode::kAllowRM) { + DCHECK(!(*operand_mode & OperandMode::kAllowRRM)); + inputs[(*input_count)++] = g.UseAnyExceptImmediate(right); // Can not be Immediate - operand_mode &= + *operand_mode &= ~OperandMode::kAllowImmediate & ~OperandMode::kAllowDistinctOps; - } else if (operand_mode & OperandMode::kAllowRRM) { - DCHECK(!(operand_mode & OperandMode::kAllowRM)); - inputs[input_count++] = g.UseAnyExceptImmediate(right); + } else if (*operand_mode & OperandMode::kAllowRRM) { + DCHECK(!(*operand_mode & OperandMode::kAllowRM)); + inputs[(*input_count)++] = g.UseAnyExceptImmediate(right); // Can not be Immediate - operand_mode &= ~OperandMode::kAllowImmediate; + *operand_mode &= ~OperandMode::kAllowImmediate; } else { UNREACHABLE(); } } else { - inputs[input_count++] = g.UseRegister(right); + inputs[(*input_count)++] = g.UseRegister(right); // Can only be RR or RRR - operand_mode &= OperandMode::kAllowRRR; + *operand_mode &= OperandMode::kAllowRRR; } } template <class CanCombineWithLoad> -void GenerateBinOpOperands( - InstructionSelector* selector, Node* node, Node* left, Node* right, - InstructionCode& opcode, // NOLINT(runtime/references) - OperandModes& operand_mode, // NOLINT(runtime/references) - InstructionOperand* inputs, - size_t& input_count, // NOLINT(runtime/references) - CanCombineWithLoad canCombineWithLoad) { +void GenerateBinOpOperands(InstructionSelector* selector, Node* node, + Node* left, Node* right, InstructionCode* opcode, + OperandModes* operand_mode, + InstructionOperand* inputs, size_t* input_count, + CanCombineWithLoad canCombineWithLoad) { S390OperandGenerator g(selector); // left is always register InstructionOperand const left_input = g.UseRegister(left); - inputs[input_count++] = left_input; + inputs[(*input_count)++] = left_input; if (left == right) { - inputs[input_count++] = left_input; + inputs[(*input_count)++] = left_input; // Can only be RR or RRR - operand_mode &= OperandMode::kAllowRRR; + *operand_mode &= OperandMode::kAllowRRR; } else { GenerateRightOperands(selector, node, right, opcode, operand_mode, inputs, input_count, canCombineWithLoad); @@ -575,8 +570,8 @@ void VisitUnaryOp(InstructionSelector* selector, Node* node, size_t output_count = 0; Node* input = node->InputAt(0); - GenerateRightOperands(selector, node, input, opcode, operand_mode, inputs, - input_count, canCombineWithLoad); + GenerateRightOperands(selector, node, input, &opcode, &operand_mode, inputs, + &input_count, canCombineWithLoad); bool input_is_word32 = ProduceWord32Result(input); @@ -631,8 +626,8 @@ void VisitBinOp(InstructionSelector* selector, Node* node, std::swap(left, right); } - GenerateBinOpOperands(selector, node, left, right, opcode, operand_mode, - inputs, input_count, canCombineWithLoad); + GenerateBinOpOperands(selector, node, left, right, &opcode, &operand_mode, + inputs, &input_count, canCombineWithLoad); bool left_is_word32 = ProduceWord32Result(left); @@ -1175,6 +1170,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + // TODO(miladfar): Implement the s390 selector for reversing SIMD bytes. + // Check if the input node is a Load and do a Load Reverse at once. 
+ UNIMPLEMENTED(); +} + template <class Matcher, ArchOpcode neg_opcode> static inline bool TryMatchNegFromSub(InstructionSelector* selector, Node* node) { @@ -2691,6 +2692,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); } + void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index a4f82b153b..44da872f26 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -361,7 +361,6 @@ class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap { void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - X64OperandConverter& i, // NOLINT(runtime/references) int pc) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); @@ -370,9 +369,9 @@ void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen, } } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, - X64OperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + X64OperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -1876,30 +1875,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg); break; case kX64Movsxbl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxbl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxbl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxbl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxbq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxbq); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxbq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxbq); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movb: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { @@ -1911,29 +1910,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movsxwl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxwl); __ AssertZeroExtended(i.OutputRegister()); 
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movzxwl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxwl); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxwq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxwq); break; case kX64Movzxwq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movzxwq); __ AssertZeroExtended(i.OutputRegister()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movw: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { @@ -1945,7 +1944,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movl: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { if (HasAddressingMode(instr)) { __ movl(i.OutputRegister(), i.MemoryOperand()); @@ -1969,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movsxlq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); ASSEMBLE_MOVX(movsxlq); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; @@ -2021,7 +2020,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64Movq: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ movq(i.OutputRegister(), i.MemoryOperand()); } else { @@ -2036,7 +2035,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kX64Movss: - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ Movss(i.OutputDoubleRegister(), i.MemoryOperand()); } else { @@ -2046,7 +2045,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kX64Movsd: { - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); @@ -2069,7 +2068,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64Movdqu: { CpuFeatureScope sse_scope(tasm(), SSSE3); - EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); if (instr->HasOutput()) { __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand()); } else { @@ -2293,6 +2292,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ movq(i.OutputDoubleRegister(), kScratchRegister); break; } + case kX64F64x2Sqrt: { + __ Sqrtpd(i.OutputSimd128Register(), 
i.InputSimd128Register(0)); + break; + } case kX64F64x2Add: { ASSEMBLE_SSE_BINOP(addpd); break; @@ -2350,22 +2353,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64F64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Lt: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64F64x2Le: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } + case kX64F64x2Qfma: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movapd(tmp, i.InputSimd128Register(2)); + __ mulpd(tmp, i.InputSimd128Register(1)); + __ addpd(i.OutputSimd128Register(), tmp); + } + break; + } + case kX64F64x2Qfms: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movapd(tmp, i.InputSimd128Register(2)); + __ mulpd(tmp, i.InputSimd128Register(1)); + __ subpd(i.OutputSimd128Register(), tmp); + } break; } // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below @@ -2445,6 +2474,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; } + case kX64F32x4Sqrt: { + __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kX64F32x4RecipApprox: { __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; @@ -2538,6 +2571,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } + case kX64F32x4Qfma: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movaps(tmp, i.InputSimd128Register(2)); + __ mulps(tmp, i.InputSimd128Register(1)); + __ addps(i.OutputSimd128Register(), tmp); + } + break; + } + case kX64F32x4Qfms: { + if (CpuFeatures::IsSupported(FMA3)) { + CpuFeatureScope fma3_scope(tasm(), FMA3); + __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(2)); + } else { + XMMRegister tmp = i.TempSimd128Register(0); + __ movaps(tmp, i.InputSimd128Register(2)); + __ mulps(tmp, i.InputSimd128Register(1)); + __ subps(i.OutputSimd128Register(), tmp); + } + break; + } case kX64I64x2Splat: { CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); @@ -2577,7 +2636,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2Shl: { 
XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 64. + __ andq(shift, Immediate(63)); + __ movq(tmp, shift); __ psllq(i.OutputSimd128Register(), tmp); break; } @@ -2588,6 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); Register tmp = i.ToRegister(instr->TempAt(0)); + // Modulo 64 not required as sarq_cl will mask cl to 6 bits. // lower quadword __ pextrq(tmp, src, 0x0); @@ -2640,15 +2703,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( if (CpuFeatures::IsSupported(SSE4_2)) { CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(1); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); XMMRegister tmp = i.TempSimd128Register(0); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); - __ movaps(tmp, src); - __ pcmpgtq(src, dst); - __ blendvpd(tmp, dst); // implicit use of xmm0 as mask - __ movaps(dst, tmp); + __ movaps(tmp, src1); + __ pcmpgtq(tmp, src0); + __ movaps(dst, src1); + __ blendvpd(dst, src0); // implicit use of xmm0 as mask } else { CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); @@ -2689,11 +2752,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src = i.InputSimd128Register(1); XMMRegister tmp = i.TempSimd128Register(0); DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); __ movaps(tmp, src); - __ pcmpgtq(src, dst); - __ blendvpd(dst, tmp); // implicit use of xmm0 as mask + __ pcmpgtq(tmp, dst); + __ blendvpd(dst, src); // implicit use of xmm0 as mask break; } case kX64I64x2Eq: { @@ -2732,7 +2795,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2ShrU: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); + Register shift = i.InputRegister(1); + // Take shift value modulo 64.
+ __ andq(shift, Immediate(63)); + __ movq(tmp, shift); __ psrlq(i.OutputSimd128Register(), tmp); break; } @@ -2740,24 +2806,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(1); - XMMRegister src_tmp = i.TempSimd128Register(0); - XMMRegister dst_tmp = i.TempSimd128Register(1); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); + XMMRegister tmp0 = i.TempSimd128Register(0); + XMMRegister tmp1 = i.TempSimd128Register(1); + DCHECK_EQ(tmp1, xmm0); - __ movaps(src_tmp, src); - __ movaps(dst_tmp, dst); + __ movaps(dst, src1); + __ movaps(tmp0, src0); - __ pcmpeqd(src, src); - __ psllq(src, 63); + __ pcmpeqd(tmp1, tmp1); + __ psllq(tmp1, 63); - __ pxor(dst_tmp, src); - __ pxor(src, src_tmp); + __ pxor(tmp0, tmp1); + __ pxor(tmp1, dst); - __ pcmpgtq(src, dst_tmp); - __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask - __ movaps(dst, src_tmp); + __ pcmpgtq(tmp1, tmp0); + __ blendvpd(dst, src0); // implicit use of xmm0 as mask break; } case kX64I64x2MaxU: { @@ -2765,22 +2830,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister src_tmp = i.TempSimd128Register(0); - XMMRegister dst_tmp = i.TempSimd128Register(1); + XMMRegister dst_tmp = i.TempSimd128Register(0); + XMMRegister tmp = i.TempSimd128Register(1); DCHECK_EQ(dst, i.InputSimd128Register(0)); - DCHECK_EQ(src, xmm0); + DCHECK_EQ(tmp, xmm0); - __ movaps(src_tmp, src); __ movaps(dst_tmp, dst); - __ pcmpeqd(src, src); - __ psllq(src, 63); + __ pcmpeqd(tmp, tmp); + __ psllq(tmp, 63); - __ pxor(dst_tmp, src); - __ pxor(src, src_tmp); + __ pxor(dst_tmp, tmp); + __ pxor(tmp, src); - __ pcmpgtq(src, dst_tmp); - __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask + __ pcmpgtq(tmp, dst_tmp); + __ blendvpd(dst, src); // implicit use of xmm0 as mask break; } case kX64I64x2GtU: { @@ -2820,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); if (HasRegisterInput(instr, 0)) { - __ movd(dst, i.InputRegister(0)); + __ Movd(dst, i.InputRegister(0)); } else { - __ movd(dst, i.InputOperand(0)); + __ Movd(dst, i.InputOperand(0)); } - __ pshufd(dst, dst, 0x0); + __ Pshufd(dst, dst, 0x0); break; } case kX64I32x4ExtractLane: { @@ -2878,28 +2942,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); if (dst == src) { - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psignd(dst, kScratchDoubleReg); + __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ Psignd(dst, kScratchDoubleReg); } else { - __ pxor(dst, dst); - __ psubd(dst, src); + __ Pxor(dst, dst); + __ Psubd(dst, src); } break; } case kX64I32x4Shl: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ pslld(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. 
+ __ andq(shift, Immediate(31)); + __ Movq(tmp, shift); + __ Pslld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4ShrS: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ psrad(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. + __ andq(shift, Immediate(31)); + __ Movq(tmp, shift); + __ Psrad(i.OutputSimd128Register(), tmp); break; } case kX64I32x4Add: { - __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4AddHoriz: { @@ -2908,45 +2978,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Sub: { - __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Mul: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4MinS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4MaxS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Eq: { - __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4Ne: { XMMRegister tmp = i.TempSimd128Register(0); - __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqd(tmp, tmp); - __ pxor(i.OutputSimd128Register(), tmp); + __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpeqd(tmp, tmp); + __ Pxor(i.OutputSimd128Register(), tmp); break; } case kX64I32x4GtS: { - __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } case kX64I32x4GeS: { CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - __ pminsd(dst, src); - __ pcmpeqd(dst, src); + __ Pminsd(dst, src); + __ Pcmpeqd(dst, src); break; } case kX64I32x4UConvertF32x4: { @@ -2992,18 +3062,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4ShrU: { XMMRegister tmp = i.TempSimd128Register(0); - __ movq(tmp, i.InputRegister(1)); - __ psrld(i.OutputSimd128Register(), tmp); + Register shift = i.InputRegister(1); + // Take shift value modulo 32. 
    case kX64I32x4UConvertF32x4: {
@@ -2992,18 +3062,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64I32x4ShrU: {
       XMMRegister tmp = i.TempSimd128Register(0);
-      __ movq(tmp, i.InputRegister(1));
-      __ psrld(i.OutputSimd128Register(), tmp);
+      Register shift = i.InputRegister(1);
+      // Take shift value modulo 32.
+      __ andq(shift, Immediate(31));
+      __ Movq(tmp, shift);
+      __ Psrld(i.OutputSimd128Register(), tmp);
       break;
     }
     case kX64I32x4MinU: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
-      __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
     case kX64I32x4MaxU: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
-      __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
     case kX64I32x4GtU: {
@@ -3011,18 +3084,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(1);
       XMMRegister tmp = i.TempSimd128Register(0);
-      __ pmaxud(dst, src);
-      __ pcmpeqd(dst, src);
-      __ pcmpeqd(tmp, tmp);
-      __ pxor(dst, tmp);
+      __ Pmaxud(dst, src);
+      __ Pcmpeqd(dst, src);
+      __ Pcmpeqd(tmp, tmp);
+      __ Pxor(dst, tmp);
       break;
     }
     case kX64I32x4GeU: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(1);
-      __ pminud(dst, src);
-      __ pcmpeqd(dst, src);
+      __ Pminud(dst, src);
+      __ Pcmpeqd(dst, src);
       break;
     }
     case kX64S128Zero: {
@@ -3044,17 +3117,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64I16x8ExtractLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       Register dst = i.OutputRegister();
-      __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
-      __ movsxwl(dst, dst);
+      __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
     case kX64I16x8ReplaceLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       if (HasRegisterInput(instr, 2)) {
-        __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
+        __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
                   i.InputInt8(1));
       } else {
-        __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+        __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
       }
       break;
     }
@@ -3085,13 +3157,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64I16x8Shl: {
       XMMRegister tmp = i.TempSimd128Register(0);
-      __ movq(tmp, i.InputRegister(1));
+      Register shift = i.InputRegister(1);
+      // Take shift value modulo 16.
+      __ andq(shift, Immediate(15));
+      __ movq(tmp, shift);
       __ psllw(i.OutputSimd128Register(), tmp);
       break;
     }
     case kX64I16x8ShrS: {
       XMMRegister tmp = i.TempSimd128Register(0);
-      __ movq(tmp, i.InputRegister(1));
+      Register shift = i.InputRegister(1);
+      // Take shift value modulo 16.
+      __ andq(shift, Immediate(15));
+      __ movq(tmp, shift);
       __ psraw(i.OutputSimd128Register(), tmp);
       break;
     }
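The kX64I32x4GtU/GeU sequences above synthesize unsigned comparisons from pminud/pmaxud plus equality, since SSE4.1 provides unsigned min/max but no unsigned compare. A per-lane model (illustrative C++ only):

    #include <algorithm>
    #include <cstdint>

    // a >= b  <=>  min(a, b) == b   (kX64I32x4GeU: pminud, then pcmpeqd)
    uint32_t lane_ge_u(uint32_t a, uint32_t b) {
      return std::min(a, b) == b ? 0xFFFFFFFFu : 0u;
    }

    // a > b   <=>  NOT (max(a, b) == b); the pcmpeqd(tmp, tmp)/pxor pair
    // builds the all-ones constant and negates the equality mask.
    uint32_t lane_gt_u(uint32_t a, uint32_t b) {
      uint32_t eq = std::max(a, b) == b ? 0xFFFFFFFFu : 0u;
      return eq ^ 0xFFFFFFFFu;
    }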
@@ -3173,7 +3251,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64I16x8ShrU: {
       XMMRegister tmp = i.TempSimd128Register(0);
-      __ movq(tmp, i.InputRegister(1));
+      Register shift = i.InputRegister(1);
+      // Take shift value modulo 16.
+      __ andq(shift, Immediate(15));
+      __ movq(tmp, shift);
       __ psrlw(i.OutputSimd128Register(), tmp);
       break;
     }
@@ -3230,28 +3311,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       CpuFeatureScope sse_scope(tasm(), SSSE3);
       XMMRegister dst = i.OutputSimd128Register();
       if (HasRegisterInput(instr, 0)) {
-        __ movd(dst, i.InputRegister(0));
+        __ Movd(dst, i.InputRegister(0));
       } else {
-        __ movd(dst, i.InputOperand(0));
+        __ Movd(dst, i.InputOperand(0));
       }
-      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
-      __ pshufb(dst, kScratchDoubleReg);
+      __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pshufb(dst, kScratchDoubleReg);
       break;
     }
     case kX64I8x16ExtractLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       Register dst = i.OutputRegister();
-      __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
-      __ movsxbl(dst, dst);
+      __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
     case kX64I8x16ReplaceLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       if (HasRegisterInput(instr, 2)) {
-        __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
+        __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
                   i.InputInt8(1));
       } else {
-        __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+        __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
       }
       break;
     }
@@ -3279,15 +3359,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       // Temp registers for shift mask and additional moves to XMM registers.
       Register tmp = i.ToRegister(instr->TempAt(0));
       XMMRegister tmp_simd = i.TempSimd128Register(1);
+      Register shift = i.InputRegister(1);
       // Mask off the unwanted bits before word-shifting.
       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
-      __ movq(tmp, i.InputRegister(1));
+      // Take shift value modulo 8.
+      __ andq(shift, Immediate(7));
+      __ movq(tmp, shift);
       __ addq(tmp, Immediate(8));
       __ movq(tmp_simd, tmp);
       __ psrlw(kScratchDoubleReg, tmp_simd);
       __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
       __ pand(dst, kScratchDoubleReg);
-      __ movq(tmp_simd, i.InputRegister(1));
+      __ movq(tmp_simd, shift);
       __ psllw(dst, tmp_simd);
       break;
     }
@@ -3302,6 +3385,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ punpcklbw(dst, dst);
       // Prepare shift value
       __ movq(tmp, i.InputRegister(1));
+      // Take shift value modulo 8.
+      __ andq(tmp, Immediate(7));
       __ addq(tmp, Immediate(8));
       __ movq(tmp_simd, tmp);
       __ psraw(kScratchDoubleReg, tmp_simd);
@@ -3414,6 +3499,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ punpcklbw(dst, dst);
       // Prepare shift value
       __ movq(tmp, i.InputRegister(1));
+      // Take shift value modulo 8.
+      __ andq(tmp, Immediate(7));
       __ addq(tmp, Immediate(8));
       __ movq(tmp_simd, tmp);
       __ psrlw(kScratchDoubleReg, tmp_simd);
@@ -3422,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64I8x16AddSaturateU: {
-      __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
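The kX64I8x16Shl sequence above works around the absence of a per-byte shift on x86: the lanes are shifted as 16-bit words, after the bits that would spill into the neighbouring byte have been masked off (the psrlw/packuswb pair materializes the 0xFF >> count mask in every byte of kScratchDoubleReg). One byte of that computation as a scalar sketch, not V8 code:

    #include <cstdint>

    uint8_t lane_shl_i8(uint8_t b, uint32_t count) {
      count &= 7;  // the added andq(shift, Immediate(7))
      uint8_t keep = static_cast<uint8_t>(0xFF >> count);  // per-byte mask
      // With the high bits cleared, the 16-bit word shift cannot carry
      // anything across the byte boundary.
      return static_cast<uint8_t>((b & keep) << count);
    }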
    case kX64I8x16SubSaturateU: {
@@ -3487,10 +3574,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64S128Select: {
       // Mask used here is stored in dst.
       XMMRegister dst = i.OutputSimd128Register();
-      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
-      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
-      __ andps(dst, kScratchDoubleReg);
-      __ xorps(dst, i.InputSimd128Register(2));
+      __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+      __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+      __ Andps(dst, kScratchDoubleReg);
+      __ Xorps(dst, i.InputSimd128Register(2));
+      break;
+    }
+    case kX64S8x16Swizzle: {
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister mask = i.TempSimd128Register(0);
+
+      // Out-of-range indices should return 0, add 112 so that any value > 15
+      // saturates to 128 (top bit set), so pshufb will zero that lane.
+      __ Move(mask, static_cast<uint32_t>(0x70707070));
+      __ Pshufd(mask, mask, 0x0);
+      __ Paddusb(mask, i.InputSimd128Register(1));
+      __ Pshufb(dst, mask);
       break;
     }
     case kX64S8x16Shuffle: {
@@ -3507,10 +3608,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         }
         SetupShuffleMaskOnStack(tasm(), mask);
-        __ pshufb(dst, Operand(rsp, 0));
+        __ Pshufb(dst, Operand(rsp, 0));
       } else {  // two input operands
         DCHECK_EQ(6, instr->InputCount());
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
         uint32_t mask[4] = {};
         for (int j = 5; j > 1; j--) {
           uint32_t lanes = i.InputUint32(j);
@@ -3520,13 +3621,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
           }
         }
         SetupShuffleMaskOnStack(tasm(), mask);
-        __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
+        __ Pshufb(kScratchDoubleReg, Operand(rsp, 0));

         uint32_t mask1[4] = {};
         if (instr->InputAt(1)->IsSimd128Register()) {
           XMMRegister src1 = i.InputSimd128Register(1);
           if (src1 != dst) __ movups(dst, src1);
         } else {
-          __ movups(dst, i.InputOperand(1));
+          __ Movups(dst, i.InputOperand(1));
         }
         for (int j = 5; j > 1; j--) {
           uint32_t lanes = i.InputUint32(j);
@@ -3536,8 +3637,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
           }
         }
         SetupShuffleMaskOnStack(tasm(), mask1);
-        __ pshufb(dst, Operand(rsp, 0));
-        __ por(dst, kScratchDoubleReg);
+        __ Pshufb(dst, Operand(rsp, 0));
+        __ Por(dst, kScratchDoubleReg);
       }
       __ movq(rsp, tmp);
       break;
diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
index 8a0a45a916..e390c6922c 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
+++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
@@ -160,6 +160,7 @@ namespace compiler {
   V(X64F64x2ReplaceLane) \
   V(X64F64x2Abs) \
   V(X64F64x2Neg) \
+  V(X64F64x2Sqrt) \
   V(X64F64x2Add) \
   V(X64F64x2Sub) \
   V(X64F64x2Mul) \
@@ -170,6 +171,8 @@ namespace compiler {
   V(X64F64x2Ne) \
   V(X64F64x2Lt) \
   V(X64F64x2Le) \
+  V(X64F64x2Qfma) \
+  V(X64F64x2Qfms) \
   V(X64F32x4Splat) \
   V(X64F32x4ExtractLane) \
   V(X64F32x4ReplaceLane) \
@@ -177,6 +180,7 @@ namespace compiler {
   V(X64F32x4UConvertI32x4) \
   V(X64F32x4Abs) \
   V(X64F32x4Neg) \
+  V(X64F32x4Sqrt) \
   V(X64F32x4RecipApprox) \
   V(X64F32x4RecipSqrtApprox) \
   V(X64F32x4Add) \
@@ -190,6 +194,8 @@ namespace compiler {
   V(X64F32x4Ne) \
   V(X64F32x4Lt) \
   V(X64F32x4Le) \
+  V(X64F32x4Qfma) \
+  V(X64F32x4Qfms) \
   V(X64I64x2Splat) \
   V(X64I64x2ExtractLane) \
   V(X64I64x2ReplaceLane) \
@@ -300,6 +306,7 @@ namespace compiler {
   V(X64S128Or) \
   V(X64S128Xor) \
   V(X64S128Select) \
+  V(X64S8x16Swizzle) \
   V(X64S8x16Shuffle) \
   V(X64S32x4Swizzle) \
   V(X64S32x4Shuffle) \
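The new kX64S8x16Swizzle sequence (in the code-generator hunk above) gets its out-of-range behaviour from a saturating add: 0x70 is added to every index byte, so indices 0..15 keep their low nibble while anything larger saturates toward 0xFF and ends up with the top bit set, and pshufb writes zero for lanes whose selector has that bit set. One lane as a scalar sketch (plain C++, not V8 code):

    #include <cstdint>

    uint8_t swizzle_lane(const uint8_t src[16], uint8_t index) {
      unsigned sum = index + 0x70u;  // paddusb: unsigned saturating add
      uint8_t sel = sum > 0xFFu ? 0xFF : static_cast<uint8_t>(sum);
      if (sel & 0x80) return 0;      // pshufb zeroes this lane
      return src[sel & 0x0F];        // otherwise select by the low nibble
    }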
diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
index e9fa450c38..28a935fd91 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -129,6 +129,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F64x2ReplaceLane:
     case kX64F64x2Abs:
     case kX64F64x2Neg:
+    case kX64F64x2Sqrt:
     case kX64F64x2Add:
     case kX64F64x2Sub:
     case kX64F64x2Mul:
@@ -139,6 +140,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F64x2Ne:
     case kX64F64x2Lt:
     case kX64F64x2Le:
+    case kX64F64x2Qfma:
+    case kX64F64x2Qfms:
     case kX64F32x4Splat:
     case kX64F32x4ExtractLane:
     case kX64F32x4ReplaceLane:
@@ -148,6 +151,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4RecipSqrtApprox:
     case kX64F32x4Abs:
     case kX64F32x4Neg:
+    case kX64F32x4Sqrt:
     case kX64F32x4Add:
     case kX64F32x4AddHoriz:
     case kX64F32x4Sub:
@@ -159,6 +163,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4Ne:
     case kX64F32x4Lt:
     case kX64F32x4Le:
+    case kX64F32x4Qfma:
+    case kX64F32x4Qfms:
     case kX64I64x2Splat:
     case kX64I64x2ExtractLane:
     case kX64I64x2ReplaceLane:
@@ -275,6 +281,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64S1x4AllTrue:
     case kX64S1x8AnyTrue:
     case kX64S1x8AllTrue:
+    case kX64S8x16Swizzle:
     case kX64S8x16Shuffle:
     case kX64S32x4Swizzle:
     case kX64S32x4Shuffle:
diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
index 5379074bac..f5d05fdd85 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -4,7 +4,7 @@

 #include <algorithm>

-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
 #include "src/base/overflowing-math.h"
 #include "src/compiler/backend/instruction-selector-impl.h"
 #include "src/compiler/node-matchers.h"
@@ -250,9 +250,21 @@ ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
 #else
       UNREACHABLE();
 #endif
+#ifdef V8_COMPRESS_POINTERS
+    case MachineRepresentation::kTaggedSigned:
+      opcode = kX64MovqDecompressTaggedSigned;
+      break;
+    case MachineRepresentation::kTaggedPointer:
+      opcode = kX64MovqDecompressTaggedPointer;
+      break;
+    case MachineRepresentation::kTagged:
+      opcode = kX64MovqDecompressAnyTagged;
+      break;
+#else
     case MachineRepresentation::kTaggedSigned:   // Fall through.
     case MachineRepresentation::kTaggedPointer:  // Fall through.
     case MachineRepresentation::kTagged:         // Fall through.
+#endif
     case MachineRepresentation::kWord64:
       opcode = kX64Movq;
       break;
@@ -288,7 +300,8 @@ ArchOpcode GetStoreOpcode(StoreRepresentation store_rep) {
 #endif
     case MachineRepresentation::kTaggedSigned:   // Fall through.
     case MachineRepresentation::kTaggedPointer:  // Fall through.
-    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kTagged:
+      return kX64MovqCompressTagged;
     case MachineRepresentation::kWord64:
       return kX64Movq;
     case MachineRepresentation::kSimd128:  // Fall through.
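The GetLoadOpcode/GetStoreOpcode changes above route tagged accesses through decompressing moves when pointer compression is enabled. A rough model of the idea (hypothetical helpers, simplified from whatever the kX64MovqDecompress* opcodes actually expand to; the assumption is that a compressed tagged value is the low 32 bits of an isolate-relative address):

    #include <cstdint>

    // Load side: widen the 32-bit compressed value and add the heap base back.
    uint64_t DecompressTagged(uint64_t isolate_root, uint32_t compressed) {
      return isolate_root + compressed;
    }

    // Store side (kX64MovqCompressTagged): truncation is sufficient.
    uint32_t CompressTagged(uint64_t full_pointer) {
      return static_cast<uint32_t>(full_pointer);
    }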
@@ -875,6 +888,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
   Emit(kX64Bswap32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
 }

+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
 void InstructionSelector::VisitInt32Add(Node* node) {
   X64OperandGenerator g(this);

@@ -1843,17 +1860,15 @@ void VisitWordCompare(InstructionSelector* selector, Node* node,
                   node->op()->HasProperty(Operator::kCommutative));
 }

-// Shared routine for 64-bit word comparison operations.
-void VisitWord64Compare(InstructionSelector* selector, Node* node,
-                        FlagsContinuation* cont) {
-  X64OperandGenerator g(selector);
+void VisitWord64EqualImpl(InstructionSelector* selector, Node* node,
+                          FlagsContinuation* cont) {
   if (selector->CanUseRootsRegister()) {
+    X64OperandGenerator g(selector);
     const RootsTable& roots_table = selector->isolate()->roots_table();
     RootIndex root_index;
     HeapObjectBinopMatcher m(node);
     if (m.right().HasValue() &&
         roots_table.IsRootHandle(m.right().Value(), &root_index)) {
-      if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
       InstructionCode opcode =
           kX64Cmp | AddressingModeField::encode(kMode_Root);
       return VisitCompare(
@@ -1861,18 +1876,30 @@ void VisitWord64Compare(InstructionSelector* selector, Node* node,
           g.TempImmediate(
               TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
           g.UseRegister(m.left().node()), cont);
-    } else if (m.left().HasValue() &&
-               roots_table.IsRootHandle(m.left().Value(), &root_index)) {
+    }
+  }
+  VisitWordCompare(selector, node, kX64Cmp, cont);
+}
+
+void VisitWord32EqualImpl(InstructionSelector* selector, Node* node,
+                          FlagsContinuation* cont) {
+  if (COMPRESS_POINTERS_BOOL && selector->CanUseRootsRegister()) {
+    X64OperandGenerator g(selector);
+    const RootsTable& roots_table = selector->isolate()->roots_table();
+    RootIndex root_index;
+    CompressedHeapObjectBinopMatcher m(node);
+    if (m.right().HasValue() &&
+        roots_table.IsRootHandle(m.right().Value(), &root_index)) {
       InstructionCode opcode =
-          kX64Cmp | AddressingModeField::encode(kMode_Root);
+          kX64Cmp32 | AddressingModeField::encode(kMode_Root);
       return VisitCompare(
           selector, opcode,
           g.TempImmediate(
               TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
-          g.UseRegister(m.right().node()), cont);
+          g.UseRegister(m.left().node()), cont);
     }
   }
-  VisitWordCompare(selector, node, kX64Cmp, cont);
+  VisitWordCompare(selector, node, kX64Cmp32, cont);
 }

 // Shared routine for comparison with zero.
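VisitWord32EqualImpl above is the pointer-compression counterpart of the 64-bit root-compare fast path: once tagged values are 32 bits wide, equality against a root constant can be a single 32-bit cmp against a fixed offset from the roots register. The assumption that makes this sound, as a one-line sketch (plain C++, not V8 code):

    #include <cstdint>

    // Two compressed tagged values are equal iff their low 32 bits are:
    // both upper halves would be rebuilt from the same isolate root.
    bool CompressedTaggedEqual(uint64_t a, uint64_t b) {
      return static_cast<uint32_t>(a) == static_cast<uint32_t>(b);
    }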
@@ -2048,7 +2075,7 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
       switch (value->opcode()) {
         case IrOpcode::kWord32Equal:
           cont->OverwriteAndNegateIfEqual(kEqual);
-          return VisitWordCompare(this, value, kX64Cmp32, cont);
+          return VisitWord32EqualImpl(this, value, cont);
         case IrOpcode::kInt32LessThan:
           cont->OverwriteAndNegateIfEqual(kSignedLessThan);
           return VisitWordCompare(this, value, kX64Cmp32, cont);
@@ -2071,7 +2098,7 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
             if (CanCover(user, value)) {
               switch (value->opcode()) {
                 case IrOpcode::kInt64Sub:
-                  return VisitWord64Compare(this, value, cont);
+                  return VisitWordCompare(this, value, kX64Cmp, cont);
                 case IrOpcode::kWord64And:
                   return VisitWordCompare(this, value, kX64Test, cont);
                 default:
@@ -2080,20 +2107,20 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
             }
             return VisitCompareZero(this, user, value, kX64Cmp, cont);
           }
-          return VisitWord64Compare(this, value, cont);
+          return VisitWord64EqualImpl(this, value, cont);
         }
         case IrOpcode::kInt64LessThan:
           cont->OverwriteAndNegateIfEqual(kSignedLessThan);
-          return VisitWord64Compare(this, value, cont);
+          return VisitWordCompare(this, value, kX64Cmp, cont);
         case IrOpcode::kInt64LessThanOrEqual:
           cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
-          return VisitWord64Compare(this, value, cont);
+          return VisitWordCompare(this, value, kX64Cmp, cont);
         case IrOpcode::kUint64LessThan:
           cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
-          return VisitWord64Compare(this, value, cont);
+          return VisitWordCompare(this, value, kX64Cmp, cont);
         case IrOpcode::kUint64LessThanOrEqual:
           cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
-          return VisitWord64Compare(this, value, cont);
+          return VisitWordCompare(this, value, kX64Cmp, cont);
         case IrOpcode::kFloat32Equal:
           cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
           return VisitFloat32Compare(this, value, cont);
@@ -2221,7 +2248,7 @@ void InstructionSelector::VisitWord32Equal(Node* const node) {
   if (m.right().Is(0)) {
     return VisitWordCompareZero(m.node(), m.left().node(), &cont);
   }
-  VisitWordCompare(this, node, kX64Cmp32, &cont);
+  VisitWord32EqualImpl(this, node, &cont);
 }

 void InstructionSelector::VisitInt32LessThan(Node* node) {
@@ -2246,7 +2273,7 @@ void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
   VisitWordCompare(this, node, kX64Cmp32, &cont);
 }

-void InstructionSelector::VisitWord64Equal(Node* const node) {
+void InstructionSelector::VisitWord64Equal(Node* node) {
   FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
   Int64BinopMatcher m(node);
   if (m.right().Is(0)) {
@@ -2256,7 +2283,7 @@ void InstructionSelector::VisitWord64Equal(Node* const node) {
       if (CanCover(user, value)) {
         switch (value->opcode()) {
           case IrOpcode::kInt64Sub:
-            return VisitWord64Compare(this, value, &cont);
+            return VisitWordCompare(this, value, kX64Cmp, &cont);
           case IrOpcode::kWord64And:
             return VisitWordCompare(this, value, kX64Test, &cont);
           default:
@@ -2264,7 +2291,7 @@ void InstructionSelector::VisitWord64Equal(Node* const node) {
       }
     }
   }
-  VisitWord64Compare(this, node, &cont);
+  VisitWord64EqualImpl(this, node, &cont);
 }

 void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
@@ -2287,24 +2314,24 @@ void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {

 void InstructionSelector::VisitInt64LessThan(Node* node) {
   FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
-  VisitWord64Compare(this, node, &cont);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
 }

 void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
   FlagsContinuation cont =
       FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
-  VisitWord64Compare(this, node, &cont);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
 }

 void InstructionSelector::VisitUint64LessThan(Node* node) {
   FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
-  VisitWord64Compare(this, node, &cont);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
 }

 void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
   FlagsContinuation cont =
       FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
-  VisitWord64Compare(this, node, &cont);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
 }

 void InstructionSelector::VisitFloat32Equal(Node* node) {
@@ -2685,9 +2712,11 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I8x16GtU)

 #define SIMD_UNOP_LIST(V) \
+  V(F64x2Sqrt) \
   V(F32x4SConvertI32x4) \
   V(F32x4Abs) \
   V(F32x4Neg) \
+  V(F32x4Sqrt) \
   V(F32x4RecipApprox) \
   V(F32x4RecipSqrtApprox) \
   V(I64x2Neg) \
@@ -2872,6 +2901,27 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
        g.UseRegister(node->InputAt(0)));
 }

+#define VISIT_SIMD_QFMOP(Opcode)                                             \
+  void InstructionSelector::Visit##Opcode(Node* node) {                      \
+    X64OperandGenerator g(this);                                             \
+    if (CpuFeatures::IsSupported(FMA3)) {                                    \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                          \
+           g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+           g.UseRegister(node->InputAt(2)));                                 \
+    } else {                                                                 \
+      InstructionOperand temps[] = {g.TempSimd128Register()};                \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                          \
+           g.UseUniqueRegister(node->InputAt(0)),                            \
+           g.UseUniqueRegister(node->InputAt(1)),                            \
+           g.UseRegister(node->InputAt(2)), arraysize(temps), temps);        \
+    }                                                                        \
+  }
+VISIT_SIMD_QFMOP(F64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms)
+#undef VISIT_SIMD_QFMOP
+
 void InstructionSelector::VisitI64x2ShrS(Node* node) {
   X64OperandGenerator g(this);
   InstructionOperand temps[] = {g.TempRegister()};
@@ -2893,10 +2943,10 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {

 void InstructionSelector::VisitI64x2MinS(Node* node) {
   X64OperandGenerator g(this);
   if (this->IsSupported(SSE4_2)) {
-    InstructionOperand temps[] = {g.TempSimd128Register()};
-    Emit(kX64I64x2MinS, g.DefineSameAsFirst(node),
-         g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
-         arraysize(temps), temps);
+    InstructionOperand temps[] = {g.TempFpRegister(xmm0)};
+    Emit(kX64I64x2MinS, g.DefineAsRegister(node),
+         g.UseUniqueRegister(node->InputAt(0)),
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
   } else {
     InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister(),
                                   g.TempRegister()};
@@ -2908,27 +2958,27 @@ void InstructionSelector::VisitI64x2MinS(Node* node) {

 void InstructionSelector::VisitI64x2MaxS(Node* node) {
   X64OperandGenerator g(this);
-  InstructionOperand temps[] = {g.TempSimd128Register()};
+  InstructionOperand temps[] = {g.TempFpRegister(xmm0)};
   Emit(kX64I64x2MaxS, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
+       g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
       arraysize(temps), temps);
 }
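The VISIT_SIMD_QFMOP selector above only emits a truly fused operation when FMA3 is available; the fallback is a separate multiply and add, which needs the extra unique temp and may round differently (the wasm QFMA proposal deliberately allows both results). Per-lane behaviour, assuming the qfma(a, b, c) = a * b + c operand convention (sketch only, not V8 code):

    #include <cmath>

    double qfma_lane(double a, double b, double c, bool has_fma3) {
      return has_fma3 ? std::fma(a, b, c)  // one rounding step
                      : a * b + c;         // two rounding steps
    }

    double qfms_lane(double a, double b, double c, bool has_fma3) {
      return has_fma3 ? std::fma(-a, b, c) : c - a * b;
    }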
void InstructionSelector::VisitI64x2MinU(Node* node) {
   X64OperandGenerator g(this);
   InstructionOperand temps[] = {g.TempSimd128Register(),
-                                g.TempSimd128Register()};
-  Emit(kX64I64x2MinU, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
-       arraysize(temps), temps);
+                                g.TempFpRegister(xmm0)};
+  Emit(kX64I64x2MinU, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
 }

 void InstructionSelector::VisitI64x2MaxU(Node* node) {
   X64OperandGenerator g(this);
   InstructionOperand temps[] = {g.TempSimd128Register(),
-                                g.TempSimd128Register()};
+                                g.TempFpRegister(xmm0)};
   Emit(kX64I64x2MaxU, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
+       g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
        arraysize(temps), temps);
 }

@@ -3256,6 +3306,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
   Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
 }

+void InstructionSelector::VisitS8x16Swizzle(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kX64S8x16Swizzle, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
 // static
 MachineOperatorBuilder::Flags
 InstructionSelector::SupportedMachineOperatorFlags() {