Diffstat (limited to 'deps/v8/src/compiler/backend')
-rw-r--r--  deps/v8/src/compiler/backend/arm/code-generator-arm.cc  81
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-codes-arm.h  1
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc  1
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc  68
-rw-r--r--  deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc  204
-rw-r--r--  deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h  9
-rw-r--r--  deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc  10
-rw-r--r--  deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc  220
-rw-r--r--  deps/v8/src/compiler/backend/code-generator-impl.h  8
-rw-r--r--  deps/v8/src/compiler/backend/code-generator.cc  2
-rw-r--r--  deps/v8/src/compiler/backend/code-generator.h  2
-rw-r--r--  deps/v8/src/compiler/backend/frame-elider.cc  2
-rw-r--r--  deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc  335
-rw-r--r--  deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h  20
-rw-r--r--  deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc  20
-rw-r--r--  deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc  156
-rw-r--r--  deps/v8/src/compiler/backend/instruction-scheduler.cc  2
-rw-r--r--  deps/v8/src/compiler/backend/instruction-selector-impl.h  25
-rw-r--r--  deps/v8/src/compiler/backend/instruction-selector.cc  44
-rw-r--r--  deps/v8/src/compiler/backend/instruction-selector.h  15
-rw-r--r--  deps/v8/src/compiler/backend/instruction.cc  15
-rw-r--r--  deps/v8/src/compiler/backend/instruction.h  35
-rw-r--r--  deps/v8/src/compiler/backend/jump-threading.cc  12
-rw-r--r--  deps/v8/src/compiler/backend/jump-threading.h  13
-rw-r--r--  deps/v8/src/compiler/backend/mips/code-generator-mips.cc  70
-rw-r--r--  deps/v8/src/compiler/backend/mips/instruction-codes-mips.h  1
-rw-r--r--  deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc  1
-rw-r--r--  deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc  6
-rw-r--r--  deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc  70
-rw-r--r--  deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h  1
-rw-r--r--  deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc  1
-rw-r--r--  deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc  6
-rw-r--r--  deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc  18
-rw-r--r--  deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc  10
-rw-r--r--  deps/v8/src/compiler/backend/register-allocator-verifier.cc  12
-rw-r--r--  deps/v8/src/compiler/backend/register-allocator-verifier.h  1
-rw-r--r--  deps/v8/src/compiler/backend/register-allocator.cc  344
-rw-r--r--  deps/v8/src/compiler/backend/register-allocator.h  55
-rw-r--r--  deps/v8/src/compiler/backend/s390/code-generator-s390.cc  5
-rw-r--r--  deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc  87
-rw-r--r--  deps/v8/src/compiler/backend/x64/code-generator-x64.cc  343
-rw-r--r--  deps/v8/src/compiler/backend/x64/instruction-codes-x64.h  7
-rw-r--r--  deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc  7
-rw-r--r--  deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc  136
44 files changed, 1735 insertions, 746 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
index 65a569d755..3fe5361083 100644
--- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -44,7 +44,7 @@ class ArmOperandConverter final : public InstructionOperandConverter {
UNREACHABLE();
}
- Operand InputImmediate(size_t index) {
+ Operand InputImmediate(size_t index) const {
return ToImmediate(instr_->InputAt(index));
}
@@ -111,7 +111,7 @@ class ArmOperandConverter final : public InstructionOperandConverter {
return InputOffset(&first_index);
}
- Operand ToImmediate(InstructionOperand* operand) {
+ Operand ToImmediate(InstructionOperand* operand) const {
Constant constant = ToConstant(operand);
switch (constant.type()) {
case Constant::kInt32:
@@ -153,9 +153,6 @@ class ArmOperandConverter final : public InstructionOperandConverter {
NeonMemOperand NeonInputOperand(size_t first_index) {
const size_t index = first_index;
switch (AddressingModeField::decode(instr_->opcode())) {
- case kMode_Offset_RR:
- return NeonMemOperand(InputRegister(index + 0),
- InputRegister(index + 1));
case kMode_Operand2_R:
return NeonMemOperand(InputRegister(index + 0));
default:
@@ -309,9 +306,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE();
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode,
- ArmOperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode,
+ ArmOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -320,10 +317,10 @@ void EmitWordLoadPoisoningIfNeeded(
}
}
-void ComputePoisonedAddressForLoad(
- CodeGenerator* codegen, InstructionCode opcode,
- ArmOperandConverter& i, // NOLINT(runtime/references)
- Register address) {
+void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
+ InstructionCode opcode,
+ ArmOperandConverter const& i,
+ Register address) {
DCHECK_EQ(kMemoryAccessPoisoned,
static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
switch (AddressingModeField::decode(opcode)) {
@@ -1798,6 +1795,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
+ case kArmF32x4Sqrt: {
+ QwNeonRegister dst = i.OutputSimd128Register();
+ QwNeonRegister src1 = i.InputSimd128Register(0);
+ DCHECK_EQ(dst, q0);
+ DCHECK_EQ(src1, q0);
+#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
+ __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
+ __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
+ __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
+ __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
+#undef S_FROM_Q
+ break;
+ }
case kArmF32x4RecipApprox: {
__ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
@@ -1919,14 +1929,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI32x4ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -1998,7 +2014,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2029,7 +2048,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI16x8ExtractLane: {
- __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
+ __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
i.InputInt8(1));
break;
}
@@ -2054,14 +2073,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI16x8ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2142,7 +2167,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2186,7 +2214,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI8x16ExtractLane: {
- __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
+ __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
i.InputInt8(1));
break;
}
@@ -2201,6 +2229,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
__ vdup(Neon8, tmp, i.InputRegister(1));
__ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2208,7 +2239,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon8, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
+ __ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2275,7 +2309,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon8, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
+ __ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
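
The and_(shift, ..., Operand(N - 1)) instructions added above implement the wasm-simd rule that a vector shift count is taken modulo the lane width before it is broadcast with vdup. A minimal scalar sketch of that semantics (illustrative only, not part of the commit; the function names are made up):

#include <cstdint>

// Scalar model of the masking now done before vdup/vshl: the count is
// reduced modulo the lane width, so shifting an i32x4 lane by 33 behaves
// like shifting by 1 instead of relying on out-of-range shift behaviour.
uint32_t ShlLane32(uint32_t lane, uint32_t count) {
  return lane << (count & 31);  // i32x4 lanes: modulo 32
}
uint16_t ShlLane16(uint16_t lane, uint32_t count) {
  return static_cast<uint16_t>(lane << (count & 15));  // i16x8 lanes: modulo 16
}
uint8_t ShlLane8(uint8_t lane, uint32_t count) {
  return static_cast<uint8_t>(lane << (count & 7));  // i8x16 lanes: modulo 8
}
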
diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
index 3551e26aea..d398ec0ed6 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
@@ -135,6 +135,7 @@ namespace compiler {
V(ArmF32x4UConvertI32x4) \
V(ArmF32x4Abs) \
V(ArmF32x4Neg) \
+ V(ArmF32x4Sqrt) \
V(ArmF32x4RecipApprox) \
V(ArmF32x4RecipSqrtApprox) \
V(ArmF32x4Add) \
diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
index 1d7cf61dfe..92be55dcc3 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -115,6 +115,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4UConvertI32x4:
case kArmF32x4Abs:
case kArmF32x4Neg:
+ case kArmF32x4Sqrt:
case kArmF32x4RecipApprox:
case kArmF32x4RecipSqrtApprox:
case kArmF32x4Add:
diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
index ce74faa4a6..303648051f 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
#include "src/base/bits.h"
#include "src/base/enum-set.h"
+#include "src/base/iterator.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@@ -94,7 +94,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
ArmOperandGenerator g(selector);
- InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
@@ -352,6 +352,26 @@ void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode div_opcode,
}
}
+// Adds the base and offset into a register, then change the addressing
+// mode of opcode_return to use this register. Certain instructions, e.g.
+// vld1 and vst1, when given two registers, will post-increment the offset, i.e.
+// perform the operation at base, then add offset to base. What we intend is to
+// access at (base+offset).
+void EmitAddBeforeS128LoadStore(InstructionSelector* selector,
+ InstructionCode* opcode_return,
+ size_t* input_count_return,
+ InstructionOperand* inputs) {
+ DCHECK(*opcode_return == kArmVld1S128 || *opcode_return == kArmVst1S128);
+ ArmOperandGenerator g(selector);
+ InstructionOperand addr = g.TempRegister();
+ InstructionCode op = kArmAdd;
+ op |= AddressingModeField::encode(kMode_Operand2_R);
+ selector->Emit(op, 1, &addr, 2, inputs);
+ *opcode_return |= AddressingModeField::encode(kMode_Operand2_R);
+ *input_count_return -= 1;
+ inputs[0] = addr;
+}
+
void EmitLoad(InstructionSelector* selector, InstructionCode opcode,
InstructionOperand* output, Node* base, Node* index) {
ArmOperandGenerator g(selector);
@@ -368,7 +388,11 @@ void EmitLoad(InstructionSelector* selector, InstructionCode opcode,
input_count = 3;
} else {
inputs[1] = g.UseRegister(index);
- opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ if (opcode == kArmVld1S128) {
+ EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[0]);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ }
}
selector->Emit(opcode, 1, output, input_count, inputs);
}
@@ -386,7 +410,12 @@ void EmitStore(InstructionSelector* selector, InstructionCode opcode,
input_count = 4;
} else {
inputs[input_count++] = g.UseRegister(index);
- opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ if (opcode == kArmVst1S128) {
+ // Inputs are value, base, index, only care about base and index.
+ EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[1]);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ }
}
selector->Emit(opcode, 0, nullptr, input_count, inputs);
}
@@ -596,8 +625,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
return;
}
- case MachineRepresentation::kFloat64:
- case MachineRepresentation::kSimd128: {
+ case MachineRepresentation::kFloat64: {
// Compute the address of the least-significant byte of the FP value.
// We assume that the base node is unlikely to be an encodable immediate
// or the result of a shift operation, so only consider the addressing
@@ -623,13 +651,10 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
if (CpuFeatures::IsSupported(NEON)) {
// With NEON we can load directly from the calculated address.
- InstructionCode op = load_rep == MachineRepresentation::kFloat64
- ? kArmVld1F64
- : kArmVld1S128;
+ InstructionCode op = kArmVld1F64;
op |= AddressingModeField::encode(kMode_Operand2_R);
Emit(op, g.DefineAsRegister(node), addr);
} else {
- DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
// Load both halves and move to an FP register.
InstructionOperand fp_lo = g.TempRegister();
InstructionOperand fp_hi = g.TempRegister();
@@ -670,8 +695,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
EmitStore(this, kArmStr, input_count, inputs, index);
return;
}
- case MachineRepresentation::kFloat64:
- case MachineRepresentation::kSimd128: {
+ case MachineRepresentation::kFloat64: {
if (CpuFeatures::IsSupported(NEON)) {
InstructionOperand address = g.TempRegister();
{
@@ -697,13 +721,10 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
inputs[input_count++] = g.UseRegister(value);
inputs[input_count++] = address;
- InstructionCode op = store_rep == MachineRepresentation::kFloat64
- ? kArmVst1F64
- : kArmVst1S128;
+ InstructionCode op = kArmVst1F64;
op |= AddressingModeField::encode(kMode_Operand2_R);
Emit(op, 0, nullptr, input_count, inputs);
} else {
- DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
// Store a 64-bit floating point value using two 32-bit integer stores.
// Computing the store address here would require three live temporary
// registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
@@ -942,7 +963,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) {
uint32_t lsb = m.right().Value();
Int32BinopMatcher mleft(m.left().node());
if (mleft.right().HasValue()) {
- uint32_t value = (mleft.right().Value() >> lsb) << lsb;
+ uint32_t value = static_cast<uint32_t>(mleft.right().Value() >> lsb)
+ << lsb;
uint32_t width = base::bits::CountPopulation(value);
uint32_t msb = base::bits::CountLeadingZeros32(value);
if ((width != 0) && (msb + width + lsb == 32)) {
@@ -1119,6 +1141,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
VisitRR(this, kArmRev, node);
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitInt32Add(Node* node) {
@@ -2513,6 +2539,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
+void InstructionSelector::VisitF32x4Sqrt(Node* node) {
+ ArmOperandGenerator g(this);
+ // Use fixed registers in the lower 8 Q-registers so we can directly access
+ // mapped registers S0-S31.
+ Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0),
+ g.UseFixed(node->InputAt(0), q0));
+}
+
void InstructionSelector::VisitF32x4Div(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access
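
EmitAddBeforeS128LoadStore, added above, works around the fact that the two-register forms of NEON vld1/vst1 post-increment: they access memory at the base register and only then add the offset, while the selector needs the access to happen at base + offset. A rough scalar model of the two behaviours (illustrative only, not part of the commit):

#include <cstdint>

struct Access {
  uintptr_t address;     // where the 128-bit load/store actually happens
  uintptr_t base_after;  // value left in the base register afterwards
};

// vld1 {q}, [base], offset -- accesses base, then post-increments it.
Access PostIncrementForm(uintptr_t base, uintptr_t offset) {
  return {base, base + offset};
}

// add addr, base, offset ; vld1 {q}, [addr] -- the sequence the selector
// now emits, so the access lands at base + offset as intended.
Access PreAddedForm(uintptr_t base, uintptr_t offset) {
  return {base + offset, base};
}
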
diff --git a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc
index 66ca7f6cf0..6f65c905dd 100644
--- a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc
+++ b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -376,9 +376,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE();
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
- Arm64OperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode, Instruction* instr,
+ Arm64OperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -389,6 +389,36 @@ void EmitWordLoadPoisoningIfNeeded(
}
}
+void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
+ Arm64OperandConverter* i, VRegister output_reg) {
+ const MemoryAccessMode access_mode =
+ static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+ AddressingMode address_mode = AddressingModeField::decode(opcode);
+ if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) {
+ UseScratchRegisterScope temps(codegen->tasm());
+ Register address = temps.AcquireX();
+ switch (address_mode) {
+ case kMode_MRI: // Fall through.
+ case kMode_MRR:
+ codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1));
+ break;
+ case kMode_Operand2_R_LSL_I:
+ codegen->tasm()->Add(address, i->InputRegister(0),
+ i->InputOperand2_64(1));
+ break;
+ default:
+ // Note: we don't need poisoning for kMode_Root loads as those loads
+ // target a fixed offset from root register which is set once when
+ // initializing the vm.
+ UNREACHABLE();
+ }
+ codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister));
+ codegen->tasm()->Ldr(output_reg, MemOperand(address));
+ } else {
+ codegen->tasm()->Ldr(output_reg, i->MemoryOperand());
+ }
+}
+
} // namespace
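
EmitMaybePoisonedFPLoad applies the same speculation-poisoning scheme as the integer loads: the computed address is ANDed with kSpeculationPoisonRegister, which holds all ones on the architecturally correct path and zero once a speculative check has failed, so a mispredicted load is redirected to a harmless address. A one-line scalar model (illustrative only, not part of the commit):

#include <cstdint>

uintptr_t PoisonAddress(uintptr_t address, uintptr_t poison) {
  // poison == ~uintptr_t{0} normally, 0 under misspeculation.
  return address & poison;
}
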
#define ASSEMBLE_SHIFT(asm_instr, width) \
@@ -1198,6 +1228,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Sxtw:
__ Sxtw(i.OutputRegister(), i.InputRegister32(0));
break;
+ case kArm64Sbfx:
+ __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
+ i.InputInt6(2));
+ break;
case kArm64Sbfx32:
__ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
i.InputInt5(2));
@@ -1586,6 +1620,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Str:
__ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
break;
+ case kArm64StrCompressTagged:
+ __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
+ break;
case kArm64DecompressSigned: {
__ DecompressTaggedSigned(i.OutputRegister(), i.InputRegister(0));
break;
@@ -1599,13 +1636,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64LdrS:
- __ Ldr(i.OutputDoubleRegister().S(), i.MemoryOperand());
+ EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S());
break;
case kArm64StrS:
__ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
break;
case kArm64LdrD:
- __ Ldr(i.OutputDoubleRegister(), i.MemoryOperand());
+ EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister());
break;
case kArm64StrD:
__ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
@@ -1616,9 +1653,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64StrQ:
__ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
break;
- case kArm64StrCompressTagged:
- __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
- break;
case kArm64DmbIsh:
__ Dmb(InnerShareable, BarrierAll);
break;
@@ -1794,6 +1828,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D);
SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D);
+ SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D);
SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D);
SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D);
SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D);
@@ -1818,6 +1853,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0).V2D());
break;
}
+ case kArm64F64x2Qfma: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ Fmla(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
+ i.InputSimd128Register(2).V2D());
+ break;
+ }
+ case kArm64F64x2Qfms: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ Fmls(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
+ i.InputSimd128Register(2).V2D());
+ break;
+ }
case kArm64F32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
break;
@@ -1840,6 +1887,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S);
SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S);
+ SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S);
SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
@@ -1867,6 +1915,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0).V4S());
break;
}
+ case kArm64F32x4Qfma: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ Fmla(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
+ i.InputSimd128Register(2).V4S());
+ break;
+ }
+ case kArm64F32x4Qfms: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ __ Fmls(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
+ i.InputSimd128Register(2).V4S());
+ break;
+ }
case kArm64I64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
break;
@@ -1888,14 +1948,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
case kArm64I64x2Shl: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V2D(), i.InputRegister64(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 64.
+ __ And(shift, i.InputRegister64(1), 63);
+ __ Dup(tmp.V2D(), shift);
__ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
break;
}
case kArm64I64x2ShrS: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V2D(), i.InputRegister64(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 64.
+ __ And(shift, i.InputRegister64(1), 63);
+ __ Dup(tmp.V2D(), shift);
__ Neg(tmp.V2D(), tmp.V2D());
__ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
@@ -1903,6 +1969,65 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D);
SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D);
+ case kArm64I64x2Mul: {
+ UseScratchRegisterScope scope(tasm());
+ VRegister dst = i.OutputSimd128Register();
+ VRegister src1 = i.InputSimd128Register(0);
+ VRegister src2 = i.InputSimd128Register(1);
+ VRegister tmp1 = scope.AcquireSameSizeAs(dst);
+ VRegister tmp2 = scope.AcquireSameSizeAs(dst);
+ VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));
+
+ // This 2x64-bit multiplication is performed with several 32-bit
+ // multiplications.
+
+ // 64-bit numbers x and y, can be represented as:
+ // x = a + 2^32(b)
+ // y = c + 2^32(d)
+
+ // A 64-bit multiplication is:
+ // x * y = ac + 2^32(ad + bc) + 2^64(bd)
+ // note: `2^64(bd)` can be ignored, the value is too large to fit in
+ // 64-bits.
+
+ // This sequence implements a 2x64bit multiply, where the registers
+ // `src1` and `src2` are split up into 32-bit components:
+ // src1 = |d|c|b|a|
+ // src2 = |h|g|f|e|
+ //
+ // src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+
+ // Reverse the 32-bit elements in the 64-bit words.
+ // tmp2 = |g|h|e|f|
+ __ Rev64(tmp2.V4S(), src2.V4S());
+
+ // Calculate the high half components.
+ // tmp2 = |dg|ch|be|af|
+ __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
+
+ // Extract the low half components of src1.
+ // tmp1 = |c|a|
+ __ Xtn(tmp1.V2S(), src1.V2D());
+
+ // Sum the respective high half components.
+ // tmp2 = |dg+ch|be+af||dg+ch|be+af|
+ __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
+
+ // Extract the low half components of src2.
+ // tmp3 = |g|e|
+ __ Xtn(tmp3.V2S(), src2.V2D());
+
+ // Shift the high half components, into the high half.
+ // dst = |dg+ch << 32|be+af << 32|
+ __ Shll(dst.V2D(), tmp2.V2S(), 32);
+
+ // Multiply the low components together, and accumulate with the high
+ // half.
+ // dst = |dst[1] + cg|dst[0] + ae|
+ __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
+
+ break;
+ }
SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
case kArm64I64x2Ne: {
VRegister dst = i.OutputSimd128Register().V2D();
@@ -1915,7 +2040,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D);
case kArm64I64x2ShrU: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V2D(), i.InputRegister64(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 64.
+ __ And(shift, i.InputRegister64(1), 63);
+ __ Dup(tmp.V2D(), shift);
__ Neg(tmp.V2D(), tmp.V2D());
__ Ushl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
@@ -1947,14 +2075,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
case kArm64I32x4Shl: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V4S(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 32.
+ __ And(shift, i.InputRegister32(1), 31);
+ __ Dup(tmp.V4S(), shift);
__ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
break;
}
case kArm64I32x4ShrS: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V4S(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 32.
+ __ And(shift, i.InputRegister32(1), 31);
+ __ Dup(tmp.V4S(), shift);
__ Neg(tmp.V4S(), tmp.V4S());
__ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
@@ -1981,7 +2115,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H);
case kArm64I32x4ShrU: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V4S(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 32.
+ __ And(shift, i.InputRegister32(1), 31);
+ __ Dup(tmp.V4S(), shift);
__ Neg(tmp.V4S(), tmp.V4S());
__ Ushl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
@@ -1996,7 +2133,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I16x8ExtractLane: {
- __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
+ __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
i.InputInt8(1));
break;
}
@@ -2014,14 +2151,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
case kArm64I16x8Shl: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V8H(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 16.
+ __ And(shift, i.InputRegister32(1), 15);
+ __ Dup(tmp.V8H(), shift);
__ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
break;
}
case kArm64I16x8ShrS: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V8H(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 16.
+ __ And(shift, i.InputRegister32(1), 15);
+ __ Dup(tmp.V8H(), shift);
__ Neg(tmp.V8H(), tmp.V8H());
__ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
@@ -2070,7 +2213,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArm64I16x8ShrU: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V8H(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 16.
+ __ And(shift, i.InputRegister32(1), 15);
+ __ Dup(tmp.V8H(), shift);
__ Neg(tmp.V8H(), tmp.V8H());
__ Ushl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
@@ -2101,7 +2247,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I8x16ExtractLane: {
- __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
+ __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
i.InputInt8(1));
break;
}
@@ -2117,14 +2263,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
case kArm64I8x16Shl: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V16B(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 8.
+ __ And(shift, i.InputRegister32(1), 7);
+ __ Dup(tmp.V16B(), shift);
__ Sshl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
break;
}
case kArm64I8x16ShrS: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V16B(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 8.
+ __ And(shift, i.InputRegister32(1), 7);
+ __ Dup(tmp.V16B(), shift);
__ Neg(tmp.V16B(), tmp.V16B());
__ Sshl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
@@ -2163,7 +2315,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
case kArm64I8x16ShrU: {
VRegister tmp = i.TempSimd128Register(0);
- __ Dup(tmp.V16B(), i.InputRegister32(1));
+ Register shift = i.TempRegister32(1);
+ // Take shift value modulo 8.
+ __ And(shift, i.InputRegister32(1), 7);
+ __ Dup(tmp.V16B(), shift);
__ Neg(tmp.V16B(), tmp.V16B());
__ Ushl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
@@ -2277,6 +2432,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1).V16B(), i.InputInt4(2));
break;
}
+ case kArm64S8x16Swizzle: {
+ __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
+ i.InputSimd128Register(1).V16B());
+ break;
+ }
case kArm64S8x16Shuffle: {
Simd128Register dst = i.OutputSimd128Register().V16B(),
src0 = i.InputSimd128Register(0).V16B(),
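
The kArm64I64x2Mul case above uses the usual split of each 64-bit lane into 32-bit halves, x = a + 2^32*b and y = c + 2^32*d, so that x*y = a*c + 2^32*(a*d + b*c) + 2^64*(b*d), where the last term vanishes modulo 2^64. A scalar equivalent of what the Mul/Addp/Shll/Umlal sequence computes per lane (illustrative only, not part of the commit):

#include <cstdint>

uint64_t MulLane64(uint64_t x, uint64_t y) {
  uint32_t a = static_cast<uint32_t>(x), b = static_cast<uint32_t>(x >> 32);
  uint32_t c = static_cast<uint32_t>(y), d = static_cast<uint32_t>(y >> 32);
  // Cross terms, kept modulo 2^32 (the Mul + Addp steps above).
  uint32_t cross = static_cast<uint32_t>(static_cast<uint64_t>(a) * d +
                                         static_cast<uint64_t>(b) * c);
  // Shift the cross terms into the high half (Shll), then accumulate the
  // full 64-bit product of the low halves (Umlal).
  return (static_cast<uint64_t>(cross) << 32) + static_cast<uint64_t>(a) * c;
}
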
diff --git a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
index 4b56e402c1..880a3fbf9e 100644
--- a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
+++ b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -70,6 +70,7 @@ namespace compiler {
V(Arm64Sxtb) \
V(Arm64Sxth) \
V(Arm64Sxtw) \
+ V(Arm64Sbfx) \
V(Arm64Sbfx32) \
V(Arm64Ubfx) \
V(Arm64Ubfx32) \
@@ -175,6 +176,7 @@ namespace compiler {
V(Arm64F64x2ReplaceLane) \
V(Arm64F64x2Abs) \
V(Arm64F64x2Neg) \
+ V(Arm64F64x2Sqrt) \
V(Arm64F64x2Add) \
V(Arm64F64x2Sub) \
V(Arm64F64x2Mul) \
@@ -185,6 +187,8 @@ namespace compiler {
V(Arm64F64x2Ne) \
V(Arm64F64x2Lt) \
V(Arm64F64x2Le) \
+ V(Arm64F64x2Qfma) \
+ V(Arm64F64x2Qfms) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
@@ -192,6 +196,7 @@ namespace compiler {
V(Arm64F32x4UConvertI32x4) \
V(Arm64F32x4Abs) \
V(Arm64F32x4Neg) \
+ V(Arm64F32x4Sqrt) \
V(Arm64F32x4RecipApprox) \
V(Arm64F32x4RecipSqrtApprox) \
V(Arm64F32x4Add) \
@@ -205,6 +210,8 @@ namespace compiler {
V(Arm64F32x4Ne) \
V(Arm64F32x4Lt) \
V(Arm64F32x4Le) \
+ V(Arm64F32x4Qfma) \
+ V(Arm64F32x4Qfms) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
@@ -213,6 +220,7 @@ namespace compiler {
V(Arm64I64x2ShrS) \
V(Arm64I64x2Add) \
V(Arm64I64x2Sub) \
+ V(Arm64I64x2Mul) \
V(Arm64I64x2Eq) \
V(Arm64I64x2Ne) \
V(Arm64I64x2GtS) \
@@ -331,6 +339,7 @@ namespace compiler {
V(Arm64S8x16TransposeLeft) \
V(Arm64S8x16TransposeRight) \
V(Arm64S8x16Concat) \
+ V(Arm64S8x16Swizzle) \
V(Arm64S8x16Shuffle) \
V(Arm64S32x2Reverse) \
V(Arm64S16x4Reverse) \
diff --git a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
index 7cba2d50ea..b0f9202968 100644
--- a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
+++ b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -71,6 +71,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Sxth:
case kArm64Sxth32:
case kArm64Sxtw:
+ case kArm64Sbfx:
case kArm64Sbfx32:
case kArm64Ubfx:
case kArm64Ubfx32:
@@ -142,6 +143,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2ReplaceLane:
case kArm64F64x2Abs:
case kArm64F64x2Neg:
+ case kArm64F64x2Sqrt:
case kArm64F64x2Add:
case kArm64F64x2Sub:
case kArm64F64x2Mul:
@@ -152,6 +154,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Ne:
case kArm64F64x2Lt:
case kArm64F64x2Le:
+ case kArm64F64x2Qfma:
+ case kArm64F64x2Qfms:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
@@ -159,6 +163,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4UConvertI32x4:
case kArm64F32x4Abs:
case kArm64F32x4Neg:
+ case kArm64F32x4Sqrt:
case kArm64F32x4RecipApprox:
case kArm64F32x4RecipSqrtApprox:
case kArm64F32x4Add:
@@ -172,6 +177,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Ne:
case kArm64F32x4Lt:
case kArm64F32x4Le:
+ case kArm64F32x4Qfma:
+ case kArm64F32x4Qfms:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
@@ -180,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I64x2ShrS:
case kArm64I64x2Add:
case kArm64I64x2Sub:
+ case kArm64I64x2Mul:
case kArm64I64x2Eq:
case kArm64I64x2Ne:
case kArm64I64x2GtS:
@@ -298,6 +306,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64S8x16TransposeLeft:
case kArm64S8x16TransposeRight:
case kArm64S8x16Concat:
+ case kArm64S8x16Swizzle:
case kArm64S8x16Shuffle:
case kArm64S32x2Reverse:
case kArm64S16x4Reverse:
@@ -439,6 +448,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kArm64Clz:
case kArm64Clz32:
+ case kArm64Sbfx:
case kArm64Sbfx32:
case kArm64Sxtb32:
case kArm64Sxth32:
diff --git a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
index 4abbd68c49..53a289fe6a 100644
--- a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -153,7 +153,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
Arm64OperandGenerator g(selector);
- InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
@@ -499,6 +499,7 @@ void VisitAddSub(InstructionSelector* selector, Node* node, ArchOpcode opcode,
Arm64OperandGenerator g(selector);
Matcher m(node);
if (m.right().HasValue() && (m.right().Value() < 0) &&
+ (m.right().Value() > std::numeric_limits<int>::min()) &&
g.CanBeImmediate(-m.right().Value(), kArithmeticImm)) {
selector->Emit(negate_opcode, g.DefineAsRegister(node),
g.UseRegister(m.left().node()),
@@ -627,9 +628,24 @@ void InstructionSelector::VisitLoad(Node* node) {
#else
UNREACHABLE();
#endif
+#ifdef V8_COMPRESS_POINTERS
+ case MachineRepresentation::kTaggedSigned:
+ opcode = kArm64LdrDecompressTaggedSigned;
+ immediate_mode = kLoadStoreImm32;
+ break;
+ case MachineRepresentation::kTaggedPointer:
+ opcode = kArm64LdrDecompressTaggedPointer;
+ immediate_mode = kLoadStoreImm32;
+ break;
+ case MachineRepresentation::kTagged:
+ opcode = kArm64LdrDecompressAnyTagged;
+ immediate_mode = kLoadStoreImm32;
+ break;
+#else
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: // Fall through.
+#endif
case MachineRepresentation::kWord64:
opcode = kArm64Ldr;
immediate_mode = kLoadStoreImm64;
@@ -723,7 +739,7 @@ void InstructionSelector::VisitStore(Node* node) {
case MachineRepresentation::kCompressedPointer: // Fall through.
case MachineRepresentation::kCompressed:
#ifdef V8_COMPRESS_POINTERS
- opcode = kArm64StrW;
+ opcode = kArm64StrCompressTagged;
immediate_mode = kLoadStoreImm32;
break;
#else
@@ -731,7 +747,11 @@ void InstructionSelector::VisitStore(Node* node) {
#endif
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
- case MachineRepresentation::kTagged: // Fall through.
+ case MachineRepresentation::kTagged:
+ opcode = kArm64StrCompressTagged;
+ immediate_mode =
+ COMPRESS_POINTERS_BOOL ? kLoadStoreImm32 : kLoadStoreImm64;
+ break;
case MachineRepresentation::kWord64:
opcode = kArm64Str;
immediate_mode = kLoadStoreImm64;
@@ -770,6 +790,10 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
UNIMPLEMENTED();
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
// Architecture supports unaligned access, therefore VisitLoad is used instead
void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
@@ -1048,7 +1072,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) {
if (mleft.right().HasValue() && mleft.right().Value() != 0) {
// Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
// shifted into the least-significant bits.
- uint32_t mask = (mleft.right().Value() >> lsb) << lsb;
+ uint32_t mask = static_cast<uint32_t>(mleft.right().Value() >> lsb)
+ << lsb;
unsigned mask_width = base::bits::CountPopulation(mask);
unsigned mask_msb = base::bits::CountLeadingZeros32(mask);
if ((mask_msb + mask_width + lsb) == 32) {
@@ -1091,7 +1116,8 @@ void InstructionSelector::VisitWord64Shr(Node* node) {
if (mleft.right().HasValue() && mleft.right().Value() != 0) {
// Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
// shifted into the least-significant bits.
- uint64_t mask = (mleft.right().Value() >> lsb) << lsb;
+ uint64_t mask = static_cast<uint64_t>(mleft.right().Value() >> lsb)
+ << lsb;
unsigned mask_width = base::bits::CountPopulation(mask);
unsigned mask_msb = base::bits::CountLeadingZeros64(mask);
if ((mask_msb + mask_width + lsb) == 64) {
@@ -1240,7 +1266,8 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(Float32Max, kArm64Float32Max) \
V(Float64Max, kArm64Float64Max) \
V(Float32Min, kArm64Float32Min) \
- V(Float64Min, kArm64Float64Min)
+ V(Float64Min, kArm64Float64Min) \
+ V(S8x16Swizzle, kArm64S8x16Swizzle)
#define RR_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
@@ -1572,9 +1599,22 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
return;
}
EmitLoad(this, value, opcode, immediate_mode, rep, node);
- } else {
- VisitRR(this, kArm64Sxtw, node);
+ return;
+ }
+
+ if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) {
+ Int32BinopMatcher m(value);
+ if (m.right().HasValue()) {
+ Arm64OperandGenerator g(this);
+ // Mask the shift amount, to keep the same semantics as Word32Sar.
+ int right = m.right().Value() & 0x1F;
+ Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+ g.TempImmediate(right), g.TempImmediate(32 - right));
+ return;
+ }
}
+
+ VisitRR(this, kArm64Sxtw, node);
}
void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
@@ -1830,31 +1870,6 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
selector->EmitWithContinuation(opcode, left, right, cont);
}
-// Shared routine for multiple word compare operations.
-void VisitWordCompare(InstructionSelector* selector, Node* node,
- InstructionCode opcode, FlagsContinuation* cont,
- ImmediateMode immediate_mode) {
- Arm64OperandGenerator g(selector);
-
- Node* left = node->InputAt(0);
- Node* right = node->InputAt(1);
-
- // If one of the two inputs is an immediate, make sure it's on the right.
- if (!g.CanBeImmediate(right, immediate_mode) &&
- g.CanBeImmediate(left, immediate_mode)) {
- cont->Commute();
- std::swap(left, right);
- }
-
- if (g.CanBeImmediate(right, immediate_mode)) {
- VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
- cont);
- } else {
- VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
- cont);
- }
-}
-
// This function checks whether we can convert:
// ((a <op> b) cmp 0), b.<cond>
// to:
@@ -1986,9 +2001,35 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector,
selector->EmitWithContinuation(opcode, value, cont);
}
+template <int N>
+struct CbzOrTbzMatchTrait {};
+
+template <>
+struct CbzOrTbzMatchTrait<32> {
+ using IntegralType = uint32_t;
+ using BinopMatcher = Int32BinopMatcher;
+ static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And;
+ static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32;
+ static constexpr ArchOpcode kCompareAndBranchOpcode =
+ kArm64CompareAndBranch32;
+ static constexpr unsigned kSignBit = kWSignBit;
+};
+
+template <>
+struct CbzOrTbzMatchTrait<64> {
+ using IntegralType = uint64_t;
+ using BinopMatcher = Int64BinopMatcher;
+ static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And;
+ static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch;
+ static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch;
+ static constexpr unsigned kSignBit = kXSignBit;
+};
+
// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
// against {value}, depending on the condition.
-bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
+template <int N>
+bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node,
+ typename CbzOrTbzMatchTrait<N>::IntegralType value,
Node* user, FlagsCondition cond, FlagsContinuation* cont) {
// Branch poisoning requires flags to be set, so when it's enabled for
// a particular branch, we shouldn't be applying the cbz/tbz optimization.
@@ -2007,28 +2048,33 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
if (cont->IsDeoptimize()) return false;
Arm64OperandGenerator g(selector);
cont->Overwrite(MapForTbz(cond));
- Int32Matcher m(node);
- if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) {
- // SignedLessThan(Float64ExtractHighWord32(x), 0) and
- // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) essentially
- // check the sign bit of a 64-bit floating point value.
- InstructionOperand temp = g.TempRegister();
- selector->Emit(kArm64U64MoveFloat64, temp,
- g.UseRegister(node->InputAt(0)));
- selector->EmitWithContinuation(kArm64TestAndBranch, temp,
- g.TempImmediate(63), cont);
- return true;
+
+ if (N == 32) {
+ Int32Matcher m(node);
+ if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) {
+ // SignedLessThan(Float64ExtractHighWord32(x), 0) and
+ // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0)
+ // essentially check the sign bit of a 64-bit floating point value.
+ InstructionOperand temp = g.TempRegister();
+ selector->Emit(kArm64U64MoveFloat64, temp,
+ g.UseRegister(node->InputAt(0)));
+ selector->EmitWithContinuation(kArm64TestAndBranch, temp,
+ g.TempImmediate(kDSignBit), cont);
+ return true;
+ }
}
- selector->EmitWithContinuation(kArm64TestAndBranch32, g.UseRegister(node),
- g.TempImmediate(31), cont);
+
+ selector->EmitWithContinuation(
+ CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node),
+ g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont);
return true;
}
case kEqual:
case kNotEqual: {
- if (node->opcode() == IrOpcode::kWord32And) {
+ if (node->opcode() == CbzOrTbzMatchTrait<N>::kAndOpcode) {
// Emit a tbz/tbnz if we are comparing with a single-bit mask:
- // Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false)
- Int32BinopMatcher m_and(node);
+ // Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false)
+ typename CbzOrTbzMatchTrait<N>::BinopMatcher m_and(node);
if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
m_and.right().Is(value) && selector->CanCover(user, node)) {
Arm64OperandGenerator g(selector);
@@ -2036,7 +2082,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
// the opposite here so negate the condition.
cont->Negate();
selector->EmitWithContinuation(
- kArm64TestAndBranch32, g.UseRegister(m_and.left().node()),
+ CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode,
+ g.UseRegister(m_and.left().node()),
g.TempImmediate(base::bits::CountTrailingZeros(value)), cont);
return true;
}
@@ -2048,7 +2095,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
if (value != 0) return false;
Arm64OperandGenerator g(selector);
cont->Overwrite(MapForCbz(cond));
- EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32,
+ EmitBranchOrDeoptimize(selector,
+ CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode,
g.UseRegister(node), cont);
return true;
}
@@ -2057,20 +2105,50 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
}
}
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+ InstructionCode opcode, FlagsContinuation* cont,
+ ImmediateMode immediate_mode) {
+ Arm64OperandGenerator g(selector);
+
+ Node* left = node->InputAt(0);
+ Node* right = node->InputAt(1);
+
+ // If one of the two inputs is an immediate, make sure it's on the right.
+ if (!g.CanBeImmediate(right, immediate_mode) &&
+ g.CanBeImmediate(left, immediate_mode)) {
+ cont->Commute();
+ std::swap(left, right);
+ }
+
+ if (opcode == kArm64Cmp && !cont->IsPoisoned()) {
+ Int64Matcher m(right);
+ if (m.HasValue()) {
+ if (TryEmitCbzOrTbz<64>(selector, left, m.Value(), node,
+ cont->condition(), cont)) {
+ return;
+ }
+ }
+ }
+
+ VisitCompare(selector, opcode, g.UseRegister(left),
+ g.UseOperand(right, immediate_mode), cont);
+}
+
void VisitWord32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Int32BinopMatcher m(node);
FlagsCondition cond = cont->condition();
if (!cont->IsPoisoned()) {
if (m.right().HasValue()) {
- if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node,
- cond, cont)) {
+ if (TryEmitCbzOrTbz<32>(selector, m.left().node(), m.right().Value(),
+ node, cond, cont)) {
return;
}
} else if (m.left().HasValue()) {
FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
- if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node,
- commuted_cond, cont)) {
+ if (TryEmitCbzOrTbz<32>(selector, m.right().node(), m.left().Value(),
+ node, commuted_cond, cont)) {
return;
}
}
@@ -2378,13 +2456,6 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) {
return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm);
}
- // Merge the Word64Equal(x, 0) comparison into a cbz instruction.
- if ((cont->IsBranch() || cont->IsDeoptimize()) &&
- !cont->IsPoisoned()) {
- EmitBranchOrDeoptimize(this, kArm64CompareAndBranch,
- g.UseRegister(left), cont);
- return;
- }
}
return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
}
@@ -3054,10 +3125,12 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kArm64F64x2Abs) \
V(F64x2Neg, kArm64F64x2Neg) \
+ V(F64x2Sqrt, kArm64F64x2Sqrt) \
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
V(F32x4Abs, kArm64F32x4Abs) \
V(F32x4Neg, kArm64F32x4Neg) \
+ V(F32x4Sqrt, kArm64F32x4Sqrt) \
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \
@@ -3236,6 +3309,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
+void InstructionSelector::VisitI64x2Mul(Node* node) {
+ Arm64OperandGenerator g(this);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ Emit(kArm64I64x2Mul, g.DefineAsRegister(node),
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+ arraysize(temps), temps);
+}
+
void InstructionSelector::VisitS128Select(Node* node) {
Arm64OperandGenerator g(this);
Emit(kArm64S128Select, g.DefineSameAsFirst(node),
@@ -3243,6 +3324,19 @@ void InstructionSelector::VisitS128Select(Node* node) {
g.UseRegister(node->InputAt(2)));
}
+#define VISIT_SIMD_QFMOP(op) \
+ void InstructionSelector::Visit##op(Node* node) { \
+ Arm64OperandGenerator g(this); \
+ Emit(kArm64##op, g.DefineSameAsFirst(node), \
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+ g.UseRegister(node->InputAt(2))); \
+ }
+VISIT_SIMD_QFMOP(F64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms)
+#undef VISIT_SIMD_QFMOP
+
namespace {
struct ShuffleEntry {
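
The new ChangeInt32ToInt64 pattern above relies on the identity that sign-extending the result of Word32Sar(x, k) equals a signed bitfield extract of width 32 - k starting at bit k, which a single Sbfx performs. A scalar sketch of the equivalence (illustrative only, not part of the commit; assumes an arithmetic right shift on signed values, as arm64 provides):

#include <cstdint>

int64_t SignExtendedSar(int32_t x, int shift) {
  int k = shift & 0x1F;                    // Word32Sar masks the shift amount
  int32_t narrowed = x >> k;               // 32-bit arithmetic shift
  return static_cast<int64_t>(narrowed);   // == Sbfx(x, k, 32 - k)
}
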
diff --git a/deps/v8/src/compiler/backend/code-generator-impl.h b/deps/v8/src/compiler/backend/code-generator-impl.h
index 2bfb009980..530dc0a813 100644
--- a/deps/v8/src/compiler/backend/code-generator-impl.h
+++ b/deps/v8/src/compiler/backend/code-generator-impl.h
@@ -26,7 +26,7 @@ class InstructionOperandConverter {
// -- Instruction operand accesses with conversions --------------------------
- Register InputRegister(size_t index) {
+ Register InputRegister(size_t index) const {
return ToRegister(instr_->InputAt(index));
}
@@ -96,7 +96,7 @@ class InstructionOperandConverter {
return ToRpoNumber(instr_->InputAt(index));
}
- Register OutputRegister(size_t index = 0) {
+ Register OutputRegister(size_t index = 0) const {
return ToRegister(instr_->OutputAt(index));
}
@@ -130,7 +130,7 @@ class InstructionOperandConverter {
return ToConstant(op).ToRpoNumber();
}
- Register ToRegister(InstructionOperand* op) {
+ Register ToRegister(InstructionOperand* op) const {
return LocationOperand::cast(op)->GetRegister();
}
@@ -146,7 +146,7 @@ class InstructionOperandConverter {
return LocationOperand::cast(op)->GetSimd128Register();
}
- Constant ToConstant(InstructionOperand* op) {
+ Constant ToConstant(InstructionOperand* op) const {
if (op->IsImmediate()) {
return gen_->instructions()->GetImmediate(ImmediateOperand::cast(op));
}
diff --git a/deps/v8/src/compiler/backend/code-generator.cc b/deps/v8/src/compiler/backend/code-generator.cc
index e7702bcdf6..43eb4a1f15 100644
--- a/deps/v8/src/compiler/backend/code-generator.cc
+++ b/deps/v8/src/compiler/backend/code-generator.cc
@@ -4,7 +4,7 @@
#include "src/compiler/backend/code-generator.h"
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/codegen/optimized-compilation-info.h"
diff --git a/deps/v8/src/compiler/backend/code-generator.h b/deps/v8/src/compiler/backend/code-generator.h
index e9ebf67590..d56b1edae0 100644
--- a/deps/v8/src/compiler/backend/code-generator.h
+++ b/deps/v8/src/compiler/backend/code-generator.h
@@ -5,6 +5,8 @@
#ifndef V8_COMPILER_BACKEND_CODE_GENERATOR_H_
#define V8_COMPILER_BACKEND_CODE_GENERATOR_H_
+#include <memory>
+
#include "src/base/optional.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/safepoint-table.h"
diff --git a/deps/v8/src/compiler/backend/frame-elider.cc b/deps/v8/src/compiler/backend/frame-elider.cc
index 064501b097..293fc9352c 100644
--- a/deps/v8/src/compiler/backend/frame-elider.cc
+++ b/deps/v8/src/compiler/backend/frame-elider.cc
@@ -4,7 +4,7 @@
#include "src/compiler/backend/frame-elider.h"
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
namespace v8 {
namespace internal {
diff --git a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc
index 4542da643b..068268a3da 100644
--- a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -479,17 +479,18 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
__ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \
}
-#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
- do { \
- Register dst = i.OutputRegister(); \
- Operand src = i.InputOperand(0); \
- Register tmp = i.TempRegister(0); \
- __ mov(tmp, Immediate(1)); \
- __ xor_(dst, dst); \
- __ Pxor(kScratchDoubleReg, kScratchDoubleReg); \
- __ opcode(kScratchDoubleReg, src); \
- __ Ptest(kScratchDoubleReg, kScratchDoubleReg); \
- __ cmov(zero, dst, tmp); \
+#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
+ do { \
+ Register dst = i.OutputRegister(); \
+ Operand src = i.InputOperand(0); \
+ Register tmp = i.TempRegister(0); \
+ XMMRegister tmp_simd = i.TempSimd128Register(1); \
+ __ mov(tmp, Immediate(1)); \
+ __ xor_(dst, dst); \
+ __ Pxor(tmp_simd, tmp_simd); \
+ __ opcode(tmp_simd, src); \
+ __ Ptest(tmp_simd, tmp_simd); \
+ __ cmov(zero, dst, tmp); \
} while (false)
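
ASSEMBLE_SIMD_ALL_TRUE reduces a vector to a single boolean: the Pcmpeq-style opcode marks every lane equal to zero, Ptest sets the zero flag only if no lane was marked, and the cmov then materializes 1; the change above only moves the comparison result from kScratchDoubleReg into a dedicated temp. A scalar model of the reduction (illustrative only, not part of the commit):

#include <cstddef>
#include <cstdint>

int AllTrue32x4(const uint32_t lanes[4]) {
  bool any_zero = false;
  for (size_t n = 0; n < 4; ++n) any_zero |= (lanes[n] == 0);
  return any_zero ? 0 : 1;  // 1 only if every lane is non-zero
}
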
void CodeGenerator::AssembleDeconstructFrame() {
@@ -1266,16 +1267,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psrlq(kScratchDoubleReg, 33);
- __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psrlq(tmp, 33);
+ __ andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Neg: {
// TODO(bmeurer): Use 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psllq(kScratchDoubleReg, 31);
- __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 31);
+ __ xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Round: {
@@ -1444,16 +1447,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kSSEFloat64Abs: {
// TODO(bmeurer): Use 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psrlq(kScratchDoubleReg, 1);
- __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psrlq(tmp, 1);
+ __ andpd(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Neg: {
// TODO(bmeurer): Use 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psllq(kScratchDoubleReg, 63);
- __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 63);
+ __ xorpd(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Sqrt:
@@ -1476,13 +1481,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ cvttss2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat32ToUint32:
- __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
+ __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
+ i.TempSimd128Register(0));
break;
case kSSEFloat64ToInt32:
__ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat64ToUint32:
- __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
+ __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
+ i.TempSimd128Register(0));
break;
case kSSEInt32ToFloat32:
__ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
@@ -1577,34 +1584,38 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kAVXFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psrlq(kScratchDoubleReg, 33);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psrlq(tmp, 33);
CpuFeatureScope avx_scope(tasm(), AVX);
- __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
break;
}
case kAVXFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psllq(kScratchDoubleReg, 31);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 31);
CpuFeatureScope avx_scope(tasm(), AVX);
- __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
break;
}
case kAVXFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psrlq(kScratchDoubleReg, 1);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psrlq(tmp, 1);
CpuFeatureScope avx_scope(tasm(), AVX);
- __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
break;
}
case kAVXFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psllq(kScratchDoubleReg, 63);
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 63);
CpuFeatureScope avx_scope(tasm(), AVX);
- __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
+ __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
break;
}
case kSSEFloat64SilenceNaN:
@@ -1825,6 +1836,164 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
+ case kSSEF64x2Splat: {
+ DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ __ shufpd(dst, dst, 0x0);
+ break;
+ }
+ case kAVXF64x2Splat: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister src = i.InputDoubleRegister(0);
+ __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
+ break;
+ }
+ case kSSEF64x2ExtractLane: {
+ DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+ XMMRegister dst = i.OutputDoubleRegister();
+ int8_t lane = i.InputInt8(1);
+ if (lane != 0) {
+ DCHECK_LT(lane, 4);
+ __ shufpd(dst, dst, lane);
+ }
+ break;
+ }
+ case kAVXF64x2ExtractLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputDoubleRegister();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t lane = i.InputInt8(1);
+ if (lane == 0) {
+ if (dst != src) __ vmovapd(dst, src);
+ } else {
+ DCHECK_LT(lane, 4);
+ __ vshufpd(dst, src, src, lane);
+ }
+ break;
+ }
+ case kSSEF64x2ReplaceLane: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ CpuFeatureScope sse_scope(tasm(), SSE4_1);
+ XMMRegister dst = i.OutputSimd128Register();
+ int8_t lane = i.InputInt8(1);
+ DoubleRegister rep = i.InputDoubleRegister(2);
+
+ // insertps takes a mask which contains (high to low):
+    // - 2 bits specifying the source float element to copy
+    // - 2 bits specifying the destination float element to write to
+ // - 4 bits specifying which elements of the destination to zero
+ DCHECK_LT(lane, 2);
+ if (lane == 0) {
+ __ insertps(dst, rep, 0b00000000);
+ __ insertps(dst, rep, 0b01010000);
+ } else {
+ __ insertps(dst, rep, 0b00100000);
+ __ insertps(dst, rep, 0b01110000);
+ }
+ break;
+ }
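
The two insertps calls move the two 32-bit halves of the replacement double into the selected 64-bit lane. A small decode of the immediate layout described in the comment above (the helper is illustrative, not part of the patch):

  #include <cstdint>

  struct InsertpsImm {
    int src_elem;   // bits 7:6 - source float element
    int dst_elem;   // bits 5:4 - destination float element
    int zero_mask;  // bits 3:0 - destination elements to clear
  };

  InsertpsImm DecodeInsertps(uint8_t imm) {
    return {imm >> 6, (imm >> 4) & 3, imm & 0xF};
  }

  // DecodeInsertps(0b00000000) == {0, 0, 0}: rep[0] goes into dst[0].
  // DecodeInsertps(0b01010000) == {1, 1, 0}: rep[1] goes into dst[1].
  // Together the two calls replace 64-bit lane 0 with the new double.
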
+ case kAVXF64x2ReplaceLane: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister src = i.InputSimd128Register(0);
+ int8_t lane = i.InputInt8(1);
+ DoubleRegister rep = i.InputDoubleRegister(2);
+
+ DCHECK_LT(lane, 2);
+ if (lane == 0) {
+ __ vinsertps(dst, src, rep, 0b00000000);
+ __ vinsertps(dst, src, rep, 0b01010000);
+ } else {
+ __ vinsertps(dst, src, rep, 0b10100000);
+ __ vinsertps(dst, src, rep, 0b11110000);
+ }
+ break;
+ }
+ case kIA32F64x2Sqrt: {
+ __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
+ break;
+ }
+ case kIA32F64x2Add: {
+ __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Sub: {
+ __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Mul: {
+ __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Div: {
+ __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Min: {
+ Operand src1 = i.InputOperand(1);
+ XMMRegister dst = i.OutputSimd128Register(),
+ src = i.InputSimd128Register(0),
+ tmp = i.TempSimd128Register(0);
+ // The minpd instruction doesn't propagate NaNs and +0's in its first
+    // operand. Perform minpd in both orders, merge the results, and adjust.
+ __ Movapd(tmp, src1);
+ __ Minpd(tmp, tmp, src);
+ __ Minpd(dst, src, src1);
+    // Propagate -0's and NaNs, which may be non-canonical.
+ __ Orpd(tmp, dst);
+ // Canonicalize NaNs by quieting and clearing the payload.
+ __ Cmpunordpd(dst, dst, tmp);
+ __ Orpd(tmp, dst);
+ __ Psrlq(dst, 13);
+ __ Andnpd(dst, tmp);
+ break;
+ }
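
Per lane, the sequence above computes the Wasm f64x2.min result, which differs from a single minpd in that NaN is propagated and -0 is preferred over +0. A hedged scalar sketch of that per-lane rule (the SIMD code reaches it with two minpd's, an or to collect -0 and NaN bits, and a cmpunordpd mask to canonicalize NaNs):

  #include <cmath>
  #include <limits>

  double WasmF64Min(double a, double b) {
    if (std::isnan(a) || std::isnan(b)) {
      return std::numeric_limits<double>::quiet_NaN();
    }
    if (a == 0.0 && b == 0.0) {
      return std::signbit(a) ? a : b;  // -0 wins over +0
    }
    return a < b ? a : b;
  }
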
+ case kIA32F64x2Max: {
+ Operand src1 = i.InputOperand(1);
+ XMMRegister dst = i.OutputSimd128Register(),
+ src = i.InputSimd128Register(0),
+ tmp = i.TempSimd128Register(0);
+ // The maxpd instruction doesn't propagate NaNs and +0's in its first
+    // operand. Perform maxpd in both orders, merge the results, and adjust.
+ __ Movapd(tmp, src1);
+ __ Maxpd(tmp, tmp, src);
+ __ Maxpd(dst, src, src1);
+ // Find discrepancies.
+ __ Xorpd(dst, tmp);
+ // Propagate NaNs, which may be non-canonical.
+ __ Orpd(tmp, dst);
+ // Propagate sign discrepancy and (subtle) quiet NaNs.
+ __ Subpd(tmp, tmp, dst);
+ // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ __ Cmpunordpd(dst, dst, tmp);
+ __ Psrlq(dst, 13);
+ __ Andnpd(dst, tmp);
+ break;
+ }
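
The max sequence mirrors the min case but prefers +0 over -0. The per-lane result it is after, again as a scalar sketch under the same assumptions:

  #include <cmath>
  #include <limits>

  double WasmF64Max(double a, double b) {
    if (std::isnan(a) || std::isnan(b)) {
      return std::numeric_limits<double>::quiet_NaN();
    }
    if (a == 0.0 && b == 0.0) {
      return std::signbit(a) ? b : a;  // +0 wins over -0
    }
    return a > b ? a : b;
  }
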
+ case kIA32F64x2Eq: {
+ __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Ne: {
+ __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Lt: {
+ __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
+ case kIA32F64x2Le: {
+ __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputOperand(1));
+ break;
+ }
case kSSEF32x4Splat: {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
XMMRegister dst = i.OutputSimd128Register();
@@ -1951,6 +2120,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(0));
break;
}
+ case kSSEF32x4Sqrt: {
+ __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kAVXF32x4Sqrt: {
+ CpuFeatureScope avx_scope(tasm(), AVX);
+ __ vsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kIA32F32x4RecipApprox: {
__ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
break;
@@ -2212,28 +2390,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI32x4Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ pslld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ psrad(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
@@ -2430,14 +2620,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI32x4ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ psrld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, 31);
+ __ movd(tmp, shift);
__ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
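
The and_ against 31 implements the Wasm SIMD rule that a shift count is taken modulo the lane width (the 16-bit and 8-bit variants below mask with 15 and 7). In scalar form, assuming that rule:

  #include <cstdint>

  uint32_t I32LaneShl(uint32_t lane, uint32_t shift) {
    return lane << (shift & 31);  // a shift by 33 behaves like a shift by 1
  }
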
@@ -2514,7 +2710,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32I16x8ExtractLane: {
Register dst = i.OutputRegister();
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsx_w(dst, dst);
break;
}
case kSSEI16x8ReplaceLane: {
@@ -2553,28 +2748,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI16x8Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI16x8ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
@@ -2745,14 +2952,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI16x8ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
- __ movd(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, 15);
+ __ movd(tmp, shift);
__ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
@@ -2875,7 +3088,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32I8x16ExtractLane: {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsx_b(dst, dst);
break;
}
case kSSEI8x16ReplaceLane: {
@@ -2919,6 +3131,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register shift = i.InputRegister(1);
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
+ // Take shift value modulo 8.
+ __ and_(shift, 7);
// Mask off the unwanted bits before word-shifting.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
@@ -2938,6 +3152,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register shift = i.InputRegister(1);
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
+ // Take shift value modulo 8.
+ __ and_(shift, 7);
// Mask off the unwanted bits before word-shifting.
__ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
@@ -2959,6 +3175,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpckhbw(kScratchDoubleReg, dst);
__ punpcklbw(dst, dst);
__ mov(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ and_(tmp, 7);
__ add(tmp, Immediate(8));
__ movd(tmp_simd, tmp);
__ psraw(kScratchDoubleReg, tmp_simd);
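
The punpck/psraw pair emulates a per-byte arithmetic shift with 16-bit shifts: interleaving dst with itself duplicates each byte into both halves of a word, and shifting by (count + 8) lets the duplicated high copy supply the sign bits. A per-byte scalar sketch of that scheme (two's-complement wrap assumed on the narrowing cast):

  #include <cstdint>

  int8_t I8ShrSViaWordShift(int8_t value, int count) {
    count &= 7;                                               // modulo lane width
    uint16_t widened = static_cast<uint8_t>(value) * 0x0101;  // punpck*bw(dst, dst)
    int16_t as_word = static_cast<int16_t>(widened);
    return static_cast<int8_t>(as_word >> (count + 8));       // psraw by count + 8
  }
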
@@ -3223,6 +3441,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpckhbw(kScratchDoubleReg, dst);
__ punpcklbw(dst, dst);
__ mov(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ and_(tmp, 7);
__ add(tmp, Immediate(8));
__ movd(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
@@ -3365,6 +3585,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vxorps(dst, kScratchDoubleReg, i.InputSimd128Register(2));
break;
}
+ case kIA32S8x16Swizzle: {
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister mask = i.TempSimd128Register(0);
+
+      // Out-of-range indices should return 0; add 112 so that any index > 15
+      // ends up with its top bit set, which makes pshufb zero that lane.
+ __ Move(mask, (uint32_t)0x70707070);
+ __ Pshufd(mask, mask, 0x0);
+ __ Paddusb(mask, i.InputSimd128Register(1));
+ __ Pshufb(dst, mask);
+ break;
+ }
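
pshufb reads only the low four bits of each index byte and treats a set bit 7 as "write zero", so the saturating add of 112 pushes every out-of-range index into the bit-7-set range while leaving valid indices usable. A per-byte sketch of the mask adjustment, under that assumption:

  #include <cstdint>

  uint8_t AdjustSwizzleIndex(uint8_t index) {
    unsigned sum = index + 0x70u;                          // paddusb with 112
    return sum > 0xFF ? 0xFF : static_cast<uint8_t>(sum);  // unsigned saturation
  }
  // AdjustSwizzleIndex(15) == 0x7F: bit 7 clear, pshufb selects byte 15.
  // AdjustSwizzleIndex(16) == 0x80: bit 7 set, pshufb zeroes that lane.
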
case kIA32S8x16Shuffle: {
XMMRegister dst = i.OutputSimd128Register();
Operand src0 = i.InputOperand(0);
diff --git a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
index 7530c716b8..a77fb8cd37 100644
--- a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -116,6 +116,23 @@ namespace compiler {
V(IA32PushSimd128) \
V(IA32Poke) \
V(IA32Peek) \
+ V(SSEF64x2Splat) \
+ V(AVXF64x2Splat) \
+ V(SSEF64x2ExtractLane) \
+ V(AVXF64x2ExtractLane) \
+ V(SSEF64x2ReplaceLane) \
+ V(AVXF64x2ReplaceLane) \
+ V(IA32F64x2Sqrt) \
+ V(IA32F64x2Add) \
+ V(IA32F64x2Sub) \
+ V(IA32F64x2Mul) \
+ V(IA32F64x2Div) \
+ V(IA32F64x2Min) \
+ V(IA32F64x2Max) \
+ V(IA32F64x2Eq) \
+ V(IA32F64x2Ne) \
+ V(IA32F64x2Lt) \
+ V(IA32F64x2Le) \
V(SSEF32x4Splat) \
V(AVXF32x4Splat) \
V(SSEF32x4ExtractLane) \
@@ -129,6 +146,8 @@ namespace compiler {
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \
V(AVXF32x4Neg) \
+ V(SSEF32x4Sqrt) \
+ V(AVXF32x4Sqrt) \
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
V(SSEF32x4Add) \
@@ -313,6 +332,7 @@ namespace compiler {
V(AVXS128Xor) \
V(SSES128Select) \
V(AVXS128Select) \
+ V(IA32S8x16Swizzle) \
V(IA32S8x16Shuffle) \
V(IA32S32x4Swizzle) \
V(IA32S32x4Shuffle) \
diff --git a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index c2097a6691..287eb49a48 100644
--- a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -97,6 +97,23 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
+ case kSSEF64x2Splat:
+ case kAVXF64x2Splat:
+ case kSSEF64x2ExtractLane:
+ case kAVXF64x2ExtractLane:
+ case kSSEF64x2ReplaceLane:
+ case kAVXF64x2ReplaceLane:
+ case kIA32F64x2Sqrt:
+ case kIA32F64x2Add:
+ case kIA32F64x2Sub:
+ case kIA32F64x2Mul:
+ case kIA32F64x2Div:
+ case kIA32F64x2Min:
+ case kIA32F64x2Max:
+ case kIA32F64x2Eq:
+ case kIA32F64x2Ne:
+ case kIA32F64x2Lt:
+ case kIA32F64x2Le:
case kSSEF32x4Splat:
case kAVXF32x4Splat:
case kSSEF32x4ExtractLane:
@@ -110,6 +127,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Abs:
case kSSEF32x4Neg:
case kAVXF32x4Neg:
+ case kSSEF32x4Sqrt:
+ case kAVXF32x4Sqrt:
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
case kSSEF32x4Add:
@@ -294,6 +313,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS128Xor:
case kSSES128Select:
case kAVXS128Select:
+ case kIA32S8x16Swizzle:
case kIA32S8x16Shuffle:
case kIA32S32x4Swizzle:
case kIA32S32x4Shuffle:
diff --git a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
index ebef39a93a..a24727aba2 100644
--- a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@@ -200,12 +200,27 @@ namespace {
void VisitRO(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
IA32OperandGenerator g(selector);
- InstructionOperand temps[] = {g.TempRegister()};
Node* input = node->InputAt(0);
// We have to use a byte register as input to movsxb.
InstructionOperand input_op =
opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input);
- selector->Emit(opcode, g.DefineAsRegister(node), input_op, arraysize(temps),
+ selector->Emit(opcode, g.DefineAsRegister(node), input_op);
+}
+
+void VisitROWithTemp(InstructionSelector* selector, Node* node,
+ ArchOpcode opcode) {
+ IA32OperandGenerator g(selector);
+ InstructionOperand temps[] = {g.TempRegister()};
+ selector->Emit(opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
+ arraysize(temps), temps);
+}
+
+void VisitROWithTempSimd(InstructionSelector* selector, Node* node,
+ ArchOpcode opcode) {
+ IA32OperandGenerator g(selector);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)), arraysize(temps),
temps);
}
@@ -231,10 +246,13 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
IA32OperandGenerator g(selector);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
if (selector->IsSupported(AVX)) {
- selector->Emit(avx_opcode, g.DefineAsRegister(node), g.Use(input));
+ selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
+ arraysize(temps), temps);
} else {
- selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input));
+ selector->Emit(sse_opcode, g.DefineSameAsFirst(node),
+ g.UseUniqueRegister(input), arraysize(temps), temps);
}
}
@@ -804,12 +822,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(ChangeFloat32ToFloat64, kSSEFloat32ToFloat64) \
V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
- V(ChangeUint32ToFloat64, kSSEUint32ToFloat64) \
V(TruncateFloat32ToInt32, kSSEFloat32ToInt32) \
- V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \
V(ChangeFloat64ToInt32, kSSEFloat64ToInt32) \
- V(ChangeFloat64ToUint32, kSSEFloat64ToUint32) \
- V(TruncateFloat64ToUint32, kSSEFloat64ToUint32) \
V(TruncateFloat64ToFloat32, kSSEFloat64ToFloat32) \
V(RoundFloat64ToInt32, kSSEFloat64ToInt32) \
V(BitcastFloat32ToInt32, kIA32BitcastFI) \
@@ -819,7 +833,15 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32) \
V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \
V(SignExtendWord8ToInt32, kIA32Movsxbl) \
- V(SignExtendWord16ToInt32, kIA32Movsxwl)
+ V(SignExtendWord16ToInt32, kIA32Movsxwl) \
+ V(F64x2Sqrt, kIA32F64x2Sqrt)
+
+#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64)
+
+#define RO_WITH_TEMP_SIMD_OP_LIST(V) \
+ V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \
+ V(ChangeFloat64ToUint32, kSSEFloat64ToUint32) \
+ V(TruncateFloat64ToUint32, kSSEFloat64ToUint32)
#define RR_OP_LIST(V) \
V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
@@ -841,13 +863,23 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
- V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
+ V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
+ V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add) \
+ V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub) \
+ V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul) \
+ V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div) \
+ V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq) \
+ V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne) \
+ V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt) \
+ V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
V(Float64Abs, kAVXFloat64Abs, kSSEFloat64Abs) \
V(Float32Neg, kAVXFloat32Neg, kSSEFloat32Neg) \
- V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg)
+ V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) \
+ V(F64x2Abs, kAVXFloat64Abs, kSSEFloat64Abs) \
+ V(F64x2Neg, kAVXFloat64Neg, kSSEFloat64Neg)
#define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
@@ -857,6 +889,22 @@ RO_OP_LIST(RO_VISITOR)
#undef RO_VISITOR
#undef RO_OP_LIST
+#define RO_WITH_TEMP_VISITOR(Name, opcode) \
+ void InstructionSelector::Visit##Name(Node* node) { \
+ VisitROWithTemp(this, node, opcode); \
+ }
+RO_WITH_TEMP_OP_LIST(RO_WITH_TEMP_VISITOR)
+#undef RO_WITH_TEMP_VISITOR
+#undef RO_WITH_TEMP_OP_LIST
+
+#define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode) \
+ void InstructionSelector::Visit##Name(Node* node) { \
+ VisitROWithTempSimd(this, node, opcode); \
+ }
+RO_WITH_TEMP_SIMD_OP_LIST(RO_WITH_TEMP_SIMD_VISITOR)
+#undef RO_WITH_TEMP_SIMD_VISITOR
+#undef RO_WITH_TEMP_SIMD_OP_LIST
+
#define RR_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRR(this, node, opcode); \
@@ -890,6 +938,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitInt32Add(Node* node) {
IA32OperandGenerator g(this);
@@ -1971,6 +2023,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
+ V(F32x4Sqrt) \
V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \
@@ -1995,6 +2048,43 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16ShrS) \
V(I8x16ShrU)
+void InstructionSelector::VisitF64x2Min(Node* node) {
+ IA32OperandGenerator g(this);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+ InstructionOperand operand1 = g.UseUnique(node->InputAt(1));
+
+ if (IsSupported(AVX)) {
+ Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1,
+ arraysize(temps), temps);
+ } else {
+ Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1,
+ arraysize(temps), temps);
+ }
+}
+
+void InstructionSelector::VisitF64x2Max(Node* node) {
+ IA32OperandGenerator g(this);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+ InstructionOperand operand1 = g.UseUnique(node->InputAt(1));
+ if (IsSupported(AVX)) {
+ Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1,
+ arraysize(temps), temps);
+ } else {
+ Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1,
+ arraysize(temps), temps);
+ }
+}
+
+void InstructionSelector::VisitF64x2Splat(Node* node) {
+ VisitRRSimd(this, node, kAVXF64x2Splat, kSSEF64x2Splat);
+}
+
+void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
+ VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
+}
+
void InstructionSelector::VisitF32x4Splat(Node* node) {
VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat);
}
@@ -2086,6 +2176,28 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
#undef VISIT_SIMD_REPLACE_LANE
#undef SIMD_INT_TYPES
+// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
+// operand2 to be UseRegister, because the codegen relies on insertps using
+// registers.
+// TODO(v8:9764) Remove this UseRegister requirement
+#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type) \
+ void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+ IA32OperandGenerator g(this); \
+ InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
+ InstructionOperand operand1 = \
+ g.UseImmediate(OpParameter<int32_t>(node->op())); \
+ InstructionOperand operand2 = g.UseRegister(node->InputAt(1)); \
+ if (IsSupported(AVX)) { \
+ Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
+ operand1, operand2); \
+ } else { \
+ Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
+ operand1, operand2); \
+ } \
+ }
+VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
+#undef VISIT_SIMD_REPLACE_LANE_USE_REG
+
#define VISIT_SIMD_SHIFT(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \
@@ -2132,12 +2244,12 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
#undef VISIT_SIMD_ANYTRUE
#undef SIMD_ANYTRUE_LIST
-#define VISIT_SIMD_ALLTRUE(Opcode) \
- void InstructionSelector::Visit##Opcode(Node* node) { \
- IA32OperandGenerator g(this); \
- InstructionOperand temps[] = {g.TempRegister()}; \
- Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), \
- arraysize(temps), temps); \
+#define VISIT_SIMD_ALLTRUE(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ IA32OperandGenerator g(this); \
+ InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
+ Emit(kIA32##Opcode, g.DefineAsRegister(node), \
+ g.UseUnique(node->InputAt(0)), arraysize(temps), temps); \
}
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE
@@ -2489,6 +2601,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
}
+void InstructionSelector::VisitS8x16Swizzle(Node* node) {
+ IA32OperandGenerator g(this);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ Emit(kIA32S8x16Swizzle, g.DefineSameAsFirst(node),
+ g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+ arraysize(temps), temps);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
diff --git a/deps/v8/src/compiler/backend/instruction-scheduler.cc b/deps/v8/src/compiler/backend/instruction-scheduler.cc
index dc66813740..d4920cd575 100644
--- a/deps/v8/src/compiler/backend/instruction-scheduler.cc
+++ b/deps/v8/src/compiler/backend/instruction-scheduler.cc
@@ -4,7 +4,7 @@
#include "src/compiler/backend/instruction-scheduler.h"
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/base/utils/random-number-generator.h"
#include "src/execution/isolate.h"
diff --git a/deps/v8/src/compiler/backend/instruction-selector-impl.h b/deps/v8/src/compiler/backend/instruction-selector-impl.h
index a3f62e7ba4..13ea049eba 100644
--- a/deps/v8/src/compiler/backend/instruction-selector-impl.h
+++ b/deps/v8/src/compiler/backend/instruction-selector-impl.h
@@ -29,8 +29,8 @@ inline bool operator<(const CaseInfo& l, const CaseInfo& r) {
// Helper struct containing data about a table or lookup switch.
class SwitchInfo {
public:
- SwitchInfo(ZoneVector<CaseInfo>& cases, // NOLINT(runtime/references)
- int32_t min_value, int32_t max_value, BasicBlock* default_branch)
+ SwitchInfo(ZoneVector<CaseInfo> const& cases, int32_t min_value,
+ int32_t max_value, BasicBlock* default_branch)
: cases_(cases),
min_value_(min_value),
max_value_(max_value),
@@ -193,17 +193,6 @@ class OperandGenerator {
reg.code(), GetVReg(node)));
}
- InstructionOperand UseExplicit(LinkageLocation location) {
- MachineRepresentation rep = InstructionSequence::DefaultRepresentation();
- if (location.IsRegister()) {
- return ExplicitOperand(LocationOperand::REGISTER, rep,
- location.AsRegister());
- } else {
- return ExplicitOperand(LocationOperand::STACK_SLOT, rep,
- location.GetLocation());
- }
- }
-
InstructionOperand UseImmediate(int immediate) {
return sequence()->AddImmediate(Constant(immediate));
}
@@ -275,6 +264,16 @@ class OperandGenerator {
InstructionOperand::kInvalidVirtualRegister);
}
+ template <typename FPRegType>
+ InstructionOperand TempFpRegister(FPRegType reg) {
+ UnallocatedOperand op =
+ UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER, reg.code(),
+ sequence()->NextVirtualRegister());
+ sequence()->MarkAsRepresentation(MachineRepresentation::kSimd128,
+ op.virtual_register());
+ return op;
+ }
+
InstructionOperand TempImmediate(int32_t imm) {
return sequence()->AddImmediate(Constant(imm));
}
diff --git a/deps/v8/src/compiler/backend/instruction-selector.cc b/deps/v8/src/compiler/backend/instruction-selector.cc
index 43193ec2b1..22d81c0c55 100644
--- a/deps/v8/src/compiler/backend/instruction-selector.cc
+++ b/deps/v8/src/compiler/backend/instruction-selector.cc
@@ -6,7 +6,7 @@
#include <limits>
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/tick-counter.h"
#include "src/compiler/backend/instruction-selector-impl.h"
@@ -1439,6 +1439,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64ReverseBits(node);
case IrOpcode::kWord64ReverseBytes:
return MarkAsWord64(node), VisitWord64ReverseBytes(node);
+ case IrOpcode::kSimd128ReverseBytes:
+ return MarkAsSimd128(node), VisitSimd128ReverseBytes(node);
case IrOpcode::kInt64AbsWithOverflow:
return MarkAsWord64(node), VisitInt64AbsWithOverflow(node);
case IrOpcode::kWord64Equal:
@@ -1502,7 +1504,7 @@ void InstructionSelector::VisitNode(Node* node) {
case IrOpcode::kUint64Mod:
return MarkAsWord64(node), VisitUint64Mod(node);
case IrOpcode::kBitcastTaggedToWord:
- case IrOpcode::kBitcastTaggedSignedToWord:
+ case IrOpcode::kBitcastTaggedToWordForTagAndSmiBits:
return MarkAsRepresentation(MachineType::PointerRepresentation(), node),
VisitBitcastTaggedToWord(node);
case IrOpcode::kBitcastWordToTagged:
@@ -1857,6 +1859,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Abs(node);
case IrOpcode::kF64x2Neg:
return MarkAsSimd128(node), VisitF64x2Neg(node);
+ case IrOpcode::kF64x2Sqrt:
+ return MarkAsSimd128(node), VisitF64x2Sqrt(node);
case IrOpcode::kF64x2Add:
return MarkAsSimd128(node), VisitF64x2Add(node);
case IrOpcode::kF64x2Sub:
@@ -1877,6 +1881,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Lt(node);
case IrOpcode::kF64x2Le:
return MarkAsSimd128(node), VisitF64x2Le(node);
+ case IrOpcode::kF64x2Qfma:
+ return MarkAsSimd128(node), VisitF64x2Qfma(node);
+ case IrOpcode::kF64x2Qfms:
+ return MarkAsSimd128(node), VisitF64x2Qfms(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
@@ -1891,6 +1899,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Abs(node);
case IrOpcode::kF32x4Neg:
return MarkAsSimd128(node), VisitF32x4Neg(node);
+ case IrOpcode::kF32x4Sqrt:
+ return MarkAsSimd128(node), VisitF32x4Sqrt(node);
case IrOpcode::kF32x4RecipApprox:
return MarkAsSimd128(node), VisitF32x4RecipApprox(node);
case IrOpcode::kF32x4RecipSqrtApprox:
@@ -1917,6 +1927,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Lt(node);
case IrOpcode::kF32x4Le:
return MarkAsSimd128(node), VisitF32x4Le(node);
+ case IrOpcode::kF32x4Qfma:
+ return MarkAsSimd128(node), VisitF32x4Qfma(node);
+ case IrOpcode::kF32x4Qfms:
+ return MarkAsSimd128(node), VisitF32x4Qfms(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2ExtractLane:
@@ -2137,6 +2151,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS128Not(node);
case IrOpcode::kS128Select:
return MarkAsSimd128(node), VisitS128Select(node);
+ case IrOpcode::kS8x16Swizzle:
+ return MarkAsSimd128(node), VisitS8x16Swizzle(node);
case IrOpcode::kS8x16Shuffle:
return MarkAsSimd128(node), VisitS8x16Shuffle(node);
case IrOpcode::kS1x2AnyTrue:
@@ -2286,8 +2302,8 @@ void InstructionSelector::VisitFloat64Tanh(Node* node) {
VisitFloat64Ieee754Unop(node, kIeee754Float64Tanh);
}
-void InstructionSelector::EmitTableSwitch(const SwitchInfo& sw,
- InstructionOperand& index_operand) {
+void InstructionSelector::EmitTableSwitch(
+ const SwitchInfo& sw, InstructionOperand const& index_operand) {
OperandGenerator g(this);
size_t input_count = 2 + sw.value_range();
DCHECK_LE(sw.value_range(), std::numeric_limits<size_t>::max() - 2);
@@ -2304,8 +2320,8 @@ void InstructionSelector::EmitTableSwitch(const SwitchInfo& sw,
Emit(kArchTableSwitch, 0, nullptr, input_count, inputs, 0, nullptr);
}
-void InstructionSelector::EmitLookupSwitch(const SwitchInfo& sw,
- InstructionOperand& value_operand) {
+void InstructionSelector::EmitLookupSwitch(
+ const SwitchInfo& sw, InstructionOperand const& value_operand) {
OperandGenerator g(this);
std::vector<CaseInfo> cases = sw.CasesSortedByOriginalOrder();
size_t input_count = 2 + sw.case_count() * 2;
@@ -2322,7 +2338,7 @@ void InstructionSelector::EmitLookupSwitch(const SwitchInfo& sw,
}
void InstructionSelector::EmitBinarySearchSwitch(
- const SwitchInfo& sw, InstructionOperand& value_operand) {
+ const SwitchInfo& sw, InstructionOperand const& value_operand) {
OperandGenerator g(this);
size_t input_count = 2 + sw.case_count() * 2;
DCHECK_LE(sw.case_count(), (std::numeric_limits<size_t>::max() - 2) / 2);
@@ -2607,21 +2623,25 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64
+#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
+#endif // !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
@@ -2630,6 +2650,7 @@ void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
@@ -2639,8 +2660,11 @@ void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64
-void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
diff --git a/deps/v8/src/compiler/backend/instruction-selector.h b/deps/v8/src/compiler/backend/instruction-selector.h
index eb3e098427..e951c90f95 100644
--- a/deps/v8/src/compiler/backend/instruction-selector.h
+++ b/deps/v8/src/compiler/backend/instruction-selector.h
@@ -502,15 +502,12 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
FeedbackSource const& feedback,
Node* frame_state);
- void EmitTableSwitch(
- const SwitchInfo& sw,
- InstructionOperand& index_operand); // NOLINT(runtime/references)
- void EmitLookupSwitch(
- const SwitchInfo& sw,
- InstructionOperand& value_operand); // NOLINT(runtime/references)
- void EmitBinarySearchSwitch(
- const SwitchInfo& sw,
- InstructionOperand& value_operand); // NOLINT(runtime/references)
+ void EmitTableSwitch(const SwitchInfo& sw,
+ InstructionOperand const& index_operand);
+ void EmitLookupSwitch(const SwitchInfo& sw,
+ InstructionOperand const& value_operand);
+ void EmitBinarySearchSwitch(const SwitchInfo& sw,
+ InstructionOperand const& value_operand);
void TryRename(InstructionOperand* op);
int GetRename(int virtual_register);
diff --git a/deps/v8/src/compiler/backend/instruction.cc b/deps/v8/src/compiler/backend/instruction.cc
index 06158b0c72..076f1b596e 100644
--- a/deps/v8/src/compiler/backend/instruction.cc
+++ b/deps/v8/src/compiler/backend/instruction.cc
@@ -168,7 +168,6 @@ std::ostream& operator<<(std::ostream& os, const InstructionOperand& op) {
return os << "[immediate:" << imm.indexed_value() << "]";
}
}
- case InstructionOperand::EXPLICIT:
case InstructionOperand::ALLOCATED: {
LocationOperand allocated = LocationOperand::cast(op);
if (op.IsStackSlot()) {
@@ -192,9 +191,6 @@ std::ostream& operator<<(std::ostream& os, const InstructionOperand& op) {
os << "[" << Simd128Register::from_code(allocated.register_code())
<< "|R";
}
- if (allocated.IsExplicit()) {
- os << "|E";
- }
switch (allocated.representation()) {
case MachineRepresentation::kNone:
os << "|-";
@@ -294,17 +290,6 @@ void ParallelMove::PrepareInsertAfter(
if (replacement != nullptr) move->set_source(replacement->source());
}
-ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep,
- int index)
- : LocationOperand(EXPLICIT, kind, rep, index) {
- DCHECK_IMPLIES(kind == REGISTER && !IsFloatingPoint(rep),
- GetRegConfig()->IsAllocatableGeneralCode(index));
- DCHECK_IMPLIES(kind == REGISTER && rep == MachineRepresentation::kFloat32,
- GetRegConfig()->IsAllocatableFloatCode(index));
- DCHECK_IMPLIES(kind == REGISTER && (rep == MachineRepresentation::kFloat64),
- GetRegConfig()->IsAllocatableDoubleCode(index));
-}
-
Instruction::Instruction(InstructionCode opcode)
: opcode_(opcode),
bit_field_(OutputCountField::encode(0) | InputCountField::encode(0) |
diff --git a/deps/v8/src/compiler/backend/instruction.h b/deps/v8/src/compiler/backend/instruction.h
index f5f7f64c51..321f069531 100644
--- a/deps/v8/src/compiler/backend/instruction.h
+++ b/deps/v8/src/compiler/backend/instruction.h
@@ -43,9 +43,8 @@ class V8_EXPORT_PRIVATE InstructionOperand {
CONSTANT,
IMMEDIATE,
// Location operand kinds.
- EXPLICIT,
ALLOCATED,
- FIRST_LOCATION_OPERAND_KIND = EXPLICIT
+ FIRST_LOCATION_OPERAND_KIND = ALLOCATED
// Location operand kinds must be last.
};
@@ -68,11 +67,6 @@ class V8_EXPORT_PRIVATE InstructionOperand {
// embedded directly in instructions, e.g. small integers and on some
// platforms Objects.
INSTRUCTION_OPERAND_PREDICATE(Immediate, IMMEDIATE)
- // ExplicitOperands do not participate in register allocation. They are
- // created by the instruction selector for direct access to registers and
- // stack slots, completely bypassing the register allocator. They are never
- // associated with a virtual register
- INSTRUCTION_OPERAND_PREDICATE(Explicit, EXPLICIT)
// AllocatedOperands are registers or stack slots that are assigned by the
// register allocator and are always associated with a virtual register.
INSTRUCTION_OPERAND_PREDICATE(Allocated, ALLOCATED)
@@ -515,19 +509,6 @@ class LocationOperand : public InstructionOperand {
using IndexField = BitField64<int32_t, 35, 29>;
};
-class V8_EXPORT_PRIVATE ExplicitOperand
- : public NON_EXPORTED_BASE(LocationOperand) {
- public:
- ExplicitOperand(LocationKind kind, MachineRepresentation rep, int index);
-
- static ExplicitOperand* New(Zone* zone, LocationKind kind,
- MachineRepresentation rep, int index) {
- return InstructionOperand::New(zone, ExplicitOperand(kind, rep, index));
- }
-
- INSTRUCTION_OPERAND_CASTS(ExplicitOperand, EXPLICIT)
-};
-
class AllocatedOperand : public LocationOperand {
public:
AllocatedOperand(LocationKind kind, MachineRepresentation rep, int index)
@@ -643,7 +624,7 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const {
}
return InstructionOperand::KindField::update(
LocationOperand::RepresentationField::update(this->value_, canonical),
- LocationOperand::EXPLICIT);
+ LocationOperand::ALLOCATED);
}
return this->value_;
}
@@ -776,11 +757,11 @@ class V8_EXPORT_PRIVATE Instruction final {
public:
size_t OutputCount() const { return OutputCountField::decode(bit_field_); }
const InstructionOperand* OutputAt(size_t i) const {
- DCHECK(i < OutputCount());
+ DCHECK_LT(i, OutputCount());
return &operands_[i];
}
InstructionOperand* OutputAt(size_t i) {
- DCHECK(i < OutputCount());
+ DCHECK_LT(i, OutputCount());
return &operands_[i];
}
@@ -790,21 +771,21 @@ class V8_EXPORT_PRIVATE Instruction final {
size_t InputCount() const { return InputCountField::decode(bit_field_); }
const InstructionOperand* InputAt(size_t i) const {
- DCHECK(i < InputCount());
+ DCHECK_LT(i, InputCount());
return &operands_[OutputCount() + i];
}
InstructionOperand* InputAt(size_t i) {
- DCHECK(i < InputCount());
+ DCHECK_LT(i, InputCount());
return &operands_[OutputCount() + i];
}
size_t TempCount() const { return TempCountField::decode(bit_field_); }
const InstructionOperand* TempAt(size_t i) const {
- DCHECK(i < TempCount());
+ DCHECK_LT(i, TempCount());
return &operands_[OutputCount() + InputCount() + i];
}
InstructionOperand* TempAt(size_t i) {
- DCHECK(i < TempCount());
+ DCHECK_LT(i, TempCount());
return &operands_[OutputCount() + InputCount() + i];
}
diff --git a/deps/v8/src/compiler/backend/jump-threading.cc b/deps/v8/src/compiler/backend/jump-threading.cc
index dfb917a58c..ee195bf51e 100644
--- a/deps/v8/src/compiler/backend/jump-threading.cc
+++ b/deps/v8/src/compiler/backend/jump-threading.cc
@@ -69,11 +69,11 @@ bool IsBlockWithBranchPoisoning(InstructionSequence* code,
} // namespace
bool JumpThreading::ComputeForwarding(Zone* local_zone,
- ZoneVector<RpoNumber>& result,
+ ZoneVector<RpoNumber>* result,
InstructionSequence* code,
bool frame_at_start) {
ZoneStack<RpoNumber> stack(local_zone);
- JumpThreadingState state = {false, result, stack};
+ JumpThreadingState state = {false, *result, stack};
state.Clear(code->InstructionBlockCount());
// Iterate over the blocks forward, pushing the blocks onto the stack.
@@ -135,15 +135,15 @@ bool JumpThreading::ComputeForwarding(Zone* local_zone,
}
#ifdef DEBUG
- for (RpoNumber num : result) {
+ for (RpoNumber num : *result) {
DCHECK(num.IsValid());
}
#endif
if (FLAG_trace_turbo_jt) {
- for (int i = 0; i < static_cast<int>(result.size()); i++) {
+ for (int i = 0; i < static_cast<int>(result->size()); i++) {
TRACE("B%d ", i);
- int to = result[i].ToInt();
+ int to = (*result)[i].ToInt();
if (i != to) {
TRACE("-> B%d\n", to);
} else {
@@ -156,7 +156,7 @@ bool JumpThreading::ComputeForwarding(Zone* local_zone,
}
void JumpThreading::ApplyForwarding(Zone* local_zone,
- ZoneVector<RpoNumber>& result,
+ ZoneVector<RpoNumber> const& result,
InstructionSequence* code) {
if (!FLAG_turbo_jt) return;
diff --git a/deps/v8/src/compiler/backend/jump-threading.h b/deps/v8/src/compiler/backend/jump-threading.h
index ce60ebcb2e..ce9e394924 100644
--- a/deps/v8/src/compiler/backend/jump-threading.h
+++ b/deps/v8/src/compiler/backend/jump-threading.h
@@ -17,17 +17,14 @@ class V8_EXPORT_PRIVATE JumpThreading {
public:
// Compute the forwarding map of basic blocks to their ultimate destination.
// Returns {true} if there is at least one block that is forwarded.
- static bool ComputeForwarding(
- Zone* local_zone,
- ZoneVector<RpoNumber>& result, // NOLINT(runtime/references)
- InstructionSequence* code, bool frame_at_start);
+ static bool ComputeForwarding(Zone* local_zone, ZoneVector<RpoNumber>* result,
+ InstructionSequence* code, bool frame_at_start);
// Rewrite the instructions to forward jumps and branches.
// May also negate some branches.
- static void ApplyForwarding(
- Zone* local_zone,
- ZoneVector<RpoNumber>& forwarding, // NOLINT(runtime/references)
- InstructionSequence* code);
+ static void ApplyForwarding(Zone* local_zone,
+ ZoneVector<RpoNumber> const& forwarding,
+ InstructionSequence* code);
};
} // namespace compiler
diff --git a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc
index 239075392a..ee23402e69 100644
--- a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc
+++ b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc
@@ -265,34 +265,33 @@ Condition FlagsConditionToConditionTst(FlagsCondition condition) {
UNREACHABLE();
}
-FPUCondition FlagsConditionToConditionCmpFPU(
- bool& predicate, // NOLINT(runtime/references)
- FlagsCondition condition) {
+FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate,
+ FlagsCondition condition) {
switch (condition) {
case kEqual:
- predicate = true;
+ *predicate = true;
return EQ;
case kNotEqual:
- predicate = false;
+ *predicate = false;
return EQ;
case kUnsignedLessThan:
- predicate = true;
+ *predicate = true;
return OLT;
case kUnsignedGreaterThanOrEqual:
- predicate = false;
+ *predicate = false;
return OLT;
case kUnsignedLessThanOrEqual:
- predicate = true;
+ *predicate = true;
return OLE;
case kUnsignedGreaterThan:
- predicate = false;
+ *predicate = false;
return OLE;
case kUnorderedEqual:
case kUnorderedNotEqual:
- predicate = true;
+ *predicate = true;
break;
default:
- predicate = true;
+ *predicate = true;
break;
}
UNREACHABLE();
@@ -303,9 +302,9 @@ FPUCondition FlagsConditionToConditionCmpFPU(
<< "\""; \
UNIMPLEMENTED();
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
- MipsOperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode, Instruction* instr,
+ MipsOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -780,12 +779,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
- Label return_location;
- if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+ Label start_call;
+ bool isWasmCapiFunction =
+ linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+      // Distance in bytes from start_call to the return address.
+ int offset = 40;
+#if V8_HOST_ARCH_MIPS
+ if (__ emit_debug_code()) {
+ offset += 16;
+ }
+#endif
+ if (isWasmCapiFunction) {
// Put the return address in a stack slot.
- __ LoadAddress(kScratchReg, &return_location);
- __ sw(kScratchReg,
- MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+ __ mov(kScratchReg, ra);
+ __ bind(&start_call);
+ __ nal();
+ __ nop();
+ __ Addu(ra, ra, offset - 8); // 8 = nop + nal
+ __ sw(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+ __ mov(ra, kScratchReg);
}
if (instr->InputAt(0)->IsImmediate()) {
ExternalReference ref = i.InputExternalReference(0);
@@ -794,7 +806,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters);
}
- __ bind(&return_location);
+ if (isWasmCapiFunction) {
+ CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
+ }
+
RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
frame_access_state()->SetFrameAccessToDefault();
// Ideally, we should decrement SP delta to match the change of stack
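
On MIPS, nal writes the address of the instruction after its delay slot into ra, so once the nop retires ra equals start_call + 8; adding offset - 8 therefore yields start_call + offset, the return address that the CHECK_EQ above verifies. A sketch of the arithmetic, with the constants treated as assumptions about the emitted code size:

  #include <cstdint>

  // Illustrative only: 40 (56 with debug code) comes from the code generator above.
  uintptr_t CapiReturnAddress(uintptr_t start_call, int offset_to_return) {
    uintptr_t ra_after_nal = start_call + 8;       // nal plus its delay-slot nop
    return ra_after_nal + (offset_to_return - 8);  // == start_call + offset_to_return
  }
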
@@ -1179,7 +1194,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
FPURegister right = i.InputOrZeroSingleRegister(1);
bool predicate;
FPUCondition cc =
- FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition());
+ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
!__ IsDoubleZeroRegSet()) {
@@ -1239,7 +1254,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
FPURegister right = i.InputOrZeroDoubleRegister(1);
bool predicate;
FPUCondition cc =
- FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition());
+ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
!__ IsDoubleZeroRegSet()) {
__ Move(kDoubleRegZero, 0.0);
@@ -2038,6 +2053,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
break;
}
+ case kMipsF32x4Sqrt: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kMipsF32x4RecipApprox: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
@@ -3026,7 +3046,7 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
} else if (instr->arch_opcode() == kMipsCmpS ||
instr->arch_opcode() == kMipsCmpD) {
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (predicate) {
__ BranchTrueF(tlabel);
} else {
@@ -3116,7 +3136,7 @@ void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
case kMipsCmpS:
case kMipsCmpD: {
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (predicate) {
__ LoadZeroIfFPUCondition(kSpeculationPoisonRegister);
} else {
@@ -3314,7 +3334,7 @@ void CodeGenerator::AssembleArchBoolean(Instruction* instr,
__ Move(kDoubleRegZero, 0.0);
}
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (!IsMipsArchVariant(kMips32r6)) {
__ li(result, Operand(1));
if (predicate) {
diff --git a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h
index e8020d9e89..af0774f468 100644
--- a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h
+++ b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h
@@ -159,6 +159,7 @@ namespace compiler {
V(MipsI32x4MinU) \
V(MipsF32x4Abs) \
V(MipsF32x4Neg) \
+ V(MipsF32x4Sqrt) \
V(MipsF32x4RecipApprox) \
V(MipsF32x4RecipSqrtApprox) \
V(MipsF32x4Add) \
diff --git a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
index 4e6aef52f4..ba17ad2581 100644
--- a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
+++ b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
@@ -54,6 +54,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF32x4Div:
case kMipsF32x4Ne:
case kMipsF32x4Neg:
+ case kMipsF32x4Sqrt:
case kMipsF32x4RecipApprox:
case kMipsF32x4RecipSqrtApprox:
case kMipsF32x4ReplaceLane:
diff --git a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc
index bb47262c6c..7ee5c7c2c7 100644
--- a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc
+++ b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc
@@ -2,7 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
#include "src/base/bits.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
@@ -781,6 +780,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitWord32Ctz(Node* node) {
MipsOperandGenerator g(this);
Emit(kMipsCtz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
@@ -2015,6 +2018,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \
V(F32x4Abs, kMipsF32x4Abs) \
V(F32x4Neg, kMipsF32x4Neg) \
+ V(F32x4Sqrt, kMipsF32x4Sqrt) \
V(F32x4RecipApprox, kMipsF32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \
V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \
diff --git a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc
index 5682bed71a..9cec463e87 100644
--- a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc
+++ b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc
@@ -278,42 +278,41 @@ Condition FlagsConditionToConditionOvf(FlagsCondition condition) {
UNREACHABLE();
}
-FPUCondition FlagsConditionToConditionCmpFPU(
- bool& predicate, // NOLINT(runtime/references)
- FlagsCondition condition) {
+FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate,
+ FlagsCondition condition) {
switch (condition) {
case kEqual:
- predicate = true;
+ *predicate = true;
return EQ;
case kNotEqual:
- predicate = false;
+ *predicate = false;
return EQ;
case kUnsignedLessThan:
- predicate = true;
+ *predicate = true;
return OLT;
case kUnsignedGreaterThanOrEqual:
- predicate = false;
+ *predicate = false;
return OLT;
case kUnsignedLessThanOrEqual:
- predicate = true;
+ *predicate = true;
return OLE;
case kUnsignedGreaterThan:
- predicate = false;
+ *predicate = false;
return OLE;
case kUnorderedEqual:
case kUnorderedNotEqual:
- predicate = true;
+ *predicate = true;
break;
default:
- predicate = true;
+ *predicate = true;
break;
}
UNREACHABLE();
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
- MipsOperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode, Instruction* instr,
+ MipsOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -758,12 +757,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
- Label return_location;
- if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+ Label start_call;
+ bool isWasmCapiFunction =
+ linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+      // Distance in bytes from start_call to the return address.
+ int offset = 48;
+#if V8_HOST_ARCH_MIPS64
+ if (__ emit_debug_code()) {
+ offset += 16;
+ }
+#endif
+ if (isWasmCapiFunction) {
// Put the return address in a stack slot.
- __ LoadAddress(kScratchReg, &return_location);
- __ sd(kScratchReg,
- MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+ __ mov(kScratchReg, ra);
+ __ bind(&start_call);
+ __ nal();
+ __ nop();
+ __ Daddu(ra, ra, offset - 8); // 8 = nop + nal
+ __ sd(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+ __ mov(ra, kScratchReg);
}
if (instr->InputAt(0)->IsImmediate()) {
ExternalReference ref = i.InputExternalReference(0);
@@ -772,7 +784,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters);
}
- __ bind(&return_location);
+ if (isWasmCapiFunction) {
+ CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
+ }
+
RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
frame_access_state()->SetFrameAccessToDefault();
// Ideally, we should decrement SP delta to match the change of stack
@@ -1276,7 +1291,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
FPURegister right = i.InputOrZeroSingleRegister(1);
bool predicate;
FPUCondition cc =
- FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition());
+ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
!__ IsDoubleZeroRegSet()) {
@@ -1339,7 +1354,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
FPURegister right = i.InputOrZeroDoubleRegister(1);
bool predicate;
FPUCondition cc =
- FlagsConditionToConditionCmpFPU(predicate, instr->flags_condition());
+ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
!__ IsDoubleZeroRegSet()) {
__ Move(kDoubleRegZero, 0.0);
@@ -2233,6 +2248,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
+ case kMips64F32x4Sqrt: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kMips64I32x4Neg: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
@@ -3151,7 +3171,7 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
} else if (instr->arch_opcode() == kMips64CmpS ||
instr->arch_opcode() == kMips64CmpD) {
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (predicate) {
__ BranchTrueF(tlabel);
} else {
@@ -3261,7 +3281,7 @@ void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
case kMips64CmpS:
case kMips64CmpD: {
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (predicate) {
__ LoadZeroIfFPUCondition(kSpeculationPoisonRegister);
} else {
@@ -3470,7 +3490,7 @@ void CodeGenerator::AssembleArchBoolean(Instruction* instr,
__ Move(kDoubleRegZero, 0.0);
}
bool predicate;
- FlagsConditionToConditionCmpFPU(predicate, condition);
+ FlagsConditionToConditionCmpFPU(&predicate, condition);
if (kArchVariant != kMips64r6) {
__ li(result, Operand(1));
if (predicate) {
diff --git a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
index edc8924757..bcf3532b57 100644
--- a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
+++ b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
@@ -189,6 +189,7 @@ namespace compiler {
V(Mips64I32x4MinU) \
V(Mips64F32x4Abs) \
V(Mips64F32x4Neg) \
+ V(Mips64F32x4Sqrt) \
V(Mips64F32x4RecipApprox) \
V(Mips64F32x4RecipSqrtApprox) \
V(Mips64F32x4Add) \
diff --git a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
index 880b424c41..fe2d33d1db 100644
--- a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
+++ b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
@@ -82,6 +82,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F32x4Div:
case kMips64F32x4Ne:
case kMips64F32x4Neg:
+ case kMips64F32x4Sqrt:
case kMips64F32x4RecipApprox:
case kMips64F32x4RecipSqrtApprox:
case kMips64F32x4ReplaceLane:
diff --git a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
index 9c717ab1e9..dfc0ff5bad 100644
--- a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
+++ b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
@@ -2,7 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
#include "src/base/bits.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
@@ -823,6 +822,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitWord32Ctz(Node* node) {
Mips64OperandGenerator g(this);
Emit(kMips64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
@@ -2678,6 +2681,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4) \
V(F32x4Abs, kMips64F32x4Abs) \
V(F32x4Neg, kMips64F32x4Neg) \
+ V(F32x4Sqrt, kMips64F32x4Sqrt) \
V(F32x4RecipApprox, kMips64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox) \
V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4) \
diff --git a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc
index 5c69bc34a1..dde1804adb 100644
--- a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc
+++ b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -263,9 +263,8 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) {
UNREACHABLE();
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, Instruction* instr,
- PPCOperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr,
+ PPCOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode()));
if (access_mode == kMemoryAccessPoisoned) {
@@ -1024,7 +1023,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Label start_call;
bool isWasmCapiFunction =
linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+#if defined(_AIX)
+ // AIX/PPC64BE Linux uses a function descriptor
+  // and emits 2 extra Load instructions under CallCFunctionHelper.
+ constexpr int offset = 11 * kInstrSize;
+#else
constexpr int offset = 9 * kInstrSize;
+#endif
if (isWasmCapiFunction) {
__ mflr(r0);
__ bind(&start_call);
@@ -1043,9 +1048,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
// TODO(miladfar): In the above block, kScratchReg must be populated with
// the strictly-correct PC, which is the return address at this spot. The
- // offset is set to 36 (9 * kInstrSize) right now, which is counted from
- // where we are binding to the label and ends at this spot. If failed,
- // replace it with the correct offset suggested. More info on f5ab7d3.
+ // offset is set to 36 (9 * kInstrSize) on pLinux and 44 on AIX, which is
+ // counted from where we are binding to the label and ends at this spot.
+  // If this check fails, replace it with the suggested correct offset. More
+  // info on f5ab7d3.
if (isWasmCapiFunction)
CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
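
The offset asserted here is simply the byte length of the fixed call sequence emitted between binding start_call and the point where SizeOfCodeGeneratedSince is taken. A minimal sketch of the arithmetic, assuming kInstrSize is 4 bytes on PPC (kCallSequenceLength is an illustrative name, not the constant used in the patch):

    constexpr int kInstrSize = 4;  // PPC fixed-width instructions
    #if defined(_AIX)
    // Function-descriptor calling convention: CallCFunctionHelper emits 2 extra loads.
    constexpr int kCallSequenceLength = 11 * kInstrSize;  // 44 bytes
    #else
    constexpr int kCallSequenceLength = 9 * kInstrSize;   // 36 bytes
    #endif
    // The generator then checks: bytes generated since start_call == kCallSequenceLength.
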
diff --git a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
index ef8490a726..2ffd6495d7 100644
--- a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
+++ b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@@ -926,6 +926,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ // TODO(miladfar): Implement the ppc selector for reversing SIMD bytes.
+ // Check if the input node is a Load and do a Load Reverse at once.
+ UNIMPLEMENTED();
+}
+
void InstructionSelector::VisitInt32Add(Node* node) {
VisitBinop<Int32BinopMatcher>(this, node, kPPC_Add32, kInt16Imm);
}
@@ -2283,6 +2289,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
+
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
diff --git a/deps/v8/src/compiler/backend/register-allocator-verifier.cc b/deps/v8/src/compiler/backend/register-allocator-verifier.cc
index 53349c9c2b..17e0b8ca75 100644
--- a/deps/v8/src/compiler/backend/register-allocator-verifier.cc
+++ b/deps/v8/src/compiler/backend/register-allocator-verifier.cc
@@ -92,7 +92,7 @@ RegisterAllocatorVerifier::RegisterAllocatorVerifier(
void RegisterAllocatorVerifier::VerifyInput(
const OperandConstraint& constraint) {
CHECK_NE(kSameAsFirst, constraint.type_);
- if (constraint.type_ != kImmediate && constraint.type_ != kExplicit) {
+ if (constraint.type_ != kImmediate) {
CHECK_NE(InstructionOperand::kInvalidVirtualRegister,
constraint.virtual_register_);
}
@@ -102,14 +102,12 @@ void RegisterAllocatorVerifier::VerifyTemp(
const OperandConstraint& constraint) {
CHECK_NE(kSameAsFirst, constraint.type_);
CHECK_NE(kImmediate, constraint.type_);
- CHECK_NE(kExplicit, constraint.type_);
CHECK_NE(kConstant, constraint.type_);
}
void RegisterAllocatorVerifier::VerifyOutput(
const OperandConstraint& constraint) {
CHECK_NE(kImmediate, constraint.type_);
- CHECK_NE(kExplicit, constraint.type_);
CHECK_NE(InstructionOperand::kInvalidVirtualRegister,
constraint.virtual_register_);
}
@@ -149,8 +147,6 @@ void RegisterAllocatorVerifier::BuildConstraint(const InstructionOperand* op,
constraint->type_ = kConstant;
constraint->value_ = ConstantOperand::cast(op)->virtual_register();
constraint->virtual_register_ = constraint->value_;
- } else if (op->IsExplicit()) {
- constraint->type_ = kExplicit;
} else if (op->IsImmediate()) {
const ImmediateOperand* imm = ImmediateOperand::cast(op);
int value = imm->type() == ImmediateOperand::INLINE ? imm->inline_value()
@@ -235,9 +231,6 @@ void RegisterAllocatorVerifier::CheckConstraint(
case kFPRegister:
CHECK_WITH_MSG(op->IsFPRegister(), caller_info_);
return;
- case kExplicit:
- CHECK_WITH_MSG(op->IsExplicit(), caller_info_);
- return;
case kFixedRegister:
case kRegisterAndSlot:
CHECK_WITH_MSG(op->IsRegister(), caller_info_);
@@ -503,8 +496,7 @@ void RegisterAllocatorVerifier::VerifyGapMoves() {
instr_constraint.operand_constraints_;
size_t count = 0;
for (size_t i = 0; i < instr->InputCount(); ++i, ++count) {
- if (op_constraints[count].type_ == kImmediate ||
- op_constraints[count].type_ == kExplicit) {
+ if (op_constraints[count].type_ == kImmediate) {
continue;
}
int virtual_register = op_constraints[count].virtual_register_;
diff --git a/deps/v8/src/compiler/backend/register-allocator-verifier.h b/deps/v8/src/compiler/backend/register-allocator-verifier.h
index 68e69c0d16..7110c2eb42 100644
--- a/deps/v8/src/compiler/backend/register-allocator-verifier.h
+++ b/deps/v8/src/compiler/backend/register-allocator-verifier.h
@@ -188,7 +188,6 @@ class RegisterAllocatorVerifier final : public ZoneObject {
kRegisterOrSlot,
kRegisterOrSlotFP,
kRegisterOrSlotOrConstant,
- kExplicit,
kSameAsFirst,
kRegisterAndSlot
};
diff --git a/deps/v8/src/compiler/backend/register-allocator.cc b/deps/v8/src/compiler/backend/register-allocator.cc
index 21eef0485c..945554eb32 100644
--- a/deps/v8/src/compiler/backend/register-allocator.cc
+++ b/deps/v8/src/compiler/backend/register-allocator.cc
@@ -6,7 +6,7 @@
#include <iomanip>
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/base/small-vector.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/tick-counter.h"
@@ -317,7 +317,6 @@ UsePositionHintType UsePosition::HintTypeForOperand(
switch (op.kind()) {
case InstructionOperand::CONSTANT:
case InstructionOperand::IMMEDIATE:
- case InstructionOperand::EXPLICIT:
return UsePositionHintType::kNone;
case InstructionOperand::UNALLOCATED:
return UsePositionHintType::kUnresolved;
@@ -797,12 +796,13 @@ LifetimePosition LiveRange::NextEndAfter(LifetimePosition position) const {
return start_search->end();
}
-LifetimePosition LiveRange::NextStartAfter(LifetimePosition position) const {
+LifetimePosition LiveRange::NextStartAfter(LifetimePosition position) {
UseInterval* start_search = FirstSearchIntervalForPosition(position);
while (start_search->start() < position) {
start_search = start_search->next();
}
- return start_search->start();
+ next_start_ = start_search->start();
+ return next_start_;
}
LifetimePosition LiveRange::FirstIntersection(LiveRange* other) const {
@@ -1940,8 +1940,8 @@ void ConstraintBuilder::MeetConstraintsBefore(int instr_index) {
// Handle fixed input operands of second instruction.
for (size_t i = 0; i < second->InputCount(); i++) {
InstructionOperand* input = second->InputAt(i);
- if (input->IsImmediate() || input->IsExplicit()) {
- continue; // Ignore immediates and explicitly reserved registers.
+ if (input->IsImmediate()) {
+ continue; // Ignore immediates.
}
UnallocatedOperand* cur_input = UnallocatedOperand::cast(input);
if (cur_input->HasFixedPolicy()) {
@@ -2323,8 +2323,8 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
for (size_t i = 0; i < instr->InputCount(); i++) {
InstructionOperand* input = instr->InputAt(i);
- if (input->IsImmediate() || input->IsExplicit()) {
- continue; // Ignore immediates and explicitly reserved registers.
+ if (input->IsImmediate()) {
+ continue; // Ignore immediates.
}
LifetimePosition use_pos;
if (input->IsUnallocated() &&
@@ -2504,10 +2504,10 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
predecessor_hint_preference |= kNotDeferredBlockPreference;
}
- // - Prefer hints from allocated (or explicit) operands.
+ // - Prefer hints from allocated operands.
//
- // Already-allocated or explicit operands are typically assigned using
- // the parallel moves on the last instruction. For example:
+ // Already-allocated operands are typically assigned using the parallel
+ // moves on the last instruction. For example:
//
// gap (v101 = [x0|R|w32]) (v100 = v101)
// ArchJmp
@@ -2515,7 +2515,7 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
// phi: v100 = v101 v102
//
// We have already found the END move, so look for a matching START move
- // from an allocated (or explicit) operand.
+ // from an allocated operand.
//
// Note that we cannot simply look up data()->live_ranges()[vreg] here
// because the live ranges are still being built when this function is
@@ -2527,7 +2527,7 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
for (MoveOperands* move : *moves) {
InstructionOperand& to = move->destination();
if (predecessor_hint->Equals(to)) {
- if (move->source().IsAllocated() || move->source().IsExplicit()) {
+ if (move->source().IsAllocated()) {
predecessor_hint_preference |= kMoveIsAllocatedPreference;
}
break;
@@ -3095,11 +3095,11 @@ LinearScanAllocator::LinearScanAllocator(RegisterAllocationData* data,
: RegisterAllocator(data, kind),
unhandled_live_ranges_(local_zone),
active_live_ranges_(local_zone),
- inactive_live_ranges_(local_zone),
+ inactive_live_ranges_(num_registers(), InactiveLiveRangeQueue(local_zone),
+ local_zone),
next_active_ranges_change_(LifetimePosition::Invalid()),
next_inactive_ranges_change_(LifetimePosition::Invalid()) {
active_live_ranges().reserve(8);
- inactive_live_ranges().reserve(8);
}
void LinearScanAllocator::MaybeSpillPreviousRanges(LiveRange* begin_range,
@@ -3143,15 +3143,15 @@ void LinearScanAllocator::MaybeUndoPreviousSplit(LiveRange* range) {
}
}
-void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet& to_be_live,
+void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet* to_be_live,
LifetimePosition position,
SpillMode spill_mode) {
for (auto it = active_live_ranges().begin();
it != active_live_ranges().end();) {
LiveRange* active_range = *it;
TopLevelLiveRange* toplevel = (*it)->TopLevel();
- auto found = to_be_live.find({toplevel, kUnassignedRegister});
- if (found == to_be_live.end()) {
+ auto found = to_be_live->find({toplevel, kUnassignedRegister});
+ if (found == to_be_live->end()) {
// Is not contained in {to_be_live}, spill it.
// Fixed registers are exempt from this. They might have been
// added from inactive at the block boundary but we know that
@@ -3207,7 +3207,7 @@ void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet& to_be_live,
} else {
// This range is contained in {to_be_live}, so we can keep it.
int expected_register = (*found).expected_register;
- to_be_live.erase(found);
+ to_be_live->erase(found);
if (expected_register == active_range->assigned_register()) {
        // Was live and in the correct register, simply pass through.
TRACE("Keeping %d:%d in %s\n", toplevel->vreg(),
@@ -3238,31 +3238,22 @@ LiveRange* LinearScanAllocator::AssignRegisterOnReload(LiveRange* range,
  // give reloading registers precedence. That way we would compute the
// intersection for the entire future.
LifetimePosition new_end = range->End();
- for (const auto inactive : inactive_live_ranges()) {
- if (kSimpleFPAliasing || !check_fp_aliasing()) {
- if (inactive->assigned_register() != reg) continue;
- } else {
- bool conflict = inactive->assigned_register() == reg;
- if (!conflict) {
- int alias_base_index = -1;
- int aliases = data()->config()->GetAliases(range->representation(), reg,
- inactive->representation(),
- &alias_base_index);
- DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
- while (aliases-- && !conflict) {
- int aliased_reg = alias_base_index + aliases;
- if (aliased_reg == reg) {
- conflict = true;
- }
- }
- }
- if (!conflict) continue;
+ for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
+ if ((kSimpleFPAliasing || !check_fp_aliasing()) && cur_reg != reg) {
+ continue;
}
- for (auto interval = inactive->first_interval(); interval != nullptr;
- interval = interval->next()) {
- if (interval->start() > new_end) break;
- if (interval->end() <= range->Start()) continue;
- if (new_end > interval->start()) new_end = interval->start();
+ for (const auto cur_inactive : inactive_live_ranges(cur_reg)) {
+ if (!kSimpleFPAliasing && check_fp_aliasing() &&
+ !data()->config()->AreAliases(cur_inactive->representation(), cur_reg,
+ range->representation(), reg)) {
+ continue;
+ }
+ for (auto interval = cur_inactive->first_interval(); interval != nullptr;
+ interval = interval->next()) {
+ if (interval->start() > new_end) break;
+ if (interval->end() <= range->Start()) continue;
+ if (new_end > interval->start()) new_end = interval->start();
+ }
}
}
if (new_end != range->End()) {
@@ -3275,8 +3266,8 @@ LiveRange* LinearScanAllocator::AssignRegisterOnReload(LiveRange* range,
return range;
}
-void LinearScanAllocator::ReloadLiveRanges(RangeWithRegisterSet& to_be_live,
- LifetimePosition position) {
+void LinearScanAllocator::ReloadLiveRanges(
+ RangeWithRegisterSet const& to_be_live, LifetimePosition position) {
// Assumption: All ranges in {to_be_live} are currently spilled and there are
// no conflicting registers in the active ranges.
// The former is ensured by SpillNotLiveRanges, the latter is by construction
@@ -3558,11 +3549,17 @@ void LinearScanAllocator::UpdateDeferredFixedRanges(SpillMode spill_mode,
Min(updated->End(), next_active_ranges_change_);
});
}
- for (auto inactive : inactive_live_ranges()) {
- split_conflicting(range, inactive, [this](LiveRange* updated) {
- next_inactive_ranges_change_ =
- Min(updated->End(), next_inactive_ranges_change_);
- });
+ for (int reg = 0; reg < num_registers(); ++reg) {
+ if ((kSimpleFPAliasing || !check_fp_aliasing()) &&
+ reg != range->assigned_register()) {
+ continue;
+ }
+ for (auto inactive : inactive_live_ranges(reg)) {
+ split_conflicting(range, inactive, [this](LiveRange* updated) {
+ next_inactive_ranges_change_ =
+ Min(updated->End(), next_inactive_ranges_change_);
+ });
+ }
}
};
if (mode() == GENERAL_REGISTERS) {
@@ -3600,12 +3597,14 @@ void LinearScanAllocator::UpdateDeferredFixedRanges(SpillMode spill_mode,
}
} else {
// Remove all ranges.
- for (auto it = inactive_live_ranges().begin();
- it != inactive_live_ranges().end();) {
- if ((*it)->TopLevel()->IsDeferredFixed()) {
- it = inactive_live_ranges().erase(it);
- } else {
- ++it;
+ for (int reg = 0; reg < num_registers(); ++reg) {
+ for (auto it = inactive_live_ranges(reg).begin();
+ it != inactive_live_ranges(reg).end();) {
+ if ((*it)->TopLevel()->IsDeferredFixed()) {
+ it = inactive_live_ranges(reg).erase(it);
+ } else {
+ ++it;
+ }
}
}
}
@@ -3636,7 +3635,9 @@ bool LinearScanAllocator::HasNonDeferredPredecessor(InstructionBlock* block) {
void LinearScanAllocator::AllocateRegisters() {
DCHECK(unhandled_live_ranges().empty());
DCHECK(active_live_ranges().empty());
- DCHECK(inactive_live_ranges().empty());
+ for (int reg = 0; reg < num_registers(); ++reg) {
+ DCHECK(inactive_live_ranges(reg).empty());
+ }
SplitAndSpillRangesDefinedByMemoryOperand();
data()->ResetSpillState();
@@ -3853,7 +3854,7 @@ void LinearScanAllocator::AllocateRegisters() {
}
if (!no_change_required) {
- SpillNotLiveRanges(to_be_live, next_block_boundary, spill_mode);
+ SpillNotLiveRanges(&to_be_live, next_block_boundary, spill_mode);
ReloadLiveRanges(to_be_live, next_block_boundary);
}
@@ -3941,9 +3942,10 @@ void LinearScanAllocator::AddToActive(LiveRange* range) {
void LinearScanAllocator::AddToInactive(LiveRange* range) {
TRACE("Add live range %d:%d to inactive\n", range->TopLevel()->vreg(),
range->relative_id());
- inactive_live_ranges().push_back(range);
next_inactive_ranges_change_ = std::min(
next_inactive_ranges_change_, range->NextStartAfter(range->Start()));
+ DCHECK(range->HasRegisterAssigned());
+ inactive_live_ranges(range->assigned_register()).insert(range);
}
void LinearScanAllocator::AddToUnhandled(LiveRange* range) {
@@ -3966,30 +3968,36 @@ ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToHandled(
ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToInactive(
const ZoneVector<LiveRange*>::iterator it, LifetimePosition position) {
LiveRange* range = *it;
- inactive_live_ranges().push_back(range);
TRACE("Moving live range %d:%d from active to inactive\n",
(range)->TopLevel()->vreg(), range->relative_id());
+ LifetimePosition next_active = range->NextStartAfter(position);
next_inactive_ranges_change_ =
- std::min(next_inactive_ranges_change_, range->NextStartAfter(position));
+ std::min(next_inactive_ranges_change_, next_active);
+ DCHECK(range->HasRegisterAssigned());
+ inactive_live_ranges(range->assigned_register()).insert(range);
return active_live_ranges().erase(it);
}
-ZoneVector<LiveRange*>::iterator LinearScanAllocator::InactiveToHandled(
- ZoneVector<LiveRange*>::iterator it) {
+LinearScanAllocator::InactiveLiveRangeQueue::iterator
+LinearScanAllocator::InactiveToHandled(InactiveLiveRangeQueue::iterator it) {
+ LiveRange* range = *it;
TRACE("Moving live range %d:%d from inactive to handled\n",
- (*it)->TopLevel()->vreg(), (*it)->relative_id());
- return inactive_live_ranges().erase(it);
+ range->TopLevel()->vreg(), range->relative_id());
+ int reg = range->assigned_register();
+ return inactive_live_ranges(reg).erase(it);
}
-ZoneVector<LiveRange*>::iterator LinearScanAllocator::InactiveToActive(
- ZoneVector<LiveRange*>::iterator it, LifetimePosition position) {
+LinearScanAllocator::InactiveLiveRangeQueue::iterator
+LinearScanAllocator::InactiveToActive(InactiveLiveRangeQueue::iterator it,
+ LifetimePosition position) {
LiveRange* range = *it;
active_live_ranges().push_back(range);
TRACE("Moving live range %d:%d from inactive to active\n",
range->TopLevel()->vreg(), range->relative_id());
next_active_ranges_change_ =
std::min(next_active_ranges_change_, range->NextEndAfter(position));
- return inactive_live_ranges().erase(it);
+ int reg = range->assigned_register();
+ return inactive_live_ranges(reg).erase(it);
}
void LinearScanAllocator::ForwardStateTo(LifetimePosition position) {
@@ -4012,18 +4020,25 @@ void LinearScanAllocator::ForwardStateTo(LifetimePosition position) {
if (position >= next_inactive_ranges_change_) {
next_inactive_ranges_change_ = LifetimePosition::MaxPosition();
- for (auto it = inactive_live_ranges().begin();
- it != inactive_live_ranges().end();) {
- LiveRange* cur_inactive = *it;
- if (cur_inactive->End() <= position) {
- it = InactiveToHandled(it);
- } else if (cur_inactive->Covers(position)) {
- it = InactiveToActive(it, position);
- } else {
- next_inactive_ranges_change_ =
- std::min(next_inactive_ranges_change_,
- cur_inactive->NextStartAfter(position));
- ++it;
+ for (int reg = 0; reg < num_registers(); ++reg) {
+ ZoneVector<LiveRange*> reorder(data()->allocation_zone());
+ for (auto it = inactive_live_ranges(reg).begin();
+ it != inactive_live_ranges(reg).end();) {
+ LiveRange* cur_inactive = *it;
+ if (cur_inactive->End() <= position) {
+ it = InactiveToHandled(it);
+ } else if (cur_inactive->Covers(position)) {
+ it = InactiveToActive(it, position);
+ } else {
+ next_inactive_ranges_change_ =
+ std::min(next_inactive_ranges_change_,
+ cur_inactive->NextStartAfter(position));
+ it = inactive_live_ranges(reg).erase(it);
+ reorder.push_back(cur_inactive);
+ }
+ }
+ for (LiveRange* range : reorder) {
+ inactive_live_ranges(reg).insert(range);
}
}
}
@@ -4094,31 +4109,34 @@ void LinearScanAllocator::FindFreeRegistersForRange(
}
}
- for (LiveRange* cur_inactive : inactive_live_ranges()) {
- DCHECK(cur_inactive->End() > range->Start());
- int cur_reg = cur_inactive->assigned_register();
- // No need to carry out intersections, when this register won't be
- // interesting to this range anyway.
- // TODO(mtrofin): extend to aliased ranges, too.
- if ((kSimpleFPAliasing || !check_fp_aliasing()) &&
- positions[cur_reg] < range->Start()) {
- continue;
- }
-
- LifetimePosition next_intersection = cur_inactive->FirstIntersection(range);
- if (!next_intersection.IsValid()) continue;
- if (kSimpleFPAliasing || !check_fp_aliasing()) {
- positions[cur_reg] = Min(positions[cur_reg], next_intersection);
- TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
- Min(positions[cur_reg], next_intersection).value());
- } else {
- int alias_base_index = -1;
- int aliases = data()->config()->GetAliases(
- cur_inactive->representation(), cur_reg, rep, &alias_base_index);
- DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
- while (aliases--) {
- int aliased_reg = alias_base_index + aliases;
- positions[aliased_reg] = Min(positions[aliased_reg], next_intersection);
+ for (int cur_reg = 0; cur_reg < num_regs; ++cur_reg) {
+ for (LiveRange* cur_inactive : inactive_live_ranges(cur_reg)) {
+ DCHECK_GT(cur_inactive->End(), range->Start());
+ CHECK_EQ(cur_inactive->assigned_register(), cur_reg);
+ // No need to carry out intersections, when this register won't be
+ // interesting to this range anyway.
+ // TODO(mtrofin): extend to aliased ranges, too.
+ if ((kSimpleFPAliasing || !check_fp_aliasing()) &&
+ positions[cur_reg] <= cur_inactive->NextStart()) {
+ break;
+ }
+ LifetimePosition next_intersection =
+ cur_inactive->FirstIntersection(range);
+ if (!next_intersection.IsValid()) continue;
+ if (kSimpleFPAliasing || !check_fp_aliasing()) {
+ positions[cur_reg] = std::min(positions[cur_reg], next_intersection);
+ TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
+ positions[cur_reg].value());
+ } else {
+ int alias_base_index = -1;
+ int aliases = data()->config()->GetAliases(
+ cur_inactive->representation(), cur_reg, rep, &alias_base_index);
+ DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+ while (aliases--) {
+ int aliased_reg = alias_base_index + aliases;
+ positions[aliased_reg] =
+ std::min(positions[aliased_reg], next_intersection);
+ }
}
}
}
@@ -4337,46 +4355,46 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current,
}
}
- for (LiveRange* range : inactive_live_ranges()) {
- DCHECK(range->End() > current->Start());
- int cur_reg = range->assigned_register();
- bool is_fixed = range->TopLevel()->IsFixed();
-
- // Don't perform costly intersections if they are guaranteed to not update
- // block_pos or use_pos.
- // TODO(mtrofin): extend to aliased ranges, too.
- if ((kSimpleFPAliasing || !check_fp_aliasing())) {
- if (is_fixed) {
- if (block_pos[cur_reg] < range->Start()) continue;
- } else {
- if (use_pos[cur_reg] < range->Start()) continue;
+ for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
+ for (LiveRange* range : inactive_live_ranges(cur_reg)) {
+ DCHECK(range->End() > current->Start());
+ DCHECK_EQ(range->assigned_register(), cur_reg);
+ bool is_fixed = range->TopLevel()->IsFixed();
+
+ // Don't perform costly intersections if they are guaranteed to not update
+ // block_pos or use_pos.
+ // TODO(mtrofin): extend to aliased ranges, too.
+ if ((kSimpleFPAliasing || !check_fp_aliasing())) {
+ DCHECK_LE(use_pos[cur_reg], block_pos[cur_reg]);
+ if (block_pos[cur_reg] <= range->NextStart()) break;
+ if (!is_fixed && use_pos[cur_reg] <= range->NextStart()) continue;
}
- }
- LifetimePosition next_intersection = range->FirstIntersection(current);
- if (!next_intersection.IsValid()) continue;
+ LifetimePosition next_intersection = range->FirstIntersection(current);
+ if (!next_intersection.IsValid()) continue;
- if (kSimpleFPAliasing || !check_fp_aliasing()) {
- if (is_fixed) {
- block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
- use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
- } else {
- use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
- }
- } else {
- int alias_base_index = -1;
- int aliases = data()->config()->GetAliases(
- range->representation(), cur_reg, rep, &alias_base_index);
- DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
- while (aliases--) {
- int aliased_reg = alias_base_index + aliases;
+ if (kSimpleFPAliasing || !check_fp_aliasing()) {
if (is_fixed) {
- block_pos[aliased_reg] =
- Min(block_pos[aliased_reg], next_intersection);
- use_pos[aliased_reg] =
- Min(block_pos[aliased_reg], use_pos[aliased_reg]);
+ block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
+ use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
} else {
- use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection);
+ use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
+ }
+ } else {
+ int alias_base_index = -1;
+ int aliases = data()->config()->GetAliases(
+ range->representation(), cur_reg, rep, &alias_base_index);
+ DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+ while (aliases--) {
+ int aliased_reg = alias_base_index + aliases;
+ if (is_fixed) {
+ block_pos[aliased_reg] =
+ Min(block_pos[aliased_reg], next_intersection);
+ use_pos[aliased_reg] =
+ Min(block_pos[aliased_reg], use_pos[aliased_reg]);
+ } else {
+ use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection);
+ }
}
}
}
@@ -4490,40 +4508,38 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current,
it = ActiveToHandled(it);
}
- for (auto it = inactive_live_ranges().begin();
- it != inactive_live_ranges().end();) {
- LiveRange* range = *it;
- DCHECK(range->End() > current->Start());
- if (range->TopLevel()->IsFixed()) {
- ++it;
- continue;
- }
+ for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
if (kSimpleFPAliasing || !check_fp_aliasing()) {
- if (range->assigned_register() != reg) {
+ if (cur_reg != reg) continue;
+ }
+ for (auto it = inactive_live_ranges(cur_reg).begin();
+ it != inactive_live_ranges(cur_reg).end();) {
+ LiveRange* range = *it;
+ if (!kSimpleFPAliasing && check_fp_aliasing() &&
+ !data()->config()->AreAliases(current->representation(), reg,
+ range->representation(), cur_reg)) {
++it;
continue;
}
- } else {
- if (!data()->config()->AreAliases(current->representation(), reg,
- range->representation(),
- range->assigned_register())) {
+ DCHECK(range->End() > current->Start());
+ if (range->TopLevel()->IsFixed()) {
++it;
continue;
}
- }
- LifetimePosition next_intersection = range->FirstIntersection(current);
- if (next_intersection.IsValid()) {
- UsePosition* next_pos = range->NextRegisterPosition(current->Start());
- if (next_pos == nullptr) {
- SpillAfter(range, split_pos, spill_mode);
+ LifetimePosition next_intersection = range->FirstIntersection(current);
+ if (next_intersection.IsValid()) {
+ UsePosition* next_pos = range->NextRegisterPosition(current->Start());
+ if (next_pos == nullptr) {
+ SpillAfter(range, split_pos, spill_mode);
+ } else {
+ next_intersection = Min(next_intersection, next_pos->pos());
+ SpillBetween(range, split_pos, next_intersection, spill_mode);
+ }
+ it = InactiveToHandled(it);
} else {
- next_intersection = Min(next_intersection, next_pos->pos());
- SpillBetween(range, split_pos, next_intersection, spill_mode);
+ ++it;
}
- it = InactiveToHandled(it);
- } else {
- ++it;
}
}
}
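
One subtlety in the ForwardStateTo change above: the inactive sets are now ordered by each range's cached NextStart(), so a range whose next start advances cannot be updated in place; it is erased, its key recomputed via NextStartAfter, and then reinserted. A minimal sketch of the same erase-and-reinsert pattern with std::multiset (illustrative names, not the V8 types):

    #include <set>
    #include <vector>

    struct Range { int next_start; };
    struct ByNextStart {
      bool operator()(const Range* a, const Range* b) const {
        return a->next_start < b->next_start;
      }
    };
    using InactiveQueue = std::multiset<Range*, ByNextStart>;

    // A multiset's ordering is only established at insertion time, so elements
    // whose key changes must be pulled out and put back.
    void AdvancePast(InactiveQueue& queue, int position) {
      std::vector<Range*> reorder;
      for (auto it = queue.begin(); it != queue.end();) {
        Range* r = *it;
        if (r->next_start < position) {
          it = queue.erase(it);
          r->next_start = position;  // stand-in for NextStartAfter(position)
          reorder.push_back(r);
        } else {
          ++it;
        }
      }
      for (Range* r : reorder) queue.insert(r);
    }
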
diff --git a/deps/v8/src/compiler/backend/register-allocator.h b/deps/v8/src/compiler/backend/register-allocator.h
index bc7b09d147..17d664e507 100644
--- a/deps/v8/src/compiler/backend/register-allocator.h
+++ b/deps/v8/src/compiler/backend/register-allocator.h
@@ -335,7 +335,11 @@ class RegisterAllocationData final : public ZoneObject {
return result;
}
- void ResetSpillState() { spill_state_.clear(); }
+ void ResetSpillState() {
+ for (auto& state : spill_state_) {
+ state.clear();
+ }
+ }
TickCounter* tick_counter() { return tick_counter_; }
@@ -626,9 +630,10 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
bool ShouldBeAllocatedBefore(const LiveRange* other) const;
bool CanCover(LifetimePosition position) const;
bool Covers(LifetimePosition position) const;
- LifetimePosition NextStartAfter(LifetimePosition position) const;
+ LifetimePosition NextStartAfter(LifetimePosition position);
LifetimePosition NextEndAfter(LifetimePosition position) const;
LifetimePosition FirstIntersection(LiveRange* other) const;
+ LifetimePosition NextStart() const { return next_start_; }
void VerifyChildStructure() const {
VerifyIntervals();
@@ -689,6 +694,8 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
// Cache the last position splintering stopped at.
mutable UsePosition* splitting_pointer_;
LiveRangeBundle* bundle_ = nullptr;
+ // Next interval start, relative to the current linear scan position.
+ LifetimePosition next_start_;
DISALLOW_COPY_AND_ASSIGN(LiveRange);
};
@@ -1298,29 +1305,39 @@ class LinearScanAllocator final : public RegisterAllocator {
LifetimePosition begin_pos,
LiveRange* end_range);
void MaybeUndoPreviousSplit(LiveRange* range);
- void SpillNotLiveRanges(
- RangeWithRegisterSet& to_be_live, // NOLINT(runtime/references)
- LifetimePosition position, SpillMode spill_mode);
+ void SpillNotLiveRanges(RangeWithRegisterSet* to_be_live,
+ LifetimePosition position, SpillMode spill_mode);
LiveRange* AssignRegisterOnReload(LiveRange* range, int reg);
- void ReloadLiveRanges(
- RangeWithRegisterSet& to_be_live, // NOLINT(runtime/references)
- LifetimePosition position);
+ void ReloadLiveRanges(RangeWithRegisterSet const& to_be_live,
+ LifetimePosition position);
void UpdateDeferredFixedRanges(SpillMode spill_mode, InstructionBlock* block);
bool BlockIsDeferredOrImmediatePredecessorIsNotDeferred(
const InstructionBlock* block);
bool HasNonDeferredPredecessor(InstructionBlock* block);
- struct LiveRangeOrdering {
+ struct UnhandledLiveRangeOrdering {
bool operator()(const LiveRange* a, const LiveRange* b) const {
return a->ShouldBeAllocatedBefore(b);
}
};
- using LiveRangeQueue = ZoneMultiset<LiveRange*, LiveRangeOrdering>;
- LiveRangeQueue& unhandled_live_ranges() { return unhandled_live_ranges_; }
+
+ struct InactiveLiveRangeOrdering {
+ bool operator()(const LiveRange* a, const LiveRange* b) const {
+ return a->NextStart() < b->NextStart();
+ }
+ };
+
+ using UnhandledLiveRangeQueue =
+ ZoneMultiset<LiveRange*, UnhandledLiveRangeOrdering>;
+ using InactiveLiveRangeQueue =
+ ZoneMultiset<LiveRange*, InactiveLiveRangeOrdering>;
+ UnhandledLiveRangeQueue& unhandled_live_ranges() {
+ return unhandled_live_ranges_;
+ }
ZoneVector<LiveRange*>& active_live_ranges() { return active_live_ranges_; }
- ZoneVector<LiveRange*>& inactive_live_ranges() {
- return inactive_live_ranges_;
+ InactiveLiveRangeQueue& inactive_live_ranges(int reg) {
+ return inactive_live_ranges_[reg];
}
void SetLiveRangeAssignedRegister(LiveRange* range, int reg);
@@ -1333,10 +1350,10 @@ class LinearScanAllocator final : public RegisterAllocator {
ZoneVector<LiveRange*>::iterator it);
ZoneVector<LiveRange*>::iterator ActiveToInactive(
ZoneVector<LiveRange*>::iterator it, LifetimePosition position);
- ZoneVector<LiveRange*>::iterator InactiveToHandled(
- ZoneVector<LiveRange*>::iterator it);
- ZoneVector<LiveRange*>::iterator InactiveToActive(
- ZoneVector<LiveRange*>::iterator it, LifetimePosition position);
+ InactiveLiveRangeQueue::iterator InactiveToHandled(
+ InactiveLiveRangeQueue::iterator it);
+ InactiveLiveRangeQueue::iterator InactiveToActive(
+ InactiveLiveRangeQueue::iterator it, LifetimePosition position);
void ForwardStateTo(LifetimePosition position);
@@ -1386,9 +1403,9 @@ class LinearScanAllocator final : public RegisterAllocator {
void PrintRangeOverview(std::ostream& os);
- LiveRangeQueue unhandled_live_ranges_;
+ UnhandledLiveRangeQueue unhandled_live_ranges_;
ZoneVector<LiveRange*> active_live_ranges_;
- ZoneVector<LiveRange*> inactive_live_ranges_;
+ ZoneVector<InactiveLiveRangeQueue> inactive_live_ranges_;
// Approximate at what position the set of ranges will change next.
// Used to avoid scanning for updates even if none are present.
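
The NextStartAfter signature change above (dropping const) exists to feed that ordering cheaply: the walk over use intervals caches its result in next_start_, and the comparator only reads the cache through NextStart(). Roughly, as a sketch with shortened names (the cache-refresh discipline belongs to the allocator and is not shown):

    struct RangeSketch {
      int next_start_ = 0;                 // cached key, mirrors LiveRange::next_start_
      int NextStartAfter(int position) {   // non-const: refreshes the cache
        // ...scan the use intervals for the first start >= position...
        next_start_ = position;            // placeholder for the computed start
        return next_start_;
      }
      int NextStart() const { return next_start_; }  // O(1) read for the comparator
    };
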
diff --git a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc
index 4c2d862fc4..d0f97eca57 100644
--- a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc
+++ b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc
@@ -1246,9 +1246,8 @@ void AdjustStackPointerForTailCall(
}
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, Instruction* instr,
- S390OperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr,
+ S390OperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode()));
if (access_mode == kMemoryAccessPoisoned) {
diff --git a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc
index 7f3277fc68..7b002fe6d3 100644
--- a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc
+++ b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc
@@ -2,7 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@@ -436,68 +435,64 @@ void VisitTryTruncateDouble(InstructionSelector* selector, ArchOpcode opcode,
#endif
template <class CanCombineWithLoad>
-void GenerateRightOperands(
- InstructionSelector* selector, Node* node, Node* right,
- InstructionCode& opcode, // NOLINT(runtime/references)
- OperandModes& operand_mode, // NOLINT(runtime/references)
- InstructionOperand* inputs,
- size_t& input_count, // NOLINT(runtime/references)
- CanCombineWithLoad canCombineWithLoad) {
+void GenerateRightOperands(InstructionSelector* selector, Node* node,
+ Node* right, InstructionCode* opcode,
+ OperandModes* operand_mode,
+ InstructionOperand* inputs, size_t* input_count,
+ CanCombineWithLoad canCombineWithLoad) {
S390OperandGenerator g(selector);
- if ((operand_mode & OperandMode::kAllowImmediate) &&
- g.CanBeImmediate(right, operand_mode)) {
- inputs[input_count++] = g.UseImmediate(right);
+ if ((*operand_mode & OperandMode::kAllowImmediate) &&
+ g.CanBeImmediate(right, *operand_mode)) {
+ inputs[(*input_count)++] = g.UseImmediate(right);
// Can only be RI or RRI
- operand_mode &= OperandMode::kAllowImmediate;
- } else if (operand_mode & OperandMode::kAllowMemoryOperand) {
+ *operand_mode &= OperandMode::kAllowImmediate;
+ } else if (*operand_mode & OperandMode::kAllowMemoryOperand) {
NodeMatcher mright(right);
if (mright.IsLoad() && selector->CanCover(node, right) &&
canCombineWithLoad(SelectLoadOpcode(right))) {
AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
- right, inputs, &input_count, OpcodeImmMode(opcode));
- opcode |= AddressingModeField::encode(mode);
- operand_mode &= ~OperandMode::kAllowImmediate;
- if (operand_mode & OperandMode::kAllowRM)
- operand_mode &= ~OperandMode::kAllowDistinctOps;
- } else if (operand_mode & OperandMode::kAllowRM) {
- DCHECK(!(operand_mode & OperandMode::kAllowRRM));
- inputs[input_count++] = g.UseAnyExceptImmediate(right);
+ right, inputs, input_count, OpcodeImmMode(*opcode));
+ *opcode |= AddressingModeField::encode(mode);
+ *operand_mode &= ~OperandMode::kAllowImmediate;
+ if (*operand_mode & OperandMode::kAllowRM)
+ *operand_mode &= ~OperandMode::kAllowDistinctOps;
+ } else if (*operand_mode & OperandMode::kAllowRM) {
+ DCHECK(!(*operand_mode & OperandMode::kAllowRRM));
+ inputs[(*input_count)++] = g.UseAnyExceptImmediate(right);
// Can not be Immediate
- operand_mode &=
+ *operand_mode &=
~OperandMode::kAllowImmediate & ~OperandMode::kAllowDistinctOps;
- } else if (operand_mode & OperandMode::kAllowRRM) {
- DCHECK(!(operand_mode & OperandMode::kAllowRM));
- inputs[input_count++] = g.UseAnyExceptImmediate(right);
+ } else if (*operand_mode & OperandMode::kAllowRRM) {
+ DCHECK(!(*operand_mode & OperandMode::kAllowRM));
+ inputs[(*input_count)++] = g.UseAnyExceptImmediate(right);
// Can not be Immediate
- operand_mode &= ~OperandMode::kAllowImmediate;
+ *operand_mode &= ~OperandMode::kAllowImmediate;
} else {
UNREACHABLE();
}
} else {
- inputs[input_count++] = g.UseRegister(right);
+ inputs[(*input_count)++] = g.UseRegister(right);
// Can only be RR or RRR
- operand_mode &= OperandMode::kAllowRRR;
+ *operand_mode &= OperandMode::kAllowRRR;
}
}
template <class CanCombineWithLoad>
-void GenerateBinOpOperands(
- InstructionSelector* selector, Node* node, Node* left, Node* right,
- InstructionCode& opcode, // NOLINT(runtime/references)
- OperandModes& operand_mode, // NOLINT(runtime/references)
- InstructionOperand* inputs,
- size_t& input_count, // NOLINT(runtime/references)
- CanCombineWithLoad canCombineWithLoad) {
+void GenerateBinOpOperands(InstructionSelector* selector, Node* node,
+ Node* left, Node* right, InstructionCode* opcode,
+ OperandModes* operand_mode,
+ InstructionOperand* inputs, size_t* input_count,
+ CanCombineWithLoad canCombineWithLoad) {
S390OperandGenerator g(selector);
// left is always register
InstructionOperand const left_input = g.UseRegister(left);
- inputs[input_count++] = left_input;
+ inputs[(*input_count)++] = left_input;
if (left == right) {
- inputs[input_count++] = left_input;
+ inputs[(*input_count)++] = left_input;
// Can only be RR or RRR
- operand_mode &= OperandMode::kAllowRRR;
+ *operand_mode &= OperandMode::kAllowRRR;
} else {
GenerateRightOperands(selector, node, right, opcode, operand_mode, inputs,
input_count, canCombineWithLoad);
@@ -575,8 +570,8 @@ void VisitUnaryOp(InstructionSelector* selector, Node* node,
size_t output_count = 0;
Node* input = node->InputAt(0);
- GenerateRightOperands(selector, node, input, opcode, operand_mode, inputs,
- input_count, canCombineWithLoad);
+ GenerateRightOperands(selector, node, input, &opcode, &operand_mode, inputs,
+ &input_count, canCombineWithLoad);
bool input_is_word32 = ProduceWord32Result(input);
@@ -631,8 +626,8 @@ void VisitBinOp(InstructionSelector* selector, Node* node,
std::swap(left, right);
}
- GenerateBinOpOperands(selector, node, left, right, opcode, operand_mode,
- inputs, input_count, canCombineWithLoad);
+ GenerateBinOpOperands(selector, node, left, right, &opcode, &operand_mode,
+ inputs, &input_count, canCombineWithLoad);
bool left_is_word32 = ProduceWord32Result(left);
@@ -1175,6 +1170,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ // TODO(miladfar): Implement the s390 selector for reversing SIMD bytes.
+ // Check if the input node is a Load and do a Load Reverse at once.
+ UNIMPLEMENTED();
+}
+
template <class Matcher, ArchOpcode neg_opcode>
static inline bool TryMatchNegFromSub(InstructionSelector* selector,
Node* node) {
@@ -2691,6 +2692,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
+
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
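
Most of the churn in this selector is the same mechanical cleanup applied to the other backends: out-parameters passed by non-const reference (flagged by the runtime/references lint) become pointers, so every write goes through an explicit dereference and every call site spells out the &. A minimal before/after sketch with made-up names:

    #include <cstddef>
    using InstructionCode = unsigned;  // stand-in for the V8 typedef

    // Before: mutation of the parameters is invisible at the call site.
    void AccumulateRef(size_t& input_count, InstructionCode& opcode) {
      input_count++;
      opcode |= 0x10;
    }

    // After: callers write AccumulatePtr(&input_count, &opcode).
    void AccumulatePtr(size_t* input_count, InstructionCode* opcode) {
      (*input_count)++;
      *opcode |= 0x10;
    }
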
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
index a4f82b153b..44da872f26 100644
--- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc
@@ -361,7 +361,6 @@ class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
InstructionCode opcode, Instruction* instr,
- X64OperandConverter& i, // NOLINT(runtime/references)
int pc) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
@@ -370,9 +369,9 @@ void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
}
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
- X64OperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode, Instruction* instr,
+ X64OperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -1876,30 +1875,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
break;
case kX64Movsxbl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxbq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxbq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxbq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movb: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
@@ -1911,29 +1910,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movsxwl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxwl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxwq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxwq);
break;
case kX64Movzxwq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movzxwq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movw: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
@@ -1945,7 +1944,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movl:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
if (HasAddressingMode(instr)) {
__ movl(i.OutputRegister(), i.MemoryOperand());
@@ -1969,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxlq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
ASSEMBLE_MOVX(movsxlq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
@@ -2021,7 +2020,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64Movq:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ movq(i.OutputRegister(), i.MemoryOperand());
} else {
@@ -2036,7 +2035,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movss:
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
} else {
@@ -2046,7 +2045,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kX64Movsd: {
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
@@ -2069,7 +2068,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64Movdqu: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
- EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
+ EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
if (instr->HasOutput()) {
__ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
} else {
@@ -2293,6 +2292,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movq(i.OutputDoubleRegister(), kScratchRegister);
break;
}
+ case kX64F64x2Sqrt: {
+ __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kX64F64x2Add: {
ASSEMBLE_SSE_BINOP(addpd);
break;
@@ -2350,22 +2353,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
- __ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
+ case kX64F64x2Qfma: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movapd(tmp, i.InputSimd128Register(2));
+ __ mulpd(tmp, i.InputSimd128Register(1));
+ __ addpd(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
+ case kX64F64x2Qfms: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movapd(tmp, i.InputSimd128Register(2));
+ __ mulpd(tmp, i.InputSimd128Register(1));
+ __ subpd(i.OutputSimd128Register(), tmp);
+ }
break;
}
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
@@ -2445,6 +2474,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
+ case kX64F32x4Sqrt: {
+ __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
@@ -2538,6 +2571,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
+ case kX64F32x4Qfma: {
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movaps(tmp, i.InputSimd128Register(2));
+ __ mulps(tmp, i.InputSimd128Register(1));
+ __ addps(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
+ case kX64F32x4Qfms: {
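+ // Same pattern as kX64F64x2Qfms, applied to 32-bit float lanes.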
+ if (CpuFeatures::IsSupported(FMA3)) {
+ CpuFeatureScope fma3_scope(tasm(), FMA3);
+ __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(2));
+ } else {
+ XMMRegister tmp = i.TempSimd128Register(0);
+ __ movaps(tmp, i.InputSimd128Register(2));
+ __ mulps(tmp, i.InputSimd128Register(1));
+ __ subps(i.OutputSimd128Register(), tmp);
+ }
+ break;
+ }
case kX64I64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
@@ -2577,7 +2636,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 64.
+ __ andq(shift, Immediate(63));
+ __ movq(tmp, shift);
__ psllq(i.OutputSimd128Register(), tmp);
break;
}
@@ -2588,6 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.ToRegister(instr->TempAt(0));
+ // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
// lower quadword
__ pextrq(tmp, src, 0x0);
@@ -2640,15 +2703,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
- XMMRegister src = i.InputSimd128Register(1);
+ XMMRegister src0 = i.InputSimd128Register(0);
+ XMMRegister src1 = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
- DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
- __ movaps(tmp, src);
- __ pcmpgtq(src, dst);
- __ blendvpd(tmp, dst); // implicit use of xmm0 as mask
- __ movaps(dst, tmp);
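+ // pcmpgtq sets each lane of tmp where src1 > src0 (signed); blendvpd then
+ // takes src0 in those lanes, leaving the per-lane signed minimum in dst.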
+ __ movaps(tmp, src1);
+ __ pcmpgtq(tmp, src0);
+ __ movaps(dst, src1);
+ __ blendvpd(dst, src0); // implicit use of xmm0 as mask
} else {
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
@@ -2689,11 +2752,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
__ movaps(tmp, src);
- __ pcmpgtq(src, dst);
- __ blendvpd(dst, tmp); // implicit use of xmm0 as mask
+ __ pcmpgtq(tmp, dst);
+ __ blendvpd(dst, src); // implicit use of xmm0 as mask
break;
}
case kX64I64x2Eq: {
@@ -2732,7 +2795,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 64.
+ __ andq(shift, Immediate(63));
+ __ movq(tmp, shift);
__ psrlq(i.OutputSimd128Register(), tmp);
break;
}
@@ -2740,24 +2806,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
- XMMRegister src = i.InputSimd128Register(1);
- XMMRegister src_tmp = i.TempSimd128Register(0);
- XMMRegister dst_tmp = i.TempSimd128Register(1);
- DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ XMMRegister src0 = i.InputSimd128Register(0);
+ XMMRegister src1 = i.InputSimd128Register(1);
+ XMMRegister tmp0 = i.TempSimd128Register(0);
+ XMMRegister tmp1 = i.TempSimd128Register(1);
+ DCHECK_EQ(tmp1, xmm0);
- __ movaps(src_tmp, src);
- __ movaps(dst_tmp, dst);
+ __ movaps(dst, src1);
+ __ movaps(tmp0, src0);
- __ pcmpeqd(src, src);
- __ psllq(src, 63);
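+ // Flip the sign bit of both operands so that the signed pcmpgtq below
+ // behaves as an unsigned comparison.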
+ __ pcmpeqd(tmp1, tmp1);
+ __ psllq(tmp1, 63);
- __ pxor(dst_tmp, src);
- __ pxor(src, src_tmp);
+ __ pxor(tmp0, tmp1);
+ __ pxor(tmp1, dst);
- __ pcmpgtq(src, dst_tmp);
- __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask
- __ movaps(dst, src_tmp);
+ __ pcmpgtq(tmp1, tmp0);
+ __ blendvpd(dst, src0); // implicit use of xmm0 as mask
break;
}
case kX64I64x2MaxU: {
@@ -2765,22 +2830,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- XMMRegister src_tmp = i.TempSimd128Register(0);
- XMMRegister dst_tmp = i.TempSimd128Register(1);
+ XMMRegister dst_tmp = i.TempSimd128Register(0);
+ XMMRegister tmp = i.TempSimd128Register(1);
DCHECK_EQ(dst, i.InputSimd128Register(0));
- DCHECK_EQ(src, xmm0);
+ DCHECK_EQ(tmp, xmm0);
- __ movaps(src_tmp, src);
__ movaps(dst_tmp, dst);
- __ pcmpeqd(src, src);
- __ psllq(src, 63);
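+ // Same sign-flip trick as kX64I64x2MinU, here selecting the unsigned
+ // maximum instead.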
+ __ pcmpeqd(tmp, tmp);
+ __ psllq(tmp, 63);
- __ pxor(dst_tmp, src);
- __ pxor(src, src_tmp);
+ __ pxor(dst_tmp, tmp);
+ __ pxor(tmp, src);
- __ pcmpgtq(src, dst_tmp);
- __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask
+ __ pcmpgtq(tmp, dst_tmp);
+ __ blendvpd(dst, src); // implicit use of xmm0 as mask
break;
}
case kX64I64x2GtU: {
@@ -2820,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
- __ movd(dst, i.InputRegister(0));
+ __ Movd(dst, i.InputRegister(0));
} else {
- __ movd(dst, i.InputOperand(0));
+ __ Movd(dst, i.InputOperand(0));
}
- __ pshufd(dst, dst, 0x0);
+ __ Pshufd(dst, dst, 0x0);
break;
}
case kX64I32x4ExtractLane: {
@@ -2878,28 +2942,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
- __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
- __ psignd(dst, kScratchDoubleReg);
+ __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ __ Psignd(dst, kScratchDoubleReg);
} else {
- __ pxor(dst, dst);
- __ psubd(dst, src);
+ __ Pxor(dst, dst);
+ __ Psubd(dst, src);
}
break;
}
case kX64I32x4Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ pslld(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Pslld(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ psrad(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Psrad(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4Add: {
- __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4AddHoriz: {
@@ -2908,45 +2978,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4Sub: {
- __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Mul: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MinS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MaxS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Eq: {
- __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4Ne: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
- __ pcmpeqd(tmp, tmp);
- __ pxor(i.OutputSimd128Register(), tmp);
+ __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpeqd(tmp, tmp);
+ __ Pxor(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4GtS: {
- __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4GeS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- __ pminsd(dst, src);
- __ pcmpeqd(dst, src);
+ __ Pminsd(dst, src);
+ __ Pcmpeqd(dst, src);
break;
}
case kX64I32x4UConvertF32x4: {
@@ -2992,18 +3062,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I32x4ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
- __ psrld(i.OutputSimd128Register(), tmp);
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 32.
+ __ andq(shift, Immediate(31));
+ __ Movq(tmp, shift);
+ __ Psrld(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4MinU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4MaxU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
- __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I32x4GtU: {
@@ -3011,18 +3084,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0);
- __ pmaxud(dst, src);
- __ pcmpeqd(dst, src);
- __ pcmpeqd(tmp, tmp);
- __ pxor(dst, tmp);
+ __ Pmaxud(dst, src);
+ __ Pcmpeqd(dst, src);
+ __ Pcmpeqd(tmp, tmp);
+ __ Pxor(dst, tmp);
break;
}
case kX64I32x4GeU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
- __ pminud(dst, src);
- __ pcmpeqd(dst, src);
+ __ Pminud(dst, src);
+ __ Pcmpeqd(dst, src);
break;
}
case kX64S128Zero: {
@@ -3044,17 +3117,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I16x8ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
- __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsxwl(dst, dst);
+ __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I16x8ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
- __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
+ __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
- __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@@ -3085,13 +3157,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kX64I16x8ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
break;
}
@@ -3173,7 +3251,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
- __ movq(tmp, i.InputRegister(1));
+ Register shift = i.InputRegister(1);
+ // Take shift value modulo 16.
+ __ andq(shift, Immediate(15));
+ __ movq(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
@@ -3230,28 +3311,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSSE3);
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
- __ movd(dst, i.InputRegister(0));
+ __ Movd(dst, i.InputRegister(0));
} else {
- __ movd(dst, i.InputOperand(0));
+ __ Movd(dst, i.InputOperand(0));
}
- __ xorps(kScratchDoubleReg, kScratchDoubleReg);
- __ pshufb(dst, kScratchDoubleReg);
+ __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
+ __ Pshufb(dst, kScratchDoubleReg);
break;
}
case kX64I8x16ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
- __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
- __ movsxbl(dst, dst);
+ __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I8x16ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
- __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
+ __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
- __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+ __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@@ -3279,15 +3359,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Temp registers for shift mask and additional moves to XMM registers.
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
+ Register shift = i.InputRegister(1);
// Mask off the unwanted bits before word-shifting.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
- __ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(shift, Immediate(7));
+ __ movq(tmp, shift);
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
__ packuswb(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
- __ movq(tmp_simd, i.InputRegister(1));
+ __ movq(tmp_simd, shift);
__ psllw(dst, tmp_simd);
break;
}
@@ -3302,6 +3385,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psraw(kScratchDoubleReg, tmp_simd);
@@ -3414,6 +3499,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
+ // Take shift value modulo 8.
+ __ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
@@ -3422,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16AddSaturateU: {
- __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I8x16SubSaturateU: {
@@ -3487,10 +3574,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64S128Select: {
// Mask used here is stored in dst.
XMMRegister dst = i.OutputSimd128Register();
- __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
- __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
- __ andps(dst, kScratchDoubleReg);
- __ xorps(dst, i.InputSimd128Register(2));
+ __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+ __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+ __ Andps(dst, kScratchDoubleReg);
+ __ Xorps(dst, i.InputSimd128Register(2));
+ break;
+ }
+ case kX64S8x16Swizzle: {
+ CpuFeatureScope sse_scope(tasm(), SSSE3);
+ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+ XMMRegister dst = i.OutputSimd128Register();
+ XMMRegister mask = i.TempSimd128Register(0);
+
+ // Out-of-range indices should return 0. Adding 112 sets the top bit of
+ // any index > 15 (16 + 112 = 128), so pshufb will zero that lane.
+ __ Move(mask, static_cast<uint32_t>(0x70707070));
+ __ Pshufd(mask, mask, 0x0);
+ __ Paddusb(mask, i.InputSimd128Register(1));
+ __ Pshufb(dst, mask);
break;
}
case kX64S8x16Shuffle: {
@@ -3507,10 +3608,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SetupShuffleMaskOnStack(tasm(), mask);
- __ pshufb(dst, Operand(rsp, 0));
+ __ Pshufb(dst, Operand(rsp, 0));
} else { // two input operands
DCHECK_EQ(6, instr->InputCount());
- ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
+ ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
uint32_t mask[4] = {};
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@@ -3520,13 +3621,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask);
- __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
+ __ Pshufb(kScratchDoubleReg, Operand(rsp, 0));
uint32_t mask1[4] = {};
if (instr->InputAt(1)->IsSimd128Register()) {
XMMRegister src1 = i.InputSimd128Register(1);
if (src1 != dst) __ movups(dst, src1);
} else {
- __ movups(dst, i.InputOperand(1));
+ __ Movups(dst, i.InputOperand(1));
}
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@@ -3536,8 +3637,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask1);
- __ pshufb(dst, Operand(rsp, 0));
- __ por(dst, kScratchDoubleReg);
+ __ Pshufb(dst, Operand(rsp, 0));
+ __ Por(dst, kScratchDoubleReg);
}
__ movq(rsp, tmp);
break;
diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
index 8a0a45a916..e390c6922c 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
+++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h
@@ -160,6 +160,7 @@ namespace compiler {
V(X64F64x2ReplaceLane) \
V(X64F64x2Abs) \
V(X64F64x2Neg) \
+ V(X64F64x2Sqrt) \
V(X64F64x2Add) \
V(X64F64x2Sub) \
V(X64F64x2Mul) \
@@ -170,6 +171,8 @@ namespace compiler {
V(X64F64x2Ne) \
V(X64F64x2Lt) \
V(X64F64x2Le) \
+ V(X64F64x2Qfma) \
+ V(X64F64x2Qfms) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
@@ -177,6 +180,7 @@ namespace compiler {
V(X64F32x4UConvertI32x4) \
V(X64F32x4Abs) \
V(X64F32x4Neg) \
+ V(X64F32x4Sqrt) \
V(X64F32x4RecipApprox) \
V(X64F32x4RecipSqrtApprox) \
V(X64F32x4Add) \
@@ -190,6 +194,8 @@ namespace compiler {
V(X64F32x4Ne) \
V(X64F32x4Lt) \
V(X64F32x4Le) \
+ V(X64F32x4Qfma) \
+ V(X64F32x4Qfms) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
@@ -300,6 +306,7 @@ namespace compiler {
V(X64S128Or) \
V(X64S128Xor) \
V(X64S128Select) \
+ V(X64S8x16Swizzle) \
V(X64S8x16Shuffle) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
index e9fa450c38..28a935fd91 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -129,6 +129,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2ReplaceLane:
case kX64F64x2Abs:
case kX64F64x2Neg:
+ case kX64F64x2Sqrt:
case kX64F64x2Add:
case kX64F64x2Sub:
case kX64F64x2Mul:
@@ -139,6 +140,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Ne:
case kX64F64x2Lt:
case kX64F64x2Le:
+ case kX64F64x2Qfma:
+ case kX64F64x2Qfms:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
@@ -148,6 +151,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4RecipSqrtApprox:
case kX64F32x4Abs:
case kX64F32x4Neg:
+ case kX64F32x4Sqrt:
case kX64F32x4Add:
case kX64F32x4AddHoriz:
case kX64F32x4Sub:
@@ -159,6 +163,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Ne:
case kX64F32x4Lt:
case kX64F32x4Le:
+ case kX64F32x4Qfma:
+ case kX64F32x4Qfms:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
@@ -275,6 +281,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S1x4AllTrue:
case kX64S1x8AnyTrue:
case kX64S1x8AllTrue:
+ case kX64S8x16Swizzle:
case kX64S8x16Shuffle:
case kX64S32x4Swizzle:
case kX64S32x4Shuffle:
diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
index 5379074bac..f5d05fdd85 100644
--- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -4,7 +4,7 @@
#include <algorithm>
-#include "src/base/adapters.h"
+#include "src/base/iterator.h"
#include "src/base/overflowing-math.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
@@ -250,9 +250,21 @@ ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
#else
UNREACHABLE();
#endif
+#ifdef V8_COMPRESS_POINTERS
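+ // With pointer compression the tagged value in memory is 32 bits, so the
+ // load also has to decompress it.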
+ case MachineRepresentation::kTaggedSigned:
+ opcode = kX64MovqDecompressTaggedSigned;
+ break;
+ case MachineRepresentation::kTaggedPointer:
+ opcode = kX64MovqDecompressTaggedPointer;
+ break;
+ case MachineRepresentation::kTagged:
+ opcode = kX64MovqDecompressAnyTagged;
+ break;
+#else
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: // Fall through.
+#endif
case MachineRepresentation::kWord64:
opcode = kX64Movq;
break;
@@ -288,7 +300,8 @@ ArchOpcode GetStoreOpcode(StoreRepresentation store_rep) {
#endif
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
- case MachineRepresentation::kTagged: // Fall through.
+ case MachineRepresentation::kTagged:
+ return kX64MovqCompressTagged;
case MachineRepresentation::kWord64:
return kX64Movq;
case MachineRepresentation::kSimd128: // Fall through.
@@ -875,6 +888,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
Emit(kX64Bswap32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitInt32Add(Node* node) {
X64OperandGenerator g(this);
@@ -1843,17 +1860,15 @@ void VisitWordCompare(InstructionSelector* selector, Node* node,
node->op()->HasProperty(Operator::kCommutative));
}
-// Shared routine for 64-bit word comparison operations.
-void VisitWord64Compare(InstructionSelector* selector, Node* node,
- FlagsContinuation* cont) {
- X64OperandGenerator g(selector);
+void VisitWord64EqualImpl(InstructionSelector* selector, Node* node,
+ FlagsContinuation* cont) {
if (selector->CanUseRootsRegister()) {
+ X64OperandGenerator g(selector);
const RootsTable& roots_table = selector->isolate()->roots_table();
RootIndex root_index;
HeapObjectBinopMatcher m(node);
if (m.right().HasValue() &&
roots_table.IsRootHandle(m.right().Value(), &root_index)) {
- if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
InstructionCode opcode =
kX64Cmp | AddressingModeField::encode(kMode_Root);
return VisitCompare(
@@ -1861,18 +1876,30 @@ void VisitWord64Compare(InstructionSelector* selector, Node* node,
g.TempImmediate(
TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
g.UseRegister(m.left().node()), cont);
- } else if (m.left().HasValue() &&
- roots_table.IsRootHandle(m.left().Value(), &root_index)) {
+ }
+ }
+ VisitWordCompare(selector, node, kX64Cmp, cont);
+}
+
+void VisitWord32EqualImpl(InstructionSelector* selector, Node* node,
+ FlagsContinuation* cont) {
+ if (COMPRESS_POINTERS_BOOL && selector->CanUseRootsRegister()) {
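+ // With pointer compression, a known root can be compared against the
+ // compressed value directly with a 32-bit cmp on the roots-table entry.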
+ X64OperandGenerator g(selector);
+ const RootsTable& roots_table = selector->isolate()->roots_table();
+ RootIndex root_index;
+ CompressedHeapObjectBinopMatcher m(node);
+ if (m.right().HasValue() &&
+ roots_table.IsRootHandle(m.right().Value(), &root_index)) {
InstructionCode opcode =
- kX64Cmp | AddressingModeField::encode(kMode_Root);
+ kX64Cmp32 | AddressingModeField::encode(kMode_Root);
return VisitCompare(
selector, opcode,
g.TempImmediate(
TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
- g.UseRegister(m.right().node()), cont);
+ g.UseRegister(m.left().node()), cont);
}
}
- VisitWordCompare(selector, node, kX64Cmp, cont);
+ VisitWordCompare(selector, node, kX64Cmp32, cont);
}
// Shared routine for comparison with zero.
@@ -2048,7 +2075,7 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
switch (value->opcode()) {
case IrOpcode::kWord32Equal:
cont->OverwriteAndNegateIfEqual(kEqual);
- return VisitWordCompare(this, value, kX64Cmp32, cont);
+ return VisitWord32EqualImpl(this, value, cont);
case IrOpcode::kInt32LessThan:
cont->OverwriteAndNegateIfEqual(kSignedLessThan);
return VisitWordCompare(this, value, kX64Cmp32, cont);
@@ -2071,7 +2098,7 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
if (CanCover(user, value)) {
switch (value->opcode()) {
case IrOpcode::kInt64Sub:
- return VisitWord64Compare(this, value, cont);
+ return VisitWordCompare(this, value, kX64Cmp, cont);
case IrOpcode::kWord64And:
return VisitWordCompare(this, value, kX64Test, cont);
default:
@@ -2080,20 +2107,20 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
}
return VisitCompareZero(this, user, value, kX64Cmp, cont);
}
- return VisitWord64Compare(this, value, cont);
+ return VisitWord64EqualImpl(this, value, cont);
}
case IrOpcode::kInt64LessThan:
cont->OverwriteAndNegateIfEqual(kSignedLessThan);
- return VisitWord64Compare(this, value, cont);
+ return VisitWordCompare(this, value, kX64Cmp, cont);
case IrOpcode::kInt64LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
- return VisitWord64Compare(this, value, cont);
+ return VisitWordCompare(this, value, kX64Cmp, cont);
case IrOpcode::kUint64LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
- return VisitWord64Compare(this, value, cont);
+ return VisitWordCompare(this, value, kX64Cmp, cont);
case IrOpcode::kUint64LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
- return VisitWord64Compare(this, value, cont);
+ return VisitWordCompare(this, value, kX64Cmp, cont);
case IrOpcode::kFloat32Equal:
cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
return VisitFloat32Compare(this, value, cont);
@@ -2221,7 +2248,7 @@ void InstructionSelector::VisitWord32Equal(Node* const node) {
if (m.right().Is(0)) {
return VisitWordCompareZero(m.node(), m.left().node(), &cont);
}
- VisitWordCompare(this, node, kX64Cmp32, &cont);
+ VisitWord32EqualImpl(this, node, &cont);
}
void InstructionSelector::VisitInt32LessThan(Node* node) {
@@ -2246,7 +2273,7 @@ void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
VisitWordCompare(this, node, kX64Cmp32, &cont);
}
-void InstructionSelector::VisitWord64Equal(Node* const node) {
+void InstructionSelector::VisitWord64Equal(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
Int64BinopMatcher m(node);
if (m.right().Is(0)) {
@@ -2256,7 +2283,7 @@ void InstructionSelector::VisitWord64Equal(Node* const node) {
if (CanCover(user, value)) {
switch (value->opcode()) {
case IrOpcode::kInt64Sub:
- return VisitWord64Compare(this, value, &cont);
+ return VisitWordCompare(this, value, kX64Cmp, &cont);
case IrOpcode::kWord64And:
return VisitWordCompare(this, value, kX64Test, &cont);
default:
@@ -2264,7 +2291,7 @@ void InstructionSelector::VisitWord64Equal(Node* const node) {
}
}
}
- VisitWord64Compare(this, node, &cont);
+ VisitWord64EqualImpl(this, node, &cont);
}
void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
@@ -2287,24 +2314,24 @@ void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
void InstructionSelector::VisitInt64LessThan(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
- VisitWord64Compare(this, node, &cont);
+ VisitWordCompare(this, node, kX64Cmp, &cont);
}
void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
- VisitWord64Compare(this, node, &cont);
+ VisitWordCompare(this, node, kX64Cmp, &cont);
}
void InstructionSelector::VisitUint64LessThan(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
- VisitWord64Compare(this, node, &cont);
+ VisitWordCompare(this, node, kX64Cmp, &cont);
}
void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
- VisitWord64Compare(this, node, &cont);
+ VisitWordCompare(this, node, kX64Cmp, &cont);
}
void InstructionSelector::VisitFloat32Equal(Node* node) {
@@ -2685,9 +2712,11 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16GtU)
#define SIMD_UNOP_LIST(V) \
+ V(F64x2Sqrt) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
V(F32x4Neg) \
+ V(F32x4Sqrt) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I64x2Neg) \
@@ -2872,6 +2901,27 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
g.UseRegister(node->InputAt(0)));
}
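+// Qfma/Qfms lower to a single FMA3 instruction when available; otherwise the
+// code generator emits a multiply into a temp followed by an add/sub.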
+#define VISIT_SIMD_QFMOP(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ X64OperandGenerator g(this); \
+ if (CpuFeatures::IsSupported(FMA3)) { \
+ Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+ g.UseRegister(node->InputAt(2))); \
+ } else { \
+ InstructionOperand temps[] = {g.TempSimd128Register()}; \
+ Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
+ g.UseUniqueRegister(node->InputAt(0)), \
+ g.UseUniqueRegister(node->InputAt(1)), \
+ g.UseRegister(node->InputAt(2)), arraysize(temps), temps); \
+ } \
+ }
+VISIT_SIMD_QFMOP(F64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms)
+#undef VISIT_SIMD_QFMOP
+
void InstructionSelector::VisitI64x2ShrS(Node* node) {
X64OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
@@ -2893,10 +2943,10 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
void InstructionSelector::VisitI64x2MinS(Node* node) {
X64OperandGenerator g(this);
if (this->IsSupported(SSE4_2)) {
- InstructionOperand temps[] = {g.TempSimd128Register()};
- Emit(kX64I64x2MinS, g.DefineSameAsFirst(node),
- g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
- arraysize(temps), temps);
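+ // blendvpd implicitly uses xmm0 as its mask, so reserve xmm0 as the temp
+ // instead of pinning one of the inputs to it.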
+ InstructionOperand temps[] = {g.TempFpRegister(xmm0)};
+ Emit(kX64I64x2MinS, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
} else {
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister(),
g.TempRegister()};
@@ -2908,27 +2958,27 @@ void InstructionSelector::VisitI64x2MinS(Node* node) {
void InstructionSelector::VisitI64x2MaxS(Node* node) {
X64OperandGenerator g(this);
- InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand temps[] = {g.TempFpRegister(xmm0)};
Emit(kX64I64x2MaxS, g.DefineSameAsFirst(node),
- g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
+ g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
arraysize(temps), temps);
}
void InstructionSelector::VisitI64x2MinU(Node* node) {
X64OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register(),
- g.TempSimd128Register()};
- Emit(kX64I64x2MinU, g.DefineSameAsFirst(node),
- g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
- arraysize(temps), temps);
+ g.TempFpRegister(xmm0)};
+ Emit(kX64I64x2MinU, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
void InstructionSelector::VisitI64x2MaxU(Node* node) {
X64OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register(),
- g.TempSimd128Register()};
+ g.TempFpRegister(xmm0)};
Emit(kX64I64x2MaxU, g.DefineSameAsFirst(node),
- g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0),
+ g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
arraysize(temps), temps);
}
@@ -3256,6 +3306,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
}
+void InstructionSelector::VisitS8x16Swizzle(Node* node) {
+ X64OperandGenerator g(this);
+ InstructionOperand temps[] = {g.TempSimd128Register()};
+ Emit(kX64S8x16Swizzle, g.DefineSameAsFirst(node),
+ g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+ arraysize(temps), temps);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {