Diffstat (limited to 'deps/v8/src/compiler/backend/arm')
-rw-r--r--  deps/v8/src/compiler/backend/arm/code-generator-arm.cc        | 81
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-codes-arm.h      |  1
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc |  1
-rw-r--r--  deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc  | 68
4 files changed, 112 insertions, 39 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
index 65a569d755..3fe5361083 100644
--- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -44,7 +44,7 @@ class ArmOperandConverter final : public InstructionOperandConverter {
UNREACHABLE();
}
- Operand InputImmediate(size_t index) {
+ Operand InputImmediate(size_t index) const {
return ToImmediate(instr_->InputAt(index));
}
@@ -111,7 +111,7 @@ class ArmOperandConverter final : public InstructionOperandConverter {
return InputOffset(&first_index);
}
- Operand ToImmediate(InstructionOperand* operand) {
+ Operand ToImmediate(InstructionOperand* operand) const {
Constant constant = ToConstant(operand);
switch (constant.type()) {
case Constant::kInt32:
@@ -153,9 +153,6 @@ class ArmOperandConverter final : public InstructionOperandConverter {
NeonMemOperand NeonInputOperand(size_t first_index) {
const size_t index = first_index;
switch (AddressingModeField::decode(instr_->opcode())) {
- case kMode_Offset_RR:
- return NeonMemOperand(InputRegister(index + 0),
- InputRegister(index + 1));
case kMode_Operand2_R:
return NeonMemOperand(InputRegister(index + 0));
default:
@@ -309,9 +306,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE();
}
-void EmitWordLoadPoisoningIfNeeded(
- CodeGenerator* codegen, InstructionCode opcode,
- ArmOperandConverter& i) { // NOLINT(runtime/references)
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+ InstructionCode opcode,
+ ArmOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
@@ -320,10 +317,10 @@ void EmitWordLoadPoisoningIfNeeded(
}
}
-void ComputePoisonedAddressForLoad(
- CodeGenerator* codegen, InstructionCode opcode,
- ArmOperandConverter& i, // NOLINT(runtime/references)
- Register address) {
+void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
+ InstructionCode opcode,
+ ArmOperandConverter const& i,
+ Register address) {
DCHECK_EQ(kMemoryAccessPoisoned,
static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
switch (AddressingModeField::decode(opcode)) {
@@ -1798,6 +1795,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
+ case kArmF32x4Sqrt: {
+ QwNeonRegister dst = i.OutputSimd128Register();
+ QwNeonRegister src1 = i.InputSimd128Register(0);
+ DCHECK_EQ(dst, q0);
+ DCHECK_EQ(src1, q0);
+#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
+ __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
+ __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
+ __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
+ __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
+#undef S_FROM_Q
+ break;
+ }
case kArmF32x4RecipApprox: {
__ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
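
Note on the S_FROM_Q trick in the kArmF32x4Sqrt case above: each of the low eight Q-registers aliases four S-registers (q0 = s0..s3, q1 = s4..s7, up to q7 = s28..s31), and vsqrt is a VFP instruction that can only name S-registers, so the 128-bit sqrt is emitted as four scalar vsqrts over the aliased lanes. A minimal standalone sketch of the lane-to-S-register mapping, assuming V8's convention that qN has register code N:

#include <cassert>

// Hypothetical model of the S_FROM_Q macro (an illustration, not V8's
// actual register classes): lane `lane` of q<code> lives in the
// S-register with code (code * 4 + lane). Only q0..q7 have S-register
// views, since S-register codes stop at s31.
int s_code_from_q(int q_code, int lane) {
  assert(0 <= q_code && q_code <= 7);
  assert(0 <= lane && lane <= 3);
  return q_code * 4 + lane;
}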
@@ -1919,14 +1929,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI32x4ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
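
The masking added here (and in the 16- and 8-bit lane cases below) matches wasm semantics, where a SIMD shift count is taken modulo the lane width; without it, a count of 32 or more broadcast into vdup/vshl would not wrap the way the spec requires. A scalar sketch of the required behaviour:

#include <cstdint>

// Sketch (not V8 code): i32x4.shl must treat a count of 33 like a count
// of 1, so the count is masked to [0, 31] first, exactly what the emitted
// `and_(shift, ..., Operand(31))` does.
uint32_t i32_lane_shl(uint32_t lane, uint32_t count) {
  return lane << (count & 31);
}
// The 16-bit lanes mask with 15 and the 8-bit lanes with 7, analogously.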
@@ -1998,7 +2014,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon32, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 32.
+ __ and_(shift, i.InputRegister(1), Operand(31));
+ __ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2029,7 +2048,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI16x8ExtractLane: {
- __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
+ __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
i.InputInt8(1));
break;
}
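
Switching the extract from NeonS16 to NeonU16 here (and from NeonS8 to NeonU8 below) changes the lane from being sign-extended to zero-extended into the 32-bit result register; this appears to track the wasm SIMD split of extract_lane into _s and _u flavours, with this opcode taking the unsigned behaviour. A scalar sketch of the difference:

#include <cstdint>

// Sketch (not V8 code): vmov.s16 sign-extends the lane into the GPR,
// vmov.u16 zero-extends it.
int32_t extract_lane_s16(int16_t lane) { return lane; }                        // NeonS16
int32_t extract_lane_u16(int16_t lane) { return static_cast<uint16_t>(lane); } // NeonU16
// For a lane holding -1 (bit pattern 0xFFFF): the signed extract yields
// -1, the unsigned extract yields 65535.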
@@ -2054,14 +2073,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI16x8ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2142,7 +2167,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon16, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 16.
+ __ and_(shift, i.InputRegister(1), Operand(15));
+ __ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2186,7 +2214,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI8x16ExtractLane: {
- __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
+ __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
i.InputInt8(1));
break;
}
@@ -2201,6 +2229,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
- __ vdup(Neon8, tmp, i.InputRegister(1));
+ __ vdup(Neon8, tmp, shift);
__ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2208,7 +2239,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon8, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
+ __ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
@@ -2275,7 +2309,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
- __ vdup(Neon8, tmp, i.InputRegister(1));
+ Register shift = i.TempRegister(1);
+ // Take shift value modulo 8.
+ __ and_(shift, i.InputRegister(1), Operand(7));
+ __ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
index 3551e26aea..d398ec0ed6 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h
@@ -135,6 +135,7 @@ namespace compiler {
V(ArmF32x4UConvertI32x4) \
V(ArmF32x4Abs) \
V(ArmF32x4Neg) \
+ V(ArmF32x4Sqrt) \
V(ArmF32x4RecipApprox) \
V(ArmF32x4RecipSqrtApprox) \
V(ArmF32x4Add) \
diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
index 1d7cf61dfe..92be55dcc3 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -115,6 +115,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4UConvertI32x4:
case kArmF32x4Abs:
case kArmF32x4Neg:
+ case kArmF32x4Sqrt:
case kArmF32x4RecipApprox:
case kArmF32x4RecipSqrtApprox:
case kArmF32x4Add:
diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
index ce74faa4a6..303648051f 100644
--- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/base/adapters.h"
#include "src/base/bits.h"
#include "src/base/enum-set.h"
+#include "src/base/iterator.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@@ -94,7 +94,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
ArmOperandGenerator g(selector);
- InstructionOperand temps[] = {g.TempSimd128Register()};
+ InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
@@ -352,6 +352,26 @@ void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode div_opcode,
}
}
+// Adds the base and offset into a register, then changes the addressing
+// mode of opcode_return to use this register. Certain instructions, e.g.
+// vld1 and vst1, when given two registers, post-increment the base by the
+// offset, i.e. perform the operation at base and then add offset to base.
+// What we intend is to access at (base+offset).
+void EmitAddBeforeS128LoadStore(InstructionSelector* selector,
+ InstructionCode* opcode_return,
+ size_t* input_count_return,
+ InstructionOperand* inputs) {
+ DCHECK(*opcode_return == kArmVld1S128 || *opcode_return == kArmVst1S128);
+ ArmOperandGenerator g(selector);
+ InstructionOperand addr = g.TempRegister();
+ InstructionCode op = kArmAdd;
+ op |= AddressingModeField::encode(kMode_Operand2_R);
+ selector->Emit(op, 1, &addr, 2, inputs);
+ *opcode_return |= AddressingModeField::encode(kMode_Operand2_R);
+ *input_count_return -= 1;
+ inputs[0] = addr;
+}
+
void EmitLoad(InstructionSelector* selector, InstructionCode opcode,
InstructionOperand* output, Node* base, Node* index) {
ArmOperandGenerator g(selector);
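
To make the post-increment pitfall from the comment above concrete: the two-register NEON form vld1 {q}, [rn], rm loads from rn and only afterwards adds rm to rn, so it reads from base rather than base+offset; the helper therefore folds the add into a temp register first. A sketch of the two behaviours, under a plain byte-pointer model:

#include <cstddef>
#include <cstdint>
#include <cstring>

struct S128 { uint8_t bytes[16]; };

// Post-indexed form: load at *rn, then bump rn by rm
// (what "vld1 {q}, [rn], rm" does).
S128 vld1_post_indexed(const uint8_t*& rn, ptrdiff_t rm) {
  S128 v; std::memcpy(&v, rn, 16);
  rn += rm;
  return v;
}

// Intended access: load at base + offset
// (what the emitted add + single-register vld1 achieves).
S128 load_base_plus_offset(const uint8_t* base, ptrdiff_t offset) {
  S128 v; std::memcpy(&v, base + offset, 16);
  return v;
}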
@@ -368,7 +388,11 @@ void EmitLoad(InstructionSelector* selector, InstructionCode opcode,
input_count = 3;
} else {
inputs[1] = g.UseRegister(index);
- opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ if (opcode == kArmVld1S128) {
+ EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[0]);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ }
}
selector->Emit(opcode, 1, output, input_count, inputs);
}
@@ -386,7 +410,12 @@ void EmitStore(InstructionSelector* selector, InstructionCode opcode,
input_count = 4;
} else {
inputs[input_count++] = g.UseRegister(index);
- opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ if (opcode == kArmVst1S128) {
+ // Inputs are value, base, index; we only care about base and index here.
+ EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[1]);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_Offset_RR);
+ }
}
selector->Emit(opcode, 0, nullptr, input_count, inputs);
}
@@ -596,8 +625,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
return;
}
- case MachineRepresentation::kFloat64:
- case MachineRepresentation::kSimd128: {
+ case MachineRepresentation::kFloat64: {
// Compute the address of the least-significant byte of the FP value.
// We assume that the base node is unlikely to be an encodable immediate
// or the result of a shift operation, so only consider the addressing
@@ -623,13 +651,10 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
if (CpuFeatures::IsSupported(NEON)) {
// With NEON we can load directly from the calculated address.
- InstructionCode op = load_rep == MachineRepresentation::kFloat64
- ? kArmVld1F64
- : kArmVld1S128;
+ InstructionCode op = kArmVld1F64;
op |= AddressingModeField::encode(kMode_Operand2_R);
Emit(op, g.DefineAsRegister(node), addr);
} else {
- DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
// Load both halves and move to an FP register.
InstructionOperand fp_lo = g.TempRegister();
InstructionOperand fp_hi = g.TempRegister();
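
The no-NEON fallback described by the comments above reassembles the double from two 32-bit integer loads (the store path further down mirrors it with two 32-bit stores). A sketch of the reassembly, assuming a little-endian layout:

#include <cstdint>
#include <cstring>

// Sketch (not V8 code): load fp<31:0> and fp<63:32> separately and
// recombine, avoiding an unaligned 64-bit FP load.
double load_f64_as_two_halves(const uint8_t* addr) {
  uint32_t lo, hi;
  std::memcpy(&lo, addr, 4);      // fp<31:0>
  std::memcpy(&hi, addr + 4, 4);  // fp<63:32>
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  double result;
  std::memcpy(&result, &bits, 8);
  return result;
}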
@@ -670,8 +695,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
EmitStore(this, kArmStr, input_count, inputs, index);
return;
}
- case MachineRepresentation::kFloat64:
- case MachineRepresentation::kSimd128: {
+ case MachineRepresentation::kFloat64: {
if (CpuFeatures::IsSupported(NEON)) {
InstructionOperand address = g.TempRegister();
{
@@ -697,13 +721,10 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
inputs[input_count++] = g.UseRegister(value);
inputs[input_count++] = address;
- InstructionCode op = store_rep == MachineRepresentation::kFloat64
- ? kArmVst1F64
- : kArmVst1S128;
+ InstructionCode op = kArmVst1F64;
op |= AddressingModeField::encode(kMode_Operand2_R);
Emit(op, 0, nullptr, input_count, inputs);
} else {
- DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
// Store a 64-bit floating point value using two 32-bit integer stores.
// Computing the store address here would require three live temporary
// registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
@@ -942,7 +963,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) {
uint32_t lsb = m.right().Value();
Int32BinopMatcher mleft(m.left().node());
if (mleft.right().HasValue()) {
- uint32_t value = (mleft.right().Value() >> lsb) << lsb;
+ uint32_t value = static_cast<uint32_t>(mleft.right().Value() >> lsb)
+ << lsb;
uint32_t width = base::bits::CountPopulation(value);
uint32_t msb = base::bits::CountLeadingZeros32(value);
if ((width != 0) && (msb + width + lsb == 32)) {
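
The added static_cast fixes a C++ undefined-behaviour hazard: the matched constant is a signed 32-bit value, and left-shifting a negative signed value is undefined, so the intermediate is converted to uint32_t before the << lsb. A sketch of the well-defined form:

#include <cstdint>

// Sketch (not V8 code): clear the low `lsb` bits of a signed constant
// without invoking signed-shift UB; << on an unsigned operand is defined.
uint32_t clear_low_bits(int32_t value, uint32_t lsb) {
  return static_cast<uint32_t>(value >> lsb) << lsb;
}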
@@ -1119,6 +1141,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
VisitRR(this, kArmRev, node);
}
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+ UNREACHABLE();
+}
+
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitInt32Add(Node* node) {
@@ -2513,6 +2539,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
+void InstructionSelector::VisitF32x4Sqrt(Node* node) {
+ ArmOperandGenerator g(this);
+ // Use fixed registers in the lower 8 Q-registers so we can directly access
+ // mapped registers S0-S31.
+ Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0),
+ g.UseFixed(node->InputAt(0), q0));
+}
+
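
VisitF32x4Sqrt pins both operand and result to q0 because the per-lane vsqrt expansion in the code generator (the S_FROM_Q hunk above) can only address S-registers, i.e. the aliased views of q0..q7; pinning one specific register keeps the selector and the DCHECKs in the code generator trivially in agreement, though presumably any of q0..q7 would satisfy the mapping. A scalar model of what the pinned instruction computes:

#include <cmath>

// Sketch (not V8 code): four independent single-precision square roots,
// one per 32-bit lane of the 128-bit register.
void f32x4_sqrt(float lanes[4]) {
  for (int i = 0; i < 4; ++i) lanes[i] = std::sqrt(lanes[i]);
}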
void InstructionSelector::VisitF32x4Div(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access