diff options
Diffstat (limited to 'deps/v8/src/compiler/backend/arm')
4 files changed, 112 insertions, 39 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc index 65a569d755..3fe5361083 100644 --- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -44,7 +44,7 @@ class ArmOperandConverter final : public InstructionOperandConverter { UNREACHABLE(); } - Operand InputImmediate(size_t index) { + Operand InputImmediate(size_t index) const { return ToImmediate(instr_->InputAt(index)); } @@ -111,7 +111,7 @@ class ArmOperandConverter final : public InstructionOperandConverter { return InputOffset(&first_index); } - Operand ToImmediate(InstructionOperand* operand) { + Operand ToImmediate(InstructionOperand* operand) const { Constant constant = ToConstant(operand); switch (constant.type()) { case Constant::kInt32: @@ -153,9 +153,6 @@ class ArmOperandConverter final : public InstructionOperandConverter { NeonMemOperand NeonInputOperand(size_t first_index) { const size_t index = first_index; switch (AddressingModeField::decode(instr_->opcode())) { - case kMode_Offset_RR: - return NeonMemOperand(InputRegister(index + 0), - InputRegister(index + 1)); case kMode_Operand2_R: return NeonMemOperand(InputRegister(index + 0)); default: @@ -309,9 +306,9 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { UNREACHABLE(); } -void EmitWordLoadPoisoningIfNeeded( - CodeGenerator* codegen, InstructionCode opcode, - ArmOperandConverter& i) { // NOLINT(runtime/references) +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, + ArmOperandConverter const& i) { const MemoryAccessMode access_mode = static_cast<MemoryAccessMode>(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { @@ -320,10 +317,10 @@ void EmitWordLoadPoisoningIfNeeded( } } -void ComputePoisonedAddressForLoad( - CodeGenerator* codegen, InstructionCode opcode, - ArmOperandConverter& i, // NOLINT(runtime/references) - Register address) { +void ComputePoisonedAddressForLoad(CodeGenerator* codegen, + InstructionCode opcode, + ArmOperandConverter const& i, + Register address) { DCHECK_EQ(kMemoryAccessPoisoned, static_cast<MemoryAccessMode>(MiscField::decode(opcode))); switch (AddressingModeField::decode(opcode)) { @@ -1798,6 +1795,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } + case kArmF32x4Sqrt: { + QwNeonRegister dst = i.OutputSimd128Register(); + QwNeonRegister src1 = i.InputSimd128Register(0); + DCHECK_EQ(dst, q0); + DCHECK_EQ(src1, q0); +#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane) + __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0)); + __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1)); + __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2)); + __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3)); +#undef S_FROM_Q + break; + } case kArmF32x4RecipApprox: { __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; @@ -1919,14 +1929,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI32x4Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. + __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kArmI32x4ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. + __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vneg(Neon32, tmp, tmp); __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -1998,7 +2014,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI32x4ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon32, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 32. + __ and_(shift, i.InputRegister(1), Operand(31)); + __ vdup(Neon32, tmp, shift); __ vneg(Neon32, tmp, tmp); __ vshl(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2029,7 +2048,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI16x8ExtractLane: { - __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16, + __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16, i.InputInt8(1)); break; } @@ -2054,14 +2073,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI16x8Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kArmI16x8ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vneg(Neon16, tmp, tmp); __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2142,7 +2167,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI16x8ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon16, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 16. + __ and_(shift, i.InputRegister(1), Operand(15)); + __ vdup(Neon16, tmp, shift); __ vneg(Neon16, tmp, tmp); __ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2186,7 +2214,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI8x16ExtractLane: { - __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8, + __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8, i.InputInt8(1)); break; } @@ -2201,6 +2229,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16Shl: { QwNeonRegister tmp = i.TempSimd128Register(0); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. + __ and_(shift, i.InputRegister(1), Operand(7)); __ vdup(Neon8, tmp, i.InputRegister(1)); __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2208,7 +2239,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16ShrS: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon8, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. + __ and_(shift, i.InputRegister(1), Operand(7)); + __ vdup(Neon8, tmp, shift); __ vneg(Neon8, tmp, tmp); __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); @@ -2275,7 +2309,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI8x16ShrU: { QwNeonRegister tmp = i.TempSimd128Register(0); - __ vdup(Neon8, tmp, i.InputRegister(1)); + Register shift = i.TempRegister(1); + // Take shift value modulo 8. + __ and_(shift, i.InputRegister(1), Operand(7)); + __ vdup(Neon8, tmp, shift); __ vneg(Neon8, tmp, tmp); __ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h index 3551e26aea..d398ec0ed6 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -135,6 +135,7 @@ namespace compiler { V(ArmF32x4UConvertI32x4) \ V(ArmF32x4Abs) \ V(ArmF32x4Neg) \ + V(ArmF32x4Sqrt) \ V(ArmF32x4RecipApprox) \ V(ArmF32x4RecipSqrtApprox) \ V(ArmF32x4Add) \ diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index 1d7cf61dfe..92be55dcc3 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -115,6 +115,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4UConvertI32x4: case kArmF32x4Abs: case kArmF32x4Neg: + case kArmF32x4Sqrt: case kArmF32x4RecipApprox: case kArmF32x4RecipSqrtApprox: case kArmF32x4Add: diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc index ce74faa4a6..303648051f 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "src/base/adapters.h" #include "src/base/bits.h" #include "src/base/enum-set.h" +#include "src/base/iterator.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -94,7 +94,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ArmOperandGenerator g(selector); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), arraysize(temps), temps); @@ -352,6 +352,26 @@ void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode div_opcode, } } +// Adds the base and offset into a register, then change the addressing +// mode of opcode_return to use this register. Certain instructions, e.g. +// vld1 and vst1, when given two registers, will post-increment the offset, i.e. +// perform the operation at base, then add offset to base. What we intend is to +// access at (base+offset). +void EmitAddBeforeS128LoadStore(InstructionSelector* selector, + InstructionCode* opcode_return, + size_t* input_count_return, + InstructionOperand* inputs) { + DCHECK(*opcode_return == kArmVld1S128 || *opcode_return == kArmVst1S128); + ArmOperandGenerator g(selector); + InstructionOperand addr = g.TempRegister(); + InstructionCode op = kArmAdd; + op |= AddressingModeField::encode(kMode_Operand2_R); + selector->Emit(op, 1, &addr, 2, inputs); + *opcode_return |= AddressingModeField::encode(kMode_Operand2_R); + *input_count_return -= 1; + inputs[0] = addr; +} + void EmitLoad(InstructionSelector* selector, InstructionCode opcode, InstructionOperand* output, Node* base, Node* index) { ArmOperandGenerator g(selector); @@ -368,7 +388,11 @@ void EmitLoad(InstructionSelector* selector, InstructionCode opcode, input_count = 3; } else { inputs[1] = g.UseRegister(index); - opcode |= AddressingModeField::encode(kMode_Offset_RR); + if (opcode == kArmVld1S128) { + EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[0]); + } else { + opcode |= AddressingModeField::encode(kMode_Offset_RR); + } } selector->Emit(opcode, 1, output, input_count, inputs); } @@ -386,7 +410,12 @@ void EmitStore(InstructionSelector* selector, InstructionCode opcode, input_count = 4; } else { inputs[input_count++] = g.UseRegister(index); - opcode |= AddressingModeField::encode(kMode_Offset_RR); + if (opcode == kArmVst1S128) { + // Inputs are value, base, index, only care about base and index. + EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[1]); + } else { + opcode |= AddressingModeField::encode(kMode_Offset_RR); + } } selector->Emit(opcode, 0, nullptr, input_count, inputs); } @@ -596,8 +625,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); return; } - case MachineRepresentation::kFloat64: - case MachineRepresentation::kSimd128: { + case MachineRepresentation::kFloat64: { // Compute the address of the least-significant byte of the FP value. // We assume that the base node is unlikely to be an encodable immediate // or the result of a shift operation, so only consider the addressing @@ -623,13 +651,10 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { if (CpuFeatures::IsSupported(NEON)) { // With NEON we can load directly from the calculated address. - InstructionCode op = load_rep == MachineRepresentation::kFloat64 - ? kArmVld1F64 - : kArmVld1S128; + InstructionCode op = kArmVld1F64; op |= AddressingModeField::encode(kMode_Operand2_R); Emit(op, g.DefineAsRegister(node), addr); } else { - DCHECK_NE(MachineRepresentation::kSimd128, load_rep); // Load both halves and move to an FP register. InstructionOperand fp_lo = g.TempRegister(); InstructionOperand fp_hi = g.TempRegister(); @@ -670,8 +695,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { EmitStore(this, kArmStr, input_count, inputs, index); return; } - case MachineRepresentation::kFloat64: - case MachineRepresentation::kSimd128: { + case MachineRepresentation::kFloat64: { if (CpuFeatures::IsSupported(NEON)) { InstructionOperand address = g.TempRegister(); { @@ -697,13 +721,10 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { inputs[input_count++] = g.UseRegister(value); inputs[input_count++] = address; - InstructionCode op = store_rep == MachineRepresentation::kFloat64 - ? kArmVst1F64 - : kArmVst1S128; + InstructionCode op = kArmVst1F64; op |= AddressingModeField::encode(kMode_Operand2_R); Emit(op, 0, nullptr, input_count, inputs); } else { - DCHECK_NE(MachineRepresentation::kSimd128, store_rep); // Store a 64-bit floating point value using two 32-bit integer stores. // Computing the store address here would require three live temporary // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after @@ -942,7 +963,8 @@ void InstructionSelector::VisitWord32Shr(Node* node) { uint32_t lsb = m.right().Value(); Int32BinopMatcher mleft(m.left().node()); if (mleft.right().HasValue()) { - uint32_t value = (mleft.right().Value() >> lsb) << lsb; + uint32_t value = static_cast<uint32_t>(mleft.right().Value() >> lsb) + << lsb; uint32_t width = base::bits::CountPopulation(value); uint32_t msb = base::bits::CountLeadingZeros32(value); if ((width != 0) && (msb + width + lsb == 32)) { @@ -1119,6 +1141,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { VisitRR(this, kArmRev, node); } +void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { + UNREACHABLE(); +} + void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); } void InstructionSelector::VisitInt32Add(Node* node) { @@ -2513,6 +2539,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP) #undef SIMD_VISIT_BINOP #undef SIMD_BINOP_LIST +void InstructionSelector::VisitF32x4Sqrt(Node* node) { + ArmOperandGenerator g(this); + // Use fixed registers in the lower 8 Q-registers so we can directly access + // mapped registers S0-S31. + Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0), + g.UseFixed(node->InputAt(0), q0)); +} + void InstructionSelector::VisitF32x4Div(Node* node) { ArmOperandGenerator g(this); // Use fixed registers in the lower 8 Q-registers so we can directly access |