diff options
Diffstat (limited to 'deps/v8/src/compiler/arm/code-generator-arm.cc')
-rw-r--r-- | deps/v8/src/compiler/arm/code-generator-arm.cc | 878 |
1 files changed, 486 insertions, 392 deletions
diff --git a/deps/v8/src/compiler/arm/code-generator-arm.cc b/deps/v8/src/compiler/arm/code-generator-arm.cc index a238cf29d4..8636c639e0 100644 --- a/deps/v8/src/compiler/arm/code-generator-arm.cc +++ b/deps/v8/src/compiler/arm/code-generator-arm.cc @@ -21,9 +21,6 @@ namespace compiler { #define __ tasm()-> -#define kScratchReg r9 - - // Adds Arm-specific methods to convert InstructionOperands. class ArmOperandConverter final : public InstructionOperandConverter { public: @@ -33,7 +30,9 @@ class ArmOperandConverter final : public InstructionOperandConverter { SBit OutputSBit() const { switch (instr_->flags_mode()) { case kFlags_branch: + case kFlags_branch_and_poison: case kFlags_deoptimize: + case kFlags_deoptimize_and_poison: case kFlags_set: case kFlags_trap: return SetCC; @@ -44,21 +43,7 @@ class ArmOperandConverter final : public InstructionOperandConverter { } Operand InputImmediate(size_t index) { - Constant constant = ToConstant(instr_->InputAt(index)); - switch (constant.type()) { - case Constant::kInt32: - return Operand(constant.ToInt32()); - case Constant::kFloat32: - return Operand::EmbeddedNumber(constant.ToFloat32()); - case Constant::kFloat64: - return Operand::EmbeddedNumber(constant.ToFloat64().value()); - case Constant::kInt64: - case Constant::kExternalReference: - case Constant::kHeapObject: - case Constant::kRpoNumber: - break; - } - UNREACHABLE(); + return ToImmediate(instr_->InputAt(index)); } Operand InputOperand2(size_t first_index) { @@ -124,6 +109,30 @@ class ArmOperandConverter final : public InstructionOperandConverter { return InputOffset(&first_index); } + Operand ToImmediate(InstructionOperand* operand) { + Constant constant = ToConstant(operand); + switch (constant.type()) { + case Constant::kInt32: + if (RelocInfo::IsWasmReference(constant.rmode())) { + return Operand(constant.ToInt32(), constant.rmode()); + } else { + return Operand(constant.ToInt32()); + } + case Constant::kFloat32: + return Operand::EmbeddedNumber(constant.ToFloat32()); + case Constant::kFloat64: + return Operand::EmbeddedNumber(constant.ToFloat64().value()); + case Constant::kExternalReference: + return Operand(constant.ToExternalReference()); + case Constant::kInt64: + case Constant::kHeapObject: + // TODO(dcarney): loading RPO constants on arm. + case Constant::kRpoNumber: + break; + } + UNREACHABLE(); + } + MemOperand ToMemOperand(InstructionOperand* op) const { DCHECK_NOT_NULL(op); DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); @@ -314,6 +323,17 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { UNREACHABLE(); } +void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, + InstructionCode opcode, Instruction* instr, + ArmOperandConverter& i) { + const MemoryAccessMode access_mode = + static_cast<MemoryAccessMode>(MiscField::decode(opcode)); + if (access_mode == kMemoryAccessPoisoned) { + Register value = i.OutputRegister(); + codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister)); + } +} + } // namespace #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \ @@ -571,28 +591,54 @@ void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, first_unused_stack_slot); } +// Check that {kJavaScriptCallCodeStartRegister} is correct. +void CodeGenerator::AssembleCodeStartRegisterCheck() { + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); + __ ComputeCodeStartAddress(scratch); + __ cmp(scratch, kJavaScriptCallCodeStartRegister); + __ Assert(eq, AbortReason::kWrongFunctionCodeStart); +} + // Check if the code object is marked for deoptimization. If it is, then it // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need // to: -// 1. compute the offset of the {CodeDataContainer} from our current location -// and load it. -// 2. read from memory the word that contains that bit, which can be found in +// 1. read from memory the word that contains that bit, which can be found in // the flags in the referenced {CodeDataContainer} object; -// 3. test kMarkedForDeoptimizationBit in those flags; and -// 4. if it is not zero then it jumps to the builtin. +// 2. test kMarkedForDeoptimizationBit in those flags; and +// 3. if it is not zero then it jumps to the builtin. void CodeGenerator::BailoutIfDeoptimized() { - int pc_offset = __ pc_offset(); - int offset = Code::kCodeDataContainerOffset - - (Code::kHeaderSize + pc_offset + TurboAssembler::kPcLoadDelta); - // We can use the register pc - 8 for the address of the current instruction. - __ ldr_pcrel(ip, offset); - __ ldr(ip, FieldMemOperand(ip, CodeDataContainer::kKindSpecificFlagsOffset)); - __ tst(ip, Operand(1 << Code::kMarkedForDeoptimizationBit)); + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); + int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; + __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset)); + __ ldr(scratch, + FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset)); + __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit)); Handle<Code> code = isolate()->builtins()->builtin_handle( Builtins::kCompileLazyDeoptimizedCode); __ Jump(code, RelocInfo::CODE_TARGET, ne); } +void CodeGenerator::GenerateSpeculationPoison() { + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); + + // Set a mask which has all bits set in the normal case, but has all + // bits cleared if we are speculatively executing the wrong PC. + __ ComputeCodeStartAddress(scratch); + __ cmp(kJavaScriptCallCodeStartRegister, scratch); + __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq); + __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne); + __ csdb(); +} + +void CodeGenerator::AssembleRegisterArgumentPoisoning() { + __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister); + __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister); + __ and_(sp, sp, kSpeculationPoisonRegister); +} + // Assembles an instruction after register allocation, producing machine code. CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Instruction* instr) { @@ -611,9 +657,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( if (instr->InputAt(0)->IsImmediate()) { __ Call(i.InputCode(0), RelocInfo::CODE_TARGET); } else { - __ add(ip, i.InputRegister(0), + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); + __ add(scratch, i.InputRegister(0), Operand(Code::kHeaderSize - kHeapObjectTag)); - __ Call(ip); + __ Call(scratch); } RecordCallPosition(instr); DCHECK_EQ(LeaveCC, i.OutputSBit()); @@ -657,9 +705,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( if (instr->InputAt(0)->IsImmediate()) { __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET); } else { - __ add(ip, i.InputRegister(0), + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); + __ add(scratch, i.InputRegister(0), Operand(Code::kHeaderSize - kHeapObjectTag)); - __ Jump(ip); + __ Jump(scratch); } DCHECK_EQ(LeaveCC, i.OutputSBit()); unwinding_info_writer_.MarkBlockWillExit(); @@ -701,14 +751,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArchCallJSFunction: { Register func = i.InputRegister(0); if (FLAG_debug_code) { + UseScratchRegisterScope temps(tasm()); + Register scratch = temps.Acquire(); // Check the function's context matches the context argument. - __ ldr(kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset)); - __ cmp(cp, kScratchReg); + __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset)); + __ cmp(cp, scratch); __ Assert(eq, AbortReason::kWrongFunctionContext); } - __ ldr(ip, FieldMemOperand(func, JSFunction::kCodeOffset)); - __ add(ip, ip, Operand(Code::kHeaderSize - kHeapObjectTag)); - __ Call(ip); + static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch"); + __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset)); + __ add(r2, r2, Operand(Code::kHeaderSize - kHeapObjectTag)); + __ Call(r2); RecordCallPosition(instr); DCHECK_EQ(LeaveCC, i.OutputSBit()); frame_access_state()->ClearSPDelta(); @@ -1154,7 +1207,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputRegister(1), i.InputInt32(2)); } else { __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0), - i.InputRegister(1), kScratchReg, i.InputRegister(2)); + i.InputRegister(1), i.InputRegister(2)); } break; } @@ -1166,7 +1219,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputRegister(1), i.InputInt32(2)); } else { __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), - i.InputRegister(1), kScratchReg, i.InputRegister(2)); + i.InputRegister(1), i.InputRegister(2)); } break; } @@ -1178,7 +1231,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputRegister(1), i.InputInt32(2)); } else { __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), - i.InputRegister(1), kScratchReg, i.InputRegister(2)); + i.InputRegister(1), i.InputRegister(2)); } break; } @@ -1354,35 +1407,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmVcvtF32S32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vmov(scratch, i.InputRegister(0)); __ vcvt_f32_s32(i.OutputFloatRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF32U32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vmov(scratch, i.InputRegister(0)); __ vcvt_f32_u32(i.OutputFloatRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF64S32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vmov(scratch, i.InputRegister(0)); __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF64U32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vmov(scratch, i.InputRegister(0)); __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtS32F32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vcvt_s32_f32(scratch, i.InputFloatRegister(0)); __ vmov(i.OutputRegister(), scratch); // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, @@ -1393,7 +1451,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmVcvtU32F32: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vcvt_u32_f32(scratch, i.InputFloatRegister(0)); __ vmov(i.OutputRegister(), scratch); // Avoid UINT32_MAX as an overflow indicator and use 0 instead, @@ -1404,14 +1463,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmVcvtS32F64: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0)); __ vmov(i.OutputRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtU32F64: { - SwVfpRegister scratch = kScratchDoubleReg.low(); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister scratch = temps.AcquireS(); __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0)); __ vmov(i.OutputRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); @@ -1453,10 +1514,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmLdrb: __ ldrb(i.OutputRegister(), i.InputOffset()); DCHECK_EQ(LeaveCC, i.OutputSBit()); + EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kArmLdrsb: __ ldrsb(i.OutputRegister(), i.InputOffset()); DCHECK_EQ(LeaveCC, i.OutputSBit()); + EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kArmStrb: __ strb(i.InputRegister(0), i.InputOffset(1)); @@ -1464,9 +1527,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArmLdrh: __ ldrh(i.OutputRegister(), i.InputOffset()); + EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kArmLdrsh: __ ldrsh(i.OutputRegister(), i.InputOffset()); + EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kArmStrh: __ strh(i.InputRegister(0), i.InputOffset(1)); @@ -1474,6 +1539,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArmLdr: __ ldr(i.OutputRegister(), i.InputOffset()); + EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kArmStr: __ str(i.InputRegister(0), i.InputOffset(1)); @@ -1629,6 +1695,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; } + case kArmDsbIsb: { + __ dsb(SY); + __ isb(SY); + break; + } case kArmF32x4Splat: { int src_code = i.InputFloatRegister(0).code(); __ vdup(Neon32, i.OutputSimd128Register(), @@ -2202,41 +2273,50 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); DCHECK(dst == i.InputSimd128Register(0)); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6] + __ vmov(scratch, src1); + __ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6] break; } case kArmS32x4UnzipRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); DCHECK(dst == i.InputSimd128Register(0)); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft). - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7] + __ vmov(scratch, src1); + __ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7] break; } case kArmS32x4TransposeLeft: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); DCHECK(dst == i.InputSimd128Register(0)); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] + __ vmov(scratch, src1); + __ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6] break; } case kArmS32x4Shuffle: { Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); // Check for in-place shuffles. // If dst == src0 == src1, then the shuffle is unary and we only use src0. if (dst == src0) { - __ vmov(kScratchQuadReg, src0); - src0 = kScratchQuadReg; + Simd128Register scratch = temps.AcquireQ(); + __ vmov(scratch, src0); + src0 = scratch; } else if (dst == src1) { - __ vmov(kScratchQuadReg, src1); - src1 = kScratchQuadReg; + Simd128Register scratch = temps.AcquireQ(); + __ vmov(scratch, src1); + src1 = scratch; } // Perform shuffle as a vmov per lane. int dst_code = dst.code() * 4; @@ -2258,10 +2338,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmS32x4TransposeRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] + __ vmov(scratch, src1); + __ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7] break; } case kArmS16x8ZipLeft: { @@ -2285,37 +2367,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmS16x8UnzipLeft: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14] + __ vmov(scratch, src1); + __ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14] break; } case kArmS16x8UnzipRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15] + __ vmov(scratch, src1); + __ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15] break; } case kArmS16x8TransposeLeft: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14] + __ vmov(scratch, src1); + __ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14] break; } case kArmS16x8TransposeRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] + __ vmov(scratch, src1); + __ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15] break; } case kArmS8x16ZipLeft: { @@ -2339,37 +2429,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmS8x16UnzipLeft: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30] + __ vmov(scratch, src1); + __ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30] break; } case kArmS8x16UnzipRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). - __ vmov(kScratchQuadReg, src1); - __ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31] + __ vmov(scratch, src1); + __ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31] break; } case kArmS8x16TransposeLeft: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30] + __ vmov(scratch, src1); + __ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30] break; } case kArmS8x16TransposeRight: { Simd128Register dst = i.OutputSimd128Register(), src1 = i.InputSimd128Register(1); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); DCHECK(dst == i.InputSimd128Register(0)); // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). - __ vmov(kScratchQuadReg, src1); - __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] + __ vmov(scratch, src1); + __ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31] break; } case kArmS8x16Concat: { @@ -2382,12 +2480,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); DwVfpRegister table_base = src0.low(); + UseScratchRegisterScope temps(tasm()); + Simd128Register scratch = temps.AcquireQ(); // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and // src1. They must be consecutive. int table_size = src0 == src1 ? 2 : 4; DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code()); - // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. - int scratch_s_base = kScratchQuadReg.code() * 4; + // The shuffle lane mask is a byte mask, materialize in scratch. + int scratch_s_base = scratch.code() * 4; for (int j = 0; j < 4; j++) { uint32_t four_lanes = i.InputUint32(2 + j); // Ensure byte indices are in [0, 31] so masks are never NaNs. @@ -2397,12 +2497,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } NeonListOperand table(table_base, table_size); if (dst != src0 && dst != src1) { - __ vtbl(dst.low(), table, kScratchQuadReg.low()); - __ vtbl(dst.high(), table, kScratchQuadReg.high()); + __ vtbl(dst.low(), table, scratch.low()); + __ vtbl(dst.high(), table, scratch.high()); } else { - __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); - __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); - __ vmov(dst, kScratchQuadReg); + __ vtbl(scratch.low(), table, scratch.low()); + __ vtbl(scratch.high(), table, scratch.high()); + __ vmov(dst, scratch); } break; } @@ -2432,149 +2532,156 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmS1x4AnyTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); - __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister scratch = temps.AcquireD(); + __ vpmax(NeonU32, scratch, src.low(), src.high()); + __ vpmax(NeonU32, scratch, scratch, scratch); + __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0); break; } case kArmS1x4AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high()); - __ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister scratch = temps.AcquireD(); + __ vpmin(NeonU32, scratch, src.low(), src.high()); + __ vpmin(NeonU32, scratch, scratch, scratch); + __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0); break; } case kArmS1x8AnyTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high()); - __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister scratch = temps.AcquireD(); + __ vpmax(NeonU16, scratch, src.low(), src.high()); + __ vpmax(NeonU16, scratch, scratch, scratch); + __ vpmax(NeonU16, scratch, scratch, scratch); + __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0); break; } case kArmS1x8AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high()); - __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, - kScratchDoubleReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister scratch = temps.AcquireD(); + __ vpmin(NeonU16, scratch, src.low(), src.high()); + __ vpmin(NeonU16, scratch, scratch, scratch); + __ vpmin(NeonU16, scratch, scratch, scratch); + __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0); break; } case kArmS1x16AnyTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmax(NeonU8, kScratchDoubleReg, src.low(), src.high()); - __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - // vtst to detect any bits in the bottom 32 bits of kScratchDoubleReg. + UseScratchRegisterScope temps(tasm()); + QwNeonRegister q_scratch = temps.AcquireQ(); + DwVfpRegister d_scratch = q_scratch.low(); + __ vpmax(NeonU8, d_scratch, src.low(), src.high()); + __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch); + // vtst to detect any bits in the bottom 32 bits of d_scratch. // This saves an instruction vs. the naive sequence of vpmax. // kDoubleRegZero is not changed, since it is 0. - __ vtst(Neon32, kScratchQuadReg, kScratchQuadReg, kScratchQuadReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + __ vtst(Neon32, q_scratch, q_scratch, q_scratch); + __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0); break; } case kArmS1x16AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); - __ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high()); - __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS8, 0); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister scratch = temps.AcquireD(); + __ vpmin(NeonU8, scratch, src.low(), src.high()); + __ vpmin(NeonU8, scratch, scratch, scratch); + __ vpmin(NeonU8, scratch, scratch, scratch); + __ vpmin(NeonU8, scratch, scratch, scratch); + __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0); break; } - case kAtomicLoadInt8: + case kWord32AtomicLoadInt8: ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb); break; - case kAtomicLoadUint8: + case kWord32AtomicLoadUint8: ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb); break; - case kAtomicLoadInt16: + case kWord32AtomicLoadInt16: ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh); break; - case kAtomicLoadUint16: + case kWord32AtomicLoadUint16: ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh); break; - case kAtomicLoadWord32: + case kWord32AtomicLoadWord32: ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr); break; - case kAtomicStoreWord8: + case kWord32AtomicStoreWord8: ASSEMBLE_ATOMIC_STORE_INTEGER(strb); break; - case kAtomicStoreWord16: + case kWord32AtomicStoreWord16: ASSEMBLE_ATOMIC_STORE_INTEGER(strh); break; - case kAtomicStoreWord32: + case kWord32AtomicStoreWord32: ASSEMBLE_ATOMIC_STORE_INTEGER(str); break; - case kAtomicExchangeInt8: + case kWord32AtomicExchangeInt8: ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb); __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); break; - case kAtomicExchangeUint8: + case kWord32AtomicExchangeUint8: ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb); break; - case kAtomicExchangeInt16: + case kWord32AtomicExchangeInt16: ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh); __ sxth(i.OutputRegister(0), i.OutputRegister(0)); break; - case kAtomicExchangeUint16: + case kWord32AtomicExchangeUint16: ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh); break; - case kAtomicExchangeWord32: + case kWord32AtomicExchangeWord32: ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex); break; - case kAtomicCompareExchangeInt8: + case kWord32AtomicCompareExchangeInt8: __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); __ uxtb(i.TempRegister(2), i.InputRegister(2)); ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb, i.TempRegister(2)); __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); break; - case kAtomicCompareExchangeUint8: + case kWord32AtomicCompareExchangeUint8: __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); __ uxtb(i.TempRegister(2), i.InputRegister(2)); ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb, i.TempRegister(2)); break; - case kAtomicCompareExchangeInt16: + case kWord32AtomicCompareExchangeInt16: __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); __ uxth(i.TempRegister(2), i.InputRegister(2)); ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh, i.TempRegister(2)); __ sxth(i.OutputRegister(0), i.OutputRegister(0)); break; - case kAtomicCompareExchangeUint16: + case kWord32AtomicCompareExchangeUint16: __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); __ uxth(i.TempRegister(2), i.InputRegister(2)); ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh, i.TempRegister(2)); break; - case kAtomicCompareExchangeWord32: + case kWord32AtomicCompareExchangeWord32: __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex, i.InputRegister(2)); break; #define ATOMIC_BINOP_CASE(op, inst) \ - case kAtomic##op##Int8: \ + case kWord32Atomic##op##Int8: \ ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \ __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \ break; \ - case kAtomic##op##Uint8: \ + case kWord32Atomic##op##Uint8: \ ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \ break; \ - case kAtomic##op##Int16: \ + case kWord32Atomic##op##Int16: \ ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \ __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \ break; \ - case kAtomic##op##Uint16: \ + case kWord32Atomic##op##Uint16: \ ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \ break; \ - case kAtomic##op##Word32: \ + case kWord32Atomic##op##Word32: \ ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \ break; ATOMIC_BINOP_CASE(Add, add) @@ -2607,6 +2714,20 @@ void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) { if (!branch->fallthru) __ b(flabel); // no fallthru to flabel. } +void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, + Instruction* instr) { + // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal). + if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) { + return; + } + + condition = NegateFlagsCondition(condition); + __ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister, + Operand(kSpeculationPoisonRegister), SBit::LeaveCC, + FlagsConditionToCondition(condition)); + __ csdb(); +} + void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr, BranchInfo* branch) { AssembleArchBranch(instr, branch); @@ -2654,8 +2775,9 @@ void CodeGenerator::AssembleArchTrap(Instruction* instr, __ isolate()), 0); __ LeaveFrame(StackFrame::WASM_COMPILED); - CallDescriptor* descriptor = gen_->linkage()->GetIncomingDescriptor(); - int pop_count = static_cast<int>(descriptor->StackParameterCount()); + auto call_descriptor = gen_->linkage()->GetIncomingDescriptor(); + int pop_count = + static_cast<int>(call_descriptor->StackParameterCount()); __ Drop(pop_count); __ Ret(); } else { @@ -2725,9 +2847,9 @@ void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) { } void CodeGenerator::FinishFrame(Frame* frame) { - CallDescriptor* descriptor = linkage()->GetIncomingDescriptor(); + auto call_descriptor = linkage()->GetIncomingDescriptor(); - const RegList saves_fp = descriptor->CalleeSavedFPRegisters(); + const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters(); if (saves_fp != 0) { frame->AlignSavedCalleeRegisterSlots(); } @@ -2741,7 +2863,7 @@ void CodeGenerator::FinishFrame(Frame* frame) { frame->AllocateSavedCalleeRegisterSlots((last - first + 1) * (kDoubleSize / kPointerSize)); } - const RegList saves = descriptor->CalleeSavedRegisters(); + const RegList saves = call_descriptor->CalleeSavedRegisters(); if (saves != 0) { // Save callee-saved registers. frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves)); @@ -2749,14 +2871,14 @@ void CodeGenerator::FinishFrame(Frame* frame) { } void CodeGenerator::AssembleConstructFrame() { - CallDescriptor* descriptor = linkage()->GetIncomingDescriptor(); + auto call_descriptor = linkage()->GetIncomingDescriptor(); if (frame_access_state()->has_frame()) { - if (descriptor->IsCFunctionCall()) { + if (call_descriptor->IsCFunctionCall()) { __ Push(lr, fp); __ mov(fp, sp); - } else if (descriptor->IsJSFunctionCall()) { + } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (descriptor->PushArgumentCount()) { + if (call_descriptor->PushArgumentCount()) { __ Push(kJavaScriptCallArgCountRegister); } } else { @@ -2766,8 +2888,8 @@ void CodeGenerator::AssembleConstructFrame() { unwinding_info_writer_.MarkFrameConstructed(__ pc_offset()); } - int shrink_slots = - frame()->GetTotalFrameSlotCount() - descriptor->CalculateFixedFrameSize(); + int shrink_slots = frame()->GetTotalFrameSlotCount() - + call_descriptor->CalculateFixedFrameSize(); if (info()->is_osr()) { // TurboFan OSR-compiled functions cannot be entered directly. @@ -2780,10 +2902,11 @@ void CodeGenerator::AssembleConstructFrame() { if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); osr_pc_offset_ = __ pc_offset(); shrink_slots -= osr_helper()->UnoptimizedFrameSlots(); + InitializePoisonForLoadsIfNeeded(); } - const RegList saves = descriptor->CalleeSavedRegisters(); - const RegList saves_fp = descriptor->CalleeSavedFPRegisters(); + const RegList saves = call_descriptor->CalleeSavedRegisters(); + const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters(); if (shrink_slots > 0) { if (info()->IsWasm()) { @@ -2861,8 +2984,8 @@ void CodeGenerator::AssembleConstructFrame() { } void CodeGenerator::AssembleReturn(InstructionOperand* pop) { - CallDescriptor* descriptor = linkage()->GetIncomingDescriptor(); - int pop_count = static_cast<int>(descriptor->StackParameterCount()); + auto call_descriptor = linkage()->GetIncomingDescriptor(); + int pop_count = static_cast<int>(call_descriptor->StackParameterCount()); const int returns = frame()->GetReturnSlotCount(); if (returns != 0) { @@ -2871,13 +2994,13 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { } // Restore registers. - const RegList saves = descriptor->CalleeSavedRegisters(); + const RegList saves = call_descriptor->CalleeSavedRegisters(); if (saves != 0) { __ ldm(ia_w, sp, saves); } // Restore FP registers. - const RegList saves_fp = descriptor->CalleeSavedFPRegisters(); + const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters(); if (saves_fp != 0) { STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32); uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1; @@ -2889,7 +3012,7 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { unwinding_info_writer_.MarkBlockWillExit(); ArmOperandConverter g(this, nullptr); - if (descriptor->IsCFunctionCall()) { + if (call_descriptor->IsCFunctionCall()) { AssembleDeconstructFrame(); } else if (frame_access_state()->has_frame()) { // Canonicalize JSFunction return sites for now unless they have an variable @@ -2922,281 +3045,253 @@ void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); } void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { ArmOperandConverter g(this, nullptr); - // Dispatch on the source and destination operand kinds. Not all - // combinations are possible. - if (source->IsRegister()) { - DCHECK(destination->IsRegister() || destination->IsStackSlot()); - Register src = g.ToRegister(source); - if (destination->IsRegister()) { - __ mov(g.ToRegister(destination), src); - } else { - __ str(src, g.ToMemOperand(destination)); - } - } else if (source->IsStackSlot()) { - DCHECK(destination->IsRegister() || destination->IsStackSlot()); - MemOperand src = g.ToMemOperand(source); - if (destination->IsRegister()) { - __ ldr(g.ToRegister(destination), src); - } else { - Register temp = kScratchReg; - __ ldr(temp, src); - __ str(temp, g.ToMemOperand(destination)); - } - } else if (source->IsConstant()) { - Constant src = g.ToConstant(source); - if (destination->IsRegister() || destination->IsStackSlot()) { - Register dst = - destination->IsRegister() ? g.ToRegister(destination) : kScratchReg; - switch (src.type()) { - case Constant::kInt32: - if (RelocInfo::IsWasmReference(src.rmode())) { - __ mov(dst, Operand(src.ToInt32(), src.rmode())); - } else { - __ mov(dst, Operand(src.ToInt32())); - } - break; - case Constant::kInt64: - UNREACHABLE(); - break; - case Constant::kFloat32: - __ mov(dst, Operand::EmbeddedNumber(src.ToFloat32())); - break; - case Constant::kFloat64: - __ mov(dst, Operand::EmbeddedNumber(src.ToFloat64().value())); - break; - case Constant::kExternalReference: - __ mov(dst, Operand(src.ToExternalReference())); - break; - case Constant::kHeapObject: { - Handle<HeapObject> src_object = src.ToHeapObject(); - Heap::RootListIndex index; - if (IsMaterializableFromRoot(src_object, &index)) { - __ LoadRoot(dst, index); - } else { - __ Move(dst, src_object); - } - break; - } - case Constant::kRpoNumber: - UNREACHABLE(); // TODO(dcarney): loading RPO constants on arm. - break; - } - if (destination->IsStackSlot()) __ str(dst, g.ToMemOperand(destination)); - } else if (src.type() == Constant::kFloat32) { - if (destination->IsFloatStackSlot()) { - MemOperand dst = g.ToMemOperand(destination); - Register temp = kScratchReg; - __ mov(temp, Operand(bit_cast<int32_t>(src.ToFloat32()))); - __ str(temp, dst); + // Helper function to write the given constant to the dst register. + auto MoveConstantToRegister = [&](Register dst, Constant src) { + if (src.type() == Constant::kHeapObject) { + Handle<HeapObject> src_object = src.ToHeapObject(); + Heap::RootListIndex index; + if (IsMaterializableFromRoot(src_object, &index)) { + __ LoadRoot(dst, index); } else { - SwVfpRegister dst = g.ToFloatRegister(destination); - __ vmov(dst, Float32::FromBits(src.ToFloat32AsInt())); + __ Move(dst, src_object); } } else { - DCHECK_EQ(Constant::kFloat64, src.type()); - DwVfpRegister dst = destination->IsFPRegister() - ? g.ToDoubleRegister(destination) - : kScratchDoubleReg; - __ vmov(dst, src.ToFloat64(), kScratchReg); - if (destination->IsDoubleStackSlot()) { - __ vstr(dst, g.ToMemOperand(destination)); - } + __ mov(dst, g.ToImmediate(source)); } - } else if (source->IsFPRegister()) { - MachineRepresentation rep = LocationOperand::cast(source)->representation(); - if (rep == MachineRepresentation::kFloat64) { - DwVfpRegister src = g.ToDoubleRegister(source); - if (destination->IsDoubleRegister()) { - DwVfpRegister dst = g.ToDoubleRegister(destination); - __ Move(dst, src); - } else { - DCHECK(destination->IsDoubleStackSlot()); - __ vstr(src, g.ToMemOperand(destination)); - } - } else if (rep == MachineRepresentation::kFloat32) { - // GapResolver may give us reg codes that don't map to actual s-registers. - // Generate code to work around those cases. - int src_code = LocationOperand::cast(source)->register_code(); - if (destination->IsFloatRegister()) { + }; + switch (MoveType::InferMove(source, destination)) { + case MoveType::kRegisterToRegister: + if (source->IsRegister()) { + __ mov(g.ToRegister(destination), g.ToRegister(source)); + } else if (source->IsFloatRegister()) { + DCHECK(destination->IsFloatRegister()); + // GapResolver may give us reg codes that don't map to actual + // s-registers. Generate code to work around those cases. + int src_code = LocationOperand::cast(source)->register_code(); int dst_code = LocationOperand::cast(destination)->register_code(); __ VmovExtended(dst_code, src_code); + } else if (source->IsDoubleRegister()) { + __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source)); } else { - DCHECK(destination->IsFloatStackSlot()); - __ VmovExtended(g.ToMemOperand(destination), src_code); + __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source)); } - } else { - DCHECK_EQ(MachineRepresentation::kSimd128, rep); - QwNeonRegister src = g.ToSimd128Register(source); - if (destination->IsSimd128Register()) { - QwNeonRegister dst = g.ToSimd128Register(destination); - __ Move(dst, src); + return; + case MoveType::kRegisterToStack: { + MemOperand dst = g.ToMemOperand(destination); + if (source->IsRegister()) { + __ str(g.ToRegister(source), dst); + } else if (source->IsFloatRegister()) { + // GapResolver may give us reg codes that don't map to actual + // s-registers. Generate code to work around those cases. + int src_code = LocationOperand::cast(source)->register_code(); + __ VmovExtended(dst, src_code); + } else if (source->IsDoubleRegister()) { + __ vstr(g.ToDoubleRegister(source), dst); } else { - DCHECK(destination->IsSimd128StackSlot()); - MemOperand dst = g.ToMemOperand(destination); - __ add(kScratchReg, dst.rn(), Operand(dst.offset())); - __ vst1(Neon8, NeonListOperand(src.low(), 2), - NeonMemOperand(kScratchReg)); + UseScratchRegisterScope temps(tasm()); + Register temp = temps.Acquire(); + QwNeonRegister src = g.ToSimd128Register(source); + __ add(temp, dst.rn(), Operand(dst.offset())); + __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp)); } - } - } else if (source->IsFPStackSlot()) { - MemOperand src = g.ToMemOperand(source); - MachineRepresentation rep = - LocationOperand::cast(destination)->representation(); - if (destination->IsFPRegister()) { - if (rep == MachineRepresentation::kFloat64) { - __ vldr(g.ToDoubleRegister(destination), src); - } else if (rep == MachineRepresentation::kFloat32) { + return; + } + case MoveType::kStackToRegister: { + MemOperand src = g.ToMemOperand(source); + if (source->IsStackSlot()) { + __ ldr(g.ToRegister(destination), src); + } else if (source->IsFloatStackSlot()) { + DCHECK(destination->IsFloatRegister()); // GapResolver may give us reg codes that don't map to actual // s-registers. Generate code to work around those cases. int dst_code = LocationOperand::cast(destination)->register_code(); __ VmovExtended(dst_code, src); + } else if (source->IsDoubleStackSlot()) { + __ vldr(g.ToDoubleRegister(destination), src); } else { - DCHECK_EQ(MachineRepresentation::kSimd128, rep); + UseScratchRegisterScope temps(tasm()); + Register temp = temps.Acquire(); QwNeonRegister dst = g.ToSimd128Register(destination); - __ add(kScratchReg, src.rn(), Operand(src.offset())); - __ vld1(Neon8, NeonListOperand(dst.low(), 2), - NeonMemOperand(kScratchReg)); + __ add(temp, src.rn(), Operand(src.offset())); + __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp)); } - } else { - DCHECK(destination->IsFPStackSlot()); - if (rep == MachineRepresentation::kFloat64) { - DwVfpRegister temp = kScratchDoubleReg; + return; + } + case MoveType::kStackToStack: { + MemOperand src = g.ToMemOperand(source); + MemOperand dst = g.ToMemOperand(destination); + UseScratchRegisterScope temps(tasm()); + if (source->IsStackSlot() || source->IsFloatStackSlot()) { + SwVfpRegister temp = temps.AcquireS(); __ vldr(temp, src); - __ vstr(temp, g.ToMemOperand(destination)); - } else if (rep == MachineRepresentation::kFloat32) { - SwVfpRegister temp = kScratchDoubleReg.low(); + __ vstr(temp, dst); + } else if (source->IsDoubleStackSlot()) { + DwVfpRegister temp = temps.AcquireD(); __ vldr(temp, src); - __ vstr(temp, g.ToMemOperand(destination)); + __ vstr(temp, dst); + } else { + DCHECK(source->IsSimd128StackSlot()); + Register temp = temps.Acquire(); + QwNeonRegister temp_q = temps.AcquireQ(); + __ add(temp, src.rn(), Operand(src.offset())); + __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp)); + __ add(temp, dst.rn(), Operand(dst.offset())); + __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp)); + } + return; + } + case MoveType::kConstantToRegister: { + Constant src = g.ToConstant(source); + if (destination->IsRegister()) { + MoveConstantToRegister(g.ToRegister(destination), src); + } else if (destination->IsFloatRegister()) { + __ vmov(g.ToFloatRegister(destination), + Float32::FromBits(src.ToFloat32AsInt())); } else { - DCHECK_EQ(MachineRepresentation::kSimd128, rep); - MemOperand dst = g.ToMemOperand(destination); - __ add(kScratchReg, src.rn(), Operand(src.offset())); - __ vld1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2), - NeonMemOperand(kScratchReg)); - __ add(kScratchReg, dst.rn(), Operand(dst.offset())); - __ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2), - NeonMemOperand(kScratchReg)); + // TODO(arm): Look into optimizing this further if possible. Supporting + // the NEON version of VMOV may help. + __ vmov(g.ToDoubleRegister(destination), src.ToFloat64()); } + return; + } + case MoveType::kConstantToStack: { + Constant src = g.ToConstant(source); + MemOperand dst = g.ToMemOperand(destination); + if (destination->IsStackSlot()) { + UseScratchRegisterScope temps(tasm()); + // Acquire a S register instead of a general purpose register in case + // `vstr` needs one to compute the address of `dst`. + SwVfpRegister s_temp = temps.AcquireS(); + { + // TODO(arm): This sequence could be optimized further if necessary by + // writing the constant directly into `s_temp`. + UseScratchRegisterScope temps(tasm()); + Register temp = temps.Acquire(); + MoveConstantToRegister(temp, src); + __ vmov(s_temp, temp); + } + __ vstr(s_temp, dst); + } else if (destination->IsFloatStackSlot()) { + UseScratchRegisterScope temps(tasm()); + SwVfpRegister temp = temps.AcquireS(); + __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt())); + __ vstr(temp, dst); + } else { + DCHECK(destination->IsDoubleStackSlot()); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister temp = temps.AcquireD(); + // TODO(arm): Look into optimizing this further if possible. Supporting + // the NEON version of VMOV may help. + __ vmov(temp, src.ToFloat64()); + __ vstr(temp, g.ToMemOperand(destination)); + } + return; } - } else { - UNREACHABLE(); } + UNREACHABLE(); } void CodeGenerator::AssembleSwap(InstructionOperand* source, InstructionOperand* destination) { ArmOperandConverter g(this, nullptr); - // Dispatch on the source and destination operand kinds. Not all - // combinations are possible. - if (source->IsRegister()) { - // Register-register. - Register temp = kScratchReg; - Register src = g.ToRegister(source); - if (destination->IsRegister()) { - Register dst = g.ToRegister(destination); - __ Move(temp, src); - __ Move(src, dst); - __ Move(dst, temp); - } else { - DCHECK(destination->IsStackSlot()); - MemOperand dst = g.ToMemOperand(destination); - __ mov(temp, src); - __ ldr(src, dst); - __ str(temp, dst); - } - } else if (source->IsStackSlot()) { - DCHECK(destination->IsStackSlot()); - Register temp_0 = kScratchReg; - SwVfpRegister temp_1 = kScratchDoubleReg.low(); - MemOperand src = g.ToMemOperand(source); - MemOperand dst = g.ToMemOperand(destination); - __ ldr(temp_0, src); - __ vldr(temp_1, dst); - __ str(temp_0, dst); - __ vstr(temp_1, src); - } else if (source->IsFPRegister()) { - MachineRepresentation rep = LocationOperand::cast(source)->representation(); - LowDwVfpRegister temp = kScratchDoubleReg; - if (rep == MachineRepresentation::kFloat64) { - DwVfpRegister src = g.ToDoubleRegister(source); - if (destination->IsFPRegister()) { - DwVfpRegister dst = g.ToDoubleRegister(destination); - __ Swap(src, dst); - } else { - DCHECK(destination->IsFPStackSlot()); - MemOperand dst = g.ToMemOperand(destination); - __ Move(temp, src); - __ vldr(src, dst); - __ vstr(temp, dst); - } - } else if (rep == MachineRepresentation::kFloat32) { - int src_code = LocationOperand::cast(source)->register_code(); - if (destination->IsFPRegister()) { + switch (MoveType::InferSwap(source, destination)) { + case MoveType::kRegisterToRegister: + if (source->IsRegister()) { + __ Swap(g.ToRegister(source), g.ToRegister(destination)); + } else if (source->IsFloatRegister()) { + DCHECK(destination->IsFloatRegister()); + // GapResolver may give us reg codes that don't map to actual + // s-registers. Generate code to work around those cases. + UseScratchRegisterScope temps(tasm()); + LowDwVfpRegister temp = temps.AcquireLowD(); + int src_code = LocationOperand::cast(source)->register_code(); int dst_code = LocationOperand::cast(destination)->register_code(); __ VmovExtended(temp.low().code(), src_code); __ VmovExtended(src_code, dst_code); __ VmovExtended(dst_code, temp.low().code()); + } else if (source->IsDoubleRegister()) { + __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination)); } else { - DCHECK(destination->IsFPStackSlot()); - MemOperand dst = g.ToMemOperand(destination); + __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination)); + } + return; + case MoveType::kRegisterToStack: { + MemOperand dst = g.ToMemOperand(destination); + if (source->IsRegister()) { + Register src = g.ToRegister(source); + UseScratchRegisterScope temps(tasm()); + SwVfpRegister temp = temps.AcquireS(); + __ vmov(temp, src); + __ ldr(src, dst); + __ vstr(temp, dst); + } else if (source->IsFloatRegister()) { + int src_code = LocationOperand::cast(source)->register_code(); + UseScratchRegisterScope temps(tasm()); + LowDwVfpRegister temp = temps.AcquireLowD(); __ VmovExtended(temp.low().code(), src_code); __ VmovExtended(src_code, dst); __ vstr(temp.low(), dst); - } - } else { - DCHECK_EQ(MachineRepresentation::kSimd128, rep); - QwNeonRegister src = g.ToSimd128Register(source); - if (destination->IsFPRegister()) { - QwNeonRegister dst = g.ToSimd128Register(destination); - __ Swap(src, dst); + } else if (source->IsDoubleRegister()) { + UseScratchRegisterScope temps(tasm()); + DwVfpRegister temp = temps.AcquireD(); + DwVfpRegister src = g.ToDoubleRegister(source); + __ Move(temp, src); + __ vldr(src, dst); + __ vstr(temp, dst); } else { - DCHECK(destination->IsFPStackSlot()); - MemOperand dst = g.ToMemOperand(destination); - __ Move(kScratchQuadReg, src); - __ add(kScratchReg, dst.rn(), Operand(dst.offset())); - __ vld1(Neon8, NeonListOperand(src.low(), 2), - NeonMemOperand(kScratchReg)); - __ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2), - NeonMemOperand(kScratchReg)); + QwNeonRegister src = g.ToSimd128Register(source); + UseScratchRegisterScope temps(tasm()); + Register temp = temps.Acquire(); + QwNeonRegister temp_q = temps.AcquireQ(); + __ Move(temp_q, src); + __ add(temp, dst.rn(), Operand(dst.offset())); + __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp)); + __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp)); } + return; } - } else if (source->IsFPStackSlot()) { - DCHECK(destination->IsFPStackSlot()); - Register temp_0 = kScratchReg; - LowDwVfpRegister temp_1 = kScratchDoubleReg; - MemOperand src0 = g.ToMemOperand(source); - MemOperand dst0 = g.ToMemOperand(destination); - MachineRepresentation rep = LocationOperand::cast(source)->representation(); - if (rep == MachineRepresentation::kFloat64) { - MemOperand src1(src0.rn(), src0.offset() + kPointerSize); - MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize); - __ vldr(temp_1, dst0); // Save destination in temp_1. - __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. - __ str(temp_0, dst0); - __ ldr(temp_0, src1); - __ str(temp_0, dst1); - __ vstr(temp_1, src0); - } else if (rep == MachineRepresentation::kFloat32) { - __ vldr(temp_1.low(), dst0); // Save destination in temp_1. - __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. - __ str(temp_0, dst0); - __ vstr(temp_1.low(), src0); - } else { - DCHECK_EQ(MachineRepresentation::kSimd128, rep); - MemOperand src1(src0.rn(), src0.offset() + kDoubleSize); - MemOperand dst1(dst0.rn(), dst0.offset() + kDoubleSize); - __ vldr(kScratchQuadReg.low(), dst0); - __ vldr(kScratchQuadReg.high(), src0); - __ vstr(kScratchQuadReg.low(), src0); - __ vstr(kScratchQuadReg.high(), dst0); - __ vldr(kScratchQuadReg.low(), dst1); - __ vldr(kScratchQuadReg.high(), src1); - __ vstr(kScratchQuadReg.low(), src1); - __ vstr(kScratchQuadReg.high(), dst1); + case MoveType::kStackToStack: { + MemOperand src = g.ToMemOperand(source); + MemOperand dst = g.ToMemOperand(destination); + if (source->IsStackSlot() || source->IsFloatStackSlot()) { + UseScratchRegisterScope temps(tasm()); + SwVfpRegister temp_0 = temps.AcquireS(); + SwVfpRegister temp_1 = temps.AcquireS(); + __ vldr(temp_0, dst); + __ vldr(temp_1, src); + __ vstr(temp_0, src); + __ vstr(temp_1, dst); + } else if (source->IsDoubleStackSlot()) { + UseScratchRegisterScope temps(tasm()); + DwVfpRegister temp_0 = temps.AcquireD(); + DwVfpRegister temp_1 = temps.AcquireD(); + __ vldr(temp_0, dst); + __ vldr(temp_1, src); + __ vstr(temp_0, src); + __ vstr(temp_1, dst); + } else { + DCHECK(source->IsSimd128StackSlot()); + MemOperand src0 = src; + MemOperand dst0 = dst; + MemOperand src1(src.rn(), src.offset() + kDoubleSize); + MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize); + UseScratchRegisterScope temps(tasm()); + DwVfpRegister temp_0 = temps.AcquireD(); + DwVfpRegister temp_1 = temps.AcquireD(); + __ vldr(temp_0, dst0); + __ vldr(temp_1, src0); + __ vstr(temp_0, src0); + __ vstr(temp_1, dst0); + __ vldr(temp_0, dst1); + __ vldr(temp_1, src1); + __ vstr(temp_0, src1); + __ vstr(temp_1, dst1); + } + return; } - } else { - // No other combinations are possible. - UNREACHABLE(); + default: + UNREACHABLE(); + break; } } @@ -3206,7 +3301,6 @@ void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) { } #undef __ -#undef kScratchReg } // namespace compiler } // namespace internal |