// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/execution/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to side-effect
      // instructions, but independent loads can be reordered with respect to
      // each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
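  // This loop is a cycle-driven list scheduler: each iteration corresponds to
  // one cycle, and we try to pop the best ready node whose operands are
  // available at the current cycle. PopBestCandidate may return nullptr (e.g.
  // when no node in the critical-path queue has reached its start cycle yet),
  // in which case the cycle counter simply advances and we retry.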
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchAbortCSAAssert:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8