// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
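  // At this point graph_ already contains every instruction added to the
  // current block, so the terminator picks up all of them as predecessors
  // and can only be emitted after all of them have been scheduled.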
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
    }

    // Update the last deoptimization or trap point.
    if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
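  // A node's total latency is its own latency plus the largest total latency
  // among its successors, i.e. the length of the longest latency chain
  // starting at that node.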
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
#if V8_ENABLE_WEBASSEMBLY
    case kArchTailCallWasm:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchAbortCSADcheck:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8