// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

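// A node in the dependency graph, wrapping a single instruction. Both
// total_latency_ and start_cycle_ are initialized to -1, meaning "not yet
// computed": the former is filled in by ComputeTotalLatencies(), the latter
// is raised as the node's predecessors get scheduled.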
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
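  // The random number generator is only needed by the StressSchedulerQueue,
  // which schedules ready instructions in a random order instead of
  // critical-path first.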
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

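// Schedules and emits all the instructions accumulated for this basic block,
// then closes the block in the instruction sequence.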
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
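  // A barrier flushes the dependency graph built so far: everything
  // accumulated up to this point is scheduled and emitted first, then the
  // barrier itself is emitted in place.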
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);

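  // Fixed register parameters are the live-in register markers emitted at the
  // start of the block. They are chained together to keep their relative
  // order, and every later instruction depends on the last marker so that it
  // cannot be hoisted above them.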
  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
    }

    // Update the last deoptimization or trap point.
    if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

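// List scheduling: nodes whose predecessors have all been scheduled sit in a
// ready list, and on each cycle the queue picks the best candidate
// (critical-path first in normal operation, random when
// FLAG_turbo_stress_instruction_scheduling is set).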
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

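    // The critical-path queue returns nullptr when no ready node has reached
    // its start cycle yet; in that case nothing is emitted and we simply move
    // on to the next cycle.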
    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
#if V8_ENABLE_WEBASSEMBLY
    case kArchTailCallWasm:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchAbortCSADcheck:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

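    // All architecture-specific opcodes are delegated to the backend's
    // GetTargetInstructionFlags() implementation.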
#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

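// The total latency of a node is the length of the longest latency path from
// that node to the end of the block: its own latency plus the maximum total
// latency of its successors. graph_ is in emission order and successors always
// appear after their predecessors, so one reverse pass suffices.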
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8