// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/move-optimizer.h"

#include "src/codegen/register-configuration.h"

namespace v8 {
namespace internal {
namespace compiler {

namespace {

struct MoveKey {
  InstructionOperand source;
  InstructionOperand destination;
};

struct MoveKeyCompare {
  bool operator()(const MoveKey& a, const MoveKey& b) const {
    if (a.source.EqualsCanonicalized(b.source)) {
      return a.destination.CompareCanonicalized(b.destination);
    }
    return a.source.CompareCanonicalized(b.source);
  }
};

using MoveMap = ZoneMap<MoveKey, unsigned, MoveKeyCompare>;

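// A set of instruction operands backed by a caller-provided ZoneVector, with
// linear lookup. With combined FP register aliasing, it also tracks which FP
// representations it has seen so that alias checks can be skipped while only
// one representation is present.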
class OperandSet {
 public:
  explicit OperandSet(ZoneVector<InstructionOperand>* buffer)
      : set_(buffer), fp_reps_(0) {
    buffer->clear();
  }

  void InsertOp(const InstructionOperand& op) {
    set_->push_back(op);

    if (kFPAliasing == AliasingKind::kCombine && op.IsFPRegister())
      fp_reps_ |= RepresentationBit(LocationOperand::cast(op).representation());
  }

  bool Contains(const InstructionOperand& op) const {
    for (const InstructionOperand& elem : *set_) {
      if (elem.EqualsCanonicalized(op)) return true;
    }
    return false;
  }

  bool ContainsOpOrAlias(const InstructionOperand& op) const {
    if (Contains(op)) return true;

    if (kFPAliasing == AliasingKind::kCombine && op.IsFPRegister()) {
      // Platforms where FP registers have complex aliasing need extra checks.
      const LocationOperand& loc = LocationOperand::cast(op);
      MachineRepresentation rep = loc.representation();
      // If we haven't encountered mixed rep FP registers, skip the extra
      // checks.
      if (!HasMixedFPReps(fp_reps_ | RepresentationBit(rep))) return false;

      // Check register against aliasing registers of other FP representations.
      MachineRepresentation other_rep1, other_rep2;
      switch (rep) {
        case MachineRepresentation::kFloat32:
          other_rep1 = MachineRepresentation::kFloat64;
          other_rep2 = MachineRepresentation::kSimd128;
          break;
        case MachineRepresentation::kFloat64:
          other_rep1 = MachineRepresentation::kFloat32;
          other_rep2 = MachineRepresentation::kSimd128;
          break;
        case MachineRepresentation::kSimd128:
          other_rep1 = MachineRepresentation::kFloat32;
          other_rep2 = MachineRepresentation::kFloat64;
          break;
        default:
          UNREACHABLE();
      }
      const RegisterConfiguration* config = RegisterConfiguration::Default();
      int base = -1;
      int aliases =
          config->GetAliases(rep, loc.register_code(), other_rep1, &base);
      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
      while (aliases--) {
        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
                                      base + aliases))) {
          return true;
        }
      }
      aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
      while (aliases--) {
        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
                                      base + aliases))) {
          return true;
        }
      }
    }
    return false;
  }

 private:
  static bool HasMixedFPReps(int reps) {
    return reps && !base::bits::IsPowerOfTwo(reps);
  }

  ZoneVector<InstructionOperand>* set_;
  int fp_reps_;
};

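// Returns the first gap position of |instr| that holds a non-redundant move,
// eliminating and clearing redundant moves encountered along the way. Returns
// LAST_GAP_POSITION + 1 if both gap positions are empty or redundant.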
int FindFirstNonEmptySlot(const Instruction* instr) {
  int i = Instruction::FIRST_GAP_POSITION;
  for (; i <= Instruction::LAST_GAP_POSITION; i++) {
    ParallelMove* moves = instr->parallel_moves()[i];
    if (moves == nullptr) continue;
    for (MoveOperands* move : *moves) {
      if (!move->IsRedundant()) return i;
      move->Eliminate();
    }
    moves->clear();  // Clear this redundant move.
  }
  return i;
}

}  // namespace

MoveOptimizer::MoveOptimizer(Zone* local_zone, InstructionSequence* code)
    : local_zone_(local_zone),
      code_(code),
      local_vector_(local_zone),
      operand_buffer1(local_zone),
      operand_buffer2(local_zone) {}

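// The optimizer runs in four passes: compress each instruction's two gap
// positions into the first one, push moves down within each block, sink moves
// shared by all predecessors of a merge block into that block, and finally
// split repeated loads of the same constant or stack slot so that later
// copies read from the first load's destination register instead.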
void MoveOptimizer::Run() {
  for (Instruction* instruction : code()->instructions()) {
    CompressGaps(instruction);
  }
  for (InstructionBlock* block : code()->instruction_blocks()) {
    CompressBlock(block);
  }
  for (InstructionBlock* block : code()->instruction_blocks()) {
    if (block->PredecessorCount() <= 1) continue;
    if (!block->IsDeferred()) {
      bool has_only_deferred = true;
      for (RpoNumber& pred_id : block->predecessors()) {
        if (!code()->InstructionBlockAt(pred_id)->IsDeferred()) {
          has_only_deferred = false;
          break;
        }
      }
      // OptimizeMerge would pull common moves down into this block. If those
      // moves live in deferred predecessor blocks and this block (their
      // closest common successor) is not deferred, pulling them down would
      // defeat the optimization of confining spills/fills to deferred blocks,
      // so skip the merge.
      if (has_only_deferred) continue;
    }
    OptimizeMerge(block);
  }
  for (Instruction* gap : code()->instructions()) {
    FinalizeMoves(gap);
  }
}

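// Eliminates moves in |instruction|'s first gap whose destinations are
// overwritten by the instruction's outputs or temps without being read as
// inputs; for returns and tail calls, also eliminates every move whose
// destination is not an input of the instruction.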
void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
  if (instruction->IsCall()) return;
  ParallelMove* moves = instruction->parallel_moves()[0];
  if (moves == nullptr) return;

  DCHECK(instruction->parallel_moves()[1] == nullptr ||
         instruction->parallel_moves()[1]->empty());

  OperandSet outputs(&operand_buffer1);
  OperandSet inputs(&operand_buffer2);

  // Outputs and temps are treated together as potentially clobbering a
  // destination operand.
  for (size_t i = 0; i < instruction->OutputCount(); ++i) {
    outputs.InsertOp(*instruction->OutputAt(i));
  }
  for (size_t i = 0; i < instruction->TempCount(); ++i) {
    outputs.InsertOp(*instruction->TempAt(i));
  }

  // Input operands block elisions.
  for (size_t i = 0; i < instruction->InputCount(); ++i) {
    inputs.InsertOp(*instruction->InputAt(i));
  }

  // Elide moves made redundant by the instruction.
  for (MoveOperands* move : *moves) {
    if (outputs.ContainsOpOrAlias(move->destination()) &&
        !inputs.ContainsOpOrAlias(move->destination())) {
      move->Eliminate();
    }
  }

  // A ret or tail call makes any assignment before it unnecessary, except for
  // the ones supplying its inputs.
  if (instruction->IsRet() || instruction->IsTailCall()) {
    for (MoveOperands* move : *moves) {
      if (!inputs.ContainsOpOrAlias(move->destination())) {
        move->Eliminate();
      }
    }
  }
}

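// Pushes eligible moves from the first gap of |from| down into the first gap
// of |to|, the instruction that follows |from|, provided the move's
// destination is not an input of |from| and its source is not overwritten by
// |from| or by a move that stays behind.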
void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
  if (from->IsCall()) return;

  ParallelMove* from_moves = from->parallel_moves()[0];
  if (from_moves == nullptr || from_moves->empty()) return;

  OperandSet dst_cant_be(&operand_buffer1);
  OperandSet src_cant_be(&operand_buffer2);

  // If an operand is an input to the instruction, we cannot move assignments
  // where it appears on the LHS.
  for (size_t i = 0; i < from->InputCount(); ++i) {
    dst_cant_be.InsertOp(*from->InputAt(i));
  }
  // If an operand is an output of the instruction, we cannot move assignments
  // where it appears on the RHS, because the moved assignment would then read
  // the value written by the instruction rather than the value it had before.
  // Same for temp operands.
  // The output can't appear on the LHS because we performed
  // RemoveClobberedDestinations for the "from" instruction.
  for (size_t i = 0; i < from->OutputCount(); ++i) {
    src_cant_be.InsertOp(*from->OutputAt(i));
  }
  for (size_t i = 0; i < from->TempCount(); ++i) {
    src_cant_be.InsertOp(*from->TempAt(i));
  }
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    // Assume dest has a value "V". If we have a "dest = y" move, then we can't
    // move "z = dest", because z would become y rather than "V".
    // We assume CompressMoves has happened before this, which means we don't
    // have more than one assignment to dest.
    src_cant_be.InsertOp(move->destination());
  }

  ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
  // Start with all the moves that don't have conflicting source or destination
  // operands; these are the candidates for being moved down.
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
      MoveKey key = {move->source(), move->destination()};
      move_candidates.insert(key);
    }
  }
  if (move_candidates.empty()) return;

  // Stabilize the candidate set.
  bool changed = false;
  do {
    changed = false;
    for (auto iter = move_candidates.begin(); iter != move_candidates.end();) {
      auto current = iter;
      ++iter;
      InstructionOperand src = current->source;
      if (src_cant_be.ContainsOpOrAlias(src)) {
        src_cant_be.InsertOp(current->destination);
        move_candidates.erase(current);
        changed = true;
      }
    }
  } while (changed);

  ParallelMove to_move(local_zone());
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    MoveKey key = {move->source(), move->destination()};
    if (move_candidates.find(key) != move_candidates.end()) {
      to_move.AddMove(move->source(), move->destination(), code_zone());
      move->Eliminate();
    }
  }
  if (to_move.empty()) return;

  ParallelMove* dest =
      to->GetOrCreateParallelMove(Instruction::GapPosition::START, code_zone());

  CompressMoves(&to_move, dest);
  DCHECK(dest->empty());
  for (MoveOperands* m : to_move) {
    dest->push_back(m);
  }
}

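// Merges the moves of |right| into |left|, eliminating moves that become
// redundant in the combined gap, and clears |right|.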
void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
  if (right == nullptr) return;

  MoveOpVector& eliminated = local_vector();
  DCHECK(eliminated.empty());

  if (!left->empty()) {
    // Modify the right moves in place and collect moves that will be killed by
    // merging the two gaps.
    for (MoveOperands* move : *right) {
      if (move->IsRedundant()) continue;
      left->PrepareInsertAfter(move, &eliminated);
    }
    // Eliminate dead moves.
    for (MoveOperands* to_eliminate : eliminated) {
      to_eliminate->Eliminate();
    }
    eliminated.clear();
  }
  // Add all possibly modified moves from the right side.
  for (MoveOperands* move : *right) {
    if (move->IsRedundant()) continue;
    left->push_back(move);
  }
  // Nuke right.
  right->clear();
  DCHECK(eliminated.empty());
}

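// Consolidates all of |instruction|'s moves into its first gap position,
// leaving the second one empty.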
void MoveOptimizer::CompressGaps(Instruction* instruction) {
  int i = FindFirstNonEmptySlot(instruction);
  bool has_moves = i <= Instruction::LAST_GAP_POSITION;
  USE(has_moves);

  if (i == Instruction::LAST_GAP_POSITION) {
    std::swap(instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
              instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
  } else if (i == Instruction::FIRST_GAP_POSITION) {
    CompressMoves(
        instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
        instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
  }
  // We either have no moves, or, after swapping or compressing, we have
  // all the moves in the first gap position, and none in the second/end gap
  // position.
  ParallelMove* first =
      instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION];
  ParallelMove* last =
      instruction->parallel_moves()[Instruction::LAST_GAP_POSITION];
  USE(first);
  USE(last);

  DCHECK(!has_moves ||
         (first != nullptr && (last == nullptr || last->empty())));
}

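// Pushes gap moves downwards through the block, dropping moves whose
// destinations get clobbered along the way.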
void MoveOptimizer::CompressBlock(InstructionBlock* block) {
  int first_instr_index = block->first_instruction_index();
  int last_instr_index = block->last_instruction_index();

  // Start by removing gap assignments where the output of the instruction that
  // follows the gap appears on the LHS, as long as they are not needed by its
  // inputs.
  Instruction* prev_instr = code()->instructions()[first_instr_index];
  RemoveClobberedDestinations(prev_instr);

  for (int index = first_instr_index + 1; index <= last_instr_index; ++index) {
    Instruction* instr = code()->instructions()[index];
    // Migrate eligible moves from the gap of prev_instr down into the gap of
    // instr.
    MigrateMoves(instr, prev_instr);
    // Remove gap assignments clobbered by instr's output.
    RemoveClobberedDestinations(instr);
    prev_instr = instr;
  }
}

const Instruction* MoveOptimizer::LastInstruction(
    const InstructionBlock* block) const {
  return code()->instructions()[block->last_instruction_index()];
}

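// If every predecessor of |block| ends with the same non-conflicting gap
// moves, performs those moves once at the start of |block| instead and
// removes them from the predecessors.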
void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
  DCHECK_LT(1, block->PredecessorCount());
  // Ensure that the last instruction of each incoming block doesn't contain
  // anything that would prevent moving gap moves across it.
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);

    // If the predecessor has more than one successor, we shouldn't attempt to
    // move down to this block (one of the successors) any of the gap moves,
    // because their effect may be necessary to the other successors.
    if (pred->SuccessorCount() > 1) return;

    const Instruction* last_instr =
        code()->instructions()[pred->last_instruction_index()];
    if (last_instr->IsCall()) return;
    if (last_instr->TempCount() != 0) return;
    if (last_instr->OutputCount() != 0) return;
    for (size_t i = 0; i < last_instr->InputCount(); ++i) {
      const InstructionOperand* op = last_instr->InputAt(i);
      if (!op->IsConstant() && !op->IsImmediate()) return;
    }
  }
  // TODO(dcarney): pass a ZoneStats down for this?
  MoveMap move_map(local_zone());
  size_t correct_counts = 0;
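  // |correct_counts| counts the moves that occur in every predecessor.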
  // Accumulate set of shared moves.
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
    const Instruction* instr = LastInstruction(pred);
    if (instr->parallel_moves()[0] == nullptr ||
        instr->parallel_moves()[0]->empty()) {
      return;
    }
    for (const MoveOperands* move : *instr->parallel_moves()[0]) {
      if (move->IsRedundant()) continue;
      InstructionOperand src = move->source();
      InstructionOperand dst = move->destination();
      MoveKey key = {src, dst};
      auto res = move_map.insert(std::make_pair(key, 1));
      if (!res.second) {
        res.first->second++;
        if (res.first->second == block->PredecessorCount()) {
          correct_counts++;
        }
      }
    }
  }
  if (move_map.empty() || correct_counts == 0) return;

  // Find insertion point.
  Instruction* instr = code()->instructions()[block->first_instruction_index()];

  if (correct_counts != move_map.size()) {
    // Moves that are unique to each predecessor won't be pushed to the common
    // successor.
    OperandSet conflicting_srcs(&operand_buffer1);
    for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) {
      auto current = iter;
      ++iter;
      if (current->second != block->PredecessorCount()) {
        InstructionOperand dest = current->first.destination;
        // Not all the moves in all the gaps are the same. Maybe some are. If
        // there are such moves, we could move them, but the destination of the
        // moves staying behind can't appear as a source of a common move,
        // because the move staying behind will clobber this destination.
        conflicting_srcs.InsertOp(dest);
        move_map.erase(current);
      }
    }

    bool changed = false;
    do {
      // If a common move can't be pushed to the common successor, then its
      // destination also can't appear as source to any move being pushed.
      changed = false;
      for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) {
        auto current = iter;
        ++iter;
        DCHECK_EQ(block->PredecessorCount(), current->second);
        if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) {
          conflicting_srcs.InsertOp(current->first.destination);
          move_map.erase(current);
          changed = true;
        }
      }
    } while (changed);
  }

  if (move_map.empty()) return;

  DCHECK_NOT_NULL(instr);
  bool gap_initialized = true;
  if (instr->parallel_moves()[0] != nullptr &&
      !instr->parallel_moves()[0]->empty()) {
    // Will compress after insertion.
    gap_initialized = false;
    std::swap(instr->parallel_moves()[0], instr->parallel_moves()[1]);
  }
  ParallelMove* moves = instr->GetOrCreateParallelMove(
      static_cast<Instruction::GapPosition>(0), code_zone());
  // Delete relevant entries in predecessors and move everything to block.
  bool first_iteration = true;
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
    for (MoveOperands* move : *LastInstruction(pred)->parallel_moves()[0]) {
      if (move->IsRedundant()) continue;
      MoveKey key = {move->source(), move->destination()};
      auto it = move_map.find(key);
      if (it != move_map.end()) {
        if (first_iteration) {
          moves->AddMove(move->source(), move->destination());
        }
        move->Eliminate();
      }
    }
    first_iteration = false;
  }
  // Compress.
  if (!gap_initialized) {
    CompressMoves(instr->parallel_moves()[0], instr->parallel_moves()[1]);
  }
  CompressBlock(block);
}

namespace {

bool IsSlot(const InstructionOperand& op) {
  return op.IsStackSlot() || op.IsFPStackSlot();
}

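// Orders loads by canonicalized source; within a group of moves that share a
// source, register destinations come before stack-slot destinations, so the
// first entry of each group is the one to keep as the actual load.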
bool LoadCompare(const MoveOperands* a, const MoveOperands* b) {
  if (!a->source().EqualsCanonicalized(b->source())) {
    return a->source().CompareCanonicalized(b->source());
  }
  if (IsSlot(a->destination()) && !IsSlot(b->destination())) return false;
  if (!IsSlot(a->destination()) && IsSlot(b->destination())) return true;
  return a->destination().CompareCanonicalized(b->destination());
}

}  // namespace

// Split multiple loads of the same constant or stack slot off into the second
// slot and keep remaining moves in the first slot.
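// For example, a gap containing {r1 = slot; r2 = slot} becomes {r1 = slot} in
// the first gap position and {r2 = r1} in the second.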
void MoveOptimizer::FinalizeMoves(Instruction* instr) {
  MoveOpVector& loads = local_vector();
  DCHECK(loads.empty());

  ParallelMove* parallel_moves = instr->parallel_moves()[0];
  if (parallel_moves == nullptr) return;
  // Find all the loads.
  for (MoveOperands* move : *parallel_moves) {
    if (move->IsRedundant()) continue;
    if (move->source().IsConstant() || IsSlot(move->source())) {
      loads.push_back(move);
    }
  }
  if (loads.empty()) return;
  // Group the loads by source, moving the preferred destination to the
  // beginning of the group.
  std::sort(loads.begin(), loads.end(), LoadCompare);
  MoveOperands* group_begin = nullptr;
  for (MoveOperands* load : loads) {
    // New group.
    if (group_begin == nullptr ||
        !load->source().EqualsCanonicalized(group_begin->source())) {
      group_begin = load;
      continue;
    }
    // Nothing to be gained from splitting here.
    if (IsSlot(group_begin->destination())) continue;
    // Insert new move into slot 1.
    ParallelMove* slot_1 = instr->GetOrCreateParallelMove(
        static_cast<Instruction::GapPosition>(1), code_zone());
    slot_1->AddMove(group_begin->destination(), load->destination());
    load->Eliminate();
  }
  loads.clear();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8