1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci#include "nv50_ir.h"
24bf215546Sopenharmony_ci#include "nv50_ir_target.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include <algorithm>
27bf215546Sopenharmony_ci#include <stack>
28bf215546Sopenharmony_ci#include <limits>
29bf215546Sopenharmony_ci#include <unordered_map>
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cinamespace nv50_ir {
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#define MAX_REGISTER_FILE_SIZE 256
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ciclass RegisterSet
36bf215546Sopenharmony_ci{
37bf215546Sopenharmony_cipublic:
38bf215546Sopenharmony_ci   RegisterSet(const Target *);
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   void init(const Target *);
41bf215546Sopenharmony_ci   void reset(DataFile, bool resetMax = false);
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
44bf215546Sopenharmony_ci   void intersect(DataFile f, const RegisterSet *);
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci   bool assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg);
47bf215546Sopenharmony_ci   void release(DataFile f, int32_t reg, unsigned int size);
48bf215546Sopenharmony_ci   void occupy(DataFile f, int32_t reg, unsigned int size);
49bf215546Sopenharmony_ci   void occupy(const Value *);
50bf215546Sopenharmony_ci   void occupyMask(DataFile f, int32_t reg, uint8_t mask);
51bf215546Sopenharmony_ci   bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
52bf215546Sopenharmony_ci   bool testOccupy(const Value *);
53bf215546Sopenharmony_ci   bool testOccupy(DataFile f, int32_t reg, unsigned int size);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   inline int getMaxAssigned(DataFile f) const { return fill[f]; }
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   inline unsigned int getFileSize(DataFile f) const
58bf215546Sopenharmony_ci   {
59bf215546Sopenharmony_ci      return last[f] + 1;
60bf215546Sopenharmony_ci   }
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   inline unsigned int units(DataFile f, unsigned int size) const
63bf215546Sopenharmony_ci   {
64bf215546Sopenharmony_ci      return size >> unit[f];
65bf215546Sopenharmony_ci   }
66bf215546Sopenharmony_ci   // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
67bf215546Sopenharmony_ci   inline unsigned int idToBytes(const Value *v) const
68bf215546Sopenharmony_ci   {
69bf215546Sopenharmony_ci      return v->reg.data.id * MIN2(v->reg.size, 4);
70bf215546Sopenharmony_ci   }
71bf215546Sopenharmony_ci   inline unsigned int idToUnits(const Value *v) const
72bf215546Sopenharmony_ci   {
73bf215546Sopenharmony_ci      return units(v->reg.file, idToBytes(v));
74bf215546Sopenharmony_ci   }
75bf215546Sopenharmony_ci   inline int bytesToId(Value *v, unsigned int bytes) const
76bf215546Sopenharmony_ci   {
77bf215546Sopenharmony_ci      if (v->reg.size < 4)
78bf215546Sopenharmony_ci         return units(v->reg.file, bytes);
79bf215546Sopenharmony_ci      return bytes / 4;
80bf215546Sopenharmony_ci   }
81bf215546Sopenharmony_ci   inline int unitsToId(DataFile f, int u, uint8_t size) const
82bf215546Sopenharmony_ci   {
83bf215546Sopenharmony_ci      if (u < 0)
84bf215546Sopenharmony_ci         return -1;
85bf215546Sopenharmony_ci      return (size < 4) ? u : ((u << unit[f]) / 4);
86bf215546Sopenharmony_ci   }
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   void print(DataFile f) const;
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   const bool restrictedGPR16Range;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ciprivate:
93bf215546Sopenharmony_ci   BitSet bits[LAST_REGISTER_FILE + 1];
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci   int last[LAST_REGISTER_FILE + 1];
98bf215546Sopenharmony_ci   int fill[LAST_REGISTER_FILE + 1];
99bf215546Sopenharmony_ci};
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_civoid
102bf215546Sopenharmony_ciRegisterSet::reset(DataFile f, bool resetMax)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   bits[f].fill(0);
105bf215546Sopenharmony_ci   if (resetMax)
106bf215546Sopenharmony_ci      fill[f] = -1;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_civoid
110bf215546Sopenharmony_ciRegisterSet::init(const Target *targ)
111bf215546Sopenharmony_ci{
112bf215546Sopenharmony_ci   for (unsigned int rf = 0; rf <= LAST_REGISTER_FILE; ++rf) {
113bf215546Sopenharmony_ci      DataFile f = static_cast<DataFile>(rf);
114bf215546Sopenharmony_ci      last[rf] = targ->getFileSize(f) - 1;
115bf215546Sopenharmony_ci      unit[rf] = targ->getFileUnit(f);
116bf215546Sopenharmony_ci      fill[rf] = -1;
117bf215546Sopenharmony_ci      assert(last[rf] < MAX_REGISTER_FILE_SIZE);
118bf215546Sopenharmony_ci      bits[rf].allocate(last[rf] + 1, true);
119bf215546Sopenharmony_ci   }
120bf215546Sopenharmony_ci}
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ciRegisterSet::RegisterSet(const Target *targ)
123bf215546Sopenharmony_ci  : restrictedGPR16Range(targ->getChipset() < 0xc0)
124bf215546Sopenharmony_ci{
125bf215546Sopenharmony_ci   init(targ);
126bf215546Sopenharmony_ci   for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
127bf215546Sopenharmony_ci      reset(static_cast<DataFile>(i));
128bf215546Sopenharmony_ci}
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_civoid
131bf215546Sopenharmony_ciRegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
132bf215546Sopenharmony_ci{
133bf215546Sopenharmony_ci   bits[f].periodicMask32(lock, unlock);
134bf215546Sopenharmony_ci}
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_civoid
137bf215546Sopenharmony_ciRegisterSet::intersect(DataFile f, const RegisterSet *set)
138bf215546Sopenharmony_ci{
139bf215546Sopenharmony_ci   bits[f] |= set->bits[f];
140bf215546Sopenharmony_ci}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_civoid
143bf215546Sopenharmony_ciRegisterSet::print(DataFile f) const
144bf215546Sopenharmony_ci{
145bf215546Sopenharmony_ci   INFO("GPR:");
146bf215546Sopenharmony_ci   bits[f].print();
147bf215546Sopenharmony_ci   INFO("\n");
148bf215546Sopenharmony_ci}
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_cibool
151bf215546Sopenharmony_ciRegisterSet::assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg)
152bf215546Sopenharmony_ci{
153bf215546Sopenharmony_ci   reg = bits[f].findFreeRange(size, maxReg);
154bf215546Sopenharmony_ci   if (reg < 0)
155bf215546Sopenharmony_ci      return false;
156bf215546Sopenharmony_ci   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
157bf215546Sopenharmony_ci   return true;
158bf215546Sopenharmony_ci}
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_cibool
161bf215546Sopenharmony_ciRegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
162bf215546Sopenharmony_ci{
163bf215546Sopenharmony_ci   return bits[f].testRange(reg, size);
164bf215546Sopenharmony_ci}
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_civoid
167bf215546Sopenharmony_ciRegisterSet::occupy(const Value *v)
168bf215546Sopenharmony_ci{
169bf215546Sopenharmony_ci   occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
170bf215546Sopenharmony_ci}
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_civoid
173bf215546Sopenharmony_ciRegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
174bf215546Sopenharmony_ci{
175bf215546Sopenharmony_ci   bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
176bf215546Sopenharmony_ci}
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_civoid
179bf215546Sopenharmony_ciRegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
180bf215546Sopenharmony_ci{
181bf215546Sopenharmony_ci   bits[f].setRange(reg, size);
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cibool
189bf215546Sopenharmony_ciRegisterSet::testOccupy(const Value *v)
190bf215546Sopenharmony_ci{
191bf215546Sopenharmony_ci   return testOccupy(v->reg.file,
192bf215546Sopenharmony_ci                     idToUnits(v), v->reg.size >> unit[v->reg.file]);
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_cibool
196bf215546Sopenharmony_ciRegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci   if (isOccupied(f, reg, size))
199bf215546Sopenharmony_ci      return false;
200bf215546Sopenharmony_ci   occupy(f, reg, size);
201bf215546Sopenharmony_ci   return true;
202bf215546Sopenharmony_ci}
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_civoid
205bf215546Sopenharmony_ciRegisterSet::release(DataFile f, int32_t reg, unsigned int size)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci   bits[f].clrRange(reg, size);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
210bf215546Sopenharmony_ci}
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ciclass RegAlloc
213bf215546Sopenharmony_ci{
214bf215546Sopenharmony_cipublic:
215bf215546Sopenharmony_ci   RegAlloc(Program *program) : prog(program), func(NULL), sequence(0) { }
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   bool exec();
218bf215546Sopenharmony_ci   bool execFunc();
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ciprivate:
221bf215546Sopenharmony_ci   class PhiMovesPass : public Pass {
222bf215546Sopenharmony_ci   private:
223bf215546Sopenharmony_ci      virtual bool visit(BasicBlock *);
224bf215546Sopenharmony_ci      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
225bf215546Sopenharmony_ci      inline void splitEdges(BasicBlock *b);
226bf215546Sopenharmony_ci   };
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   class ArgumentMovesPass : public Pass {
229bf215546Sopenharmony_ci   private:
230bf215546Sopenharmony_ci      virtual bool visit(BasicBlock *);
231bf215546Sopenharmony_ci   };
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   class BuildIntervalsPass : public Pass {
234bf215546Sopenharmony_ci   private:
235bf215546Sopenharmony_ci      virtual bool visit(BasicBlock *);
236bf215546Sopenharmony_ci      void collectLiveValues(BasicBlock *);
237bf215546Sopenharmony_ci      void addLiveRange(Value *, const BasicBlock *, int end);
238bf215546Sopenharmony_ci   };
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   class InsertConstraintsPass : public Pass {
241bf215546Sopenharmony_ci   public:
242bf215546Sopenharmony_ci      InsertConstraintsPass() : targ(NULL) { }
243bf215546Sopenharmony_ci      bool exec(Function *func);
244bf215546Sopenharmony_ci   private:
245bf215546Sopenharmony_ci      virtual bool visit(BasicBlock *);
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci      void insertConstraintMove(Instruction *, int s);
248bf215546Sopenharmony_ci      bool insertConstraintMoves();
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci      void condenseDefs(Instruction *);
251bf215546Sopenharmony_ci      void condenseDefs(Instruction *, const int first, const int last);
252bf215546Sopenharmony_ci      void condenseSrcs(Instruction *, const int first, const int last);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci      void addHazard(Instruction *i, const ValueRef *src);
255bf215546Sopenharmony_ci      void textureMask(TexInstruction *);
256bf215546Sopenharmony_ci      void addConstraint(Instruction *, int s, int n);
257bf215546Sopenharmony_ci      bool detectConflict(Instruction *, int s);
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci      // target specific functions, TODO: put in subclass or Target
260bf215546Sopenharmony_ci      void texConstraintNV50(TexInstruction *);
261bf215546Sopenharmony_ci      void texConstraintNVC0(TexInstruction *);
262bf215546Sopenharmony_ci      void texConstraintNVE0(TexInstruction *);
263bf215546Sopenharmony_ci      void texConstraintGM107(TexInstruction *);
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci      bool isScalarTexGM107(TexInstruction *);
266bf215546Sopenharmony_ci      void handleScalarTexGM107(TexInstruction *);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci      std::list<Instruction *> constrList;
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci      const Target *targ;
271bf215546Sopenharmony_ci   };
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   bool buildLiveSets(BasicBlock *);
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ciprivate:
276bf215546Sopenharmony_ci   Program *prog;
277bf215546Sopenharmony_ci   Function *func;
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   // instructions in control flow / chronological order
280bf215546Sopenharmony_ci   ArrayList insns;
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci   int sequence; // for manual passes through CFG
283bf215546Sopenharmony_ci};
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_citypedef std::pair<Value *, Value *> ValuePair;
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ciclass MergedDefs
288bf215546Sopenharmony_ci{
289bf215546Sopenharmony_ciprivate:
290bf215546Sopenharmony_ci   std::list<ValueDef *>& entry(Value *val) {
291bf215546Sopenharmony_ci      auto it = defs.find(val);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci      if (it == defs.end()) {
294bf215546Sopenharmony_ci         std::list<ValueDef *> &res = defs[val];
295bf215546Sopenharmony_ci         res = val->defs;
296bf215546Sopenharmony_ci         return res;
297bf215546Sopenharmony_ci      } else {
298bf215546Sopenharmony_ci         return (*it).second;
299bf215546Sopenharmony_ci      }
300bf215546Sopenharmony_ci   }
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   std::unordered_map<Value *, std::list<ValueDef *> > defs;
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cipublic:
305bf215546Sopenharmony_ci   std::list<ValueDef *>& operator()(Value *val) {
306bf215546Sopenharmony_ci      return entry(val);
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   void add(Value *val, const std::list<ValueDef *> &vals) {
310bf215546Sopenharmony_ci      assert(val);
311bf215546Sopenharmony_ci      std::list<ValueDef *> &valdefs = entry(val);
312bf215546Sopenharmony_ci      valdefs.insert(valdefs.end(), vals.begin(), vals.end());
313bf215546Sopenharmony_ci   }
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   void removeDefsOfInstruction(Instruction *insn) {
316bf215546Sopenharmony_ci      for (int d = 0; insn->defExists(d); ++d) {
317bf215546Sopenharmony_ci         ValueDef *def = &insn->def(d);
318bf215546Sopenharmony_ci         defs.erase(def->get());
319bf215546Sopenharmony_ci         for (auto &p : defs)
320bf215546Sopenharmony_ci            p.second.remove(def);
321bf215546Sopenharmony_ci      }
322bf215546Sopenharmony_ci   }
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   void merge() {
325bf215546Sopenharmony_ci      for (auto &p : defs)
326bf215546Sopenharmony_ci         p.first->defs = p.second;
327bf215546Sopenharmony_ci   }
328bf215546Sopenharmony_ci};
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ciclass SpillCodeInserter
331bf215546Sopenharmony_ci{
332bf215546Sopenharmony_cipublic:
333bf215546Sopenharmony_ci   SpillCodeInserter(Function *fn, MergedDefs &mergedDefs) : func(fn), mergedDefs(mergedDefs), stackSize(0), stackBase(0) { }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   bool run(const std::list<ValuePair>&);
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   Symbol *assignSlot(const Interval&, const unsigned int size);
338bf215546Sopenharmony_ci   Value *offsetSlot(Value *, const LValue *);
339bf215546Sopenharmony_ci   inline int32_t getStackSize() const { return stackSize; }
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ciprivate:
342bf215546Sopenharmony_ci   Function *func;
343bf215546Sopenharmony_ci   MergedDefs &mergedDefs;
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   struct SpillSlot
346bf215546Sopenharmony_ci   {
347bf215546Sopenharmony_ci      Interval occup;
348bf215546Sopenharmony_ci      std::list<Value *> residents; // needed to recalculate occup
349bf215546Sopenharmony_ci      Symbol *sym;
350bf215546Sopenharmony_ci      int32_t offset;
351bf215546Sopenharmony_ci      inline uint8_t size() const { return sym->reg.size; }
352bf215546Sopenharmony_ci   };
353bf215546Sopenharmony_ci   std::list<SpillSlot> slots;
354bf215546Sopenharmony_ci   int32_t stackSize;
355bf215546Sopenharmony_ci   int32_t stackBase;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   LValue *unspill(Instruction *usei, LValue *, Value *slot);
358bf215546Sopenharmony_ci   void spill(Instruction *defi, Value *slot, LValue *);
359bf215546Sopenharmony_ci};
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_civoid
362bf215546Sopenharmony_ciRegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
363bf215546Sopenharmony_ci                                           const BasicBlock *bb,
364bf215546Sopenharmony_ci                                           int end)
365bf215546Sopenharmony_ci{
366bf215546Sopenharmony_ci   Instruction *insn = val->getUniqueInsn();
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   if (!insn)
369bf215546Sopenharmony_ci      insn = bb->getFirst();
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   assert(bb->getFirst()->serial <= bb->getExit()->serial);
372bf215546Sopenharmony_ci   assert(bb->getExit()->serial + 1 >= end);
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   int begin = insn->serial;
375bf215546Sopenharmony_ci   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
376bf215546Sopenharmony_ci      begin = bb->getEntry()->serial;
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
379bf215546Sopenharmony_ci            val->id, begin, insn->serial, end);
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   if (begin != end) // empty ranges are only added as hazards for fixed regs
382bf215546Sopenharmony_ci      val->livei.extend(begin, end);
383bf215546Sopenharmony_ci}
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_cibool
386bf215546Sopenharmony_ciRegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
387bf215546Sopenharmony_ci{
388bf215546Sopenharmony_ci   if (b->cfg.incidentCount() <= 1)
389bf215546Sopenharmony_ci      return false;
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci   int n = 0;
392bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
393bf215546Sopenharmony_ci      if (ei.getType() == Graph::Edge::TREE ||
394bf215546Sopenharmony_ci          ei.getType() == Graph::Edge::FORWARD)
395bf215546Sopenharmony_ci         ++n;
396bf215546Sopenharmony_ci   return (n == 2);
397bf215546Sopenharmony_ci}
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistruct PhiMapHash {
400bf215546Sopenharmony_ci   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
401bf215546Sopenharmony_ci      return std::hash<Instruction*>()(val.first) * 31 +
402bf215546Sopenharmony_ci         std::hash<BasicBlock*>()(val.second);
403bf215546Sopenharmony_ci   }
404bf215546Sopenharmony_ci};
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_citypedef std::unordered_map<
407bf215546Sopenharmony_ci   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci// Critical edges need to be split up so that work can be inserted along
410bf215546Sopenharmony_ci// specific edge transitions. Unfortunately manipulating incident edges into a
411bf215546Sopenharmony_ci// BB invalidates all the PHI nodes since their sources are implicitly ordered
412bf215546Sopenharmony_ci// by incident edge order.
413bf215546Sopenharmony_ci//
414bf215546Sopenharmony_ci// TODO: Make it so that that is not the case, and PHI nodes store pointers to
415bf215546Sopenharmony_ci// the original BBs.
416bf215546Sopenharmony_civoid
417bf215546Sopenharmony_ciRegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
418bf215546Sopenharmony_ci{
419bf215546Sopenharmony_ci   BasicBlock *pb, *pn;
420bf215546Sopenharmony_ci   Instruction *phi;
421bf215546Sopenharmony_ci   Graph::EdgeIterator ei;
422bf215546Sopenharmony_ci   std::stack<BasicBlock *> stack;
423bf215546Sopenharmony_ci   int j = 0;
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
426bf215546Sopenharmony_ci      pb = BasicBlock::get(ei.getNode());
427bf215546Sopenharmony_ci      assert(pb);
428bf215546Sopenharmony_ci      if (needNewElseBlock(bb, pb))
429bf215546Sopenharmony_ci         stack.push(pb);
430bf215546Sopenharmony_ci   }
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci   // No critical edges were found, no need to perform any work.
433bf215546Sopenharmony_ci   if (stack.empty())
434bf215546Sopenharmony_ci      return;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   // We're about to, potentially, reorder the inbound edges. This means that
437bf215546Sopenharmony_ci   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
438bf215546Sopenharmony_ci   // nodes after the graph has been modified.
439bf215546Sopenharmony_ci   PhiMap phis;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   j = 0;
442bf215546Sopenharmony_ci   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
443bf215546Sopenharmony_ci      pb = BasicBlock::get(ei.getNode());
444bf215546Sopenharmony_ci      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
445bf215546Sopenharmony_ci         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
446bf215546Sopenharmony_ci   }
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_ci   while (!stack.empty()) {
449bf215546Sopenharmony_ci      pb = stack.top();
450bf215546Sopenharmony_ci      pn = new BasicBlock(func);
451bf215546Sopenharmony_ci      stack.pop();
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci      pb->cfg.detach(&bb->cfg);
454bf215546Sopenharmony_ci      pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
455bf215546Sopenharmony_ci      pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci      assert(pb->getExit()->op != OP_CALL);
458bf215546Sopenharmony_ci      if (pb->getExit()->asFlow()->target.bb == bb)
459bf215546Sopenharmony_ci         pb->getExit()->asFlow()->target.bb = pn;
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
462bf215546Sopenharmony_ci         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
463bf215546Sopenharmony_ci         assert(it != phis.end());
464bf215546Sopenharmony_ci         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
465bf215546Sopenharmony_ci         phis.erase(it);
466bf215546Sopenharmony_ci      }
467bf215546Sopenharmony_ci   }
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   // Now go through and fix up all of the phi node sources.
470bf215546Sopenharmony_ci   j = 0;
471bf215546Sopenharmony_ci   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
472bf215546Sopenharmony_ci      pb = BasicBlock::get(ei.getNode());
473bf215546Sopenharmony_ci      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
474bf215546Sopenharmony_ci         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
475bf215546Sopenharmony_ci         assert(it != phis.end());
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci         phi->setSrc(j, it->second);
478bf215546Sopenharmony_ci      }
479bf215546Sopenharmony_ci   }
480bf215546Sopenharmony_ci}
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci// For each operand of each PHI in b, generate a new value by inserting a MOV
483bf215546Sopenharmony_ci// at the end of the block it is coming from and replace the operand with its
484bf215546Sopenharmony_ci// result. This eliminates liveness conflicts and enables us to let values be
485bf215546Sopenharmony_ci// copied to the right register if such a conflict exists nonetheless.
486bf215546Sopenharmony_ci//
487bf215546Sopenharmony_ci// These MOVs are also crucial in making sure the live intervals of phi srces
488bf215546Sopenharmony_ci// are extended until the end of the loop, since they are not included in the
489bf215546Sopenharmony_ci// live-in sets.
490bf215546Sopenharmony_cibool
491bf215546Sopenharmony_ciRegAlloc::PhiMovesPass::visit(BasicBlock *bb)
492bf215546Sopenharmony_ci{
493bf215546Sopenharmony_ci   Instruction *phi, *mov;
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   splitEdges(bb);
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   // insert MOVs (phi->src(j) should stem from j-th in-BB)
498bf215546Sopenharmony_ci   int j = 0;
499bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
500bf215546Sopenharmony_ci      BasicBlock *pb = BasicBlock::get(ei.getNode());
501bf215546Sopenharmony_ci      if (!pb->isTerminated())
502bf215546Sopenharmony_ci         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
505bf215546Sopenharmony_ci         LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue());
506bf215546Sopenharmony_ci         mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci         mov->setSrc(0, phi->getSrc(j));
509bf215546Sopenharmony_ci         mov->setDef(0, tmp);
510bf215546Sopenharmony_ci         phi->setSrc(j, tmp);
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci         pb->insertBefore(pb->getExit(), mov);
513bf215546Sopenharmony_ci      }
514bf215546Sopenharmony_ci      ++j;
515bf215546Sopenharmony_ci   }
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   return true;
518bf215546Sopenharmony_ci}
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_cibool
521bf215546Sopenharmony_ciRegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
522bf215546Sopenharmony_ci{
523bf215546Sopenharmony_ci   // Bind function call inputs/outputs to the same physical register
524bf215546Sopenharmony_ci   // the callee uses, inserting moves as appropriate for the case a
525bf215546Sopenharmony_ci   // conflict arises.
526bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = i->next) {
527bf215546Sopenharmony_ci      FlowInstruction *cal = i->asFlow();
528bf215546Sopenharmony_ci      // TODO: Handle indirect calls.
529bf215546Sopenharmony_ci      // Right now they should only be generated for builtins.
530bf215546Sopenharmony_ci      if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect)
531bf215546Sopenharmony_ci         continue;
532bf215546Sopenharmony_ci      RegisterSet clobberSet(prog->getTarget());
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci      // Bind input values.
535bf215546Sopenharmony_ci      for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) {
536bf215546Sopenharmony_ci         const int t = cal->indirect ? (s - 1) : s;
537bf215546Sopenharmony_ci         LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
538bf215546Sopenharmony_ci         tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci         Instruction *mov =
541bf215546Sopenharmony_ci            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
542bf215546Sopenharmony_ci         mov->setDef(0, tmp);
543bf215546Sopenharmony_ci         mov->setSrc(0, cal->getSrc(s));
544bf215546Sopenharmony_ci         cal->setSrc(s, tmp);
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci         bb->insertBefore(cal, mov);
547bf215546Sopenharmony_ci      }
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci      // Bind output values.
550bf215546Sopenharmony_ci      for (int d = 0; cal->defExists(d); ++d) {
551bf215546Sopenharmony_ci         LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
552bf215546Sopenharmony_ci         tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci         Instruction *mov =
555bf215546Sopenharmony_ci            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
556bf215546Sopenharmony_ci         mov->setSrc(0, tmp);
557bf215546Sopenharmony_ci         mov->setDef(0, cal->getDef(d));
558bf215546Sopenharmony_ci         cal->setDef(d, tmp);
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci         bb->insertAfter(cal, mov);
561bf215546Sopenharmony_ci         clobberSet.occupy(tmp);
562bf215546Sopenharmony_ci      }
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci      // Bind clobbered values.
565bf215546Sopenharmony_ci      for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
566bf215546Sopenharmony_ci           it != cal->target.fn->clobbers.end();
567bf215546Sopenharmony_ci           ++it) {
568bf215546Sopenharmony_ci         if (clobberSet.testOccupy(*it)) {
569bf215546Sopenharmony_ci            Value *tmp = new_LValue(func, (*it)->asLValue());
570bf215546Sopenharmony_ci            tmp->reg.data.id = (*it)->reg.data.id;
571bf215546Sopenharmony_ci            cal->setDef(cal->defCount(), tmp);
572bf215546Sopenharmony_ci         }
573bf215546Sopenharmony_ci      }
574bf215546Sopenharmony_ci   }
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci   // Update the clobber set of the function.
577bf215546Sopenharmony_ci   if (BasicBlock::get(func->cfgExit) == bb) {
578bf215546Sopenharmony_ci      func->buildDefSets();
579bf215546Sopenharmony_ci      for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
580bf215546Sopenharmony_ci         if (bb->defSet.test(i))
581bf215546Sopenharmony_ci            func->clobbers.push_back(func->getLValue(i));
582bf215546Sopenharmony_ci   }
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   return true;
585bf215546Sopenharmony_ci}
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci// Build the set of live-in variables of bb.
588bf215546Sopenharmony_cibool
589bf215546Sopenharmony_ciRegAlloc::buildLiveSets(BasicBlock *bb)
590bf215546Sopenharmony_ci{
591bf215546Sopenharmony_ci   Function *f = bb->getFunction();
592bf215546Sopenharmony_ci   BasicBlock *bn;
593bf215546Sopenharmony_ci   Instruction *i;
594bf215546Sopenharmony_ci   unsigned int s, d;
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   bb->liveSet.allocate(func->allLValues.getSize(), false);
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   int n = 0;
601bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
602bf215546Sopenharmony_ci      bn = BasicBlock::get(ei.getNode());
603bf215546Sopenharmony_ci      if (bn == bb)
604bf215546Sopenharmony_ci         continue;
605bf215546Sopenharmony_ci      if (bn->cfg.visit(sequence))
606bf215546Sopenharmony_ci         if (!buildLiveSets(bn))
607bf215546Sopenharmony_ci            return false;
608bf215546Sopenharmony_ci      if (n++ || bb->liveSet.marker)
609bf215546Sopenharmony_ci         bb->liveSet |= bn->liveSet;
610bf215546Sopenharmony_ci      else
611bf215546Sopenharmony_ci         bb->liveSet = bn->liveSet;
612bf215546Sopenharmony_ci   }
613bf215546Sopenharmony_ci   if (!n && !bb->liveSet.marker)
614bf215546Sopenharmony_ci      bb->liveSet.fill(0);
615bf215546Sopenharmony_ci   bb->liveSet.marker = true;
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
618bf215546Sopenharmony_ci      INFO("BB:%i live set of out blocks:\n", bb->getId());
619bf215546Sopenharmony_ci      bb->liveSet.print();
620bf215546Sopenharmony_ci   }
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   // if (!bb->getEntry())
623bf215546Sopenharmony_ci   //   return true;
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   if (bb == BasicBlock::get(f->cfgExit)) {
626bf215546Sopenharmony_ci      for (std::deque<ValueRef>::iterator it = f->outs.begin();
627bf215546Sopenharmony_ci           it != f->outs.end(); ++it) {
628bf215546Sopenharmony_ci         assert(it->get()->asLValue());
629bf215546Sopenharmony_ci         bb->liveSet.set(it->get()->id);
630bf215546Sopenharmony_ci      }
631bf215546Sopenharmony_ci   }
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
634bf215546Sopenharmony_ci      for (d = 0; i->defExists(d); ++d)
635bf215546Sopenharmony_ci         bb->liveSet.clr(i->getDef(d)->id);
636bf215546Sopenharmony_ci      for (s = 0; i->srcExists(s); ++s)
637bf215546Sopenharmony_ci         if (i->getSrc(s)->asLValue())
638bf215546Sopenharmony_ci            bb->liveSet.set(i->getSrc(s)->id);
639bf215546Sopenharmony_ci   }
640bf215546Sopenharmony_ci   for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
641bf215546Sopenharmony_ci      bb->liveSet.clr(i->getDef(0)->id);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
644bf215546Sopenharmony_ci      INFO("BB:%i live set after propagation:\n", bb->getId());
645bf215546Sopenharmony_ci      bb->liveSet.print();
646bf215546Sopenharmony_ci   }
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci   return true;
649bf215546Sopenharmony_ci}
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_civoid
652bf215546Sopenharmony_ciRegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
653bf215546Sopenharmony_ci{
654bf215546Sopenharmony_ci   BasicBlock *bbA = NULL, *bbB = NULL;
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   if (bb->cfg.outgoingCount()) {
657bf215546Sopenharmony_ci      // trickery to save a loop of OR'ing liveSets
658bf215546Sopenharmony_ci      // aliasing works fine with BitSet::setOr
659bf215546Sopenharmony_ci      for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
660bf215546Sopenharmony_ci         if (bbA) {
661bf215546Sopenharmony_ci            bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
662bf215546Sopenharmony_ci            bbA = bb;
663bf215546Sopenharmony_ci         } else {
664bf215546Sopenharmony_ci            bbA = bbB;
665bf215546Sopenharmony_ci         }
666bf215546Sopenharmony_ci         bbB = BasicBlock::get(ei.getNode());
667bf215546Sopenharmony_ci      }
668bf215546Sopenharmony_ci      bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
669bf215546Sopenharmony_ci   } else
670bf215546Sopenharmony_ci   if (bb->cfg.incidentCount()) {
671bf215546Sopenharmony_ci      bb->liveSet.fill(0);
672bf215546Sopenharmony_ci   }
673bf215546Sopenharmony_ci}
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_cibool
676bf215546Sopenharmony_ciRegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
677bf215546Sopenharmony_ci{
678bf215546Sopenharmony_ci   collectLiveValues(bb);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci   // go through out blocks and delete phi sources that do not originate from
683bf215546Sopenharmony_ci   // the current block from the live set
684bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
685bf215546Sopenharmony_ci      BasicBlock *out = BasicBlock::get(ei.getNode());
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci      for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
688bf215546Sopenharmony_ci         bb->liveSet.clr(i->getDef(0)->id);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci         for (int s = 0; i->srcExists(s); ++s) {
691bf215546Sopenharmony_ci            assert(i->src(s).getInsn());
692bf215546Sopenharmony_ci            if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
693bf215546Sopenharmony_ci               bb->liveSet.set(i->getSrc(s)->id);
694bf215546Sopenharmony_ci            else
695bf215546Sopenharmony_ci               bb->liveSet.clr(i->getSrc(s)->id);
696bf215546Sopenharmony_ci         }
697bf215546Sopenharmony_ci      }
698bf215546Sopenharmony_ci   }
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci   // remaining live-outs are live until end
701bf215546Sopenharmony_ci   if (bb->getExit()) {
702bf215546Sopenharmony_ci      for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
703bf215546Sopenharmony_ci         if (bb->liveSet.test(j))
704bf215546Sopenharmony_ci            addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
705bf215546Sopenharmony_ci   }
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci   for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
708bf215546Sopenharmony_ci      for (int d = 0; i->defExists(d); ++d) {
709bf215546Sopenharmony_ci         bb->liveSet.clr(i->getDef(d)->id);
710bf215546Sopenharmony_ci         if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
711bf215546Sopenharmony_ci            i->getDef(d)->livei.extend(i->serial, i->serial);
712bf215546Sopenharmony_ci      }
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci      for (int s = 0; i->srcExists(s); ++s) {
715bf215546Sopenharmony_ci         if (!i->getSrc(s)->asLValue())
716bf215546Sopenharmony_ci            continue;
717bf215546Sopenharmony_ci         if (!bb->liveSet.test(i->getSrc(s)->id)) {
718bf215546Sopenharmony_ci            bb->liveSet.set(i->getSrc(s)->id);
719bf215546Sopenharmony_ci            addLiveRange(i->getSrc(s), bb, i->serial);
720bf215546Sopenharmony_ci         }
721bf215546Sopenharmony_ci      }
722bf215546Sopenharmony_ci   }
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_ci   if (bb == BasicBlock::get(func->cfg.getRoot())) {
725bf215546Sopenharmony_ci      for (std::deque<ValueDef>::iterator it = func->ins.begin();
726bf215546Sopenharmony_ci           it != func->ins.end(); ++it) {
727bf215546Sopenharmony_ci         if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
728bf215546Sopenharmony_ci            it->get()->livei.extend(0, 1);
729bf215546Sopenharmony_ci      }
730bf215546Sopenharmony_ci   }
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci   return true;
733bf215546Sopenharmony_ci}
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci#define JOIN_MASK_PHI        (1 << 0)
737bf215546Sopenharmony_ci#define JOIN_MASK_UNION      (1 << 1)
738bf215546Sopenharmony_ci#define JOIN_MASK_MOV        (1 << 2)
739bf215546Sopenharmony_ci#define JOIN_MASK_TEX        (1 << 3)
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ciclass GCRA
742bf215546Sopenharmony_ci{
743bf215546Sopenharmony_cipublic:
744bf215546Sopenharmony_ci   GCRA(Function *, SpillCodeInserter&, MergedDefs&);
745bf215546Sopenharmony_ci   ~GCRA();
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   bool allocateRegisters(ArrayList& insns);
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci   void printNodeInfo() const;
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ciprivate:
752bf215546Sopenharmony_ci   class RIG_Node : public Graph::Node
753bf215546Sopenharmony_ci   {
754bf215546Sopenharmony_ci   public:
755bf215546Sopenharmony_ci      RIG_Node();
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci      void init(const RegisterSet&, LValue *);
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci      void addInterference(RIG_Node *);
760bf215546Sopenharmony_ci      void addRegPreference(RIG_Node *);
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci      inline LValue *getValue() const
763bf215546Sopenharmony_ci      {
764bf215546Sopenharmony_ci         return reinterpret_cast<LValue *>(data);
765bf215546Sopenharmony_ci      }
766bf215546Sopenharmony_ci      inline void setValue(LValue *lval) { data = lval; }
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci      inline uint8_t getCompMask() const
769bf215546Sopenharmony_ci      {
770bf215546Sopenharmony_ci         return ((1 << colors) - 1) << (reg & 7);
771bf215546Sopenharmony_ci      }
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci      static inline RIG_Node *get(const Graph::EdgeIterator& ei)
774bf215546Sopenharmony_ci      {
775bf215546Sopenharmony_ci         return static_cast<RIG_Node *>(ei.getNode());
776bf215546Sopenharmony_ci      }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   public:
779bf215546Sopenharmony_ci      uint32_t degree;
780bf215546Sopenharmony_ci      uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
781bf215546Sopenharmony_ci      uint16_t maxReg;
782bf215546Sopenharmony_ci      uint16_t colors;
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci      DataFile f;
785bf215546Sopenharmony_ci      int32_t reg;
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci      float weight;
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci      // list pointers for simplify() phase
790bf215546Sopenharmony_ci      RIG_Node *next;
791bf215546Sopenharmony_ci      RIG_Node *prev;
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci      // union of the live intervals of all coalesced values (we want to retain
794bf215546Sopenharmony_ci      //  the separate intervals for testing interference of compound values)
795bf215546Sopenharmony_ci      Interval livei;
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci      std::list<RIG_Node *> prefRegs;
798bf215546Sopenharmony_ci   };
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ciprivate:
801bf215546Sopenharmony_ci   inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   void buildRIG(ArrayList&);
804bf215546Sopenharmony_ci   bool coalesce(ArrayList&);
805bf215546Sopenharmony_ci   bool doCoalesce(ArrayList&, unsigned int mask);
806bf215546Sopenharmony_ci   void calculateSpillWeights();
807bf215546Sopenharmony_ci   bool simplify();
808bf215546Sopenharmony_ci   bool selectRegisters();
809bf215546Sopenharmony_ci   void cleanup(const bool success);
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci   void simplifyEdge(RIG_Node *, RIG_Node *);
812bf215546Sopenharmony_ci   void simplifyNode(RIG_Node *);
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci   void copyCompound(Value *dst, Value *src);
815bf215546Sopenharmony_ci   bool coalesceValues(Value *, Value *, bool force);
816bf215546Sopenharmony_ci   void resolveSplitsAndMerges();
817bf215546Sopenharmony_ci   void makeCompound(Instruction *, bool isSplit);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci   inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
822bf215546Sopenharmony_ci   void checkList(std::list<RIG_Node *>&);
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ciprivate:
825bf215546Sopenharmony_ci   std::stack<uint32_t> stack;
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci   // list headers for simplify() phase
828bf215546Sopenharmony_ci   RIG_Node lo[2];
829bf215546Sopenharmony_ci   RIG_Node hi;
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_ci   Graph RIG;
832bf215546Sopenharmony_ci   RIG_Node *nodes;
833bf215546Sopenharmony_ci   unsigned int nodeCount;
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   Function *func;
836bf215546Sopenharmony_ci   Program *prog;
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci   struct RelDegree {
839bf215546Sopenharmony_ci      uint8_t data[17][17];
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci      RelDegree() {
842bf215546Sopenharmony_ci         for (int i = 1; i <= 16; ++i)
843bf215546Sopenharmony_ci            for (int j = 1; j <= 16; ++j)
844bf215546Sopenharmony_ci               data[i][j] = j * ((i + j - 1) / j);
845bf215546Sopenharmony_ci      }
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci      const uint8_t* operator[](std::size_t i) const {
848bf215546Sopenharmony_ci         return data[i];
849bf215546Sopenharmony_ci      }
850bf215546Sopenharmony_ci   };
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci   static const RelDegree relDegree;
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci   RegisterSet regs;
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   // need to fixup register id for participants of OP_MERGE/SPLIT
857bf215546Sopenharmony_ci   std::list<Instruction *> merges;
858bf215546Sopenharmony_ci   std::list<Instruction *> splits;
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci   SpillCodeInserter& spill;
861bf215546Sopenharmony_ci   std::list<ValuePair> mustSpill;
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci   MergedDefs &mergedDefs;
864bf215546Sopenharmony_ci};
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ciconst GCRA::RelDegree GCRA::relDegree;
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ciGCRA::RIG_Node::RIG_Node() : Node(NULL), degree(0), degreeLimit(0), maxReg(0),
869bf215546Sopenharmony_ci   colors(0), f(FILE_NULL), reg(0), weight(0), next(this), prev(this)
870bf215546Sopenharmony_ci{
871bf215546Sopenharmony_ci}
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_civoid
874bf215546Sopenharmony_ciGCRA::printNodeInfo() const
875bf215546Sopenharmony_ci{
876bf215546Sopenharmony_ci   for (unsigned int i = 0; i < nodeCount; ++i) {
877bf215546Sopenharmony_ci      if (!nodes[i].colors)
878bf215546Sopenharmony_ci         continue;
879bf215546Sopenharmony_ci      INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
880bf215546Sopenharmony_ci           i,
881bf215546Sopenharmony_ci           nodes[i].f,nodes[i].reg,nodes[i].colors,
882bf215546Sopenharmony_ci           nodes[i].weight,
883bf215546Sopenharmony_ci           nodes[i].degree, nodes[i].degreeLimit);
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci      for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
886bf215546Sopenharmony_ci         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
887bf215546Sopenharmony_ci      for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
888bf215546Sopenharmony_ci         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
889bf215546Sopenharmony_ci      INFO("\n");
890bf215546Sopenharmony_ci   }
891bf215546Sopenharmony_ci}
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_cistatic bool
894bf215546Sopenharmony_ciisShortRegOp(Instruction *insn)
895bf215546Sopenharmony_ci{
896bf215546Sopenharmony_ci   // Immediates are always in src1 (except zeroes, which end up getting
897bf215546Sopenharmony_ci   // replaced with a zero reg). Every other situation can be resolved by
898bf215546Sopenharmony_ci   // using a long encoding.
899bf215546Sopenharmony_ci   return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE &&
900bf215546Sopenharmony_ci      insn->getSrc(1)->reg.data.u64;
901bf215546Sopenharmony_ci}
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci// Check if this LValue is ever used in an instruction that can't be encoded
904bf215546Sopenharmony_ci// with long registers (i.e. > r63)
905bf215546Sopenharmony_cistatic bool
906bf215546Sopenharmony_ciisShortRegVal(LValue *lval)
907bf215546Sopenharmony_ci{
908bf215546Sopenharmony_ci   if (lval->getInsn() == NULL)
909bf215546Sopenharmony_ci      return false;
910bf215546Sopenharmony_ci   for (Value::DefCIterator def = lval->defs.begin();
911bf215546Sopenharmony_ci        def != lval->defs.end(); ++def)
912bf215546Sopenharmony_ci      if (isShortRegOp((*def)->getInsn()))
913bf215546Sopenharmony_ci         return true;
914bf215546Sopenharmony_ci   for (Value::UseCIterator use = lval->uses.begin();
915bf215546Sopenharmony_ci        use != lval->uses.end(); ++use)
916bf215546Sopenharmony_ci      if (isShortRegOp((*use)->getInsn()))
917bf215546Sopenharmony_ci         return true;
918bf215546Sopenharmony_ci   return false;
919bf215546Sopenharmony_ci}
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_civoid
922bf215546Sopenharmony_ciGCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
923bf215546Sopenharmony_ci{
924bf215546Sopenharmony_ci   setValue(lval);
925bf215546Sopenharmony_ci   if (lval->reg.data.id >= 0)
926bf215546Sopenharmony_ci      lval->noSpill = lval->fixedReg = 1;
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci   colors = regs.units(lval->reg.file, lval->reg.size);
929bf215546Sopenharmony_ci   f = lval->reg.file;
930bf215546Sopenharmony_ci   reg = -1;
931bf215546Sopenharmony_ci   if (lval->reg.data.id >= 0)
932bf215546Sopenharmony_ci      reg = regs.idToUnits(lval);
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci   weight = std::numeric_limits<float>::infinity();
935bf215546Sopenharmony_ci   degree = 0;
936bf215546Sopenharmony_ci   maxReg = regs.getFileSize(f);
937bf215546Sopenharmony_ci   // On nv50, we lose a bit of gpr encoding when there's an embedded
938bf215546Sopenharmony_ci   // immediate.
939bf215546Sopenharmony_ci   if (regs.restrictedGPR16Range && f == FILE_GPR && (lval->reg.size == 2 || isShortRegVal(lval)))
940bf215546Sopenharmony_ci      maxReg /= 2;
941bf215546Sopenharmony_ci   degreeLimit = maxReg;
942bf215546Sopenharmony_ci   degreeLimit -= relDegree[1][colors] - 1;
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci   livei.insert(lval->livei);
945bf215546Sopenharmony_ci}
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci// Used when coalescing moves. The non-compound value will become one, e.g.:
948bf215546Sopenharmony_ci// mov b32 $r0 $r2            / merge b64 $r0d { $r0 $r1 }
949bf215546Sopenharmony_ci// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
950bf215546Sopenharmony_civoid
951bf215546Sopenharmony_ciGCRA::copyCompound(Value *dst, Value *src)
952bf215546Sopenharmony_ci{
953bf215546Sopenharmony_ci   LValue *ldst = dst->asLValue();
954bf215546Sopenharmony_ci   LValue *lsrc = src->asLValue();
955bf215546Sopenharmony_ci
956bf215546Sopenharmony_ci   if (ldst->compound && !lsrc->compound) {
957bf215546Sopenharmony_ci      LValue *swap = lsrc;
958bf215546Sopenharmony_ci      lsrc = ldst;
959bf215546Sopenharmony_ci      ldst = swap;
960bf215546Sopenharmony_ci   }
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci   assert(!ldst->compound);
963bf215546Sopenharmony_ci
964bf215546Sopenharmony_ci   if (lsrc->compound) {
965bf215546Sopenharmony_ci      for (ValueDef *d : mergedDefs(ldst->join)) {
966bf215546Sopenharmony_ci         LValue *ldst = d->get()->asLValue();
967bf215546Sopenharmony_ci         if (!ldst->compound)
968bf215546Sopenharmony_ci            ldst->compMask = 0xff;
969bf215546Sopenharmony_ci         ldst->compound = 1;
970bf215546Sopenharmony_ci         ldst->compMask &= lsrc->compMask;
971bf215546Sopenharmony_ci      }
972bf215546Sopenharmony_ci   }
973bf215546Sopenharmony_ci}
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_cibool
976bf215546Sopenharmony_ciGCRA::coalesceValues(Value *dst, Value *src, bool force)
977bf215546Sopenharmony_ci{
978bf215546Sopenharmony_ci   LValue *rep = dst->join->asLValue();
979bf215546Sopenharmony_ci   LValue *val = src->join->asLValue();
980bf215546Sopenharmony_ci
981bf215546Sopenharmony_ci   if (!force && val->reg.data.id >= 0) {
982bf215546Sopenharmony_ci      rep = src->join->asLValue();
983bf215546Sopenharmony_ci      val = dst->join->asLValue();
984bf215546Sopenharmony_ci   }
985bf215546Sopenharmony_ci   RIG_Node *nRep = &nodes[rep->id];
986bf215546Sopenharmony_ci   RIG_Node *nVal = &nodes[val->id];
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci   if (src->reg.file != dst->reg.file) {
989bf215546Sopenharmony_ci      if (!force)
990bf215546Sopenharmony_ci         return false;
991bf215546Sopenharmony_ci      WARN("forced coalescing of values in different files !\n");
992bf215546Sopenharmony_ci   }
993bf215546Sopenharmony_ci   if (!force && dst->reg.size != src->reg.size)
994bf215546Sopenharmony_ci      return false;
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
997bf215546Sopenharmony_ci      if (force) {
998bf215546Sopenharmony_ci         if (val->reg.data.id >= 0)
999bf215546Sopenharmony_ci            WARN("forced coalescing of values in different fixed regs !\n");
1000bf215546Sopenharmony_ci      } else {
1001bf215546Sopenharmony_ci         if (val->reg.data.id >= 0)
1002bf215546Sopenharmony_ci            return false;
1003bf215546Sopenharmony_ci         // make sure that there is no overlap with the fixed register of rep
1004bf215546Sopenharmony_ci         for (ArrayList::Iterator it = func->allLValues.iterator();
1005bf215546Sopenharmony_ci              !it.end(); it.next()) {
1006bf215546Sopenharmony_ci            Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
1007bf215546Sopenharmony_ci            assert(reg);
1008bf215546Sopenharmony_ci            if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
1009bf215546Sopenharmony_ci               return false;
1010bf215546Sopenharmony_ci         }
1011bf215546Sopenharmony_ci      }
1012bf215546Sopenharmony_ci   }
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci   if (!force && nRep->livei.overlaps(nVal->livei))
1015bf215546Sopenharmony_ci      return false;
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci   // TODO: Handle this case properly.
1018bf215546Sopenharmony_ci   if (!force && rep->compound && val->compound)
1019bf215546Sopenharmony_ci      return false;
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
1022bf215546Sopenharmony_ci            rep->id, rep->reg.data.id, val->id);
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci   if (!force)
1025bf215546Sopenharmony_ci      copyCompound(dst, src);
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci   // set join pointer of all values joined with val
1028bf215546Sopenharmony_ci   const std::list<ValueDef *> &defs = mergedDefs(val);
1029bf215546Sopenharmony_ci   for (ValueDef *def : defs)
1030bf215546Sopenharmony_ci      def->get()->join = rep;
1031bf215546Sopenharmony_ci   assert(rep->join == rep && val->join == rep);
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   // add val's definitions to rep and extend the live interval of its RIG node
1034bf215546Sopenharmony_ci   mergedDefs.add(rep, defs);
1035bf215546Sopenharmony_ci   nRep->livei.unify(nVal->livei);
1036bf215546Sopenharmony_ci   nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit);
1037bf215546Sopenharmony_ci   nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg);
1038bf215546Sopenharmony_ci   return true;
1039bf215546Sopenharmony_ci}
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_cibool
1042bf215546Sopenharmony_ciGCRA::coalesce(ArrayList& insns)
1043bf215546Sopenharmony_ci{
1044bf215546Sopenharmony_ci   bool ret = doCoalesce(insns, JOIN_MASK_PHI);
1045bf215546Sopenharmony_ci   if (!ret)
1046bf215546Sopenharmony_ci      return false;
1047bf215546Sopenharmony_ci   switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
1048bf215546Sopenharmony_ci   case 0x50:
1049bf215546Sopenharmony_ci   case 0x80:
1050bf215546Sopenharmony_ci   case 0x90:
1051bf215546Sopenharmony_ci   case 0xa0:
1052bf215546Sopenharmony_ci      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
1053bf215546Sopenharmony_ci      break;
1054bf215546Sopenharmony_ci   case 0xc0:
1055bf215546Sopenharmony_ci   case 0xd0:
1056bf215546Sopenharmony_ci   case 0xe0:
1057bf215546Sopenharmony_ci   case 0xf0:
1058bf215546Sopenharmony_ci   case 0x100:
1059bf215546Sopenharmony_ci   case 0x110:
1060bf215546Sopenharmony_ci   case 0x120:
1061bf215546Sopenharmony_ci   case 0x130:
1062bf215546Sopenharmony_ci   case 0x140:
1063bf215546Sopenharmony_ci   case 0x160:
1064bf215546Sopenharmony_ci   case 0x170:
1065bf215546Sopenharmony_ci      ret = doCoalesce(insns, JOIN_MASK_UNION);
1066bf215546Sopenharmony_ci      break;
1067bf215546Sopenharmony_ci   default:
1068bf215546Sopenharmony_ci      break;
1069bf215546Sopenharmony_ci   }
1070bf215546Sopenharmony_ci   if (!ret)
1071bf215546Sopenharmony_ci      return false;
1072bf215546Sopenharmony_ci   return doCoalesce(insns, JOIN_MASK_MOV);
1073bf215546Sopenharmony_ci}
1074bf215546Sopenharmony_ci
1075bf215546Sopenharmony_cistatic inline uint8_t makeCompMask(int compSize, int base, int size)
1076bf215546Sopenharmony_ci{
1077bf215546Sopenharmony_ci   uint8_t m = ((1 << size) - 1) << base;
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci   switch (compSize) {
1080bf215546Sopenharmony_ci   case 1:
1081bf215546Sopenharmony_ci      return 0xff;
1082bf215546Sopenharmony_ci   case 2:
1083bf215546Sopenharmony_ci      m |= (m << 2);
1084bf215546Sopenharmony_ci      return (m << 4) | m;
1085bf215546Sopenharmony_ci   case 3:
1086bf215546Sopenharmony_ci   case 4:
1087bf215546Sopenharmony_ci      return (m << 4) | m;
1088bf215546Sopenharmony_ci   default:
1089bf215546Sopenharmony_ci      assert(compSize <= 8);
1090bf215546Sopenharmony_ci      return m;
1091bf215546Sopenharmony_ci   }
1092bf215546Sopenharmony_ci}
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_civoid
1095bf215546Sopenharmony_ciGCRA::makeCompound(Instruction *insn, bool split)
1096bf215546Sopenharmony_ci{
1097bf215546Sopenharmony_ci   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
1100bf215546Sopenharmony_ci      INFO("makeCompound(split = %i): ", split);
1101bf215546Sopenharmony_ci      insn->print();
1102bf215546Sopenharmony_ci   }
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci   const unsigned int size = getNode(rep)->colors;
1105bf215546Sopenharmony_ci   unsigned int base = 0;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   if (!rep->compound)
1108bf215546Sopenharmony_ci      rep->compMask = 0xff;
1109bf215546Sopenharmony_ci   rep->compound = 1;
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_ci   for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
1112bf215546Sopenharmony_ci      LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci      val->compound = 1;
1115bf215546Sopenharmony_ci      if (!val->compMask)
1116bf215546Sopenharmony_ci         val->compMask = 0xff;
1117bf215546Sopenharmony_ci      val->compMask &= makeCompMask(size, base, getNode(val)->colors);
1118bf215546Sopenharmony_ci      assert(val->compMask);
1119bf215546Sopenharmony_ci
1120bf215546Sopenharmony_ci      INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
1121bf215546Sopenharmony_ci           rep->id, rep->compMask, val->id, val->compMask);
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci      base += getNode(val)->colors;
1124bf215546Sopenharmony_ci   }
1125bf215546Sopenharmony_ci   assert(base == size);
1126bf215546Sopenharmony_ci}
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_cibool
1129bf215546Sopenharmony_ciGCRA::doCoalesce(ArrayList& insns, unsigned int mask)
1130bf215546Sopenharmony_ci{
1131bf215546Sopenharmony_ci   int c, n;
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci   for (n = 0; n < insns.getSize(); ++n) {
1134bf215546Sopenharmony_ci      Instruction *i;
1135bf215546Sopenharmony_ci      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
1136bf215546Sopenharmony_ci
1137bf215546Sopenharmony_ci      switch (insn->op) {
1138bf215546Sopenharmony_ci      case OP_PHI:
1139bf215546Sopenharmony_ci         if (!(mask & JOIN_MASK_PHI))
1140bf215546Sopenharmony_ci            break;
1141bf215546Sopenharmony_ci         for (c = 0; insn->srcExists(c); ++c)
1142bf215546Sopenharmony_ci            if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
1143bf215546Sopenharmony_ci               // this is bad
1144bf215546Sopenharmony_ci               ERROR("failed to coalesce phi operands\n");
1145bf215546Sopenharmony_ci               return false;
1146bf215546Sopenharmony_ci            }
1147bf215546Sopenharmony_ci         break;
1148bf215546Sopenharmony_ci      case OP_UNION:
1149bf215546Sopenharmony_ci      case OP_MERGE:
1150bf215546Sopenharmony_ci         if (!(mask & JOIN_MASK_UNION))
1151bf215546Sopenharmony_ci            break;
1152bf215546Sopenharmony_ci         for (c = 0; insn->srcExists(c); ++c)
1153bf215546Sopenharmony_ci            coalesceValues(insn->getDef(0), insn->getSrc(c), true);
1154bf215546Sopenharmony_ci         if (insn->op == OP_MERGE) {
1155bf215546Sopenharmony_ci            merges.push_back(insn);
1156bf215546Sopenharmony_ci            if (insn->srcExists(1))
1157bf215546Sopenharmony_ci               makeCompound(insn, false);
1158bf215546Sopenharmony_ci         }
1159bf215546Sopenharmony_ci         break;
1160bf215546Sopenharmony_ci      case OP_SPLIT:
1161bf215546Sopenharmony_ci         if (!(mask & JOIN_MASK_UNION))
1162bf215546Sopenharmony_ci            break;
1163bf215546Sopenharmony_ci         splits.push_back(insn);
1164bf215546Sopenharmony_ci         for (c = 0; insn->defExists(c); ++c)
1165bf215546Sopenharmony_ci            coalesceValues(insn->getSrc(0), insn->getDef(c), true);
1166bf215546Sopenharmony_ci         makeCompound(insn, true);
1167bf215546Sopenharmony_ci         break;
1168bf215546Sopenharmony_ci      case OP_MOV:
1169bf215546Sopenharmony_ci         if (!(mask & JOIN_MASK_MOV))
1170bf215546Sopenharmony_ci            break;
1171bf215546Sopenharmony_ci         i = NULL;
1172bf215546Sopenharmony_ci         if (!insn->getDef(0)->uses.empty())
1173bf215546Sopenharmony_ci            i = (*insn->getDef(0)->uses.begin())->getInsn();
1174bf215546Sopenharmony_ci         // if this is a contraint-move there will only be a single use
1175bf215546Sopenharmony_ci         if (i && i->op == OP_MERGE) // do we really still need this ?
1176bf215546Sopenharmony_ci            break;
1177bf215546Sopenharmony_ci         i = insn->getSrc(0)->getUniqueInsn();
1178bf215546Sopenharmony_ci         if (i && !i->constrainedDefs()) {
1179bf215546Sopenharmony_ci            coalesceValues(insn->getDef(0), insn->getSrc(0), false);
1180bf215546Sopenharmony_ci         }
1181bf215546Sopenharmony_ci         break;
1182bf215546Sopenharmony_ci      case OP_TEX:
1183bf215546Sopenharmony_ci      case OP_TXB:
1184bf215546Sopenharmony_ci      case OP_TXL:
1185bf215546Sopenharmony_ci      case OP_TXF:
1186bf215546Sopenharmony_ci      case OP_TXQ:
1187bf215546Sopenharmony_ci      case OP_TXD:
1188bf215546Sopenharmony_ci      case OP_TXG:
1189bf215546Sopenharmony_ci      case OP_TXLQ:
1190bf215546Sopenharmony_ci      case OP_TEXCSAA:
1191bf215546Sopenharmony_ci      case OP_TEXPREP:
1192bf215546Sopenharmony_ci         if (!(mask & JOIN_MASK_TEX))
1193bf215546Sopenharmony_ci            break;
1194bf215546Sopenharmony_ci         for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
1195bf215546Sopenharmony_ci            coalesceValues(insn->getDef(c), insn->getSrc(c), true);
1196bf215546Sopenharmony_ci         break;
1197bf215546Sopenharmony_ci      default:
1198bf215546Sopenharmony_ci         break;
1199bf215546Sopenharmony_ci      }
1200bf215546Sopenharmony_ci   }
1201bf215546Sopenharmony_ci   return true;
1202bf215546Sopenharmony_ci}
1203bf215546Sopenharmony_ci
1204bf215546Sopenharmony_civoid
1205bf215546Sopenharmony_ciGCRA::RIG_Node::addInterference(RIG_Node *node)
1206bf215546Sopenharmony_ci{
1207bf215546Sopenharmony_ci   this->degree += relDegree[node->colors][colors];
1208bf215546Sopenharmony_ci   node->degree += relDegree[colors][node->colors];
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci   this->attach(node, Graph::Edge::CROSS);
1211bf215546Sopenharmony_ci}
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_civoid
1214bf215546Sopenharmony_ciGCRA::RIG_Node::addRegPreference(RIG_Node *node)
1215bf215546Sopenharmony_ci{
1216bf215546Sopenharmony_ci   prefRegs.push_back(node);
1217bf215546Sopenharmony_ci}
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ciGCRA::GCRA(Function *fn, SpillCodeInserter& spill, MergedDefs& mergedDefs) :
1220bf215546Sopenharmony_ci   nodes(NULL),
1221bf215546Sopenharmony_ci   nodeCount(0),
1222bf215546Sopenharmony_ci   func(fn),
1223bf215546Sopenharmony_ci   regs(fn->getProgram()->getTarget()),
1224bf215546Sopenharmony_ci   spill(spill),
1225bf215546Sopenharmony_ci   mergedDefs(mergedDefs)
1226bf215546Sopenharmony_ci{
1227bf215546Sopenharmony_ci   prog = func->getProgram();
1228bf215546Sopenharmony_ci}
1229bf215546Sopenharmony_ci
1230bf215546Sopenharmony_ciGCRA::~GCRA()
1231bf215546Sopenharmony_ci{
1232bf215546Sopenharmony_ci   if (nodes)
1233bf215546Sopenharmony_ci      delete[] nodes;
1234bf215546Sopenharmony_ci}
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_civoid
1237bf215546Sopenharmony_ciGCRA::checkList(std::list<RIG_Node *>& lst)
1238bf215546Sopenharmony_ci{
1239bf215546Sopenharmony_ci   GCRA::RIG_Node *prev = NULL;
1240bf215546Sopenharmony_ci
1241bf215546Sopenharmony_ci   for (std::list<RIG_Node *>::iterator it = lst.begin();
1242bf215546Sopenharmony_ci        it != lst.end();
1243bf215546Sopenharmony_ci        ++it) {
1244bf215546Sopenharmony_ci      assert((*it)->getValue()->join == (*it)->getValue());
1245bf215546Sopenharmony_ci      if (prev)
1246bf215546Sopenharmony_ci         assert(prev->livei.begin() <= (*it)->livei.begin());
1247bf215546Sopenharmony_ci      prev = *it;
1248bf215546Sopenharmony_ci   }
1249bf215546Sopenharmony_ci}
1250bf215546Sopenharmony_ci
1251bf215546Sopenharmony_civoid
1252bf215546Sopenharmony_ciGCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
1253bf215546Sopenharmony_ci{
1254bf215546Sopenharmony_ci   if (node->livei.isEmpty())
1255bf215546Sopenharmony_ci      return;
1256bf215546Sopenharmony_ci   // only the intervals of joined values don't necessarily arrive in order
1257bf215546Sopenharmony_ci   std::list<RIG_Node *>::iterator prev, it;
1258bf215546Sopenharmony_ci   for (it = list.end(); it != list.begin(); it = prev) {
1259bf215546Sopenharmony_ci      prev = it;
1260bf215546Sopenharmony_ci      --prev;
1261bf215546Sopenharmony_ci      if ((*prev)->livei.begin() <= node->livei.begin())
1262bf215546Sopenharmony_ci         break;
1263bf215546Sopenharmony_ci   }
1264bf215546Sopenharmony_ci   list.insert(it, node);
1265bf215546Sopenharmony_ci}
1266bf215546Sopenharmony_ci
1267bf215546Sopenharmony_civoid
1268bf215546Sopenharmony_ciGCRA::buildRIG(ArrayList& insns)
1269bf215546Sopenharmony_ci{
1270bf215546Sopenharmony_ci   std::list<RIG_Node *> values, active;
1271bf215546Sopenharmony_ci
1272bf215546Sopenharmony_ci   for (std::deque<ValueDef>::iterator it = func->ins.begin();
1273bf215546Sopenharmony_ci        it != func->ins.end(); ++it)
1274bf215546Sopenharmony_ci      insertOrderedTail(values, getNode(it->get()->asLValue()));
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci   for (int i = 0; i < insns.getSize(); ++i) {
1277bf215546Sopenharmony_ci      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
1278bf215546Sopenharmony_ci      for (int d = 0; insn->defExists(d); ++d)
1279bf215546Sopenharmony_ci         if (insn->getDef(d)->reg.file <= LAST_REGISTER_FILE &&
1280bf215546Sopenharmony_ci             insn->getDef(d)->rep() == insn->getDef(d))
1281bf215546Sopenharmony_ci            insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
1282bf215546Sopenharmony_ci   }
1283bf215546Sopenharmony_ci   checkList(values);
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci   while (!values.empty()) {
1286bf215546Sopenharmony_ci      RIG_Node *cur = values.front();
1287bf215546Sopenharmony_ci
1288bf215546Sopenharmony_ci      for (std::list<RIG_Node *>::iterator it = active.begin();
1289bf215546Sopenharmony_ci           it != active.end();) {
1290bf215546Sopenharmony_ci         RIG_Node *node = *it;
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_ci         if (node->livei.end() <= cur->livei.begin()) {
1293bf215546Sopenharmony_ci            it = active.erase(it);
1294bf215546Sopenharmony_ci         } else {
1295bf215546Sopenharmony_ci            if (node->f == cur->f && node->livei.overlaps(cur->livei))
1296bf215546Sopenharmony_ci               cur->addInterference(node);
1297bf215546Sopenharmony_ci            ++it;
1298bf215546Sopenharmony_ci         }
1299bf215546Sopenharmony_ci      }
1300bf215546Sopenharmony_ci      values.pop_front();
1301bf215546Sopenharmony_ci      active.push_back(cur);
1302bf215546Sopenharmony_ci   }
1303bf215546Sopenharmony_ci}
1304bf215546Sopenharmony_ci
1305bf215546Sopenharmony_civoid
1306bf215546Sopenharmony_ciGCRA::calculateSpillWeights()
1307bf215546Sopenharmony_ci{
1308bf215546Sopenharmony_ci   for (unsigned int i = 0; i < nodeCount; ++i) {
1309bf215546Sopenharmony_ci      RIG_Node *const n = &nodes[i];
1310bf215546Sopenharmony_ci      if (!nodes[i].colors || nodes[i].livei.isEmpty())
1311bf215546Sopenharmony_ci         continue;
1312bf215546Sopenharmony_ci      if (nodes[i].reg >= 0) {
1313bf215546Sopenharmony_ci         // update max reg
1314bf215546Sopenharmony_ci         regs.occupy(n->f, n->reg, n->colors);
1315bf215546Sopenharmony_ci         continue;
1316bf215546Sopenharmony_ci      }
1317bf215546Sopenharmony_ci      LValue *val = nodes[i].getValue();
1318bf215546Sopenharmony_ci
1319bf215546Sopenharmony_ci      if (!val->noSpill) {
1320bf215546Sopenharmony_ci         int rc = 0;
1321bf215546Sopenharmony_ci         for (ValueDef *def : mergedDefs(val))
1322bf215546Sopenharmony_ci            rc += def->get()->refCount();
1323bf215546Sopenharmony_ci
1324bf215546Sopenharmony_ci         nodes[i].weight =
1325bf215546Sopenharmony_ci            (float)rc * (float)rc / (float)nodes[i].livei.extent();
1326bf215546Sopenharmony_ci      }
1327bf215546Sopenharmony_ci
1328bf215546Sopenharmony_ci      if (nodes[i].degree < nodes[i].degreeLimit) {
1329bf215546Sopenharmony_ci         int l = 0;
1330bf215546Sopenharmony_ci         if (val->reg.size > 4)
1331bf215546Sopenharmony_ci            l = 1;
1332bf215546Sopenharmony_ci         DLLIST_ADDHEAD(&lo[l], &nodes[i]);
1333bf215546Sopenharmony_ci      } else {
1334bf215546Sopenharmony_ci         DLLIST_ADDHEAD(&hi, &nodes[i]);
1335bf215546Sopenharmony_ci      }
1336bf215546Sopenharmony_ci   }
1337bf215546Sopenharmony_ci   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1338bf215546Sopenharmony_ci      printNodeInfo();
1339bf215546Sopenharmony_ci}
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_civoid
1342bf215546Sopenharmony_ciGCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
1343bf215546Sopenharmony_ci{
1344bf215546Sopenharmony_ci   bool move = b->degree >= b->degreeLimit;
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1347bf215546Sopenharmony_ci            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
1348bf215546Sopenharmony_ci            a->getValue()->id, a->degree, a->degreeLimit,
1349bf215546Sopenharmony_ci            b->getValue()->id, b->degree, b->degreeLimit);
1350bf215546Sopenharmony_ci
1351bf215546Sopenharmony_ci   b->degree -= relDegree[a->colors][b->colors];
1352bf215546Sopenharmony_ci
1353bf215546Sopenharmony_ci   move = move && b->degree < b->degreeLimit;
1354bf215546Sopenharmony_ci   if (move && !DLLIST_EMPTY(b)) {
1355bf215546Sopenharmony_ci      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
1356bf215546Sopenharmony_ci      DLLIST_DEL(b);
1357bf215546Sopenharmony_ci      DLLIST_ADDTAIL(&lo[l], b);
1358bf215546Sopenharmony_ci   }
1359bf215546Sopenharmony_ci}
1360bf215546Sopenharmony_ci
1361bf215546Sopenharmony_civoid
1362bf215546Sopenharmony_ciGCRA::simplifyNode(RIG_Node *node)
1363bf215546Sopenharmony_ci{
1364bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1365bf215546Sopenharmony_ci      simplifyEdge(node, RIG_Node::get(ei));
1366bf215546Sopenharmony_ci
1367bf215546Sopenharmony_ci   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1368bf215546Sopenharmony_ci      simplifyEdge(node, RIG_Node::get(ei));
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci   DLLIST_DEL(node);
1371bf215546Sopenharmony_ci   stack.push(node->getValue()->id);
1372bf215546Sopenharmony_ci
1373bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
1374bf215546Sopenharmony_ci            node->getValue()->id,
1375bf215546Sopenharmony_ci            (node->degree < node->degreeLimit) ? "" : "(spill)");
1376bf215546Sopenharmony_ci}
1377bf215546Sopenharmony_ci
1378bf215546Sopenharmony_cibool
1379bf215546Sopenharmony_ciGCRA::simplify()
1380bf215546Sopenharmony_ci{
1381bf215546Sopenharmony_ci   for (;;) {
1382bf215546Sopenharmony_ci      if (!DLLIST_EMPTY(&lo[0])) {
1383bf215546Sopenharmony_ci         do {
1384bf215546Sopenharmony_ci            simplifyNode(lo[0].next);
1385bf215546Sopenharmony_ci         } while (!DLLIST_EMPTY(&lo[0]));
1386bf215546Sopenharmony_ci      } else
1387bf215546Sopenharmony_ci      if (!DLLIST_EMPTY(&lo[1])) {
1388bf215546Sopenharmony_ci         simplifyNode(lo[1].next);
1389bf215546Sopenharmony_ci      } else
1390bf215546Sopenharmony_ci      if (!DLLIST_EMPTY(&hi)) {
1391bf215546Sopenharmony_ci         RIG_Node *best = hi.next;
1392bf215546Sopenharmony_ci         unsigned bestMaxReg = best->maxReg;
1393bf215546Sopenharmony_ci         float bestScore = best->weight / (float)best->degree;
1394bf215546Sopenharmony_ci         // Spill candidate. First go through the ones with the highest max
1395bf215546Sopenharmony_ci         // register, then the ones with lower. That way the ones with the
1396bf215546Sopenharmony_ci         // lowest requirement will be allocated first, since it's a stack.
1397bf215546Sopenharmony_ci         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
1398bf215546Sopenharmony_ci            float score = it->weight / (float)it->degree;
1399bf215546Sopenharmony_ci            if (score < bestScore || it->maxReg > bestMaxReg) {
1400bf215546Sopenharmony_ci               best = it;
1401bf215546Sopenharmony_ci               bestScore = score;
1402bf215546Sopenharmony_ci               bestMaxReg = it->maxReg;
1403bf215546Sopenharmony_ci            }
1404bf215546Sopenharmony_ci         }
1405bf215546Sopenharmony_ci         if (isinf(bestScore)) {
1406bf215546Sopenharmony_ci            ERROR("no viable spill candidates left\n");
1407bf215546Sopenharmony_ci            return false;
1408bf215546Sopenharmony_ci         }
1409bf215546Sopenharmony_ci         simplifyNode(best);
1410bf215546Sopenharmony_ci      } else {
1411bf215546Sopenharmony_ci         return true;
1412bf215546Sopenharmony_ci      }
1413bf215546Sopenharmony_ci   }
1414bf215546Sopenharmony_ci}
1415bf215546Sopenharmony_ci
1416bf215546Sopenharmony_civoid
1417bf215546Sopenharmony_ciGCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
1418bf215546Sopenharmony_ci{
1419bf215546Sopenharmony_ci   const RIG_Node *intf = RIG_Node::get(ei);
1420bf215546Sopenharmony_ci
1421bf215546Sopenharmony_ci   if (intf->reg < 0)
1422bf215546Sopenharmony_ci      return;
1423bf215546Sopenharmony_ci   LValue *vA = node->getValue();
1424bf215546Sopenharmony_ci   LValue *vB = intf->getValue();
1425bf215546Sopenharmony_ci
1426bf215546Sopenharmony_ci   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);
1427bf215546Sopenharmony_ci
1428bf215546Sopenharmony_ci   if (vA->compound | vB->compound) {
1429bf215546Sopenharmony_ci      // NOTE: this only works for >aligned< register tuples !
1430bf215546Sopenharmony_ci      for (const ValueDef *D : mergedDefs(vA)) {
1431bf215546Sopenharmony_ci      for (const ValueDef *d : mergedDefs(vB)) {
1432bf215546Sopenharmony_ci         const LValue *vD = D->get()->asLValue();
1433bf215546Sopenharmony_ci         const LValue *vd = d->get()->asLValue();
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci         if (!vD->livei.overlaps(vd->livei)) {
1436bf215546Sopenharmony_ci            INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
1437bf215546Sopenharmony_ci                     vD->id, vd->id);
1438bf215546Sopenharmony_ci            continue;
1439bf215546Sopenharmony_ci         }
1440bf215546Sopenharmony_ci
1441bf215546Sopenharmony_ci         uint8_t mask = vD->compound ? vD->compMask : ~0;
1442bf215546Sopenharmony_ci         if (vd->compound) {
1443bf215546Sopenharmony_ci            assert(vB->compound);
1444bf215546Sopenharmony_ci            mask &= vd->compMask & vB->compMask;
1445bf215546Sopenharmony_ci         } else {
1446bf215546Sopenharmony_ci            mask &= intfMask;
1447bf215546Sopenharmony_ci         }
1448bf215546Sopenharmony_ci
1449bf215546Sopenharmony_ci         INFO_DBG(prog->dbgFlags, REG_ALLOC,
1450bf215546Sopenharmony_ci                  "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
1451bf215546Sopenharmony_ci                  vD->id,
1452bf215546Sopenharmony_ci                  vD->compound ? vD->compMask : 0xff,
1453bf215546Sopenharmony_ci                  vd->id,
1454bf215546Sopenharmony_ci                  vd->compound ? vd->compMask : intfMask,
1455bf215546Sopenharmony_ci                  vB->compMask, intf->reg & ~7, mask);
1456bf215546Sopenharmony_ci         if (mask)
1457bf215546Sopenharmony_ci            regs.occupyMask(node->f, intf->reg & ~7, mask);
1458bf215546Sopenharmony_ci      }
1459bf215546Sopenharmony_ci      }
1460bf215546Sopenharmony_ci   } else {
1461bf215546Sopenharmony_ci      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1462bf215546Sopenharmony_ci               "(%%%i) X (%%%i): $r%i + %u\n",
1463bf215546Sopenharmony_ci               vA->id, vB->id, intf->reg, intf->colors);
1464bf215546Sopenharmony_ci      regs.occupy(node->f, intf->reg, intf->colors);
1465bf215546Sopenharmony_ci   }
1466bf215546Sopenharmony_ci}
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_cibool
1469bf215546Sopenharmony_ciGCRA::selectRegisters()
1470bf215546Sopenharmony_ci{
1471bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
1472bf215546Sopenharmony_ci
1473bf215546Sopenharmony_ci   while (!stack.empty()) {
1474bf215546Sopenharmony_ci      RIG_Node *node = &nodes[stack.top()];
1475bf215546Sopenharmony_ci      stack.pop();
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci      regs.reset(node->f);
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_ci      INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
1480bf215546Sopenharmony_ci               node->getValue()->id, node->colors);
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci      for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1483bf215546Sopenharmony_ci         checkInterference(node, ei);
1484bf215546Sopenharmony_ci      for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1485bf215546Sopenharmony_ci         checkInterference(node, ei);
1486bf215546Sopenharmony_ci
1487bf215546Sopenharmony_ci      if (!node->prefRegs.empty()) {
1488bf215546Sopenharmony_ci         for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
1489bf215546Sopenharmony_ci              it != node->prefRegs.end();
1490bf215546Sopenharmony_ci              ++it) {
1491bf215546Sopenharmony_ci            if ((*it)->reg >= 0 &&
1492bf215546Sopenharmony_ci                regs.testOccupy(node->f, (*it)->reg, node->colors)) {
1493bf215546Sopenharmony_ci               node->reg = (*it)->reg;
1494bf215546Sopenharmony_ci               break;
1495bf215546Sopenharmony_ci            }
1496bf215546Sopenharmony_ci         }
1497bf215546Sopenharmony_ci      }
1498bf215546Sopenharmony_ci      if (node->reg >= 0)
1499bf215546Sopenharmony_ci         continue;
1500bf215546Sopenharmony_ci      LValue *lval = node->getValue();
1501bf215546Sopenharmony_ci      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1502bf215546Sopenharmony_ci         regs.print(node->f);
1503bf215546Sopenharmony_ci      bool ret = regs.assign(node->reg, node->f, node->colors, node->maxReg);
1504bf215546Sopenharmony_ci      if (ret) {
1505bf215546Sopenharmony_ci         INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
1506bf215546Sopenharmony_ci         lval->compMask = node->getCompMask();
1507bf215546Sopenharmony_ci      } else {
1508bf215546Sopenharmony_ci         INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
1509bf215546Sopenharmony_ci                  lval->id, lval->reg.size);
1510bf215546Sopenharmony_ci         Symbol *slot = NULL;
1511bf215546Sopenharmony_ci         if (lval->reg.file == FILE_GPR)
1512bf215546Sopenharmony_ci            slot = spill.assignSlot(node->livei, lval->reg.size);
1513bf215546Sopenharmony_ci         mustSpill.push_back(ValuePair(lval, slot));
1514bf215546Sopenharmony_ci      }
1515bf215546Sopenharmony_ci   }
1516bf215546Sopenharmony_ci   if (!mustSpill.empty())
1517bf215546Sopenharmony_ci      return false;
1518bf215546Sopenharmony_ci   for (unsigned int i = 0; i < nodeCount; ++i) {
1519bf215546Sopenharmony_ci      LValue *lval = nodes[i].getValue();
1520bf215546Sopenharmony_ci      if (nodes[i].reg >= 0 && nodes[i].colors > 0)
1521bf215546Sopenharmony_ci         lval->reg.data.id =
1522bf215546Sopenharmony_ci            regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
1523bf215546Sopenharmony_ci   }
1524bf215546Sopenharmony_ci   return true;
1525bf215546Sopenharmony_ci}
1526bf215546Sopenharmony_ci
1527bf215546Sopenharmony_cibool
1528bf215546Sopenharmony_ciGCRA::allocateRegisters(ArrayList& insns)
1529bf215546Sopenharmony_ci{
1530bf215546Sopenharmony_ci   bool ret;
1531bf215546Sopenharmony_ci
1532bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1533bf215546Sopenharmony_ci            "allocateRegisters to %u instructions\n", insns.getSize());
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci   nodeCount = func->allLValues.getSize();
1536bf215546Sopenharmony_ci   nodes = new RIG_Node[nodeCount];
1537bf215546Sopenharmony_ci   if (!nodes)
1538bf215546Sopenharmony_ci      return false;
1539bf215546Sopenharmony_ci   for (unsigned int i = 0; i < nodeCount; ++i) {
1540bf215546Sopenharmony_ci      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
1541bf215546Sopenharmony_ci      if (lval) {
1542bf215546Sopenharmony_ci         nodes[i].init(regs, lval);
1543bf215546Sopenharmony_ci         RIG.insert(&nodes[i]);
1544bf215546Sopenharmony_ci
1545bf215546Sopenharmony_ci         if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL) {
1546bf215546Sopenharmony_ci            Instruction *insn = lval->getInsn();
1547bf215546Sopenharmony_ci            if (insn->op != OP_MAD && insn->op != OP_FMA && insn->op != OP_SAD)
1548bf215546Sopenharmony_ci               continue;
1549bf215546Sopenharmony_ci            // For both of the cases below, we only want to add the preference
1550bf215546Sopenharmony_ci            // if all arguments are in registers.
1551bf215546Sopenharmony_ci            if (insn->src(0).getFile() != FILE_GPR ||
1552bf215546Sopenharmony_ci                insn->src(1).getFile() != FILE_GPR ||
1553bf215546Sopenharmony_ci                insn->src(2).getFile() != FILE_GPR)
1554bf215546Sopenharmony_ci               continue;
1555bf215546Sopenharmony_ci            if (prog->getTarget()->getChipset() < 0xc0) {
1556bf215546Sopenharmony_ci               // Outputting a flag is not supported with short encodings nor
1557bf215546Sopenharmony_ci               // with immediate arguments.
1558bf215546Sopenharmony_ci               // See handleMADforNV50.
1559bf215546Sopenharmony_ci               if (insn->flagsDef >= 0)
1560bf215546Sopenharmony_ci                  continue;
1561bf215546Sopenharmony_ci            } else {
1562bf215546Sopenharmony_ci               // We can only fold immediate arguments if dst == src2. This
1563bf215546Sopenharmony_ci               // only matters if one of the first two arguments is an
1564bf215546Sopenharmony_ci               // immediate. This form is also only supported for floats.
1565bf215546Sopenharmony_ci               // See handleMADforNVC0.
1566bf215546Sopenharmony_ci               ImmediateValue imm;
1567bf215546Sopenharmony_ci               if (insn->dType != TYPE_F32)
1568bf215546Sopenharmony_ci                  continue;
1569bf215546Sopenharmony_ci               if (!insn->src(0).getImmediate(imm) &&
1570bf215546Sopenharmony_ci                   !insn->src(1).getImmediate(imm))
1571bf215546Sopenharmony_ci                  continue;
1572bf215546Sopenharmony_ci            }
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_ci            nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue()));
1575bf215546Sopenharmony_ci         }
1576bf215546Sopenharmony_ci      }
1577bf215546Sopenharmony_ci   }
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci   // coalesce first, we use only 1 RIG node for a group of joined values
1580bf215546Sopenharmony_ci   ret = coalesce(insns);
1581bf215546Sopenharmony_ci   if (!ret)
1582bf215546Sopenharmony_ci      goto out;
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_ci   if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1585bf215546Sopenharmony_ci      func->printLiveIntervals();
1586bf215546Sopenharmony_ci
1587bf215546Sopenharmony_ci   buildRIG(insns);
1588bf215546Sopenharmony_ci   calculateSpillWeights();
1589bf215546Sopenharmony_ci   ret = simplify();
1590bf215546Sopenharmony_ci   if (!ret)
1591bf215546Sopenharmony_ci      goto out;
1592bf215546Sopenharmony_ci
1593bf215546Sopenharmony_ci   ret = selectRegisters();
1594bf215546Sopenharmony_ci   if (!ret) {
1595bf215546Sopenharmony_ci      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1596bf215546Sopenharmony_ci               "selectRegisters failed, inserting spill code ...\n");
1597bf215546Sopenharmony_ci      regs.reset(FILE_GPR, true);
1598bf215546Sopenharmony_ci      spill.run(mustSpill);
1599bf215546Sopenharmony_ci      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1600bf215546Sopenharmony_ci         func->print();
1601bf215546Sopenharmony_ci   } else {
1602bf215546Sopenharmony_ci      mergedDefs.merge();
1603bf215546Sopenharmony_ci      prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
1604bf215546Sopenharmony_ci   }
1605bf215546Sopenharmony_ci
1606bf215546Sopenharmony_ciout:
1607bf215546Sopenharmony_ci   cleanup(ret);
1608bf215546Sopenharmony_ci   return ret;
1609bf215546Sopenharmony_ci}
1610bf215546Sopenharmony_ci
1611bf215546Sopenharmony_civoid
1612bf215546Sopenharmony_ciGCRA::cleanup(const bool success)
1613bf215546Sopenharmony_ci{
1614bf215546Sopenharmony_ci   mustSpill.clear();
1615bf215546Sopenharmony_ci
1616bf215546Sopenharmony_ci   for (ArrayList::Iterator it = func->allLValues.iterator();
1617bf215546Sopenharmony_ci        !it.end(); it.next()) {
1618bf215546Sopenharmony_ci      LValue *lval =  reinterpret_cast<LValue *>(it.get());
1619bf215546Sopenharmony_ci
1620bf215546Sopenharmony_ci      lval->livei.clear();
1621bf215546Sopenharmony_ci
1622bf215546Sopenharmony_ci      lval->compound = 0;
1623bf215546Sopenharmony_ci      lval->compMask = 0;
1624bf215546Sopenharmony_ci
1625bf215546Sopenharmony_ci      if (lval->join == lval)
1626bf215546Sopenharmony_ci         continue;
1627bf215546Sopenharmony_ci
1628bf215546Sopenharmony_ci      if (success)
1629bf215546Sopenharmony_ci         lval->reg.data.id = lval->join->reg.data.id;
1630bf215546Sopenharmony_ci      else
1631bf215546Sopenharmony_ci         lval->join = lval;
1632bf215546Sopenharmony_ci   }
1633bf215546Sopenharmony_ci
1634bf215546Sopenharmony_ci   if (success)
1635bf215546Sopenharmony_ci      resolveSplitsAndMerges();
1636bf215546Sopenharmony_ci   splits.clear(); // avoid duplicate entries on next coalesce pass
1637bf215546Sopenharmony_ci   merges.clear();
1638bf215546Sopenharmony_ci
1639bf215546Sopenharmony_ci   delete[] nodes;
1640bf215546Sopenharmony_ci   nodes = NULL;
1641bf215546Sopenharmony_ci   hi.next = hi.prev = &hi;
1642bf215546Sopenharmony_ci   lo[0].next = lo[0].prev = &lo[0];
1643bf215546Sopenharmony_ci   lo[1].next = lo[1].prev = &lo[1];
1644bf215546Sopenharmony_ci}
1645bf215546Sopenharmony_ci
1646bf215546Sopenharmony_ciSymbol *
1647bf215546Sopenharmony_ciSpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
1648bf215546Sopenharmony_ci{
1649bf215546Sopenharmony_ci   SpillSlot slot;
1650bf215546Sopenharmony_ci   int32_t offsetBase = stackSize;
1651bf215546Sopenharmony_ci   int32_t offset;
1652bf215546Sopenharmony_ci   std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci   if (!func->stackPtr) {
1655bf215546Sopenharmony_ci      // Later, we compute the address as (offsetBase + tlsBase)
1656bf215546Sopenharmony_ci      // tlsBase might not be size-aligned, so we add just enough
1657bf215546Sopenharmony_ci      // to give the final address the correct alignment
1658bf215546Sopenharmony_ci      offsetBase = align(offsetBase + func->tlsBase, size) - func->tlsBase;
1659bf215546Sopenharmony_ci   } else {
1660bf215546Sopenharmony_ci      offsetBase = align(offsetBase, size);
1661bf215546Sopenharmony_ci   }
1662bf215546Sopenharmony_ci
1663bf215546Sopenharmony_ci   slot.sym = NULL;
1664bf215546Sopenharmony_ci
1665bf215546Sopenharmony_ci   for (offset = offsetBase; offset < stackSize; offset += size) {
1666bf215546Sopenharmony_ci      const int32_t entryEnd = offset + size;
1667bf215546Sopenharmony_ci      while (it != slots.end() && it->offset < offset)
1668bf215546Sopenharmony_ci         ++it;
1669bf215546Sopenharmony_ci      if (it == slots.end()) // no slots left
1670bf215546Sopenharmony_ci         break;
1671bf215546Sopenharmony_ci      std::list<SpillSlot>::iterator bgn = it;
1672bf215546Sopenharmony_ci
1673bf215546Sopenharmony_ci      while (it != slots.end() && it->offset < entryEnd) {
1674bf215546Sopenharmony_ci         it->occup.print();
1675bf215546Sopenharmony_ci         if (it->occup.overlaps(livei))
1676bf215546Sopenharmony_ci            break;
1677bf215546Sopenharmony_ci         ++it;
1678bf215546Sopenharmony_ci      }
1679bf215546Sopenharmony_ci      if (it == slots.end() || it->offset >= entryEnd) {
1680bf215546Sopenharmony_ci         // fits
1681bf215546Sopenharmony_ci         for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
1682bf215546Sopenharmony_ci            bgn->occup.insert(livei);
1683bf215546Sopenharmony_ci            if (bgn->size() == size)
1684bf215546Sopenharmony_ci               slot.sym = bgn->sym;
1685bf215546Sopenharmony_ci         }
1686bf215546Sopenharmony_ci         break;
1687bf215546Sopenharmony_ci      }
1688bf215546Sopenharmony_ci   }
1689bf215546Sopenharmony_ci   if (!slot.sym) {
1690bf215546Sopenharmony_ci      stackSize = offset + size;
1691bf215546Sopenharmony_ci      slot.offset = offset;
1692bf215546Sopenharmony_ci      slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
1693bf215546Sopenharmony_ci      if (!func->stackPtr)
1694bf215546Sopenharmony_ci         offset += func->tlsBase;
1695bf215546Sopenharmony_ci      slot.sym->setAddress(NULL, offset);
1696bf215546Sopenharmony_ci      slot.sym->reg.size = size;
1697bf215546Sopenharmony_ci      slots.insert(pos, slot)->occup.insert(livei);
1698bf215546Sopenharmony_ci   }
1699bf215546Sopenharmony_ci   return slot.sym;
1700bf215546Sopenharmony_ci}
1701bf215546Sopenharmony_ci
1702bf215546Sopenharmony_ciValue *
1703bf215546Sopenharmony_ciSpillCodeInserter::offsetSlot(Value *base, const LValue *lval)
1704bf215546Sopenharmony_ci{
1705bf215546Sopenharmony_ci   if (!lval->compound || (lval->compMask & 0x1))
1706bf215546Sopenharmony_ci      return base;
1707bf215546Sopenharmony_ci   Value *slot = cloneShallow(func, base);
1708bf215546Sopenharmony_ci
1709bf215546Sopenharmony_ci   slot->reg.data.offset += (ffs(lval->compMask) - 1) * lval->reg.size;
1710bf215546Sopenharmony_ci   slot->reg.size = lval->reg.size;
1711bf215546Sopenharmony_ci
1712bf215546Sopenharmony_ci   return slot;
1713bf215546Sopenharmony_ci}
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_civoid
1716bf215546Sopenharmony_ciSpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
1717bf215546Sopenharmony_ci{
1718bf215546Sopenharmony_ci   const DataType ty = typeOfSize(lval->reg.size);
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci   slot = offsetSlot(slot, lval);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci   Instruction *st;
1723bf215546Sopenharmony_ci   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1724bf215546Sopenharmony_ci      lval->noSpill = 1;
1725bf215546Sopenharmony_ci      if (ty != TYPE_B96) {
1726bf215546Sopenharmony_ci         st = new_Instruction(func, OP_STORE, ty);
1727bf215546Sopenharmony_ci         st->setSrc(0, slot);
1728bf215546Sopenharmony_ci         st->setSrc(1, lval);
1729bf215546Sopenharmony_ci      } else {
1730bf215546Sopenharmony_ci         st = new_Instruction(func, OP_SPLIT, ty);
1731bf215546Sopenharmony_ci         st->setSrc(0, lval);
1732bf215546Sopenharmony_ci         for (int d = 0; d < lval->reg.size / 4; ++d)
1733bf215546Sopenharmony_ci            st->setDef(d, new_LValue(func, FILE_GPR));
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci         for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
1736bf215546Sopenharmony_ci            Value *tmp = cloneShallow(func, slot);
1737bf215546Sopenharmony_ci            tmp->reg.size = 4;
1738bf215546Sopenharmony_ci            tmp->reg.data.offset += 4 * d;
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci            Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
1741bf215546Sopenharmony_ci            s->setSrc(0, tmp);
1742bf215546Sopenharmony_ci            s->setSrc(1, st->getDef(d));
1743bf215546Sopenharmony_ci            defi->bb->insertAfter(defi, s);
1744bf215546Sopenharmony_ci         }
1745bf215546Sopenharmony_ci      }
1746bf215546Sopenharmony_ci   } else {
1747bf215546Sopenharmony_ci      st = new_Instruction(func, OP_CVT, ty);
1748bf215546Sopenharmony_ci      st->setDef(0, slot);
1749bf215546Sopenharmony_ci      st->setSrc(0, lval);
1750bf215546Sopenharmony_ci      if (lval->reg.file == FILE_FLAGS)
1751bf215546Sopenharmony_ci         st->flagsSrc = 0;
1752bf215546Sopenharmony_ci   }
1753bf215546Sopenharmony_ci   defi->bb->insertAfter(defi, st);
1754bf215546Sopenharmony_ci}
1755bf215546Sopenharmony_ci
1756bf215546Sopenharmony_ciLValue *
1757bf215546Sopenharmony_ciSpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
1758bf215546Sopenharmony_ci{
1759bf215546Sopenharmony_ci   const DataType ty = typeOfSize(lval->reg.size);
1760bf215546Sopenharmony_ci
1761bf215546Sopenharmony_ci   slot = offsetSlot(slot, lval);
1762bf215546Sopenharmony_ci   lval = cloneShallow(func, lval);
1763bf215546Sopenharmony_ci
1764bf215546Sopenharmony_ci   Instruction *ld;
1765bf215546Sopenharmony_ci   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1766bf215546Sopenharmony_ci      lval->noSpill = 1;
1767bf215546Sopenharmony_ci      if (ty != TYPE_B96) {
1768bf215546Sopenharmony_ci         ld = new_Instruction(func, OP_LOAD, ty);
1769bf215546Sopenharmony_ci      } else {
1770bf215546Sopenharmony_ci         ld = new_Instruction(func, OP_MERGE, ty);
1771bf215546Sopenharmony_ci         for (int d = 0; d < lval->reg.size / 4; ++d) {
1772bf215546Sopenharmony_ci            Value *tmp = cloneShallow(func, slot);
1773bf215546Sopenharmony_ci            LValue *val;
1774bf215546Sopenharmony_ci            tmp->reg.size = 4;
1775bf215546Sopenharmony_ci            tmp->reg.data.offset += 4 * d;
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci            Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
1778bf215546Sopenharmony_ci            l->setDef(0, (val = new_LValue(func, FILE_GPR)));
1779bf215546Sopenharmony_ci            l->setSrc(0, tmp);
1780bf215546Sopenharmony_ci            usei->bb->insertBefore(usei, l);
1781bf215546Sopenharmony_ci            ld->setSrc(d, val);
1782bf215546Sopenharmony_ci            val->noSpill = 1;
1783bf215546Sopenharmony_ci         }
1784bf215546Sopenharmony_ci         ld->setDef(0, lval);
1785bf215546Sopenharmony_ci         usei->bb->insertBefore(usei, ld);
1786bf215546Sopenharmony_ci         return lval;
1787bf215546Sopenharmony_ci      }
1788bf215546Sopenharmony_ci   } else {
1789bf215546Sopenharmony_ci      ld = new_Instruction(func, OP_CVT, ty);
1790bf215546Sopenharmony_ci   }
1791bf215546Sopenharmony_ci   ld->setDef(0, lval);
1792bf215546Sopenharmony_ci   ld->setSrc(0, slot);
1793bf215546Sopenharmony_ci   if (lval->reg.file == FILE_FLAGS)
1794bf215546Sopenharmony_ci      ld->flagsDef = 0;
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_ci   usei->bb->insertBefore(usei, ld);
1797bf215546Sopenharmony_ci   return lval;
1798bf215546Sopenharmony_ci}
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_cistatic bool
1801bf215546Sopenharmony_civalue_cmp(ValueRef *a, ValueRef *b) {
1802bf215546Sopenharmony_ci   Instruction *ai = a->getInsn(), *bi = b->getInsn();
1803bf215546Sopenharmony_ci   if (ai->bb != bi->bb)
1804bf215546Sopenharmony_ci      return ai->bb->getId() < bi->bb->getId();
1805bf215546Sopenharmony_ci   return ai->serial < bi->serial;
1806bf215546Sopenharmony_ci}
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ci// For each value that is to be spilled, go through all its definitions.
1809bf215546Sopenharmony_ci// A value can have multiple definitions if it has been coalesced before.
1810bf215546Sopenharmony_ci// For each definition, first go through all its uses and insert an unspill
1811bf215546Sopenharmony_ci// instruction before it, then replace the use with the temporary register.
1812bf215546Sopenharmony_ci// Unspill can be either a load from memory or simply a move to another
1813bf215546Sopenharmony_ci// register file.
1814bf215546Sopenharmony_ci// For "Pseudo" instructions (like PHI, SPLIT, MERGE) we can erase the use
1815bf215546Sopenharmony_ci// if we have spilled to a memory location, or simply with the new register.
1816bf215546Sopenharmony_ci// No load or conversion instruction should be needed.
1817bf215546Sopenharmony_cibool
1818bf215546Sopenharmony_ciSpillCodeInserter::run(const std::list<ValuePair>& lst)
1819bf215546Sopenharmony_ci{
1820bf215546Sopenharmony_ci   for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
1821bf215546Sopenharmony_ci        ++it) {
1822bf215546Sopenharmony_ci      LValue *lval = it->first->asLValue();
1823bf215546Sopenharmony_ci      Symbol *mem = it->second ? it->second->asSym() : NULL;
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci      // Keep track of which instructions to delete later. Deleting them
1826bf215546Sopenharmony_ci      // inside the loop is unsafe since a single instruction may have
1827bf215546Sopenharmony_ci      // multiple destinations that all need to be spilled (like OP_SPLIT).
1828bf215546Sopenharmony_ci      std::unordered_set<Instruction *> to_del;
1829bf215546Sopenharmony_ci
1830bf215546Sopenharmony_ci      std::list<ValueDef *> &defs = mergedDefs(lval);
1831bf215546Sopenharmony_ci      for (Value::DefIterator d = defs.begin(); d != defs.end();
1832bf215546Sopenharmony_ci           ++d) {
1833bf215546Sopenharmony_ci         Value *slot = mem ?
1834bf215546Sopenharmony_ci            static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
1835bf215546Sopenharmony_ci         Value *tmp = NULL;
1836bf215546Sopenharmony_ci         Instruction *last = NULL;
1837bf215546Sopenharmony_ci
1838bf215546Sopenharmony_ci         LValue *dval = (*d)->get()->asLValue();
1839bf215546Sopenharmony_ci         Instruction *defi = (*d)->getInsn();
1840bf215546Sopenharmony_ci
1841bf215546Sopenharmony_ci         // Sort all the uses by BB/instruction so that we don't unspill
1842bf215546Sopenharmony_ci         // multiple times in a row, and also remove a source of
1843bf215546Sopenharmony_ci         // non-determinism.
1844bf215546Sopenharmony_ci         std::vector<ValueRef *> refs(dval->uses.begin(), dval->uses.end());
1845bf215546Sopenharmony_ci         std::sort(refs.begin(), refs.end(), value_cmp);
1846bf215546Sopenharmony_ci
1847bf215546Sopenharmony_ci         // Unspill at each use *before* inserting spill instructions,
1848bf215546Sopenharmony_ci         // we don't want to have the spill instructions in the use list here.
1849bf215546Sopenharmony_ci         for (std::vector<ValueRef*>::const_iterator it = refs.begin();
1850bf215546Sopenharmony_ci              it != refs.end(); ++it) {
1851bf215546Sopenharmony_ci            ValueRef *u = *it;
1852bf215546Sopenharmony_ci            Instruction *usei = u->getInsn();
1853bf215546Sopenharmony_ci            assert(usei);
1854bf215546Sopenharmony_ci            if (usei->isPseudo()) {
1855bf215546Sopenharmony_ci               tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
1856bf215546Sopenharmony_ci               last = NULL;
1857bf215546Sopenharmony_ci            } else {
1858bf215546Sopenharmony_ci               if (!last || (usei != last->next && usei != last))
1859bf215546Sopenharmony_ci                  tmp = unspill(usei, dval, slot);
1860bf215546Sopenharmony_ci               last = usei;
1861bf215546Sopenharmony_ci            }
1862bf215546Sopenharmony_ci            u->set(tmp);
1863bf215546Sopenharmony_ci         }
1864bf215546Sopenharmony_ci
1865bf215546Sopenharmony_ci         assert(defi);
1866bf215546Sopenharmony_ci         if (defi->isPseudo()) {
1867bf215546Sopenharmony_ci            d = defs.erase(d);
1868bf215546Sopenharmony_ci            --d;
1869bf215546Sopenharmony_ci            if (slot->reg.file == FILE_MEMORY_LOCAL)
1870bf215546Sopenharmony_ci               to_del.insert(defi);
1871bf215546Sopenharmony_ci            else
1872bf215546Sopenharmony_ci               defi->setDef(0, slot);
1873bf215546Sopenharmony_ci         } else {
1874bf215546Sopenharmony_ci            spill(defi, slot, dval);
1875bf215546Sopenharmony_ci         }
1876bf215546Sopenharmony_ci      }
1877bf215546Sopenharmony_ci
1878bf215546Sopenharmony_ci      for (std::unordered_set<Instruction *>::const_iterator it = to_del.begin();
1879bf215546Sopenharmony_ci           it != to_del.end(); ++it) {
1880bf215546Sopenharmony_ci         mergedDefs.removeDefsOfInstruction(*it);
1881bf215546Sopenharmony_ci         delete_Instruction(func->getProgram(), *it);
1882bf215546Sopenharmony_ci      }
1883bf215546Sopenharmony_ci   }
1884bf215546Sopenharmony_ci
1885bf215546Sopenharmony_ci   // TODO: We're not trying to reuse old slots in a potential next iteration.
1886bf215546Sopenharmony_ci   //  We have to update the slots' livei intervals to be able to do that.
1887bf215546Sopenharmony_ci   stackBase = stackSize;
1888bf215546Sopenharmony_ci   slots.clear();
1889bf215546Sopenharmony_ci   return true;
1890bf215546Sopenharmony_ci}
1891bf215546Sopenharmony_ci
1892bf215546Sopenharmony_cibool
1893bf215546Sopenharmony_ciRegAlloc::exec()
1894bf215546Sopenharmony_ci{
1895bf215546Sopenharmony_ci   for (IteratorRef it = prog->calls.iteratorDFS(false);
1896bf215546Sopenharmony_ci        !it->end(); it->next()) {
1897bf215546Sopenharmony_ci      func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
1898bf215546Sopenharmony_ci
1899bf215546Sopenharmony_ci      func->tlsBase = prog->tlsSize;
1900bf215546Sopenharmony_ci      if (!execFunc())
1901bf215546Sopenharmony_ci         return false;
1902bf215546Sopenharmony_ci      prog->tlsSize += func->tlsSize;
1903bf215546Sopenharmony_ci   }
1904bf215546Sopenharmony_ci   return true;
1905bf215546Sopenharmony_ci}
1906bf215546Sopenharmony_ci
1907bf215546Sopenharmony_cibool
1908bf215546Sopenharmony_ciRegAlloc::execFunc()
1909bf215546Sopenharmony_ci{
1910bf215546Sopenharmony_ci   MergedDefs mergedDefs;
1911bf215546Sopenharmony_ci   InsertConstraintsPass insertConstr;
1912bf215546Sopenharmony_ci   PhiMovesPass insertPhiMoves;
1913bf215546Sopenharmony_ci   ArgumentMovesPass insertArgMoves;
1914bf215546Sopenharmony_ci   BuildIntervalsPass buildIntervals;
1915bf215546Sopenharmony_ci   SpillCodeInserter insertSpills(func, mergedDefs);
1916bf215546Sopenharmony_ci
1917bf215546Sopenharmony_ci   GCRA gcra(func, insertSpills, mergedDefs);
1918bf215546Sopenharmony_ci
1919bf215546Sopenharmony_ci   unsigned int i, retries;
1920bf215546Sopenharmony_ci   bool ret;
1921bf215546Sopenharmony_ci
1922bf215546Sopenharmony_ci   if (!func->ins.empty()) {
1923bf215546Sopenharmony_ci      // Insert a nop at the entry so inputs only used by the first instruction
1924bf215546Sopenharmony_ci      // don't count as having an empty live range.
1925bf215546Sopenharmony_ci      Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE);
1926bf215546Sopenharmony_ci      BasicBlock::get(func->cfg.getRoot())->insertHead(nop);
1927bf215546Sopenharmony_ci   }
1928bf215546Sopenharmony_ci
1929bf215546Sopenharmony_ci   ret = insertConstr.exec(func);
1930bf215546Sopenharmony_ci   if (!ret)
1931bf215546Sopenharmony_ci      goto out;
1932bf215546Sopenharmony_ci
1933bf215546Sopenharmony_ci   ret = insertPhiMoves.run(func);
1934bf215546Sopenharmony_ci   if (!ret)
1935bf215546Sopenharmony_ci      goto out;
1936bf215546Sopenharmony_ci
1937bf215546Sopenharmony_ci   ret = insertArgMoves.run(func);
1938bf215546Sopenharmony_ci   if (!ret)
1939bf215546Sopenharmony_ci      goto out;
1940bf215546Sopenharmony_ci
1941bf215546Sopenharmony_ci   // TODO: need to fix up spill slot usage ranges to support > 1 retry
1942bf215546Sopenharmony_ci   for (retries = 0; retries < 3; ++retries) {
1943bf215546Sopenharmony_ci      if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
1944bf215546Sopenharmony_ci         INFO("Retry: %i\n", retries);
1945bf215546Sopenharmony_ci      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1946bf215546Sopenharmony_ci         func->print();
1947bf215546Sopenharmony_ci
1948bf215546Sopenharmony_ci      // spilling to registers may add live ranges, need to rebuild everything
1949bf215546Sopenharmony_ci      ret = true;
1950bf215546Sopenharmony_ci      for (sequence = func->cfg.nextSequence(), i = 0;
1951bf215546Sopenharmony_ci           ret && i <= func->loopNestingBound;
1952bf215546Sopenharmony_ci           sequence = func->cfg.nextSequence(), ++i)
1953bf215546Sopenharmony_ci         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
1954bf215546Sopenharmony_ci      // reset marker
1955bf215546Sopenharmony_ci      for (ArrayList::Iterator bi = func->allBBlocks.iterator();
1956bf215546Sopenharmony_ci           !bi.end(); bi.next())
1957bf215546Sopenharmony_ci         BasicBlock::get(bi)->liveSet.marker = false;
1958bf215546Sopenharmony_ci      if (!ret)
1959bf215546Sopenharmony_ci         break;
1960bf215546Sopenharmony_ci      func->orderInstructions(this->insns);
1961bf215546Sopenharmony_ci
1962bf215546Sopenharmony_ci      ret = buildIntervals.run(func);
1963bf215546Sopenharmony_ci      if (!ret)
1964bf215546Sopenharmony_ci         break;
1965bf215546Sopenharmony_ci      ret = gcra.allocateRegisters(insns);
1966bf215546Sopenharmony_ci      if (ret)
1967bf215546Sopenharmony_ci         break; // success
1968bf215546Sopenharmony_ci   }
1969bf215546Sopenharmony_ci   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
1970bf215546Sopenharmony_ci
1971bf215546Sopenharmony_ci   func->tlsSize = insertSpills.getStackSize();
1972bf215546Sopenharmony_ciout:
1973bf215546Sopenharmony_ci   return ret;
1974bf215546Sopenharmony_ci}
1975bf215546Sopenharmony_ci
1976bf215546Sopenharmony_ci// TODO: check if modifying Instruction::join here breaks anything
1977bf215546Sopenharmony_civoid
1978bf215546Sopenharmony_ciGCRA::resolveSplitsAndMerges()
1979bf215546Sopenharmony_ci{
1980bf215546Sopenharmony_ci   for (std::list<Instruction *>::iterator it = splits.begin();
1981bf215546Sopenharmony_ci        it != splits.end();
1982bf215546Sopenharmony_ci        ++it) {
1983bf215546Sopenharmony_ci      Instruction *split = *it;
1984bf215546Sopenharmony_ci      unsigned int reg = regs.idToBytes(split->getSrc(0));
1985bf215546Sopenharmony_ci      for (int d = 0; split->defExists(d); ++d) {
1986bf215546Sopenharmony_ci         Value *v = split->getDef(d);
1987bf215546Sopenharmony_ci         v->reg.data.id = regs.bytesToId(v, reg);
1988bf215546Sopenharmony_ci         v->join = v;
1989bf215546Sopenharmony_ci         reg += v->reg.size;
1990bf215546Sopenharmony_ci      }
1991bf215546Sopenharmony_ci   }
1992bf215546Sopenharmony_ci   splits.clear();
1993bf215546Sopenharmony_ci
1994bf215546Sopenharmony_ci   for (std::list<Instruction *>::iterator it = merges.begin();
1995bf215546Sopenharmony_ci        it != merges.end();
1996bf215546Sopenharmony_ci        ++it) {
1997bf215546Sopenharmony_ci      Instruction *merge = *it;
1998bf215546Sopenharmony_ci      unsigned int reg = regs.idToBytes(merge->getDef(0));
1999bf215546Sopenharmony_ci      for (int s = 0; merge->srcExists(s); ++s) {
2000bf215546Sopenharmony_ci         Value *v = merge->getSrc(s);
2001bf215546Sopenharmony_ci         v->reg.data.id = regs.bytesToId(v, reg);
2002bf215546Sopenharmony_ci         v->join = v;
2003bf215546Sopenharmony_ci         // If the value is defined by a phi/union node, we also need to
2004bf215546Sopenharmony_ci         // perform the same fixup on that node's sources, since after RA
2005bf215546Sopenharmony_ci         // their registers should be identical.
2006bf215546Sopenharmony_ci         if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
2007bf215546Sopenharmony_ci            Instruction *phi = v->getInsn();
2008bf215546Sopenharmony_ci            for (int phis = 0; phi->srcExists(phis); ++phis) {
2009bf215546Sopenharmony_ci               phi->getSrc(phis)->join = v;
2010bf215546Sopenharmony_ci               phi->getSrc(phis)->reg.data.id = v->reg.data.id;
2011bf215546Sopenharmony_ci            }
2012bf215546Sopenharmony_ci         }
2013bf215546Sopenharmony_ci         reg += v->reg.size;
2014bf215546Sopenharmony_ci      }
2015bf215546Sopenharmony_ci   }
2016bf215546Sopenharmony_ci   merges.clear();
2017bf215546Sopenharmony_ci}
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_cibool Program::registerAllocation()
2020bf215546Sopenharmony_ci{
2021bf215546Sopenharmony_ci   RegAlloc ra(this);
2022bf215546Sopenharmony_ci   return ra.exec();
2023bf215546Sopenharmony_ci}
2024bf215546Sopenharmony_ci
2025bf215546Sopenharmony_cibool
2026bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::exec(Function *ir)
2027bf215546Sopenharmony_ci{
2028bf215546Sopenharmony_ci   constrList.clear();
2029bf215546Sopenharmony_ci
2030bf215546Sopenharmony_ci   bool ret = run(ir, true, true);
2031bf215546Sopenharmony_ci   if (ret)
2032bf215546Sopenharmony_ci      ret = insertConstraintMoves();
2033bf215546Sopenharmony_ci   return ret;
2034bf215546Sopenharmony_ci}
2035bf215546Sopenharmony_ci
2036bf215546Sopenharmony_ci// TODO: make part of texture insn
2037bf215546Sopenharmony_civoid
2038bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
2039bf215546Sopenharmony_ci{
2040bf215546Sopenharmony_ci   Value *def[4];
2041bf215546Sopenharmony_ci   int c, k, d;
2042bf215546Sopenharmony_ci   uint8_t mask = 0;
2043bf215546Sopenharmony_ci
2044bf215546Sopenharmony_ci   for (d = 0, k = 0, c = 0; c < 4; ++c) {
2045bf215546Sopenharmony_ci      if (!(tex->tex.mask & (1 << c)))
2046bf215546Sopenharmony_ci         continue;
2047bf215546Sopenharmony_ci      if (tex->getDef(k)->refCount()) {
2048bf215546Sopenharmony_ci         mask |= 1 << c;
2049bf215546Sopenharmony_ci         def[d++] = tex->getDef(k);
2050bf215546Sopenharmony_ci      }
2051bf215546Sopenharmony_ci      ++k;
2052bf215546Sopenharmony_ci   }
2053bf215546Sopenharmony_ci   tex->tex.mask = mask;
2054bf215546Sopenharmony_ci
2055bf215546Sopenharmony_ci   for (c = 0; c < d; ++c)
2056bf215546Sopenharmony_ci      tex->setDef(c, def[c]);
2057bf215546Sopenharmony_ci   for (; c < 4; ++c)
2058bf215546Sopenharmony_ci      tex->setDef(c, NULL);
2059bf215546Sopenharmony_ci}
2060bf215546Sopenharmony_ci
2061bf215546Sopenharmony_cibool
2062bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
2063bf215546Sopenharmony_ci{
2064bf215546Sopenharmony_ci   Value *v = cst->getSrc(s);
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_ci   // current register allocation can't handle it if a value participates in
2067bf215546Sopenharmony_ci   // multiple constraints
2068bf215546Sopenharmony_ci   for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
2069bf215546Sopenharmony_ci      if (cst != (*it)->getInsn())
2070bf215546Sopenharmony_ci         return true;
2071bf215546Sopenharmony_ci   }
2072bf215546Sopenharmony_ci
2073bf215546Sopenharmony_ci   // can start at s + 1 because detectConflict is called on all sources
2074bf215546Sopenharmony_ci   for (int c = s + 1; cst->srcExists(c); ++c)
2075bf215546Sopenharmony_ci      if (v == cst->getSrc(c))
2076bf215546Sopenharmony_ci         return true;
2077bf215546Sopenharmony_ci
2078bf215546Sopenharmony_ci   Instruction *defi = v->getInsn();
2079bf215546Sopenharmony_ci
2080bf215546Sopenharmony_ci   return (!defi || defi->constrainedDefs());
2081bf215546Sopenharmony_ci}
2082bf215546Sopenharmony_ci
2083bf215546Sopenharmony_civoid
2084bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
2085bf215546Sopenharmony_ci{
2086bf215546Sopenharmony_ci   Instruction *cst;
2087bf215546Sopenharmony_ci   int d;
2088bf215546Sopenharmony_ci
2089bf215546Sopenharmony_ci   // first, look for an existing identical constraint op
2090bf215546Sopenharmony_ci   for (std::list<Instruction *>::iterator it = constrList.begin();
2091bf215546Sopenharmony_ci        it != constrList.end();
2092bf215546Sopenharmony_ci        ++it) {
2093bf215546Sopenharmony_ci      cst = (*it);
2094bf215546Sopenharmony_ci      if (!i->bb->dominatedBy(cst->bb))
2095bf215546Sopenharmony_ci         break;
2096bf215546Sopenharmony_ci      for (d = 0; d < n; ++d)
2097bf215546Sopenharmony_ci         if (cst->getSrc(d) != i->getSrc(d + s))
2098bf215546Sopenharmony_ci            break;
2099bf215546Sopenharmony_ci      if (d >= n) {
2100bf215546Sopenharmony_ci         for (d = 0; d < n; ++d, ++s)
2101bf215546Sopenharmony_ci            i->setSrc(s, cst->getDef(d));
2102bf215546Sopenharmony_ci         return;
2103bf215546Sopenharmony_ci      }
2104bf215546Sopenharmony_ci   }
2105bf215546Sopenharmony_ci   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
2106bf215546Sopenharmony_ci
2107bf215546Sopenharmony_ci   for (d = 0; d < n; ++s, ++d) {
2108bf215546Sopenharmony_ci      cst->setDef(d, new_LValue(func, FILE_GPR));
2109bf215546Sopenharmony_ci      cst->setSrc(d, i->getSrc(s));
2110bf215546Sopenharmony_ci      i->setSrc(s, cst->getDef(d));
2111bf215546Sopenharmony_ci   }
2112bf215546Sopenharmony_ci   i->bb->insertBefore(i, cst);
2113bf215546Sopenharmony_ci
2114bf215546Sopenharmony_ci   constrList.push_back(cst);
2115bf215546Sopenharmony_ci}
2116bf215546Sopenharmony_ci
2117bf215546Sopenharmony_ci// Add a dummy use of the pointer source of >= 8 byte loads after the load
2118bf215546Sopenharmony_ci// to prevent it from being assigned a register which overlapping the load's
2119bf215546Sopenharmony_ci// destination, which would produce random corruptions.
2120bf215546Sopenharmony_civoid
2121bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
2122bf215546Sopenharmony_ci{
2123bf215546Sopenharmony_ci   Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
2124bf215546Sopenharmony_ci   hzd->setSrc(0, src->get());
2125bf215546Sopenharmony_ci   i->bb->insertAfter(i, hzd);
2126bf215546Sopenharmony_ci
2127bf215546Sopenharmony_ci}
2128bf215546Sopenharmony_ci
2129bf215546Sopenharmony_ci// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
2130bf215546Sopenharmony_civoid
2131bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
2132bf215546Sopenharmony_ci{
2133bf215546Sopenharmony_ci   int n;
2134bf215546Sopenharmony_ci   for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n);
2135bf215546Sopenharmony_ci   condenseDefs(insn, 0, n - 1);
2136bf215546Sopenharmony_ci}
2137bf215546Sopenharmony_ci
2138bf215546Sopenharmony_civoid
2139bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn,
2140bf215546Sopenharmony_ci                                              const int a, const int b)
2141bf215546Sopenharmony_ci{
2142bf215546Sopenharmony_ci   uint8_t size = 0;
2143bf215546Sopenharmony_ci   if (a >= b)
2144bf215546Sopenharmony_ci      return;
2145bf215546Sopenharmony_ci   for (int s = a; s <= b; ++s)
2146bf215546Sopenharmony_ci      size += insn->getDef(s)->reg.size;
2147bf215546Sopenharmony_ci   if (!size)
2148bf215546Sopenharmony_ci      return;
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci   LValue *lval = new_LValue(func, FILE_GPR);
2151bf215546Sopenharmony_ci   lval->reg.size = size;
2152bf215546Sopenharmony_ci
2153bf215546Sopenharmony_ci   Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
2154bf215546Sopenharmony_ci   split->setSrc(0, lval);
2155bf215546Sopenharmony_ci   for (int d = a; d <= b; ++d) {
2156bf215546Sopenharmony_ci      split->setDef(d - a, insn->getDef(d));
2157bf215546Sopenharmony_ci      insn->setDef(d, NULL);
2158bf215546Sopenharmony_ci   }
2159bf215546Sopenharmony_ci   insn->setDef(a, lval);
2160bf215546Sopenharmony_ci
2161bf215546Sopenharmony_ci   for (int k = a + 1, d = b + 1; insn->defExists(d); ++d, ++k) {
2162bf215546Sopenharmony_ci      insn->setDef(k, insn->getDef(d));
2163bf215546Sopenharmony_ci      insn->setDef(d, NULL);
2164bf215546Sopenharmony_ci   }
2165bf215546Sopenharmony_ci   // carry over predicate if any (mainly for OP_UNION uses)
2166bf215546Sopenharmony_ci   split->setPredicate(insn->cc, insn->getPredicate());
2167bf215546Sopenharmony_ci
2168bf215546Sopenharmony_ci   insn->bb->insertAfter(insn, split);
2169bf215546Sopenharmony_ci   constrList.push_back(split);
2170bf215546Sopenharmony_ci}
2171bf215546Sopenharmony_ci
2172bf215546Sopenharmony_civoid
2173bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
2174bf215546Sopenharmony_ci                                              const int a, const int b)
2175bf215546Sopenharmony_ci{
2176bf215546Sopenharmony_ci   uint8_t size = 0;
2177bf215546Sopenharmony_ci   if (a >= b)
2178bf215546Sopenharmony_ci      return;
2179bf215546Sopenharmony_ci   for (int s = a; s <= b; ++s)
2180bf215546Sopenharmony_ci      size += insn->getSrc(s)->reg.size;
2181bf215546Sopenharmony_ci   if (!size)
2182bf215546Sopenharmony_ci      return;
2183bf215546Sopenharmony_ci   LValue *lval = new_LValue(func, FILE_GPR);
2184bf215546Sopenharmony_ci   lval->reg.size = size;
2185bf215546Sopenharmony_ci
2186bf215546Sopenharmony_ci   Value *save[3];
2187bf215546Sopenharmony_ci   insn->takeExtraSources(0, save);
2188bf215546Sopenharmony_ci
2189bf215546Sopenharmony_ci   Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
2190bf215546Sopenharmony_ci   merge->setDef(0, lval);
2191bf215546Sopenharmony_ci   for (int s = a, i = 0; s <= b; ++s, ++i) {
2192bf215546Sopenharmony_ci      merge->setSrc(i, insn->getSrc(s));
2193bf215546Sopenharmony_ci   }
2194bf215546Sopenharmony_ci   insn->moveSources(b + 1, a - b);
2195bf215546Sopenharmony_ci   insn->setSrc(a, lval);
2196bf215546Sopenharmony_ci   insn->bb->insertBefore(insn, merge);
2197bf215546Sopenharmony_ci
2198bf215546Sopenharmony_ci   insn->putExtraSources(0, save);
2199bf215546Sopenharmony_ci
2200bf215546Sopenharmony_ci   constrList.push_back(merge);
2201bf215546Sopenharmony_ci}
2202bf215546Sopenharmony_ci
2203bf215546Sopenharmony_cibool
2204bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex)
2205bf215546Sopenharmony_ci{
2206bf215546Sopenharmony_ci   if (tex->tex.sIndirectSrc >= 0 ||
2207bf215546Sopenharmony_ci       tex->tex.rIndirectSrc >= 0 ||
2208bf215546Sopenharmony_ci       tex->tex.derivAll)
2209bf215546Sopenharmony_ci      return false;
2210bf215546Sopenharmony_ci
2211bf215546Sopenharmony_ci   if (tex->tex.mask == 5 || tex->tex.mask == 6)
2212bf215546Sopenharmony_ci      return false;
2213bf215546Sopenharmony_ci
2214bf215546Sopenharmony_ci   switch (tex->op) {
2215bf215546Sopenharmony_ci   case OP_TEX:
2216bf215546Sopenharmony_ci   case OP_TXF:
2217bf215546Sopenharmony_ci   case OP_TXG:
2218bf215546Sopenharmony_ci   case OP_TXL:
2219bf215546Sopenharmony_ci      break;
2220bf215546Sopenharmony_ci   default:
2221bf215546Sopenharmony_ci      return false;
2222bf215546Sopenharmony_ci   }
2223bf215546Sopenharmony_ci
2224bf215546Sopenharmony_ci   // legal variants:
2225bf215546Sopenharmony_ci   // TEXS.1D.LZ
2226bf215546Sopenharmony_ci   // TEXS.2D
2227bf215546Sopenharmony_ci   // TEXS.2D.LZ
2228bf215546Sopenharmony_ci   // TEXS.2D.LL
2229bf215546Sopenharmony_ci   // TEXS.2D.DC
2230bf215546Sopenharmony_ci   // TEXS.2D.LL.DC
2231bf215546Sopenharmony_ci   // TEXS.2D.LZ.DC
2232bf215546Sopenharmony_ci   // TEXS.A2D
2233bf215546Sopenharmony_ci   // TEXS.A2D.LZ
2234bf215546Sopenharmony_ci   // TEXS.A2D.LZ.DC
2235bf215546Sopenharmony_ci   // TEXS.3D
2236bf215546Sopenharmony_ci   // TEXS.3D.LZ
2237bf215546Sopenharmony_ci   // TEXS.CUBE
2238bf215546Sopenharmony_ci   // TEXS.CUBE.LL
2239bf215546Sopenharmony_ci
2240bf215546Sopenharmony_ci   // TLDS.1D.LZ
2241bf215546Sopenharmony_ci   // TLDS.1D.LL
2242bf215546Sopenharmony_ci   // TLDS.2D.LZ
2243bf215546Sopenharmony_ci   // TLSD.2D.LZ.AOFFI
2244bf215546Sopenharmony_ci   // TLDS.2D.LZ.MZ
2245bf215546Sopenharmony_ci   // TLDS.2D.LL
2246bf215546Sopenharmony_ci   // TLDS.2D.LL.AOFFI
2247bf215546Sopenharmony_ci   // TLDS.A2D.LZ
2248bf215546Sopenharmony_ci   // TLDS.3D.LZ
2249bf215546Sopenharmony_ci
2250bf215546Sopenharmony_ci   // TLD4S: all 2D/RECT variants and only offset
2251bf215546Sopenharmony_ci
2252bf215546Sopenharmony_ci   switch (tex->op) {
2253bf215546Sopenharmony_ci   case OP_TEX:
2254bf215546Sopenharmony_ci      if (tex->tex.useOffsets)
2255bf215546Sopenharmony_ci         return false;
2256bf215546Sopenharmony_ci
2257bf215546Sopenharmony_ci      switch (tex->tex.target.getEnum()) {
2258bf215546Sopenharmony_ci      case TEX_TARGET_1D:
2259bf215546Sopenharmony_ci      case TEX_TARGET_2D_ARRAY_SHADOW:
2260bf215546Sopenharmony_ci         return tex->tex.levelZero;
2261bf215546Sopenharmony_ci      case TEX_TARGET_CUBE:
2262bf215546Sopenharmony_ci         return !tex->tex.levelZero;
2263bf215546Sopenharmony_ci      case TEX_TARGET_2D:
2264bf215546Sopenharmony_ci      case TEX_TARGET_2D_ARRAY:
2265bf215546Sopenharmony_ci      case TEX_TARGET_2D_SHADOW:
2266bf215546Sopenharmony_ci      case TEX_TARGET_3D:
2267bf215546Sopenharmony_ci      case TEX_TARGET_RECT:
2268bf215546Sopenharmony_ci      case TEX_TARGET_RECT_SHADOW:
2269bf215546Sopenharmony_ci         return true;
2270bf215546Sopenharmony_ci      default:
2271bf215546Sopenharmony_ci         return false;
2272bf215546Sopenharmony_ci      }
2273bf215546Sopenharmony_ci
2274bf215546Sopenharmony_ci   case OP_TXL:
2275bf215546Sopenharmony_ci      if (tex->tex.useOffsets)
2276bf215546Sopenharmony_ci         return false;
2277bf215546Sopenharmony_ci
2278bf215546Sopenharmony_ci      switch (tex->tex.target.getEnum()) {
2279bf215546Sopenharmony_ci      case TEX_TARGET_2D:
2280bf215546Sopenharmony_ci      case TEX_TARGET_2D_SHADOW:
2281bf215546Sopenharmony_ci      case TEX_TARGET_RECT:
2282bf215546Sopenharmony_ci      case TEX_TARGET_RECT_SHADOW:
2283bf215546Sopenharmony_ci      case TEX_TARGET_CUBE:
2284bf215546Sopenharmony_ci         return true;
2285bf215546Sopenharmony_ci      default:
2286bf215546Sopenharmony_ci         return false;
2287bf215546Sopenharmony_ci      }
2288bf215546Sopenharmony_ci
2289bf215546Sopenharmony_ci   case OP_TXF:
2290bf215546Sopenharmony_ci      switch (tex->tex.target.getEnum()) {
2291bf215546Sopenharmony_ci      case TEX_TARGET_1D:
2292bf215546Sopenharmony_ci         return !tex->tex.useOffsets;
2293bf215546Sopenharmony_ci      case TEX_TARGET_2D:
2294bf215546Sopenharmony_ci      case TEX_TARGET_RECT:
2295bf215546Sopenharmony_ci         return true;
2296bf215546Sopenharmony_ci      case TEX_TARGET_2D_ARRAY:
2297bf215546Sopenharmony_ci      case TEX_TARGET_2D_MS:
2298bf215546Sopenharmony_ci      case TEX_TARGET_3D:
2299bf215546Sopenharmony_ci         return !tex->tex.useOffsets && tex->tex.levelZero;
2300bf215546Sopenharmony_ci      default:
2301bf215546Sopenharmony_ci         return false;
2302bf215546Sopenharmony_ci      }
2303bf215546Sopenharmony_ci
2304bf215546Sopenharmony_ci   case OP_TXG:
2305bf215546Sopenharmony_ci      if (tex->tex.useOffsets > 1)
2306bf215546Sopenharmony_ci         return false;
2307bf215546Sopenharmony_ci      if (tex->tex.mask != 0x3 && tex->tex.mask != 0xf)
2308bf215546Sopenharmony_ci         return false;
2309bf215546Sopenharmony_ci
2310bf215546Sopenharmony_ci      switch (tex->tex.target.getEnum()) {
2311bf215546Sopenharmony_ci      case TEX_TARGET_2D:
2312bf215546Sopenharmony_ci      case TEX_TARGET_2D_MS:
2313bf215546Sopenharmony_ci      case TEX_TARGET_2D_SHADOW:
2314bf215546Sopenharmony_ci      case TEX_TARGET_RECT:
2315bf215546Sopenharmony_ci      case TEX_TARGET_RECT_SHADOW:
2316bf215546Sopenharmony_ci         return true;
2317bf215546Sopenharmony_ci      default:
2318bf215546Sopenharmony_ci         return false;
2319bf215546Sopenharmony_ci      }
2320bf215546Sopenharmony_ci
2321bf215546Sopenharmony_ci   default:
2322bf215546Sopenharmony_ci      return false;
2323bf215546Sopenharmony_ci   }
2324bf215546Sopenharmony_ci}
2325bf215546Sopenharmony_ci
2326bf215546Sopenharmony_civoid
2327bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex)
2328bf215546Sopenharmony_ci{
2329bf215546Sopenharmony_ci   int defCount = tex->defCount(0xff);
2330bf215546Sopenharmony_ci   int srcCount = tex->srcCount(0xff);
2331bf215546Sopenharmony_ci
2332bf215546Sopenharmony_ci   tex->tex.scalar = true;
2333bf215546Sopenharmony_ci
2334bf215546Sopenharmony_ci   // 1. handle defs
2335bf215546Sopenharmony_ci   if (defCount > 3)
2336bf215546Sopenharmony_ci      condenseDefs(tex, 2, 3);
2337bf215546Sopenharmony_ci   if (defCount > 1)
2338bf215546Sopenharmony_ci      condenseDefs(tex, 0, 1);
2339bf215546Sopenharmony_ci
2340bf215546Sopenharmony_ci   // 2. handle srcs
2341bf215546Sopenharmony_ci   // special case for TXF.A2D
2342bf215546Sopenharmony_ci   if (tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY) {
2343bf215546Sopenharmony_ci      assert(srcCount >= 3);
2344bf215546Sopenharmony_ci      condenseSrcs(tex, 1, 2);
2345bf215546Sopenharmony_ci   } else {
2346bf215546Sopenharmony_ci      if (srcCount > 3)
2347bf215546Sopenharmony_ci         condenseSrcs(tex, 2, 3);
2348bf215546Sopenharmony_ci      // only if we have more than 2 sources
2349bf215546Sopenharmony_ci      if (srcCount > 2)
2350bf215546Sopenharmony_ci         condenseSrcs(tex, 0, 1);
2351bf215546Sopenharmony_ci   }
2352bf215546Sopenharmony_ci
2353bf215546Sopenharmony_ci   assert(!tex->defExists(2) && !tex->srcExists(2));
2354bf215546Sopenharmony_ci}
2355bf215546Sopenharmony_ci
2356bf215546Sopenharmony_civoid
2357bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
2358bf215546Sopenharmony_ci{
2359bf215546Sopenharmony_ci   int n, s;
2360bf215546Sopenharmony_ci
2361bf215546Sopenharmony_ci   if (isTextureOp(tex->op))
2362bf215546Sopenharmony_ci      textureMask(tex);
2363bf215546Sopenharmony_ci
2364bf215546Sopenharmony_ci   if (targ->getChipset() < NVISA_GV100_CHIPSET) {
2365bf215546Sopenharmony_ci      if (isScalarTexGM107(tex)) {
2366bf215546Sopenharmony_ci         handleScalarTexGM107(tex);
2367bf215546Sopenharmony_ci         return;
2368bf215546Sopenharmony_ci      }
2369bf215546Sopenharmony_ci
2370bf215546Sopenharmony_ci      assert(!tex->tex.scalar);
2371bf215546Sopenharmony_ci      condenseDefs(tex);
2372bf215546Sopenharmony_ci   } else {
2373bf215546Sopenharmony_ci      if (isTextureOp(tex->op)) {
2374bf215546Sopenharmony_ci         int defCount = tex->defCount(0xff);
2375bf215546Sopenharmony_ci         if (defCount > 3)
2376bf215546Sopenharmony_ci            condenseDefs(tex, 2, 3);
2377bf215546Sopenharmony_ci         if (defCount > 1)
2378bf215546Sopenharmony_ci            condenseDefs(tex, 0, 1);
2379bf215546Sopenharmony_ci      } else {
2380bf215546Sopenharmony_ci         condenseDefs(tex);
2381bf215546Sopenharmony_ci      }
2382bf215546Sopenharmony_ci   }
2383bf215546Sopenharmony_ci
2384bf215546Sopenharmony_ci   if (isSurfaceOp(tex->op)) {
2385bf215546Sopenharmony_ci      int s = tex->tex.target.getDim() +
2386bf215546Sopenharmony_ci         (tex->tex.target.isArray() || tex->tex.target.isCube());
2387bf215546Sopenharmony_ci      int n = 0;
2388bf215546Sopenharmony_ci
2389bf215546Sopenharmony_ci      switch (tex->op) {
2390bf215546Sopenharmony_ci      case OP_SUSTB:
2391bf215546Sopenharmony_ci      case OP_SUSTP:
2392bf215546Sopenharmony_ci         n = 4;
2393bf215546Sopenharmony_ci         break;
2394bf215546Sopenharmony_ci      case OP_SUREDB:
2395bf215546Sopenharmony_ci      case OP_SUREDP:
2396bf215546Sopenharmony_ci         if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS)
2397bf215546Sopenharmony_ci            n = 2;
2398bf215546Sopenharmony_ci         break;
2399bf215546Sopenharmony_ci      default:
2400bf215546Sopenharmony_ci         break;
2401bf215546Sopenharmony_ci      }
2402bf215546Sopenharmony_ci
2403bf215546Sopenharmony_ci      if (s > 1)
2404bf215546Sopenharmony_ci         condenseSrcs(tex, 0, s - 1);
2405bf215546Sopenharmony_ci      if (n > 1)
2406bf215546Sopenharmony_ci         condenseSrcs(tex, 1, n); // do not condense the tex handle
2407bf215546Sopenharmony_ci   } else
2408bf215546Sopenharmony_ci   if (isTextureOp(tex->op)) {
2409bf215546Sopenharmony_ci      if (tex->op != OP_TXQ) {
2410bf215546Sopenharmony_ci         s = tex->tex.target.getArgCount() - tex->tex.target.isMS();
2411bf215546Sopenharmony_ci         if (tex->op == OP_TXD) {
2412bf215546Sopenharmony_ci            // Indirect handle belongs in the first arg
2413bf215546Sopenharmony_ci            if (tex->tex.rIndirectSrc >= 0)
2414bf215546Sopenharmony_ci               s++;
2415bf215546Sopenharmony_ci            if (!tex->tex.target.isArray() && tex->tex.useOffsets)
2416bf215546Sopenharmony_ci               s++;
2417bf215546Sopenharmony_ci         }
2418bf215546Sopenharmony_ci         n = tex->srcCount(0xff, true) - s;
2419bf215546Sopenharmony_ci         // TODO: Is this necessary? Perhaps just has to be aligned to the
2420bf215546Sopenharmony_ci         // level that the first arg is, not necessarily to 4. This
2421bf215546Sopenharmony_ci         // requirement has not been rigorously verified, as it has been on
2422bf215546Sopenharmony_ci         // Kepler.
2423bf215546Sopenharmony_ci         if (n > 0 && n < 3) {
2424bf215546Sopenharmony_ci            if (tex->srcExists(n + s)) // move potential predicate out of the way
2425bf215546Sopenharmony_ci               tex->moveSources(n + s, 3 - n);
2426bf215546Sopenharmony_ci            while (n < 3)
2427bf215546Sopenharmony_ci               tex->setSrc(s + n++, new_LValue(func, FILE_GPR));
2428bf215546Sopenharmony_ci         }
2429bf215546Sopenharmony_ci      } else {
2430bf215546Sopenharmony_ci         s = tex->srcCount(0xff, true);
2431bf215546Sopenharmony_ci         n = 0;
2432bf215546Sopenharmony_ci      }
2433bf215546Sopenharmony_ci
2434bf215546Sopenharmony_ci      if (s > 1)
2435bf215546Sopenharmony_ci         condenseSrcs(tex, 0, s - 1);
2436bf215546Sopenharmony_ci      if (n > 1) // NOTE: first call modified positions already
2437bf215546Sopenharmony_ci         condenseSrcs(tex, 1, n);
2438bf215546Sopenharmony_ci   }
2439bf215546Sopenharmony_ci}
2440bf215546Sopenharmony_ci
2441bf215546Sopenharmony_civoid
2442bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
2443bf215546Sopenharmony_ci{
2444bf215546Sopenharmony_ci   if (isTextureOp(tex->op))
2445bf215546Sopenharmony_ci      textureMask(tex);
2446bf215546Sopenharmony_ci   condenseDefs(tex);
2447bf215546Sopenharmony_ci
2448bf215546Sopenharmony_ci   if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
2449bf215546Sopenharmony_ci      condenseSrcs(tex, 3, 6);
2450bf215546Sopenharmony_ci   } else
2451bf215546Sopenharmony_ci   if (isTextureOp(tex->op)) {
2452bf215546Sopenharmony_ci      int n = tex->srcCount(0xff, true);
2453bf215546Sopenharmony_ci      int s = n > 4 ? 4 : n;
2454bf215546Sopenharmony_ci      if (n > 4 && n < 7) {
2455bf215546Sopenharmony_ci         if (tex->srcExists(n)) // move potential predicate out of the way
2456bf215546Sopenharmony_ci            tex->moveSources(n, 7 - n);
2457bf215546Sopenharmony_ci
2458bf215546Sopenharmony_ci         while (n < 7)
2459bf215546Sopenharmony_ci            tex->setSrc(n++, new_LValue(func, FILE_GPR));
2460bf215546Sopenharmony_ci      }
2461bf215546Sopenharmony_ci      if (s > 1)
2462bf215546Sopenharmony_ci         condenseSrcs(tex, 0, s - 1);
2463bf215546Sopenharmony_ci      if (n > 4)
2464bf215546Sopenharmony_ci         condenseSrcs(tex, 1, n - s);
2465bf215546Sopenharmony_ci   }
2466bf215546Sopenharmony_ci}
2467bf215546Sopenharmony_ci
2468bf215546Sopenharmony_civoid
2469bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
2470bf215546Sopenharmony_ci{
2471bf215546Sopenharmony_ci   int n, s;
2472bf215546Sopenharmony_ci
2473bf215546Sopenharmony_ci   if (isTextureOp(tex->op))
2474bf215546Sopenharmony_ci      textureMask(tex);
2475bf215546Sopenharmony_ci
2476bf215546Sopenharmony_ci   if (tex->op == OP_TXQ) {
2477bf215546Sopenharmony_ci      s = tex->srcCount(0xff);
2478bf215546Sopenharmony_ci      n = 0;
2479bf215546Sopenharmony_ci   } else if (isSurfaceOp(tex->op)) {
2480bf215546Sopenharmony_ci      s = tex->tex.target.getDim() + (tex->tex.target.isArray() || tex->tex.target.isCube());
2481bf215546Sopenharmony_ci      if (tex->op == OP_SUSTB || tex->op == OP_SUSTP)
2482bf215546Sopenharmony_ci         n = 4;
2483bf215546Sopenharmony_ci      else
2484bf215546Sopenharmony_ci         n = 0;
2485bf215546Sopenharmony_ci   } else {
2486bf215546Sopenharmony_ci      s = tex->tex.target.getArgCount() - tex->tex.target.isMS();
2487bf215546Sopenharmony_ci      if (!tex->tex.target.isArray() &&
2488bf215546Sopenharmony_ci          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
2489bf215546Sopenharmony_ci         ++s;
2490bf215546Sopenharmony_ci      if (tex->op == OP_TXD && tex->tex.useOffsets)
2491bf215546Sopenharmony_ci         ++s;
2492bf215546Sopenharmony_ci      n = tex->srcCount(0xff) - s;
2493bf215546Sopenharmony_ci      assert(n <= 4);
2494bf215546Sopenharmony_ci   }
2495bf215546Sopenharmony_ci
2496bf215546Sopenharmony_ci   if (s > 1)
2497bf215546Sopenharmony_ci      condenseSrcs(tex, 0, s - 1);
2498bf215546Sopenharmony_ci   if (n > 1) // NOTE: first call modified positions already
2499bf215546Sopenharmony_ci      condenseSrcs(tex, 1, n);
2500bf215546Sopenharmony_ci
2501bf215546Sopenharmony_ci   condenseDefs(tex);
2502bf215546Sopenharmony_ci}
2503bf215546Sopenharmony_ci
2504bf215546Sopenharmony_civoid
2505bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
2506bf215546Sopenharmony_ci{
2507bf215546Sopenharmony_ci   Value *pred = tex->getPredicate();
2508bf215546Sopenharmony_ci   if (pred)
2509bf215546Sopenharmony_ci      tex->setPredicate(tex->cc, NULL);
2510bf215546Sopenharmony_ci
2511bf215546Sopenharmony_ci   textureMask(tex);
2512bf215546Sopenharmony_ci
2513bf215546Sopenharmony_ci   assert(tex->defExists(0) && tex->srcExists(0));
2514bf215546Sopenharmony_ci   // make src and def count match
2515bf215546Sopenharmony_ci   int c;
2516bf215546Sopenharmony_ci   for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
2517bf215546Sopenharmony_ci      if (!tex->srcExists(c))
2518bf215546Sopenharmony_ci         tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
2519bf215546Sopenharmony_ci      else
2520bf215546Sopenharmony_ci         insertConstraintMove(tex, c);
2521bf215546Sopenharmony_ci      if (!tex->defExists(c))
2522bf215546Sopenharmony_ci         tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
2523bf215546Sopenharmony_ci   }
2524bf215546Sopenharmony_ci   if (pred)
2525bf215546Sopenharmony_ci      tex->setPredicate(tex->cc, pred);
2526bf215546Sopenharmony_ci   condenseDefs(tex);
2527bf215546Sopenharmony_ci   condenseSrcs(tex, 0, c - 1);
2528bf215546Sopenharmony_ci}
2529bf215546Sopenharmony_ci
2530bf215546Sopenharmony_ci// Insert constraint markers for instructions whose multiple sources must be
2531bf215546Sopenharmony_ci// located in consecutive registers.
2532bf215546Sopenharmony_cibool
2533bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
2534bf215546Sopenharmony_ci{
2535bf215546Sopenharmony_ci   TexInstruction *tex;
2536bf215546Sopenharmony_ci   Instruction *next;
2537bf215546Sopenharmony_ci   int s, size;
2538bf215546Sopenharmony_ci
2539bf215546Sopenharmony_ci   targ = bb->getProgram()->getTarget();
2540bf215546Sopenharmony_ci
2541bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = next) {
2542bf215546Sopenharmony_ci      next = i->next;
2543bf215546Sopenharmony_ci
2544bf215546Sopenharmony_ci      if ((tex = i->asTex())) {
2545bf215546Sopenharmony_ci         switch (targ->getChipset() & ~0xf) {
2546bf215546Sopenharmony_ci         case 0x50:
2547bf215546Sopenharmony_ci         case 0x80:
2548bf215546Sopenharmony_ci         case 0x90:
2549bf215546Sopenharmony_ci         case 0xa0:
2550bf215546Sopenharmony_ci            texConstraintNV50(tex);
2551bf215546Sopenharmony_ci            break;
2552bf215546Sopenharmony_ci         case 0xc0:
2553bf215546Sopenharmony_ci         case 0xd0:
2554bf215546Sopenharmony_ci            texConstraintNVC0(tex);
2555bf215546Sopenharmony_ci            break;
2556bf215546Sopenharmony_ci         case 0xe0:
2557bf215546Sopenharmony_ci         case 0xf0:
2558bf215546Sopenharmony_ci         case 0x100:
2559bf215546Sopenharmony_ci            texConstraintNVE0(tex);
2560bf215546Sopenharmony_ci            break;
2561bf215546Sopenharmony_ci         case 0x110:
2562bf215546Sopenharmony_ci         case 0x120:
2563bf215546Sopenharmony_ci         case 0x130:
2564bf215546Sopenharmony_ci         case 0x140:
2565bf215546Sopenharmony_ci         case 0x160:
2566bf215546Sopenharmony_ci         case 0x170:
2567bf215546Sopenharmony_ci            texConstraintGM107(tex);
2568bf215546Sopenharmony_ci            break;
2569bf215546Sopenharmony_ci         default:
2570bf215546Sopenharmony_ci            break;
2571bf215546Sopenharmony_ci         }
2572bf215546Sopenharmony_ci      } else
2573bf215546Sopenharmony_ci      if (i->op == OP_EXPORT || i->op == OP_STORE) {
2574bf215546Sopenharmony_ci         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
2575bf215546Sopenharmony_ci            assert(i->srcExists(s));
2576bf215546Sopenharmony_ci            size -= i->getSrc(s)->reg.size;
2577bf215546Sopenharmony_ci         }
2578bf215546Sopenharmony_ci         condenseSrcs(i, 1, s - 1);
2579bf215546Sopenharmony_ci      } else
2580bf215546Sopenharmony_ci      if (i->op == OP_LOAD || i->op == OP_VFETCH) {
2581bf215546Sopenharmony_ci         condenseDefs(i);
2582bf215546Sopenharmony_ci         if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
2583bf215546Sopenharmony_ci            addHazard(i, i->src(0).getIndirect(0));
2584bf215546Sopenharmony_ci         if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8)
2585bf215546Sopenharmony_ci            addHazard(i, i->src(0).getIndirect(1));
2586bf215546Sopenharmony_ci         if (i->op == OP_LOAD && i->fixed && targ->getChipset() < 0xc0) {
2587bf215546Sopenharmony_ci            // Add a hazard to make sure we keep the op around. These are used
2588bf215546Sopenharmony_ci            // for membars.
2589bf215546Sopenharmony_ci            Instruction *nop = new_Instruction(func, OP_NOP, i->dType);
2590bf215546Sopenharmony_ci            nop->setSrc(0, i->getDef(0));
2591bf215546Sopenharmony_ci            i->bb->insertAfter(i, nop);
2592bf215546Sopenharmony_ci         }
2593bf215546Sopenharmony_ci      } else
2594bf215546Sopenharmony_ci      if (i->op == OP_UNION ||
2595bf215546Sopenharmony_ci          i->op == OP_MERGE ||
2596bf215546Sopenharmony_ci          i->op == OP_SPLIT) {
2597bf215546Sopenharmony_ci         constrList.push_back(i);
2598bf215546Sopenharmony_ci      } else
2599bf215546Sopenharmony_ci      if (i->op == OP_ATOM && i->subOp == NV50_IR_SUBOP_ATOM_CAS &&
2600bf215546Sopenharmony_ci          targ->getChipset() < 0xc0) {
2601bf215546Sopenharmony_ci         // Like a hazard, but for a def.
2602bf215546Sopenharmony_ci         Instruction *nop = new_Instruction(func, OP_NOP, i->dType);
2603bf215546Sopenharmony_ci         nop->setSrc(0, i->getDef(0));
2604bf215546Sopenharmony_ci         i->bb->insertAfter(i, nop);
2605bf215546Sopenharmony_ci      }
2606bf215546Sopenharmony_ci   }
2607bf215546Sopenharmony_ci   return true;
2608bf215546Sopenharmony_ci}
2609bf215546Sopenharmony_ci
2610bf215546Sopenharmony_civoid
2611bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
2612bf215546Sopenharmony_ci{
2613bf215546Sopenharmony_ci   const uint8_t size = cst->src(s).getSize();
2614bf215546Sopenharmony_ci
2615bf215546Sopenharmony_ci   assert(cst->getSrc(s)->defs.size() == 1); // still SSA
2616bf215546Sopenharmony_ci
2617bf215546Sopenharmony_ci   Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
2618bf215546Sopenharmony_ci
2619bf215546Sopenharmony_ci   bool imm = defi->op == OP_MOV &&
2620bf215546Sopenharmony_ci      defi->src(0).getFile() == FILE_IMMEDIATE;
2621bf215546Sopenharmony_ci   bool load = defi->op == OP_LOAD &&
2622bf215546Sopenharmony_ci      defi->src(0).getFile() == FILE_MEMORY_CONST &&
2623bf215546Sopenharmony_ci      !defi->src(0).isIndirect(0);
2624bf215546Sopenharmony_ci   // catch some cases where don't really need MOVs
2625bf215546Sopenharmony_ci   if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()
2626bf215546Sopenharmony_ci       && defi->op != OP_MERGE && defi->op != OP_SPLIT) {
2627bf215546Sopenharmony_ci      if (imm || load) {
2628bf215546Sopenharmony_ci         // Move the defi right before the cst. No point in expanding
2629bf215546Sopenharmony_ci         // the range.
2630bf215546Sopenharmony_ci         defi->bb->remove(defi);
2631bf215546Sopenharmony_ci         cst->bb->insertBefore(cst, defi);
2632bf215546Sopenharmony_ci      }
2633bf215546Sopenharmony_ci      return;
2634bf215546Sopenharmony_ci   }
2635bf215546Sopenharmony_ci
2636bf215546Sopenharmony_ci   LValue *lval = new_LValue(func, cst->src(s).getFile());
2637bf215546Sopenharmony_ci   lval->reg.size = size;
2638bf215546Sopenharmony_ci
2639bf215546Sopenharmony_ci   Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size));
2640bf215546Sopenharmony_ci   mov->setDef(0, lval);
2641bf215546Sopenharmony_ci   mov->setSrc(0, cst->getSrc(s));
2642bf215546Sopenharmony_ci
2643bf215546Sopenharmony_ci   if (load) {
2644bf215546Sopenharmony_ci      mov->op = OP_LOAD;
2645bf215546Sopenharmony_ci      mov->setSrc(0, defi->getSrc(0));
2646bf215546Sopenharmony_ci   } else if (imm) {
2647bf215546Sopenharmony_ci      mov->setSrc(0, defi->getSrc(0));
2648bf215546Sopenharmony_ci   }
2649bf215546Sopenharmony_ci
2650bf215546Sopenharmony_ci   if (defi->getPredicate())
2651bf215546Sopenharmony_ci      mov->setPredicate(defi->cc, defi->getPredicate());
2652bf215546Sopenharmony_ci
2653bf215546Sopenharmony_ci   cst->setSrc(s, mov->getDef(0));
2654bf215546Sopenharmony_ci   cst->bb->insertBefore(cst, mov);
2655bf215546Sopenharmony_ci
2656bf215546Sopenharmony_ci   cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help
2657bf215546Sopenharmony_ci}
2658bf215546Sopenharmony_ci
2659bf215546Sopenharmony_ci// Insert extra moves so that, if multiple register constraints on a value are
2660bf215546Sopenharmony_ci// in conflict, these conflicts can be resolved.
2661bf215546Sopenharmony_cibool
2662bf215546Sopenharmony_ciRegAlloc::InsertConstraintsPass::insertConstraintMoves()
2663bf215546Sopenharmony_ci{
2664bf215546Sopenharmony_ci   for (std::list<Instruction *>::iterator it = constrList.begin();
2665bf215546Sopenharmony_ci        it != constrList.end();
2666bf215546Sopenharmony_ci        ++it) {
2667bf215546Sopenharmony_ci      Instruction *cst = *it;
2668bf215546Sopenharmony_ci      Instruction *mov;
2669bf215546Sopenharmony_ci
2670bf215546Sopenharmony_ci      if (cst->op == OP_SPLIT && false) {
2671bf215546Sopenharmony_ci         // spilling splits is annoying, just make sure they're separate
2672bf215546Sopenharmony_ci         for (int d = 0; cst->defExists(d); ++d) {
2673bf215546Sopenharmony_ci            if (!cst->getDef(d)->refCount())
2674bf215546Sopenharmony_ci               continue;
2675bf215546Sopenharmony_ci            LValue *lval = new_LValue(func, cst->def(d).getFile());
2676bf215546Sopenharmony_ci            const uint8_t size = cst->def(d).getSize();
2677bf215546Sopenharmony_ci            lval->reg.size = size;
2678bf215546Sopenharmony_ci
2679bf215546Sopenharmony_ci            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
2680bf215546Sopenharmony_ci            mov->setSrc(0, lval);
2681bf215546Sopenharmony_ci            mov->setDef(0, cst->getDef(d));
2682bf215546Sopenharmony_ci            cst->setDef(d, mov->getSrc(0));
2683bf215546Sopenharmony_ci            cst->bb->insertAfter(cst, mov);
2684bf215546Sopenharmony_ci
2685bf215546Sopenharmony_ci            cst->getSrc(0)->asLValue()->noSpill = 1;
2686bf215546Sopenharmony_ci            mov->getSrc(0)->asLValue()->noSpill = 1;
2687bf215546Sopenharmony_ci         }
2688bf215546Sopenharmony_ci      } else
2689bf215546Sopenharmony_ci      if (cst->op == OP_MERGE || cst->op == OP_UNION) {
2690bf215546Sopenharmony_ci         for (int s = 0; cst->srcExists(s); ++s) {
2691bf215546Sopenharmony_ci            const uint8_t size = cst->src(s).getSize();
2692bf215546Sopenharmony_ci
2693bf215546Sopenharmony_ci            if (!cst->getSrc(s)->defs.size()) {
2694bf215546Sopenharmony_ci               mov = new_Instruction(func, OP_NOP, typeOfSize(size));
2695bf215546Sopenharmony_ci               mov->setDef(0, cst->getSrc(s));
2696bf215546Sopenharmony_ci               cst->bb->insertBefore(cst, mov);
2697bf215546Sopenharmony_ci               continue;
2698bf215546Sopenharmony_ci            }
2699bf215546Sopenharmony_ci
2700bf215546Sopenharmony_ci            insertConstraintMove(cst, s);
2701bf215546Sopenharmony_ci         }
2702bf215546Sopenharmony_ci      }
2703bf215546Sopenharmony_ci   }
2704bf215546Sopenharmony_ci
2705bf215546Sopenharmony_ci   return true;
2706bf215546Sopenharmony_ci}
2707bf215546Sopenharmony_ci
2708bf215546Sopenharmony_ci} // namespace nv50_ir
2709