1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci#include "nv50_ir.h"
24bf215546Sopenharmony_ci#include "nv50_ir_target.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_cinamespace nv50_ir {
27bf215546Sopenharmony_ci
// Number of source operands per operation.
//
// The rows are listed in opcode order (see the names on each row) and the
// table must contain exactly OP_LAST + 1 entries; Target::create() checks the
// length with a STATIC_ASSERT. When adding an opcode, insert its entry at the
// matching position here and in operationClass.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   3, 3,                   // SHLADD, XMAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND, SET_OR, SET_XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
   1, 1, 1,                // EMIT, RESTART, FINAL
   1, 1, 1,                // TEX, TXB, TXL
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
   2,                      // SGXT
   3, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   1,                      // VOTE
   1,                      // BUFQ
   1,                      // WARPSYNC
   0                       // LAST (sentinel entry)
};
65bf215546Sopenharmony_ci
// Coarse classification (OpClass) of every operation.
//
// Entries follow the same opcode order as operationSrcNr; the comment in front
// of each group names the opcodes it covers. The table must contain exactly
// OP_LAST + 1 entries, which Target::create() verifies with a STATIC_ASSERT.
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET_AND, SET_OR, SET_XOR, SET; SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, AFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART, FINAL
   OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): SUSTB is classified OPCLASS_ATOMIC while every sibling
   // surface op is OPCLASS_SURFACE -- confirm this is intentional.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   // SHFL
   OPCLASS_OTHER,
   // VOTE
   OPCLASS_OTHER,
   // BUFQ
   OPCLASS_OTHER,
   // WARPSYNC
   OPCLASS_OTHER,
   OPCLASS_PSEUDO // LAST
};
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ciextern Target *getTargetGV100(unsigned int chipset);
148bf215546Sopenharmony_ciextern Target *getTargetGM107(unsigned int chipset);
149bf215546Sopenharmony_ciextern Target *getTargetNVC0(unsigned int chipset);
150bf215546Sopenharmony_ciextern Target *getTargetNV50(unsigned int chipset);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ciTarget *Target::create(unsigned int chipset)
153bf215546Sopenharmony_ci{
154bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
155bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
156bf215546Sopenharmony_ci   switch (chipset & ~0xf) {
157bf215546Sopenharmony_ci   case 0x170:
158bf215546Sopenharmony_ci   case 0x160:
159bf215546Sopenharmony_ci   case 0x140:
160bf215546Sopenharmony_ci      return getTargetGV100(chipset);
161bf215546Sopenharmony_ci   case 0x110:
162bf215546Sopenharmony_ci   case 0x120:
163bf215546Sopenharmony_ci   case 0x130:
164bf215546Sopenharmony_ci      return getTargetGM107(chipset);
165bf215546Sopenharmony_ci   case 0xc0:
166bf215546Sopenharmony_ci   case 0xd0:
167bf215546Sopenharmony_ci   case 0xe0:
168bf215546Sopenharmony_ci   case 0xf0:
169bf215546Sopenharmony_ci   case 0x100:
170bf215546Sopenharmony_ci      return getTargetNVC0(chipset);
171bf215546Sopenharmony_ci   case 0x50:
172bf215546Sopenharmony_ci   case 0x80:
173bf215546Sopenharmony_ci   case 0x90:
174bf215546Sopenharmony_ci   case 0xa0:
175bf215546Sopenharmony_ci      return getTargetNV50(chipset);
176bf215546Sopenharmony_ci   default:
177bf215546Sopenharmony_ci      ERROR("unsupported target: NV%x\n", chipset);
178bf215546Sopenharmony_ci      return 0;
179bf215546Sopenharmony_ci   }
180bf215546Sopenharmony_ci}
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_civoid Target::destroy(Target *targ)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   delete targ;
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ciCodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL),
188bf215546Sopenharmony_ci   codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL)
189bf215546Sopenharmony_ci{
190bf215546Sopenharmony_ci}
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_civoid
193bf215546Sopenharmony_ciCodeEmitter::setCodeLocation(void *ptr, uint32_t size)
194bf215546Sopenharmony_ci{
195bf215546Sopenharmony_ci   code = reinterpret_cast<uint32_t *>(ptr);
196bf215546Sopenharmony_ci   codeSize = 0;
197bf215546Sopenharmony_ci   codeSizeLimit = size;
198bf215546Sopenharmony_ci}
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_civoid
201bf215546Sopenharmony_ciCodeEmitter::printBinary() const
202bf215546Sopenharmony_ci{
203bf215546Sopenharmony_ci   uint32_t *bin = code - codeSize / 4;
204bf215546Sopenharmony_ci   INFO("program binary (%u bytes)", codeSize);
205bf215546Sopenharmony_ci   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
206bf215546Sopenharmony_ci      if ((pos % 8) == 0)
207bf215546Sopenharmony_ci         INFO("\n");
208bf215546Sopenharmony_ci      INFO("%08x ", bin[pos]);
209bf215546Sopenharmony_ci   }
210bf215546Sopenharmony_ci   INFO("\n");
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
// Number of 56-byte units needed to hold @size bytes, rounded up.
//
// Presumably 56 is the instruction payload of one 64-byte scheduling bundle
// (the caller adds 8 bytes per bundle returned) -- confirm against the ISA
// notes.
//
// The ceiling is computed without adding to @size first, so it cannot wrap
// around for sizes close to UINT32_MAX.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bytesPerBundle = 56;

   return size / bytesPerBundle + ((size % bytesPerBundle) ? 1 : 0);
}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_civoid
219bf215546Sopenharmony_ciCodeEmitter::prepareEmission(Program *prog)
220bf215546Sopenharmony_ci{
221bf215546Sopenharmony_ci   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
222bf215546Sopenharmony_ci        !fi.end(); fi.next()) {
223bf215546Sopenharmony_ci      Function *func = reinterpret_cast<Function *>(fi.get());
224bf215546Sopenharmony_ci      func->binPos = prog->binSize;
225bf215546Sopenharmony_ci      prepareEmission(func);
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci      // adjust sizes & positions for scheduling info:
228bf215546Sopenharmony_ci      if (prog->getTarget()->hasSWSched) {
229bf215546Sopenharmony_ci         uint32_t adjPos = func->binPos;
230bf215546Sopenharmony_ci         BasicBlock *bb = NULL;
231bf215546Sopenharmony_ci         for (int i = 0; i < func->bbCount; ++i) {
232bf215546Sopenharmony_ci            bb = func->bbArray[i];
233bf215546Sopenharmony_ci            int32_t adjSize = bb->binSize;
234bf215546Sopenharmony_ci            if (adjPos % 64) {
235bf215546Sopenharmony_ci               adjSize -= 64 - adjPos % 64;
236bf215546Sopenharmony_ci               if (adjSize < 0)
237bf215546Sopenharmony_ci                  adjSize = 0;
238bf215546Sopenharmony_ci            }
239bf215546Sopenharmony_ci            adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
240bf215546Sopenharmony_ci            bb->binPos = adjPos;
241bf215546Sopenharmony_ci            bb->binSize = adjSize;
242bf215546Sopenharmony_ci            adjPos += adjSize;
243bf215546Sopenharmony_ci         }
244bf215546Sopenharmony_ci         if (bb)
245bf215546Sopenharmony_ci            func->binSize = adjPos - func->binPos;
246bf215546Sopenharmony_ci      }
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci      prog->binSize += func->binSize;
249bf215546Sopenharmony_ci   }
250bf215546Sopenharmony_ci}
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_civoid
253bf215546Sopenharmony_ciCodeEmitter::prepareEmission(Function *func)
254bf215546Sopenharmony_ci{
255bf215546Sopenharmony_ci   func->bbCount = 0;
256bf215546Sopenharmony_ci   func->bbArray = new BasicBlock * [func->cfg.getSize()];
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
261bf215546Sopenharmony_ci      prepareEmission(BasicBlock::get(*it));
262bf215546Sopenharmony_ci}
263bf215546Sopenharmony_ci
// Prepare one basic block for emission.
//
// Steps, in order:
//  1. Walk back over the blocks already recorded in func->bbArray and drop a
//     trailing OP_BRA whose target is @bb (it would branch to the very next
//     block), fixing up the affected sizes and positions.
//  2. Append @bb to func->bbArray.
//  3. Pick an encoding size for every instruction of @bb, trying to pair up
//     short encodings, and accumulate bb->binSize.
//  4. Make sure the exit instruction uses the 8-byte encoding.
//  5. Add the block size to func->binSize.
//
// A block without an exit instruction is only recorded (steps 1 and 2).
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // Skip the trailing run of already-recorded blocks that have zero size.
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         // The branch goes to the block that follows anyway: take its 8 bytes
         // off the block and the function ...
         in->binSize -= 8;
         func->binSize -= 8;

         // ... and move every later recorded block up by the same amount.
         // Note this leaves j == func->bbCount, so if the outer loop goes on
         // it resumes from the last recorded block.
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // nShort counts the short (< 8 byte) instructions of the current run; an
   // odd count means the last short instruction has no partner yet.
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // i is long (or last) but sits between an unpaired short instruction
         // and another 4-byte one: try to reorder so the two become adjacent.
         if (i->isCommutationLegal(i->next)) {
            // Swap i with its successor; presumably i->prev is then the former
            // `next`, which gets counted below, and i is revisited afterwards.
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // Swap i with its predecessor instead. `next` is marked short and
            // skipped by the loop, so its 4 bytes are added here.
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // No legal reordering: widen both i and the unpaired predecessor
            // (the extra 4 bytes are for the predecessor).
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         i->encSize = 8;
         if (nShort & 1) {
            // The preceding short instruction stays unpaired: widen it too.
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // The exit instruction must end up with the 8-byte encoding.
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_cibool
352bf215546Sopenharmony_ciProgram::emitBinary(struct nv50_ir_prog_info_out *info)
353bf215546Sopenharmony_ci{
354bf215546Sopenharmony_ci   CodeEmitter *emit = target->getCodeEmitter(progType);
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   emit->prepareEmission(this);
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   if (dbgFlags & NV50_IR_DEBUG_BASIC)
359bf215546Sopenharmony_ci      this->print();
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   if (!binSize) {
362bf215546Sopenharmony_ci      code = NULL;
363bf215546Sopenharmony_ci      return false;
364bf215546Sopenharmony_ci   }
365bf215546Sopenharmony_ci   code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
366bf215546Sopenharmony_ci   if (!code)
367bf215546Sopenharmony_ci      return false;
368bf215546Sopenharmony_ci   emit->setCodeLocation(code, binSize);
369bf215546Sopenharmony_ci   info->bin.instructions = 0;
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
372bf215546Sopenharmony_ci      Function *fn = reinterpret_cast<Function *>(fi.get());
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci      assert(emit->getCodeSize() == fn->binPos);
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci      for (int b = 0; b < fn->bbCount; ++b) {
377bf215546Sopenharmony_ci         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
378bf215546Sopenharmony_ci            emit->emitInstruction(i);
379bf215546Sopenharmony_ci            info->bin.instructions++;
380bf215546Sopenharmony_ci            if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
381bf215546Sopenharmony_ci                (isFloatType(i->sType) || isFloatType(i->dType)))
382bf215546Sopenharmony_ci               info->io.fp64 = true;
383bf215546Sopenharmony_ci         }
384bf215546Sopenharmony_ci      }
385bf215546Sopenharmony_ci   }
386bf215546Sopenharmony_ci   info->io.fp64 |= fp64;
387bf215546Sopenharmony_ci   info->bin.relocData = emit->getRelocInfo();
388bf215546Sopenharmony_ci   info->bin.fixupData = emit->getFixupInfo();
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   // the nvc0 driver will print the binary itself together with the header
391bf215546Sopenharmony_ci   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
392bf215546Sopenharmony_ci      emit->printBinary();
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci   delete emit;
395bf215546Sopenharmony_ci   return true;
396bf215546Sopenharmony_ci}
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci#define RELOC_ALLOC_INCREMENT 8
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_cibool
401bf215546Sopenharmony_ciCodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
402bf215546Sopenharmony_ci                      int s)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   unsigned int n = relocInfo ? relocInfo->count : 0;
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   if (!(n % RELOC_ALLOC_INCREMENT)) {
407bf215546Sopenharmony_ci      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
408bf215546Sopenharmony_ci      relocInfo = reinterpret_cast<RelocInfo *>(
409bf215546Sopenharmony_ci         REALLOC(relocInfo, n ? size : 0,
410bf215546Sopenharmony_ci                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
411bf215546Sopenharmony_ci      if (!relocInfo)
412bf215546Sopenharmony_ci         return false;
413bf215546Sopenharmony_ci      if (n == 0)
414bf215546Sopenharmony_ci         memset(relocInfo, 0, sizeof(RelocInfo));
415bf215546Sopenharmony_ci   }
416bf215546Sopenharmony_ci   ++relocInfo->count;
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   relocInfo->entry[n].data = data;
419bf215546Sopenharmony_ci   relocInfo->entry[n].mask = m;
420bf215546Sopenharmony_ci   relocInfo->entry[n].offset = codeSize + w * 4;
421bf215546Sopenharmony_ci   relocInfo->entry[n].bitPos = s;
422bf215546Sopenharmony_ci   relocInfo->entry[n].type = ty;
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci   return true;
425bf215546Sopenharmony_ci}
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_cibool
428bf215546Sopenharmony_ciCodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
429bf215546Sopenharmony_ci{
430bf215546Sopenharmony_ci   unsigned int n = fixupInfo ? fixupInfo->count : 0;
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci   if (!(n % RELOC_ALLOC_INCREMENT)) {
433bf215546Sopenharmony_ci      size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
434bf215546Sopenharmony_ci      fixupInfo = reinterpret_cast<FixupInfo *>(
435bf215546Sopenharmony_ci         REALLOC(fixupInfo, n ? size : 0,
436bf215546Sopenharmony_ci                 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
437bf215546Sopenharmony_ci      if (!fixupInfo)
438bf215546Sopenharmony_ci         return false;
439bf215546Sopenharmony_ci      if (n == 0)
440bf215546Sopenharmony_ci         fixupInfo->count = 0;
441bf215546Sopenharmony_ci   }
442bf215546Sopenharmony_ci   ++fixupInfo->count;
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   return true;
447bf215546Sopenharmony_ci}
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_civoid
450bf215546Sopenharmony_ciRelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
451bf215546Sopenharmony_ci{
452bf215546Sopenharmony_ci   uint32_t value = 0;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   switch (type) {
455bf215546Sopenharmony_ci   case TYPE_CODE: value = info->codePos; break;
456bf215546Sopenharmony_ci   case TYPE_BUILTIN: value = info->libPos; break;
457bf215546Sopenharmony_ci   case TYPE_DATA: value = info->dataPos; break;
458bf215546Sopenharmony_ci   default:
459bf215546Sopenharmony_ci      assert(0);
460bf215546Sopenharmony_ci      break;
461bf215546Sopenharmony_ci   }
462bf215546Sopenharmony_ci   value += data;
463bf215546Sopenharmony_ci   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   binary[offset / 4] &= ~mask;
466bf215546Sopenharmony_ci   binary[offset / 4] |= value & mask;
467bf215546Sopenharmony_ci}
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci} // namespace nv50_ir
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci#include "nv50_ir_driver.h"
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ciextern "C" {
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_civoid
477bf215546Sopenharmony_cinv50_ir_relocate_code(void *relocData, uint32_t *code,
478bf215546Sopenharmony_ci                      uint32_t codePos,
479bf215546Sopenharmony_ci                      uint32_t libPos,
480bf215546Sopenharmony_ci                      uint32_t dataPos)
481bf215546Sopenharmony_ci{
482bf215546Sopenharmony_ci   nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci   info->codePos = codePos;
485bf215546Sopenharmony_ci   info->libPos = libPos;
486bf215546Sopenharmony_ci   info->dataPos = dataPos;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   for (unsigned int i = 0; i < info->count; ++i)
489bf215546Sopenharmony_ci      info->entry[i].apply(code, info);
490bf215546Sopenharmony_ci}
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_civoid
493bf215546Sopenharmony_cinv50_ir_apply_fixups(void *fixupData, uint32_t *code,
494bf215546Sopenharmony_ci                     bool force_persample_interp, bool flatshade,
495bf215546Sopenharmony_ci                     uint8_t alphatest, bool msaa)
496bf215546Sopenharmony_ci{
497bf215546Sopenharmony_ci   nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
498bf215546Sopenharmony_ci      fixupData);
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   // force_persample_interp: all non-flat -> per-sample
501bf215546Sopenharmony_ci   // flatshade: all color -> flat
502bf215546Sopenharmony_ci   // alphatest: PIPE_FUNC_* to use with alphatest
503bf215546Sopenharmony_ci   // msaa: false = sample id -> 0 for interpolateAtSample
504bf215546Sopenharmony_ci   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);
505bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->count; ++i)
506bf215546Sopenharmony_ci      info->entry[i].apply(&info->entry[i], code, data);
507bf215546Sopenharmony_ci}
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_civoid
510bf215546Sopenharmony_cinv50_ir_get_target_library(uint32_t chipset,
511bf215546Sopenharmony_ci                           const uint32_t **code, uint32_t *size)
512bf215546Sopenharmony_ci{
513bf215546Sopenharmony_ci   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
514bf215546Sopenharmony_ci   targ->getBuiltinCode(code, size);
515bf215546Sopenharmony_ci   nv50_ir::Target::destroy(targ);
516bf215546Sopenharmony_ci}
517bf215546Sopenharmony_ci
518bf215546Sopenharmony_ci}
519