1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "nv50_ir.h"
24#include "nv50_ir_target.h"
25
26namespace nv50_ir {
27
// Per-operation source count table. Each row's trailing comment names the
// opcodes that row covers, in order. Target::create() statically asserts that
// the table holds exactly OP_LAST + 1 entries, so adding an opcode requires
// adding a value here at the matching position.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   3, 3,                   // SHLADD, XMAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET,CONT,BREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
   1, 1, 1,                // EMIT, RESTART, FINAL
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
   2,                      // SGXT
   3, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   1,                      // VOTE
   1,                      // BUFQ
   1,                      // WARPSYNC
   0                       // presumably the OP_LAST slot (final entry)
};
65
// Per-operation class table. The comment preceding each group of entries
// names the opcodes the group covers, in order. Target::create() statically
// asserts that the table holds exactly OP_LAST + 1 entries, so it must be
// kept in step with the opcode list.
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET(AND,OR,XOR); SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, AFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART, FINAL
   OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): the third entry (the SUSTB slot according to the list
   // above) is OPCLASS_ATOMIC while its neighbours are OPCLASS_SURFACE —
   // confirm this is intentional before relying on it.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   // SHFL
   OPCLASS_OTHER,
   // VOTE
   OPCLASS_OTHER,
   // BUFQ
   OPCLASS_OTHER,
   // WARPSYNC
   OPCLASS_OTHER,
   OPCLASS_PSEUDO // LAST
};
145
146
// Per-family target factories, declared here and defined outside this file.
// Target::create() dispatches to one of them based on the chipset number.
extern Target *getTargetGV100(unsigned int chipset);
extern Target *getTargetGM107(unsigned int chipset);
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
151
152Target *Target::create(unsigned int chipset)
153{
154   STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
155   STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
156   switch (chipset & ~0xf) {
157   case 0x170:
158   case 0x160:
159   case 0x140:
160      return getTargetGV100(chipset);
161   case 0x110:
162   case 0x120:
163   case 0x130:
164      return getTargetGM107(chipset);
165   case 0xc0:
166   case 0xd0:
167   case 0xe0:
168   case 0xf0:
169   case 0x100:
170      return getTargetNVC0(chipset);
171   case 0x50:
172   case 0x80:
173   case 0x90:
174   case 0xa0:
175      return getTargetNV50(chipset);
176   default:
177      ERROR("unsupported target: NV%x\n", chipset);
178      return 0;
179   }
180}
181
// Release a Target previously handed out to the caller; simply deletes the
// object.
void Target::destroy(Target *targ)
{
   delete targ;
}
186
// Create an emitter bound to `target`. No output buffer is attached yet
// (code is NULL, size and limit are 0) and neither the relocation nor the
// fixup table has been allocated.
CodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL),
   codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL)
{
}
191
192void
193CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
194{
195   code = reinterpret_cast<uint32_t *>(ptr);
196   codeSize = 0;
197   codeSizeLimit = size;
198}
199
200void
201CodeEmitter::printBinary() const
202{
203   uint32_t *bin = code - codeSize / 4;
204   INFO("program binary (%u bytes)", codeSize);
205   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
206      if ((pos % 8) == 0)
207         INFO("\n");
208      INFO("%08x ", bin[pos]);
209   }
210   INFO("\n");
211}
212
// Number of 56-byte groups needed to hold `size` bytes, rounded up.
// The caller adds 8 bytes to a block's size for every group returned here.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t groupBytes = 56;

   return (size + (groupBytes - 1)) / groupBytes;
}
217
218void
219CodeEmitter::prepareEmission(Program *prog)
220{
221   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
222        !fi.end(); fi.next()) {
223      Function *func = reinterpret_cast<Function *>(fi.get());
224      func->binPos = prog->binSize;
225      prepareEmission(func);
226
227      // adjust sizes & positions for scheduling info:
228      if (prog->getTarget()->hasSWSched) {
229         uint32_t adjPos = func->binPos;
230         BasicBlock *bb = NULL;
231         for (int i = 0; i < func->bbCount; ++i) {
232            bb = func->bbArray[i];
233            int32_t adjSize = bb->binSize;
234            if (adjPos % 64) {
235               adjSize -= 64 - adjPos % 64;
236               if (adjSize < 0)
237                  adjSize = 0;
238            }
239            adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
240            bb->binPos = adjPos;
241            bb->binSize = adjSize;
242            adjPos += adjSize;
243         }
244         if (bb)
245            func->binSize = adjPos - func->binPos;
246      }
247
248      prog->binSize += func->binSize;
249   }
250}
251
252void
253CodeEmitter::prepareEmission(Function *func)
254{
255   func->bbCount = 0;
256   func->bbArray = new BasicBlock * [func->cfg.getSize()];
257
258   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
259
260   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
261      prepareEmission(BasicBlock::get(*it));
262}
263
// Prepare one basic block for emission.
//
// Appends bb to func->bbArray, assigns its binPos from the blocks already
// placed, removes trailing OP_BRA instructions of preceding blocks that
// target bb, picks an encoding size (4 or 8 bytes) for every instruction
// while trying to keep short instructions grouped, and finally adds the
// block's size to func->binSize.
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // skip back over already placed blocks whose binSize is zero
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      // a branch at the end of `in` that targets bb is dropped, since bb is
      // placed directly after the blocks laid out so far
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         in->binSize -= 8;
         func->binSize -= 8;

         // blocks placed after `in` move up by the 8 bytes just removed;
         // note this leaves j == func->bbCount
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // nothing to size when the block has no exit instruction
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // nShort counts the current run of instructions given a short encoding;
   // an odd value means the run has no partner for its last member yet
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      i->encSize = getMinEncodingSize(i);
      // short and not the last instruction: extend the current run
      if (next && i->encSize < 8)
         ++nShort;
      else
      // odd run so far and the following instruction could be short: try to
      // reorder so the short instructions stay adjacent
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         if (i->isCommutationLegal(i->next)) {
            // move `next` in front of i (assuming permuteAdjacent swaps the
            // pair); the former `next` is accounted for below and the former
            // `i` is revisited on the following iteration
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // otherwise exchange i with its predecessor; `next` is sized
            // here (4 bytes added by hand) and skipped by the loop
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no legal reordering: widen both i and the unpaired short
            // instruction before it to 8 bytes
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         // long encoding for i; an unpaired short predecessor is widened too
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // the exit instruction always ends up with the 8-byte encoding
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      // widening the exit may leave its short predecessor without a partner
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
350
351bool
352Program::emitBinary(struct nv50_ir_prog_info_out *info)
353{
354   CodeEmitter *emit = target->getCodeEmitter(progType);
355
356   emit->prepareEmission(this);
357
358   if (dbgFlags & NV50_IR_DEBUG_BASIC)
359      this->print();
360
361   if (!binSize) {
362      code = NULL;
363      return false;
364   }
365   code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
366   if (!code)
367      return false;
368   emit->setCodeLocation(code, binSize);
369   info->bin.instructions = 0;
370
371   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
372      Function *fn = reinterpret_cast<Function *>(fi.get());
373
374      assert(emit->getCodeSize() == fn->binPos);
375
376      for (int b = 0; b < fn->bbCount; ++b) {
377         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
378            emit->emitInstruction(i);
379            info->bin.instructions++;
380            if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
381                (isFloatType(i->sType) || isFloatType(i->dType)))
382               info->io.fp64 = true;
383         }
384      }
385   }
386   info->io.fp64 |= fp64;
387   info->bin.relocData = emit->getRelocInfo();
388   info->bin.fixupData = emit->getFixupInfo();
389
390   // the nvc0 driver will print the binary itself together with the header
391   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
392      emit->printBinary();
393
394   delete emit;
395   return true;
396}
397
// Number of entries by which the relocation and fixup tables are grown each
// time they fill up.
#define RELOC_ALLOC_INCREMENT 8
399
400bool
401CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
402                      int s)
403{
404   unsigned int n = relocInfo ? relocInfo->count : 0;
405
406   if (!(n % RELOC_ALLOC_INCREMENT)) {
407      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
408      relocInfo = reinterpret_cast<RelocInfo *>(
409         REALLOC(relocInfo, n ? size : 0,
410                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
411      if (!relocInfo)
412         return false;
413      if (n == 0)
414         memset(relocInfo, 0, sizeof(RelocInfo));
415   }
416   ++relocInfo->count;
417
418   relocInfo->entry[n].data = data;
419   relocInfo->entry[n].mask = m;
420   relocInfo->entry[n].offset = codeSize + w * 4;
421   relocInfo->entry[n].bitPos = s;
422   relocInfo->entry[n].type = ty;
423
424   return true;
425}
426
427bool
428CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
429{
430   unsigned int n = fixupInfo ? fixupInfo->count : 0;
431
432   if (!(n % RELOC_ALLOC_INCREMENT)) {
433      size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
434      fixupInfo = reinterpret_cast<FixupInfo *>(
435         REALLOC(fixupInfo, n ? size : 0,
436                 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
437      if (!fixupInfo)
438         return false;
439      if (n == 0)
440         fixupInfo->count = 0;
441   }
442   ++fixupInfo->count;
443
444   fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
445
446   return true;
447}
448
449void
450RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
451{
452   uint32_t value = 0;
453
454   switch (type) {
455   case TYPE_CODE: value = info->codePos; break;
456   case TYPE_BUILTIN: value = info->libPos; break;
457   case TYPE_DATA: value = info->dataPos; break;
458   default:
459      assert(0);
460      break;
461   }
462   value += data;
463   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
464
465   binary[offset / 4] &= ~mask;
466   binary[offset / 4] |= value & mask;
467}
468
469} // namespace nv50_ir
470
471
472#include "nv50_ir_driver.h"
473
474extern "C" {
475
476void
477nv50_ir_relocate_code(void *relocData, uint32_t *code,
478                      uint32_t codePos,
479                      uint32_t libPos,
480                      uint32_t dataPos)
481{
482   nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
483
484   info->codePos = codePos;
485   info->libPos = libPos;
486   info->dataPos = dataPos;
487
488   for (unsigned int i = 0; i < info->count; ++i)
489      info->entry[i].apply(code, info);
490}
491
492void
493nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
494                     bool force_persample_interp, bool flatshade,
495                     uint8_t alphatest, bool msaa)
496{
497   nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
498      fixupData);
499
500   // force_persample_interp: all non-flat -> per-sample
501   // flatshade: all color -> flat
502   // alphatest: PIPE_FUNC_* to use with alphatest
503   // msaa: false = sample id -> 0 for interpolateAtSample
504   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);
505   for (unsigned i = 0; i < info->count; ++i)
506      info->entry[i].apply(&info->entry[i], code, data);
507}
508
509void
510nv50_ir_get_target_library(uint32_t chipset,
511                           const uint32_t **code, uint32_t *size)
512{
513   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
514   targ->getBuiltinCode(code, size);
515   nv50_ir::Target::destroy(targ);
516}
517
518}
519