1/*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "nv50_ir_target_nvc0.h"
24
25// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27namespace nv50_ir {
28
// Code emitter producing the GK110 encoding of the Fermi/Kepler ISA.
// Each emitted instruction is assembled by OR-ing bit fields into the
// words code[0] and code[1] (see the helper definitions below the class).
class CodeEmitterGK110 : public CodeEmitter
{
public:
   CodeEmitterGK110(const TargetNVC0 *, Program::Type);

   // Virtual entry points (presumably overriding CodeEmitter; the base
   // class declaration is not visible in this file).
   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
   virtual void prepareEmission(Function *);

private:
   const TargetNVC0 *targNVC0;

   Program::Type progType;

   const bool writeIssueDelays;

private:
   // Common instruction forms: each writes the opcode bits, the predicate,
   // the destination register and the source operands.
   void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
   void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);

   void emitPredicate(const Instruction *);

   // Operand field writers (constant-buffer address and immediates).
   void setCAddress14(const ValueRef&);
   void setShortImmediate(const Instruction *, const int s);
   void setImmediate32(const Instruction *, const int s, Modifier);
   void setSUConst16(const Instruction *, const int s);

   void modNegAbsF32_3b(const Instruction *, const int s);

   void emitCondCode(CondCode cc, int pos, uint8_t mask);
   void emitInterpMode(const Instruction *);
   void emitLoadStoreType(DataType ty, const int pos);
   void emitCachingMode(CacheMode c, const int pos);
   void emitSUGType(DataType, const int pos);
   void emitSUCachingMode(CacheMode c);

   inline uint8_t getSRegEncoding(const ValueRef&);

   // Rounding mode writers; 'pos' is a bit position counted across the
   // code words (pos / 32 selects the word, pos % 32 the bit).
   void emitRoundMode(RoundMode, const int pos, const int rintPos);
   void emitRoundModeF(RoundMode, const int pos);
   void emitRoundModeI(RoundMode, const int pos);

   void emitNegAbs12(const Instruction *);

   // Per-operation emitters.
   void emitNOP(const Instruction *);

   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitATOM(const Instruction *);
   void emitCCTL(const Instruction *);

   void emitINTERP(const Instruction *);
   void emitAFETCH(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitVFETCH(const Instruction *);
   void emitEXPORT(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitDADD(const Instruction *);
   void emitIMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitDMUL(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);
   void emitSHLADD(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitDMAD(const Instruction *);
   void emitMADSP(const Instruction *i);

   void emitNOT(const Instruction *);
   void emitLogicOp(const Instruction *, uint8_t subOp);
   void emitPOPC(const Instruction *);
   void emitINSBF(const Instruction *);
   void emitEXTBF(const Instruction *);
   void emitBFIND(const Instruction *);
   void emitPERMT(const Instruction *);
   void emitShift(const Instruction *);
   void emitShift64(const Instruction *);

   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitCVT(const Instruction *);
   void emitMINMAX(const Instruction *);
   void emitPreOp(const Instruction *);

   void emitSET(const CmpInstruction *);
   void emitSLCT(const CmpInstruction *);
   void emitSELP(const Instruction *);

   void emitTEXBAR(const Instruction *);
   void emitTEX(const TexInstruction *);
   void emitTEXCSAA(const TexInstruction *);
   void emitTXQ(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

   void emitPIXLD(const Instruction *);

   void emitBAR(const Instruction *);
   void emitMEMBAR(const Instruction *);

   void emitFlow(const Instruction *);

   void emitSHFL(const Instruction *);

   void emitVOTE(const Instruction *);

   void emitSULDGB(const TexInstruction *);
   void emitSUSTGx(const TexInstruction *);
   void emitSUCLAMPMode(uint16_t);
   void emitSUCalc(Instruction *);

   void emitVSHL(const Instruction *);
   void emitVectorSubOp(const Instruction *);

   // Register id writers: OR the id of a def/source into the field that
   // starts at bit 'pos'; absent operands are written as GK110_GPR_ZERO.
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);
   inline void srcId(const Instruction *, int s, const int pos);

   inline void srcAddr32(const ValueRef&, const int pos); // address / 4

   // True if the immediate does not fit the short immediate field for 'ty'.
   inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
};
157
// Register id written by srcId()/defId() when an operand is absent.
#define GK110_GPR_ZERO 255

// Modifier/flag helpers. 'b' is a bit position written in hexadecimal
// WITHOUT the 0x prefix (it is token-pasted onto "0x"); the bit is counted
// across the code words, so b / 32 selects the word and b % 32 the bit.
// All of them expect an instruction pointer named 'i' in scope.
//
// NOTE(review): each macro expands to a bare `if (...) stmt` (no
// do { } while (0) wrapper), so using one as the unbraced body of an
// if/else would capture the following `else`. Always brace around them.

// Set bit b if source s carries the neg / abs modifier.
#define NEG_(b, s) \
   if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define ABS_(b, s) \
   if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if source s carries the NOT modifier.
#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))       \
   code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if the instruction's ftz / dnz flag is set.
#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if the instruction saturates its result.
#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Emit i->rnd at bit b through emitRoundMode<t>() (t is F or I, or empty).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Register data of the representative value of a source / def reference.
// Both macros expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
177
178void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
179{
180   code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
181}
182
183void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
184{
185   code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
186}
187
188void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
189{
190   int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
191   code[pos / 32] |= r << (pos % 32);
192}
193
194void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
195{
196   code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
197}
198
199void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
200{
201   code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
202}
203
204bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
205{
206   const ImmediateValue *imm = ref.get()->asImm();
207
208   if (ty == TYPE_F32)
209      return imm && imm->reg.data.u32 & 0xfff;
210   else
211      return imm && (imm->reg.data.s32 > 0x7ffff ||
212                     imm->reg.data.s32 < -0x80000);
213}
214
215void
216CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
217{
218   bool rint = false;
219   uint8_t n;
220
221   switch (rnd) {
222   case ROUND_MI: rint = true; FALLTHROUGH; case ROUND_M: n = 1; break;
223   case ROUND_PI: rint = true; FALLTHROUGH; case ROUND_P: n = 2; break;
224   case ROUND_ZI: rint = true; FALLTHROUGH; case ROUND_Z: n = 3; break;
225   default:
226      rint = rnd == ROUND_NI;
227      n = 0;
228      assert(rnd == ROUND_N || rnd == ROUND_NI);
229      break;
230   }
231   code[pos / 32] |= n << (pos % 32);
232   if (rint && rintPos >= 0)
233      code[rintPos / 32] |= 1 << (rintPos % 32);
234}
235
236void
237CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
238{
239   uint8_t n;
240
241   switch (rnd) {
242   case ROUND_M: n = 1; break;
243   case ROUND_P: n = 2; break;
244   case ROUND_Z: n = 3; break;
245   default:
246      n = 0;
247      assert(rnd == ROUND_N);
248      break;
249   }
250   code[pos / 32] |= n << (pos % 32);
251}
252
253void
254CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
255{
256   uint8_t n;
257
258   switch (rnd) {
259   case ROUND_MI: n = 1; break;
260   case ROUND_PI: n = 2; break;
261   case ROUND_ZI: n = 3; break;
262   default:
263      n = 0;
264      assert(rnd == ROUND_NI);
265      break;
266   }
267   code[pos / 32] |= n << (pos % 32);
268}
269
270void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
271{
272   uint8_t n;
273
274   switch (cc) {
275   case CC_FL:  n = 0x00; break;
276   case CC_LT:  n = 0x01; break;
277   case CC_EQ:  n = 0x02; break;
278   case CC_LE:  n = 0x03; break;
279   case CC_GT:  n = 0x04; break;
280   case CC_NE:  n = 0x05; break;
281   case CC_GE:  n = 0x06; break;
282   case CC_LTU: n = 0x09; break;
283   case CC_EQU: n = 0x0a; break;
284   case CC_LEU: n = 0x0b; break;
285   case CC_GTU: n = 0x0c; break;
286   case CC_NEU: n = 0x0d; break;
287   case CC_GEU: n = 0x0e; break;
288   case CC_TR:  n = 0x0f; break;
289   case CC_NO:  n = 0x10; break;
290   case CC_NC:  n = 0x11; break;
291   case CC_NS:  n = 0x12; break;
292   case CC_NA:  n = 0x13; break;
293   case CC_A:   n = 0x14; break;
294   case CC_S:   n = 0x15; break;
295   case CC_C:   n = 0x16; break;
296   case CC_O:   n = 0x17; break;
297   default:
298      n = 0;
299      assert(!"invalid condition code");
300      break;
301   }
302   code[pos / 32] |= (n & mask) << (pos % 32);
303}
304
305void
306CodeEmitterGK110::emitPredicate(const Instruction *i)
307{
308   if (i->predSrc >= 0) {
309      srcId(i->src(i->predSrc), 18);
310      if (i->cc == CC_NOT_P)
311         code[0] |= 8 << 18; // negate
312      assert(i->getPredicate()->reg.file == FILE_PREDICATE);
313   } else {
314      code[0] |= 7 << 18;
315   }
316}
317
318void
319CodeEmitterGK110::setCAddress14(const ValueRef& src)
320{
321   const Storage& res = src.get()->asSym()->reg;
322   const int32_t addr = res.data.offset / 4;
323
324   code[0] |= (addr & 0x01ff) << 23;
325   code[1] |= (addr & 0x3e00) >> 9;
326   code[1] |= res.fileIndex << 5;
327}
328
329void
330CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
331{
332   const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
333   const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
334
335   if (i->sType == TYPE_F32) {
336      assert(!(u32 & 0x00000fff));
337      code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
338      code[1] |= ((u32 & 0x7fe00000) >> 21);
339      code[1] |= ((u32 & 0x80000000) >> 4);
340   } else
341   if (i->sType == TYPE_F64) {
342      assert(!(u64 & 0x00000fffffffffffULL));
343      code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
344      code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
345      code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
346   } else {
347      assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
348      code[0] |= (u32 & 0x001ff) << 23;
349      code[1] |= (u32 & 0x7fe00) >> 9;
350      code[1] |= (u32 & 0x80000) << 8;
351   }
352}
353
354void
355CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
356                                 Modifier mod)
357{
358   uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
359
360   if (mod) {
361      ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
362      mod.applyTo(imm);
363      u32 = imm.reg.data.u32;
364   }
365
366   code[0] |= u32 << 23;
367   code[1] |= u32 >> 9;
368}
369
370void
371CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
372                             Modifier mod, int sCount)
373{
374   code[0] = ctg;
375   code[1] = opc << 20;
376
377   emitPredicate(i);
378
379   defId(i->def(0), 2);
380
381   for (int s = 0; s < sCount && i->srcExists(s); ++s) {
382      switch (i->src(s).getFile()) {
383      case FILE_GPR:
384         srcId(i->src(s), s ? 42 : 10);
385         break;
386      case FILE_IMMEDIATE:
387         setImmediate32(i, s, mod);
388         break;
389      default:
390         break;
391      }
392   }
393}
394
395
396void
397CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
398{
399   code[0] = ctg;
400   code[1] = opc << 20;
401
402   emitPredicate(i);
403
404   defId(i->def(0), 2);
405
406   switch (i->src(0).getFile()) {
407   case FILE_MEMORY_CONST:
408      code[1] |= 0x4 << 28;
409      setCAddress14(i->src(0));
410      break;
411   case FILE_GPR:
412      code[1] |= 0xc << 28;
413      srcId(i->src(0), 23);
414      break;
415   default:
416      assert(0);
417      break;
418   }
419}
420
421// 0x2 for GPR, c[] and 0x1 for short immediate
422void
423CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
424                              uint32_t opc1)
425{
426   const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
427
428   int s1 = 23;
429   if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
430      s1 = 42;
431
432   if (imm) {
433      code[0] = 0x1;
434      code[1] = opc1 << 20;
435   } else {
436      code[0] = 0x2;
437      code[1] = (0xc << 28) | (opc2 << 20);
438   }
439
440   emitPredicate(i);
441
442   defId(i->def(0), 2);
443
444   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
445      switch (i->src(s).getFile()) {
446      case FILE_MEMORY_CONST:
447         code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
448         setCAddress14(i->src(s));
449         break;
450      case FILE_IMMEDIATE:
451         setShortImmediate(i, s);
452         break;
453      case FILE_GPR:
454         srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
455         break;
456      default:
457         if (i->op == OP_SELP) {
458            assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
459            srcId(i->src(s), 42);
460         }
461         // ignore here, can be predicate or flags, but must not be address
462         break;
463      }
464   }
465   // 0x0 = invalid
466   // 0xc = rrr
467   // 0x8 = rrc
468   // 0x4 = rcr
469   assert(imm || (code[1] & (0xc << 28)));
470}
471
472inline void
473CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
474{
475   if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
476   if (i->src(s).mod.neg()) code[1] ^=  (1 << 27);
477}
478
479void
480CodeEmitterGK110::emitNOP(const Instruction *i)
481{
482   code[0] = 0x00003c02;
483   code[1] = 0x85800000;
484
485   if (i)
486      emitPredicate(i);
487   else
488      code[0] = 0x001c3c02;
489}
490
491void
492CodeEmitterGK110::emitFMAD(const Instruction *i)
493{
494   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
495
496   if (isLIMM(i->src(1), TYPE_F32)) {
497      assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
498
499      // last source is dst, so force 2 sources
500      emitForm_L(i, 0x600, 0x0, 0, 2);
501
502      if (i->flagsDef >= 0)
503         code[1] |= 1 << 23;
504
505      SAT_(3a);
506      NEG_(3c, 2);
507
508      if (neg1) {
509         code[1] |= 1 << 27;
510      }
511   } else {
512      emitForm_21(i, 0x0c0, 0x940);
513
514      NEG_(34, 2);
515      SAT_(35);
516      RND_(36, F);
517
518      if (code[0] & 0x1) {
519         if (neg1)
520            code[1] ^= 1 << 27;
521      } else
522      if (neg1) {
523         code[1] |= 1 << 19;
524      }
525   }
526
527   FTZ_(38);
528   DNZ_(39);
529}
530
531void
532CodeEmitterGK110::emitDMAD(const Instruction *i)
533{
534   assert(!i->saturate);
535   assert(!i->ftz);
536
537   emitForm_21(i, 0x1b8, 0xb38);
538
539   NEG_(34, 2);
540   RND_(36, F);
541
542   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
543
544   if (code[0] & 0x1) {
545      if (neg1)
546         code[1] ^= 1 << 27;
547   } else
548   if (neg1) {
549      code[1] |= 1 << 19;
550   }
551}
552
553void
554CodeEmitterGK110::emitMADSP(const Instruction *i)
555{
556   emitForm_21(i, 0x140, 0xa40);
557
558   if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
559      code[1] |= 0x00c00000;
560   } else {
561      code[1] |= (i->subOp & 0x00f) << 19; // imadp1
562      code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
563      code[1] |= (i->subOp & 0x100) << 11; // imadp3
564      code[1] |= (i->subOp & 0x200) << 15; // imadp3
565      code[1] |= (i->subOp & 0xc00) << 12; // imadp3
566   }
567
568   if (i->flagsDef >= 0)
569      code[1] |= 1 << 18;
570}
571
572void
573CodeEmitterGK110::emitFMUL(const Instruction *i)
574{
575   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
576
577   assert(i->postFactor >= -3 && i->postFactor <= 3);
578
579   if (isLIMM(i->src(1), TYPE_F32)) {
580      emitForm_L(i, 0x200, 0x2, Modifier(0));
581
582      FTZ_(38);
583      DNZ_(39);
584      SAT_(3a);
585      if (neg)
586         code[1] ^= 1 << 22;
587
588      assert(i->postFactor == 0);
589   } else {
590      emitForm_21(i, 0x234, 0xc34);
591      code[1] |= ((i->postFactor > 0) ?
592                  (7 - i->postFactor) : (0 - i->postFactor)) << 12;
593
594      RND_(2a, F);
595      FTZ_(2f);
596      DNZ_(30);
597      SAT_(35);
598
599      if (code[0] & 0x1) {
600         if (neg)
601            code[1] ^= 1 << 27;
602      } else
603      if (neg) {
604         code[1] |= 1 << 19;
605      }
606   }
607}
608
609void
610CodeEmitterGK110::emitDMUL(const Instruction *i)
611{
612   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
613
614   assert(!i->postFactor);
615   assert(!i->saturate);
616   assert(!i->ftz);
617   assert(!i->dnz);
618
619   emitForm_21(i, 0x240, 0xc40);
620
621   RND_(2a, F);
622
623   if (code[0] & 0x1) {
624      if (neg)
625         code[1] ^= 1 << 27;
626   } else
627   if (neg) {
628      code[1] |= 1 << 19;
629   }
630}
631
632void
633CodeEmitterGK110::emitIMUL(const Instruction *i)
634{
635   assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
636   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
637
638   if (isLIMM(i->src(1), TYPE_S32)) {
639      emitForm_L(i, 0x280, 2, Modifier(0));
640
641      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
642         code[1] |= 1 << 24;
643      if (i->sType == TYPE_S32)
644         code[1] |= 3 << 25;
645   } else {
646      emitForm_21(i, 0x21c, 0xc1c);
647
648      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
649         code[1] |= 1 << 10;
650      if (i->sType == TYPE_S32)
651         code[1] |= 3 << 11;
652   }
653}
654
655void
656CodeEmitterGK110::emitFADD(const Instruction *i)
657{
658   if (isLIMM(i->src(1), TYPE_F32)) {
659      assert(i->rnd == ROUND_N);
660      assert(!i->saturate);
661
662      Modifier mod = i->src(1).mod ^
663         Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
664
665      emitForm_L(i, 0x400, 0, mod);
666
667      FTZ_(3a);
668      NEG_(3b, 0);
669      ABS_(39, 0);
670   } else {
671      emitForm_21(i, 0x22c, 0xc2c);
672
673      FTZ_(2f);
674      RND_(2a, F);
675      ABS_(31, 0);
676      NEG_(33, 0);
677      SAT_(35);
678
679      if (code[0] & 0x1) {
680         modNegAbsF32_3b(i, 1);
681         if (i->op == OP_SUB) code[1] ^= 1 << 27;
682      } else {
683         ABS_(34, 1);
684         NEG_(30, 1);
685         if (i->op == OP_SUB) code[1] ^= 1 << 16;
686      }
687   }
688}
689
690void
691CodeEmitterGK110::emitDADD(const Instruction *i)
692{
693   assert(!i->saturate);
694   assert(!i->ftz);
695
696   emitForm_21(i, 0x238, 0xc38);
697   RND_(2a, F);
698   ABS_(31, 0);
699   NEG_(33, 0);
700   if (code[0] & 0x1) {
701      modNegAbsF32_3b(i, 1);
702      if (i->op == OP_SUB) code[1] ^= 1 << 27;
703   } else {
704      NEG_(30, 1);
705      ABS_(34, 1);
706      if (i->op == OP_SUB) code[1] ^= 1 << 16;
707   }
708}
709
710void
711CodeEmitterGK110::emitUADD(const Instruction *i)
712{
713   uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
714
715   if (i->op == OP_SUB)
716      addOp ^= 1;
717
718   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
719
720   if (isLIMM(i->src(1), TYPE_S32)) {
721      emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
722
723      if (addOp & 2)
724         code[1] |= 1 << 27;
725
726      assert(i->flagsDef < 0);
727      assert(i->flagsSrc < 0);
728
729      SAT_(39);
730   } else {
731      emitForm_21(i, 0x208, 0xc08);
732
733      assert(addOp != 3); // would be add-plus-one
734
735      code[1] |= addOp << 19;
736
737      if (i->flagsDef >= 0)
738         code[1] |= 1 << 18; // write carry
739      if (i->flagsSrc >= 0)
740         code[1] |= 1 << 14; // add carry
741
742      SAT_(35);
743   }
744}
745
746void
747CodeEmitterGK110::emitIMAD(const Instruction *i)
748{
749   uint8_t addOp =
750      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
751
752   emitForm_21(i, 0x100, 0xa00);
753
754   assert(addOp != 3);
755   code[1] |= addOp << 26;
756
757   if (i->sType == TYPE_S32)
758      code[1] |= (1 << 19) | (1 << 24);
759
760   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
761      code[1] |= 1 << 25;
762
763   if (i->flagsDef >= 0) code[1] |= 1 << 18;
764   if (i->flagsSrc >= 0) code[1] |= 1 << 20;
765
766   SAT_(35);
767}
768
769void
770CodeEmitterGK110::emitISAD(const Instruction *i)
771{
772   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
773
774   emitForm_21(i, 0x1f4, 0xb74);
775
776   if (i->dType == TYPE_S32)
777      code[1] |= 1 << 19;
778}
779
780void
781CodeEmitterGK110::emitSHLADD(const Instruction *i)
782{
783   uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
784   const ImmediateValue *imm = i->src(1).get()->asImm();
785   assert(imm);
786
787   if (i->src(2).getFile() == FILE_IMMEDIATE) {
788      code[0] = 0x1;
789      code[1] = 0xc0c << 20;
790   } else {
791      code[0] = 0x2;
792      code[1] = 0x20c << 20;
793   }
794   code[1] |= addOp << 19;
795
796   emitPredicate(i);
797
798   defId(i->def(0), 2);
799   srcId(i->src(0), 10);
800
801   if (i->flagsDef >= 0)
802      code[1] |= 1 << 18;
803
804   assert(!(imm->reg.data.u32 & 0xffffffe0));
805   code[1] |= imm->reg.data.u32 << 10;
806
807   switch (i->src(2).getFile()) {
808   case FILE_GPR:
809      assert(code[0] & 0x2);
810      code[1] |= 0xc << 28;
811      srcId(i->src(2), 23);
812      break;
813   case FILE_MEMORY_CONST:
814      assert(code[0] & 0x2);
815      code[1] |= 0x4 << 28;
816      setCAddress14(i->src(2));
817      break;
818   case FILE_IMMEDIATE:
819      assert(code[0] & 0x1);
820      setShortImmediate(i, 2);
821      break;
822   default:
823      assert(!"bad src2 file");
824      break;
825   }
826}
827
828void
829CodeEmitterGK110::emitNOT(const Instruction *i)
830{
831   code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
832   code[1] = 0x22003800;
833
834   emitPredicate(i);
835
836   defId(i->def(0), 2);
837
838   switch (i->src(0).getFile()) {
839   case FILE_GPR:
840      code[1] |= 0xc << 28;
841      srcId(i->src(0), 23);
842      break;
843   case FILE_MEMORY_CONST:
844      code[1] |= 0x4 << 28;
845      setCAddress14(i->src(0));
846      break;
847   default:
848      assert(0);
849      break;
850   }
851}
852
853void
854CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
855{
856   if (i->def(0).getFile() == FILE_PREDICATE) {
857      code[0] = 0x00000002 | (subOp << 27);
858      code[1] = 0x84800000;
859
860      emitPredicate(i);
861
862      defId(i->def(0), 5);
863      srcId(i->src(0), 14);
864      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
865      srcId(i->src(1), 32);
866      if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
867
868      if (i->defExists(1)) {
869         defId(i->def(1), 2);
870      } else {
871         code[0] |= 7 << 2;
872      }
873      // (a OP b) OP c
874      if (i->predSrc != 2 && i->srcExists(2)) {
875         code[1] |= subOp << 16;
876         srcId(i->src(2), 42);
877         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
878      } else {
879         code[1] |= 7 << 10;
880      }
881   } else
882   if (isLIMM(i->src(1), TYPE_S32)) {
883      emitForm_L(i, 0x200, 0, i->src(1).mod);
884      code[1] |= subOp << 24;
885      NOT_(3a, 0);
886   } else {
887      emitForm_21(i, 0x220, 0xc20);
888      code[1] |= subOp << 12;
889      NOT_(2a, 0);
890      NOT_(2b, 1);
891   }
892}
893
894void
895CodeEmitterGK110::emitPOPC(const Instruction *i)
896{
897   assert(!isLIMM(i->src(1), TYPE_S32, true));
898
899   emitForm_21(i, 0x204, 0xc04);
900
901   NOT_(2a, 0);
902   if (!(code[0] & 0x1))
903      NOT_(2b, 1);
904}
905
906void
907CodeEmitterGK110::emitINSBF(const Instruction *i)
908{
909   emitForm_21(i, 0x1f8, 0xb78);
910}
911
912void
913CodeEmitterGK110::emitEXTBF(const Instruction *i)
914{
915   emitForm_21(i, 0x600, 0xc00);
916
917   if (i->dType == TYPE_S32)
918      code[1] |= 0x80000;
919   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
920      code[1] |= 0x800;
921}
922
923void
924CodeEmitterGK110::emitBFIND(const Instruction *i)
925{
926   emitForm_C(i, 0x218, 0x2);
927
928   if (i->dType == TYPE_S32)
929      code[1] |= 0x80000;
930   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
931      code[1] |= 0x800;
932   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
933      code[1] |= 0x1000;
934}
935
936void
937CodeEmitterGK110::emitPERMT(const Instruction *i)
938{
939   emitForm_21(i, 0x1e0, 0xb60);
940
941   code[1] |= i->subOp << 19;
942}
943
944void
945CodeEmitterGK110::emitShift(const Instruction *i)
946{
947   if (i->op == OP_SHR) {
948      emitForm_21(i, 0x214, 0xc14);
949      if (isSignedType(i->dType))
950         code[1] |= 1 << 19;
951   } else {
952      emitForm_21(i, 0x224, 0xc24);
953   }
954
955   if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
956      code[1] |= 1 << 10;
957}
958
959void
960CodeEmitterGK110::emitShift64(const Instruction *i)
961{
962   if (i->op == OP_SHR) {
963      emitForm_21(i, 0x27c, 0xc7c);
964      if (isSignedType(i->sType))
965         code[1] |= 0x100;
966      if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)
967         code[1] |= 1 << 19;
968   } else {
969      emitForm_21(i, 0xdfc, 0xf7c);
970   }
971   code[1] |= 0x200;
972
973   if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
974      code[1] |= 1 << 21;
975}
976
977void
978CodeEmitterGK110::emitPreOp(const Instruction *i)
979{
980   emitForm_C(i, 0x248, 0x2);
981
982   if (i->op == OP_PREEX2)
983      code[1] |= 1 << 10;
984
985   NEG_(30, 0);
986   ABS_(34, 0);
987}
988
989void
990CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
991{
992   code[0] = 0x00000002 | (subOp << 23);
993   code[1] = 0x84000000;
994
995   emitPredicate(i);
996
997   defId(i->def(0), 2);
998   srcId(i->src(0), 10);
999
1000   NEG_(33, 0);
1001   ABS_(31, 0);
1002   SAT_(35);
1003}
1004
1005void
1006CodeEmitterGK110::emitMINMAX(const Instruction *i)
1007{
1008   uint32_t op2, op1;
1009
1010   switch (i->dType) {
1011   case TYPE_U32:
1012   case TYPE_S32:
1013      op2 = 0x210;
1014      op1 = 0xc10;
1015      break;
1016   case TYPE_F32:
1017      op2 = 0x230;
1018      op1 = 0xc30;
1019      break;
1020   case TYPE_F64:
1021      op2 = 0x228;
1022      op1 = 0xc28;
1023      break;
1024   default:
1025      assert(0);
1026      op2 = 0;
1027      op1 = 0;
1028      break;
1029   }
1030   emitForm_21(i, op2, op1);
1031
1032   if (i->dType == TYPE_S32)
1033      code[1] |= 1 << 19;
1034   code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
1035   code[1] |= i->subOp << 14;
1036   if (i->flagsDef >= 0)
1037      code[1] |= i->subOp << 18;
1038
1039   FTZ_(2f);
1040   ABS_(31, 0);
1041   NEG_(33, 0);
1042   if (code[0] & 0x1) {
1043      modNegAbsF32_3b(i, 1);
1044   } else {
1045      ABS_(34, 1);
1046      NEG_(30, 1);
1047   }
1048}
1049
1050void
1051CodeEmitterGK110::emitCVT(const Instruction *i)
1052{
1053   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1054   const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1055   const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1056
1057   bool sat = i->saturate;
1058   bool abs = i->src(0).mod.abs();
1059   bool neg = i->src(0).mod.neg();
1060
1061   RoundMode rnd = i->rnd;
1062
1063   switch (i->op) {
1064   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1065   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1066   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1067   case OP_SAT: sat = true; break;
1068   case OP_NEG: neg = !neg; break;
1069   case OP_ABS: abs = true; neg = false; break;
1070   default:
1071      break;
1072   }
1073
1074   DataType dType;
1075
1076   if (i->op == OP_NEG && i->dType == TYPE_U32)
1077      dType = TYPE_S32;
1078   else
1079      dType = i->dType;
1080
1081
1082   uint32_t op;
1083
1084   if      (f2f) op = 0x254;
1085   else if (f2i) op = 0x258;
1086   else if (i2f) op = 0x25c;
1087   else          op = 0x260;
1088
1089   emitForm_C(i, op, 0x2);
1090
1091   FTZ_(2f);
1092   if (neg) code[1] |= 1 << 16;
1093   if (abs) code[1] |= 1 << 20;
1094   if (sat) code[1] |= 1 << 21;
1095
1096   emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1097
1098   code[0] |= typeSizeofLog2(dType) << 10;
1099   code[0] |= typeSizeofLog2(i->sType) << 12;
1100   code[1] |= i->subOp << 12;
1101
1102   if (isSignedIntType(dType))
1103      code[0] |= 0x4000;
1104   if (isSignedIntType(i->sType))
1105      code[0] |= 0x8000;
1106}
1107
1108void
1109CodeEmitterGK110::emitSET(const CmpInstruction *i)
1110{
1111   uint16_t op1, op2;
1112
1113   if (i->def(0).getFile() == FILE_PREDICATE) {
1114      switch (i->sType) {
1115      case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
1116      case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
1117      default:
1118         op2 = 0x1b0;
1119         op1 = 0xb30;
1120         break;
1121      }
1122      emitForm_21(i, op2, op1);
1123
1124      NEG_(2e, 0);
1125      ABS_(9, 0);
1126      if (!(code[0] & 0x1)) {
1127         NEG_(8, 1);
1128         ABS_(2f, 1);
1129      } else {
1130         modNegAbsF32_3b(i, 1);
1131      }
1132      FTZ_(32);
1133
1134      // normal DST field is negated predicate result
1135      code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
1136      if (i->defExists(1))
1137         defId(i->def(1), 2);
1138      else
1139         code[0] |= 0x1c;
1140   } else {
1141      switch (i->sType) {
1142      case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
1143      case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
1144      default:
1145         op2 = 0x1a8;
1146         op1 = 0xb28;
1147         break;
1148      }
1149      emitForm_21(i, op2, op1);
1150
1151      NEG_(2e, 0);
1152      ABS_(39, 0);
1153      if (!(code[0] & 0x1)) {
1154         NEG_(38, 1);
1155         ABS_(2f, 1);
1156      } else {
1157         modNegAbsF32_3b(i, 1);
1158      }
1159      FTZ_(3a);
1160
1161      if (i->dType == TYPE_F32) {
1162         if (isFloatType(i->sType))
1163            code[1] |= 1 << 23;
1164         else
1165            code[1] |= 1 << 15;
1166      }
1167   }
1168   if (i->sType == TYPE_S32)
1169      code[1] |= 1 << 19;
1170
1171   if (i->op != OP_SET) {
1172      switch (i->op) {
1173      case OP_SET_AND: code[1] |= 0x0 << 16; break;
1174      case OP_SET_OR:  code[1] |= 0x1 << 16; break;
1175      case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1176      default:
1177         assert(0);
1178         break;
1179      }
1180      srcId(i->src(2), 0x2a);
1181   } else {
1182      code[1] |= 0x7 << 10;
1183   }
1184   if (i->flagsSrc >= 0)
1185      code[1] |= 1 << 14;
1186   emitCondCode(i->setCond,
1187                isFloatType(i->sType) ? 0x33 : 0x34,
1188                isFloatType(i->sType) ? 0xf : 0x7);
1189}
1190
1191void
1192CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1193{
1194   CondCode cc = i->setCond;
1195   if (i->src(2).mod.neg())
1196      cc = reverseCondCode(cc);
1197
1198   if (i->dType == TYPE_F32) {
1199      emitForm_21(i, 0x1d0, 0xb50);
1200      FTZ_(32);
1201      emitCondCode(cc, 0x33, 0xf);
1202   } else {
1203      emitForm_21(i, 0x1a0, 0xb20);
1204      emitCondCode(cc, 0x34, 0x7);
1205      if (i->dType == TYPE_S32)
1206         code[1] |= 1 << 19;
1207   }
1208}
1209
1210void
1211gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1212{
1213   int loc = entry->loc;
1214   bool val = false;
1215   switch (entry->ipa) {
1216   case 0:
1217      val = data.force_persample_interp;
1218      break;
1219   case 1:
1220      val = data.msaa;
1221      break;
1222   }
1223   if (val)
1224      code[loc + 1] |= 1 << 13;
1225   else
1226      code[loc + 1] &= ~(1 << 13);
1227}
1228
1229void CodeEmitterGK110::emitSELP(const Instruction *i)
1230{
1231   emitForm_21(i, 0x250, 0x050);
1232
1233   if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1234      code[1] |= 1 << 13;
1235
1236   if (i->subOp >= 1) {
1237      addInterp(i->subOp - 1, 0, gk110_selpFlip);
1238   }
1239}
1240
1241void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1242{
1243   code[0] = 0x0000003e | (i->subOp << 23);
1244   code[1] = 0x77000000;
1245
1246   emitPredicate(i);
1247}
1248
1249void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1250{
1251   code[0] = 0x00000002;
1252   code[1] = 0x76c00000;
1253
1254   code[1] |= i->tex.r << 9;
1255   // code[1] |= i->tex.s << (9 + 8);
1256
1257   if (i->tex.liveOnly)
1258      code[0] |= 0x80000000;
1259
1260   defId(i->def(0), 2);
1261   srcId(i->src(0), 10);
1262}
1263
1264static inline bool
1265isNextIndependentTex(const TexInstruction *i)
1266{
1267   if (!i->next || !isTextureOp(i->next->op))
1268      return false;
1269   if (i->getDef(0)->interfers(i->next->getSrc(0)))
1270      return false;
1271   return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1272}
1273
1274void
1275CodeEmitterGK110::emitTEX(const TexInstruction *i)
1276{
1277   const bool ind = i->tex.rIndirectSrc >= 0;
1278
1279   if (ind) {
1280      code[0] = 0x00000002;
1281      switch (i->op) {
1282      case OP_TXD:
1283         code[1] = 0x7e000000;
1284         break;
1285      case OP_TXLQ:
1286         code[1] = 0x7e800000;
1287         break;
1288      case OP_TXF:
1289         code[1] = 0x78000000;
1290         break;
1291      case OP_TXG:
1292         code[1] = 0x7dc00000;
1293         break;
1294      default:
1295         code[1] = 0x7d800000;
1296         break;
1297      }
1298   } else {
1299      switch (i->op) {
1300      case OP_TXD:
1301         code[0] = 0x00000002;
1302         code[1] = 0x76000000;
1303         code[1] |= i->tex.r << 9;
1304         break;
1305      case OP_TXLQ:
1306         code[0] = 0x00000002;
1307         code[1] = 0x76800000;
1308         code[1] |= i->tex.r << 9;
1309         break;
1310      case OP_TXF:
1311         code[0] = 0x00000002;
1312         code[1] = 0x70000000;
1313         code[1] |= i->tex.r << 13;
1314         break;
1315      case OP_TXG:
1316         code[0] = 0x00000001;
1317         code[1] = 0x70000000;
1318         code[1] |= i->tex.r << 15;
1319         break;
1320      default:
1321         code[0] = 0x00000001;
1322         code[1] = 0x60000000;
1323         code[1] |= i->tex.r << 15;
1324         break;
1325      }
1326   }
1327
1328   code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1329
1330   if (i->tex.liveOnly)
1331      code[0] |= 0x80000000;
1332
1333   switch (i->op) {
1334   case OP_TEX: break;
1335   case OP_TXB: code[1] |= 0x2000; break;
1336   case OP_TXL: code[1] |= 0x3000; break;
1337   case OP_TXF: break;
1338   case OP_TXG: break;
1339   case OP_TXD: break;
1340   case OP_TXLQ: break;
1341   default:
1342      assert(!"invalid texture op");
1343      break;
1344   }
1345
1346   if (i->op == OP_TXF) {
1347      if (!i->tex.levelZero)
1348         code[1] |= 0x1000;
1349   } else
1350   if (i->tex.levelZero) {
1351      code[1] |= 0x1000;
1352   }
1353
1354   if (i->op != OP_TXD && i->tex.derivAll)
1355      code[1] |= 0x200;
1356
1357   emitPredicate(i);
1358
1359   code[1] |= i->tex.mask << 2;
1360
1361   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1362
1363   defId(i->def(0), 2);
1364   srcId(i->src(0), 10);
1365   srcId(i, src1, 23);
1366
1367   if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1368
1369   // texture target:
1370   code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1371   if (i->tex.target.isArray())
1372      code[1] |= 0x40;
1373   if (i->tex.target.isShadow())
1374      code[1] |= 0x400;
1375   if (i->tex.target == TEX_TARGET_2D_MS ||
1376       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1377      code[1] |= 0x800;
1378
1379   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1380      // ?
1381   }
1382
1383   if (i->tex.useOffsets == 1) {
1384      switch (i->op) {
1385      case OP_TXF: code[1] |= 0x200; break;
1386      case OP_TXD: code[1] |= 0x00400000; break;
1387      default: code[1] |= 0x800; break;
1388      }
1389   }
1390   if (i->tex.useOffsets == 4)
1391      code[1] |= 0x1000;
1392}
1393
1394void
1395CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1396{
1397   code[0] = 0x00000002;
1398   code[1] = 0x75400001;
1399
1400   switch (i->tex.query) {
1401   case TXQ_DIMS:            code[0] |= 0x01 << 25; break;
1402   case TXQ_TYPE:            code[0] |= 0x02 << 25; break;
1403   case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1404   case TXQ_FILTER:          code[0] |= 0x10 << 25; break;
1405   case TXQ_LOD:             code[0] |= 0x12 << 25; break;
1406   case TXQ_BORDER_COLOUR:   code[0] |= 0x16 << 25; break;
1407   default:
1408      assert(!"invalid texture query");
1409      break;
1410   }
1411
1412   code[1] |= i->tex.mask << 2;
1413   code[1] |= i->tex.r << 9;
1414   if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1415      code[1] |= 0x08000000;
1416
1417   defId(i->def(0), 2);
1418   srcId(i->src(0), 10);
1419
1420   emitPredicate(i);
1421}
1422
1423void
1424CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1425{
1426   code[0] = 0x00000002 | ((qOp & 1) << 31);
1427   code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1428
1429   defId(i->def(0), 2);
1430   srcId(i->src(0), 10);
1431   srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1432
1433   emitPredicate(i);
1434}
1435
1436void
1437CodeEmitterGK110::emitPIXLD(const Instruction *i)
1438{
1439   emitForm_L(i, 0x7f4, 2, Modifier(0));
1440   code[1] |= i->subOp << 2;
1441   code[1] |= 0x00070000;
1442}
1443
1444void
1445CodeEmitterGK110::emitBAR(const Instruction *i)
1446{
1447   code[0] = 0x00000002;
1448   code[1] = 0x85400000;
1449
1450   switch (i->subOp) {
1451   case NV50_IR_SUBOP_BAR_ARRIVE:   code[1] |= 0x08; break;
1452   case NV50_IR_SUBOP_BAR_RED_AND:  code[1] |= 0x50; break;
1453   case NV50_IR_SUBOP_BAR_RED_OR:   code[1] |= 0x90; break;
1454   case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1455   default:
1456      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1457      break;
1458   }
1459
1460   emitPredicate(i);
1461
1462   // barrier id
1463   if (i->src(0).getFile() == FILE_GPR) {
1464      srcId(i->src(0), 10);
1465   } else {
1466      ImmediateValue *imm = i->getSrc(0)->asImm();
1467      assert(imm);
1468      code[0] |= imm->reg.data.u32 << 10;
1469      code[1] |= 0x8000;
1470   }
1471
1472   // thread count
1473   if (i->src(1).getFile() == FILE_GPR) {
1474      srcId(i->src(1), 23);
1475   } else {
1476      ImmediateValue *imm = i->getSrc(0)->asImm();
1477      assert(imm);
1478      assert(imm->reg.data.u32 <= 0xfff);
1479      code[0] |= imm->reg.data.u32 << 23;
1480      code[1] |= imm->reg.data.u32 >> 9;
1481      code[1] |= 0x4000;
1482   }
1483
1484   if (i->srcExists(2) && (i->predSrc != 2)) {
1485      srcId(i->src(2), 32 + 10);
1486      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1487         code[1] |= 1 << 13;
1488   } else {
1489      code[1] |= 7 << 10;
1490   }
1491}
1492
1493void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1494{
1495   code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1496   code[1] = 0x7cc00000;
1497
1498   emitPredicate(i);
1499}
1500
1501void
1502CodeEmitterGK110::emitFlow(const Instruction *i)
1503{
1504   const FlowInstruction *f = i->asFlow();
1505
1506   unsigned mask; // bit 0: predicate, bit 1: target
1507
1508   code[0] = 0x00000000;
1509
1510   switch (i->op) {
1511   case OP_BRA:
1512      code[1] = f->absolute ? 0x10800000 : 0x12000000;
1513      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1514         code[0] |= 0x80;
1515      mask = 3;
1516      break;
1517   case OP_CALL:
1518      code[1] = f->absolute ? 0x11000000 : 0x13000000;
1519      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1520         code[0] |= 0x80;
1521      mask = 2;
1522      break;
1523
1524   case OP_EXIT:    code[1] = 0x18000000; mask = 1; break;
1525   case OP_RET:     code[1] = 0x19000000; mask = 1; break;
1526   case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1527   case OP_BREAK:   code[1] = 0x1a000000; mask = 1; break;
1528   case OP_CONT:    code[1] = 0x1a800000; mask = 1; break;
1529
1530   case OP_JOINAT:   code[1] = 0x14800000; mask = 2; break;
1531   case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1532   case OP_PRECONT:  code[1] = 0x15800000; mask = 2; break;
1533   case OP_PRERET:   code[1] = 0x13800000; mask = 2; break;
1534
1535   case OP_QUADON:  code[1] = 0x1b800000; mask = 0; break;
1536   case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1537   case OP_BRKPT:   code[1] = 0x00000000; mask = 0; break;
1538   default:
1539      assert(!"invalid flow operation");
1540      return;
1541   }
1542
1543   if (mask & 1) {
1544      emitPredicate(i);
1545      if (i->flagsSrc < 0)
1546         code[0] |= 0x3c;
1547   }
1548
1549   if (!f)
1550      return;
1551
1552   if (f->allWarp)
1553      code[0] |= 1 << 9;
1554   if (f->limit)
1555      code[0] |= 1 << 8;
1556
1557   if (f->op == OP_CALL) {
1558      if (f->builtin) {
1559         assert(f->absolute);
1560         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1561         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1562         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1563      } else {
1564         assert(!f->absolute);
1565         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1566         code[0] |= (pcRel & 0x1ff) << 23;
1567         code[1] |= (pcRel >> 9) & 0x7fff;
1568      }
1569   } else
1570   if (mask & 2) {
1571      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1572      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1573         pcRel += 8;
1574      // currently we don't want absolute branches
1575      assert(!f->absolute);
1576      code[0] |= (pcRel & 0x1ff) << 23;
1577      code[1] |= (pcRel >> 9) & 0x7fff;
1578   }
1579}
1580
1581void
1582CodeEmitterGK110::emitSHFL(const Instruction *i)
1583{
1584   const ImmediateValue *imm;
1585
1586   code[0] = 0x00000002;
1587   code[1] = 0x78800000 | (i->subOp << 1);
1588
1589   emitPredicate(i);
1590
1591   defId(i->def(0), 2);
1592   srcId(i->src(0), 10);
1593
1594   switch (i->src(1).getFile()) {
1595   case FILE_GPR:
1596      srcId(i->src(1), 23);
1597      break;
1598   case FILE_IMMEDIATE:
1599      imm = i->getSrc(1)->asImm();
1600      assert(imm && imm->reg.data.u32 < 0x20);
1601      code[0] |= imm->reg.data.u32 << 23;
1602      code[0] |= 1 << 31;
1603      break;
1604   default:
1605      assert(!"invalid src1 file");
1606      break;
1607   }
1608
1609   switch (i->src(2).getFile()) {
1610   case FILE_GPR:
1611      srcId(i->src(2), 42);
1612      break;
1613   case FILE_IMMEDIATE:
1614      imm = i->getSrc(2)->asImm();
1615      assert(imm && imm->reg.data.u32 < 0x2000);
1616      code[1] |= imm->reg.data.u32 << 5;
1617      code[1] |= 1;
1618      break;
1619   default:
1620      assert(!"invalid src2 file");
1621      break;
1622   }
1623
1624   if (!i->defExists(1))
1625      code[1] |= 7 << 19;
1626   else {
1627      assert(i->def(1).getFile() == FILE_PREDICATE);
1628      defId(i->def(1), 51);
1629   }
1630}
1631
1632void
1633CodeEmitterGK110::emitVOTE(const Instruction *i)
1634{
1635   const ImmediateValue *imm;
1636   uint32_t u32;
1637
1638   code[0] = 0x00000002;
1639   code[1] = 0x86c00000 | (i->subOp << 19);
1640
1641   emitPredicate(i);
1642
1643   unsigned rp = 0;
1644   for (int d = 0; i->defExists(d); d++) {
1645      if (i->def(d).getFile() == FILE_PREDICATE) {
1646         assert(!(rp & 2));
1647         rp |= 2;
1648         defId(i->def(d), 48);
1649      } else if (i->def(d).getFile() == FILE_GPR) {
1650         assert(!(rp & 1));
1651         rp |= 1;
1652         defId(i->def(d), 2);
1653      } else {
1654         assert(!"Unhandled def");
1655      }
1656   }
1657   if (!(rp & 1))
1658      code[0] |= 255 << 2;
1659   if (!(rp & 2))
1660      code[1] |= 7 << 16;
1661
1662   switch (i->src(0).getFile()) {
1663   case FILE_PREDICATE:
1664      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1665         code[0] |= 1 << 13;
1666      srcId(i->src(0), 42);
1667      break;
1668   case FILE_IMMEDIATE:
1669      imm = i->getSrc(0)->asImm();
1670      assert(imm);
1671      u32 = imm->reg.data.u32;
1672      assert(u32 == 0 || u32 == 1);
1673      code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10;
1674      break;
1675   default:
1676      assert(!"Unhandled src");
1677      break;
1678   }
1679}
1680
1681void
1682CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1683{
1684   uint8_t n = 0;
1685
1686   switch (ty) {
1687   case TYPE_S32: n = 1; break;
1688   case TYPE_U8:  n = 2; break;
1689   case TYPE_S8:  n = 3; break;
1690   default:
1691      assert(ty == TYPE_U32);
1692      break;
1693   }
1694   code[pos / 32] |= n << (pos % 32);
1695}
1696
1697void
1698CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1699{
1700   uint8_t n = 0;
1701
1702   switch (c) {
1703   case CACHE_CA:
1704// case CACHE_WB:
1705      n = 0;
1706      break;
1707   case CACHE_CG:
1708      n = 1;
1709      break;
1710   case CACHE_CS:
1711      n = 2;
1712      break;
1713   case CACHE_CV:
1714// case CACHE_WT:
1715      n = 3;
1716      break;
1717   default:
1718      assert(!"invalid caching mode");
1719      break;
1720   }
1721   code[0] |= (n & 1) << 31;
1722   code[1] |= (n & 2) >> 1;
1723}
1724
1725void
1726CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1727{
1728   const uint32_t offset = i->getSrc(s)->reg.data.offset;
1729
1730   assert(offset == (offset & 0xfffc));
1731
1732   code[0] |= offset << 21;
1733   code[1] |= offset >> 11;
1734   code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1735}
1736
1737void
1738CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1739{
1740   code[0] = 0x00000002;
1741   code[1] = 0x30000000 | (i->subOp << 14);
1742
1743   if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1744      emitLoadStoreType(i->dType, 0x38);
1745      emitCachingMode(i->cache, 0x36);
1746
1747      // format
1748      setSUConst16(i, 1);
1749   } else {
1750      assert(i->src(1).getFile() == FILE_GPR);
1751      code[1] |= 0x49800000;
1752
1753      emitLoadStoreType(i->dType, 0x21);
1754      emitSUCachingMode(i->cache);
1755
1756      srcId(i->src(1), 23);
1757   }
1758
1759   emitSUGType(i->sType, 0x34);
1760
1761   emitPredicate(i);
1762   defId(i->def(0), 2); // destination
1763   srcId(i->src(0), 10); // address
1764
1765   // surface predicate
1766   if (!i->srcExists(2) || (i->predSrc == 2)) {
1767      code[1] |= 0x7 << 10;
1768   } else {
1769      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1770         code[1] |= 1 << 13;
1771      srcId(i->src(2), 32 + 10);
1772   }
1773}
1774
1775void
1776CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1777{
1778   assert(i->op == OP_SUSTP);
1779
1780   code[0] = 0x00000002;
1781   code[1] = 0x38000000;
1782
1783   if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1784      code[0] |= i->subOp << 2;
1785
1786      if (i->op == OP_SUSTP)
1787         code[0] |= i->tex.mask << 4;
1788
1789      emitSUGType(i->sType, 0x8);
1790      emitCachingMode(i->cache, 0x36);
1791
1792      // format
1793      setSUConst16(i, 1);
1794   } else {
1795      assert(i->src(1).getFile() == FILE_GPR);
1796
1797      code[0] |= i->subOp << 23;
1798      code[1] |= 0x41c00000;
1799
1800      if (i->op == OP_SUSTP)
1801         code[0] |= i->tex.mask << 25;
1802
1803      emitSUGType(i->sType, 0x1d);
1804      emitSUCachingMode(i->cache);
1805
1806      srcId(i->src(1), 2);
1807   }
1808
1809   emitPredicate(i);
1810   srcId(i->src(0), 10); // address
1811   srcId(i->src(3), 42); // values
1812
1813   // surface predicate
1814   if (!i->srcExists(2) || (i->predSrc == 2)) {
1815      code[1] |= 0x7 << 18;
1816   } else {
1817      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1818         code[1] |= 1 << 21;
1819      srcId(i->src(2), 32 + 18);
1820   }
1821}
1822
1823void
1824CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1825{
1826   uint8_t m;
1827   switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1828   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1829   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1830   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1831   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1832   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1833   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1834   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1835   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1836   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1837   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1838   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1839   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1840   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1841   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1842   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1843   default:
1844      return;
1845   }
1846   code[1] |= m << 20;
1847   if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1848      code[1] |= 1 << 24;
1849}
1850
1851void
1852CodeEmitterGK110::emitSUCalc(Instruction *i)
1853{
1854   ImmediateValue *imm = NULL;
1855   uint64_t opc1, opc2;
1856
1857   if (i->srcExists(2)) {
1858      imm = i->getSrc(2)->asImm();
1859      if (imm)
1860         i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1861   }
1862
1863   switch (i->op) {
1864   case OP_SUCLAMP:  opc1 = 0xb00; opc2 = 0x580; break;
1865   case OP_SUBFM:    opc1 = 0xb68; opc2 = 0x1e8; break;
1866   case OP_SUEAU:    opc1 = 0xb6c; opc2 = 0x1ec; break;
1867   default:
1868      assert(0);
1869      return;
1870   }
1871   emitForm_21(i, opc2, opc1);
1872
1873   if (i->op == OP_SUCLAMP) {
1874      if (i->dType == TYPE_S32)
1875         code[1] |= 1 << 19;
1876      emitSUCLAMPMode(i->subOp);
1877   }
1878
1879   if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1880      code[1] |= 1 << 18;
1881
1882   if (i->op != OP_SUEAU) {
1883      const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1884      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1885         code[0] |= 255 << 2;
1886         code[1] |= i->getDef(1)->reg.data.id << pos;
1887      } else
1888      if (i->defExists(1)) { // r, p
1889         assert(i->def(1).getFile() == FILE_PREDICATE);
1890         code[1] |= i->getDef(1)->reg.data.id << pos;
1891      } else { // r, #
1892         code[1] |= 7 << pos;
1893      }
1894   }
1895
1896   if (imm) {
1897      assert(i->op == OP_SUCLAMP);
1898      i->setSrc(2, imm);
1899      code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1900   }
1901}
1902
1903
1904void
1905CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1906{
1907   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1908   case 0:
1909      code[1] |= (i->subOp & 0x000f) << 7;  // vsrc1
1910      code[1] |= (i->subOp & 0x00e0) >> 6;  // vsrc2
1911      code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1912      code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1913      break;
1914   default:
1915      assert(0);
1916      break;
1917   }
1918}
1919
1920void
1921CodeEmitterGK110::emitVSHL(const Instruction *i)
1922{
1923   code[0] = 0x00000002;
1924   code[1] = 0xb8000000;
1925
1926   assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1927
1928   if (isSignedType(i->dType)) code[1] |= 1 << 25;
1929   if (isSignedType(i->sType)) code[1] |= 1 << 19;
1930
1931   emitVectorSubOp(i);
1932
1933   emitPredicate(i);
1934   defId(i->def(0), 2);
1935   srcId(i->src(0), 10);
1936
1937   if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1938      ImmediateValue *imm = i->getSrc(1)->asImm();
1939      assert(imm);
1940      code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1941      code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1942   } else {
1943      assert(i->getSrc(1)->reg.file == FILE_GPR);
1944      code[1] |= 1 << 21;
1945      srcId(i->src(1), 23);
1946   }
1947   srcId(i->src(2), 42);
1948
1949   if (i->saturate)
1950      code[0] |= 1 << 22;
1951   if (i->flagsDef >= 0)
1952      code[1] |= 1 << 18;
1953}
1954
1955void
1956CodeEmitterGK110::emitAFETCH(const Instruction *i)
1957{
1958   uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1959
1960   code[0] = 0x00000002 | (offset << 23);
1961   code[1] = 0x7d000000 | (offset >> 9);
1962
1963   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1964      code[1] |= 0x8;
1965
1966   emitPredicate(i);
1967
1968   defId(i->def(0), 2);
1969   srcId(i->src(0).getIndirect(0), 10);
1970}
1971
1972void
1973CodeEmitterGK110::emitPFETCH(const Instruction *i)
1974{
1975   uint32_t prim = i->src(0).get()->reg.data.u32;
1976
1977   code[0] = 0x00000002 | ((prim & 0xff) << 23);
1978   code[1] = 0x7f800000;
1979
1980   emitPredicate(i);
1981
1982   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1983
1984   defId(i->def(0), 2);
1985   srcId(i, src1, 10);
1986}
1987
1988void
1989CodeEmitterGK110::emitVFETCH(const Instruction *i)
1990{
1991   unsigned int size = typeSizeof(i->dType);
1992   uint32_t offset = i->src(0).get()->reg.data.offset;
1993
1994   code[0] = 0x00000002 | (offset << 23);
1995   code[1] = 0x7ec00000 | (offset >> 9);
1996   code[1] |= (size / 4 - 1) << 18;
1997
1998   if (i->perPatch)
1999      code[1] |= 0x4;
2000   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
2001      code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
2002
2003   emitPredicate(i);
2004
2005   defId(i->def(0), 2);
2006   srcId(i->src(0).getIndirect(0), 10);
2007   srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
2008}
2009
2010void
2011CodeEmitterGK110::emitEXPORT(const Instruction *i)
2012{
2013   unsigned int size = typeSizeof(i->dType);
2014   uint32_t offset = i->src(0).get()->reg.data.offset;
2015
2016   code[0] = 0x00000002 | (offset << 23);
2017   code[1] = 0x7f000000 | (offset >> 9);
2018   code[1] |= (size / 4 - 1) << 18;
2019
2020   if (i->perPatch)
2021      code[1] |= 0x4;
2022
2023   emitPredicate(i);
2024
2025   assert(i->src(1).getFile() == FILE_GPR);
2026
2027   srcId(i->src(0).getIndirect(0), 10);
2028   srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
2029   srcId(i->src(1), 2);
2030}
2031
2032void
2033CodeEmitterGK110::emitOUT(const Instruction *i)
2034{
2035   assert(i->src(0).getFile() == FILE_GPR);
2036
2037   emitForm_21(i, 0x1f0, 0xb70);
2038
2039   if (i->op == OP_EMIT)
2040      code[1] |= 1 << 10;
2041   if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
2042      code[1] |= 1 << 11;
2043}
2044
2045void
2046CodeEmitterGK110::emitInterpMode(const Instruction *i)
2047{
2048   code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
2049   code[1] |= (i->ipa & 0xc) << (19 - 2);
2050}
2051
2052void
2053gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data)
2054{
2055   int ipa = entry->ipa;
2056   int reg = entry->reg;
2057   int loc = entry->loc;
2058
2059   if (data.flatshade &&
2060       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2061      ipa = NV50_IR_INTERP_FLAT;
2062      reg = 0xff;
2063   } else if (data.force_persample_interp &&
2064              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2065              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2066      ipa |= NV50_IR_INTERP_CENTROID;
2067   }
2068   code[loc + 1] &= ~(0xf << 19);
2069   code[loc + 1] |= (ipa & 0x3) << 21;
2070   code[loc + 1] |= (ipa & 0xc) << (19 - 2);
2071   code[loc + 0] &= ~(0xff << 23);
2072   code[loc + 0] |= reg << 23;
2073}
2074
2075void
2076CodeEmitterGK110::emitINTERP(const Instruction *i)
2077{
2078   const uint32_t base = i->getSrc(0)->reg.data.offset;
2079
2080   code[0] = 0x00000002 | (base << 31);
2081   code[1] = 0x74800000 | (base >> 1);
2082
2083   if (i->saturate)
2084      code[1] |= 1 << 18;
2085
2086   if (i->op == OP_PINTERP) {
2087      srcId(i->src(1), 23);
2088      addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
2089   } else {
2090      code[0] |= 0xff << 23;
2091      addInterp(i->ipa, 0xff, gk110_interpApply);
2092   }
2093
2094   srcId(i->src(0).getIndirect(0), 10);
2095   emitInterpMode(i);
2096
2097   emitPredicate(i);
2098   defId(i->def(0), 2);
2099
2100   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
2101      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
2102   else
2103      code[1] |= 0xff << 10;
2104}
2105
2106void
2107CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
2108{
2109   uint8_t n;
2110
2111   switch (ty) {
2112   case TYPE_U8:
2113      n = 0;
2114      break;
2115   case TYPE_S8:
2116      n = 1;
2117      break;
2118   case TYPE_U16:
2119      n = 2;
2120      break;
2121   case TYPE_S16:
2122      n = 3;
2123      break;
2124   case TYPE_F32:
2125   case TYPE_U32:
2126   case TYPE_S32:
2127      n = 4;
2128      break;
2129   case TYPE_F64:
2130   case TYPE_U64:
2131   case TYPE_S64:
2132      n = 5;
2133      break;
2134   case TYPE_B128:
2135      n = 6;
2136      break;
2137   default:
2138      n = 0;
2139      assert(!"invalid ld/st type");
2140      break;
2141   }
2142   code[pos / 32] |= n << (pos % 32);
2143}
2144
2145void
2146CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2147{
2148   uint8_t n;
2149
2150   switch (c) {
2151   case CACHE_CA:
2152// case CACHE_WB:
2153      n = 0;
2154      break;
2155   case CACHE_CG:
2156      n = 1;
2157      break;
2158   case CACHE_CS:
2159      n = 2;
2160      break;
2161   case CACHE_CV:
2162// case CACHE_WT:
2163      n = 3;
2164      break;
2165   default:
2166      n = 0;
2167      assert(!"invalid caching mode");
2168      break;
2169   }
2170   code[pos / 32] |= n << (pos % 32);
2171}
2172
2173void
2174CodeEmitterGK110::emitSTORE(const Instruction *i)
2175{
2176   int32_t offset = SDATA(i->src(0)).offset;
2177
2178   switch (i->src(0).getFile()) {
2179   case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2180   case FILE_MEMORY_LOCAL:  code[1] = 0x7a800000; code[0] = 0x00000002; break;
2181   case FILE_MEMORY_SHARED:
2182      code[0] = 0x00000002;
2183      if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2184         code[1] = 0x78400000;
2185      else
2186         code[1] = 0x7ac00000;
2187      break;
2188   default:
2189      assert(!"invalid memory file");
2190      break;
2191   }
2192
2193   if (code[0] & 0x2) {
2194      offset &= 0xffffff;
2195      emitLoadStoreType(i->dType, 0x33);
2196      if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2197         emitCachingMode(i->cache, 0x2f);
2198   } else {
2199      emitLoadStoreType(i->dType, 0x38);
2200      emitCachingMode(i->cache, 0x3b);
2201   }
2202   code[0] |= offset << 23;
2203   code[1] |= offset >> 9;
2204
2205   // Unlocked store on shared memory can fail.
2206   if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2207       i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2208      assert(i->defExists(0));
2209      defId(i->def(0), 32 + 16);
2210   }
2211
2212   emitPredicate(i);
2213
2214   srcId(i->src(1), 2);
2215   srcId(i->src(0).getIndirect(0), 10);
2216   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2217       i->src(0).isIndirect(0) &&
2218       i->getIndirect(0, 0)->reg.size == 8)
2219      code[1] |= 1 << 23;
2220}
2221
2222void
2223CodeEmitterGK110::emitLOAD(const Instruction *i)
2224{
2225   int32_t offset = SDATA(i->src(0)).offset;
2226
2227   switch (i->src(0).getFile()) {
2228   case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
2229   case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break;
2230   case FILE_MEMORY_SHARED:
2231      code[0] = 0x00000002;
2232      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
2233         code[1] = 0x77400000;
2234      else
2235         code[1] = 0x7a400000;
2236      break;
2237   case FILE_MEMORY_CONST:
2238      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
2239         emitMOV(i);
2240         return;
2241      }
2242      offset &= 0xffff;
2243      code[0] = 0x00000002;
2244      code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
2245      code[1] |= i->subOp << 15;
2246      break;
2247   default:
2248      assert(!"invalid memory file");
2249      break;
2250   }
2251
2252   if (code[0] & 0x2) {
2253      offset &= 0xffffff;
2254      emitLoadStoreType(i->dType, 0x33);
2255      if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2256         emitCachingMode(i->cache, 0x2f);
2257   } else {
2258      emitLoadStoreType(i->dType, 0x38);
2259      emitCachingMode(i->cache, 0x3b);
2260   }
2261   code[0] |= offset << 23;
2262   code[1] |= offset >> 9;
2263
2264   // Locked store on shared memory can fail.
2265   int r = 0, p = -1;
2266   if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2267       i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
2268      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2269         r = -1;
2270         p = 0;
2271      } else if (i->defExists(1)) { // r, p
2272         p = 1;
2273      } else {
2274         assert(!"Expected predicate dest for load locked");
2275      }
2276   }
2277
2278   emitPredicate(i);
2279
2280   if (r >= 0)
2281      defId(i->def(r), 2);
2282   else
2283      code[0] |= 255 << 2;
2284
2285   if (p >= 0)
2286      defId(i->def(p), 32 + 16);
2287
2288   if (i->getIndirect(0, 0)) {
2289      srcId(i->src(0).getIndirect(0), 10);
2290      if (i->getIndirect(0, 0)->reg.size == 8)
2291         code[1] |= 1 << 23;
2292   } else {
2293      code[0] |= 255 << 10;
2294   }
2295}
2296
2297uint8_t
2298CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2299{
2300   switch (SDATA(ref).sv.sv) {
2301   case SV_LANEID:        return 0x00;
2302   case SV_PHYSID:        return 0x03;
2303   case SV_VERTEX_COUNT:  return 0x10;
2304   case SV_INVOCATION_ID: return 0x11;
2305   case SV_YDIR:          return 0x12;
2306   case SV_THREAD_KILL:   return 0x13;
2307   case SV_COMBINED_TID:  return 0x20;
2308   case SV_TID:           return 0x21 + SDATA(ref).sv.index;
2309   case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
2310   case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
2311   case SV_GRIDID:        return 0x2c;
2312   case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
2313   case SV_LBASE:         return 0x34;
2314   case SV_SBASE:         return 0x30;
2315   case SV_LANEMASK_EQ:   return 0x38;
2316   case SV_LANEMASK_LT:   return 0x39;
2317   case SV_LANEMASK_LE:   return 0x3a;
2318   case SV_LANEMASK_GT:   return 0x3b;
2319   case SV_LANEMASK_GE:   return 0x3c;
2320   case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2321   default:
2322      assert(!"no sreg for system value");
2323      return 0;
2324   }
2325}
2326
2327void
2328CodeEmitterGK110::emitMOV(const Instruction *i)
2329{
2330   if (i->def(0).getFile() == FILE_PREDICATE) {
2331      if (i->src(0).getFile() == FILE_GPR) {
2332         // Use ISETP.NE.AND dst, PT, src, RZ, PT
2333         code[0] = 0x00000002;
2334         code[1] = 0xdb500000;
2335
2336         code[0] |= 0x7 << 2;
2337         code[0] |= 0xff << 23;
2338         code[1] |= 0x7 << 10;
2339         srcId(i->src(0), 10);
2340      } else
2341      if (i->src(0).getFile() == FILE_PREDICATE) {
2342         // Use PSETP.AND.AND dst, PT, src, PT, PT
2343         code[0] = 0x00000002;
2344         code[1] = 0x84800000;
2345
2346         code[0] |= 0x7 << 2;
2347         code[1] |= 0x7 << 0;
2348         code[1] |= 0x7 << 10;
2349
2350         srcId(i->src(0), 14);
2351      } else {
2352         assert(!"Unexpected source for predicate destination");
2353         emitNOP(i);
2354      }
2355      emitPredicate(i);
2356      defId(i->def(0), 5);
2357   } else
2358   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2359      code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2360      code[1] = 0x86400000;
2361      emitPredicate(i);
2362      defId(i->def(0), 2);
2363   } else
2364   if (i->src(0).getFile() == FILE_IMMEDIATE) {
2365      code[0] = 0x00000002 | (i->lanes << 14);
2366      code[1] = 0x74000000;
2367      emitPredicate(i);
2368      defId(i->def(0), 2);
2369      setImmediate32(i, 0, Modifier(0));
2370   } else
2371   if (i->src(0).getFile() == FILE_PREDICATE) {
2372      code[0] = 0x00000002;
2373      code[1] = 0x84401c07;
2374      emitPredicate(i);
2375      defId(i->def(0), 2);
2376      srcId(i->src(0), 14);
2377   } else {
2378      emitForm_C(i, 0x24c, 2);
2379      code[1] |= i->lanes << 10;
2380   }
2381}
2382
2383static inline bool
2384uses64bitAddress(const Instruction *ldst)
2385{
2386   return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2387      ldst->src(0).isIndirect(0) &&
2388      ldst->getIndirect(0, 0)->reg.size == 8;
2389}
2390
2391void
2392CodeEmitterGK110::emitATOM(const Instruction *i)
2393{
2394   const bool hasDst = i->defExists(0);
2395   const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2396
2397   code[0] = 0x00000002;
2398   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2399      code[1] = 0x77800000;
2400   else
2401      code[1] = 0x68000000;
2402
2403   switch (i->subOp) {
2404   case NV50_IR_SUBOP_ATOM_CAS: break;
2405   case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2406   default: code[1] |= i->subOp << 23; break;
2407   }
2408
2409   switch (i->dType) {
2410   case TYPE_U32: break;
2411   case TYPE_S32: code[1] |= 0x00100000; break;
2412   case TYPE_U64: code[1] |= 0x00200000; break;
2413   case TYPE_F32: code[1] |= 0x00300000; break;
2414   case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2415   case TYPE_S64: code[1] |= 0x00500000; break;
2416   default: assert(!"unsupported type"); break;
2417   }
2418
2419   emitPredicate(i);
2420
2421   /* TODO: cas: check that src regs line up */
2422   /* TODO: cas: flip bits if $r255 is used */
2423   srcId(i->src(1), 23);
2424
2425   if (hasDst) {
2426      defId(i->def(0), 2);
2427   } else
2428   if (!exch) {
2429      code[0] |= 255 << 2;
2430   }
2431
2432   if (hasDst || !exch) {
2433      const int32_t offset = SDATA(i->src(0)).offset;
2434      assert(offset < 0x80000 && offset >= -0x80000);
2435      code[0] |= (offset & 1) << 31;
2436      code[1] |= (offset & 0xffffe) >> 1;
2437   } else {
2438      srcAddr32(i->src(0), 31);
2439   }
2440
2441   if (i->getIndirect(0, 0)) {
2442      srcId(i->getIndirect(0, 0), 10);
2443      if (i->getIndirect(0, 0)->reg.size == 8)
2444         code[1] |= 1 << 19;
2445   } else {
2446      code[0] |= 255 << 10;
2447   }
2448}
2449
2450void
2451CodeEmitterGK110::emitCCTL(const Instruction *i)
2452{
2453   int32_t offset = SDATA(i->src(0)).offset;
2454
2455   code[0] = 0x00000002 | (i->subOp << 2);
2456
2457   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2458      code[1] = 0x7b000000;
2459   } else {
2460      code[1] = 0x7c000000;
2461      offset &= 0xffffff;
2462   }
2463   code[0] |= offset << 23;
2464   code[1] |= offset >> 9;
2465
2466   if (uses64bitAddress(i))
2467      code[1] |= 1 << 23;
2468   srcId(i->src(0).getIndirect(0), 10);
2469
2470   emitPredicate(i);
2471}
2472
2473bool
2474CodeEmitterGK110::emitInstruction(Instruction *insn)
2475{
2476   const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2477
2478   if (insn->encSize != 8) {
2479      ERROR("skipping unencodable instruction: ");
2480      insn->print();
2481      return false;
2482   } else
2483   if (codeSize + size > codeSizeLimit) {
2484      ERROR("code emitter output buffer too small\n");
2485      return false;
2486   }
2487
2488   if (writeIssueDelays) {
2489      int id = (codeSize & 0x3f) / 8 - 1;
2490      if (id < 0) {
2491         id += 1;
2492         code[0] = 0x00000000; // cf issue delay "instruction"
2493         code[1] = 0x08000000;
2494         code += 2;
2495         codeSize += 8;
2496      }
2497      uint32_t *data = code - (id * 2 + 2);
2498
2499      switch (id) {
2500      case 0: data[0] |= insn->sched << 2; break;
2501      case 1: data[0] |= insn->sched << 10; break;
2502      case 2: data[0] |= insn->sched << 18; break;
2503      case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2504      case 4: data[1] |= insn->sched << 2; break;
2505      case 5: data[1] |= insn->sched << 10; break;
2506      case 6: data[1] |= insn->sched << 18; break;
2507      default:
2508         assert(0);
2509         break;
2510      }
2511   }
2512
2513   // assert that instructions with multiple defs don't corrupt registers
2514   for (int d = 0; insn->defExists(d); ++d)
2515      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2516
2517   switch (insn->op) {
2518   case OP_MOV:
2519   case OP_RDSV:
2520      emitMOV(insn);
2521      break;
2522   case OP_NOP:
2523      break;
2524   case OP_LOAD:
2525      emitLOAD(insn);
2526      break;
2527   case OP_STORE:
2528      emitSTORE(insn);
2529      break;
2530   case OP_LINTERP:
2531   case OP_PINTERP:
2532      emitINTERP(insn);
2533      break;
2534   case OP_VFETCH:
2535      emitVFETCH(insn);
2536      break;
2537   case OP_EXPORT:
2538      emitEXPORT(insn);
2539      break;
2540   case OP_AFETCH:
2541      emitAFETCH(insn);
2542      break;
2543   case OP_PFETCH:
2544      emitPFETCH(insn);
2545      break;
2546   case OP_EMIT:
2547   case OP_RESTART:
2548      emitOUT(insn);
2549      break;
2550   case OP_ADD:
2551   case OP_SUB:
2552      if (insn->dType == TYPE_F64)
2553         emitDADD(insn);
2554      else if (isFloatType(insn->dType))
2555         emitFADD(insn);
2556      else
2557         emitUADD(insn);
2558      break;
2559   case OP_MUL:
2560      if (insn->dType == TYPE_F64)
2561         emitDMUL(insn);
2562      else if (isFloatType(insn->dType))
2563         emitFMUL(insn);
2564      else
2565         emitIMUL(insn);
2566      break;
2567   case OP_MAD:
2568   case OP_FMA:
2569      if (insn->dType == TYPE_F64)
2570         emitDMAD(insn);
2571      else if (isFloatType(insn->dType))
2572         emitFMAD(insn);
2573      else
2574         emitIMAD(insn);
2575      break;
2576   case OP_MADSP:
2577      emitMADSP(insn);
2578      break;
2579   case OP_SAD:
2580      emitISAD(insn);
2581      break;
2582   case OP_SHLADD:
2583      emitSHLADD(insn);
2584      break;
2585   case OP_NOT:
2586      emitNOT(insn);
2587      break;
2588   case OP_AND:
2589      emitLogicOp(insn, 0);
2590      break;
2591   case OP_OR:
2592      emitLogicOp(insn, 1);
2593      break;
2594   case OP_XOR:
2595      emitLogicOp(insn, 2);
2596      break;
2597   case OP_SHL:
2598   case OP_SHR:
2599      if (typeSizeof(insn->sType) == 8)
2600         emitShift64(insn);
2601      else
2602         emitShift(insn);
2603      break;
2604   case OP_SET:
2605   case OP_SET_AND:
2606   case OP_SET_OR:
2607   case OP_SET_XOR:
2608      emitSET(insn->asCmp());
2609      break;
2610   case OP_SELP:
2611      emitSELP(insn);
2612      break;
2613   case OP_SLCT:
2614      emitSLCT(insn->asCmp());
2615      break;
2616   case OP_MIN:
2617   case OP_MAX:
2618      emitMINMAX(insn);
2619      break;
2620   case OP_ABS:
2621   case OP_NEG:
2622   case OP_CEIL:
2623   case OP_FLOOR:
2624   case OP_TRUNC:
2625   case OP_SAT:
2626      emitCVT(insn);
2627      break;
2628   case OP_CVT:
2629      if (insn->def(0).getFile() == FILE_PREDICATE ||
2630          insn->src(0).getFile() == FILE_PREDICATE)
2631         emitMOV(insn);
2632      else
2633         emitCVT(insn);
2634      break;
2635   case OP_RSQ:
2636      emitSFnOp(insn, 5 + 2 * insn->subOp);
2637      break;
2638   case OP_RCP:
2639      emitSFnOp(insn, 4 + 2 * insn->subOp);
2640      break;
2641   case OP_LG2:
2642      emitSFnOp(insn, 3);
2643      break;
2644   case OP_EX2:
2645      emitSFnOp(insn, 2);
2646      break;
2647   case OP_SIN:
2648      emitSFnOp(insn, 1);
2649      break;
2650   case OP_COS:
2651      emitSFnOp(insn, 0);
2652      break;
2653   case OP_PRESIN:
2654   case OP_PREEX2:
2655      emitPreOp(insn);
2656      break;
2657   case OP_TEX:
2658   case OP_TXB:
2659   case OP_TXL:
2660   case OP_TXD:
2661   case OP_TXF:
2662   case OP_TXG:
2663   case OP_TXLQ:
2664      emitTEX(insn->asTex());
2665      break;
2666   case OP_TXQ:
2667      emitTXQ(insn->asTex());
2668      break;
2669   case OP_TEXBAR:
2670      emitTEXBAR(insn);
2671      break;
2672   case OP_PIXLD:
2673      emitPIXLD(insn);
2674      break;
2675   case OP_BRA:
2676   case OP_CALL:
2677   case OP_PRERET:
2678   case OP_RET:
2679   case OP_DISCARD:
2680   case OP_EXIT:
2681   case OP_PRECONT:
2682   case OP_CONT:
2683   case OP_PREBREAK:
2684   case OP_BREAK:
2685   case OP_JOINAT:
2686   case OP_BRKPT:
2687   case OP_QUADON:
2688   case OP_QUADPOP:
2689      emitFlow(insn);
2690      break;
2691   case OP_QUADOP:
2692      emitQUADOP(insn, insn->subOp, insn->lanes);
2693      break;
2694   case OP_DFDX:
2695      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2696      break;
2697   case OP_DFDY:
2698      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2699      break;
2700   case OP_POPCNT:
2701      emitPOPC(insn);
2702      break;
2703   case OP_INSBF:
2704      emitINSBF(insn);
2705      break;
2706   case OP_EXTBF:
2707      emitEXTBF(insn);
2708      break;
2709   case OP_BFIND:
2710      emitBFIND(insn);
2711      break;
2712   case OP_PERMT:
2713      emitPERMT(insn);
2714      break;
2715   case OP_JOIN:
2716      emitNOP(insn);
2717      insn->join = 1;
2718      break;
2719   case OP_BAR:
2720      emitBAR(insn);
2721      break;
2722   case OP_MEMBAR:
2723      emitMEMBAR(insn);
2724      break;
2725   case OP_ATOM:
2726      emitATOM(insn);
2727      break;
2728   case OP_CCTL:
2729      emitCCTL(insn);
2730      break;
2731   case OP_SHFL:
2732      emitSHFL(insn);
2733      break;
2734   case OP_VOTE:
2735      emitVOTE(insn);
2736      break;
2737   case OP_SULDB:
2738      emitSULDGB(insn->asTex());
2739      break;
2740   case OP_SUSTB:
2741   case OP_SUSTP:
2742      emitSUSTGx(insn->asTex());
2743      break;
2744   case OP_SUBFM:
2745   case OP_SUCLAMP:
2746   case OP_SUEAU:
2747      emitSUCalc(insn);
2748      break;
2749   case OP_VSHL:
2750      emitVSHL(insn);
2751      break;
2752   case OP_PHI:
2753   case OP_UNION:
2754   case OP_CONSTRAINT:
2755      ERROR("operation should have been eliminated");
2756      return false;
2757   case OP_EXP:
2758   case OP_LOG:
2759   case OP_SQRT:
2760   case OP_POW:
2761      ERROR("operation should have been lowered\n");
2762      return false;
2763   default:
2764      ERROR("unknown op: %u\n", insn->op);
2765      return false;
2766   }
2767
2768   if (insn->join)
2769      code[0] |= 1 << 22;
2770
2771   code += 2;
2772   codeSize += 8;
2773   return true;
2774}
2775
2776uint32_t
2777CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2778{
2779   // No more short instruction encodings.
2780   return 8;
2781}
2782
2783void
2784CodeEmitterGK110::prepareEmission(Function *func)
2785{
2786   const Target *targ = func->getProgram()->getTarget();
2787
2788   CodeEmitter::prepareEmission(func);
2789
2790   if (targ->hasSWSched)
2791      calculateSchedDataNVC0(targ, func);
2792}
2793
2794CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target, Program::Type type)
2795   : CodeEmitter(target),
2796     targNVC0(target),
2797     progType(type),
2798     writeIssueDelays(target->hasSWSched)
2799{
2800   code = NULL;
2801   codeSize = codeSizeLimit = 0;
2802   relocInfo = NULL;
2803}
2804
2805CodeEmitter *
2806TargetNVC0::createCodeEmitterGK110(Program::Type type)
2807{
2808   CodeEmitterGK110 *emit = new CodeEmitterGK110(this, type);
2809   return emit;
2810}
2811
2812} // namespace nv50_ir
2813