1/*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25#include "nv50_ir_target_gm107.h"
26#include "nv50_ir_sched_gm107.h"
27
28//#define GM107_DEBUG_SCHED_DATA
29
30namespace nv50_ir {
31
32class CodeEmitterGM107 : public CodeEmitter
33{
34public:
35   CodeEmitterGM107(const TargetGM107 *);
36
37   virtual bool emitInstruction(Instruction *);
38   virtual uint32_t getMinEncodingSize(const Instruction *) const;
39
40   virtual void prepareEmission(Program *);
41   virtual void prepareEmission(Function *);
42
43   inline void setProgramType(Program::Type pType) { progType = pType; }
44
45private:
46   const TargetGM107 *targGM107;
47
48   Program::Type progType;
49
50   const Instruction *insn;
51   const bool writeIssueDelays;
52   uint32_t *data;
53
54private:
55   inline void emitField(uint32_t *, int, int, uint32_t);
56   inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57
58   inline void emitInsn(uint32_t, bool);
59   inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60   inline void emitPred();
61   inline void emitGPR(int, const Value *);
62   inline void emitGPR(int pos) {
63      emitGPR(pos, (const Value *)NULL);
64   }
65   inline void emitGPR(int pos, const ValueRef &ref) {
66      emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67   }
68   inline void emitGPR(int pos, const ValueRef *ref) {
69      emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70   }
71   inline void emitGPR(int pos, const ValueDef &def) {
72      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73   }
74   inline void emitSYS(int, const Value *);
75   inline void emitSYS(int pos, const ValueRef &ref) {
76      emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77   }
78   inline void emitPRED(int, const Value *);
79   inline void emitPRED(int pos) {
80      emitPRED(pos, (const Value *)NULL);
81   }
82   inline void emitPRED(int pos, const ValueRef &ref) {
83      emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84   }
85   inline void emitPRED(int pos, const ValueDef &def) {
86      emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87   }
88   inline void emitADDR(int, int, int, int, const ValueRef &);
89   inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90   inline bool longIMMD(const ValueRef &);
91   inline void emitIMMD(int, int, const ValueRef &);
92
93   void emitCond3(int, CondCode);
94   void emitCond4(int, CondCode);
95   void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96   inline void emitO(int);
97   inline void emitP(int);
98   inline void emitSAT(int);
99   inline void emitCC(int);
100   inline void emitX(int);
101   inline void emitABS(int, const ValueRef &);
102   inline void emitNEG(int, const ValueRef &);
103   inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104   inline void emitFMZ(int, int);
105   inline void emitRND(int, RoundMode, int);
106   inline void emitRND(int pos) {
107      emitRND(pos, insn->rnd, -1);
108   }
109   inline void emitPDIV(int);
110   inline void emitINV(int, const ValueRef &);
111
112   void emitEXIT();
113   void emitBRA();
114   void emitCAL();
115   void emitPCNT();
116   void emitCONT();
117   void emitPBK();
118   void emitBRK();
119   void emitPRET();
120   void emitRET();
121   void emitSSY();
122   void emitSYNC();
123   void emitSAM();
124   void emitRAM();
125
126   void emitPSETP();
127
128   void emitMOV();
129   void emitS2R();
130   void emitCS2R();
131   void emitF2F();
132   void emitF2I();
133   void emitI2F();
134   void emitI2I();
135   void emitSEL();
136   void emitSHFL();
137
138   void emitDADD();
139   void emitDMUL();
140   void emitDFMA();
141   void emitDMNMX();
142   void emitDSET();
143   void emitDSETP();
144
145   void emitFADD();
146   void emitFMUL();
147   void emitFFMA();
148   void emitMUFU();
149   void emitFMNMX();
150   void emitRRO();
151   void emitFCMP();
152   void emitFSET();
153   void emitFSETP();
154   void emitFSWZADD();
155
156   void emitLOP();
157   void emitNOT();
158   void emitIADD();
159   void emitIMUL();
160   void emitIMAD();
161   void emitISCADD();
162   void emitXMAD();
163   void emitIMNMX();
164   void emitICMP();
165   void emitISET();
166   void emitISETP();
167   void emitSHL();
168   void emitSHR();
169   void emitSHF();
170   void emitPOPC();
171   void emitBFI();
172   void emitBFE();
173   void emitFLO();
174   void emitPRMT();
175
176   void emitLDSTs(int, DataType);
177   void emitLDSTc(int);
178   void emitLDC();
179   void emitLDL();
180   void emitLDS();
181   void emitLD();
182   void emitSTL();
183   void emitSTS();
184   void emitST();
185   void emitALD();
186   void emitAST();
187   void emitISBERD();
188   void emitAL2P();
189   void emitIPA();
190   void emitATOM();
191   void emitATOMS();
192   void emitRED();
193   void emitCCTL();
194
195   void emitPIXLD();
196
197   void emitTEXs(int);
198   void emitTEX();
199   void emitTEXS();
200   void emitTLD();
201   void emitTLD4();
202   void emitTXD();
203   void emitTXQ();
204   void emitTMML();
205   void emitDEPBAR();
206
207   void emitNOP();
208   void emitKIL();
209   void emitOUT();
210
211   void emitBAR();
212   void emitMEMBAR();
213
214   void emitVOTE();
215
216   void emitSUTarget();
217   void emitSUHandle(const int s);
218   void emitSUSTx();
219   void emitSULDx();
220   void emitSUREDx();
221};
222
223/*******************************************************************************
224 * general instruction layout/fields
225 ******************************************************************************/
226
227void
228CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229{
230   if (b >= 0) {
231      uint32_t m = ((1ULL << s) - 1);
232      uint64_t d = (uint64_t)(v & m) << b;
233      assert(!(v & ~m) || (v & ~m) == ~m);
234      data[1] |= d >> 32;
235      data[0] |= d;
236   }
237}
238
239void
240CodeEmitterGM107::emitPred()
241{
242   if (insn->predSrc >= 0) {
243      emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244      emitField(19, 1, insn->cc == CC_NOT_P);
245   } else {
246      emitField(16, 3, 7);
247   }
248}
249
250void
251CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252{
253   code[0] = 0x00000000;
254   code[1] = hi;
255   if (pred)
256      emitPred();
257}
258
259void
260CodeEmitterGM107::emitGPR(int pos, const Value *val)
261{
262   emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263             val->reg.data.id : 255);
264}
265
266void
267CodeEmitterGM107::emitSYS(int pos, const Value *val)
268{
269   int id = val ? val->reg.data.id : -1;
270
271   switch (id) {
272   case SV_LANEID         : id = 0x00; break;
273   case SV_VERTEX_COUNT   : id = 0x10; break;
274   case SV_INVOCATION_ID  : id = 0x11; break;
275   case SV_THREAD_KILL    : id = 0x13; break;
276   case SV_INVOCATION_INFO: id = 0x1d; break;
277   case SV_COMBINED_TID   : id = 0x20; break;
278   case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
279   case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
280   case SV_LANEMASK_EQ    : id = 0x38; break;
281   case SV_LANEMASK_LT    : id = 0x39; break;
282   case SV_LANEMASK_LE    : id = 0x3a; break;
283   case SV_LANEMASK_GT    : id = 0x3b; break;
284   case SV_LANEMASK_GE    : id = 0x3c; break;
285   case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
286   default:
287      assert(!"invalid system value");
288      id = 0;
289      break;
290   }
291
292   emitField(pos, 8, id);
293}
294
295void
296CodeEmitterGM107::emitPRED(int pos, const Value *val)
297{
298   emitField(pos, 3, val ? val->reg.data.id : 7);
299}
300
301void
302CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303                           const ValueRef &ref)
304{
305   const Value *v = ref.get();
306   assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307   if (gpr >= 0)
308      emitGPR(gpr, ref.getIndirect(0));
309   emitField(off, len, v->reg.data.offset >> shr);
310}
311
312void
313CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314                           const ValueRef &ref)
315{
316   const Value *v = ref.get();
317   const Symbol *s = v->asSym();
318
319   assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320
321   emitField(buf,  5, v->reg.fileIndex);
322   if (gpr >= 0)
323      emitGPR(gpr, ref.getIndirect(0));
324   emitField(off, 16, s->reg.data.offset >> shr);
325}
326
327bool
328CodeEmitterGM107::longIMMD(const ValueRef &ref)
329{
330   if (ref.getFile() == FILE_IMMEDIATE) {
331      const ImmediateValue *imm = ref.get()->asImm();
332      if (isFloatType(insn->sType))
333         return imm->reg.data.u32 & 0xfff;
334      else
335         return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336   }
337   return false;
338}
339
340void
341CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342{
343   const ImmediateValue *imm = ref.get()->asImm();
344   uint32_t val = imm->reg.data.u32;
345
346   if (len == 19) {
347      if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348         assert(!(val & 0x00000fff));
349         val >>= 12;
350      } else if (insn->sType == TYPE_F64) {
351         assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352         val = imm->reg.data.u64 >> 44;
353      } else {
354         assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355      }
356      emitField( 56,   1, (val & 0x80000) >> 19);
357      emitField(pos, len, (val & 0x7ffff));
358   } else {
359      emitField(pos, len, val);
360   }
361}
362
363/*******************************************************************************
364 * modifiers
365 ******************************************************************************/
366
367void
368CodeEmitterGM107::emitCond3(int pos, CondCode code)
369{
370   int data = 0;
371
372   switch (code) {
373   case CC_FL : data = 0x00; break;
374   case CC_LTU:
375   case CC_LT : data = 0x01; break;
376   case CC_EQU:
377   case CC_EQ : data = 0x02; break;
378   case CC_LEU:
379   case CC_LE : data = 0x03; break;
380   case CC_GTU:
381   case CC_GT : data = 0x04; break;
382   case CC_NEU:
383   case CC_NE : data = 0x05; break;
384   case CC_GEU:
385   case CC_GE : data = 0x06; break;
386   case CC_TR : data = 0x07; break;
387   default:
388      assert(!"invalid cond3");
389      break;
390   }
391
392   emitField(pos, 3, data);
393}
394
395void
396CodeEmitterGM107::emitCond4(int pos, CondCode code)
397{
398   int data = 0;
399
400   switch (code) {
401   case CC_FL: data = 0x00; break;
402   case CC_LT: data = 0x01; break;
403   case CC_EQ: data = 0x02; break;
404   case CC_LE: data = 0x03; break;
405   case CC_GT: data = 0x04; break;
406   case CC_NE: data = 0x05; break;
407   case CC_GE: data = 0x06; break;
408//   case CC_NUM: data = 0x07; break;
409//   case CC_NAN: data = 0x08; break;
410   case CC_LTU: data = 0x09; break;
411   case CC_EQU: data = 0x0a; break;
412   case CC_LEU: data = 0x0b; break;
413   case CC_GTU: data = 0x0c; break;
414   case CC_NEU: data = 0x0d; break;
415   case CC_GEU: data = 0x0e; break;
416   case CC_TR:  data = 0x0f; break;
417   default:
418      assert(!"invalid cond4");
419      break;
420   }
421
422   emitField(pos, 4, data);
423}
424
425void
426CodeEmitterGM107::emitO(int pos)
427{
428   emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429}
430
431void
432CodeEmitterGM107::emitP(int pos)
433{
434   emitField(pos, 1, insn->perPatch);
435}
436
437void
438CodeEmitterGM107::emitSAT(int pos)
439{
440   emitField(pos, 1, insn->saturate);
441}
442
443void
444CodeEmitterGM107::emitCC(int pos)
445{
446   emitField(pos, 1, insn->flagsDef >= 0);
447}
448
449void
450CodeEmitterGM107::emitX(int pos)
451{
452   emitField(pos, 1, insn->flagsSrc >= 0);
453}
454
455void
456CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457{
458   emitField(pos, 1, ref.mod.abs());
459}
460
461void
462CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463{
464   emitField(pos, 1, ref.mod.neg());
465}
466
467void
468CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469{
470   emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471}
472
473void
474CodeEmitterGM107::emitFMZ(int pos, int len)
475{
476   emitField(pos, len, insn->dnz << 1 | insn->ftz);
477}
478
479void
480CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481{
482   int rm = 0, ri = 0;
483   switch (rnd) {
484   case ROUND_NI: ri = 1;
485   case ROUND_N : rm = 0; break;
486   case ROUND_MI: ri = 1;
487   case ROUND_M : rm = 1; break;
488   case ROUND_PI: ri = 1;
489   case ROUND_P : rm = 2; break;
490   case ROUND_ZI: ri = 1;
491   case ROUND_Z : rm = 3; break;
492   default:
493      assert(!"invalid round mode");
494      break;
495   }
496   emitField(rip, 1, ri);
497   emitField(rmp, 2, rm);
498}
499
500void
501CodeEmitterGM107::emitPDIV(int pos)
502{
503   assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504   if (insn->postFactor > 0)
505      emitField(pos, 3, 7 - insn->postFactor);
506   else
507      emitField(pos, 3, 0 - insn->postFactor);
508}
509
510void
511CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512{
513   emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514}
515
516/*******************************************************************************
517 * control flow
518 ******************************************************************************/
519
520void
521CodeEmitterGM107::emitEXIT()
522{
523   emitInsn (0xe3000000);
524   emitCond5(0x00, CC_TR);
525}
526
527void
528CodeEmitterGM107::emitBRA()
529{
530   const FlowInstruction *insn = this->insn->asFlow();
531   int gpr = -1;
532
533   if (insn->indirect) {
534      if (insn->absolute)
535         emitInsn(0xe2000000); // JMX
536      else
537         emitInsn(0xe2500000); // BRX
538      gpr = 0x08;
539   } else {
540      if (insn->absolute)
541         emitInsn(0xe2100000); // JMP
542      else
543         emitInsn(0xe2400000); // BRA
544      emitField(0x07, 1, insn->allWarp);
545   }
546
547   emitField(0x06, 1, insn->limit);
548   emitCond5(0x00, CC_TR);
549
550   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551      int32_t pos = insn->target.bb->binPos;
552      if (writeIssueDelays && !(pos & 0x1f))
553         pos += 8;
554      if (!insn->absolute)
555         emitField(0x14, 24, pos - (codeSize + 8));
556      else
557         emitField(0x14, 32, pos);
558   } else {
559      emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560      emitField(0x05, 1, 1);
561   }
562}
563
564void
565CodeEmitterGM107::emitCAL()
566{
567   const FlowInstruction *insn = this->insn->asFlow();
568
569   if (insn->absolute) {
570      emitInsn(0xe2200000, false); // JCAL
571   } else {
572      emitInsn(0xe2600000, false); // CAL
573   }
574
575   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576      if (!insn->absolute)
577         emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578      else {
579         if (insn->builtin) {
580            int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581            addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
582            addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583         } else {
584            emitField(0x14, 32, insn->target.bb->binPos);
585         }
586      }
587   } else {
588      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589      emitField(0x05, 1, 1);
590   }
591}
592
593void
594CodeEmitterGM107::emitPCNT()
595{
596   const FlowInstruction *insn = this->insn->asFlow();
597
598   emitInsn(0xe2b00000, false);
599
600   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602   } else {
603      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604      emitField(0x05, 1, 1);
605   }
606}
607
608void
609CodeEmitterGM107::emitCONT()
610{
611   emitInsn (0xe3500000);
612   emitCond5(0x00, CC_TR);
613}
614
615void
616CodeEmitterGM107::emitPBK()
617{
618   const FlowInstruction *insn = this->insn->asFlow();
619
620   emitInsn(0xe2a00000, false);
621
622   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624   } else {
625      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626      emitField(0x05, 1, 1);
627   }
628}
629
630void
631CodeEmitterGM107::emitBRK()
632{
633   emitInsn (0xe3400000);
634   emitCond5(0x00, CC_TR);
635}
636
637void
638CodeEmitterGM107::emitPRET()
639{
640   const FlowInstruction *insn = this->insn->asFlow();
641
642   emitInsn(0xe2700000, false);
643
644   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646   } else {
647      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648      emitField(0x05, 1, 1);
649   }
650}
651
652void
653CodeEmitterGM107::emitRET()
654{
655   emitInsn (0xe3200000);
656   emitCond5(0x00, CC_TR);
657}
658
659void
660CodeEmitterGM107::emitSSY()
661{
662   const FlowInstruction *insn = this->insn->asFlow();
663
664   emitInsn(0xe2900000, false);
665
666   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668   } else {
669      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670      emitField(0x05, 1, 1);
671   }
672}
673
674void
675CodeEmitterGM107::emitSYNC()
676{
677   emitInsn (0xf0f80000);
678   emitCond5(0x00, CC_TR);
679}
680
681void
682CodeEmitterGM107::emitSAM()
683{
684   emitInsn(0xe3700000, false);
685}
686
687void
688CodeEmitterGM107::emitRAM()
689{
690   emitInsn(0xe3800000, false);
691}
692
693/*******************************************************************************
694 * predicate/cc
695 ******************************************************************************/
696
697void
698CodeEmitterGM107::emitPSETP()
699{
700
701   emitInsn(0x50900000);
702
703   switch (insn->op) {
704   case OP_AND: emitField(0x18, 3, 0); break;
705   case OP_OR:  emitField(0x18, 3, 1); break;
706   case OP_XOR: emitField(0x18, 3, 2); break;
707   default:
708      assert(!"unexpected operation");
709      break;
710   }
711
712   // emitINV (0x2a);
713   emitPRED(0x27); // TODO: support 3-arg
714   emitINV (0x20, insn->src(1));
715   emitPRED(0x1d, insn->src(1));
716   emitINV (0x0f, insn->src(0));
717   emitPRED(0x0c, insn->src(0));
718   emitPRED(0x03, insn->def(0));
719   emitPRED(0x00);
720}
721
722/*******************************************************************************
723 * movement / conversion
724 ******************************************************************************/
725
726void
727CodeEmitterGM107::emitMOV()
728{
729   if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730      switch (insn->src(0).getFile()) {
731      case FILE_GPR:
732         if (insn->def(0).getFile() == FILE_PREDICATE) {
733            emitInsn(0x5b6a0000);
734            emitGPR (0x08);
735         } else {
736            emitInsn(0x5c980000);
737         }
738         emitGPR (0x14, insn->src(0));
739         break;
740      case FILE_MEMORY_CONST:
741         emitInsn(0x4c980000);
742         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743         break;
744      case FILE_IMMEDIATE:
745         emitInsn(0x38980000);
746         emitIMMD(0x14, 19, insn->src(0));
747         break;
748      case FILE_PREDICATE:
749         emitInsn(0x50880000);
750         emitPRED(0x0c, insn->src(0));
751         emitPRED(0x1d);
752         emitPRED(0x27);
753         break;
754      default:
755         assert(!"bad src file");
756         break;
757      }
758      if (insn->def(0).getFile() != FILE_PREDICATE &&
759          insn->src(0).getFile() != FILE_PREDICATE)
760         emitField(0x27, 4, insn->lanes);
761   } else {
762      emitInsn (0x01000000);
763      emitIMMD (0x14, 32, insn->src(0));
764      emitField(0x0c, 4, insn->lanes);
765   }
766
767   if (insn->def(0).getFile() == FILE_PREDICATE) {
768      emitPRED(0x27);
769      emitPRED(0x03, insn->def(0));
770      emitPRED(0x00);
771   } else {
772      emitGPR(0x00, insn->def(0));
773   }
774}
775
776void
777CodeEmitterGM107::emitS2R()
778{
779   emitInsn(0xf0c80000);
780   emitSYS (0x14, insn->src(0));
781   emitGPR (0x00, insn->def(0));
782}
783
784void
785CodeEmitterGM107::emitCS2R()
786{
787   emitInsn(0x50c80000);
788   emitSYS (0x14, insn->src(0));
789   emitGPR (0x00, insn->def(0));
790}
791
792void
793CodeEmitterGM107::emitF2F()
794{
795   RoundMode rnd = insn->rnd;
796
797   switch (insn->op) {
798   case OP_FLOOR: rnd = ROUND_MI; break;
799   case OP_CEIL : rnd = ROUND_PI; break;
800   case OP_TRUNC: rnd = ROUND_ZI; break;
801   default:
802      break;
803   }
804
805   switch (insn->src(0).getFile()) {
806   case FILE_GPR:
807      emitInsn(0x5ca80000);
808      emitGPR (0x14, insn->src(0));
809      break;
810   case FILE_MEMORY_CONST:
811      emitInsn(0x4ca80000);
812      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813      break;
814   case FILE_IMMEDIATE:
815      emitInsn(0x38a80000);
816      emitIMMD(0x14, 19, insn->src(0));
817      break;
818   default:
819      assert(!"bad src0 file");
820      break;
821   }
822
823   emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825   emitCC   (0x2f);
826   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827   emitFMZ  (0x2c, 1);
828   emitField(0x29, 1, insn->subOp);
829   emitRND  (0x27, rnd, 0x2a);
830   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832   emitGPR  (0x00, insn->def(0));
833}
834
835void
836CodeEmitterGM107::emitF2I()
837{
838   RoundMode rnd = insn->rnd;
839
840   switch (insn->op) {
841   case OP_FLOOR: rnd = ROUND_M; break;
842   case OP_CEIL : rnd = ROUND_P; break;
843   case OP_TRUNC: rnd = ROUND_Z; break;
844   default:
845      break;
846   }
847
848   switch (insn->src(0).getFile()) {
849   case FILE_GPR:
850      emitInsn(0x5cb00000);
851      emitGPR (0x14, insn->src(0));
852      break;
853   case FILE_MEMORY_CONST:
854      emitInsn(0x4cb00000);
855      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856      break;
857   case FILE_IMMEDIATE:
858      emitInsn(0x38b00000);
859      emitIMMD(0x14, 19, insn->src(0));
860      break;
861   default:
862      assert(!"bad src0 file");
863      break;
864   }
865
866   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867   emitCC   (0x2f);
868   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869   emitFMZ  (0x2c, 1);
870   emitRND  (0x27, rnd, 0x2a);
871   emitField(0x0c, 1, isSignedType(insn->dType));
872   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874   emitGPR  (0x00, insn->def(0));
875}
876
877void
878CodeEmitterGM107::emitI2F()
879{
880   RoundMode rnd = insn->rnd;
881
882   switch (insn->op) {
883   case OP_FLOOR: rnd = ROUND_M; break;
884   case OP_CEIL : rnd = ROUND_P; break;
885   case OP_TRUNC: rnd = ROUND_Z; break;
886   default:
887      break;
888   }
889
890   switch (insn->src(0).getFile()) {
891   case FILE_GPR:
892      emitInsn(0x5cb80000);
893      emitGPR (0x14, insn->src(0));
894      break;
895   case FILE_MEMORY_CONST:
896      emitInsn(0x4cb80000);
897      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898      break;
899   case FILE_IMMEDIATE:
900      emitInsn(0x38b80000);
901      emitIMMD(0x14, 19, insn->src(0));
902      break;
903   default:
904      assert(!"bad src0 file");
905      break;
906   }
907
908   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909   emitCC   (0x2f);
910   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911   emitField(0x29, 2, insn->subOp);
912   emitRND  (0x27, rnd, -1);
913   emitField(0x0d, 1, isSignedType(insn->sType));
914   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916   emitGPR  (0x00, insn->def(0));
917}
918
919void
920CodeEmitterGM107::emitI2I()
921{
922   switch (insn->src(0).getFile()) {
923   case FILE_GPR:
924      emitInsn(0x5ce00000);
925      emitGPR (0x14, insn->src(0));
926      break;
927   case FILE_MEMORY_CONST:
928      emitInsn(0x4ce00000);
929      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930      break;
931   case FILE_IMMEDIATE:
932      emitInsn(0x38e00000);
933      emitIMMD(0x14, 19, insn->src(0));
934      break;
935   default:
936      assert(!"bad src0 file");
937      break;
938   }
939
940   emitSAT  (0x32);
941   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942   emitCC   (0x2f);
943   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944   emitField(0x29, 2, insn->subOp);
945   emitField(0x0d, 1, isSignedType(insn->sType));
946   emitField(0x0c, 1, isSignedType(insn->dType));
947   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949   emitGPR  (0x00, insn->def(0));
950}
951
952void
953gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954{
955   int loc = entry->loc;
956   bool val = false;
957   switch (entry->ipa) {
958   case 0:
959      val = data.force_persample_interp;
960      break;
961   case 1:
962      val = data.msaa;
963      break;
964   }
965   if (val)
966      code[loc + 1] |= 1 << 10;
967   else
968      code[loc + 1] &= ~(1 << 10);
969}
970
971void
972CodeEmitterGM107::emitSEL()
973{
974   switch (insn->src(1).getFile()) {
975   case FILE_GPR:
976      emitInsn(0x5ca00000);
977      emitGPR (0x14, insn->src(1));
978      break;
979   case FILE_MEMORY_CONST:
980      emitInsn(0x4ca00000);
981      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
982      break;
983   case FILE_IMMEDIATE:
984      emitInsn(0x38a00000);
985      emitIMMD(0x14, 19, insn->src(1));
986      break;
987   default:
988      assert(!"bad src1 file");
989      break;
990   }
991
992   emitINV (0x2a, insn->src(2));
993   emitPRED(0x27, insn->src(2));
994   emitGPR (0x08, insn->src(0));
995   emitGPR (0x00, insn->def(0));
996
997   if (insn->subOp >= 1) {
998      addInterp(insn->subOp - 1, 0, gm107_selpFlip);
999   }
1000}
1001
1002void
1003CodeEmitterGM107::emitSHFL()
1004{
1005   int type = 0;
1006
1007   emitInsn (0xef100000);
1008
1009   switch (insn->src(1).getFile()) {
1010   case FILE_GPR:
1011      emitGPR(0x14, insn->src(1));
1012      break;
1013   case FILE_IMMEDIATE:
1014      emitIMMD(0x14, 5, insn->src(1));
1015      type |= 1;
1016      break;
1017   default:
1018      assert(!"invalid src1 file");
1019      break;
1020   }
1021
1022   switch (insn->src(2).getFile()) {
1023   case FILE_GPR:
1024      emitGPR(0x27, insn->src(2));
1025      break;
1026   case FILE_IMMEDIATE:
1027      emitIMMD(0x22, 13, insn->src(2));
1028      type |= 2;
1029      break;
1030   default:
1031      assert(!"invalid src2 file");
1032      break;
1033   }
1034
1035   if (!insn->defExists(1))
1036      emitPRED(0x30);
1037   else {
1038      assert(insn->def(1).getFile() == FILE_PREDICATE);
1039      emitPRED(0x30, insn->def(1));
1040   }
1041
1042   emitField(0x1e, 2, insn->subOp);
1043   emitField(0x1c, 2, type);
1044   emitGPR  (0x08, insn->src(0));
1045   emitGPR  (0x00, insn->def(0));
1046}
1047
1048/*******************************************************************************
1049 * double
1050 ******************************************************************************/
1051
1052void
1053CodeEmitterGM107::emitDADD()
1054{
1055   switch (insn->src(1).getFile()) {
1056   case FILE_GPR:
1057      emitInsn(0x5c700000);
1058      emitGPR (0x14, insn->src(1));
1059      break;
1060   case FILE_MEMORY_CONST:
1061      emitInsn(0x4c700000);
1062      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1063      break;
1064   case FILE_IMMEDIATE:
1065      emitInsn(0x38700000);
1066      emitIMMD(0x14, 19, insn->src(1));
1067      break;
1068   default:
1069      assert(!"bad src1 file");
1070      break;
1071   }
1072   emitABS(0x31, insn->src(1));
1073   emitNEG(0x30, insn->src(0));
1074   emitCC (0x2f);
1075   emitABS(0x2e, insn->src(0));
1076   emitNEG(0x2d, insn->src(1));
1077
1078   if (insn->op == OP_SUB)
1079      code[1] ^= 0x00002000;
1080
1081   emitGPR(0x08, insn->src(0));
1082   emitGPR(0x00, insn->def(0));
1083}
1084
1085void
1086CodeEmitterGM107::emitDMUL()
1087{
1088   switch (insn->src(1).getFile()) {
1089   case FILE_GPR:
1090      emitInsn(0x5c800000);
1091      emitGPR (0x14, insn->src(1));
1092      break;
1093   case FILE_MEMORY_CONST:
1094      emitInsn(0x4c800000);
1095      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096      break;
1097   case FILE_IMMEDIATE:
1098      emitInsn(0x38800000);
1099      emitIMMD(0x14, 19, insn->src(1));
1100      break;
1101   default:
1102      assert(!"bad src1 file");
1103      break;
1104   }
1105
1106   emitNEG2(0x30, insn->src(0), insn->src(1));
1107   emitCC  (0x2f);
1108   emitRND (0x27);
1109   emitGPR (0x08, insn->src(0));
1110   emitGPR (0x00, insn->def(0));
1111}
1112
1113void
1114CodeEmitterGM107::emitDFMA()
1115{
1116   switch(insn->src(2).getFile()) {
1117   case FILE_GPR:
1118      switch (insn->src(1).getFile()) {
1119      case FILE_GPR:
1120         emitInsn(0x5b700000);
1121         emitGPR (0x14, insn->src(1));
1122         break;
1123      case FILE_MEMORY_CONST:
1124         emitInsn(0x4b700000);
1125         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1126         break;
1127      case FILE_IMMEDIATE:
1128         emitInsn(0x36700000);
1129         emitIMMD(0x14, 19, insn->src(1));
1130         break;
1131      default:
1132         assert(!"bad src1 file");
1133         break;
1134      }
1135      emitGPR (0x27, insn->src(2));
1136      break;
1137   case FILE_MEMORY_CONST:
1138      emitInsn(0x53700000);
1139      emitGPR (0x27, insn->src(1));
1140      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1141      break;
1142   default:
1143      assert(!"bad src2 file");
1144      break;
1145   }
1146
1147   emitRND (0x32);
1148   emitNEG (0x31, insn->src(2));
1149   emitNEG2(0x30, insn->src(0), insn->src(1));
1150   emitCC  (0x2f);
1151   emitGPR (0x08, insn->src(0));
1152   emitGPR (0x00, insn->def(0));
1153}
1154
1155void
1156CodeEmitterGM107::emitDMNMX()
1157{
1158   switch (insn->src(1).getFile()) {
1159   case FILE_GPR:
1160      emitInsn(0x5c500000);
1161      emitGPR (0x14, insn->src(1));
1162      break;
1163   case FILE_MEMORY_CONST:
1164      emitInsn(0x4c500000);
1165      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1166      break;
1167   case FILE_IMMEDIATE:
1168      emitInsn(0x38500000);
1169      emitIMMD(0x14, 19, insn->src(1));
1170      break;
1171   default:
1172      assert(!"bad src1 file");
1173      break;
1174   }
1175
1176   emitABS  (0x31, insn->src(1));
1177   emitNEG  (0x30, insn->src(0));
1178   emitCC   (0x2f);
1179   emitABS  (0x2e, insn->src(0));
1180   emitNEG  (0x2d, insn->src(1));
1181   emitField(0x2a, 1, insn->op == OP_MAX);
1182   emitPRED (0x27);
1183   emitGPR  (0x08, insn->src(0));
1184   emitGPR  (0x00, insn->def(0));
1185}
1186
1187void
1188CodeEmitterGM107::emitDSET()
1189{
1190   const CmpInstruction *insn = this->insn->asCmp();
1191
1192   switch (insn->src(1).getFile()) {
1193   case FILE_GPR:
1194      emitInsn(0x59000000);
1195      emitGPR (0x14, insn->src(1));
1196      break;
1197   case FILE_MEMORY_CONST:
1198      emitInsn(0x49000000);
1199      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1200      break;
1201   case FILE_IMMEDIATE:
1202      emitInsn(0x32000000);
1203      emitIMMD(0x14, 19, insn->src(1));
1204      break;
1205   default:
1206      assert(!"bad src1 file");
1207      break;
1208   }
1209
1210   if (insn->op != OP_SET) {
1211      switch (insn->op) {
1212      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1215      default:
1216         assert(!"invalid set op");
1217         break;
1218      }
1219      emitPRED(0x27, insn->src(2));
1220   } else {
1221      emitPRED(0x27);
1222   }
1223
1224   emitABS  (0x36, insn->src(0));
1225   emitNEG  (0x35, insn->src(1));
1226   emitField(0x34, 1, insn->dType == TYPE_F32);
1227   emitCond4(0x30, insn->setCond);
1228   emitCC   (0x2f);
1229   emitABS  (0x2c, insn->src(1));
1230   emitNEG  (0x2b, insn->src(0));
1231   emitGPR  (0x08, insn->src(0));
1232   emitGPR  (0x00, insn->def(0));
1233}
1234
1235void
1236CodeEmitterGM107::emitDSETP()
1237{
1238   const CmpInstruction *insn = this->insn->asCmp();
1239
1240   switch (insn->src(1).getFile()) {
1241   case FILE_GPR:
1242      emitInsn(0x5b800000);
1243      emitGPR (0x14, insn->src(1));
1244      break;
1245   case FILE_MEMORY_CONST:
1246      emitInsn(0x4b800000);
1247      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1248      break;
1249   case FILE_IMMEDIATE:
1250      emitInsn(0x36800000);
1251      emitIMMD(0x14, 19, insn->src(1));
1252      break;
1253   default:
1254      assert(!"bad src1 file");
1255      break;
1256   }
1257
1258   if (insn->op != OP_SET) {
1259      switch (insn->op) {
1260      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1263      default:
1264         assert(!"invalid set op");
1265         break;
1266      }
1267      emitPRED(0x27, insn->src(2));
1268   } else {
1269      emitPRED(0x27);
1270   }
1271
1272   emitCond4(0x30, insn->setCond);
1273   emitABS  (0x2c, insn->src(1));
1274   emitNEG  (0x2b, insn->src(0));
1275   emitGPR  (0x08, insn->src(0));
1276   emitABS  (0x07, insn->src(0));
1277   emitNEG  (0x06, insn->src(1));
1278   emitPRED (0x03, insn->def(0));
1279   if (insn->defExists(1))
1280      emitPRED(0x00, insn->def(1));
1281   else
1282      emitPRED(0x00);
1283}
1284
1285/*******************************************************************************
1286 * float
1287 ******************************************************************************/
1288
/*
 * Emit FADD; the same routine also handles OP_SUB.
 *
 * Two encodings are produced. When longIMMD(src1) is false, src1 may be a
 * GPR, a constant buffer reference or a 19-bit immediate. Otherwise a
 * separate form carrying a 32-bit immediate is used, with the modifier fields
 * at different positions.
 */
void
CodeEmitterGM107::emitFADD()
{
   if (!longIMMD(insn->src(1))) {
      /* the file of src1 selects the instruction form */
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c580000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c580000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38580000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT(0x32);
      emitABS(0x31, insn->src(1));
      emitNEG(0x30, insn->src(0));
      emitCC (0x2f);
      emitABS(0x2e, insn->src(0));
      emitNEG(0x2d, insn->src(1));
      emitFMZ(0x2c, 1);

      /* OP_SUB toggles bit 13 of code[1]. Presumably that is the src1 NEG
       * bit emitted at 0x2d just above (0x2d - 32 = 13), so the subtraction
       * is expressed as an add of the negated src1 -- TODO confirm. */
      if (insn->op == OP_SUB)
         code[1] ^= 0x00002000;
   } else {
      /* long (32-bit) immediate form */
      emitInsn(0x08000000);
      emitABS(0x39, insn->src(1));
      emitNEG(0x38, insn->src(0));
      emitFMZ(0x37, 1);
      emitABS(0x36, insn->src(0));
      emitNEG(0x35, insn->src(1));
      emitCC  (0x34);
      emitIMMD(0x14, 32, insn->src(1));

      /* OP_SUB toggles bit 19 of code[1]; emitFMUL() documents the same mask
       * as the immediate's sign bit. Must stay after emitIMMD() so the flip
       * applies to the emitted immediate. */
      if (insn->op == OP_SUB)
         code[1] ^= 0x00080000;
   }

   /* operands common to both forms */
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
1337
/*
 * Emit FMUL.
 *
 * When longIMMD(src1) is false, src1 may be a GPR, a constant buffer
 * reference or a 19-bit immediate and the negate modifiers of both sources
 * are emitted through emitNEG2(). The long-immediate form has no such call;
 * the combined negation is folded into the immediate instead.
 */
void
CodeEmitterGM107::emitFMUL()
{
   if (!longIMMD(insn->src(1))) {
      /* the file of src1 selects the instruction form */
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c680000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c680000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38680000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT (0x32);
      emitNEG2(0x30, insn->src(0), insn->src(1));
      emitCC  (0x2f);
      emitFMZ (0x2c, 2);
      emitPDIV(0x29);
      emitRND (0x27);
   } else {
      /* long (32-bit) immediate form */
      emitInsn(0x1e000000);
      emitSAT (0x37);
      emitFMZ (0x35, 2);
      emitCC  (0x34);
      emitIMMD(0x14, 32, insn->src(1));
      /* exactly one negated source means a negated product; this has to run
       * after emitIMMD() because it modifies the already-emitted immediate */
      if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
         code[1] ^= 0x00080000; /* flip immd sign bit */
   }

   /* operands common to both forms */
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
1378
/*
 * Emit FFMA (three sources, one destination).
 *
 * Forms, chosen from the files of src2 and src1:
 *  - src2 in a GPR, src1 in a GPR / constant buffer / 19-bit immediate;
 *  - src2 in a GPR, src1 a long (32-bit) immediate: src2 is not emitted as a
 *    separate operand and is asserted to be the same register as the
 *    destination;
 *  - src2 in a constant buffer, src1 emitted through emitGPR() at 0x27.
 */
void
CodeEmitterGM107::emitFFMA()
{
   /* set when the 32-bit immediate form was chosen; it changes where the
    * modifier fields go and suppresses the src2 operand below */
   bool isLongIMMD = false;
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x59800000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x49800000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         if (longIMMD(insn->getSrc(1))) {
            /* this form requires dst and src2 to share a register id */
            assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
            isLongIMMD = true;
            emitInsn(0x0c000000);
            emitIMMD(0x14, 32, insn->src(1));
         } else {
            emitInsn(0x32800000);
            emitIMMD(0x14, 19, insn->src(1));
         }
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      if (!isLongIMMD)
         emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x51800000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   /* modifier fields sit at different positions in the long-immediate form,
    * which also has no emitRND() call */
   if (isLongIMMD) {
      emitNEG (0x39, insn->src(2));
      emitNEG2(0x38, insn->src(0), insn->src(1));
      emitSAT (0x37);
      emitCC  (0x34);
   } else {
      emitRND (0x33);
      emitSAT (0x32);
      emitNEG (0x31, insn->src(2));
      emitNEG2(0x30, insn->src(0), insn->src(1));
      emitCC  (0x2f);
   }

   /* emitted identically for every form */
   emitFMZ(0x35, 2);
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
1439
1440void
1441CodeEmitterGM107::emitMUFU()
1442{
1443   int mufu = 0;
1444
1445   switch (insn->op) {
1446   case OP_COS: mufu = 0; break;
1447   case OP_SIN: mufu = 1; break;
1448   case OP_EX2: mufu = 2; break;
1449   case OP_LG2: mufu = 3; break;
1450   case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451   case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452   case OP_SQRT: mufu = 8; break;
1453   default:
1454      assert(!"invalid mufu");
1455      break;
1456   }
1457
1458   emitInsn (0x50800000);
1459   emitSAT  (0x32);
1460   emitNEG  (0x30, insn->src(0));
1461   emitABS  (0x2e, insn->src(0));
1462   emitField(0x14, 4, mufu);
1463   emitGPR  (0x08, insn->src(0));
1464   emitGPR  (0x00, insn->def(0));
1465}
1466
1467void
1468CodeEmitterGM107::emitFMNMX()
1469{
1470   switch (insn->src(1).getFile()) {
1471   case FILE_GPR:
1472      emitInsn(0x5c600000);
1473      emitGPR (0x14, insn->src(1));
1474      break;
1475   case FILE_MEMORY_CONST:
1476      emitInsn(0x4c600000);
1477      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1478      break;
1479   case FILE_IMMEDIATE:
1480      emitInsn(0x38600000);
1481      emitIMMD(0x14, 19, insn->src(1));
1482      break;
1483   default:
1484      assert(!"bad src1 file");
1485      break;
1486   }
1487
1488   emitField(0x2a, 1, insn->op == OP_MAX);
1489   emitPRED (0x27);
1490
1491   emitABS(0x31, insn->src(1));
1492   emitNEG(0x30, insn->src(0));
1493   emitCC (0x2f);
1494   emitABS(0x2e, insn->src(0));
1495   emitNEG(0x2d, insn->src(1));
1496   emitFMZ(0x2c, 1);
1497   emitGPR(0x08, insn->src(0));
1498   emitGPR(0x00, insn->def(0));
1499}
1500
1501void
1502CodeEmitterGM107::emitRRO()
1503{
1504   switch (insn->src(0).getFile()) {
1505   case FILE_GPR:
1506      emitInsn(0x5c900000);
1507      emitGPR (0x14, insn->src(0));
1508      break;
1509   case FILE_MEMORY_CONST:
1510      emitInsn(0x4c900000);
1511      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1512      break;
1513   case FILE_IMMEDIATE:
1514      emitInsn(0x38900000);
1515      emitIMMD(0x14, 19, insn->src(0));
1516      break;
1517   default:
1518      assert(!"bad src file");
1519      break;
1520   }
1521
1522   emitABS  (0x31, insn->src(0));
1523   emitNEG  (0x2d, insn->src(0));
1524   emitField(0x27, 1, insn->op == OP_PREEX2);
1525   emitGPR  (0x00, insn->def(0));
1526}
1527
1528void
1529CodeEmitterGM107::emitFCMP()
1530{
1531   const CmpInstruction *insn = this->insn->asCmp();
1532   CondCode cc = insn->setCond;
1533
1534   if (insn->src(2).mod.neg())
1535      cc = reverseCondCode(cc);
1536
1537   switch(insn->src(2).getFile()) {
1538   case FILE_GPR:
1539      switch (insn->src(1).getFile()) {
1540      case FILE_GPR:
1541         emitInsn(0x5ba00000);
1542         emitGPR (0x14, insn->src(1));
1543         break;
1544      case FILE_MEMORY_CONST:
1545         emitInsn(0x4ba00000);
1546         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1547         break;
1548      case FILE_IMMEDIATE:
1549         emitInsn(0x36a00000);
1550         emitIMMD(0x14, 19, insn->src(1));
1551         break;
1552      default:
1553         assert(!"bad src1 file");
1554         break;
1555      }
1556      emitGPR (0x27, insn->src(2));
1557      break;
1558   case FILE_MEMORY_CONST:
1559      emitInsn(0x53a00000);
1560      emitGPR (0x27, insn->src(1));
1561      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1562      break;
1563   default:
1564      assert(!"bad src2 file");
1565      break;
1566   }
1567
1568   emitCond4(0x30, cc);
1569   emitFMZ  (0x2f, 1);
1570   emitGPR  (0x08, insn->src(0));
1571   emitGPR  (0x00, insn->def(0));
1572}
1573
1574void
1575CodeEmitterGM107::emitFSET()
1576{
1577   const CmpInstruction *insn = this->insn->asCmp();
1578
1579   switch (insn->src(1).getFile()) {
1580   case FILE_GPR:
1581      emitInsn(0x58000000);
1582      emitGPR (0x14, insn->src(1));
1583      break;
1584   case FILE_MEMORY_CONST:
1585      emitInsn(0x48000000);
1586      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1587      break;
1588   case FILE_IMMEDIATE:
1589      emitInsn(0x30000000);
1590      emitIMMD(0x14, 19, insn->src(1));
1591      break;
1592   default:
1593      assert(!"bad src1 file");
1594      break;
1595   }
1596
1597   if (insn->op != OP_SET) {
1598      switch (insn->op) {
1599      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1602      default:
1603         assert(!"invalid set op");
1604         break;
1605      }
1606      emitPRED(0x27, insn->src(2));
1607   } else {
1608      emitPRED(0x27);
1609   }
1610
1611   emitFMZ  (0x37, 1);
1612   emitABS  (0x36, insn->src(0));
1613   emitNEG  (0x35, insn->src(1));
1614   emitField(0x34, 1, insn->dType == TYPE_F32);
1615   emitCond4(0x30, insn->setCond);
1616   emitCC   (0x2f);
1617   emitABS  (0x2c, insn->src(1));
1618   emitNEG  (0x2b, insn->src(0));
1619   emitGPR  (0x08, insn->src(0));
1620   emitGPR  (0x00, insn->def(0));
1621}
1622
1623void
1624CodeEmitterGM107::emitFSETP()
1625{
1626   const CmpInstruction *insn = this->insn->asCmp();
1627
1628   switch (insn->src(1).getFile()) {
1629   case FILE_GPR:
1630      emitInsn(0x5bb00000);
1631      emitGPR (0x14, insn->src(1));
1632      break;
1633   case FILE_MEMORY_CONST:
1634      emitInsn(0x4bb00000);
1635      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1636      break;
1637   case FILE_IMMEDIATE:
1638      emitInsn(0x36b00000);
1639      emitIMMD(0x14, 19, insn->src(1));
1640      break;
1641   default:
1642      assert(!"bad src1 file");
1643      break;
1644   }
1645
1646   if (insn->op != OP_SET) {
1647      switch (insn->op) {
1648      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1651      default:
1652         assert(!"invalid set op");
1653         break;
1654      }
1655      emitPRED(0x27, insn->src(2));
1656   } else {
1657      emitPRED(0x27);
1658   }
1659
1660   emitCond4(0x30, insn->setCond);
1661   emitFMZ  (0x2f, 1);
1662   emitABS  (0x2c, insn->src(1));
1663   emitNEG  (0x2b, insn->src(0));
1664   emitGPR  (0x08, insn->src(0));
1665   emitABS  (0x07, insn->src(0));
1666   emitNEG  (0x06, insn->src(1));
1667   emitPRED (0x03, insn->def(0));
1668   if (insn->defExists(1))
1669      emitPRED(0x00, insn->def(1));
1670   else
1671      emitPRED(0x00);
1672}
1673
1674void
1675CodeEmitterGM107::emitFSWZADD()
1676{
1677   emitInsn (0x50f80000);
1678   emitCC   (0x2f);
1679   emitFMZ  (0x2c, 1);
1680   emitRND  (0x27);
1681   emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682   emitField(0x1c, 8, insn->subOp);
1683   if (insn->predSrc != 1)
1684      emitGPR  (0x14, insn->src(1));
1685   else
1686      emitGPR  (0x14);
1687   emitGPR  (0x08, insn->src(0));
1688   emitGPR  (0x00, insn->def(0));
1689}
1690
1691/*******************************************************************************
1692 * integer
1693 ******************************************************************************/
1694
1695void
1696CodeEmitterGM107::emitLOP()
1697{
1698   int lop = 0;
1699
1700   switch (insn->op) {
1701   case OP_AND: lop = 0; break;
1702   case OP_OR : lop = 1; break;
1703   case OP_XOR: lop = 2; break;
1704   default:
1705      assert(!"invalid lop");
1706      break;
1707   }
1708
1709   if (!longIMMD(insn->src(1))) {
1710      switch (insn->src(1).getFile()) {
1711      case FILE_GPR:
1712         emitInsn(0x5c400000);
1713         emitGPR (0x14, insn->src(1));
1714         break;
1715      case FILE_MEMORY_CONST:
1716         emitInsn(0x4c400000);
1717         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1718         break;
1719      case FILE_IMMEDIATE:
1720         emitInsn(0x38400000);
1721         emitIMMD(0x14, 19, insn->src(1));
1722         break;
1723      default:
1724         assert(!"bad src1 file");
1725         break;
1726      }
1727      emitPRED (0x30);
1728      emitCC   (0x2f);
1729      emitX    (0x2b);
1730      emitField(0x29, 2, lop);
1731      emitINV  (0x28, insn->src(1));
1732      emitINV  (0x27, insn->src(0));
1733   } else {
1734      emitInsn (0x04000000);
1735      emitX    (0x39);
1736      emitINV  (0x38, insn->src(1));
1737      emitINV  (0x37, insn->src(0));
1738      emitField(0x35, 2, lop);
1739      emitCC   (0x34);
1740      emitIMMD (0x14, 32, insn->src(1));
1741   }
1742
1743   emitGPR  (0x08, insn->src(0));
1744   emitGPR  (0x00, insn->def(0));
1745}
1746
1747/* special-case of emitLOP(): lop pass_b dst 0 ~src */
1748void
1749CodeEmitterGM107::emitNOT()
1750{
1751   if (!longIMMD(insn->src(0))) {
1752      switch (insn->src(0).getFile()) {
1753      case FILE_GPR:
1754         emitInsn(0x5c400700);
1755         emitGPR (0x14, insn->src(0));
1756         break;
1757      case FILE_MEMORY_CONST:
1758         emitInsn(0x4c400700);
1759         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1760         break;
1761      case FILE_IMMEDIATE:
1762         emitInsn(0x38400700);
1763         emitIMMD(0x14, 19, insn->src(0));
1764         break;
1765      default:
1766         assert(!"bad src1 file");
1767         break;
1768      }
1769      emitPRED (0x30);
1770   } else {
1771      emitInsn (0x05600000);
1772      emitIMMD (0x14, 32, insn->src(1));
1773   }
1774
1775   emitGPR(0x08);
1776   emitGPR(0x00, insn->def(0));
1777}
1778
/*
 * Emit IADD; the same routine also handles OP_SUB.
 *
 * When longIMMD(src1) is false, src1 may be a GPR, a constant buffer
 * reference or a 19-bit immediate. Otherwise a separate form carrying a
 * 32-bit immediate is used, with the modifier fields at different positions.
 */
void
CodeEmitterGM107::emitIADD()
{
   if (!longIMMD(insn->src(1))) {
      /* the file of src1 selects the instruction form */
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c100000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c100000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38100000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT(0x32);
      emitNEG(0x31, insn->src(0));
      emitNEG(0x30, insn->src(1));
      emitCC (0x2f);
      emitX  (0x2b);
   } else {
      /* long (32-bit) immediate form: only src0 gets a NEG field here */
      emitInsn(0x1c000000);
      emitNEG (0x38, insn->src(0));
      emitSAT (0x36);
      emitX   (0x35);
      emitCC  (0x34);
      emitIMMD(0x14, 32, insn->src(1));
   }

   /* OP_SUB toggles bit 16 of code[1]. Presumably that is the src1 NEG bit
    * emitted at 0x30 in the short forms (0x30 - 32 = 16) -- TODO confirm.
    * NOTE(review): this also runs for the long-immediate form, where 0x30
    * appears to fall inside the 32-bit immediate emitted at 0x14; verify that
    * OP_SUB never reaches that path or that the flip is intended there. */
   if (insn->op == OP_SUB)
      code[1] ^= 0x00010000;

   /* operands common to both forms */
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
1820
1821void
1822CodeEmitterGM107::emitIMUL()
1823{
1824   if (!longIMMD(insn->src(1))) {
1825      switch (insn->src(1).getFile()) {
1826      case FILE_GPR:
1827         emitInsn(0x5c380000);
1828         emitGPR (0x14, insn->src(1));
1829         break;
1830      case FILE_MEMORY_CONST:
1831         emitInsn(0x4c380000);
1832         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1833         break;
1834      case FILE_IMMEDIATE:
1835         emitInsn(0x38380000);
1836         emitIMMD(0x14, 19, insn->src(1));
1837         break;
1838      default:
1839         assert(!"bad src1 file");
1840         break;
1841      }
1842      emitCC   (0x2f);
1843      emitField(0x29, 1, isSignedType(insn->sType));
1844      emitField(0x28, 1, isSignedType(insn->dType));
1845      emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1846   } else {
1847      emitInsn (0x1f000000);
1848      emitField(0x37, 1, isSignedType(insn->sType));
1849      emitField(0x36, 1, isSignedType(insn->dType));
1850      emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1851      emitCC   (0x34);
1852      emitIMMD (0x14, 32, insn->src(1));
1853   }
1854
1855   emitGPR(0x08, insn->src(0));
1856   emitGPR(0x00, insn->def(0));
1857}
1858
1859void
1860CodeEmitterGM107::emitIMAD()
1861{
1862   /*XXX: imad32i exists, but not using it as third src overlaps dst */
1863   switch(insn->src(2).getFile()) {
1864   case FILE_GPR:
1865      switch (insn->src(1).getFile()) {
1866      case FILE_GPR:
1867         emitInsn(0x5a000000);
1868         emitGPR (0x14, insn->src(1));
1869         break;
1870      case FILE_MEMORY_CONST:
1871         emitInsn(0x4a000000);
1872         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1873         break;
1874      case FILE_IMMEDIATE:
1875         emitInsn(0x34000000);
1876         emitIMMD(0x14, 19, insn->src(1));
1877         break;
1878      default:
1879         assert(!"bad src1 file");
1880         break;
1881      }
1882      emitGPR (0x27, insn->src(2));
1883      break;
1884   case FILE_MEMORY_CONST:
1885      emitInsn(0x52000000);
1886      emitGPR (0x27, insn->src(1));
1887      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1888      break;
1889   default:
1890      assert(!"bad src2 file");
1891      break;
1892   }
1893
1894   emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895   emitField(0x35, 1, isSignedType(insn->sType));
1896   emitNEG  (0x34, insn->src(2));
1897   emitNEG2 (0x33, insn->src(0), insn->src(1));
1898   emitSAT  (0x32);
1899   emitX    (0x31);
1900   emitField(0x30, 1, isSignedType(insn->dType));
1901   emitCC   (0x2f);
1902   emitGPR  (0x08, insn->src(0));
1903   emitGPR  (0x00, insn->def(0));
1904}
1905
1906void
1907CodeEmitterGM107::emitISCADD()
1908{
1909   assert(insn->src(1).get()->asImm());
1910
1911   switch (insn->src(2).getFile()) {
1912   case FILE_GPR:
1913      emitInsn(0x5c180000);
1914      emitGPR (0x14, insn->src(2));
1915      break;
1916   case FILE_MEMORY_CONST:
1917      emitInsn(0x4c180000);
1918      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1919      break;
1920   case FILE_IMMEDIATE:
1921      emitInsn(0x38180000);
1922      emitIMMD(0x14, 19, insn->src(2));
1923      break;
1924   default:
1925      assert(!"bad src1 file");
1926      break;
1927   }
1928   emitNEG (0x31, insn->src(0));
1929   emitNEG (0x30, insn->src(2));
1930   emitCC  (0x2f);
1931   emitIMMD(0x27, 5, insn->src(1));
1932   emitGPR (0x08, insn->src(0));
1933   emitGPR (0x00, insn->def(0));
1934}
1935
/*
 * Emit XMAD. src0 must be a GPR (asserted).
 *
 * Four forms are distinguished by the files of src1 and src2:
 *  - src2 in a constant buffer (src1 must be a GPR);
 *  - src1 in a constant buffer (src2 must be a GPR);
 *  - src1 a 16-bit immediate (src2 must be a GPR, and the H1(1) subOp flag
 *    must be clear);
 *  - both src1 and src2 in GPRs.
 *
 * The subOp packs several sub-fields; which of them are emitted, and at which
 * positions and widths, depends on the form, tracked by the three flags below.
 */
void
CodeEmitterGM107::emitXMAD()
{
   assert(insn->src(0).getFile() == FILE_GPR);

   /* constbuf:  one of the sources is a constant buffer reference; moves
    *            several fields and narrows the cmode field
    * psl_mrg:   the (subOp & 0x3) field is emitted; cleared only for the
    *            src2-in-constant-buffer form
    * immediate: src1 is an immediate; the H1(1) flag is not emitted */
   bool constbuf = false;
   bool psl_mrg = true;
   bool immediate = false;
   if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
      assert(insn->src(1).getFile() == FILE_GPR);
      constbuf = true;
      psl_mrg = false;
      emitInsn(0x51000000);
      emitGPR(0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
      assert(insn->src(2).getFile() == FILE_GPR);
      constbuf = true;
      emitInsn(0x4e000000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      emitGPR(0x27, insn->src(2));
   } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
      assert(insn->src(2).getFile() == FILE_GPR);
      assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
      immediate = true;
      emitInsn(0x36000000);
      emitIMMD(0x14, 16, insn->src(1));
      emitGPR(0x27, insn->src(2));
   } else {
      assert(insn->src(1).getFile() == FILE_GPR);
      assert(insn->src(2).getFile() == FILE_GPR);
      emitInsn(0x5b000000);
      emitGPR(0x14, insn->src(1));
      emitGPR(0x27, insn->src(2));
   }

   /* low two subOp bits; position depends on whether a cbuf is involved */
   if (psl_mrg)
      emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);

   /* cmode sub-field of subOp: 2 bits wide in the cbuf forms, 3 otherwise */
   unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
   cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
   emitField(0x32, constbuf ? 2 : 3, cmode);

   emitX(constbuf ? 0x36 : 0x26);
   emitCC(0x2f);

   emitGPR(0x0, insn->def(0));
   emitGPR(0x8, insn->src(0));

   // source flags
   /* for signed source types the H1 sub-field of subOp is also written as a
    * 2-bit field at 0x30 */
   if (isSignedType(insn->sType)) {
      uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
      emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
   }
   /* H1 flag for source 0 is always emitted; the one for source 1 only when
    * src1 is not an immediate */
   emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
   if (!immediate) {
      bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
      emitField(constbuf ? 0x34 : 0x23, 1, h1);
   }
}
1996
1997void
1998CodeEmitterGM107::emitIMNMX()
1999{
2000   switch (insn->src(1).getFile()) {
2001   case FILE_GPR:
2002      emitInsn(0x5c200000);
2003      emitGPR (0x14, insn->src(1));
2004      break;
2005   case FILE_MEMORY_CONST:
2006      emitInsn(0x4c200000);
2007      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008      break;
2009   case FILE_IMMEDIATE:
2010      emitInsn(0x38200000);
2011      emitIMMD(0x14, 19, insn->src(1));
2012      break;
2013   default:
2014      assert(!"bad src1 file");
2015      break;
2016   }
2017
2018   emitField(0x30, 1, isSignedType(insn->dType));
2019   emitCC   (0x2f);
2020   emitField(0x2b, 2, insn->subOp);
2021   emitField(0x2a, 1, insn->op == OP_MAX);
2022   emitPRED (0x27);
2023   emitGPR  (0x08, insn->src(0));
2024   emitGPR  (0x00, insn->def(0));
2025}
2026
2027void
2028CodeEmitterGM107::emitICMP()
2029{
2030   const CmpInstruction *insn = this->insn->asCmp();
2031   CondCode cc = insn->setCond;
2032
2033   if (insn->src(2).mod.neg())
2034      cc = reverseCondCode(cc);
2035
2036   switch(insn->src(2).getFile()) {
2037   case FILE_GPR:
2038      switch (insn->src(1).getFile()) {
2039      case FILE_GPR:
2040         emitInsn(0x5b400000);
2041         emitGPR (0x14, insn->src(1));
2042         break;
2043      case FILE_MEMORY_CONST:
2044         emitInsn(0x4b400000);
2045         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2046         break;
2047      case FILE_IMMEDIATE:
2048         emitInsn(0x36400000);
2049         emitIMMD(0x14, 19, insn->src(1));
2050         break;
2051      default:
2052         assert(!"bad src1 file");
2053         break;
2054      }
2055      emitGPR (0x27, insn->src(2));
2056      break;
2057   case FILE_MEMORY_CONST:
2058      emitInsn(0x53400000);
2059      emitGPR (0x27, insn->src(1));
2060      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2061      break;
2062   default:
2063      assert(!"bad src2 file");
2064      break;
2065   }
2066
2067   emitCond3(0x31, cc);
2068   emitField(0x30, 1, isSignedType(insn->sType));
2069   emitGPR  (0x08, insn->src(0));
2070   emitGPR  (0x00, insn->def(0));
2071}
2072
2073void
2074CodeEmitterGM107::emitISET()
2075{
2076   const CmpInstruction *insn = this->insn->asCmp();
2077
2078   switch (insn->src(1).getFile()) {
2079   case FILE_GPR:
2080      emitInsn(0x5b500000);
2081      emitGPR (0x14, insn->src(1));
2082      break;
2083   case FILE_MEMORY_CONST:
2084      emitInsn(0x4b500000);
2085      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2086      break;
2087   case FILE_IMMEDIATE:
2088      emitInsn(0x36500000);
2089      emitIMMD(0x14, 19, insn->src(1));
2090      break;
2091   default:
2092      assert(!"bad src1 file");
2093      break;
2094   }
2095
2096   if (insn->op != OP_SET) {
2097      switch (insn->op) {
2098      case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099      case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2101      default:
2102         assert(!"invalid set op");
2103         break;
2104      }
2105      emitPRED(0x27, insn->src(2));
2106   } else {
2107      emitPRED(0x27);
2108   }
2109
2110   emitCond3(0x31, insn->setCond);
2111   emitField(0x30, 1, isSignedType(insn->sType));
2112   emitCC   (0x2f);
2113   emitField(0x2c, 1, insn->dType == TYPE_F32);
2114   emitX    (0x2b);
2115   emitGPR  (0x08, insn->src(0));
2116   emitGPR  (0x00, insn->def(0));
2117}
2118
2119void
2120CodeEmitterGM107::emitISETP()
2121{
2122   const CmpInstruction *insn = this->insn->asCmp();
2123
2124   switch (insn->src(1).getFile()) {
2125   case FILE_GPR:
2126      emitInsn(0x5b600000);
2127      emitGPR (0x14, insn->src(1));
2128      break;
2129   case FILE_MEMORY_CONST:
2130      emitInsn(0x4b600000);
2131      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2132      break;
2133   case FILE_IMMEDIATE:
2134      emitInsn(0x36600000);
2135      emitIMMD(0x14, 19, insn->src(1));
2136      break;
2137   default:
2138      assert(!"bad src1 file");
2139      break;
2140   }
2141
2142   if (insn->op != OP_SET) {
2143      switch (insn->op) {
2144      case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145      case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2147      default:
2148         assert(!"invalid set op");
2149         break;
2150      }
2151      emitPRED(0x27, insn->src(2));
2152   } else {
2153      emitPRED(0x27);
2154   }
2155
2156   emitCond3(0x31, insn->setCond);
2157   emitField(0x30, 1, isSignedType(insn->sType));
2158   emitX    (0x2b);
2159   emitGPR  (0x08, insn->src(0));
2160   emitPRED (0x03, insn->def(0));
2161   if (insn->defExists(1))
2162      emitPRED(0x00, insn->def(1));
2163   else
2164      emitPRED(0x00);
2165}
2166
2167void
2168CodeEmitterGM107::emitSHL()
2169{
2170   switch (insn->src(1).getFile()) {
2171   case FILE_GPR:
2172      emitInsn(0x5c480000);
2173      emitGPR (0x14, insn->src(1));
2174      break;
2175   case FILE_MEMORY_CONST:
2176      emitInsn(0x4c480000);
2177      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2178      break;
2179   case FILE_IMMEDIATE:
2180      emitInsn(0x38480000);
2181      emitIMMD(0x14, 19, insn->src(1));
2182      break;
2183   default:
2184      assert(!"bad src1 file");
2185      break;
2186   }
2187
2188   emitCC   (0x2f);
2189   emitX    (0x2b);
2190   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191   emitGPR  (0x08, insn->src(0));
2192   emitGPR  (0x00, insn->def(0));
2193}
2194
2195void
2196CodeEmitterGM107::emitSHR()
2197{
2198   switch (insn->src(1).getFile()) {
2199   case FILE_GPR:
2200      emitInsn(0x5c280000);
2201      emitGPR (0x14, insn->src(1));
2202      break;
2203   case FILE_MEMORY_CONST:
2204      emitInsn(0x4c280000);
2205      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2206      break;
2207   case FILE_IMMEDIATE:
2208      emitInsn(0x38280000);
2209      emitIMMD(0x14, 19, insn->src(1));
2210      break;
2211   default:
2212      assert(!"bad src1 file");
2213      break;
2214   }
2215
2216   emitField(0x30, 1, isSignedType(insn->dType));
2217   emitCC   (0x2f);
2218   emitX    (0x2c);
2219   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220   emitGPR  (0x08, insn->src(0));
2221   emitGPR  (0x00, insn->def(0));
2222}
2223
2224void
2225CodeEmitterGM107::emitSHF()
2226{
2227   unsigned type;
2228
2229   switch (insn->src(1).getFile()) {
2230   case FILE_GPR:
2231      emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232      emitGPR(0x14, insn->src(1));
2233      break;
2234   case FILE_IMMEDIATE:
2235      emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236      emitIMMD(0x14, 19, insn->src(1));
2237      break;
2238   default:
2239      assert(!"bad src1 file");
2240      break;
2241   }
2242
2243   switch (insn->sType) {
2244   case TYPE_U64:
2245      type = 2;
2246      break;
2247   case TYPE_S64:
2248      type = 3;
2249      break;
2250   default:
2251      type = 0;
2252      break;
2253   }
2254
2255   emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2256   emitX    (0x31);
2257   emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2258   emitCC   (0x2f);
2259   emitGPR  (0x27, insn->src(2));
2260   emitField(0x25, 2, type);
2261   emitGPR  (0x08, insn->src(0));
2262   emitGPR  (0x00, insn->def(0));
2263}
2264
2265void
2266CodeEmitterGM107::emitPOPC()
2267{
2268   switch (insn->src(0).getFile()) {
2269   case FILE_GPR:
2270      emitInsn(0x5c080000);
2271      emitGPR (0x14, insn->src(0));
2272      break;
2273   case FILE_MEMORY_CONST:
2274      emitInsn(0x4c080000);
2275      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2276      break;
2277   case FILE_IMMEDIATE:
2278      emitInsn(0x38080000);
2279      emitIMMD(0x14, 19, insn->src(0));
2280      break;
2281   default:
2282      assert(!"bad src1 file");
2283      break;
2284   }
2285
2286   emitINV(0x28, insn->src(0));
2287   emitGPR(0x00, insn->def(0));
2288}
2289
2290void
2291CodeEmitterGM107::emitBFI()
2292{
2293   switch(insn->src(2).getFile()) {
2294   case FILE_GPR:
2295      switch (insn->src(1).getFile()) {
2296      case FILE_GPR:
2297         emitInsn(0x5bf00000);
2298         emitGPR (0x14, insn->src(1));
2299         break;
2300      case FILE_MEMORY_CONST:
2301         emitInsn(0x4bf00000);
2302         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2303         break;
2304      case FILE_IMMEDIATE:
2305         emitInsn(0x36f00000);
2306         emitIMMD(0x14, 19, insn->src(1));
2307         break;
2308      default:
2309         assert(!"bad src1 file");
2310         break;
2311      }
2312      emitGPR (0x27, insn->src(2));
2313      break;
2314   case FILE_MEMORY_CONST:
2315      emitInsn(0x53f00000);
2316      emitGPR (0x27, insn->src(1));
2317      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2318      break;
2319   default:
2320      assert(!"bad src2 file");
2321      break;
2322   }
2323
2324   emitCC   (0x2f);
2325   emitGPR  (0x08, insn->src(0));
2326   emitGPR  (0x00, insn->def(0));
2327}
2328
2329void
2330CodeEmitterGM107::emitBFE()
2331{
2332   switch (insn->src(1).getFile()) {
2333   case FILE_GPR:
2334      emitInsn(0x5c000000);
2335      emitGPR (0x14, insn->src(1));
2336      break;
2337   case FILE_MEMORY_CONST:
2338      emitInsn(0x4c000000);
2339      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2340      break;
2341   case FILE_IMMEDIATE:
2342      emitInsn(0x38000000);
2343      emitIMMD(0x14, 19, insn->src(1));
2344      break;
2345   default:
2346      assert(!"bad src1 file");
2347      break;
2348   }
2349
2350   emitField(0x30, 1, isSignedType(insn->dType));
2351   emitCC   (0x2f);
2352   emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353   emitGPR  (0x08, insn->src(0));
2354   emitGPR  (0x00, insn->def(0));
2355}
2356
2357void
2358CodeEmitterGM107::emitFLO()
2359{
2360   switch (insn->src(0).getFile()) {
2361   case FILE_GPR:
2362      emitInsn(0x5c300000);
2363      emitGPR (0x14, insn->src(0));
2364      break;
2365   case FILE_MEMORY_CONST:
2366      emitInsn(0x4c300000);
2367      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2368      break;
2369   case FILE_IMMEDIATE:
2370      emitInsn(0x38300000);
2371      emitIMMD(0x14, 19, insn->src(0));
2372      break;
2373   default:
2374      assert(!"bad src1 file");
2375      break;
2376   }
2377
2378   emitField(0x30, 1, isSignedType(insn->dType));
2379   emitCC   (0x2f);
2380   emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381   emitINV  (0x28, insn->src(0));
2382   emitGPR  (0x00, insn->def(0));
2383}
2384
2385void
2386CodeEmitterGM107::emitPRMT()
2387{
2388   switch (insn->src(1).getFile()) {
2389   case FILE_GPR:
2390      emitInsn(0x5bc00000);
2391      emitGPR (0x14, insn->src(1));
2392      break;
2393   case FILE_MEMORY_CONST:
2394      emitInsn(0x4bc00000);
2395      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2396      break;
2397   case FILE_IMMEDIATE:
2398      emitInsn(0x36c00000);
2399      emitIMMD(0x14, 19, insn->src(1));
2400      break;
2401   default:
2402      assert(!"bad src1 file");
2403      break;
2404   }
2405
2406   emitField(0x30, 3, insn->subOp);
2407   emitGPR  (0x27, insn->src(2));
2408   emitGPR  (0x08, insn->src(0));
2409   emitGPR  (0x00, insn->def(0));
2410}
2411
2412/*******************************************************************************
2413 * memory
2414 ******************************************************************************/
2415
2416void
2417CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2418{
2419   int data = 0;
2420
2421   switch (typeSizeof(type)) {
2422   case  1: data = isSignedType(type) ? 1 : 0; break;
2423   case  2: data = isSignedType(type) ? 3 : 2; break;
2424   case  4: data = 4; break;
2425   case  8: data = 5; break;
2426   case 16: data = 6; break;
2427   default:
2428      assert(!"bad type");
2429      break;
2430   }
2431
2432   emitField(pos, 3, data);
2433}
2434
2435void
2436CodeEmitterGM107::emitLDSTc(int pos)
2437{
2438   int mode = 0;
2439
2440   switch (insn->cache) {
2441   case CACHE_CA: mode = 0; break;
2442   case CACHE_CG: mode = 1; break;
2443   case CACHE_CS: mode = 2; break;
2444   case CACHE_CV: mode = 3; break;
2445   default:
2446      assert(!"invalid caching mode");
2447      break;
2448   }
2449
2450   emitField(pos, 2, mode);
2451}
2452
2453void
2454CodeEmitterGM107::emitLDC()
2455{
2456   emitInsn (0xef900000);
2457   emitLDSTs(0x30, insn->dType);
2458   emitField(0x2c, 2, insn->subOp);
2459   emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460   emitGPR  (0x00, insn->def(0));
2461}
2462
2463void
2464CodeEmitterGM107::emitLDL()
2465{
2466   emitInsn (0xef400000);
2467   emitLDSTs(0x30, insn->dType);
2468   emitLDSTc(0x2c);
2469   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470   emitGPR  (0x00, insn->def(0));
2471}
2472
2473void
2474CodeEmitterGM107::emitLDS()
2475{
2476   emitInsn (0xef480000);
2477   emitLDSTs(0x30, insn->dType);
2478   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479   emitGPR  (0x00, insn->def(0));
2480}
2481
2482void
2483CodeEmitterGM107::emitLD()
2484{
2485   emitInsn (0x80000000);
2486   emitPRED (0x3a);
2487   emitLDSTc(0x38);
2488   emitLDSTs(0x35, insn->dType);
2489   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491   emitGPR  (0x00, insn->def(0));
2492}
2493
2494void
2495CodeEmitterGM107::emitSTL()
2496{
2497   emitInsn (0xef500000);
2498   emitLDSTs(0x30, insn->dType);
2499   emitLDSTc(0x2c);
2500   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501   emitGPR  (0x00, insn->src(1));
2502}
2503
2504void
2505CodeEmitterGM107::emitSTS()
2506{
2507   emitInsn (0xef580000);
2508   emitLDSTs(0x30, insn->dType);
2509   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510   emitGPR  (0x00, insn->src(1));
2511}
2512
2513void
2514CodeEmitterGM107::emitST()
2515{
2516   emitInsn (0xa0000000);
2517   emitPRED (0x3a);
2518   emitLDSTc(0x38);
2519   emitLDSTs(0x35, insn->dType);
2520   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522   emitGPR  (0x00, insn->src(1));
2523}
2524
2525void
2526CodeEmitterGM107::emitALD()
2527{
2528   emitInsn (0xefd80000);
2529   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530   emitGPR  (0x27, insn->src(0).getIndirect(1));
2531   emitO    (0x20);
2532   emitP    (0x1f);
2533   emitADDR (0x08, 20, 10, 0, insn->src(0));
2534   emitGPR  (0x00, insn->def(0));
2535}
2536
2537void
2538CodeEmitterGM107::emitAST()
2539{
2540   emitInsn (0xeff00000);
2541   emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542   emitGPR  (0x27, insn->src(0).getIndirect(1));
2543   emitP    (0x1f);
2544   emitADDR (0x08, 20, 10, 0, insn->src(0));
2545   emitGPR  (0x00, insn->src(1));
2546}
2547
2548void
2549CodeEmitterGM107::emitISBERD()
2550{
2551   emitInsn(0xefd00000);
2552   emitGPR (0x08, insn->src(0));
2553   emitGPR (0x00, insn->def(0));
2554}
2555
2556void
2557CodeEmitterGM107::emitAL2P()
2558{
2559   emitInsn (0xefa00000);
2560   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2561   emitPRED (0x2c);
2562   emitO    (0x20);
2563   emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564   emitGPR  (0x08, insn->src(0).getIndirect(0));
2565   emitGPR  (0x00, insn->def(0));
2566}
2567
2568void
2569gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2570{
2571   int ipa = entry->ipa;
2572   int reg = entry->reg;
2573   int loc = entry->loc;
2574
2575   if (data.flatshade &&
2576       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577      ipa = NV50_IR_INTERP_FLAT;
2578      reg = 0xff;
2579   } else if (data.force_persample_interp &&
2580              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582      ipa |= NV50_IR_INTERP_CENTROID;
2583   }
2584   code[loc + 1] &= ~(0xf << 0x14);
2585   code[loc + 1] |= (ipa & 0x3) << 0x16;
2586   code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587   code[loc + 0] &= ~(0xff << 0x14);
2588   code[loc + 0] |= reg << 0x14;
2589}
2590
2591void
2592CodeEmitterGM107::emitIPA()
2593{
2594   int ipam = 0, ipas = 0;
2595
2596   switch (insn->getInterpMode()) {
2597   case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
2598   case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599   case NV50_IR_INTERP_FLAT       : ipam = 2; break;
2600   case NV50_IR_INTERP_SC         : ipam = 3; break;
2601   default:
2602      assert(!"invalid ipa mode");
2603      break;
2604   }
2605
2606   switch (insn->getSampleMode()) {
2607   case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608   case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609   case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
2610   default:
2611      assert(!"invalid ipa sample mode");
2612      break;
2613   }
2614
2615   emitInsn (0xe0000000);
2616   emitField(0x36, 2, ipam);
2617   emitField(0x34, 2, ipas);
2618   emitSAT  (0x33);
2619   emitField(0x2f, 3, 7);
2620   emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621   if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622      code[1] |= 0x00000040; /* .idx */
2623   emitGPR(0x00, insn->def(0));
2624
2625   if (insn->op == OP_PINTERP) {
2626      emitGPR(0x14, insn->src(1));
2627      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628         emitGPR(0x27, insn->src(2));
2629      addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2630   } else {
2631      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632         emitGPR(0x27, insn->src(1));
2633      emitGPR(0x14);
2634      addInterp(insn->ipa, 0xff, gm107_interpApply);
2635   }
2636
2637   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2638      emitGPR(0x27);
2639}
2640
2641void
2642CodeEmitterGM107::emitATOM()
2643{
2644   unsigned dType, subOp;
2645
2646   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647      switch (insn->dType) {
2648      case TYPE_U32: dType = 0; break;
2649      case TYPE_U64: dType = 1; break;
2650      default: assert(!"unexpected dType"); dType = 0; break;
2651      }
2652      subOp = 15;
2653
2654      emitInsn (0xee000000);
2655   } else {
2656      switch (insn->dType) {
2657      case TYPE_U32: dType = 0; break;
2658      case TYPE_S32: dType = 1; break;
2659      case TYPE_U64: dType = 2; break;
2660      case TYPE_F32: dType = 3; break;
2661      case TYPE_B128: dType = 4; break;
2662      case TYPE_S64: dType = 5; break;
2663      default: assert(!"unexpected dType"); dType = 0; break;
2664      }
2665      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2666         subOp = 8;
2667      else
2668         subOp = insn->subOp;
2669
2670      emitInsn (0xed000000);
2671   }
2672
2673   emitField(0x34, 4, subOp);
2674   emitField(0x31, 3, dType);
2675   emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676   emitGPR  (0x14, insn->src(1));
2677   emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678   emitGPR  (0x00, insn->def(0));
2679}
2680
2681void
2682CodeEmitterGM107::emitATOMS()
2683{
2684   unsigned dType, subOp;
2685
2686   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687      switch (insn->dType) {
2688      case TYPE_U32: dType = 0; break;
2689      case TYPE_U64: dType = 1; break;
2690      default: assert(!"unexpected dType"); dType = 0; break;
2691      }
2692      subOp = 4;
2693
2694      emitInsn (0xee000000);
2695      emitField(0x34, 1, dType);
2696   } else {
2697      switch (insn->dType) {
2698      case TYPE_U32: dType = 0; break;
2699      case TYPE_S32: dType = 1; break;
2700      case TYPE_U64: dType = 2; break;
2701      case TYPE_S64: dType = 3; break;
2702      default: assert(!"unexpected dType"); dType = 0; break;
2703      }
2704
2705      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2706         subOp = 8;
2707      else
2708         subOp = insn->subOp;
2709
2710      emitInsn (0xec000000);
2711      emitField(0x1c, 3, dType);
2712   }
2713
2714   emitField(0x34, 4, subOp);
2715   emitGPR  (0x14, insn->src(1));
2716   emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717   emitGPR  (0x00, insn->def(0));
2718}
2719
2720void
2721CodeEmitterGM107::emitRED()
2722{
2723   unsigned dType;
2724
2725   switch (insn->dType) {
2726   case TYPE_U32: dType = 0; break;
2727   case TYPE_S32: dType = 1; break;
2728   case TYPE_U64: dType = 2; break;
2729   case TYPE_F32: dType = 3; break;
2730   case TYPE_B128: dType = 4; break;
2731   case TYPE_S64: dType = 5; break;
2732   default: assert(!"unexpected dType"); dType = 0; break;
2733   }
2734
2735   emitInsn (0xebf80000);
2736   emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737   emitField(0x17, 3, insn->subOp);
2738   emitField(0x14, 3, dType);
2739   emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740   emitGPR  (0x00, insn->src(1));
2741}
2742
2743void
2744CodeEmitterGM107::emitCCTL()
2745{
2746   unsigned width;
2747   if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748      emitInsn(0xef600000);
2749      width = 30;
2750   } else {
2751      emitInsn(0xef800000);
2752      width = 22;
2753   }
2754   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755   emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756   emitField(0x00, 4, insn->subOp);
2757}
2758
2759/*******************************************************************************
2760 * surface
2761 ******************************************************************************/
2762
2763void
2764CodeEmitterGM107::emitPIXLD()
2765{
2766   emitInsn (0xefe80000);
2767   emitPRED (0x2d);
2768   emitField(0x1f, 3, insn->subOp);
2769   emitGPR  (0x08, insn->src(0));
2770   emitGPR  (0x00, insn->def(0));
2771}
2772
2773/*******************************************************************************
2774 * texture
2775 ******************************************************************************/
2776
2777void
2778CodeEmitterGM107::emitTEXs(int pos)
2779{
2780   int src1 = insn->predSrc == 1 ? 2 : 1;
2781   if (insn->srcExists(src1))
2782      emitGPR(pos, insn->src(src1));
2783   else
2784      emitGPR(pos);
2785}
2786
/*
 * Translate a 4-bit component write mask into the 3-bit mask encoding used
 * by emitTEXS().
 *
 * Masks 0x0, 0x5 and 0x6 have no encoding: they trip the assert and yield 0.
 * Note that several distinct masks share the same encoded value.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   // indexed by mask; -1 marks masks without an encoding
   static const int8_t encoding[16] = {
      /* 0x0 */ -1,  /* 0x1 */ 0x0, /* 0x2 */ 0x1, /* 0x3 */ 0x4,
      /* 0x4 */ 0x2, /* 0x5 */ -1,  /* 0x6 */ -1,  /* 0x7 */ 0x0,
      /* 0x8 */ 0x3, /* 0x9 */ 0x5, /* 0xa */ 0x6, /* 0xb */ 0x1,
      /* 0xc */ 0x7, /* 0xd */ 0x2, /* 0xe */ 0x3, /* 0xf */ 0x4,
   };

   if (mask < 16 && encoding[mask] >= 0)
      return encoding[mask];

   assert(!"invalid mask");
   return 0;
}
2809
2810static uint8_t
2811getTEXSTarget(const TexInstruction *tex)
2812{
2813   assert(tex->op == OP_TEX || tex->op == OP_TXL);
2814
2815   switch (tex->tex.target.getEnum()) {
2816   case TEX_TARGET_1D:
2817      assert(tex->tex.levelZero);
2818      return 0x0;
2819   case TEX_TARGET_2D:
2820   case TEX_TARGET_RECT:
2821      if (tex->tex.levelZero)
2822         return 0x2;
2823      if (tex->op == OP_TXL)
2824         return 0x3;
2825      return 0x1;
2826   case TEX_TARGET_2D_SHADOW:
2827   case TEX_TARGET_RECT_SHADOW:
2828      if (tex->tex.levelZero)
2829         return 0x6;
2830      if (tex->op == OP_TXL)
2831         return 0x5;
2832      return 0x4;
2833   case TEX_TARGET_2D_ARRAY:
2834      if (tex->tex.levelZero)
2835         return 0x8;
2836      return 0x7;
2837   case TEX_TARGET_2D_ARRAY_SHADOW:
2838      assert(tex->tex.levelZero);
2839      return 0x9;
2840   case TEX_TARGET_3D:
2841      if (tex->tex.levelZero)
2842         return 0xb;
2843      assert(tex->op != OP_TXL);
2844      return 0xa;
2845   case TEX_TARGET_CUBE:
2846      assert(!tex->tex.levelZero);
2847      if (tex->op == OP_TXL)
2848         return 0xd;
2849      return 0xc;
2850   default:
2851      assert(false);
2852      return 0x0;
2853   }
2854}
2855
2856static uint8_t
2857getTLDSTarget(const TexInstruction *tex)
2858{
2859   switch (tex->tex.target.getEnum()) {
2860   case TEX_TARGET_1D:
2861      if (tex->tex.levelZero)
2862         return 0x0;
2863      return 0x1;
2864   case TEX_TARGET_2D:
2865   case TEX_TARGET_RECT:
2866      if (tex->tex.levelZero)
2867         return tex->tex.useOffsets ? 0x4 : 0x2;
2868      return tex->tex.useOffsets ? 0xc : 0x5;
2869   case TEX_TARGET_2D_MS:
2870      assert(tex->tex.levelZero);
2871      return 0x6;
2872   case TEX_TARGET_3D:
2873      assert(tex->tex.levelZero);
2874      return 0x7;
2875   case TEX_TARGET_2D_ARRAY:
2876      assert(tex->tex.levelZero);
2877      return 0x8;
2878
2879   default:
2880      assert(false);
2881      return 0x0;
2882   }
2883}
2884
2885void
2886CodeEmitterGM107::emitTEX()
2887{
2888   const TexInstruction *insn = this->insn->asTex();
2889   int lodm = 0;
2890
2891   if (!insn->tex.levelZero) {
2892      switch (insn->op) {
2893      case OP_TEX: lodm = 0; break;
2894      case OP_TXB: lodm = 2; break;
2895      case OP_TXL: lodm = 3; break;
2896      default:
2897         assert(!"invalid tex op");
2898         break;
2899      }
2900   } else {
2901      lodm = 1;
2902   }
2903
2904   if (insn->tex.rIndirectSrc >= 0) {
2905      emitInsn (0xdeb80000);
2906      emitField(0x25, 2, lodm);
2907      emitField(0x24, 1, insn->tex.useOffsets == 1);
2908   } else {
2909      emitInsn (0xc0380000);
2910      emitField(0x37, 2, lodm);
2911      emitField(0x36, 1, insn->tex.useOffsets == 1);
2912      emitField(0x24, 13, insn->tex.r);
2913   }
2914
2915   emitField(0x32, 1, insn->tex.target.isShadow());
2916   emitField(0x31, 1, insn->tex.liveOnly);
2917   emitField(0x23, 1, insn->tex.derivAll);
2918   emitField(0x1f, 4, insn->tex.mask);
2919   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920                      insn->tex.target.getDim() - 1);
2921   emitField(0x1c, 1, insn->tex.target.isArray());
2922   emitTEXs (0x14);
2923   emitGPR  (0x08, insn->src(0));
2924   emitGPR  (0x00, insn->def(0));
2925}
2926
2927void
2928CodeEmitterGM107::emitTEXS()
2929{
2930   const TexInstruction *insn = this->insn->asTex();
2931   assert(!insn->tex.derivAll);
2932
2933   switch (insn->op) {
2934   case OP_TEX:
2935   case OP_TXL:
2936      emitInsn (0xd8000000);
2937      emitField(0x35, 4, getTEXSTarget(insn));
2938      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2939      break;
2940   case OP_TXF:
2941      emitInsn (0xda000000);
2942      emitField(0x35, 4, getTLDSTarget(insn));
2943      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2944      break;
2945   case OP_TXG:
2946      assert(insn->tex.useOffsets != 4);
2947      emitInsn (0xdf000000);
2948      emitField(0x34, 2, insn->tex.gatherComp);
2949      emitField(0x33, 1, insn->tex.useOffsets == 1);
2950      emitField(0x32, 1, insn->tex.target.isShadow());
2951      break;
2952   default:
2953      unreachable("unknown op in emitTEXS()");
2954      break;
2955   }
2956
2957   emitField(0x31, 1, insn->tex.liveOnly);
2958   emitField(0x24, 13, insn->tex.r);
2959   if (insn->defExists(1))
2960      emitGPR(0x1c, insn->def(1));
2961   else
2962      emitGPR(0x1c);
2963   if (insn->srcExists(1))
2964      emitGPR(0x14, insn->getSrc(1));
2965   else
2966      emitGPR(0x14);
2967   emitGPR  (0x08, insn->src(0));
2968   emitGPR  (0x00, insn->def(0));
2969}
2970
2971void
2972CodeEmitterGM107::emitTLD()
2973{
2974   const TexInstruction *insn = this->insn->asTex();
2975
2976   if (insn->tex.rIndirectSrc >= 0) {
2977      emitInsn (0xdd380000);
2978   } else {
2979      emitInsn (0xdc380000);
2980      emitField(0x24, 13, insn->tex.r);
2981   }
2982
2983   emitField(0x37, 1, insn->tex.levelZero == 0);
2984   emitField(0x32, 1, insn->tex.target.isMS());
2985   emitField(0x31, 1, insn->tex.liveOnly);
2986   emitField(0x23, 1, insn->tex.useOffsets == 1);
2987   emitField(0x1f, 4, insn->tex.mask);
2988   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989                      insn->tex.target.getDim() - 1);
2990   emitField(0x1c, 1, insn->tex.target.isArray());
2991   emitTEXs (0x14);
2992   emitGPR  (0x08, insn->src(0));
2993   emitGPR  (0x00, insn->def(0));
2994}
2995
2996void
2997CodeEmitterGM107::emitTLD4()
2998{
2999   const TexInstruction *insn = this->insn->asTex();
3000
3001   if (insn->tex.rIndirectSrc >= 0) {
3002      emitInsn (0xdef80000);
3003      emitField(0x26, 2, insn->tex.gatherComp);
3004      emitField(0x25, 2, insn->tex.useOffsets == 4);
3005      emitField(0x24, 2, insn->tex.useOffsets == 1);
3006   } else {
3007      emitInsn (0xc8380000);
3008      emitField(0x38, 2, insn->tex.gatherComp);
3009      emitField(0x37, 2, insn->tex.useOffsets == 4);
3010      emitField(0x36, 2, insn->tex.useOffsets == 1);
3011      emitField(0x24, 13, insn->tex.r);
3012   }
3013
3014   emitField(0x32, 1, insn->tex.target.isShadow());
3015   emitField(0x31, 1, insn->tex.liveOnly);
3016   emitField(0x23, 1, insn->tex.derivAll);
3017   emitField(0x1f, 4, insn->tex.mask);
3018   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019                      insn->tex.target.getDim() - 1);
3020   emitField(0x1c, 1, insn->tex.target.isArray());
3021   emitTEXs (0x14);
3022   emitGPR  (0x08, insn->src(0));
3023   emitGPR  (0x00, insn->def(0));
3024}
3025
3026void
3027CodeEmitterGM107::emitTXD()
3028{
3029   const TexInstruction *insn = this->insn->asTex();
3030
3031   if (insn->tex.rIndirectSrc >= 0) {
3032      emitInsn (0xde780000);
3033   } else {
3034      emitInsn (0xde380000);
3035      emitField(0x24, 13, insn->tex.r);
3036   }
3037
3038   emitField(0x31, 1, insn->tex.liveOnly);
3039   emitField(0x23, 1, insn->tex.useOffsets == 1);
3040   emitField(0x1f, 4, insn->tex.mask);
3041   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042                      insn->tex.target.getDim() - 1);
3043   emitField(0x1c, 1, insn->tex.target.isArray());
3044   emitTEXs (0x14);
3045   emitGPR  (0x08, insn->src(0));
3046   emitGPR  (0x00, insn->def(0));
3047}
3048
3049void
3050CodeEmitterGM107::emitTMML()
3051{
3052   const TexInstruction *insn = this->insn->asTex();
3053
3054   if (insn->tex.rIndirectSrc >= 0) {
3055      emitInsn (0xdf600000);
3056   } else {
3057      emitInsn (0xdf580000);
3058      emitField(0x24, 13, insn->tex.r);
3059   }
3060
3061   emitField(0x31, 1, insn->tex.liveOnly);
3062   emitField(0x23, 1, insn->tex.derivAll);
3063   emitField(0x1f, 4, insn->tex.mask);
3064   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065                      insn->tex.target.getDim() - 1);
3066   emitField(0x1c, 1, insn->tex.target.isArray());
3067   emitTEXs (0x14);
3068   emitGPR  (0x08, insn->src(0));
3069   emitGPR  (0x00, insn->def(0));
3070}
3071
3072void
3073CodeEmitterGM107::emitTXQ()
3074{
3075   const TexInstruction *insn = this->insn->asTex();
3076   int type = 0;
3077
3078   switch (insn->tex.query) {
3079   case TXQ_DIMS           : type = 0x01; break;
3080   case TXQ_TYPE           : type = 0x02; break;
3081   case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082   case TXQ_FILTER         : type = 0x10; break;
3083   case TXQ_LOD            : type = 0x12; break;
3084   case TXQ_WRAP           : type = 0x14; break;
3085   case TXQ_BORDER_COLOUR  : type = 0x16; break;
3086   default:
3087      assert(!"invalid txq query");
3088      break;
3089   }
3090
3091   if (insn->tex.rIndirectSrc >= 0) {
3092      emitInsn (0xdf500000);
3093   } else {
3094      emitInsn (0xdf480000);
3095      emitField(0x24, 13, insn->tex.r);
3096   }
3097
3098   emitField(0x31, 1, insn->tex.liveOnly);
3099   emitField(0x1f, 4, insn->tex.mask);
3100   emitField(0x16, 6, type);
3101   emitGPR  (0x08, insn->src(0));
3102   emitGPR  (0x00, insn->def(0));
3103}
3104
3105void
3106CodeEmitterGM107::emitDEPBAR()
3107{
3108   emitInsn (0xf0f00000);
3109   emitField(0x1d, 1, 1); /* le */
3110   emitField(0x1a, 3, 5);
3111   emitField(0x14, 6, insn->subOp);
3112   emitField(0x00, 6, insn->subOp);
3113}
3114
3115/*******************************************************************************
3116 * misc
3117 ******************************************************************************/
3118
3119void
3120CodeEmitterGM107::emitNOP()
3121{
3122   emitInsn(0x50b00000);
3123}
3124
3125void
3126CodeEmitterGM107::emitKIL()
3127{
3128   emitInsn (0xe3300000);
3129   emitCond5(0x00, CC_TR);
3130}
3131
3132void
3133CodeEmitterGM107::emitOUT()
3134{
3135   const int cut  = insn->op == OP_RESTART || insn->subOp;
3136   const int emit = insn->op == OP_EMIT;
3137
3138   switch (insn->src(1).getFile()) {
3139   case FILE_GPR:
3140      emitInsn(0xfbe00000);
3141      emitGPR (0x14, insn->src(1));
3142      break;
3143   case FILE_IMMEDIATE:
3144      emitInsn(0xf6e00000);
3145      emitIMMD(0x14, 19, insn->src(1));
3146      break;
3147   case FILE_MEMORY_CONST:
3148      emitInsn(0xebe00000);
3149      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3150      break;
3151   default:
3152      assert(!"bad src1 file");
3153      break;
3154   }
3155
3156   emitField(0x27, 2, (cut << 1) | emit);
3157   emitGPR  (0x08, insn->src(0));
3158   emitGPR  (0x00, insn->def(0));
3159}
3160
3161void
3162CodeEmitterGM107::emitBAR()
3163{
3164   uint8_t subop;
3165
3166   emitInsn (0xf0a80000);
3167
3168   switch (insn->subOp) {
3169   case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170   case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
3171   case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
3172   case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
3173   default:
3174      subop = 0x80;
3175      assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3176      break;
3177   }
3178
3179   emitField(0x20, 8, subop);
3180
3181   // barrier id
3182   if (insn->src(0).getFile() == FILE_GPR) {
3183      emitGPR(0x08, insn->src(0));
3184   } else {
3185      ImmediateValue *imm = insn->getSrc(0)->asImm();
3186      assert(imm);
3187      emitField(0x08, 8, imm->reg.data.u32);
3188      emitField(0x2b, 1, 1);
3189   }
3190
3191   // thread count
3192   if (insn->src(1).getFile() == FILE_GPR) {
3193      emitGPR(0x14, insn->src(1));
3194   } else {
3195      ImmediateValue *imm = insn->getSrc(0)->asImm();
3196      assert(imm);
3197      emitField(0x14, 12, imm->reg.data.u32);
3198      emitField(0x2c, 1, 1);
3199   }
3200
3201   if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202      emitPRED (0x27, insn->src(2));
3203      emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3204   } else {
3205      emitField(0x27, 3, 7);
3206   }
3207}
3208
3209void
3210CodeEmitterGM107::emitMEMBAR()
3211{
3212   emitInsn (0xef980000);
3213   emitField(0x08, 2, insn->subOp >> 2);
3214}
3215
3216void
3217CodeEmitterGM107::emitVOTE()
3218{
3219   const ImmediateValue *imm;
3220   uint32_t u32;
3221
3222   int r = -1, p = -1;
3223   for (int i = 0; insn->defExists(i); i++) {
3224      if (insn->def(i).getFile() == FILE_GPR)
3225         r = i;
3226      else if (insn->def(i).getFile() == FILE_PREDICATE)
3227         p = i;
3228   }
3229
3230   emitInsn (0x50d80000);
3231   emitField(0x30, 2, insn->subOp);
3232   if (r >= 0)
3233      emitGPR  (0x00, insn->def(r));
3234   else
3235      emitGPR  (0x00);
3236   if (p >= 0)
3237      emitPRED (0x2d, insn->def(p));
3238   else
3239      emitPRED (0x2d);
3240
3241   switch (insn->src(0).getFile()) {
3242   case FILE_PREDICATE:
3243      emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244      emitPRED (0x27, insn->src(0));
3245      break;
3246   case FILE_IMMEDIATE:
3247      imm = insn->getSrc(0)->asImm();
3248      assert(imm);
3249      u32 = imm->reg.data.u32;
3250      assert(u32 == 0 || u32 == 1);
3251      emitPRED(0x27);
3252      emitField(0x2a, 1, u32 == 0);
3253      break;
3254   default:
3255      assert(!"Unhandled src");
3256      break;
3257   }
3258}
3259
3260void
3261CodeEmitterGM107::emitSUTarget()
3262{
3263   const TexInstruction *insn = this->insn->asTex();
3264   int target = 0;
3265
3266   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3267
3268   if (insn->tex.target == TEX_TARGET_BUFFER) {
3269      target = 2;
3270   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3271      target = 4;
3272   } else if (insn->tex.target == TEX_TARGET_2D ||
3273              insn->tex.target == TEX_TARGET_RECT) {
3274      target = 6;
3275   } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276              insn->tex.target == TEX_TARGET_CUBE ||
3277              insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3278      target = 8;
3279   } else if (insn->tex.target == TEX_TARGET_3D) {
3280      target = 10;
3281   } else {
3282      assert(insn->tex.target == TEX_TARGET_1D);
3283   }
3284   emitField(0x20, 4, target);
3285}
3286
3287void
3288CodeEmitterGM107::emitSUHandle(const int s)
3289{
3290   const TexInstruction *insn = this->insn->asTex();
3291
3292   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3293
3294   if (insn->src(s).getFile() == FILE_GPR) {
3295      emitGPR(0x27, insn->src(s));
3296   } else {
3297      ImmediateValue *imm = insn->getSrc(s)->asImm();
3298      assert(imm);
3299      emitField(0x33, 1, 1);
3300      emitField(0x24, 13, imm->reg.data.u32);
3301   }
3302}
3303
3304void
3305CodeEmitterGM107::emitSUSTx()
3306{
3307   const TexInstruction *insn = this->insn->asTex();
3308
3309   emitInsn(0xeb200000);
3310   if (insn->op == OP_SUSTB)
3311      emitField(0x34, 1, 1);
3312   emitSUTarget();
3313
3314   emitLDSTc(0x18);
3315   emitField(0x14, 4, 0xf); // rgba
3316   emitGPR  (0x08, insn->src(0));
3317   emitGPR  (0x00, insn->src(1));
3318
3319   emitSUHandle(2);
3320}
3321
3322void
3323CodeEmitterGM107::emitSULDx()
3324{
3325   const TexInstruction *insn = this->insn->asTex();
3326   int type = 0;
3327
3328   emitInsn(0xeb000000);
3329   if (insn->op == OP_SULDB)
3330      emitField(0x34, 1, 1);
3331   emitSUTarget();
3332
3333   switch (insn->dType) {
3334   case TYPE_S8:   type = 1; break;
3335   case TYPE_U16:  type = 2; break;
3336   case TYPE_S16:  type = 3; break;
3337   case TYPE_U32:  type = 4; break;
3338   case TYPE_U64:  type = 5; break;
3339   case TYPE_B128: type = 6; break;
3340   default:
3341      assert(insn->dType == TYPE_U8);
3342      break;
3343   }
3344   emitLDSTc(0x18);
3345   emitField(0x14, 3, type);
3346   emitGPR  (0x00, insn->def(0));
3347   emitGPR  (0x08, insn->src(0));
3348
3349   emitSUHandle(1);
3350}
3351
3352void
3353CodeEmitterGM107::emitSUREDx()
3354{
3355   const TexInstruction *insn = this->insn->asTex();
3356   uint8_t type = 0, subOp;
3357
3358   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3359      emitInsn(0xeac00000);
3360   else
3361      emitInsn(0xea600000);
3362
3363   if (insn->op == OP_SUREDB)
3364      emitField(0x34, 1, 1);
3365   emitSUTarget();
3366
3367   // destination type
3368   switch (insn->dType) {
3369   case TYPE_S32: type = 1; break;
3370   case TYPE_U64: type = 2; break;
3371   case TYPE_F32: type = 3; break;
3372   case TYPE_S64: type = 5; break;
3373   default:
3374      assert(insn->dType == TYPE_U32);
3375      break;
3376   }
3377
3378   // atomic operation
3379   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3380      subOp = 0;
3381   } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3382      subOp = 8;
3383   } else {
3384      subOp = insn->subOp;
3385   }
3386
3387   emitField(0x24, 3, type);
3388   emitField(0x1d, 4, subOp);
3389   emitGPR  (0x14, insn->src(1));
3390   emitGPR  (0x08, insn->src(0));
3391   emitGPR  (0x00, insn->def(0));
3392
3393   emitSUHandle(2);
3394}
3395
3396/*******************************************************************************
3397 * assembler front-end
3398 ******************************************************************************/
3399
// Encode a single IR instruction into the output buffer.
//
// The instruction is stored in the `insn` member and dispatched, based on its
// opcode (and, for some opcodes, its types or register files), to the matching
// emitXXX() helper. On success `code` is advanced by two 32-bit words and
// `codeSize` by 8.
//
// When writeIssueDelays is set, one extra 8-byte word is reserved whenever
// codeSize is a multiple of 32; the `sched` value of this instruction and of
// the ones following it in the same 32-byte group is packed into that word,
// 21 bits each.
//
// Returns false, without emitting anything, if the instruction's encSize is
// not 8 or the output buffer is too small. Also returns false when the opcode
// is not handled, in which case a NOP is emitted in its place.
bool
CodeEmitterGM107::emitInstruction(Instruction *i)
{
   // Bytes needed: 8 for the instruction, plus 8 for a new scheduling word
   // when this instruction starts a 32-byte group.
   const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
   bool ret = true;

   insn = i;

   if (insn->encSize != 8) {
      ERROR("skipping undecodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + size > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (writeIssueDelays) {
      // n is the index of this instruction's slot inside the current
      // scheduling word; it is -1 when a new word has to be started.
      int n = ((codeSize & 0x1f) / 8) - 1;
      if (n < 0) {
         // Reserve and zero the scheduling word, remember where it lives.
         data = code;
         data[0] = 0x00000000;
         data[1] = 0x00000000;
         code += 2;
         codeSize += 8;
         n++;
      }

      emitField(data, n * 21, 21, insn->sched);
   }

   switch (insn->op) {
   case OP_EXIT:
      emitEXIT();
      break;
   case OP_BRA:
      emitBRA();
      break;
   case OP_CALL:
      emitCAL();
      break;
   case OP_PRECONT:
      emitPCNT();
      break;
   case OP_CONT:
      emitCONT();
      break;
   case OP_PREBREAK:
      emitPBK();
      break;
   case OP_BREAK:
      emitBRK();
      break;
   case OP_PRERET:
      emitPRET();
      break;
   case OP_RET:
      emitRET();
      break;
   case OP_JOINAT:
      emitSSY();
      break;
   case OP_JOIN:
      emitSYNC();
      break;
   case OP_QUADON:
      emitSAM();
      break;
   case OP_QUADPOP:
      emitRAM();
      break;
   case OP_MOV:
      emitMOV();
      break;
   case OP_RDSV:
      if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
         emitCS2R();
      else
         emitS2R();
      break;
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
   case OP_FLOOR:
   case OP_CEIL:
   case OP_TRUNC:
   case OP_CVT:
      // Conversions involving a predicate are emitted as a move; everything
      // else picks the converter from the float-ness of dType and sType.
      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
                                 insn->src(0).getFile() == FILE_PREDICATE)) {
         emitMOV();
      } else if (isFloatType(insn->dType)) {
         if (isFloatType(insn->sType))
            emitF2F();
         else
            emitI2F();
      } else {
         if (isFloatType(insn->sType))
            emitF2I();
         else
            emitI2I();
      }
      break;
   case OP_SHFL:
      emitSHFL();
      break;
   case OP_ADD:
   case OP_SUB:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDADD();
         else
            emitFADD();
      } else {
         emitIADD();
      }
      break;
   case OP_MUL:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMUL();
         else
            emitFMUL();
      } else {
         emitIMUL();
      }
      break;
   case OP_MAD:
   case OP_FMA:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDFMA();
         else
            emitFFMA();
      } else {
         emitIMAD();
      }
      break;
   case OP_SHLADD:
      emitISCADD();
      break;
   case OP_XMAD:
      emitXMAD();
      break;
   case OP_MIN:
   case OP_MAX:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMNMX();
         else
            emitFMNMX();
      } else {
         emitIMNMX();
      }
      break;
   case OP_SHL:
      // Shifts on 8-byte source types go through emitSHF().
      if (typeSizeof(insn->sType) == 8)
         emitSHF();
      else
         emitSHL();
      break;
   case OP_SHR:
      if (typeSizeof(insn->sType) == 8)
         emitSHF();
      else
         emitSHR();
      break;
   case OP_POPCNT:
      emitPOPC();
      break;
   case OP_INSBF:
      emitBFI();
      break;
   case OP_EXTBF:
      emitBFE();
      break;
   case OP_BFIND:
      emitFLO();
      break;
   case OP_PERMT:
      emitPRMT();
      break;
   case OP_SLCT:
      if (isFloatType(insn->dType))
         emitFCMP();
      else
         emitICMP();
      break;
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      // The destination file selects between the xSET and xSETP variants, the
      // source type selects the D/F/I flavour.
      if (insn->def(0).getFile() != FILE_PREDICATE) {
         if (isFloatType(insn->sType))
            if (insn->sType == TYPE_F64)
               emitDSET();
            else
               emitFSET();
         else
            emitISET();
      } else {
         if (isFloatType(insn->sType))
            if (insn->sType == TYPE_F64)
               emitDSETP();
            else
               emitFSETP();
         else
            emitISETP();
      }
      break;
   case OP_SELP:
      emitSEL();
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitRRO();
      break;
   case OP_COS:
   case OP_SIN:
   case OP_EX2:
   case OP_LG2:
   case OP_RCP:
   case OP_RSQ:
   case OP_SQRT:
      emitMUFU();
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      switch (insn->def(0).getFile()) {
      case FILE_GPR: emitLOP(); break;
      case FILE_PREDICATE: emitPSETP(); break;
      default:
         assert(!"invalid bool op");
      }
      break;
   case OP_NOT:
      emitNOT();
      break;
   case OP_LOAD:
      // The memory file of source 0 selects the load variant.
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_CONST : emitLDC(); break;
      case FILE_MEMORY_LOCAL : emitLDL(); break;
      case FILE_MEMORY_SHARED: emitLDS(); break;
      case FILE_MEMORY_GLOBAL: emitLD(); break;
      default:
         assert(!"invalid load");
         emitNOP();
         break;
      }
      break;
   case OP_STORE:
      // The memory file of source 0 selects the store variant.
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_LOCAL : emitSTL(); break;
      case FILE_MEMORY_SHARED: emitSTS(); break;
      case FILE_MEMORY_GLOBAL: emitST(); break;
      default:
         assert(!"invalid store");
         emitNOP();
         break;
      }
      break;
   case OP_ATOM:
      // Non-shared atomics without a destination, whose sub-operation is
      // below NV50_IR_SUBOP_ATOM_CAS, are emitted through emitRED().
      if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
         emitATOMS();
      else
         if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
            emitRED();
         else
            emitATOM();
      break;
   case OP_CCTL:
      emitCCTL();
      break;
   case OP_VFETCH:
      emitALD();
      break;
   case OP_EXPORT:
      emitAST();
      break;
   case OP_PFETCH:
      emitISBERD();
      break;
   case OP_AFETCH:
      emitAL2P();
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitIPA();
      break;
   case OP_PIXLD:
      emitPIXLD();
      break;
   case OP_TEX:
   case OP_TXL:
      // Texture instructions flagged tex.scalar use emitTEXS().
      if (insn->asTex()->tex.scalar)
         emitTEXS();
      else
         emitTEX();
      break;
   case OP_TXB:
      emitTEX();
      break;
   case OP_TXF:
      if (insn->asTex()->tex.scalar)
         emitTEXS();
      else
         emitTLD();
      break;
   case OP_TXG:
      if (insn->asTex()->tex.scalar)
         emitTEXS();
      else
         emitTLD4();
      break;
   case OP_TXD:
      emitTXD();
      break;
   case OP_TXQ:
      emitTXQ();
      break;
   case OP_TXLQ:
      emitTMML();
      break;
   case OP_TEXBAR:
      emitDEPBAR();
      break;
   case OP_QUADOP:
      emitFSWZADD();
      break;
   case OP_NOP:
      emitNOP();
      break;
   case OP_DISCARD:
      emitKIL();
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT();
      break;
   case OP_BAR:
      emitBAR();
      break;
   case OP_MEMBAR:
      emitMEMBAR();
      break;
   case OP_VOTE:
      emitVOTE();
      break;
   case OP_SUSTB:
   case OP_SUSTP:
      emitSUSTx();
      break;
   case OP_SULDB:
   case OP_SULDP:
      emitSULDx();
      break;
   case OP_SUREDB:
   case OP_SUREDP:
      emitSUREDx();
      break;
   default:
      // Unknown opcode: keep the stream well-formed with a NOP but report
      // the failure to the caller.
      assert(!"invalid opcode");
      emitNOP();
      ret = false;
      break;
   }

   // Placeholder: nothing is emitted for the join flag here.
   if (insn->join) {
      /*XXX*/
   }

   // Step over the 8 bytes that were just written.
   code += 2;
   codeSize += 8;
   return ret;
}
3775
3776uint32_t
3777CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3778{
3779   return 8;
3780}
3781
3782/*******************************************************************************
3783 * sched data calculator
3784 ******************************************************************************/
3785
3786inline void
3787SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3788{
3789   assert(cnt < 16);
3790   insn->sched |= cnt;
3791}
3792
3793inline void
3794SchedDataCalculatorGM107::emitYield(Instruction *insn)
3795{
3796   insn->sched |= 1 << 4;
3797}
3798
3799inline void
3800SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3801{
3802   assert(id < 6);
3803   if ((insn->sched & 0xe0) == 0xe0)
3804      insn->sched ^= 0xe0;
3805   insn->sched |= id << 5;
3806}
3807
3808inline void
3809SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3810{
3811   assert(id < 6);
3812   if ((insn->sched & 0x700) == 0x700)
3813      insn->sched ^= 0x700;
3814   insn->sched |= id << 8;
3815}
3816
3817inline void
3818SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3819{
3820   assert(id < 6);
3821   insn->sched |= 1 << (11 + id);
3822}
3823
3824inline void
3825SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3826{
3827   assert(id < 4);
3828   insn->sched |= 1 << (17 + id);
3829}
3830
3831inline void
3832SchedDataCalculatorGM107::printSchedInfo(int cycle,
3833                                         const Instruction *insn) const
3834{
3835   uint8_t st, yl, wr, rd, wt, ru;
3836
3837   st = (insn->sched & 0x00000f) >> 0;
3838   yl = (insn->sched & 0x000010) >> 4;
3839   wr = (insn->sched & 0x0000e0) >> 5;
3840   rd = (insn->sched & 0x000700) >> 8;
3841   wt = (insn->sched & 0x01f800) >> 11;
3842   ru = (insn->sched & 0x1e0000) >> 17;
3843
3844   INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3845        cycle, st, yl, wr, rd, wt, ru);
3846}
3847
3848inline int
3849SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3850{
3851   return insn->sched & 0xf;
3852}
3853
3854inline int
3855SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3856{
3857   return (insn->sched & 0x0000e0) >> 5;
3858}
3859
3860inline int
3861SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3862{
3863   return (insn->sched & 0x000700) >> 8;
3864}
3865
3866inline int
3867SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3868{
3869   return (insn->sched & 0x01f800) >> 11;
3870}
3871
3872// Emit the reuse flag which allows to make use of the new memory hierarchy
3873// introduced since Maxwell, the operand reuse cache.
3874//
3875// It allows to reduce bank conflicts by caching operands. Each time you issue
3876// an instruction, that flag can tell the hw which operands are going to be
3877// re-used by the next instruction. Note that the next instruction has to use
3878// the same GPR id in the same operand slot.
3879void
3880SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3881{
3882   Instruction *next = insn->next;
3883   BitSet defs(255, true);
3884
3885   if (!targ->isReuseSupported(insn))
3886      return;
3887
3888   for (int d = 0; insn->defExists(d); ++d) {
3889      const Value *def = insn->def(d).rep();
3890      if (insn->def(d).getFile() != FILE_GPR)
3891         continue;
3892      if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3893         continue;
3894      defs.set(def->reg.data.id);
3895   }
3896
3897   for (int s = 0; insn->srcExists(s); s++) {
3898      const Value *src = insn->src(s).rep();
3899      if (insn->src(s).getFile() != FILE_GPR)
3900         continue;
3901      if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3902         continue;
3903      if (defs.test(src->reg.data.id))
3904         continue;
3905      if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3906         continue;
3907      if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3908         continue;
3909      assert(s < 4);
3910      emitReuse(insn, s);
3911   }
3912}
3913
3914void
3915SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3916{
3917   int a = v->reg.data.id, b;
3918
3919   switch (v->reg.file) {
3920   case FILE_GPR:
3921      b = a + v->reg.size / 4;
3922      for (int r = a; r < b; ++r)
3923         score->rd.r[r] = ready;
3924      break;
3925   case FILE_PREDICATE:
3926      // To immediately use a predicate set by any instructions, the minimum
3927      // number of stall counts is 13.
3928      score->rd.p[a] = cycle + 13;
3929      break;
3930   case FILE_FLAGS:
3931      score->rd.c = ready;
3932      break;
3933   default:
3934      break;
3935   }
3936}
3937
3938void
3939SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3940{
3941   int a = v->reg.data.id, b;
3942   int ready = cycle;
3943
3944   switch (v->reg.file) {
3945   case FILE_GPR:
3946      b = a + v->reg.size / 4;
3947      for (int r = a; r < b; ++r)
3948         ready = MAX2(ready, score->rd.r[r]);
3949      break;
3950   case FILE_PREDICATE:
3951      ready = MAX2(ready, score->rd.p[a]);
3952      break;
3953   case FILE_FLAGS:
3954      ready = MAX2(ready, score->rd.c);
3955      break;
3956   default:
3957      break;
3958   }
3959   if (cycle < ready)
3960      delay = MAX2(delay, ready - cycle);
3961}
3962
3963void
3964SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3965{
3966   const int ready = cycle + targ->getLatency(insn);
3967
3968   for (int d = 0; insn->defExists(d); ++d)
3969      recordWr(insn->getDef(d), cycle, ready);
3970
3971#ifdef GM107_DEBUG_SCHED_DATA
3972   score->print(cycle);
3973#endif
3974}
3975
// Bounds used by setDelay() when clamping the stall count of an instruction
// that is not dual-issued. The maximum is the largest value emitStall()
// accepts (it asserts cnt < 16).
#define GM107_MIN_ISSUE_DELAY 0x1
#define GM107_MAX_ISSUE_DELAY 0xf
3978
3979int
3980SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3981{
3982   int delay = 0, ready = cycle;
3983
3984   for (int s = 0; insn->srcExists(s); ++s)
3985      checkRd(insn->getSrc(s), cycle, delay);
3986
3987   // TODO: make use of getReadLatency()!
3988
3989   return MAX2(delay, ready - cycle);
3990}
3991
3992void
3993SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3994                                   const Instruction *next)
3995{
3996   const OpClass cl = targ->getOpClass(insn->op);
3997   int wr, rd;
3998
3999   if (insn->op == OP_EXIT ||
4000       insn->op == OP_BAR ||
4001       insn->op == OP_MEMBAR) {
4002      delay = GM107_MAX_ISSUE_DELAY;
4003   } else
4004   if (insn->op == OP_QUADON ||
4005       insn->op == OP_QUADPOP) {
4006      delay = 0xd;
4007   } else
4008   if (cl == OPCLASS_FLOW || insn->join) {
4009      delay = 0xd;
4010   }
4011
4012   if (!next || !targ->canDualIssue(insn, next)) {
4013      delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4014   } else {
4015      delay = 0x0; // dual-issue
4016   }
4017
4018   wr = getWrDepBar(insn);
4019   rd = getRdDepBar(insn);
4020
4021   if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4022      // Barriers take one additional clock cycle to become active on top of
4023      // the clock consumed by the instruction producing it.
4024      if (!next || insn->bb != next->bb) {
4025         delay = 0x2;
4026      } else {
4027         int wt = getWtDepBar(next);
4028         if ((wt & (1 << wr)) | (wt & (1 << rd)))
4029            delay = 0x2;
4030      }
4031   }
4032
4033   emitStall(insn, delay);
4034}
4035
4036
4037// Return true when the given instruction needs to emit a read dependency
4038// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4039// setting the maximum number of stall counts is not enough.
4040bool
4041SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4042{
4043   BitSet srcs(255, true), defs(255, true);
4044   int a, b;
4045
4046   if (!targ->isBarrierRequired(insn))
4047      return false;
4048
4049   // Do not emit a read dependency barrier when the instruction doesn't use
4050   // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4051   for (int s = 0; insn->srcExists(s); ++s) {
4052      const Value *src = insn->src(s).rep();
4053      if (insn->src(s).getFile() != FILE_GPR)
4054         continue;
4055      if (src->reg.data.id == 255)
4056         continue;
4057
4058      a = src->reg.data.id;
4059      b = a + src->reg.size / 4;
4060      for (int r = a; r < b; ++r)
4061         srcs.set(r);
4062   }
4063
4064   if (!srcs.popCount())
4065      return false;
4066
4067   // Do not emit a read dependency barrier when the output GPRs are equal to
4068   // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4069   // be produced and WaR hazards are prevented.
4070   for (int d = 0; insn->defExists(d); ++d) {
4071      const Value *def = insn->def(d).rep();
4072      if (insn->def(d).getFile() != FILE_GPR)
4073         continue;
4074      if (def->reg.data.id == 255)
4075         continue;
4076
4077      a = def->reg.data.id;
4078      b = a + def->reg.size / 4;
4079      for (int r = a; r < b; ++r)
4080         defs.set(r);
4081   }
4082
4083   srcs.andNot(defs);
4084   if (!srcs.popCount())
4085      return false;
4086
4087   return true;
4088}
4089
4090// Return true when the given instruction needs to emit a write dependency
4091// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4092// setting the maximum number of stall counts is not enough. This is only legal
4093// if the instruction output something.
4094bool
4095SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4096{
4097   if (!targ->isBarrierRequired(insn))
4098      return false;
4099
4100   for (int d = 0; insn->defExists(d); ++d) {
4101      if (insn->def(d).getFile() == FILE_GPR ||
4102          insn->def(d).getFile() == FILE_FLAGS ||
4103          insn->def(d).getFile() == FILE_PREDICATE)
4104         return true;
4105   }
4106   return false;
4107}
4108
4109// Helper function for findFirstUse() and findFirstDef()
4110bool
4111SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4112                                          const Value *val) const
4113{
4114   if (val->reg.file != FILE_GPR &&
4115       val->reg.file != FILE_PREDICATE &&
4116       val->reg.file != FILE_FLAGS)
4117      return false;
4118
4119   for (int d = 0; insn->defExists(d); ++d) {
4120      const Value* def = insn->getDef(d);
4121      int minGPR = def->reg.data.id;
4122      int maxGPR = minGPR + def->reg.size / 4 - 1;
4123
4124      if (def->reg.file != val->reg.file)
4125         continue;
4126
4127      if (def->reg.file == FILE_GPR) {
4128         if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4129             val->reg.data.id > maxGPR)
4130            continue;
4131         return true;
4132      } else
4133      if (def->reg.file == FILE_PREDICATE) {
4134         if (val->reg.data.id != minGPR)
4135            continue;
4136         return true;
4137      } else
4138      if (def->reg.file == FILE_FLAGS) {
4139         if (val->reg.data.id != minGPR)
4140            continue;
4141         return true;
4142      }
4143   }
4144
4145   return false;
4146}
4147
4148// Find the next instruction inside the same basic block which uses (reads or
4149// writes from) the output of the given instruction in order to avoid RaW and
4150// WaW hazards.
4151Instruction *
4152SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4153{
4154   Instruction *insn, *next;
4155
4156   if (!bari->defExists(0))
4157      return NULL;
4158
4159   for (insn = bari->next; insn != NULL; insn = next) {
4160      next = insn->next;
4161
4162      for (int s = 0; insn->srcExists(s); ++s)
4163         if (doesInsnWriteTo(bari, insn->getSrc(s)))
4164            return insn;
4165
4166      for (int d = 0; insn->defExists(d); ++d)
4167         if (doesInsnWriteTo(bari, insn->getDef(d)))
4168            return insn;
4169   }
4170   return NULL;
4171}
4172
4173// Find the next instruction inside the same basic block which overwrites, at
4174// least, one source of the given instruction in order to avoid WaR hazards.
4175Instruction *
4176SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4177{
4178   Instruction *insn, *next;
4179
4180   if (!bari->srcExists(0))
4181      return NULL;
4182
4183   for (insn = bari->next; insn != NULL; insn = next) {
4184      next = insn->next;
4185
4186      for (int s = 0; bari->srcExists(s); ++s)
4187         if (doesInsnWriteTo(insn, bari->getSrc(s)))
4188            return insn;
4189   }
4190   return NULL;
4191}
4192
// Dependency barriers:
// This pass is a bit ugly and could probably be improved by performing a
// better allocation.
//
// The main idea is to avoid WaR and RaW hazards by emitting read/write
// dependency barriers using the control codes.
//
// The block is walked twice:
//  1. Barriers are allocated for the instructions that need them, and a wait
//     is emitted on the first instruction that uses the protected registers.
//  2. Waits on barriers that no earlier instruction of the block has set are
//     removed again.
//
// Always returns true.
bool
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
{
   // Barriers that have been produced and whose consumer has not been
   // reached yet, for write and read barriers respectively.
   std::list<LiveBarUse> live_uses;
   std::list<LiveBarDef> live_defs;
   Instruction *insn, *next;
   // One bit per barrier id; a set bit means the barrier is allocated.
   BitSet bars(6, true);
   int bar_id;

   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      Instruction *usei = NULL, *defi = NULL;
      bool need_wr_bar, need_rd_bar;

      next = insn->next;

      // Expire old barrier uses.
      // Once the first user has been reached, make this instruction wait on
      // the write barrier and release the barrier id.
      for (std::list<LiveBarUse>::iterator it = live_uses.begin();
           it != live_uses.end();) {
         if (insn->serial >= it->usei->serial) {
            int wr = getWrDepBar(it->insn);
            emitWtDepBar(insn, wr);
            bars.clr(wr); // free barrier
            it = live_uses.erase(it);
            continue;
         }
         ++it;
      }

      // Expire old barrier defs.
      // Same as above, for read barriers and the first overwriting
      // instruction.
      for (std::list<LiveBarDef>::iterator it = live_defs.begin();
           it != live_defs.end();) {
         if (insn->serial >= it->defi->serial) {
            int rd = getRdDepBar(it->insn);
            emitWtDepBar(insn, rd);
            bars.clr(rd); // free barrier
            it = live_defs.erase(it);
            continue;
         }
         ++it;
      }

      need_wr_bar = needWrDepBar(insn);
      need_rd_bar = needRdDepBar(insn);

      if (need_wr_bar) {
         // When the instruction requires to emit a write dependency barrier
         // (all which write something at a variable latency), find the next
         // instruction which reads the outputs (or writes to them, potentially
         // completing before this insn).
         usei = findFirstUse(insn);

         // Allocate and emit a new barrier.
         // When no barrier is free, barrier 5 is used anyway.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitWrDepBar(insn, bar_id);
         if (usei)
            live_uses.push_back(LiveBarUse(insn, usei));
      }

      if (need_rd_bar) {
         // When the instruction requires to emit a read dependency barrier
         // (all which read something at a variable latency), find the next
         // instruction which will write the inputs.
         defi = findFirstDef(insn);

         // No read barrier is allocated when the first user of the outputs
         // does not come after the first instruction overwriting the inputs:
         // the wait on the write barrier is emitted at usei.
         if (usei && defi && usei->serial <= defi->serial)
            continue;

         // Allocate and emit a new barrier.
         // When no barrier is free, barrier 5 is used anyway.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitRdDepBar(insn, bar_id);
         if (defi)
            live_defs.push_back(LiveBarDef(insn, defi));
      }
   }

   // Remove unnecessary barrier waits.
   // alive_bars tracks the barriers set by an earlier instruction of this
   // block and not waited on since.
   BitSet alive_bars(6, true);
   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      int wr, rd, wt;

      next = insn->next;

      wr = getWrDepBar(insn);
      rd = getRdDepBar(insn);
      wt = getWtDepBar(insn);

      for (int idx = 0; idx < 6; ++idx) {
         if (!(wt & (1 << idx)))
            continue;
         if (!alive_bars.test(idx)) {
            // Nothing set this barrier: drop the wait bit.
            insn->sched &= ~(1 << (11  + idx));
         } else {
            // The wait consumes the barrier.
            alive_bars.clr(idx);
         }
      }

      // Field values of 6 or more are not barrier ids (visit() initializes
      // both fields to 7) and are not tracked.
      if (wr < 6)
         alive_bars.set(wr);
      if (rd < 6)
         alive_bars.set(rd);
   }

   return true;
}
4309
4310bool
4311SchedDataCalculatorGM107::visit(Function *func)
4312{
4313   ArrayList insns;
4314
4315   func->orderInstructions(insns);
4316
4317   scoreBoards.resize(func->cfg.getSize());
4318   for (size_t i = 0; i < scoreBoards.size(); ++i)
4319      scoreBoards[i].wipe();
4320   return true;
4321}
4322
// Compute the scheduling data of every instruction of a basic block.
//
// All instructions first get a default value of 0x7e0, which sets both the
// write and the read barrier fields to 7. Unless the NV50_PROG_SCHED debug
// option is disabled, dependency barriers are then inserted, and stall counts
// and reuse flags are computed from the scoreboard, which is seeded from the
// scoreboards of the incoming blocks (back edges excluded).
//
// Always returns true.
bool
SchedDataCalculatorGM107::visit(BasicBlock *bb)
{
   Instruction *insn, *next = NULL;
   int cycle = 0;

   // Note: this loop declares its own `insn`, shadowing the one above.
   for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
      /*XXX*/
      insn->sched = 0x7e0;
   }

   if (!debug_get_bool_option("NV50_PROG_SCHED", true))
      return true;

   // Insert read/write dependency barriers for instructions which don't
   // operate at a fixed latency.
   insertBarriers(bb);

   score = &scoreBoards.at(bb->getId());

   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      // back branches will wait until all target dependencies are satisfied
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
         continue;
      BasicBlock *in = BasicBlock::get(ei.getNode());
      score->setMax(&scoreBoards.at(in->getId()));
   }

#ifdef GM107_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   // Because barriers are allocated locally (intra-BB), we have to make sure
   // that all produced barriers have been consumed before entering inside a
   // new basic block. The best way is to do a global allocation pre RA but
   // it's really more difficult, especially because of the phi nodes. Anyways,
   // it seems like that waiting on a barrier which has already been consumed
   // doesn't add any additional cost, it's just not elegant!
   Instruction *start = bb->getEntry();
   if (start && bb->cfg.incidentCount() > 0) {
      for (int b = 0; b < 6; b++)
         emitWtDepBar(start, b);
   }

   // Handle every instruction that has a successor inside the block; the
   // stall of an instruction is derived from what its successor needs.
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getStall(insn);

      setReuseFlag(insn);

      // XXX: The yield flag seems to destroy a bunch of things when it is
      // set on every instruction, need investigation.
      //emitYield(insn);

#ifdef GM107_DEBUG_SCHED_DATA
      printSchedInfo(cycle, insn);
      insn->print();
      next->print();
#endif
   }

   // Empty block: nothing left to do. Otherwise insn is the last instruction.
   if (!insn)
      return true;
   commitInsn(insn, cycle);

   int bbDelay = -1;

#ifdef GM107_DEBUG_SCHED_DATA
   fprintf(stderr, "last instruction is : ");
   insn->print();
   fprintf(stderr, "cycle=%d\n", cycle);
#endif

   // The stall of the last instruction depends on the successor blocks.
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // Only test the first instruction of the outgoing block.
         next = out->getEntry();
         if (next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
         } else {
            // When the outgoing BB is empty, make sure to set the number of
            // stall counts needed by the instruction because we don't know the
            // next instruction.
            bbDelay = MAX2(bbDelay, targ->getLatency(insn));
         }
      } else {
         // Wait until all dependencies are satisfied.
         const int regsFree = score->getLatest();
         next = out->getFirst();
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
            c += getStall(next);
         }
         next = NULL;
      }
   }
   // The following instruction is only handed to setDelay() when there is
   // exactly one outgoing edge.
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getStall(insn);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}
4434
4435/*******************************************************************************
4436 * main
4437 ******************************************************************************/
4438
4439void
4440CodeEmitterGM107::prepareEmission(Function *func)
4441{
4442   SchedDataCalculatorGM107 sched(targGM107);
4443   CodeEmitter::prepareEmission(func);
4444   sched.run(func, true, true);
4445}
4446
// Number of 24-byte groups needed to hold `size` bytes, rounded up.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
4451
4452void
4453CodeEmitterGM107::prepareEmission(Program *prog)
4454{
4455   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4456        !fi.end(); fi.next()) {
4457      Function *func = reinterpret_cast<Function *>(fi.get());
4458      func->binPos = prog->binSize;
4459      prepareEmission(func);
4460
4461      // adjust sizes & positions for schedulding info:
4462      if (prog->getTarget()->hasSWSched) {
4463         uint32_t adjPos = func->binPos;
4464         BasicBlock *bb = NULL;
4465         for (int i = 0; i < func->bbCount; ++i) {
4466            bb = func->bbArray[i];
4467            int32_t adjSize = bb->binSize;
4468            if (adjPos % 32) {
4469               adjSize -= 32 - adjPos % 32;
4470               if (adjSize < 0)
4471                  adjSize = 0;
4472            }
4473            adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4474            bb->binPos = adjPos;
4475            bb->binSize = adjSize;
4476            adjPos += adjSize;
4477         }
4478         if (bb)
4479            func->binSize = adjPos - func->binPos;
4480      }
4481
4482      prog->binSize += func->binSize;
4483   }
4484}
4485
4486CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4487   : CodeEmitter(target),
4488     targGM107(target),
4489     progType(Program::TYPE_VERTEX),
4490     insn(NULL),
4491     writeIssueDelays(target->hasSWSched),
4492     data(NULL)
4493{
4494   code = NULL;
4495   codeSize = codeSizeLimit = 0;
4496   relocInfo = NULL;
4497}
4498
4499CodeEmitter *
4500TargetGM107::createCodeEmitterGM107(Program::Type type)
4501{
4502   CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4503   emit->setProgramType(type);
4504   return emit;
4505}
4506
4507} // namespace nv50_ir
4508