1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "nv50_ir.h"
24#include "nv50_ir_target.h"
25#include "nv50_ir_driver.h"
26
27namespace nv50_ir {
28
29Modifier::Modifier(operation op)
30{
31   switch (op) {
32   case OP_NEG: bits = NV50_IR_MOD_NEG; break;
33   case OP_ABS: bits = NV50_IR_MOD_ABS; break;
34   case OP_SAT: bits = NV50_IR_MOD_SAT; break;
35   case OP_NOT: bits = NV50_IR_MOD_NOT; break;
36   default:
37      bits = 0;
38      break;
39   }
40}
41
42Modifier Modifier::operator*(const Modifier m) const
43{
44   unsigned int a, b, c;
45
46   b = m.bits;
47   if (this->bits & NV50_IR_MOD_ABS)
48      b &= ~NV50_IR_MOD_NEG;
49
50   a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
51   c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
52
53   return Modifier(a | c);
54}
55
56ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
57{
58   indirect[0] = -1;
59   indirect[1] = -1;
60   usedAsPtr = false;
61   set(v);
62}
63
64ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
65{
66   set(ref);
67   usedAsPtr = ref.usedAsPtr;
68}
69
70ValueRef::~ValueRef()
71{
72   this->set(NULL);
73}
74
75bool ValueRef::getImmediate(ImmediateValue &imm) const
76{
77   const ValueRef *src = this;
78   Modifier m;
79   DataType type = src->insn->sType;
80
81   while (src) {
82      if (src->mod) {
83         if (src->insn->sType != type)
84            break;
85         m *= src->mod;
86      }
87      if (src->getFile() == FILE_IMMEDIATE) {
88         imm = *(src->value->asImm());
89         // The immediate's type isn't required to match its use, it's
90         // more of a hint; applying a modifier makes use of that hint.
91         imm.reg.type = type;
92         m.applyTo(imm);
93         return true;
94      }
95
96      Instruction *insn = src->value->getUniqueInsn();
97
98      if (insn && insn->op == OP_MOV) {
99         src = &insn->src(0);
100         if (src->mod)
101            WARN("OP_MOV with modifier encountered !\n");
102      } else {
103         src = NULL;
104      }
105   }
106   return false;
107}
108
109ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
110{
111   set(v);
112}
113
114ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
115{
116   set(def.get());
117}
118
119ValueDef::~ValueDef()
120{
121   this->set(NULL);
122}
123
124void
125ValueRef::set(const ValueRef &ref)
126{
127   this->set(ref.get());
128   mod = ref.mod;
129   indirect[0] = ref.indirect[0];
130   indirect[1] = ref.indirect[1];
131}
132
133void
134ValueRef::set(Value *refVal)
135{
136   if (value == refVal)
137      return;
138   if (value)
139      value->uses.erase(this);
140   if (refVal)
141      refVal->uses.insert(this);
142
143   value = refVal;
144}
145
146void
147ValueDef::set(Value *defVal)
148{
149   if (value == defVal)
150      return;
151   if (value)
152      value->defs.remove(this);
153   if (defVal)
154      defVal->defs.push_back(this);
155
156   value = defVal;
157}
158
159// Check if we can replace this definition's value by the value in @rep,
160// including the source modifiers, i.e. make sure that all uses support
161// @rep.mod.
162bool
163ValueDef::mayReplace(const ValueRef &rep)
164{
165   if (!rep.mod)
166      return true;
167
168   if (!insn || !insn->bb) // Unbound instruction ?
169      return false;
170
171   const Target *target = insn->bb->getProgram()->getTarget();
172
173   for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
174        ++it) {
175      Instruction *insn = (*it)->getInsn();
176      int s = -1;
177
178      for (int i = 0; insn->srcExists(i); ++i) {
179         if (insn->src(i).get() == value) {
180            // If there are multiple references to us we'd have to check if the
181            // combination of mods is still supported, but just bail for now.
182            if (&insn->src(i) != (*it))
183               return false;
184            s = i;
185         }
186      }
187      assert(s >= 0); // integrity of uses list
188
189      if (!target->isModSupported(insn, s, rep.mod))
190         return false;
191   }
192   return true;
193}
194
195void
196ValueDef::replace(const ValueRef &repVal, bool doSet)
197{
198   assert(mayReplace(repVal));
199
200   if (value == repVal.get())
201      return;
202
203   while (!value->uses.empty()) {
204      ValueRef *ref = *value->uses.begin();
205      ref->set(repVal.get());
206      ref->mod *= repVal.mod;
207   }
208
209   if (doSet)
210      set(repVal.get());
211}
212
213Value::Value() : id(-1)
214{
215  join = this;
216  memset(&reg, 0, sizeof(reg));
217  reg.size = 4;
218}
219
220LValue::LValue(Function *fn, DataFile file)
221{
222   reg.file = file;
223   reg.size = (file != FILE_PREDICATE) ? 4 : 1;
224   reg.data.id = -1;
225
226   compMask = 0;
227   compound = 0;
228   ssa = 0;
229   fixedReg = 0;
230   noSpill = 0;
231
232   fn->add(this, this->id);
233}
234
235LValue::LValue(Function *fn, LValue *lval)
236{
237   assert(lval);
238
239   reg.file = lval->reg.file;
240   reg.size = lval->reg.size;
241   reg.data.id = -1;
242
243   compMask = 0;
244   compound = 0;
245   ssa = 0;
246   fixedReg = 0;
247   noSpill = 0;
248
249   fn->add(this, this->id);
250}
251
252LValue *
253LValue::clone(ClonePolicy<Function>& pol) const
254{
255   LValue *that = new_LValue(pol.context(), reg.file);
256
257   pol.set<Value>(this, that);
258
259   that->reg.size = this->reg.size;
260   that->reg.type = this->reg.type;
261   that->reg.data = this->reg.data;
262
263   return that;
264}
265
266bool
267LValue::isUniform() const
268{
269   if (defs.size() > 1)
270      return false;
271   Instruction *insn = getInsn();
272   if (!insn)
273      return false;
274   // let's not try too hard here for now ...
275   return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
276}
277
278Symbol::Symbol(Program *prog, DataFile f, uint8_t fidx)
279{
280   baseSym = NULL;
281
282   reg.file = f;
283   reg.fileIndex = fidx;
284   reg.data.offset = 0;
285
286   prog->add(this, this->id);
287}
288
289Symbol *
290Symbol::clone(ClonePolicy<Function>& pol) const
291{
292   Program *prog = pol.context()->getProgram();
293
294   Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
295
296   pol.set<Value>(this, that);
297
298   that->reg.size = this->reg.size;
299   that->reg.type = this->reg.type;
300   that->reg.data = this->reg.data;
301
302   that->baseSym = this->baseSym;
303
304   return that;
305}
306
307bool
308Symbol::isUniform() const
309{
310   return
311      reg.file != FILE_SYSTEM_VALUE &&
312      reg.file != FILE_MEMORY_LOCAL &&
313      reg.file != FILE_SHADER_INPUT;
314}
315
316ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
317{
318   memset(&reg, 0, sizeof(reg));
319
320   reg.file = FILE_IMMEDIATE;
321   reg.size = 4;
322   reg.type = TYPE_U32;
323
324   reg.data.u32 = uval;
325
326   prog->add(this, this->id);
327}
328
329ImmediateValue::ImmediateValue(Program *prog, float fval)
330{
331   memset(&reg, 0, sizeof(reg));
332
333   reg.file = FILE_IMMEDIATE;
334   reg.size = 4;
335   reg.type = TYPE_F32;
336
337   reg.data.f32 = fval;
338
339   prog->add(this, this->id);
340}
341
342ImmediateValue::ImmediateValue(Program *prog, double dval)
343{
344   memset(&reg, 0, sizeof(reg));
345
346   reg.file = FILE_IMMEDIATE;
347   reg.size = 8;
348   reg.type = TYPE_F64;
349
350   reg.data.f64 = dval;
351
352   prog->add(this, this->id);
353}
354
355ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
356{
357   reg = proto->reg;
358
359   reg.type = ty;
360   reg.size = typeSizeof(ty);
361}
362
363ImmediateValue *
364ImmediateValue::clone(ClonePolicy<Function>& pol) const
365{
366   Program *prog = pol.context()->getProgram();
367   ImmediateValue *that = new_ImmediateValue(prog, 0u);
368
369   pol.set<Value>(this, that);
370
371   that->reg.size = this->reg.size;
372   that->reg.type = this->reg.type;
373   that->reg.data = this->reg.data;
374
375   return that;
376}
377
378bool
379ImmediateValue::isInteger(const int i) const
380{
381   switch (reg.type) {
382   case TYPE_S8:
383      return reg.data.s8 == i;
384   case TYPE_U8:
385      return reg.data.u8 == i;
386   case TYPE_S16:
387      return reg.data.s16 == i;
388   case TYPE_U16:
389      return reg.data.u16 == i;
390   case TYPE_S32:
391   case TYPE_U32:
392      return reg.data.s32 == i; // as if ...
393   case TYPE_S64:
394   case TYPE_U64:
395      return reg.data.s64 == i; // as if ...
396   case TYPE_F32:
397      return reg.data.f32 == static_cast<float>(i);
398   case TYPE_F64:
399      return reg.data.f64 == static_cast<double>(i);
400   default:
401      return false;
402   }
403}
404
405bool
406ImmediateValue::isNegative() const
407{
408   switch (reg.type) {
409   case TYPE_S8:  return reg.data.s8 < 0;
410   case TYPE_S16: return reg.data.s16 < 0;
411   case TYPE_S32:
412   case TYPE_U32: return reg.data.s32 < 0;
413   case TYPE_F32: return reg.data.u32 & (1 << 31);
414   case TYPE_F64: return reg.data.u64 & (1ULL << 63);
415   default:
416      return false;
417   }
418}
419
420bool
421ImmediateValue::isPow2() const
422{
423   if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
424      return util_is_power_of_two_or_zero64(reg.data.u64);
425   else
426      return util_is_power_of_two_or_zero(reg.data.u32);
427}
428
429void
430ImmediateValue::applyLog2()
431{
432   switch (reg.type) {
433   case TYPE_S8:
434   case TYPE_S16:
435   case TYPE_S32:
436      assert(!this->isNegative());
437      FALLTHROUGH;
438   case TYPE_U8:
439   case TYPE_U16:
440   case TYPE_U32:
441      reg.data.u32 = util_logbase2(reg.data.u32);
442      break;
443   case TYPE_S64:
444      assert(!this->isNegative());
445      FALLTHROUGH;
446   case TYPE_U64:
447      reg.data.u64 = util_logbase2_64(reg.data.u64);
448      break;
449   case TYPE_F32:
450      reg.data.f32 = log2f(reg.data.f32);
451      break;
452   case TYPE_F64:
453      reg.data.f64 = log2(reg.data.f64);
454      break;
455   default:
456      assert(0);
457      break;
458   }
459}
460
461bool
462ImmediateValue::compare(CondCode cc, float fval) const
463{
464   if (reg.type != TYPE_F32)
465      ERROR("immediate value is not of type f32");
466
467   switch (static_cast<CondCode>(cc & 7)) {
468   case CC_TR: return true;
469   case CC_FL: return false;
470   case CC_LT: return reg.data.f32 <  fval;
471   case CC_LE: return reg.data.f32 <= fval;
472   case CC_GT: return reg.data.f32 >  fval;
473   case CC_GE: return reg.data.f32 >= fval;
474   case CC_EQ: return reg.data.f32 == fval;
475   case CC_NE: return reg.data.f32 != fval;
476   default:
477      assert(0);
478      return false;
479   }
480}
481
482ImmediateValue&
483ImmediateValue::operator=(const ImmediateValue &that)
484{
485   this->reg = that.reg;
486   return (*this);
487}
488
489bool
490Value::interfers(const Value *that) const
491{
492   uint32_t idA, idB;
493
494   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
495      return false;
496   if (this->asImm())
497      return false;
498
499   if (this->asSym()) {
500      idA = this->join->reg.data.offset;
501      idB = that->join->reg.data.offset;
502   } else {
503      idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
504      idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
505   }
506
507   if (idA < idB)
508      return (idA + this->reg.size > idB);
509   else
510   if (idA > idB)
511      return (idB + that->reg.size > idA);
512   else
513      return (idA == idB);
514}
515
516bool
517Value::equals(const Value *that, bool strict) const
518{
519   if (strict)
520      return this == that;
521
522   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
523      return false;
524   if (that->reg.size != this->reg.size)
525      return false;
526
527   if (that->reg.data.id != this->reg.data.id)
528      return false;
529
530   return true;
531}
532
533bool
534ImmediateValue::equals(const Value *that, bool strict) const
535{
536   const ImmediateValue *imm = that->asImm();
537   if (!imm)
538      return false;
539   return reg.data.u64 == imm->reg.data.u64;
540}
541
542bool
543Symbol::equals(const Value *that, bool strict) const
544{
545   if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
546      return false;
547   assert(that->asSym());
548
549   if (this->baseSym != that->asSym()->baseSym)
550      return false;
551
552   if (reg.file == FILE_SYSTEM_VALUE)
553      return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
554              this->reg.data.sv.index == that->reg.data.sv.index);
555   return this->reg.data.offset == that->reg.data.offset;
556}
557
558void Instruction::init()
559{
560   next = prev = 0;
561   serial = 0;
562
563   cc = CC_ALWAYS;
564   rnd = ROUND_N;
565   cache = CACHE_CA;
566   subOp = 0;
567
568   saturate = 0;
569   join = 0;
570   exit = 0;
571   terminator = 0;
572   ftz = 0;
573   dnz = 0;
574   perPatch = 0;
575   fixed = 0;
576   encSize = 0;
577   ipa = 0;
578   mask = 0;
579   precise = 0;
580
581   lanes = 0xf;
582
583   postFactor = 0;
584
585   predSrc = -1;
586   flagsDef = -1;
587   flagsSrc = -1;
588
589   sched = 0;
590   bb = NULL;
591}
592
593Instruction::Instruction()
594{
595   init();
596
597   op = OP_NOP;
598   dType = sType = TYPE_F32;
599
600   id = -1;
601}
602
603Instruction::Instruction(Function *fn, operation opr, DataType ty)
604{
605   init();
606
607   op = opr;
608   dType = sType = ty;
609
610   fn->add(this, id);
611}
612
613Instruction::~Instruction()
614{
615   if (bb) {
616      Function *fn = bb->getFunction();
617      bb->remove(this);
618      fn->allInsns.remove(id);
619   }
620
621   for (int s = 0; srcExists(s); ++s)
622      setSrc(s, NULL);
623   // must unlink defs too since the list pointers will get deallocated
624   for (int d = 0; defExists(d); ++d)
625      setDef(d, NULL);
626}
627
628void
629Instruction::setDef(int i, Value *val)
630{
631   int size = defs.size();
632   if (i >= size) {
633      defs.resize(i + 1);
634      while (size <= i)
635         defs[size++].setInsn(this);
636   }
637   defs[i].set(val);
638}
639
640void
641Instruction::setSrc(int s, Value *val)
642{
643   int size = srcs.size();
644   if (s >= size) {
645      srcs.resize(s + 1);
646      while (size <= s)
647         srcs[size++].setInsn(this);
648   }
649   srcs[s].set(val);
650}
651
652void
653Instruction::setSrc(int s, const ValueRef& ref)
654{
655   setSrc(s, ref.get());
656   srcs[s].mod = ref.mod;
657}
658
659void
660Instruction::swapSources(int a, int b)
661{
662   Value *value = srcs[a].get();
663   Modifier m = srcs[a].mod;
664
665   setSrc(a, srcs[b]);
666
667   srcs[b].set(value);
668   srcs[b].mod = m;
669}
670
// Updates a stored source index for a shift of sources [@s, ...) by @delta:
// - an index at or past @s moves along with the sources;
// - for a negative @delta, an index inside the erased range
//   [@s + @delta, @s) is invalidated (-1);
// - any other index is left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erasing = (delta < 0);
   if (erasing && index >= s + delta)
      index = -1;
}
679
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// All source indices stored elsewhere (per-source indirect indices, the
// predicate and flags source, and for texture instructions the indirect
// resource/sampler sources) are adjusted to the new layout first; indices
// that pointed into an erased range become -1.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Fix up the indirect indices of every existing source. When the loop
   // ends, k is the index of the first non-existing source slot.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Shifting up: copy starting from the last source and walk downwards,
      // so that no source is overwritten before it has been moved.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Shifting down: copy upwards starting at @s ...
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      // ... then clear the slots at the tail that have been vacated.
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
715
716void
717Instruction::takeExtraSources(int s, Value *values[3])
718{
719   values[0] = getIndirect(s, 0);
720   if (values[0])
721      setIndirect(s, 0, NULL);
722
723   values[1] = getIndirect(s, 1);
724   if (values[1])
725      setIndirect(s, 1, NULL);
726
727   values[2] = getPredicate();
728   if (values[2])
729      setPredicate(cc, NULL);
730}
731
732void
733Instruction::putExtraSources(int s, Value *values[3])
734{
735   if (values[0])
736      setIndirect(s, 0, values[0]);
737   if (values[1])
738      setIndirect(s, 1, values[1]);
739   if (values[2])
740      setPredicate(cc, values[2]);
741}
742
743Instruction *
744Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
745{
746   if (!i)
747      i = new_Instruction(pol.context(), op, dType);
748#if !defined(NDEBUG) && defined(__cpp_rtti)
749   assert(typeid(*i) == typeid(*this));
750#endif
751
752   pol.set<Instruction>(this, i);
753
754   i->sType = sType;
755
756   i->rnd = rnd;
757   i->cache = cache;
758   i->subOp = subOp;
759
760   i->saturate = saturate;
761   i->join = join;
762   i->exit = exit;
763   i->mask = mask;
764   i->ftz = ftz;
765   i->dnz = dnz;
766   i->ipa = ipa;
767   i->lanes = lanes;
768   i->perPatch = perPatch;
769
770   i->postFactor = postFactor;
771
772   for (int d = 0; defExists(d); ++d)
773      i->setDef(d, pol.get(getDef(d)));
774
775   for (int s = 0; srcExists(s); ++s) {
776      i->setSrc(s, pol.get(getSrc(s)));
777      i->src(s).mod = src(s).mod;
778   }
779
780   i->cc = cc;
781   i->predSrc = predSrc;
782   i->flagsDef = flagsDef;
783   i->flagsSrc = flagsSrc;
784
785   return i;
786}
787
788unsigned int
789Instruction::defCount(unsigned int mask, bool singleFile) const
790{
791   unsigned int i, n;
792
793   if (singleFile) {
794      unsigned int d = ffs(mask);
795      if (!d)
796         return 0;
797      for (i = d--; defExists(i); ++i)
798         if (getDef(i)->reg.file != getDef(d)->reg.file)
799            mask &= ~(1 << i);
800   }
801
802   for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
803      n += mask & 1;
804   return n;
805}
806
807unsigned int
808Instruction::srcCount(unsigned int mask, bool singleFile) const
809{
810   unsigned int i, n;
811
812   if (singleFile) {
813      unsigned int s = ffs(mask);
814      if (!s)
815         return 0;
816      for (i = s--; srcExists(i); ++i)
817         if (getSrc(i)->reg.file != getSrc(s)->reg.file)
818            mask &= ~(1 << i);
819   }
820
821   for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
822      n += mask & 1;
823   return n;
824}
825
826bool
827Instruction::setIndirect(int s, int dim, Value *value)
828{
829   assert(this->srcExists(s));
830
831   int p = srcs[s].indirect[dim];
832   if (p < 0) {
833      if (!value)
834         return true;
835      p = srcs.size();
836      while (p > 0 && !srcExists(p - 1))
837         --p;
838   }
839   setSrc(p, value);
840   srcs[p].usedAsPtr = (value != 0);
841   srcs[s].indirect[dim] = value ? p : -1;
842   return true;
843}
844
845bool
846Instruction::setPredicate(CondCode ccode, Value *value)
847{
848   cc = ccode;
849
850   if (!value) {
851      if (predSrc >= 0) {
852         srcs[predSrc].set(NULL);
853         predSrc = -1;
854      }
855      return true;
856   }
857
858   if (predSrc < 0) {
859      predSrc = srcs.size();
860      while (predSrc > 0 && !srcExists(predSrc - 1))
861         --predSrc;
862   }
863
864   setSrc(predSrc, value);
865   return true;
866}
867
868bool
869Instruction::writesPredicate() const
870{
871   for (int d = 0; defExists(d); ++d)
872      if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
873         return true;
874   return false;
875}
876
877bool
878Instruction::canCommuteDefSrc(const Instruction *i) const
879{
880   for (int d = 0; defExists(d); ++d)
881      for (int s = 0; i->srcExists(s); ++s)
882         if (getDef(d)->interfers(i->getSrc(s)))
883            return false;
884   return true;
885}
886
887bool
888Instruction::canCommuteDefDef(const Instruction *i) const
889{
890   for (int d = 0; defExists(d); ++d)
891      for (int c = 0; i->defExists(c); ++c)
892         if (getDef(d)->interfers(i->getDef(c)))
893            return false;
894   return true;
895}
896
897bool
898Instruction::isCommutationLegal(const Instruction *i) const
899{
900   return canCommuteDefDef(i) &&
901      canCommuteDefSrc(i) &&
902      i->canCommuteDefSrc(this);
903}
904
905TexInstruction::TexInstruction(Function *fn, operation op)
906   : Instruction(fn, op, TYPE_F32), tex()
907{
908   tex.rIndirectSrc = -1;
909   tex.sIndirectSrc = -1;
910
911   if (op == OP_TXF)
912      sType = TYPE_U32;
913}
914
915TexInstruction::~TexInstruction()
916{
917   for (int c = 0; c < 3; ++c) {
918      dPdx[c].set(NULL);
919      dPdy[c].set(NULL);
920   }
921   for (int n = 0; n < 4; ++n)
922      for (int c = 0; c < 3; ++c)
923         offset[n][c].set(NULL);
924}
925
926TexInstruction *
927TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
928{
929   TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
930                          new_TexInstruction(pol.context(), op));
931
932   Instruction::clone(pol, tex);
933
934   tex->tex = this->tex;
935
936   if (op == OP_TXD) {
937      for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
938         tex->dPdx[c].set(dPdx[c]);
939         tex->dPdy[c].set(dPdy[c]);
940      }
941   }
942
943   for (int n = 0; n < tex->tex.useOffsets; ++n)
944      for (int c = 0; c < 3; ++c)
945         tex->offset[n][c].set(offset[n][c]);
946
947   return tex;
948}
949
// Static description of every texture target, one row per target.
// NOTE(review): the column meanings below are inferred from the row contents
// (e.g. only *_ARRAY rows set the 4th column, only CUBE* rows the 5th, only
// *_SHADOW rows the 6th) -- confirm against the declaration of Target::Desc:
//   name, dimensionality, number of coordinate arguments, array, cube, shadow
// NOTE(review): the row order presumably has to match the texture target
// enumeration used to index this table -- verify before reordering.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
971
// Static description of the supported image formats. translateImgFormat()
// indexes this table with the nv50_ir::FMT_* constants, so the row order has
// to stay in sync with them.
// NOTE(review): column meanings are inferred from the row contents -- confirm
// against the declaration of ImgFormatDesc:
//   name, number of components, bits per component, component type,
//   and an optional trailing flag that only BGRA8 sets (presumably marking
//   the swapped B/R channel order).
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   // Floating point formats.
   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   // Unsigned integer formats.
   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   // Signed integer formats.
   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   // Unsigned normalized formats.
   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   // Signed normalized formats.
   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
1022
// Maps a pipe_format to its entry in formatTable.
// Formats without a mapping trip an assertion; when assertions are compiled
// out the FMT_NONE entry is returned for them instead.
const struct TexInstruction::ImgFormatDesc *
TexInstruction::translateImgFormat(enum pipe_format format)
{

// FMT_CASE(a, b): PIPE_FORMAT_a maps to the formatTable row of FMT_b.
#define FMT_CASE(a, b) \
  case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]

   switch (format) {
   FMT_CASE(NONE, NONE);

   // Floating point formats.
   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
   FMT_CASE(R32G32_FLOAT, RG32F);
   FMT_CASE(R16G16_FLOAT, RG16F);
   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
   FMT_CASE(R32_FLOAT, R32F);
   FMT_CASE(R16_FLOAT, R16F);

   // Unsigned integer formats.
   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
   FMT_CASE(R32G32_UINT, RG32UI);
   FMT_CASE(R16G16_UINT, RG16UI);
   FMT_CASE(R8G8_UINT, RG8UI);
   FMT_CASE(R32_UINT, R32UI);
   FMT_CASE(R16_UINT, R16UI);
   FMT_CASE(R8_UINT, R8UI);

   // Signed integer formats.
   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
   FMT_CASE(R32G32_SINT, RG32I);
   FMT_CASE(R16G16_SINT, RG16I);
   FMT_CASE(R8G8_SINT, RG8I);
   FMT_CASE(R32_SINT, R32I);
   FMT_CASE(R16_SINT, R16I);
   FMT_CASE(R8_SINT, R8I);

   // Unsigned normalized formats.
   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
   FMT_CASE(R16G16_UNORM, RG16);
   FMT_CASE(R8G8_UNORM, RG8);
   FMT_CASE(R16_UNORM, R16);
   FMT_CASE(R8_UNORM, R8);

   // Signed normalized formats.
   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
   FMT_CASE(R16G16_SNORM, RG16_SNORM);
   FMT_CASE(R8G8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(B8G8R8A8_UNORM, BGRA8);

   default:
      assert(!"Unexpected format");
      return &formatTable[nv50_ir::FMT_NONE];
   }
}
1084
1085void
1086TexInstruction::setIndirectR(Value *v)
1087{
1088   int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1089   if (p >= 0) {
1090      tex.rIndirectSrc = p;
1091      setSrc(p, v);
1092      srcs[p].usedAsPtr = !!v;
1093   }
1094}
1095
1096void
1097TexInstruction::setIndirectS(Value *v)
1098{
1099   int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1100   if (p >= 0) {
1101      tex.sIndirectSrc = p;
1102      setSrc(p, v);
1103      srcs[p].usedAsPtr = !!v;
1104   }
1105}
1106
1107CmpInstruction::CmpInstruction(Function *fn, operation op)
1108   : Instruction(fn, op, TYPE_F32)
1109{
1110   setCond = CC_ALWAYS;
1111}
1112
1113CmpInstruction *
1114CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1115{
1116   CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1117                          new_CmpInstruction(pol.context(), op));
1118   cmp->dType = dType;
1119   Instruction::clone(pol, cmp);
1120   cmp->setCond = setCond;
1121   return cmp;
1122}
1123
1124FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1125   : Instruction(fn, op, TYPE_NONE)
1126{
1127   if (op == OP_CALL)
1128      target.fn = reinterpret_cast<Function *>(targ);
1129   else
1130      target.bb = reinterpret_cast<BasicBlock *>(targ);
1131
1132   if (op == OP_BRA ||
1133       op == OP_CONT || op == OP_BREAK ||
1134       op == OP_RET || op == OP_EXIT)
1135      terminator = 1;
1136   else
1137   if (op == OP_JOIN)
1138      terminator = targ ? 1 : 0;
1139
1140   allWarp = absolute = limit = builtin = indirect = 0;
1141}
1142
1143FlowInstruction *
1144FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1145{
1146   FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1147                            new_FlowInstruction(pol.context(), op, NULL));
1148
1149   Instruction::clone(pol, flow);
1150   flow->allWarp = allWarp;
1151   flow->absolute = absolute;
1152   flow->limit = limit;
1153   flow->builtin = builtin;
1154
1155   if (builtin)
1156      flow->target.builtin = target.builtin;
1157   else
1158   if (op == OP_CALL)
1159      flow->target.fn = target.fn;
1160   else
1161   if (target.bb)
1162      flow->target.bb = pol.get<BasicBlock>(target.bb);
1163
1164   return flow;
1165}
1166
// Creates an empty program of the given @type for target @arch.
// Each mem_* pool is constructed with the size of the object type it serves
// as first argument; the meaning of the second argument is defined by the
// pool class, which is not visible in this file.
// The program starts with a single function named "MAIN", which is also
// entered into the call graph.
Program::Program(Type type, Target *arch)
   : progType(type),
     target(arch),
     tlsSize(0),
     mem_Instruction(sizeof(Instruction), 6),
     mem_CmpInstruction(sizeof(CmpInstruction), 4),
     mem_TexInstruction(sizeof(TexInstruction), 4),
     mem_FlowInstruction(sizeof(FlowInstruction), 4),
     mem_LValue(sizeof(LValue), 8),
     mem_Symbol(sizeof(Symbol), 7),
     mem_ImmediateValue(sizeof(ImmediateValue), 7),
     driver(NULL),
     driver_out(NULL)
{
   // No binary has been generated yet.
   code = NULL;
   binSize = 0;

   maxGPR = -1;
   fp64 = false;
   persampleInvocation = false;

   // The Function constructor receives this (partially initialized) program,
   // so keep this statement after the assignments above.
   main = new Function(this, "MAIN", ~0);
   calls.insert(&main->call);

   dbgFlags = 0;
   optLevel = 0;

   targetPriv = NULL;
}
1196
1197Program::~Program()
1198{
1199   for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1200      delete reinterpret_cast<Function *>(it.get());
1201
1202   for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1203      releaseValue(reinterpret_cast<Value *>(it.get()));
1204}
1205
1206void Program::releaseInstruction(Instruction *insn)
1207{
1208   // TODO: make this not suck so much
1209
1210   insn->~Instruction();
1211
1212   if (insn->asCmp())
1213      mem_CmpInstruction.release(insn);
1214   else
1215   if (insn->asTex())
1216      mem_TexInstruction.release(insn);
1217   else
1218   if (insn->asFlow())
1219      mem_FlowInstruction.release(insn);
1220   else
1221      mem_Instruction.release(insn);
1222}
1223
1224void Program::releaseValue(Value *value)
1225{
1226   value->~Value();
1227
1228   if (value->asLValue())
1229      mem_LValue.release(value);
1230   else
1231   if (value->asImm())
1232      mem_ImmediateValue.release(value);
1233   else
1234   if (value->asSym())
1235      mem_Symbol.release(value);
1236}
1237
1238
1239} // namespace nv50_ir
1240
1241extern "C" {
1242
1243static void
1244nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1245                       struct nv50_ir_prog_info_out *info_out)
1246{
1247   info_out->target = info->target;
1248   info_out->type = info->type;
1249   if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1250      info_out->prop.tp.domain = PIPE_PRIM_MAX;
1251      info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1252   }
1253   if (info->type == PIPE_SHADER_GEOMETRY) {
1254      info_out->prop.gp.instanceCount = 1;
1255      info_out->prop.gp.maxVertices = 1;
1256   }
1257   if (info->type == PIPE_SHADER_COMPUTE) {
1258      info->prop.cp.numThreads[0] =
1259      info->prop.cp.numThreads[1] =
1260      info->prop.cp.numThreads[2] = 1;
1261   }
1262   info_out->bin.smemSize = info->bin.smemSize;
1263   info_out->io.genUserClip = info->io.genUserClip;
1264   info_out->io.instanceId = 0xff;
1265   info_out->io.vertexId = 0xff;
1266   info_out->io.edgeFlagIn = 0xff;
1267   info_out->io.edgeFlagOut = 0xff;
1268   info_out->io.fragDepth = 0xff;
1269   info_out->io.sampleMask = 0xff;
1270}
1271
1272int
1273nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1274                      struct nv50_ir_prog_info_out *info_out)
1275{
1276   int ret = 0;
1277
1278   nv50_ir::Program::Type type;
1279
1280   nv50_ir_init_prog_info(info, info_out);
1281
1282#define PROG_TYPE_CASE(a, b)                                      \
1283   case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1284
1285   switch (info->type) {
1286   PROG_TYPE_CASE(VERTEX, VERTEX);
1287   PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1288   PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1289   PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1290   PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1291   PROG_TYPE_CASE(COMPUTE, COMPUTE);
1292   default:
1293      INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1294      return -1;
1295   }
1296   INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1297
1298   nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1299   if (!targ)
1300      return -1;
1301
1302   nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1303   if (!prog) {
1304      nv50_ir::Target::destroy(targ);
1305      return -1;
1306   }
1307   prog->driver = info;
1308   prog->driver_out = info_out;
1309   prog->dbgFlags = info->dbgFlags;
1310   prog->optLevel = info->optLevel;
1311
1312   switch (info->bin.sourceRep) {
1313   case PIPE_SHADER_IR_NIR:
1314      ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1315      break;
1316   case PIPE_SHADER_IR_TGSI:
1317      ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1318      break;
1319   default:
1320      ret = -1;
1321      break;
1322   }
1323   if (ret < 0)
1324      goto out;
1325   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1326      prog->print();
1327
1328   targ->parseDriverInfo(info, info_out);
1329   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1330
1331   prog->convertToSSA();
1332
1333   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1334      prog->print();
1335
1336   prog->optimizeSSA(info->optLevel);
1337   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1338
1339   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1340      prog->print();
1341
1342   if (!prog->registerAllocation()) {
1343      ret = -4;
1344      goto out;
1345   }
1346   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1347
1348   prog->optimizePostRA(info->optLevel);
1349
1350   if (!prog->emitBinary(info_out)) {
1351      ret = -5;
1352      goto out;
1353   }
1354
1355out:
1356   INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1357
1358   info_out->bin.maxGPR = prog->maxGPR;
1359   info_out->bin.code = prog->code;
1360   info_out->bin.codeSize = prog->binSize;
1361   info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
1362
1363   delete prog;
1364   nv50_ir::Target::destroy(targ);
1365
1366   return ret;
1367}
1368
1369} // extern "C"
1370