1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "nv50_ir.h" 24#include "nv50_ir_target.h" 25#include "nv50_ir_driver.h" 26 27namespace nv50_ir { 28 29Modifier::Modifier(operation op) 30{ 31 switch (op) { 32 case OP_NEG: bits = NV50_IR_MOD_NEG; break; 33 case OP_ABS: bits = NV50_IR_MOD_ABS; break; 34 case OP_SAT: bits = NV50_IR_MOD_SAT; break; 35 case OP_NOT: bits = NV50_IR_MOD_NOT; break; 36 default: 37 bits = 0; 38 break; 39 } 40} 41 42Modifier Modifier::operator*(const Modifier m) const 43{ 44 unsigned int a, b, c; 45 46 b = m.bits; 47 if (this->bits & NV50_IR_MOD_ABS) 48 b &= ~NV50_IR_MOD_NEG; 49 50 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); 51 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); 52 53 return Modifier(a | c); 54} 55 56ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) 57{ 58 indirect[0] = -1; 59 indirect[1] = -1; 60 usedAsPtr = false; 61 set(v); 62} 63 64ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) 65{ 66 set(ref); 67 usedAsPtr = ref.usedAsPtr; 68} 69 70ValueRef::~ValueRef() 71{ 72 this->set(NULL); 73} 74 75bool ValueRef::getImmediate(ImmediateValue &imm) const 76{ 77 const ValueRef *src = this; 78 Modifier m; 79 DataType type = src->insn->sType; 80 81 while (src) { 82 if (src->mod) { 83 if (src->insn->sType != type) 84 break; 85 m *= src->mod; 86 } 87 if (src->getFile() == FILE_IMMEDIATE) { 88 imm = *(src->value->asImm()); 89 // The immediate's type isn't required to match its use, it's 90 // more of a hint; applying a modifier makes use of that hint. 91 imm.reg.type = type; 92 m.applyTo(imm); 93 return true; 94 } 95 96 Instruction *insn = src->value->getUniqueInsn(); 97 98 if (insn && insn->op == OP_MOV) { 99 src = &insn->src(0); 100 if (src->mod) 101 WARN("OP_MOV with modifier encountered !\n"); 102 } else { 103 src = NULL; 104 } 105 } 106 return false; 107} 108 109ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL) 110{ 111 set(v); 112} 113 114ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL) 115{ 116 set(def.get()); 117} 118 119ValueDef::~ValueDef() 120{ 121 this->set(NULL); 122} 123 124void 125ValueRef::set(const ValueRef &ref) 126{ 127 this->set(ref.get()); 128 mod = ref.mod; 129 indirect[0] = ref.indirect[0]; 130 indirect[1] = ref.indirect[1]; 131} 132 133void 134ValueRef::set(Value *refVal) 135{ 136 if (value == refVal) 137 return; 138 if (value) 139 value->uses.erase(this); 140 if (refVal) 141 refVal->uses.insert(this); 142 143 value = refVal; 144} 145 146void 147ValueDef::set(Value *defVal) 148{ 149 if (value == defVal) 150 return; 151 if (value) 152 value->defs.remove(this); 153 if (defVal) 154 defVal->defs.push_back(this); 155 156 value = defVal; 157} 158 159// Check if we can replace this definition's value by the value in @rep, 160// including the source modifiers, i.e. make sure that all uses support 161// @rep.mod. 162bool 163ValueDef::mayReplace(const ValueRef &rep) 164{ 165 if (!rep.mod) 166 return true; 167 168 if (!insn || !insn->bb) // Unbound instruction ? 169 return false; 170 171 const Target *target = insn->bb->getProgram()->getTarget(); 172 173 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); 174 ++it) { 175 Instruction *insn = (*it)->getInsn(); 176 int s = -1; 177 178 for (int i = 0; insn->srcExists(i); ++i) { 179 if (insn->src(i).get() == value) { 180 // If there are multiple references to us we'd have to check if the 181 // combination of mods is still supported, but just bail for now. 182 if (&insn->src(i) != (*it)) 183 return false; 184 s = i; 185 } 186 } 187 assert(s >= 0); // integrity of uses list 188 189 if (!target->isModSupported(insn, s, rep.mod)) 190 return false; 191 } 192 return true; 193} 194 195void 196ValueDef::replace(const ValueRef &repVal, bool doSet) 197{ 198 assert(mayReplace(repVal)); 199 200 if (value == repVal.get()) 201 return; 202 203 while (!value->uses.empty()) { 204 ValueRef *ref = *value->uses.begin(); 205 ref->set(repVal.get()); 206 ref->mod *= repVal.mod; 207 } 208 209 if (doSet) 210 set(repVal.get()); 211} 212 213Value::Value() : id(-1) 214{ 215 join = this; 216 memset(®, 0, sizeof(reg)); 217 reg.size = 4; 218} 219 220LValue::LValue(Function *fn, DataFile file) 221{ 222 reg.file = file; 223 reg.size = (file != FILE_PREDICATE) ? 4 : 1; 224 reg.data.id = -1; 225 226 compMask = 0; 227 compound = 0; 228 ssa = 0; 229 fixedReg = 0; 230 noSpill = 0; 231 232 fn->add(this, this->id); 233} 234 235LValue::LValue(Function *fn, LValue *lval) 236{ 237 assert(lval); 238 239 reg.file = lval->reg.file; 240 reg.size = lval->reg.size; 241 reg.data.id = -1; 242 243 compMask = 0; 244 compound = 0; 245 ssa = 0; 246 fixedReg = 0; 247 noSpill = 0; 248 249 fn->add(this, this->id); 250} 251 252LValue * 253LValue::clone(ClonePolicy<Function>& pol) const 254{ 255 LValue *that = new_LValue(pol.context(), reg.file); 256 257 pol.set<Value>(this, that); 258 259 that->reg.size = this->reg.size; 260 that->reg.type = this->reg.type; 261 that->reg.data = this->reg.data; 262 263 return that; 264} 265 266bool 267LValue::isUniform() const 268{ 269 if (defs.size() > 1) 270 return false; 271 Instruction *insn = getInsn(); 272 if (!insn) 273 return false; 274 // let's not try too hard here for now ... 275 return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); 276} 277 278Symbol::Symbol(Program *prog, DataFile f, uint8_t fidx) 279{ 280 baseSym = NULL; 281 282 reg.file = f; 283 reg.fileIndex = fidx; 284 reg.data.offset = 0; 285 286 prog->add(this, this->id); 287} 288 289Symbol * 290Symbol::clone(ClonePolicy<Function>& pol) const 291{ 292 Program *prog = pol.context()->getProgram(); 293 294 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); 295 296 pol.set<Value>(this, that); 297 298 that->reg.size = this->reg.size; 299 that->reg.type = this->reg.type; 300 that->reg.data = this->reg.data; 301 302 that->baseSym = this->baseSym; 303 304 return that; 305} 306 307bool 308Symbol::isUniform() const 309{ 310 return 311 reg.file != FILE_SYSTEM_VALUE && 312 reg.file != FILE_MEMORY_LOCAL && 313 reg.file != FILE_SHADER_INPUT; 314} 315 316ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) 317{ 318 memset(®, 0, sizeof(reg)); 319 320 reg.file = FILE_IMMEDIATE; 321 reg.size = 4; 322 reg.type = TYPE_U32; 323 324 reg.data.u32 = uval; 325 326 prog->add(this, this->id); 327} 328 329ImmediateValue::ImmediateValue(Program *prog, float fval) 330{ 331 memset(®, 0, sizeof(reg)); 332 333 reg.file = FILE_IMMEDIATE; 334 reg.size = 4; 335 reg.type = TYPE_F32; 336 337 reg.data.f32 = fval; 338 339 prog->add(this, this->id); 340} 341 342ImmediateValue::ImmediateValue(Program *prog, double dval) 343{ 344 memset(®, 0, sizeof(reg)); 345 346 reg.file = FILE_IMMEDIATE; 347 reg.size = 8; 348 reg.type = TYPE_F64; 349 350 reg.data.f64 = dval; 351 352 prog->add(this, this->id); 353} 354 355ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) 356{ 357 reg = proto->reg; 358 359 reg.type = ty; 360 reg.size = typeSizeof(ty); 361} 362 363ImmediateValue * 364ImmediateValue::clone(ClonePolicy<Function>& pol) const 365{ 366 Program *prog = pol.context()->getProgram(); 367 ImmediateValue *that = new_ImmediateValue(prog, 0u); 368 369 pol.set<Value>(this, that); 370 371 that->reg.size = this->reg.size; 372 that->reg.type = this->reg.type; 373 that->reg.data = this->reg.data; 374 375 return that; 376} 377 378bool 379ImmediateValue::isInteger(const int i) const 380{ 381 switch (reg.type) { 382 case TYPE_S8: 383 return reg.data.s8 == i; 384 case TYPE_U8: 385 return reg.data.u8 == i; 386 case TYPE_S16: 387 return reg.data.s16 == i; 388 case TYPE_U16: 389 return reg.data.u16 == i; 390 case TYPE_S32: 391 case TYPE_U32: 392 return reg.data.s32 == i; // as if ... 393 case TYPE_S64: 394 case TYPE_U64: 395 return reg.data.s64 == i; // as if ... 396 case TYPE_F32: 397 return reg.data.f32 == static_cast<float>(i); 398 case TYPE_F64: 399 return reg.data.f64 == static_cast<double>(i); 400 default: 401 return false; 402 } 403} 404 405bool 406ImmediateValue::isNegative() const 407{ 408 switch (reg.type) { 409 case TYPE_S8: return reg.data.s8 < 0; 410 case TYPE_S16: return reg.data.s16 < 0; 411 case TYPE_S32: 412 case TYPE_U32: return reg.data.s32 < 0; 413 case TYPE_F32: return reg.data.u32 & (1 << 31); 414 case TYPE_F64: return reg.data.u64 & (1ULL << 63); 415 default: 416 return false; 417 } 418} 419 420bool 421ImmediateValue::isPow2() const 422{ 423 if (reg.type == TYPE_U64 || reg.type == TYPE_S64) 424 return util_is_power_of_two_or_zero64(reg.data.u64); 425 else 426 return util_is_power_of_two_or_zero(reg.data.u32); 427} 428 429void 430ImmediateValue::applyLog2() 431{ 432 switch (reg.type) { 433 case TYPE_S8: 434 case TYPE_S16: 435 case TYPE_S32: 436 assert(!this->isNegative()); 437 FALLTHROUGH; 438 case TYPE_U8: 439 case TYPE_U16: 440 case TYPE_U32: 441 reg.data.u32 = util_logbase2(reg.data.u32); 442 break; 443 case TYPE_S64: 444 assert(!this->isNegative()); 445 FALLTHROUGH; 446 case TYPE_U64: 447 reg.data.u64 = util_logbase2_64(reg.data.u64); 448 break; 449 case TYPE_F32: 450 reg.data.f32 = log2f(reg.data.f32); 451 break; 452 case TYPE_F64: 453 reg.data.f64 = log2(reg.data.f64); 454 break; 455 default: 456 assert(0); 457 break; 458 } 459} 460 461bool 462ImmediateValue::compare(CondCode cc, float fval) const 463{ 464 if (reg.type != TYPE_F32) 465 ERROR("immediate value is not of type f32"); 466 467 switch (static_cast<CondCode>(cc & 7)) { 468 case CC_TR: return true; 469 case CC_FL: return false; 470 case CC_LT: return reg.data.f32 < fval; 471 case CC_LE: return reg.data.f32 <= fval; 472 case CC_GT: return reg.data.f32 > fval; 473 case CC_GE: return reg.data.f32 >= fval; 474 case CC_EQ: return reg.data.f32 == fval; 475 case CC_NE: return reg.data.f32 != fval; 476 default: 477 assert(0); 478 return false; 479 } 480} 481 482ImmediateValue& 483ImmediateValue::operator=(const ImmediateValue &that) 484{ 485 this->reg = that.reg; 486 return (*this); 487} 488 489bool 490Value::interfers(const Value *that) const 491{ 492 uint32_t idA, idB; 493 494 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 495 return false; 496 if (this->asImm()) 497 return false; 498 499 if (this->asSym()) { 500 idA = this->join->reg.data.offset; 501 idB = that->join->reg.data.offset; 502 } else { 503 idA = this->join->reg.data.id * MIN2(this->reg.size, 4); 504 idB = that->join->reg.data.id * MIN2(that->reg.size, 4); 505 } 506 507 if (idA < idB) 508 return (idA + this->reg.size > idB); 509 else 510 if (idA > idB) 511 return (idB + that->reg.size > idA); 512 else 513 return (idA == idB); 514} 515 516bool 517Value::equals(const Value *that, bool strict) const 518{ 519 if (strict) 520 return this == that; 521 522 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 523 return false; 524 if (that->reg.size != this->reg.size) 525 return false; 526 527 if (that->reg.data.id != this->reg.data.id) 528 return false; 529 530 return true; 531} 532 533bool 534ImmediateValue::equals(const Value *that, bool strict) const 535{ 536 const ImmediateValue *imm = that->asImm(); 537 if (!imm) 538 return false; 539 return reg.data.u64 == imm->reg.data.u64; 540} 541 542bool 543Symbol::equals(const Value *that, bool strict) const 544{ 545 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) 546 return false; 547 assert(that->asSym()); 548 549 if (this->baseSym != that->asSym()->baseSym) 550 return false; 551 552 if (reg.file == FILE_SYSTEM_VALUE) 553 return (this->reg.data.sv.sv == that->reg.data.sv.sv && 554 this->reg.data.sv.index == that->reg.data.sv.index); 555 return this->reg.data.offset == that->reg.data.offset; 556} 557 558void Instruction::init() 559{ 560 next = prev = 0; 561 serial = 0; 562 563 cc = CC_ALWAYS; 564 rnd = ROUND_N; 565 cache = CACHE_CA; 566 subOp = 0; 567 568 saturate = 0; 569 join = 0; 570 exit = 0; 571 terminator = 0; 572 ftz = 0; 573 dnz = 0; 574 perPatch = 0; 575 fixed = 0; 576 encSize = 0; 577 ipa = 0; 578 mask = 0; 579 precise = 0; 580 581 lanes = 0xf; 582 583 postFactor = 0; 584 585 predSrc = -1; 586 flagsDef = -1; 587 flagsSrc = -1; 588 589 sched = 0; 590 bb = NULL; 591} 592 593Instruction::Instruction() 594{ 595 init(); 596 597 op = OP_NOP; 598 dType = sType = TYPE_F32; 599 600 id = -1; 601} 602 603Instruction::Instruction(Function *fn, operation opr, DataType ty) 604{ 605 init(); 606 607 op = opr; 608 dType = sType = ty; 609 610 fn->add(this, id); 611} 612 613Instruction::~Instruction() 614{ 615 if (bb) { 616 Function *fn = bb->getFunction(); 617 bb->remove(this); 618 fn->allInsns.remove(id); 619 } 620 621 for (int s = 0; srcExists(s); ++s) 622 setSrc(s, NULL); 623 // must unlink defs too since the list pointers will get deallocated 624 for (int d = 0; defExists(d); ++d) 625 setDef(d, NULL); 626} 627 628void 629Instruction::setDef(int i, Value *val) 630{ 631 int size = defs.size(); 632 if (i >= size) { 633 defs.resize(i + 1); 634 while (size <= i) 635 defs[size++].setInsn(this); 636 } 637 defs[i].set(val); 638} 639 640void 641Instruction::setSrc(int s, Value *val) 642{ 643 int size = srcs.size(); 644 if (s >= size) { 645 srcs.resize(s + 1); 646 while (size <= s) 647 srcs[size++].setInsn(this); 648 } 649 srcs[s].set(val); 650} 651 652void 653Instruction::setSrc(int s, const ValueRef& ref) 654{ 655 setSrc(s, ref.get()); 656 srcs[s].mod = ref.mod; 657} 658 659void 660Instruction::swapSources(int a, int b) 661{ 662 Value *value = srcs[a].get(); 663 Modifier m = srcs[a].mod; 664 665 setSrc(a, srcs[b]); 666 667 srcs[b].set(value); 668 srcs[b].mod = m; 669} 670 671static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta) 672{ 673 if (index >= s) 674 index += delta; 675 else 676 if ((delta < 0) && (index >= (s + delta))) 677 index = -1; 678} 679 680// Moves sources [@s,last_source] by @delta. 681// If @delta < 0, sources [@s - abs(@delta), @s) are erased. 682void 683Instruction::moveSources(const int s, const int delta) 684{ 685 if (delta == 0) 686 return; 687 assert(s + delta >= 0); 688 689 int k; 690 691 for (k = 0; srcExists(k); ++k) { 692 for (int i = 0; i < 2; ++i) 693 moveSourcesAdjustIndex(src(k).indirect[i], s, delta); 694 } 695 moveSourcesAdjustIndex(predSrc, s, delta); 696 moveSourcesAdjustIndex(flagsSrc, s, delta); 697 if (asTex()) { 698 TexInstruction *tex = asTex(); 699 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); 700 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); 701 } 702 703 if (delta > 0) { 704 --k; 705 for (int p = k + delta; k >= s; --k, --p) 706 setSrc(p, src(k)); 707 } else { 708 int p; 709 for (p = s; p < k; ++p) 710 setSrc(p + delta, src(p)); 711 for (; (p + delta) < k; ++p) 712 setSrc(p + delta, NULL); 713 } 714} 715 716void 717Instruction::takeExtraSources(int s, Value *values[3]) 718{ 719 values[0] = getIndirect(s, 0); 720 if (values[0]) 721 setIndirect(s, 0, NULL); 722 723 values[1] = getIndirect(s, 1); 724 if (values[1]) 725 setIndirect(s, 1, NULL); 726 727 values[2] = getPredicate(); 728 if (values[2]) 729 setPredicate(cc, NULL); 730} 731 732void 733Instruction::putExtraSources(int s, Value *values[3]) 734{ 735 if (values[0]) 736 setIndirect(s, 0, values[0]); 737 if (values[1]) 738 setIndirect(s, 1, values[1]); 739 if (values[2]) 740 setPredicate(cc, values[2]); 741} 742 743Instruction * 744Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 745{ 746 if (!i) 747 i = new_Instruction(pol.context(), op, dType); 748#if !defined(NDEBUG) && defined(__cpp_rtti) 749 assert(typeid(*i) == typeid(*this)); 750#endif 751 752 pol.set<Instruction>(this, i); 753 754 i->sType = sType; 755 756 i->rnd = rnd; 757 i->cache = cache; 758 i->subOp = subOp; 759 760 i->saturate = saturate; 761 i->join = join; 762 i->exit = exit; 763 i->mask = mask; 764 i->ftz = ftz; 765 i->dnz = dnz; 766 i->ipa = ipa; 767 i->lanes = lanes; 768 i->perPatch = perPatch; 769 770 i->postFactor = postFactor; 771 772 for (int d = 0; defExists(d); ++d) 773 i->setDef(d, pol.get(getDef(d))); 774 775 for (int s = 0; srcExists(s); ++s) { 776 i->setSrc(s, pol.get(getSrc(s))); 777 i->src(s).mod = src(s).mod; 778 } 779 780 i->cc = cc; 781 i->predSrc = predSrc; 782 i->flagsDef = flagsDef; 783 i->flagsSrc = flagsSrc; 784 785 return i; 786} 787 788unsigned int 789Instruction::defCount(unsigned int mask, bool singleFile) const 790{ 791 unsigned int i, n; 792 793 if (singleFile) { 794 unsigned int d = ffs(mask); 795 if (!d) 796 return 0; 797 for (i = d--; defExists(i); ++i) 798 if (getDef(i)->reg.file != getDef(d)->reg.file) 799 mask &= ~(1 << i); 800 } 801 802 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) 803 n += mask & 1; 804 return n; 805} 806 807unsigned int 808Instruction::srcCount(unsigned int mask, bool singleFile) const 809{ 810 unsigned int i, n; 811 812 if (singleFile) { 813 unsigned int s = ffs(mask); 814 if (!s) 815 return 0; 816 for (i = s--; srcExists(i); ++i) 817 if (getSrc(i)->reg.file != getSrc(s)->reg.file) 818 mask &= ~(1 << i); 819 } 820 821 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) 822 n += mask & 1; 823 return n; 824} 825 826bool 827Instruction::setIndirect(int s, int dim, Value *value) 828{ 829 assert(this->srcExists(s)); 830 831 int p = srcs[s].indirect[dim]; 832 if (p < 0) { 833 if (!value) 834 return true; 835 p = srcs.size(); 836 while (p > 0 && !srcExists(p - 1)) 837 --p; 838 } 839 setSrc(p, value); 840 srcs[p].usedAsPtr = (value != 0); 841 srcs[s].indirect[dim] = value ? p : -1; 842 return true; 843} 844 845bool 846Instruction::setPredicate(CondCode ccode, Value *value) 847{ 848 cc = ccode; 849 850 if (!value) { 851 if (predSrc >= 0) { 852 srcs[predSrc].set(NULL); 853 predSrc = -1; 854 } 855 return true; 856 } 857 858 if (predSrc < 0) { 859 predSrc = srcs.size(); 860 while (predSrc > 0 && !srcExists(predSrc - 1)) 861 --predSrc; 862 } 863 864 setSrc(predSrc, value); 865 return true; 866} 867 868bool 869Instruction::writesPredicate() const 870{ 871 for (int d = 0; defExists(d); ++d) 872 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) 873 return true; 874 return false; 875} 876 877bool 878Instruction::canCommuteDefSrc(const Instruction *i) const 879{ 880 for (int d = 0; defExists(d); ++d) 881 for (int s = 0; i->srcExists(s); ++s) 882 if (getDef(d)->interfers(i->getSrc(s))) 883 return false; 884 return true; 885} 886 887bool 888Instruction::canCommuteDefDef(const Instruction *i) const 889{ 890 for (int d = 0; defExists(d); ++d) 891 for (int c = 0; i->defExists(c); ++c) 892 if (getDef(d)->interfers(i->getDef(c))) 893 return false; 894 return true; 895} 896 897bool 898Instruction::isCommutationLegal(const Instruction *i) const 899{ 900 return canCommuteDefDef(i) && 901 canCommuteDefSrc(i) && 902 i->canCommuteDefSrc(this); 903} 904 905TexInstruction::TexInstruction(Function *fn, operation op) 906 : Instruction(fn, op, TYPE_F32), tex() 907{ 908 tex.rIndirectSrc = -1; 909 tex.sIndirectSrc = -1; 910 911 if (op == OP_TXF) 912 sType = TYPE_U32; 913} 914 915TexInstruction::~TexInstruction() 916{ 917 for (int c = 0; c < 3; ++c) { 918 dPdx[c].set(NULL); 919 dPdy[c].set(NULL); 920 } 921 for (int n = 0; n < 4; ++n) 922 for (int c = 0; c < 3; ++c) 923 offset[n][c].set(NULL); 924} 925 926TexInstruction * 927TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 928{ 929 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) : 930 new_TexInstruction(pol.context(), op)); 931 932 Instruction::clone(pol, tex); 933 934 tex->tex = this->tex; 935 936 if (op == OP_TXD) { 937 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { 938 tex->dPdx[c].set(dPdx[c]); 939 tex->dPdy[c].set(dPdy[c]); 940 } 941 } 942 943 for (int n = 0; n < tex->tex.useOffsets; ++n) 944 for (int c = 0; c < 3; ++c) 945 tex->offset[n][c].set(offset[n][c]); 946 947 return tex; 948} 949 950const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = 951{ 952 { "1D", 1, 1, false, false, false }, 953 { "2D", 2, 2, false, false, false }, 954 { "2D_MS", 2, 3, false, false, false }, 955 { "3D", 3, 3, false, false, false }, 956 { "CUBE", 2, 3, false, true, false }, 957 { "1D_SHADOW", 1, 1, false, false, true }, 958 { "2D_SHADOW", 2, 2, false, false, true }, 959 { "CUBE_SHADOW", 2, 3, false, true, true }, 960 { "1D_ARRAY", 1, 2, true, false, false }, 961 { "2D_ARRAY", 2, 3, true, false, false }, 962 { "2D_MS_ARRAY", 2, 4, true, false, false }, 963 { "CUBE_ARRAY", 2, 4, true, true, false }, 964 { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, 965 { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, 966 { "RECT", 2, 2, false, false, false }, 967 { "RECT_SHADOW", 2, 2, false, false, true }, 968 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, 969 { "BUFFER", 1, 1, false, false, false }, 970}; 971 972const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = 973{ 974 { "NONE", 0, { 0, 0, 0, 0 }, UINT }, 975 976 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT }, 977 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT }, 978 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT }, 979 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT }, 980 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT }, 981 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT }, 982 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT }, 983 984 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT }, 985 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT }, 986 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT }, 987 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT }, 988 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT }, 989 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT }, 990 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT }, 991 { "R32UI", 1, { 32, 0, 0, 0 }, UINT }, 992 { "R16UI", 1, { 16, 0, 0, 0 }, UINT }, 993 { "R8UI", 1, { 8, 0, 0, 0 }, UINT }, 994 995 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT }, 996 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT }, 997 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT }, 998 { "RG32I", 2, { 32, 32, 0, 0 }, SINT }, 999 { "RG16I", 2, { 16, 16, 0, 0 }, SINT }, 1000 { "RG8I", 2, { 8, 8, 0, 0 }, SINT }, 1001 { "R32I", 1, { 32, 0, 0, 0 }, SINT }, 1002 { "R16I", 1, { 16, 0, 0, 0 }, SINT }, 1003 { "R8I", 1, { 8, 0, 0, 0 }, SINT }, 1004 1005 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM }, 1006 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM }, 1007 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM }, 1008 { "RG16", 2, { 16, 16, 0, 0 }, UNORM }, 1009 { "RG8", 2, { 8, 8, 0, 0 }, UNORM }, 1010 { "R16", 1, { 16, 0, 0, 0 }, UNORM }, 1011 { "R8", 1, { 8, 0, 0, 0 }, UNORM }, 1012 1013 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM }, 1014 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM }, 1015 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM }, 1016 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM }, 1017 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM }, 1018 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, 1019 1020 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true }, 1021}; 1022 1023const struct TexInstruction::ImgFormatDesc * 1024TexInstruction::translateImgFormat(enum pipe_format format) 1025{ 1026 1027#define FMT_CASE(a, b) \ 1028 case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b] 1029 1030 switch (format) { 1031 FMT_CASE(NONE, NONE); 1032 1033 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F); 1034 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F); 1035 FMT_CASE(R32G32_FLOAT, RG32F); 1036 FMT_CASE(R16G16_FLOAT, RG16F); 1037 FMT_CASE(R11G11B10_FLOAT, R11G11B10F); 1038 FMT_CASE(R32_FLOAT, R32F); 1039 FMT_CASE(R16_FLOAT, R16F); 1040 1041 FMT_CASE(R32G32B32A32_UINT, RGBA32UI); 1042 FMT_CASE(R16G16B16A16_UINT, RGBA16UI); 1043 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI); 1044 FMT_CASE(R8G8B8A8_UINT, RGBA8UI); 1045 FMT_CASE(R32G32_UINT, RG32UI); 1046 FMT_CASE(R16G16_UINT, RG16UI); 1047 FMT_CASE(R8G8_UINT, RG8UI); 1048 FMT_CASE(R32_UINT, R32UI); 1049 FMT_CASE(R16_UINT, R16UI); 1050 FMT_CASE(R8_UINT, R8UI); 1051 1052 FMT_CASE(R32G32B32A32_SINT, RGBA32I); 1053 FMT_CASE(R16G16B16A16_SINT, RGBA16I); 1054 FMT_CASE(R8G8B8A8_SINT, RGBA8I); 1055 FMT_CASE(R32G32_SINT, RG32I); 1056 FMT_CASE(R16G16_SINT, RG16I); 1057 FMT_CASE(R8G8_SINT, RG8I); 1058 FMT_CASE(R32_SINT, R32I); 1059 FMT_CASE(R16_SINT, R16I); 1060 FMT_CASE(R8_SINT, R8I); 1061 1062 FMT_CASE(R16G16B16A16_UNORM, RGBA16); 1063 FMT_CASE(R10G10B10A2_UNORM, RGB10A2); 1064 FMT_CASE(R8G8B8A8_UNORM, RGBA8); 1065 FMT_CASE(R16G16_UNORM, RG16); 1066 FMT_CASE(R8G8_UNORM, RG8); 1067 FMT_CASE(R16_UNORM, R16); 1068 FMT_CASE(R8_UNORM, R8); 1069 1070 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM); 1071 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM); 1072 FMT_CASE(R16G16_SNORM, RG16_SNORM); 1073 FMT_CASE(R8G8_SNORM, RG8_SNORM); 1074 FMT_CASE(R16_SNORM, R16_SNORM); 1075 FMT_CASE(R8_SNORM, R8_SNORM); 1076 1077 FMT_CASE(B8G8R8A8_UNORM, BGRA8); 1078 1079 default: 1080 assert(!"Unexpected format"); 1081 return &formatTable[nv50_ir::FMT_NONE]; 1082 } 1083} 1084 1085void 1086TexInstruction::setIndirectR(Value *v) 1087{ 1088 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; 1089 if (p >= 0) { 1090 tex.rIndirectSrc = p; 1091 setSrc(p, v); 1092 srcs[p].usedAsPtr = !!v; 1093 } 1094} 1095 1096void 1097TexInstruction::setIndirectS(Value *v) 1098{ 1099 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; 1100 if (p >= 0) { 1101 tex.sIndirectSrc = p; 1102 setSrc(p, v); 1103 srcs[p].usedAsPtr = !!v; 1104 } 1105} 1106 1107CmpInstruction::CmpInstruction(Function *fn, operation op) 1108 : Instruction(fn, op, TYPE_F32) 1109{ 1110 setCond = CC_ALWAYS; 1111} 1112 1113CmpInstruction * 1114CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1115{ 1116 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) : 1117 new_CmpInstruction(pol.context(), op)); 1118 cmp->dType = dType; 1119 Instruction::clone(pol, cmp); 1120 cmp->setCond = setCond; 1121 return cmp; 1122} 1123 1124FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) 1125 : Instruction(fn, op, TYPE_NONE) 1126{ 1127 if (op == OP_CALL) 1128 target.fn = reinterpret_cast<Function *>(targ); 1129 else 1130 target.bb = reinterpret_cast<BasicBlock *>(targ); 1131 1132 if (op == OP_BRA || 1133 op == OP_CONT || op == OP_BREAK || 1134 op == OP_RET || op == OP_EXIT) 1135 terminator = 1; 1136 else 1137 if (op == OP_JOIN) 1138 terminator = targ ? 1 : 0; 1139 1140 allWarp = absolute = limit = builtin = indirect = 0; 1141} 1142 1143FlowInstruction * 1144FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1145{ 1146 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : 1147 new_FlowInstruction(pol.context(), op, NULL)); 1148 1149 Instruction::clone(pol, flow); 1150 flow->allWarp = allWarp; 1151 flow->absolute = absolute; 1152 flow->limit = limit; 1153 flow->builtin = builtin; 1154 1155 if (builtin) 1156 flow->target.builtin = target.builtin; 1157 else 1158 if (op == OP_CALL) 1159 flow->target.fn = target.fn; 1160 else 1161 if (target.bb) 1162 flow->target.bb = pol.get<BasicBlock>(target.bb); 1163 1164 return flow; 1165} 1166 1167Program::Program(Type type, Target *arch) 1168 : progType(type), 1169 target(arch), 1170 tlsSize(0), 1171 mem_Instruction(sizeof(Instruction), 6), 1172 mem_CmpInstruction(sizeof(CmpInstruction), 4), 1173 mem_TexInstruction(sizeof(TexInstruction), 4), 1174 mem_FlowInstruction(sizeof(FlowInstruction), 4), 1175 mem_LValue(sizeof(LValue), 8), 1176 mem_Symbol(sizeof(Symbol), 7), 1177 mem_ImmediateValue(sizeof(ImmediateValue), 7), 1178 driver(NULL), 1179 driver_out(NULL) 1180{ 1181 code = NULL; 1182 binSize = 0; 1183 1184 maxGPR = -1; 1185 fp64 = false; 1186 persampleInvocation = false; 1187 1188 main = new Function(this, "MAIN", ~0); 1189 calls.insert(&main->call); 1190 1191 dbgFlags = 0; 1192 optLevel = 0; 1193 1194 targetPriv = NULL; 1195} 1196 1197Program::~Program() 1198{ 1199 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) 1200 delete reinterpret_cast<Function *>(it.get()); 1201 1202 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next()) 1203 releaseValue(reinterpret_cast<Value *>(it.get())); 1204} 1205 1206void Program::releaseInstruction(Instruction *insn) 1207{ 1208 // TODO: make this not suck so much 1209 1210 insn->~Instruction(); 1211 1212 if (insn->asCmp()) 1213 mem_CmpInstruction.release(insn); 1214 else 1215 if (insn->asTex()) 1216 mem_TexInstruction.release(insn); 1217 else 1218 if (insn->asFlow()) 1219 mem_FlowInstruction.release(insn); 1220 else 1221 mem_Instruction.release(insn); 1222} 1223 1224void Program::releaseValue(Value *value) 1225{ 1226 value->~Value(); 1227 1228 if (value->asLValue()) 1229 mem_LValue.release(value); 1230 else 1231 if (value->asImm()) 1232 mem_ImmediateValue.release(value); 1233 else 1234 if (value->asSym()) 1235 mem_Symbol.release(value); 1236} 1237 1238 1239} // namespace nv50_ir 1240 1241extern "C" { 1242 1243static void 1244nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, 1245 struct nv50_ir_prog_info_out *info_out) 1246{ 1247 info_out->target = info->target; 1248 info_out->type = info->type; 1249 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { 1250 info_out->prop.tp.domain = PIPE_PRIM_MAX; 1251 info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; 1252 } 1253 if (info->type == PIPE_SHADER_GEOMETRY) { 1254 info_out->prop.gp.instanceCount = 1; 1255 info_out->prop.gp.maxVertices = 1; 1256 } 1257 if (info->type == PIPE_SHADER_COMPUTE) { 1258 info->prop.cp.numThreads[0] = 1259 info->prop.cp.numThreads[1] = 1260 info->prop.cp.numThreads[2] = 1; 1261 } 1262 info_out->bin.smemSize = info->bin.smemSize; 1263 info_out->io.genUserClip = info->io.genUserClip; 1264 info_out->io.instanceId = 0xff; 1265 info_out->io.vertexId = 0xff; 1266 info_out->io.edgeFlagIn = 0xff; 1267 info_out->io.edgeFlagOut = 0xff; 1268 info_out->io.fragDepth = 0xff; 1269 info_out->io.sampleMask = 0xff; 1270} 1271 1272int 1273nv50_ir_generate_code(struct nv50_ir_prog_info *info, 1274 struct nv50_ir_prog_info_out *info_out) 1275{ 1276 int ret = 0; 1277 1278 nv50_ir::Program::Type type; 1279 1280 nv50_ir_init_prog_info(info, info_out); 1281 1282#define PROG_TYPE_CASE(a, b) \ 1283 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break 1284 1285 switch (info->type) { 1286 PROG_TYPE_CASE(VERTEX, VERTEX); 1287 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL); 1288 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL); 1289 PROG_TYPE_CASE(GEOMETRY, GEOMETRY); 1290 PROG_TYPE_CASE(FRAGMENT, FRAGMENT); 1291 PROG_TYPE_CASE(COMPUTE, COMPUTE); 1292 default: 1293 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type); 1294 return -1; 1295 } 1296 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); 1297 1298 nv50_ir::Target *targ = nv50_ir::Target::create(info->target); 1299 if (!targ) 1300 return -1; 1301 1302 nv50_ir::Program *prog = new nv50_ir::Program(type, targ); 1303 if (!prog) { 1304 nv50_ir::Target::destroy(targ); 1305 return -1; 1306 } 1307 prog->driver = info; 1308 prog->driver_out = info_out; 1309 prog->dbgFlags = info->dbgFlags; 1310 prog->optLevel = info->optLevel; 1311 1312 switch (info->bin.sourceRep) { 1313 case PIPE_SHADER_IR_NIR: 1314 ret = prog->makeFromNIR(info, info_out) ? 0 : -2; 1315 break; 1316 case PIPE_SHADER_IR_TGSI: 1317 ret = prog->makeFromTGSI(info, info_out) ? 0 : -2; 1318 break; 1319 default: 1320 ret = -1; 1321 break; 1322 } 1323 if (ret < 0) 1324 goto out; 1325 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1326 prog->print(); 1327 1328 targ->parseDriverInfo(info, info_out); 1329 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); 1330 1331 prog->convertToSSA(); 1332 1333 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1334 prog->print(); 1335 1336 prog->optimizeSSA(info->optLevel); 1337 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); 1338 1339 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1340 prog->print(); 1341 1342 if (!prog->registerAllocation()) { 1343 ret = -4; 1344 goto out; 1345 } 1346 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); 1347 1348 prog->optimizePostRA(info->optLevel); 1349 1350 if (!prog->emitBinary(info_out)) { 1351 ret = -5; 1352 goto out; 1353 } 1354 1355out: 1356 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); 1357 1358 info_out->bin.maxGPR = prog->maxGPR; 1359 info_out->bin.code = prog->code; 1360 info_out->bin.codeSize = prog->binSize; 1361 info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10); 1362 1363 delete prog; 1364 nv50_ir::Target::destroy(targ); 1365 1366 return ret; 1367} 1368 1369} // extern "C" 1370