/*
 * Copyright 2014 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs <bskeggs@redhat.com>
 */

#include "nv50_ir_target_gm107.h"
#include "nv50_ir_sched_gm107.h"

//#define GM107_DEBUG_SCHED_DATA

namespace nv50_ir {

/*
 * Instruction encoder for the GM107 target.
 *
 * Every emitXXX() helper below ORs bits into the two 32-bit words code[0]
 * (bits 0..31) and code[1] (bits 32..63) of the instruction currently being
 * encoded.  `code` and `codeSize` are not declared in this class; they are
 * presumably inherited from CodeEmitter -- confirm against the base class.
 *
 * Convention used throughout: the integer "pos" arguments are bit offsets
 * into that 64-bit pair, and a negative offset means "field not present"
 * (see emitField()).
 */
class CodeEmitterGM107 : public CodeEmitter
{
public:
   CodeEmitterGM107(const TargetGM107 *);

   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;

   virtual void prepareEmission(Program *);
   virtual void prepareEmission(Function *);

   inline void setProgramType(Program::Type pType) { progType = pType; }

private:
   const TargetGM107 *targGM107;

   Program::Type progType;

   // Instruction currently being encoded; read by nearly every helper.
   const Instruction *insn;
   // Read by emitBRA() to adjust 32-byte aligned branch targets.
   const bool writeIssueDelays;
   // Not referenced by the code visible in this part of the file.
   uint32_t *data;

private:
   // Generic bitfield writer; the 3-argument form targets the current
   // instruction words (code).
   inline void emitField(uint32_t *, int, int, uint32_t);
   inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }

   // Instruction start (opcode word + optional guard predicate).
   inline void emitInsn(uint32_t, bool);
   inline void emitInsn(uint32_t o) { emitInsn(o, true); }
   inline void emitPred();

   // Register operand helpers.  The overloads taking no value, a ValueRef
   // or a ValueDef all forward to the (int, const Value *) form, passing
   // NULL when there is no value so that the "no register" id is encoded.
   inline void emitGPR(int, const Value *);
   inline void emitGPR(int pos) {
      emitGPR(pos, (const Value *)NULL);
   }
   inline void emitGPR(int pos, const ValueRef &ref) {
      emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
   }
   inline void emitGPR(int pos, const ValueRef *ref) {
      emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
   }
   inline void emitGPR(int pos, const ValueDef &def) {
      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
   }
   inline void emitSYS(int, const Value *);
   inline void emitSYS(int pos, const ValueRef &ref) {
      emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
   }
   inline void emitPRED(int, const Value *);
   inline void emitPRED(int pos) {
      emitPRED(pos, (const Value *)NULL);
   }
   inline void emitPRED(int pos, const ValueRef &ref) {
      emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
   }
   inline void emitPRED(int pos, const ValueDef &def) {
      emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
   }

   // Memory / constant-buffer / immediate operand helpers.
   inline void emitADDR(int, int, int, int, const ValueRef &);
   inline void emitCBUF(int, int, int, int, int, const ValueRef &);
   inline bool longIMMD(const ValueRef &);
   inline void emitIMMD(int, int, const ValueRef &);

   // Condition codes and single-bit / small modifiers.
   void emitCond3(int, CondCode);
   void emitCond4(int, CondCode);
   // Same 4-bit encoding as emitCond4(); no additional bit is written here.
   void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
   inline void emitO(int);
   inline void emitP(int);
   inline void emitSAT(int);
   inline void emitCC(int);
   inline void emitX(int);
   inline void emitABS(int, const ValueRef &);
   inline void emitNEG(int, const ValueRef &);
   inline void emitNEG2(int, const ValueRef &, const ValueRef &);
   inline void emitFMZ(int, int);
   inline void emitRND(int, RoundMode, int);
   // Rounding mode of the current instruction, without the "integer" bit.
   inline void emitRND(int pos) {
      emitRND(pos, insn->rnd, -1);
   }
   inline void emitPDIV(int);
   inline void emitINV(int, const ValueRef &);

   // One emitter per machine instruction follows.
   void emitEXIT();
   void emitBRA();
   void emitCAL();
   void emitPCNT();
   void emitCONT();
   void emitPBK();
   void emitBRK();
   void emitPRET();
   void emitRET();
   void emitSSY();
   void emitSYNC();
   void emitSAM();
   void emitRAM();

   void emitPSETP();

   void emitMOV();
   void emitS2R();
   void emitCS2R();
   void emitF2F();
   void emitF2I();
   void emitI2F();
   void emitI2I();
   void emitSEL();
   void emitSHFL();

   void emitDADD();
   void emitDMUL();
   void emitDFMA();
   void emitDMNMX();
   void emitDSET();
   void emitDSETP();

   void emitFADD();
   void emitFMUL();
   void emitFFMA();
   void emitMUFU();
   void emitFMNMX();
   void emitRRO();
   void emitFCMP();
   void emitFSET();
   void emitFSETP();
   void emitFSWZADD();

   void emitLOP();
   void emitNOT();
   void emitIADD();
   void emitIMUL();
   void emitIMAD();
   void emitISCADD();
   void emitXMAD();
   void emitIMNMX();
   void emitICMP();
   void emitISET();
   void emitISETP();
   void emitSHL();
   void emitSHR();
   void emitSHF();
   void emitPOPC();
   void emitBFI();
   void emitBFE();
   void emitFLO();
   void emitPRMT();

   void emitLDSTs(int, DataType);
   void emitLDSTc(int);
   void emitLDC();
   void emitLDL();
   void emitLDS();
   void emitLD();
   void emitSTL();
   void emitSTS();
   void emitST();
   void emitALD();
   void emitAST();
   void emitISBERD();
   void emitAL2P();
   void emitIPA();
   void emitATOM();
   void emitATOMS();
   void emitRED();
   void emitCCTL();

   void emitPIXLD();

   void emitTEXs(int);
   void emitTEX();
   void emitTEXS();
   void emitTLD();
   void emitTLD4();
   void emitTXD();
   void emitTXQ();
   void emitTMML();
   void emitDEPBAR();

   void emitNOP();
   void emitKIL();
   void emitOUT();

   void emitBAR();
   void emitMEMBAR();

   void emitVOTE();

   void emitSUTarget();
   void emitSUHandle(const int s);
   void emitSUSTx();
   void emitSULDx();
   void emitSUREDx();
};

/*******************************************************************************
 * general instruction layout/fields
 ******************************************************************************/

/*
 * OR an s-bit field holding v into the 64-bit value formed by data[0] (low
 * word) and data[1] (high word), starting at bit offset b.
 *
 * - b < 0 makes the call a no-op, which callers use for optional fields.
 * - Existing bits are never cleared; the field is only ORed in.
 * - The assert accepts v when the bits above the field are either all zero
 *   or all one, i.e. v fits as an unsigned s-bit value or is a sign-extended
 *   negative value.
 */
void
CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
{
   if (b >= 0) {
      uint32_t m = ((1ULL << s) - 1);      // 64-bit shift so s == 32 is valid
      uint64_t d = (uint64_t)(v & m) << b;
      assert(!(v & ~m) || (v & ~m) == ~m);
      data[1] |= d >> 32;
      data[0] |= d;
   }
}

/*
 * Encode the guard predicate of the current instruction: a 3-bit predicate
 * register id at bit 16 and, at bit 19, a flag that is set when the
 * instruction's condition is CC_NOT_P.  Without a predicate source the id 7
 * is written (presumably the hardware's "always true" predicate -- confirm).
 */
void
CodeEmitterGM107::emitPred()
{
   if (insn->predSrc >= 0) {
      emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
      emitField(19, 1, insn->cc == CC_NOT_P);
   } else {
      emitField(16, 3, 7);
   }
}

/*
 * Begin encoding an instruction: reset the low word, load the high word
 * with the opcode bits `hi`, and emit the guard predicate when `pred` is
 * true.  Must run before any other field helper, since it overwrites both
 * words.
 */
void
CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
{
   code[0] = 0x00000000;
   code[1] = hi;
   if (pred)
      emitPred();
}

/*
 * Write an 8-bit general purpose register id at bit `pos`.  The id 255 is
 * used when there is no value or when the value lives in FILE_FLAGS
 * (presumably the "zero register" encoding -- confirm).
 */
void
CodeEmitterGM107::emitGPR(int pos, const Value *val)
{
   emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
             val->reg.data.id : 255);
}

/*
 * Translate a system value into its 8-bit hardware id and write it at bit
 * `pos`.  SV_TID, SV_CTAID and SV_CLOCK add the value's sv.index to a base
 * id.  A NULL value yields id -1, which is expected to reach the default
 * case; unknown values assert and, with asserts compiled out, encode id 0.
 */
void
CodeEmitterGM107::emitSYS(int pos, const Value *val)
{
   int id = val ? val->reg.data.id : -1;

   switch (id) {
   case SV_LANEID         : id = 0x00; break;
   case SV_VERTEX_COUNT   : id = 0x10; break;
   case SV_INVOCATION_ID  : id = 0x11; break;
   case SV_THREAD_KILL    : id = 0x13; break;
   case SV_INVOCATION_INFO: id = 0x1d; break;
   case SV_COMBINED_TID   : id = 0x20; break;
   case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
   case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
   case SV_LANEMASK_EQ    : id = 0x38; break;
   case SV_LANEMASK_LT    : id = 0x39; break;
   case SV_LANEMASK_LE    : id = 0x3a; break;
   case SV_LANEMASK_GT    : id = 0x3b; break;
   case SV_LANEMASK_GE    : id = 0x3c; break;
   case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
   default:
      assert(!"invalid system value");
      id = 0;
      break;
   }

   emitField(pos, 8, id);
}

/*
 * Write a 3-bit predicate register id at bit `pos`; id 7 is used when no
 * value is given.
 */
void
CodeEmitterGM107::emitPRED(int pos, const Value *val)
{
   emitField(pos, 3, val ? val->reg.data.id : 7);
}

/*
 * Encode a memory address operand.
 *
 *   gpr  bit offset of the indirect register field, or < 0 for none
 *   off  bit offset of the immediate offset field
 *   len  width of the offset field in bits
 *   shr  number of low offset bits that are dropped (must be zero)
 *
 * ref.get() is dereferenced without a NULL check, so the reference must
 * hold a value.
 */
void
CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
                           const ValueRef &ref)
{
   const Value *v = ref.get();
   assert(!(v->reg.data.offset & ((1 << shr) - 1)));
   if (gpr >= 0)
      emitGPR(gpr, ref.getIndirect(0));
   emitField(off, len, v->reg.data.offset >> shr);
}

/*
 * Encode a constant buffer operand: the 5-bit buffer index (reg.fileIndex)
 * at bit `buf`, an optional indirect register at bit `gpr` (skipped when
 * gpr < 0), and the offset shifted right by `shr` at bit `off`.
 *
 * NOTE(review): `len` is accepted but not used; the offset field width is
 * hard-coded to 16 bits.  Every call site visible in this part of the file
 * passes 16, so the two agree there -- confirm before passing another
 * width.
 */
void
CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
                           const ValueRef &ref)
{
   const Value *v = ref.get();
   const Symbol *s = v->asSym();

   assert(!(s->reg.data.offset & ((1 << shr) - 1)));

   emitField(buf,  5, v->reg.fileIndex);
   if (gpr >= 0)
      emitGPR(gpr, ref.getIndirect(0));
   emitField(off, 16, s->reg.data.offset >> shr);
}

/*
 * Decide whether an immediate operand needs the 32-bit ("long") immediate
 * form instead of the 19-bit + sign form written by emitIMMD(..., 19, ...).
 *
 * - Float source type: long when any of the low 12 bits are set (those bits
 *   are dropped by the short form).
 * - Otherwise: long when the signed value is outside [-0x80000, 0x7ffff].
 * - Non-immediate operands always return false.
 */
bool
CodeEmitterGM107::longIMMD(const ValueRef &ref)
{
   if (ref.getFile() == FILE_IMMEDIATE) {
      const ImmediateValue *imm = ref.get()->asImm();
      if (isFloatType(insn->sType))
         return imm->reg.data.u32 & 0xfff;
      else
         return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
   }
   return false;
}

/*
 * Write an immediate operand.
 *
 * len == 19 selects the short form, which stores 20 significant bits: the
 * low 19 at bit `pos` and the 20th (sign) bit at bit 56.  Which 20 bits are
 * kept depends on the source type:
 *   - F32 / F16: the top 20 bits of the 32-bit pattern (low 12 must be 0)
 *   - F64:       the top 20 bits of the 64-bit pattern (low 44 must be 0)
 *   - otherwise: the low 20 bits, which must sign-extend to the full value
 *
 * Any other len writes the raw 32-bit pattern into a len-bit field.
 */
void
CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
{
   const ImmediateValue *imm = ref.get()->asImm();
   uint32_t val = imm->reg.data.u32;

   if (len == 19) {
      if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
         assert(!(val & 0x00000fff));
         val >>= 12;
      } else if (insn->sType == TYPE_F64) {
         assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
         val = imm->reg.data.u64 >> 44;
      } else {
         assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
      }
      emitField( 56,   1, (val & 0x80000) >> 19);
      emitField(pos, len, (val & 0x7ffff));
   } else {
      emitField(pos, len, val);
   }
}

/*******************************************************************************
 * modifiers
 ******************************************************************************/

/*
 * Write a 3-bit condition code at bit `pos`.  Each CC_xxU variant is mapped
 * to the same value as its non-U counterpart.  Unknown codes assert and,
 * with asserts compiled out, encode 0.
 *
 * Note that the parameter `code` and the local `data` shadow members of
 * the same name; emitField(pos, ...) still targets the instruction words
 * because the member is resolved inside the inline overload.
 */
void
CodeEmitterGM107::emitCond3(int pos, CondCode code)
{
   int data = 0;

   switch (code) {
   case CC_FL : data = 0x00; break;
   case CC_LTU:
   case CC_LT : data = 0x01; break;
   case CC_EQU:
   case CC_EQ : data = 0x02; break;
   case CC_LEU:
   case CC_LE : data = 0x03; break;
   case CC_GTU:
   case CC_GT : data = 0x04; break;
   case CC_NEU:
   case CC_NE : data = 0x05; break;
   case CC_GEU:
   case CC_GE : data = 0x06; break;
   case CC_TR : data = 0x07; break;
   default:
      assert(!"invalid cond3");
      break;
   }

   emitField(pos, 3, data);
}

/*
 * Write a 4-bit condition code at bit `pos`.  Unlike emitCond3(), the
 * CC_xxU variants have their own values (0x09..0x0e) and CC_TR is 0x0f.
 * Values 0x07 and 0x08 are left unused by the commented-out cases.
 */
void
CodeEmitterGM107::emitCond4(int pos, CondCode code)
{
   int data = 0;

   switch (code) {
   case CC_FL: data = 0x00; break;
   case CC_LT: data = 0x01; break;
   case CC_EQ: data = 0x02; break;
   case CC_LE: data = 0x03; break;
   case CC_GT: data = 0x04; break;
   case CC_NE: data = 0x05; break;
   case CC_GE: data = 0x06; break;
//   case CC_NUM: data = 0x07; break;
//   case CC_NAN: data = 0x08; break;
   case CC_LTU: data = 0x09; break;
   case CC_EQU: data = 0x0a; break;
   case CC_LEU: data = 0x0b; break;
   case CC_GTU: data = 0x0c; break;
   case CC_NEU: data = 0x0d; break;
   case CC_GEU: data = 0x0e; break;
   case CC_TR:  data = 0x0f; break;
   default:
      assert(!"invalid cond4");
      break;
   }

   emitField(pos, 4, data);
}

/* Set the bit at `pos` when source 0 is in FILE_SHADER_OUTPUT. */
void
CodeEmitterGM107::emitO(int pos)
{
   emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
}

/* Set the bit at `pos` from the instruction's perPatch flag. */
void
CodeEmitterGM107::emitP(int pos)
{
   emitField(pos, 1, insn->perPatch);
}

/* Set the bit at `pos` from the instruction's saturate flag. */
void
CodeEmitterGM107::emitSAT(int pos)
{
   emitField(pos, 1, insn->saturate);
}

/* Set the bit at `pos` when the instruction defines the flags register. */
void
CodeEmitterGM107::emitCC(int pos)
{
   emitField(pos, 1, insn->flagsDef >= 0);
}

/* Set the bit at `pos` when the instruction reads the flags register. */
void
CodeEmitterGM107::emitX(int pos)
{
   emitField(pos, 1, insn->flagsSrc >= 0);
}

/* Set the bit at `pos` when `ref` carries an absolute-value modifier. */
void
CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
{
   emitField(pos, 1, ref.mod.abs());
}

/* Set the bit at `pos` when `ref` carries a negate modifier. */
void
CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
{
   emitField(pos, 1, ref.mod.neg());
}

/*
 * Set the bit at `pos` when exactly one of the two operands is negated;
 * used for products, where two negations cancel.
 */
void
CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
{
   emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
}

/*
 * Write the value (dnz << 1 | ftz) into a `len`-bit field at `pos`.
 * With len == 1 only ftz fits: a set dnz would produce a value of 2 or 3
 * and trip the range assert in emitField().
 */
void
CodeEmitterGM107::emitFMZ(int pos, int len)
{
   emitField(pos, len, insn->dnz << 1 | insn->ftz);
}

/*
 * Encode a rounding mode.
 *
 *   rmp  bit offset of the 2-bit mode field (N=0, M=1, P=2, Z=3)
 *   rnd  rounding mode to encode
 *   rip  bit offset of the 1-bit "round to integer" flag, or -1 to omit it
 *
 * The ROUND_xI cases set the integer flag and then deliberately fall
 * through to the matching non-integer case to pick up the mode value.
 */
void
CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
{
   int rm = 0, ri = 0;
   switch (rnd) {
   case ROUND_NI: ri = 1; /* fallthrough */
   case ROUND_N : rm = 0; break;
   case ROUND_MI: ri = 1; /* fallthrough */
   case ROUND_M : rm = 1; break;
   case ROUND_PI: ri = 1; /* fallthrough */
   case ROUND_P : rm = 2; break;
   case ROUND_ZI: ri = 1; /* fallthrough */
   case ROUND_Z : rm = 3; break;
   default:
      assert(!"invalid round mode");
      break;
   }
   emitField(rip, 1, ri);
   emitField(rmp, 2, rm);
}

/*
 * Encode the instruction's postFactor (asserted to be within [-3, 3]) into
 * a 3-bit field at `pos`: positive factors are stored as 7 - postFactor,
 * zero and negative factors as -postFactor.
 */
void
CodeEmitterGM107::emitPDIV(int pos)
{
   assert(insn->postFactor >= -3 && insn->postFactor <= 3);
   if (insn->postFactor > 0)
      emitField(pos, 3, 7 - insn->postFactor);
   else
      emitField(pos, 3, 0 - insn->postFactor);
}

/* Set the bit at `pos` when `ref` carries the NV50_IR_MOD_NOT modifier. */
void
CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
{
   emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
}

/*******************************************************************************
 * control flow
 ******************************************************************************/

/* Opcode word 0xe3000000 with guard predicate and condition CC_TR at bit 0. */
void
CodeEmitterGM107::emitEXIT()
{
   emitInsn (0xe3000000);
   emitCond5(0x00, CC_TR);
}

/*
 * Encode a branch.  The opcode is chosen from the (indirect, absolute)
 * flags of the flow instruction; the target is either
 *   - a code position taken from the target basic block (PC-relative 24-bit
 *     or absolute 32-bit field at bit 0x14), or
 *   - a constant buffer operand, flagged by bit 5, when source 0 exists and
 *     is in FILE_MEMORY_CONST.
 *
 * The local `insn` intentionally shadows the member with its
 * FlowInstruction view.
 */
void
CodeEmitterGM107::emitBRA()
{
   const FlowInstruction *insn = this->insn->asFlow();
   int gpr = -1;

   if (insn->indirect) {
      if (insn->absolute)
         emitInsn(0xe2000000); // JMX
      else
         emitInsn(0xe2500000); // BRX
      gpr = 0x08;               // indirect register field, used by emitCBUF below
   } else {
      if (insn->absolute)
         emitInsn(0xe2100000); // JMP
      else
         emitInsn(0xe2400000); // BRA
      emitField(0x07, 1, insn->allWarp);
   }

   emitField(0x06, 1, insn->limit);
   emitCond5(0x00, CC_TR);

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      int32_t pos = insn->target.bb->binPos;
      // With issue delays enabled, a target that is a multiple of 32 bytes
      // is moved forward by 8 bytes (presumably to skip a scheduling word
      // that occupies that slot -- confirm against prepareEmission()).
      if (writeIssueDelays && !(pos & 0x1f))
         pos += 8;
      if (!insn->absolute)
         emitField(0x14, 24, pos - (codeSize + 8)); // relative to next insn
      else
         emitField(0x14, 32, pos);
   } else {
      emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/*
 * Encode a call (no guard predicate is emitted).  Targets:
 *   - relative:          24-bit offset from the next instruction
 *   - absolute builtin:  left to two TYPE_BUILTIN relocations
 *   - absolute other:    32-bit code position
 *   - constant buffer:   emitCBUF + bit 5, when source 0 is FILE_MEMORY_CONST
 */
void
CodeEmitterGM107::emitCAL()
{
   const FlowInstruction *insn = this->insn->asFlow();

   if (insn->absolute) {
      emitInsn(0xe2200000, false); // JCAL
   } else {
      emitInsn(0xe2600000, false); // CAL
   }

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      if (!insn->absolute)
         emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
      else {
         if (insn->builtin) {
            int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
            // The two relocations carry masks 0xfff00000 / 0x000fffff and
            // shifts 20 / -12 for words 0 and 1, which matches a 32-bit
            // field starting at bit 0x14 split across both words
            // (presumably addReloc(type, word, value, mask, shift) --
            // confirm against addReloc's declaration).
            addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
            addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
         } else {
            emitField(0x14, 32, insn->target.bb->binPos);
         }
      }
   } else {
      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/*
 * Opcode word 0xe2b00000 without guard predicate.  The target is a 24-bit
 * offset relative to the next instruction, or a constant buffer operand
 * (with bit 5 set) when source 0 is in FILE_MEMORY_CONST.
 */
void
CodeEmitterGM107::emitPCNT()
{
   const FlowInstruction *insn = this->insn->asFlow();

   emitInsn(0xe2b00000, false);

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
   } else {
      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/* Opcode word 0xe3500000 with guard predicate and condition CC_TR at bit 0. */
void
CodeEmitterGM107::emitCONT()
{
   emitInsn (0xe3500000);
   emitCond5(0x00, CC_TR);
}

/*
 * Opcode word 0xe2a00000 without guard predicate; target encoded exactly
 * as in emitPCNT().
 */
void
CodeEmitterGM107::emitPBK()
{
   const FlowInstruction *insn = this->insn->asFlow();

   emitInsn(0xe2a00000, false);

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
   } else {
      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/* Opcode word 0xe3400000 with guard predicate and condition CC_TR at bit 0. */
void
CodeEmitterGM107::emitBRK()
{
   emitInsn (0xe3400000);
   emitCond5(0x00, CC_TR);
}

/*
 * Opcode word 0xe2700000 without guard predicate; target encoded exactly
 * as in emitPCNT().
 */
void
CodeEmitterGM107::emitPRET()
{
   const FlowInstruction *insn = this->insn->asFlow();

   emitInsn(0xe2700000, false);

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
   } else {
      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/* Opcode word 0xe3200000 with guard predicate and condition CC_TR at bit 0. */
void
CodeEmitterGM107::emitRET()
{
   emitInsn (0xe3200000);
   emitCond5(0x00, CC_TR);
}

/*
 * Opcode word 0xe2900000 without guard predicate; target encoded exactly
 * as in emitPCNT().
 */
void
CodeEmitterGM107::emitSSY()
{
   const FlowInstruction *insn = this->insn->asFlow();

   emitInsn(0xe2900000, false);

   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
   } else {
      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
      emitField(0x05, 1, 1);
   }
}

/* Opcode word 0xf0f80000 with guard predicate and condition CC_TR at bit 0. */
void
CodeEmitterGM107::emitSYNC()
{
   emitInsn (0xf0f80000);
   emitCond5(0x00, CC_TR);
}

/* Opcode word 0xe3700000 only; no predicate and no operands. */
void
CodeEmitterGM107::emitSAM()
{
   emitInsn(0xe3700000, false);
}

/* Opcode word 0xe3800000 only; no predicate and no operands. */
void
CodeEmitterGM107::emitRAM()
{
   emitInsn(0xe3800000, false);
}

/*******************************************************************************
 * predicate/cc
 ******************************************************************************/

/*
 * Combine two predicate sources into a predicate destination.  The logic
 * operation (AND / OR / XOR) is taken from insn->op and stored in the 3-bit
 * field at bit 0x18.  The third source slot (0x27) and the second
 * destination slot (0x00) are filled with the "none" predicate id.
 */
void
CodeEmitterGM107::emitPSETP()
{

   emitInsn(0x50900000);

   switch (insn->op) {
   case OP_AND: emitField(0x18, 3, 0); break;
   case OP_OR:  emitField(0x18, 3, 1); break;
   case OP_XOR: emitField(0x18, 3, 2); break;
   default:
      assert(!"unexpected operation");
      break;
   }

   // emitINV (0x2a);
   emitPRED(0x27); // TODO: support 3-arg
   emitINV (0x20, insn->src(1));
   emitPRED(0x1d, insn->src(1));
   emitINV (0x0f, insn->src(0));
   emitPRED(0x0c, insn->src(0));
   emitPRED(0x03, insn->def(0));
   emitPRED(0x00);
}

/*******************************************************************************
 * movement / conversion
 ******************************************************************************/

/*
 * Encode a move.  The opcode depends on the source file:
 *   - immediate:        opcode 0x01000000 with a full 32-bit immediate and
 *                       the lane mask at bit 0x0c
 *   - GPR:              0x5c980000, or 0x5b6a0000 when the destination is a
 *                       predicate
 *   - constant buffer:  0x4c980000
 *   - predicate:        0x50880000
 * For the non-immediate forms the lane mask is written at bit 0x27 unless a
 * predicate is involved on either side.  The destination is written last,
 * as a predicate (bit 0x03) or as a GPR (bit 0x00).
 */
void
CodeEmitterGM107::emitMOV()
{
   if (insn->src(0).getFile() != FILE_IMMEDIATE) {
      switch (insn->src(0).getFile()) {
      case FILE_GPR:
         if (insn->def(0).getFile() == FILE_PREDICATE) {
            emitInsn(0x5b6a0000);
            emitGPR (0x08);
         } else {
            emitInsn(0x5c980000);
         }
         emitGPR (0x14, insn->src(0));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c980000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
         break;
      // NOTE(review): unreachable -- the enclosing `if` already excludes
      // FILE_IMMEDIATE, so immediates always take the 32-bit form below.
      case FILE_IMMEDIATE:
         emitInsn(0x38980000);
         emitIMMD(0x14, 19, insn->src(0));
         break;
      case FILE_PREDICATE:
         emitInsn(0x50880000);
         emitPRED(0x0c, insn->src(0));
         emitPRED(0x1d);
         emitPRED(0x27);
         break;
      default:
         assert(!"bad src file");
         break;
      }
      if (insn->def(0).getFile() != FILE_PREDICATE &&
          insn->src(0).getFile() != FILE_PREDICATE)
         emitField(0x27, 4, insn->lanes);
   } else {
      emitInsn (0x01000000);
      emitIMMD (0x14, 32, insn->src(0));
      emitField(0x0c, 4, insn->lanes);
   }

   if (insn->def(0).getFile() == FILE_PREDICATE) {
      emitPRED(0x27);
      emitPRED(0x03, insn->def(0));
      emitPRED(0x00);
   } else {
      emitGPR(0x00, insn->def(0));
   }
}

/*
 * Opcode word 0xf0c80000: system value id (see emitSYS) at bit 0x14,
 * destination GPR at bit 0x00.
 */
void
CodeEmitterGM107::emitS2R()
{
   emitInsn(0xf0c80000);
   emitSYS (0x14, insn->src(0));
   emitGPR (0x00, insn->def(0));
}

/*
 * Opcode word 0x50c80000: same operand layout as emitS2R().
 */
void
CodeEmitterGM107::emitCS2R()
{
   emitInsn(0x50c80000);
   emitSYS (0x14, insn->src(0));
   emitGPR (0x00, insn->def(0));
}

void
CodeEmitterGM107::emitF2F()
{
   /*
    * Float-to-float conversion.  OP_FLOOR / OP_CEIL / OP_TRUNC override the
    * instruction's rounding mode with the matching round-to-integer mode;
    * OP_SAT / OP_ABS / OP_NEG are folded into the saturate / abs / neg
    * bits.  The opcode is chosen by the file of source 0 (GPR, constant
    * buffer, or short immediate).
    */
   RoundMode rnd = insn->rnd;

   switch (insn->op) {
   case OP_FLOOR: rnd = ROUND_MI; break;
   case OP_CEIL : rnd = ROUND_PI; break;
   case OP_TRUNC: rnd = ROUND_ZI; break;
   default:
      break;
   }

   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5ca80000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4ca80000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38a80000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src0 file");
      break;
   }

   emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
   emitCC   (0x2f);
   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
   emitFMZ  (0x2c, 1);
   emitField(0x29, 1, insn->subOp);
   emitRND  (0x27, rnd, 0x2a);
   // Operand sizes are stored as log2 of the type size in bytes.
   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Float-to-integer conversion.  OP_FLOOR / OP_CEIL / OP_TRUNC override the
 * rounding mode with ROUND_M / ROUND_P / ROUND_Z.  Bit 0x0c records whether
 * the destination type is signed; the size fields hold log2 of the type
 * sizes in bytes.
 */
void
CodeEmitterGM107::emitF2I()
{
   RoundMode rnd = insn->rnd;

   switch (insn->op) {
   case OP_FLOOR: rnd = ROUND_M; break;
   case OP_CEIL : rnd = ROUND_P; break;
   case OP_TRUNC: rnd = ROUND_Z; break;
   default:
      break;
   }

   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5cb00000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4cb00000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38b00000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src0 file");
      break;
   }

   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
   emitCC   (0x2f);
   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
   emitFMZ  (0x2c, 1);
   emitRND  (0x27, rnd, 0x2a);
   emitField(0x0c, 1, isSignedType(insn->dType));
   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Integer-to-float conversion.  Same rounding override as emitF2I(), but
 * the round-to-integer flag is omitted (rip == -1).  Bit 0x0d records
 * whether the source type is signed, and subOp goes into a 2-bit field at
 * bit 0x29.
 */
void
CodeEmitterGM107::emitI2F()
{
   RoundMode rnd = insn->rnd;

   switch (insn->op) {
   case OP_FLOOR: rnd = ROUND_M; break;
   case OP_CEIL : rnd = ROUND_P; break;
   case OP_TRUNC: rnd = ROUND_Z; break;
   default:
      break;
   }

   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5cb80000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4cb80000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38b80000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src0 file");
      break;
   }

   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
   emitCC   (0x2f);
   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
   emitField(0x29, 2, insn->subOp);
   emitRND  (0x27, rnd, -1);
   emitField(0x0d, 1, isSignedType(insn->sType));
   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Integer-to-integer conversion.  Signedness of source and destination is
 * recorded in bits 0x0d and 0x0c; the size fields hold log2 of the type
 * sizes in bytes.  No rounding mode is encoded.
 */
void
CodeEmitterGM107::emitI2I()
{
   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5ce00000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4ce00000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38e00000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src0 file");
      break;
   }

   emitSAT  (0x32);
   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
   emitCC   (0x2f);
   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
   emitField(0x29, 2, insn->subOp);
   emitField(0x0d, 1, isSignedType(insn->sType));
   emitField(0x0c, 1, isSignedType(insn->dType));
   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Fixup callback registered by emitSEL().  It forces bit 10 of the high
 * word at code[entry->loc + 1] to a value taken from `data`:
 *   entry->ipa == 0  ->  data.force_persample_interp
 *   entry->ipa == 1  ->  data.msaa
 * Any other ipa value leaves `val` false, i.e. clears the bit.
 *
 * Bit 10 of the high word is instruction bit 42 (0x2a), the position
 * emitSEL() uses for the predicate-invert flag.
 */
void
gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
{
   int loc = entry->loc;
   bool val = false;
   switch (entry->ipa) {
   case 0:
      val = data.force_persample_interp;
      break;
   case 1:
      val = data.msaa;
      break;
   }
   if (val)
      code[loc + 1] |= 1 << 10;
   else
      code[loc + 1] &= ~(1 << 10);
}

/*
 * Select between source 0 (GPR) and source 1 (GPR, constant buffer or
 * short immediate) based on the predicate in source 2, which may be
 * inverted.  When subOp is non-zero, a fixup is registered so that
 * gm107_selpFlip() can rewrite the invert bit later; subOp - 1 becomes the
 * fixup's ipa selector.
 */
void
CodeEmitterGM107::emitSEL()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5ca00000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4ca00000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38a00000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitINV (0x2a, insn->src(2));
   emitPRED(0x27, insn->src(2));
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));

   if (insn->subOp >= 1) {
      addInterp(insn->subOp - 1, 0, gm107_selpFlip);
   }
}

/*
 * Opcode word 0xef100000.  Sources 1 and 2 may each be a GPR or an
 * immediate; the 2-bit `type` field at bit 0x1c records which of them are
 * immediates (bit 0 for source 1, bit 1 for source 2).  An optional second
 * definition must be a predicate and is written at bit 0x30; subOp goes
 * into the 2-bit field at bit 0x1e.
 */
void
CodeEmitterGM107::emitSHFL()
{
   int type = 0;

   emitInsn (0xef100000);

   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitGPR(0x14, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitIMMD(0x14, 5, insn->src(1));
      type |= 1;
      break;
   default:
      assert(!"invalid src1 file");
      break;
   }

   switch (insn->src(2).getFile()) {
   case FILE_GPR:
      emitGPR(0x27, insn->src(2));
      break;
   case FILE_IMMEDIATE:
      emitIMMD(0x22, 13, insn->src(2));
      type |= 2;
      break;
   default:
      assert(!"invalid src2 file");
      break;
   }

   if (!insn->defExists(1))
      emitPRED(0x30);
   else {
      assert(insn->def(1).getFile() == FILE_PREDICATE);
      emitPRED(0x30, insn->def(1));
   }

   emitField(0x1e, 2, insn->subOp);
   emitField(0x1c, 2, type);
   emitGPR  (0x08, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}

/*******************************************************************************
 * double
 ******************************************************************************/

/*
 * Double-precision add; also used for OP_SUB.  Source 1 selects the opcode
 * (GPR, constant buffer or short immediate).  For OP_SUB the source-1
 * negate bit is toggled: 0x00002000 in the high word is instruction bit
 * 0x2d, the bit written by emitNEG(0x2d, src(1)) just above.
 */
void
CodeEmitterGM107::emitDADD()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c700000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c700000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38700000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }
   emitABS(0x31, insn->src(1));
   emitNEG(0x30, insn->src(0));
   emitCC (0x2f);
   emitABS(0x2e, insn->src(0));
   emitNEG(0x2d, insn->src(1));

   // XOR rather than OR: subtracting an already negated operand cancels.
   if (insn->op == OP_SUB)
      code[1] ^= 0x00002000;

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}

/*
 * Double-precision multiply.  Source 1 selects the opcode; a single
 * combined negate bit (emitNEG2) covers both operands.
 */
void
CodeEmitterGM107::emitDMUL()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c800000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c800000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38800000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitNEG2(0x30, insn->src(0), insn->src(1));
   emitCC  (0x2f);
   emitRND (0x27);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}

void
CodeEmitterGM107::emitDFMA()
{
   /*
    * Double-precision fused multiply-add.  Two operand layouts exist:
    *   - source 2 in a GPR: source 1 may be a GPR, a constant buffer or a
    *     short immediate, and source 2 goes to bit 0x27
    *   - source 2 in a constant buffer: source 1 must then be a GPR and is
    *     placed at bit 0x27 instead
    */
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5b700000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4b700000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x36700000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x53700000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitRND (0x32);
   emitNEG (0x31, insn->src(2));
   emitNEG2(0x30, insn->src(0), insn->src(1));
   emitCC  (0x2f);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}

/*
 * Double-precision min/max.  Bit 0x2a is set for OP_MAX and clear
 * otherwise; the predicate slot at bit 0x27 is filled with the "none"
 * predicate id.
 */
void
CodeEmitterGM107::emitDMNMX()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c500000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c500000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38500000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitABS  (0x31, insn->src(1));
   emitNEG  (0x30, insn->src(0));
   emitCC   (0x2f);
   emitABS  (0x2e, insn->src(0));
   emitNEG  (0x2d, insn->src(1));
   emitField(0x2a, 1, insn->op == OP_MAX);
   emitPRED (0x27);
   emitGPR  (0x08, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Double-precision compare with a GPR result.  For OP_SET_AND / _OR / _XOR
 * the comparison is combined with the predicate in source 2 and the
 * combining operation is stored at bit 0x2d; plain OP_SET leaves the
 * predicate slot at "none".  Bit 0x34 is set when the destination type is
 * F32.
 */
void
CodeEmitterGM107::emitDSET()
{
   const CmpInstruction *insn = this->insn->asCmp();

   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x59000000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x49000000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x32000000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   if (insn->op != OP_SET) {
      switch (insn->op) {
      case OP_SET_AND: emitField(0x2d, 2, 0); break;
      case OP_SET_OR : emitField(0x2d, 2, 1); break;
      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
      default:
         assert(!"invalid set op");
         break;
      }
      emitPRED(0x27, insn->src(2));
   } else {
      emitPRED(0x27);
   }

   emitABS  (0x36, insn->src(0));
   emitNEG  (0x35, insn->src(1));
   emitField(0x34, 1, insn->dType == TYPE_F32);
   emitCond4(0x30, insn->setCond);
   emitCC   (0x2f);
   emitABS  (0x2c, insn->src(1));
   emitNEG  (0x2b, insn->src(0));
   emitGPR  (0x08, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}

/*
 * Double-precision compare with predicate results.  Same opcode selection
 * and predicate combining as emitDSET(); the first definition is written
 * at bit 0x03 and the optional second definition at bit 0x00 ("none" when
 * absent).
 */
void
CodeEmitterGM107::emitDSETP()
{
   const CmpInstruction *insn = this->insn->asCmp();

   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5b800000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4b800000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x36800000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   if (insn->op != OP_SET) {
      switch (insn->op) {
      case OP_SET_AND: emitField(0x2d, 2, 0); break;
      case OP_SET_OR : emitField(0x2d, 2, 1); break;
      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
      default:
         assert(!"invalid set op");
         break;
      }
      emitPRED(0x27, insn->src(2));
   } else {
      emitPRED(0x27);
   }

   emitCond4(0x30, insn->setCond);
   emitABS  (0x2c, insn->src(1));
   emitNEG  (0x2b, insn->src(0));
   emitGPR  (0x08, insn->src(0));
   emitABS  (0x07, insn->src(0));
   emitNEG  (0x06, insn->src(1));
   emitPRED (0x03, insn->def(0));
   if (insn->defExists(1))
      emitPRED(0x00, insn->def(1));
   else
      emitPRED(0x00);
}

/*******************************************************************************
 * float
 ******************************************************************************/

/*
 * Single-precision add; also used for OP_SUB.  Two encodings:
 *   - short form: source 1 is a GPR, a constant buffer or an immediate that
 *     fits the short immediate field (see longIMMD)
 *   - long form (opcode 0x08000000): source 1 is a full 32-bit immediate
 *
 * OP_SUB is implemented by toggling a bit with XOR, so that it cancels
 * against an existing negation:
 *   - short form: 0x00002000 in the high word is instruction bit 0x2d, the
 *     source-1 negate bit
 *   - long form:  0x00080000 in the high word is instruction bit 51, the
 *     top bit of the 32-bit immediate that starts at bit 0x14, i.e. the
 *     sign bit of the float immediate
 */
void
CodeEmitterGM107::emitFADD()
{
   if (!longIMMD(insn->src(1))) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c580000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c580000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38580000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT(0x32);
      emitABS(0x31, insn->src(1));
      emitNEG(0x30, insn->src(0));
      emitCC (0x2f);
      emitABS(0x2e, insn->src(0));
      emitNEG(0x2d, insn->src(1));
      emitFMZ(0x2c, 1);

      if (insn->op == OP_SUB)
         code[1] ^= 0x00002000;
   } else {
      emitInsn(0x08000000);
      emitABS(0x39, insn->src(1));
      emitNEG(0x38, insn->src(0));
      emitFMZ(0x37, 1);
      emitABS(0x36, insn->src(0));
      emitNEG(0x35, insn->src(1));
      emitCC (0x34);
      emitIMMD(0x14, 32, insn->src(1));

      if (insn->op == OP_SUB)
         code[1] ^= 0x00080000;
   }

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}

/*
 * Single-precision multiply.  The short form carries a combined negate
 * bit, the post-multiply factor (emitPDIV) and a rounding mode.  The long
 * immediate form (opcode 0x1e000000) has no negate bit, so a net negation
 * of the product is applied by flipping the sign bit of the immediate
 * instead.
 */
void
CodeEmitterGM107::emitFMUL()
{
   if (!longIMMD(insn->src(1))) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c680000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c680000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38680000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT (0x32);
      emitNEG2(0x30, insn->src(0), insn->src(1));
      emitCC  (0x2f);
      emitFMZ (0x2c, 2);
      emitPDIV(0x29);
      emitRND (0x27);
   } else {
      emitInsn(0x1e000000);
      emitSAT (0x37);
      emitFMZ (0x35, 2);
      emitCC  (0x34);
      emitIMMD(0x14, 32, insn->src(1));
      if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
         code[1] ^= 0x00080000; /* flip immd sign bit */
   }

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}

void
CodeEmitterGM107::emitFFMA()
{
   bool isLongIMMD = false;
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x59800000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x49800000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         if (longIMMD(insn->getSrc(1))) {
            assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
            isLongIMMD = true;
            emitInsn(0x0c000000);
            emitIMMD(0x14, 32, insn->src(1));
         } else {
            emitInsn(0x32800000);
            emitIMMD(0x14, 19, insn->src(1));
         }
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      if (!isLongIMMD)
         emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x51800000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   if (isLongIMMD) {
      emitNEG (0x39, insn->src(2));
      emitNEG2(0x38, insn->src(0), insn->src(1));
      emitSAT (0x37);
      emitCC  (0x34);
   } else {
      emitRND (0x33);
      emitSAT (0x32);
      emitNEG (0x31, insn->src(2));
      emitNEG2(0x30, insn->src(0),
insn->src(1)); 1432 emitCC (0x2f); 1433 } 1434 1435 emitFMZ(0x35, 2); 1436 emitGPR(0x08, insn->src(0)); 1437 emitGPR(0x00, insn->def(0)); 1438} 1439 1440void 1441CodeEmitterGM107::emitMUFU() 1442{ 1443 int mufu = 0; 1444 1445 switch (insn->op) { 1446 case OP_COS: mufu = 0; break; 1447 case OP_SIN: mufu = 1; break; 1448 case OP_EX2: mufu = 2; break; 1449 case OP_LG2: mufu = 3; break; 1450 case OP_RCP: mufu = 4 + 2 * insn->subOp; break; 1451 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break; 1452 case OP_SQRT: mufu = 8; break; 1453 default: 1454 assert(!"invalid mufu"); 1455 break; 1456 } 1457 1458 emitInsn (0x50800000); 1459 emitSAT (0x32); 1460 emitNEG (0x30, insn->src(0)); 1461 emitABS (0x2e, insn->src(0)); 1462 emitField(0x14, 4, mufu); 1463 emitGPR (0x08, insn->src(0)); 1464 emitGPR (0x00, insn->def(0)); 1465} 1466 1467void 1468CodeEmitterGM107::emitFMNMX() 1469{ 1470 switch (insn->src(1).getFile()) { 1471 case FILE_GPR: 1472 emitInsn(0x5c600000); 1473 emitGPR (0x14, insn->src(1)); 1474 break; 1475 case FILE_MEMORY_CONST: 1476 emitInsn(0x4c600000); 1477 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1478 break; 1479 case FILE_IMMEDIATE: 1480 emitInsn(0x38600000); 1481 emitIMMD(0x14, 19, insn->src(1)); 1482 break; 1483 default: 1484 assert(!"bad src1 file"); 1485 break; 1486 } 1487 1488 emitField(0x2a, 1, insn->op == OP_MAX); 1489 emitPRED (0x27); 1490 1491 emitABS(0x31, insn->src(1)); 1492 emitNEG(0x30, insn->src(0)); 1493 emitCC (0x2f); 1494 emitABS(0x2e, insn->src(0)); 1495 emitNEG(0x2d, insn->src(1)); 1496 emitFMZ(0x2c, 1); 1497 emitGPR(0x08, insn->src(0)); 1498 emitGPR(0x00, insn->def(0)); 1499} 1500 1501void 1502CodeEmitterGM107::emitRRO() 1503{ 1504 switch (insn->src(0).getFile()) { 1505 case FILE_GPR: 1506 emitInsn(0x5c900000); 1507 emitGPR (0x14, insn->src(0)); 1508 break; 1509 case FILE_MEMORY_CONST: 1510 emitInsn(0x4c900000); 1511 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 1512 break; 1513 case FILE_IMMEDIATE: 1514 emitInsn(0x38900000); 1515 
emitIMMD(0x14, 19, insn->src(0)); 1516 break; 1517 default: 1518 assert(!"bad src file"); 1519 break; 1520 } 1521 1522 emitABS (0x31, insn->src(0)); 1523 emitNEG (0x2d, insn->src(0)); 1524 emitField(0x27, 1, insn->op == OP_PREEX2); 1525 emitGPR (0x00, insn->def(0)); 1526} 1527 1528void 1529CodeEmitterGM107::emitFCMP() 1530{ 1531 const CmpInstruction *insn = this->insn->asCmp(); 1532 CondCode cc = insn->setCond; 1533 1534 if (insn->src(2).mod.neg()) 1535 cc = reverseCondCode(cc); 1536 1537 switch(insn->src(2).getFile()) { 1538 case FILE_GPR: 1539 switch (insn->src(1).getFile()) { 1540 case FILE_GPR: 1541 emitInsn(0x5ba00000); 1542 emitGPR (0x14, insn->src(1)); 1543 break; 1544 case FILE_MEMORY_CONST: 1545 emitInsn(0x4ba00000); 1546 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1547 break; 1548 case FILE_IMMEDIATE: 1549 emitInsn(0x36a00000); 1550 emitIMMD(0x14, 19, insn->src(1)); 1551 break; 1552 default: 1553 assert(!"bad src1 file"); 1554 break; 1555 } 1556 emitGPR (0x27, insn->src(2)); 1557 break; 1558 case FILE_MEMORY_CONST: 1559 emitInsn(0x53a00000); 1560 emitGPR (0x27, insn->src(1)); 1561 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1562 break; 1563 default: 1564 assert(!"bad src2 file"); 1565 break; 1566 } 1567 1568 emitCond4(0x30, cc); 1569 emitFMZ (0x2f, 1); 1570 emitGPR (0x08, insn->src(0)); 1571 emitGPR (0x00, insn->def(0)); 1572} 1573 1574void 1575CodeEmitterGM107::emitFSET() 1576{ 1577 const CmpInstruction *insn = this->insn->asCmp(); 1578 1579 switch (insn->src(1).getFile()) { 1580 case FILE_GPR: 1581 emitInsn(0x58000000); 1582 emitGPR (0x14, insn->src(1)); 1583 break; 1584 case FILE_MEMORY_CONST: 1585 emitInsn(0x48000000); 1586 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1587 break; 1588 case FILE_IMMEDIATE: 1589 emitInsn(0x30000000); 1590 emitIMMD(0x14, 19, insn->src(1)); 1591 break; 1592 default: 1593 assert(!"bad src1 file"); 1594 break; 1595 } 1596 1597 if (insn->op != OP_SET) { 1598 switch (insn->op) { 1599 case OP_SET_AND: emitField(0x2d, 
2, 0); break; 1600 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1601 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1602 default: 1603 assert(!"invalid set op"); 1604 break; 1605 } 1606 emitPRED(0x27, insn->src(2)); 1607 } else { 1608 emitPRED(0x27); 1609 } 1610 1611 emitFMZ (0x37, 1); 1612 emitABS (0x36, insn->src(0)); 1613 emitNEG (0x35, insn->src(1)); 1614 emitField(0x34, 1, insn->dType == TYPE_F32); 1615 emitCond4(0x30, insn->setCond); 1616 emitCC (0x2f); 1617 emitABS (0x2c, insn->src(1)); 1618 emitNEG (0x2b, insn->src(0)); 1619 emitGPR (0x08, insn->src(0)); 1620 emitGPR (0x00, insn->def(0)); 1621} 1622 1623void 1624CodeEmitterGM107::emitFSETP() 1625{ 1626 const CmpInstruction *insn = this->insn->asCmp(); 1627 1628 switch (insn->src(1).getFile()) { 1629 case FILE_GPR: 1630 emitInsn(0x5bb00000); 1631 emitGPR (0x14, insn->src(1)); 1632 break; 1633 case FILE_MEMORY_CONST: 1634 emitInsn(0x4bb00000); 1635 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1636 break; 1637 case FILE_IMMEDIATE: 1638 emitInsn(0x36b00000); 1639 emitIMMD(0x14, 19, insn->src(1)); 1640 break; 1641 default: 1642 assert(!"bad src1 file"); 1643 break; 1644 } 1645 1646 if (insn->op != OP_SET) { 1647 switch (insn->op) { 1648 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1649 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1650 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1651 default: 1652 assert(!"invalid set op"); 1653 break; 1654 } 1655 emitPRED(0x27, insn->src(2)); 1656 } else { 1657 emitPRED(0x27); 1658 } 1659 1660 emitCond4(0x30, insn->setCond); 1661 emitFMZ (0x2f, 1); 1662 emitABS (0x2c, insn->src(1)); 1663 emitNEG (0x2b, insn->src(0)); 1664 emitGPR (0x08, insn->src(0)); 1665 emitABS (0x07, insn->src(0)); 1666 emitNEG (0x06, insn->src(1)); 1667 emitPRED (0x03, insn->def(0)); 1668 if (insn->defExists(1)) 1669 emitPRED(0x00, insn->def(1)); 1670 else 1671 emitPRED(0x00); 1672} 1673 1674void 1675CodeEmitterGM107::emitFSWZADD() 1676{ 1677 emitInsn (0x50f80000); 1678 emitCC (0x2f); 1679 
emitFMZ (0x2c, 1); 1680 emitRND (0x27); 1681 emitField(0x26, 1, insn->lanes); /* abused for .ndv */ 1682 emitField(0x1c, 8, insn->subOp); 1683 if (insn->predSrc != 1) 1684 emitGPR (0x14, insn->src(1)); 1685 else 1686 emitGPR (0x14); 1687 emitGPR (0x08, insn->src(0)); 1688 emitGPR (0x00, insn->def(0)); 1689} 1690 1691/******************************************************************************* 1692 * integer 1693 ******************************************************************************/ 1694 /* Emit a two-source bitwise op; insn->op selects AND (0), OR (1) or XOR (2). src1 may be a GPR, a constant-buffer reference or an immediate; when longIMMD() holds for src1 the separate 0x04000000 encoding with a 32-bit immediate is used and the modifier fields move to different bit positions. */ 1695void 1696CodeEmitterGM107::emitLOP() 1697{ 1698 int lop = 0; 1699 1700 switch (insn->op) { 1701 case OP_AND: lop = 0; break; 1702 case OP_OR : lop = 1; break; 1703 case OP_XOR: lop = 2; break; 1704 default: 1705 assert(!"invalid lop"); 1706 break; 1707 } 1708 1709 if (!longIMMD(insn->src(1))) { 1710 switch (insn->src(1).getFile()) { 1711 case FILE_GPR: 1712 emitInsn(0x5c400000); 1713 emitGPR (0x14, insn->src(1)); 1714 break; 1715 case FILE_MEMORY_CONST: 1716 emitInsn(0x4c400000); 1717 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1718 break; 1719 case FILE_IMMEDIATE: 1720 emitInsn(0x38400000); 1721 emitIMMD(0x14, 19, insn->src(1)); 1722 break; 1723 default: 1724 assert(!"bad src1 file"); 1725 break; 1726 } 1727 emitPRED (0x30); 1728 emitCC (0x2f); 1729 emitX (0x2b); 1730 emitField(0x29, 2, lop); 1731 emitINV (0x28, insn->src(1)); 1732 emitINV (0x27, insn->src(0)); 1733 } else { 1734 emitInsn (0x04000000); 1735 emitX (0x39); 1736 emitINV (0x38, insn->src(1)); 1737 emitINV (0x37, insn->src(0)); 1738 emitField(0x35, 2, lop); 1739 emitCC (0x34); 1740 emitIMMD (0x14, 32, insn->src(1)); 1741 } 1742 1743 emitGPR (0x08, insn->src(0)); 1744 emitGPR (0x00, insn->def(0)); 1745} 1746 1747/* special-case of emitLOP(): lop pass_b dst 0 ~src. NOT has exactly one source, so the operand is always read from insn->src(0) - in the register, constant-buffer, short-immediate and 32-bit-immediate forms alike - and the GPR slot at 0x08 is emitted with no value. */ 1748void 1749CodeEmitterGM107::emitNOT() 1750{ 1751 if (!longIMMD(insn->src(0))) { 1752 switch (insn->src(0).getFile()) { 1753 case FILE_GPR: 1754 emitInsn(0x5c400700); 1755 emitGPR (0x14, insn->src(0)); 1756 break; 1757 case
FILE_MEMORY_CONST: 1758 emitInsn(0x4c400700); 1759 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 1760 break; 1761 case FILE_IMMEDIATE: 1762 emitInsn(0x38400700); 1763 emitIMMD(0x14, 19, insn->src(0)); 1764 break; 1765 default: 1766 assert(!"bad src1 file"); 1767 break; 1768 } 1769 emitPRED (0x30); 1770 } else { 1771 emitInsn (0x05600000); 1772 /* the immediate is the instruction's only source, src(0); this path was selected by longIMMD(insn->src(0)) and NOT has no src(1) to read */ emitIMMD (0x14, 32, insn->src(0)); 1773 } 1774 1775 emitGPR(0x08); 1776 emitGPR(0x00, insn->def(0)); 1777} 1778 /* Emit an integer add. src1 may be a GPR, a constant-buffer reference or an immediate; when longIMMD() holds for src1 the 0x1c000000 encoding with a 32-bit immediate is used. OP_SUB shares the encoding and toggles bit 0x00010000 of code[1] afterwards. */ 1779void 1780CodeEmitterGM107::emitIADD() 1781{ 1782 if (!longIMMD(insn->src(1))) { 1783 switch (insn->src(1).getFile()) { 1784 case FILE_GPR: 1785 emitInsn(0x5c100000); 1786 emitGPR (0x14, insn->src(1)); 1787 break; 1788 case FILE_MEMORY_CONST: 1789 emitInsn(0x4c100000); 1790 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1791 break; 1792 case FILE_IMMEDIATE: 1793 emitInsn(0x38100000); 1794 emitIMMD(0x14, 19, insn->src(1)); 1795 break; 1796 default: 1797 assert(!"bad src1 file"); 1798 break; 1799 } 1800 emitSAT(0x32); 1801 emitNEG(0x31, insn->src(0)); 1802 emitNEG(0x30, insn->src(1)); 1803 emitCC (0x2f); 1804 emitX (0x2b); 1805 } else { 1806 emitInsn(0x1c000000); 1807 emitNEG (0x38, insn->src(0)); 1808 emitSAT (0x36); 1809 emitX (0x35); 1810 emitCC (0x34); 1811 emitIMMD(0x14, 32, insn->src(1)); 1812 } 1813 /* NOTE(review): 0x00010000 in code[1] appears to correspond to position 0x30, the src1 negate slot of the short encodings above. In the 32-bit-immediate form that position presumably lies inside the immediate emitted at 0x14, so the toggle would alter the immediate instead - confirm OP_SUB never reaches the long-immediate path. */ 1814 if (insn->op == OP_SUB) 1815 code[1] ^= 0x00010000; 1816 1817 emitGPR(0x08, insn->src(0)); 1818 emitGPR(0x00, insn->def(0)); 1819} 1820 /* Emit an integer multiply; as in emitLOP(), src1 selects between the register, constant-buffer, short-immediate and (via longIMMD()) 32-bit-immediate encodings. */ 1821void 1822CodeEmitterGM107::emitIMUL() 1823{ 1824 if (!longIMMD(insn->src(1))) { 1825 switch (insn->src(1).getFile()) { 1826 case FILE_GPR: 1827 emitInsn(0x5c380000); 1828 emitGPR (0x14, insn->src(1)); 1829 break; 1830 case FILE_MEMORY_CONST: 1831 emitInsn(0x4c380000); 1832 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1833 break; 1834 case FILE_IMMEDIATE: 1835 emitInsn(0x38380000); 1836 emitIMMD(0x14, 19, insn->src(1)); 1837 break; 1838 default: 1839 assert(!"bad src1 file"); 1840 break; 1841 } 1842 emitCC (0x2f); 1843 emitField(0x29, 1, isSignedType(insn->sType));
1844 emitField(0x28, 1, isSignedType(insn->dType)); 1845 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1846 } else { 1847 emitInsn (0x1f000000); 1848 emitField(0x37, 1, isSignedType(insn->sType)); 1849 emitField(0x36, 1, isSignedType(insn->dType)); 1850 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1851 emitCC (0x34); 1852 emitIMMD (0x14, 32, insn->src(1)); 1853 } 1854 1855 emitGPR(0x08, insn->src(0)); 1856 emitGPR(0x00, insn->def(0)); 1857} 1858 1859void 1860CodeEmitterGM107::emitIMAD() 1861{ 1862 /*XXX: imad32i exists, but not using it as third src overlaps dst */ 1863 switch(insn->src(2).getFile()) { 1864 case FILE_GPR: 1865 switch (insn->src(1).getFile()) { 1866 case FILE_GPR: 1867 emitInsn(0x5a000000); 1868 emitGPR (0x14, insn->src(1)); 1869 break; 1870 case FILE_MEMORY_CONST: 1871 emitInsn(0x4a000000); 1872 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1873 break; 1874 case FILE_IMMEDIATE: 1875 emitInsn(0x34000000); 1876 emitIMMD(0x14, 19, insn->src(1)); 1877 break; 1878 default: 1879 assert(!"bad src1 file"); 1880 break; 1881 } 1882 emitGPR (0x27, insn->src(2)); 1883 break; 1884 case FILE_MEMORY_CONST: 1885 emitInsn(0x52000000); 1886 emitGPR (0x27, insn->src(1)); 1887 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1888 break; 1889 default: 1890 assert(!"bad src2 file"); 1891 break; 1892 } 1893 1894 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1895 emitField(0x35, 1, isSignedType(insn->sType)); 1896 emitNEG (0x34, insn->src(2)); 1897 emitNEG2 (0x33, insn->src(0), insn->src(1)); 1898 emitSAT (0x32); 1899 emitX (0x31); 1900 emitField(0x30, 1, isSignedType(insn->dType)); 1901 emitCC (0x2f); 1902 emitGPR (0x08, insn->src(0)); 1903 emitGPR (0x00, insn->def(0)); 1904} 1905 1906void 1907CodeEmitterGM107::emitISCADD() 1908{ 1909 assert(insn->src(1).get()->asImm()); 1910 1911 switch (insn->src(2).getFile()) { 1912 case FILE_GPR: 1913 emitInsn(0x5c180000); 1914 emitGPR (0x14, insn->src(2)); 1915 break; 1916 case 
FILE_MEMORY_CONST: 1917 emitInsn(0x4c180000); 1918 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1919 break; 1920 case FILE_IMMEDIATE: 1921 emitInsn(0x38180000); 1922 emitIMMD(0x14, 19, insn->src(2)); 1923 break; 1924 default: 1925 assert(!"bad src1 file"); 1926 break; 1927 } 1928 emitNEG (0x31, insn->src(0)); 1929 emitNEG (0x30, insn->src(2)); 1930 emitCC (0x2f); 1931 emitIMMD(0x27, 5, insn->src(1)); 1932 emitGPR (0x08, insn->src(0)); 1933 emitGPR (0x00, insn->def(0)); 1934} 1935 1936void 1937CodeEmitterGM107::emitXMAD() 1938{ 1939 assert(insn->src(0).getFile() == FILE_GPR); 1940 1941 bool constbuf = false; 1942 bool psl_mrg = true; 1943 bool immediate = false; 1944 if (insn->src(2).getFile() == FILE_MEMORY_CONST) { 1945 assert(insn->src(1).getFile() == FILE_GPR); 1946 constbuf = true; 1947 psl_mrg = false; 1948 emitInsn(0x51000000); 1949 emitGPR(0x27, insn->src(1)); 1950 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1951 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) { 1952 assert(insn->src(2).getFile() == FILE_GPR); 1953 constbuf = true; 1954 emitInsn(0x4e000000); 1955 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1956 emitGPR(0x27, insn->src(2)); 1957 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) { 1958 assert(insn->src(2).getFile() == FILE_GPR); 1959 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1))); 1960 immediate = true; 1961 emitInsn(0x36000000); 1962 emitIMMD(0x14, 16, insn->src(1)); 1963 emitGPR(0x27, insn->src(2)); 1964 } else { 1965 assert(insn->src(1).getFile() == FILE_GPR); 1966 assert(insn->src(2).getFile() == FILE_GPR); 1967 emitInsn(0x5b000000); 1968 emitGPR(0x14, insn->src(1)); 1969 emitGPR(0x27, insn->src(2)); 1970 } 1971 1972 if (psl_mrg) 1973 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3); 1974 1975 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); 1976 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT; 1977 emitField(0x32, constbuf ? 2 : 3, cmode); 1978 1979 emitX(constbuf ? 
0x36 : 0x26); 1980 emitCC(0x2f); 1981 1982 emitGPR(0x0, insn->def(0)); 1983 emitGPR(0x8, insn->src(0)); 1984 1985 // source flags 1986 if (isSignedType(insn->sType)) { 1987 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK; 1988 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT); 1989 } 1990 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0); 1991 if (!immediate) { 1992 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1); 1993 emitField(constbuf ? 0x34 : 0x23, 1, h1); 1994 } 1995} 1996 1997void 1998CodeEmitterGM107::emitIMNMX() 1999{ 2000 switch (insn->src(1).getFile()) { 2001 case FILE_GPR: 2002 emitInsn(0x5c200000); 2003 emitGPR (0x14, insn->src(1)); 2004 break; 2005 case FILE_MEMORY_CONST: 2006 emitInsn(0x4c200000); 2007 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2008 break; 2009 case FILE_IMMEDIATE: 2010 emitInsn(0x38200000); 2011 emitIMMD(0x14, 19, insn->src(1)); 2012 break; 2013 default: 2014 assert(!"bad src1 file"); 2015 break; 2016 } 2017 2018 emitField(0x30, 1, isSignedType(insn->dType)); 2019 emitCC (0x2f); 2020 emitField(0x2b, 2, insn->subOp); 2021 emitField(0x2a, 1, insn->op == OP_MAX); 2022 emitPRED (0x27); 2023 emitGPR (0x08, insn->src(0)); 2024 emitGPR (0x00, insn->def(0)); 2025} 2026 2027void 2028CodeEmitterGM107::emitICMP() 2029{ 2030 const CmpInstruction *insn = this->insn->asCmp(); 2031 CondCode cc = insn->setCond; 2032 2033 if (insn->src(2).mod.neg()) 2034 cc = reverseCondCode(cc); 2035 2036 switch(insn->src(2).getFile()) { 2037 case FILE_GPR: 2038 switch (insn->src(1).getFile()) { 2039 case FILE_GPR: 2040 emitInsn(0x5b400000); 2041 emitGPR (0x14, insn->src(1)); 2042 break; 2043 case FILE_MEMORY_CONST: 2044 emitInsn(0x4b400000); 2045 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2046 break; 2047 case FILE_IMMEDIATE: 2048 emitInsn(0x36400000); 2049 emitIMMD(0x14, 19, insn->src(1)); 2050 break; 2051 default: 2052 assert(!"bad src1 file"); 2053 break; 2054 } 2055 emitGPR (0x27, insn->src(2)); 2056 break; 2057 case 
FILE_MEMORY_CONST: 2058 emitInsn(0x53400000); 2059 emitGPR (0x27, insn->src(1)); 2060 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 2061 break; 2062 default: 2063 assert(!"bad src2 file"); 2064 break; 2065 } 2066 2067 emitCond3(0x31, cc); 2068 emitField(0x30, 1, isSignedType(insn->sType)); 2069 emitGPR (0x08, insn->src(0)); 2070 emitGPR (0x00, insn->def(0)); 2071} 2072 2073void 2074CodeEmitterGM107::emitISET() 2075{ 2076 const CmpInstruction *insn = this->insn->asCmp(); 2077 2078 switch (insn->src(1).getFile()) { 2079 case FILE_GPR: 2080 emitInsn(0x5b500000); 2081 emitGPR (0x14, insn->src(1)); 2082 break; 2083 case FILE_MEMORY_CONST: 2084 emitInsn(0x4b500000); 2085 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2086 break; 2087 case FILE_IMMEDIATE: 2088 emitInsn(0x36500000); 2089 emitIMMD(0x14, 19, insn->src(1)); 2090 break; 2091 default: 2092 assert(!"bad src1 file"); 2093 break; 2094 } 2095 2096 if (insn->op != OP_SET) { 2097 switch (insn->op) { 2098 case OP_SET_AND: emitField(0x2d, 2, 0); break; 2099 case OP_SET_OR : emitField(0x2d, 2, 1); break; 2100 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 2101 default: 2102 assert(!"invalid set op"); 2103 break; 2104 } 2105 emitPRED(0x27, insn->src(2)); 2106 } else { 2107 emitPRED(0x27); 2108 } 2109 2110 emitCond3(0x31, insn->setCond); 2111 emitField(0x30, 1, isSignedType(insn->sType)); 2112 emitCC (0x2f); 2113 emitField(0x2c, 1, insn->dType == TYPE_F32); 2114 emitX (0x2b); 2115 emitGPR (0x08, insn->src(0)); 2116 emitGPR (0x00, insn->def(0)); 2117} 2118 2119void 2120CodeEmitterGM107::emitISETP() 2121{ 2122 const CmpInstruction *insn = this->insn->asCmp(); 2123 2124 switch (insn->src(1).getFile()) { 2125 case FILE_GPR: 2126 emitInsn(0x5b600000); 2127 emitGPR (0x14, insn->src(1)); 2128 break; 2129 case FILE_MEMORY_CONST: 2130 emitInsn(0x4b600000); 2131 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2132 break; 2133 case FILE_IMMEDIATE: 2134 emitInsn(0x36600000); 2135 emitIMMD(0x14, 19, insn->src(1)); 2136 break; 2137 
default: 2138 assert(!"bad src1 file"); 2139 break; 2140 } 2141 2142 if (insn->op != OP_SET) { 2143 switch (insn->op) { 2144 case OP_SET_AND: emitField(0x2d, 2, 0); break; 2145 case OP_SET_OR : emitField(0x2d, 2, 1); break; 2146 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 2147 default: 2148 assert(!"invalid set op"); 2149 break; 2150 } 2151 emitPRED(0x27, insn->src(2)); 2152 } else { 2153 emitPRED(0x27); 2154 } 2155 2156 emitCond3(0x31, insn->setCond); 2157 emitField(0x30, 1, isSignedType(insn->sType)); 2158 emitX (0x2b); 2159 emitGPR (0x08, insn->src(0)); 2160 emitPRED (0x03, insn->def(0)); 2161 if (insn->defExists(1)) 2162 emitPRED(0x00, insn->def(1)); 2163 else 2164 emitPRED(0x00); 2165} 2166 2167void 2168CodeEmitterGM107::emitSHL() 2169{ 2170 switch (insn->src(1).getFile()) { 2171 case FILE_GPR: 2172 emitInsn(0x5c480000); 2173 emitGPR (0x14, insn->src(1)); 2174 break; 2175 case FILE_MEMORY_CONST: 2176 emitInsn(0x4c480000); 2177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2178 break; 2179 case FILE_IMMEDIATE: 2180 emitInsn(0x38480000); 2181 emitIMMD(0x14, 19, insn->src(1)); 2182 break; 2183 default: 2184 assert(!"bad src1 file"); 2185 break; 2186 } 2187 2188 emitCC (0x2f); 2189 emitX (0x2b); 2190 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); 2191 emitGPR (0x08, insn->src(0)); 2192 emitGPR (0x00, insn->def(0)); 2193} 2194 2195void 2196CodeEmitterGM107::emitSHR() 2197{ 2198 switch (insn->src(1).getFile()) { 2199 case FILE_GPR: 2200 emitInsn(0x5c280000); 2201 emitGPR (0x14, insn->src(1)); 2202 break; 2203 case FILE_MEMORY_CONST: 2204 emitInsn(0x4c280000); 2205 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2206 break; 2207 case FILE_IMMEDIATE: 2208 emitInsn(0x38280000); 2209 emitIMMD(0x14, 19, insn->src(1)); 2210 break; 2211 default: 2212 assert(!"bad src1 file"); 2213 break; 2214 } 2215 2216 emitField(0x30, 1, isSignedType(insn->dType)); 2217 emitCC (0x2f); 2218 emitX (0x2c); 2219 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); 
2220 emitGPR (0x08, insn->src(0)); 2221 emitGPR (0x00, insn->def(0)); 2222} 2223 2224void 2225CodeEmitterGM107::emitSHF() 2226{ 2227 unsigned type; 2228 2229 switch (insn->src(1).getFile()) { 2230 case FILE_GPR: 2231 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000); 2232 emitGPR(0x14, insn->src(1)); 2233 break; 2234 case FILE_IMMEDIATE: 2235 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000); 2236 emitIMMD(0x14, 19, insn->src(1)); 2237 break; 2238 default: 2239 assert(!"bad src1 file"); 2240 break; 2241 } 2242 2243 switch (insn->sType) { 2244 case TYPE_U64: 2245 type = 2; 2246 break; 2247 case TYPE_S64: 2248 type = 3; 2249 break; 2250 default: 2251 type = 0; 2252 break; 2253 } 2254 2255 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP)); 2256 emitX (0x31); 2257 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH)); 2258 emitCC (0x2f); 2259 emitGPR (0x27, insn->src(2)); 2260 emitField(0x25, 2, type); 2261 emitGPR (0x08, insn->src(0)); 2262 emitGPR (0x00, insn->def(0)); 2263} 2264 2265void 2266CodeEmitterGM107::emitPOPC() 2267{ 2268 switch (insn->src(0).getFile()) { 2269 case FILE_GPR: 2270 emitInsn(0x5c080000); 2271 emitGPR (0x14, insn->src(0)); 2272 break; 2273 case FILE_MEMORY_CONST: 2274 emitInsn(0x4c080000); 2275 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 2276 break; 2277 case FILE_IMMEDIATE: 2278 emitInsn(0x38080000); 2279 emitIMMD(0x14, 19, insn->src(0)); 2280 break; 2281 default: 2282 assert(!"bad src1 file"); 2283 break; 2284 } 2285 2286 emitINV(0x28, insn->src(0)); 2287 emitGPR(0x00, insn->def(0)); 2288} 2289 2290void 2291CodeEmitterGM107::emitBFI() 2292{ 2293 switch(insn->src(2).getFile()) { 2294 case FILE_GPR: 2295 switch (insn->src(1).getFile()) { 2296 case FILE_GPR: 2297 emitInsn(0x5bf00000); 2298 emitGPR (0x14, insn->src(1)); 2299 break; 2300 case FILE_MEMORY_CONST: 2301 emitInsn(0x4bf00000); 2302 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2303 break; 2304 case FILE_IMMEDIATE: 2305 emitInsn(0x36f00000); 2306 
emitIMMD(0x14, 19, insn->src(1)); 2307 break; 2308 default: 2309 assert(!"bad src1 file"); 2310 break; 2311 } 2312 emitGPR (0x27, insn->src(2)); 2313 break; 2314 case FILE_MEMORY_CONST: 2315 emitInsn(0x53f00000); 2316 emitGPR (0x27, insn->src(1)); 2317 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 2318 break; 2319 default: 2320 assert(!"bad src2 file"); 2321 break; 2322 } 2323 2324 emitCC (0x2f); 2325 emitGPR (0x08, insn->src(0)); 2326 emitGPR (0x00, insn->def(0)); 2327} 2328 2329void 2330CodeEmitterGM107::emitBFE() 2331{ 2332 switch (insn->src(1).getFile()) { 2333 case FILE_GPR: 2334 emitInsn(0x5c000000); 2335 emitGPR (0x14, insn->src(1)); 2336 break; 2337 case FILE_MEMORY_CONST: 2338 emitInsn(0x4c000000); 2339 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2340 break; 2341 case FILE_IMMEDIATE: 2342 emitInsn(0x38000000); 2343 emitIMMD(0x14, 19, insn->src(1)); 2344 break; 2345 default: 2346 assert(!"bad src1 file"); 2347 break; 2348 } 2349 2350 emitField(0x30, 1, isSignedType(insn->dType)); 2351 emitCC (0x2f); 2352 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV); 2353 emitGPR (0x08, insn->src(0)); 2354 emitGPR (0x00, insn->def(0)); 2355} 2356 2357void 2358CodeEmitterGM107::emitFLO() 2359{ 2360 switch (insn->src(0).getFile()) { 2361 case FILE_GPR: 2362 emitInsn(0x5c300000); 2363 emitGPR (0x14, insn->src(0)); 2364 break; 2365 case FILE_MEMORY_CONST: 2366 emitInsn(0x4c300000); 2367 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 2368 break; 2369 case FILE_IMMEDIATE: 2370 emitInsn(0x38300000); 2371 emitIMMD(0x14, 19, insn->src(0)); 2372 break; 2373 default: 2374 assert(!"bad src1 file"); 2375 break; 2376 } 2377 2378 emitField(0x30, 1, isSignedType(insn->dType)); 2379 emitCC (0x2f); 2380 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); 2381 emitINV (0x28, insn->src(0)); 2382 emitGPR (0x00, insn->def(0)); 2383} 2384 2385void 2386CodeEmitterGM107::emitPRMT() 2387{ 2388 switch (insn->src(1).getFile()) { 2389 case FILE_GPR: 2390 emitInsn(0x5bc00000); 
2391 emitGPR (0x14, insn->src(1)); 2392 break; 2393 case FILE_MEMORY_CONST: 2394 emitInsn(0x4bc00000); 2395 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2396 break; 2397 case FILE_IMMEDIATE: 2398 emitInsn(0x36c00000); 2399 emitIMMD(0x14, 19, insn->src(1)); 2400 break; 2401 default: 2402 assert(!"bad src1 file"); 2403 break; 2404 } 2405 2406 emitField(0x30, 3, insn->subOp); 2407 emitGPR (0x27, insn->src(2)); 2408 emitGPR (0x08, insn->src(0)); 2409 emitGPR (0x00, insn->def(0)); 2410} 2411 2412/******************************************************************************* 2413 * memory 2414 ******************************************************************************/ 2415 /* Emit the 3-bit access-size field at bit position pos, derived from typeSizeof(type): 0/1 for unsigned/signed size 1, 2/3 for unsigned/signed size 2, then 4, 5 and 6 for sizes 4, 8 and 16. Any other size asserts; with asserts compiled out the field is emitted as 0. Note the local named data is distinct from the class member of the same name. */ 2416void 2417CodeEmitterGM107::emitLDSTs(int pos, DataType type) 2418{ 2419 int data = 0; 2420 2421 switch (typeSizeof(type)) { 2422 case 1: data = isSignedType(type) ? 1 : 0; break; 2423 case 2: data = isSignedType(type) ? 3 : 2; break; 2424 case 4: data = 4; break; 2425 case 8: data = 5; break; 2426 case 16: data = 6; break; 2427 default: 2428 assert(!"bad type"); 2429 break; 2430 } 2431 2432 emitField(pos, 3, data); 2433} 2434 /* Emit the 2-bit cache-mode field at bit position pos from insn->cache: CACHE_CA=0, CACHE_CG=1, CACHE_CS=2, CACHE_CV=3. Any other value asserts; with asserts compiled out the field is emitted as 0. */ 2435void 2436CodeEmitterGM107::emitLDSTc(int pos) 2437{ 2438 int mode = 0; 2439 2440 switch (insn->cache) { 2441 case CACHE_CA: mode = 0; break; 2442 case CACHE_CG: mode = 1; break; 2443 case CACHE_CS: mode = 2; break; 2444 case CACHE_CV: mode = 3; break; 2445 default: 2446 assert(!"invalid caching mode"); 2447 break; 2448 } 2449 2450 emitField(pos, 2, mode); 2451} 2452 /* Opcode 0xef900000: access size from dType at 0x30, insn->subOp in the 2-bit field at 0x2c, constant-buffer operand taken from src(0), destination GPR at 0x00. */ 2453void 2454CodeEmitterGM107::emitLDC() 2455{ 2456 emitInsn (0xef900000); 2457 emitLDSTs(0x30, insn->dType); 2458 emitField(0x2c, 2, insn->subOp); 2459 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0)); 2460 emitGPR (0x00, insn->def(0)); 2461} 2462 /* Opcode 0xef400000: access size from dType at 0x30, cache mode at 0x2c, address operand taken from src(0), destination GPR at 0x00. */ 2463void 2464CodeEmitterGM107::emitLDL() 2465{ 2466 emitInsn (0xef400000); 2467 emitLDSTs(0x30, insn->dType); 2468 emitLDSTc(0x2c); 2469 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2470 emitGPR (0x00, insn->def(0)); 2471} 2472 2473void
2474CodeEmitterGM107::emitLDS() 2475{ /* Opcode 0xef480000: access size from dType at 0x30, address operand taken from src(0), destination GPR at 0x00. Unlike emitLDL() no cache-mode field is emitted. */ 2476 emitInsn (0xef480000); 2477 emitLDSTs(0x30, insn->dType); 2478 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2479 emitGPR (0x00, insn->def(0)); 2480} 2481 /* Opcode 0x80000000: cache mode at 0x38, access size from dType at 0x35, address operand taken from src(0), destination GPR at 0x00. Bit 0x34 is set when src(0)'s indirect value reports getSize() == 8. NOTE(review): getIndirect(0) is dereferenced unconditionally, so src(0) presumably always carries an indirect register here - confirm against callers. */ 2482void 2483CodeEmitterGM107::emitLD() 2484{ 2485 emitInsn (0x80000000); 2486 emitPRED (0x3a); 2487 emitLDSTc(0x38); 2488 emitLDSTs(0x35, insn->dType); 2489 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2490 emitADDR (0x08, 0x14, 32, 0, insn->src(0)); 2491 emitGPR (0x00, insn->def(0)); 2492} 2493 /* Store counterpart of emitLDL(): opcode 0xef500000 with the same size, cache and address fields; the GPR slot at 0x00 holds the stored value, src(1). */ 2494void 2495CodeEmitterGM107::emitSTL() 2496{ 2497 emitInsn (0xef500000); 2498 emitLDSTs(0x30, insn->dType); 2499 emitLDSTc(0x2c); 2500 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2501 emitGPR (0x00, insn->src(1)); 2502} 2503 /* Store counterpart of emitLDS(): opcode 0xef580000 with the same size and address fields; the GPR slot at 0x00 holds the stored value, src(1). */ 2504void 2505CodeEmitterGM107::emitSTS() 2506{ 2507 emitInsn (0xef580000); 2508 emitLDSTs(0x30, insn->dType); 2509 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2510 emitGPR (0x00, insn->src(1)); 2511} 2512 /* Store counterpart of emitLD(): opcode 0xa0000000 with the same cache, size, bit 0x34 and address fields; the GPR slot at 0x00 holds the stored value, src(1). NOTE(review): as in emitLD(), getIndirect(0) is dereferenced without a null check - confirm src(0) always has an indirect register. */ 2513void 2514CodeEmitterGM107::emitST() 2515{ 2516 emitInsn (0xa0000000); 2517 emitPRED (0x3a); 2518 emitLDSTc(0x38); 2519 emitLDSTs(0x35, insn->dType); 2520 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2521 emitADDR (0x08, 0x14, 32, 0, insn->src(0)); 2522 emitGPR (0x00, insn->src(1)); 2523} 2524 /* Opcode 0xefd80000: the 2-bit field at 0x2f holds (def(0) reg.size / 4) - 1, the GPR slot at 0x27 takes src(0)'s second indirect (index 1), the address operand comes from src(0) and the destination GPR goes at 0x00. */ 2525void 2526CodeEmitterGM107::emitALD() 2527{ 2528 emitInsn (0xefd80000); 2529 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); 2530 emitGPR (0x27, insn->src(0).getIndirect(1)); 2531 emitO (0x20); 2532 emitP (0x1f); 2533 emitADDR (0x08, 20, 10, 0, insn->src(0)); 2534 emitGPR (0x00, insn->def(0)); 2535} 2536 /* Opcode 0xeff00000: like emitALD() but the 2-bit field at 0x2f is computed from typeSizeof(dType), emitO() is not called, and the GPR slot at 0x00 holds the stored value, src(1). */ 2537void 2538CodeEmitterGM107::emitAST() 2539{ 2540 emitInsn (0xeff00000); 2541 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1); 2542 emitGPR (0x27, insn->src(0).getIndirect(1)); 2543 emitP (0x1f); 2544 emitADDR (0x08, 20, 10, 0, insn->src(0)); 2545 emitGPR (0x00, insn->src(1)); 2546} 2547 2548void 2549CodeEmitterGM107::emitISBERD() 2550{ 2551 emitInsn(0xefd00000); 2552 emitGPR (0x08, insn->src(0)); 2553
emitGPR (0x00, insn->def(0)); 2554} 2555 /* Opcode 0xefa00000: the 2-bit field at 0x2f holds (def(0) reg.size / 4) - 1, the 11-bit field at 0x14 holds src(0)'s reg.data.offset, the GPR slot at 0x08 takes src(0)'s first indirect and the destination GPR goes at 0x00. */ 2556void 2557CodeEmitterGM107::emitAL2P() 2558{ 2559 emitInsn (0xefa00000); 2560 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); 2561 emitPRED (0x2c); 2562 emitO (0x20); 2563 emitField(0x14, 11, insn->src(0).get()->reg.data.offset); 2564 emitGPR (0x08, insn->src(0).getIndirect(0)); 2565 emitGPR (0x00, insn->def(0)); 2566} 2567 /* Fixup callback that emitIPA() registers through addInterp(). It patches the two code words at code[entry->loc] and code[entry->loc + 1] from the recorded ipa/reg values, adjusted by data: with data.flatshade an NV50_IR_INTERP_SC mode becomes NV50_IR_INTERP_FLAT and reg becomes 0xff; otherwise, with data.force_persample_interp, NV50_IR_INTERP_CENTROID is OR-ed in when the sample mode is NV50_IR_INTERP_DEFAULT and the mode is not NV50_IR_INTERP_FLAT. */ 2568void 2569gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) 2570{ 2571 int ipa = entry->ipa; 2572 int reg = entry->reg; 2573 int loc = entry->loc; 2574 2575 if (data.flatshade && 2576 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { 2577 ipa = NV50_IR_INTERP_FLAT; 2578 reg = 0xff; 2579 } else if (data.force_persample_interp && 2580 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && 2581 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { 2582 ipa |= NV50_IR_INTERP_CENTROID; 2583 } /* second word: clear the 4 bits starting at 0x14, then store ipa bits 0-1 at 0x16 and ipa bits 2-3 at 0x14 */ 2584 code[loc + 1] &= ~(0xf << 0x14); 2585 code[loc + 1] |= (ipa & 0x3) << 0x16; 2586 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2); /* first word: replace the 8-bit field at 0x14 with reg */ 2587 code[loc + 0] &= ~(0xff << 0x14); 2588 code[loc + 0] |= reg << 0x14; 2589} 2590 /* Emit an interpolation instruction (opcode 0xe0000000): the interpolation mode from getInterpMode() goes in the 2-bit field at 0x36 and the sample mode from getSampleMode() in the 2-bit field at 0x34; unrecognised modes assert. */ 2591void 2592CodeEmitterGM107::emitIPA() 2593{ 2594 int ipam = 0, ipas = 0; 2595 2596 switch (insn->getInterpMode()) { 2597 case NV50_IR_INTERP_LINEAR : ipam = 0; break; 2598 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break; 2599 case NV50_IR_INTERP_FLAT : ipam = 2; break; 2600 case NV50_IR_INTERP_SC : ipam = 3; break; 2601 default: 2602 assert(!"invalid ipa mode"); 2603 break; 2604 } 2605 2606 switch (insn->getSampleMode()) { 2607 case NV50_IR_INTERP_DEFAULT : ipas = 0; break; 2608 case NV50_IR_INTERP_CENTROID: ipas = 1; break; 2609 case NV50_IR_INTERP_OFFSET : ipas = 2; break; 2610 default: 2611 assert(!"invalid ipa sample mode"); 2612 break; 2613 } 2614 2615 emitInsn (0xe0000000); 2616 emitField(0x36, 2, ipam); 2617 emitField(0x34, 2, ipas); 2618 emitSAT (0x33); 2619 emitField(0x2f, 3, 7); 2620 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621 if ((code[0] & 0x0000ff00) != 0x0000ff00) 2622 code[1] |= 0x00000040; /* .idx */ 2623 emitGPR(0x00, insn->def(0)); 2624 2625 if (insn->op == OP_PINTERP) { 2626 emitGPR(0x14, insn->src(1)); 2627 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) 2628 emitGPR(0x27, insn->src(2)); 2629 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply); 2630 } else { 2631 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) 2632 emitGPR(0x27, insn->src(1)); 2633 emitGPR(0x14); 2634 addInterp(insn->ipa, 0xff, gm107_interpApply); 2635 } 2636 2637 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) 2638 emitGPR(0x27); 2639} 2640 2641void 2642CodeEmitterGM107::emitATOM() 2643{ 2644 unsigned dType, subOp; 2645 2646 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 2647 switch (insn->dType) { 2648 case TYPE_U32: dType = 0; break; 2649 case TYPE_U64: dType = 1; break; 2650 default: assert(!"unexpected dType"); dType = 0; break; 2651 } 2652 subOp = 15; 2653 2654 emitInsn (0xee000000); 2655 } else { 2656 switch (insn->dType) { 2657 case TYPE_U32: dType = 0; break; 2658 case TYPE_S32: dType = 1; break; 2659 case TYPE_U64: dType = 2; break; 2660 case TYPE_F32: dType = 3; break; 2661 case TYPE_B128: dType = 4; break; 2662 case TYPE_S64: dType = 5; break; 2663 default: assert(!"unexpected dType"); dType = 0; break; 2664 } 2665 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) 2666 subOp = 8; 2667 else 2668 subOp = insn->subOp; 2669 2670 emitInsn (0xed000000); 2671 } 2672 2673 emitField(0x34, 4, subOp); 2674 emitField(0x31, 3, dType); 2675 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2676 emitGPR (0x14, insn->src(1)); 2677 emitADDR (0x08, 0x1c, 20, 0, insn->src(0)); 2678 emitGPR (0x00, insn->def(0)); 2679} 2680 2681void 2682CodeEmitterGM107::emitATOMS() 2683{ 2684 unsigned dType, subOp; 2685 2686 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 2687 switch (insn->dType) { 2688 case TYPE_U32: dType = 0; break; 2689 case TYPE_U64: dType = 1; break; 2690 default: 
assert(!"unexpected dType"); dType = 0; break; 2691 } 2692 subOp = 4; 2693 2694 emitInsn (0xee000000); 2695 emitField(0x34, 1, dType); 2696 } else { 2697 switch (insn->dType) { 2698 case TYPE_U32: dType = 0; break; 2699 case TYPE_S32: dType = 1; break; 2700 case TYPE_U64: dType = 2; break; 2701 case TYPE_S64: dType = 3; break; 2702 default: assert(!"unexpected dType"); dType = 0; break; 2703 } 2704 2705 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) 2706 subOp = 8; 2707 else 2708 subOp = insn->subOp; 2709 2710 emitInsn (0xec000000); 2711 emitField(0x1c, 3, dType); 2712 } 2713 2714 emitField(0x34, 4, subOp); 2715 emitGPR (0x14, insn->src(1)); 2716 emitADDR (0x08, 0x1e, 22, 2, insn->src(0)); 2717 emitGPR (0x00, insn->def(0)); 2718} 2719 2720void 2721CodeEmitterGM107::emitRED() 2722{ 2723 unsigned dType; 2724 2725 switch (insn->dType) { 2726 case TYPE_U32: dType = 0; break; 2727 case TYPE_S32: dType = 1; break; 2728 case TYPE_U64: dType = 2; break; 2729 case TYPE_F32: dType = 3; break; 2730 case TYPE_B128: dType = 4; break; 2731 case TYPE_S64: dType = 5; break; 2732 default: assert(!"unexpected dType"); dType = 0; break; 2733 } 2734 2735 emitInsn (0xebf80000); 2736 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2737 emitField(0x17, 3, insn->subOp); 2738 emitField(0x14, 3, dType); 2739 emitADDR (0x08, 0x1c, 20, 0, insn->src(0)); 2740 emitGPR (0x00, insn->src(1)); 2741} 2742 2743void 2744CodeEmitterGM107::emitCCTL() 2745{ 2746 unsigned width; 2747 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { 2748 emitInsn(0xef600000); 2749 width = 30; 2750 } else { 2751 emitInsn(0xef800000); 2752 width = 22; 2753 } 2754 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2755 emitADDR (0x08, 0x16, width, 2, insn->src(0)); 2756 emitField(0x00, 4, insn->subOp); 2757} 2758 2759/******************************************************************************* 2760 * surface 2761 ******************************************************************************/ 
2762 2763void 2764CodeEmitterGM107::emitPIXLD() 2765{ 2766 emitInsn (0xefe80000); 2767 emitPRED (0x2d); 2768 emitField(0x1f, 3, insn->subOp); 2769 emitGPR (0x08, insn->src(0)); 2770 emitGPR (0x00, insn->def(0)); 2771} 2772 2773/******************************************************************************* 2774 * texture 2775 ******************************************************************************/ 2776 2777void 2778CodeEmitterGM107::emitTEXs(int pos) 2779{ 2780 int src1 = insn->predSrc == 1 ? 2 : 1; 2781 if (insn->srcExists(src1)) 2782 emitGPR(pos, insn->src(src1)); 2783 else 2784 emitGPR(pos); 2785} 2786 2787static uint8_t 2788getTEXSMask(uint8_t mask) 2789{ 2790 switch (mask) { 2791 case 0x1: return 0x0; 2792 case 0x2: return 0x1; 2793 case 0x3: return 0x4; 2794 case 0x4: return 0x2; 2795 case 0x7: return 0x0; 2796 case 0x8: return 0x3; 2797 case 0x9: return 0x5; 2798 case 0xa: return 0x6; 2799 case 0xb: return 0x1; 2800 case 0xc: return 0x7; 2801 case 0xd: return 0x2; 2802 case 0xe: return 0x3; 2803 case 0xf: return 0x4; 2804 default: 2805 assert(!"invalid mask"); 2806 return 0; 2807 } 2808} 2809 2810static uint8_t 2811getTEXSTarget(const TexInstruction *tex) 2812{ 2813 assert(tex->op == OP_TEX || tex->op == OP_TXL); 2814 2815 switch (tex->tex.target.getEnum()) { 2816 case TEX_TARGET_1D: 2817 assert(tex->tex.levelZero); 2818 return 0x0; 2819 case TEX_TARGET_2D: 2820 case TEX_TARGET_RECT: 2821 if (tex->tex.levelZero) 2822 return 0x2; 2823 if (tex->op == OP_TXL) 2824 return 0x3; 2825 return 0x1; 2826 case TEX_TARGET_2D_SHADOW: 2827 case TEX_TARGET_RECT_SHADOW: 2828 if (tex->tex.levelZero) 2829 return 0x6; 2830 if (tex->op == OP_TXL) 2831 return 0x5; 2832 return 0x4; 2833 case TEX_TARGET_2D_ARRAY: 2834 if (tex->tex.levelZero) 2835 return 0x8; 2836 return 0x7; 2837 case TEX_TARGET_2D_ARRAY_SHADOW: 2838 assert(tex->tex.levelZero); 2839 return 0x9; 2840 case TEX_TARGET_3D: 2841 if (tex->tex.levelZero) 2842 return 0xb; 2843 assert(tex->op != OP_TXL); 2844 return 
0xa; 2845 case TEX_TARGET_CUBE: 2846 assert(!tex->tex.levelZero); 2847 if (tex->op == OP_TXL) 2848 return 0xd; 2849 return 0xc; 2850 default: 2851 assert(false); 2852 return 0x0; 2853 } 2854} 2855 2856static uint8_t 2857getTLDSTarget(const TexInstruction *tex) 2858{ 2859 switch (tex->tex.target.getEnum()) { 2860 case TEX_TARGET_1D: 2861 if (tex->tex.levelZero) 2862 return 0x0; 2863 return 0x1; 2864 case TEX_TARGET_2D: 2865 case TEX_TARGET_RECT: 2866 if (tex->tex.levelZero) 2867 return tex->tex.useOffsets ? 0x4 : 0x2; 2868 return tex->tex.useOffsets ? 0xc : 0x5; 2869 case TEX_TARGET_2D_MS: 2870 assert(tex->tex.levelZero); 2871 return 0x6; 2872 case TEX_TARGET_3D: 2873 assert(tex->tex.levelZero); 2874 return 0x7; 2875 case TEX_TARGET_2D_ARRAY: 2876 assert(tex->tex.levelZero); 2877 return 0x8; 2878 2879 default: 2880 assert(false); 2881 return 0x0; 2882 } 2883} 2884 2885void 2886CodeEmitterGM107::emitTEX() 2887{ 2888 const TexInstruction *insn = this->insn->asTex(); 2889 int lodm = 0; 2890 2891 if (!insn->tex.levelZero) { 2892 switch (insn->op) { 2893 case OP_TEX: lodm = 0; break; 2894 case OP_TXB: lodm = 2; break; 2895 case OP_TXL: lodm = 3; break; 2896 default: 2897 assert(!"invalid tex op"); 2898 break; 2899 } 2900 } else { 2901 lodm = 1; 2902 } 2903 2904 if (insn->tex.rIndirectSrc >= 0) { 2905 emitInsn (0xdeb80000); 2906 emitField(0x25, 2, lodm); 2907 emitField(0x24, 1, insn->tex.useOffsets == 1); 2908 } else { 2909 emitInsn (0xc0380000); 2910 emitField(0x37, 2, lodm); 2911 emitField(0x36, 1, insn->tex.useOffsets == 1); 2912 emitField(0x24, 13, insn->tex.r); 2913 } 2914 2915 emitField(0x32, 1, insn->tex.target.isShadow()); 2916 emitField(0x31, 1, insn->tex.liveOnly); 2917 emitField(0x23, 1, insn->tex.derivAll); 2918 emitField(0x1f, 4, insn->tex.mask); 2919 emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : 2920 insn->tex.target.getDim() - 1); 2921 emitField(0x1c, 1, insn->tex.target.isArray()); 2922 emitTEXs (0x14); 2923 emitGPR (0x08, insn->src(0)); 2924 emitGPR (0x00, insn->def(0)); 2925} 2926 2927void 2928CodeEmitterGM107::emitTEXS() 2929{ 2930 const TexInstruction *insn = this->insn->asTex(); 2931 assert(!insn->tex.derivAll); 2932 2933 switch (insn->op) { 2934 case OP_TEX: 2935 case OP_TXL: 2936 emitInsn (0xd8000000); 2937 emitField(0x35, 4, getTEXSTarget(insn)); 2938 emitField(0x32, 3, getTEXSMask(insn->tex.mask)); 2939 break; 2940 case OP_TXF: 2941 emitInsn (0xda000000); 2942 emitField(0x35, 4, getTLDSTarget(insn)); 2943 emitField(0x32, 3, getTEXSMask(insn->tex.mask)); 2944 break; 2945 case OP_TXG: 2946 assert(insn->tex.useOffsets != 4); 2947 emitInsn (0xdf000000); 2948 emitField(0x34, 2, insn->tex.gatherComp); 2949 emitField(0x33, 1, insn->tex.useOffsets == 1); 2950 emitField(0x32, 1, insn->tex.target.isShadow()); 2951 break; 2952 default: 2953 unreachable("unknown op in emitTEXS()"); 2954 break; 2955 } 2956 2957 emitField(0x31, 1, insn->tex.liveOnly); 2958 emitField(0x24, 13, insn->tex.r); 2959 if (insn->defExists(1)) 2960 emitGPR(0x1c, insn->def(1)); 2961 else 2962 emitGPR(0x1c); 2963 if (insn->srcExists(1)) 2964 emitGPR(0x14, insn->getSrc(1)); 2965 else 2966 emitGPR(0x14); 2967 emitGPR (0x08, insn->src(0)); 2968 emitGPR (0x00, insn->def(0)); 2969} 2970 2971void 2972CodeEmitterGM107::emitTLD() 2973{ 2974 const TexInstruction *insn = this->insn->asTex(); 2975 2976 if (insn->tex.rIndirectSrc >= 0) { 2977 emitInsn (0xdd380000); 2978 } else { 2979 emitInsn (0xdc380000); 2980 emitField(0x24, 13, insn->tex.r); 2981 } 2982 2983 emitField(0x37, 1, insn->tex.levelZero == 0); 2984 emitField(0x32, 1, insn->tex.target.isMS()); 2985 emitField(0x31, 1, insn->tex.liveOnly); 2986 emitField(0x23, 1, insn->tex.useOffsets == 1); 2987 emitField(0x1f, 4, insn->tex.mask); 2988 emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : 2989 insn->tex.target.getDim() - 1); 2990 emitField(0x1c, 1, insn->tex.target.isArray()); 2991 emitTEXs (0x14); 2992 emitGPR (0x08, insn->src(0)); 2993 emitGPR (0x00, insn->def(0)); 2994} 2995 2996void 2997CodeEmitterGM107::emitTLD4() 2998{ 2999 const TexInstruction *insn = this->insn->asTex(); 3000 3001 if (insn->tex.rIndirectSrc >= 0) { 3002 emitInsn (0xdef80000); 3003 emitField(0x26, 2, insn->tex.gatherComp); 3004 emitField(0x25, 2, insn->tex.useOffsets == 4); 3005 emitField(0x24, 2, insn->tex.useOffsets == 1); 3006 } else { 3007 emitInsn (0xc8380000); 3008 emitField(0x38, 2, insn->tex.gatherComp); 3009 emitField(0x37, 2, insn->tex.useOffsets == 4); 3010 emitField(0x36, 2, insn->tex.useOffsets == 1); 3011 emitField(0x24, 13, insn->tex.r); 3012 } 3013 3014 emitField(0x32, 1, insn->tex.target.isShadow()); 3015 emitField(0x31, 1, insn->tex.liveOnly); 3016 emitField(0x23, 1, insn->tex.derivAll); 3017 emitField(0x1f, 4, insn->tex.mask); 3018 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 3019 insn->tex.target.getDim() - 1); 3020 emitField(0x1c, 1, insn->tex.target.isArray()); 3021 emitTEXs (0x14); 3022 emitGPR (0x08, insn->src(0)); 3023 emitGPR (0x00, insn->def(0)); 3024} 3025 3026void 3027CodeEmitterGM107::emitTXD() 3028{ 3029 const TexInstruction *insn = this->insn->asTex(); 3030 3031 if (insn->tex.rIndirectSrc >= 0) { 3032 emitInsn (0xde780000); 3033 } else { 3034 emitInsn (0xde380000); 3035 emitField(0x24, 13, insn->tex.r); 3036 } 3037 3038 emitField(0x31, 1, insn->tex.liveOnly); 3039 emitField(0x23, 1, insn->tex.useOffsets == 1); 3040 emitField(0x1f, 4, insn->tex.mask); 3041 emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : 3042 insn->tex.target.getDim() - 1); 3043 emitField(0x1c, 1, insn->tex.target.isArray()); 3044 emitTEXs (0x14); 3045 emitGPR (0x08, insn->src(0)); 3046 emitGPR (0x00, insn->def(0)); 3047} 3048 3049void 3050CodeEmitterGM107::emitTMML() 3051{ 3052 const TexInstruction *insn = this->insn->asTex(); 3053 3054 if (insn->tex.rIndirectSrc >= 0) { 3055 emitInsn (0xdf600000); 3056 } else { 3057 emitInsn (0xdf580000); 3058 emitField(0x24, 13, insn->tex.r); 3059 } 3060 3061 emitField(0x31, 1, insn->tex.liveOnly); 3062 emitField(0x23, 1, insn->tex.derivAll); 3063 emitField(0x1f, 4, insn->tex.mask); 3064 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 3065 insn->tex.target.getDim() - 1); 3066 emitField(0x1c, 1, insn->tex.target.isArray()); 3067 emitTEXs (0x14); 3068 emitGPR (0x08, insn->src(0)); 3069 emitGPR (0x00, insn->def(0)); 3070} 3071 3072void 3073CodeEmitterGM107::emitTXQ() 3074{ 3075 const TexInstruction *insn = this->insn->asTex(); 3076 int type = 0; 3077 3078 switch (insn->tex.query) { 3079 case TXQ_DIMS : type = 0x01; break; 3080 case TXQ_TYPE : type = 0x02; break; 3081 case TXQ_SAMPLE_POSITION: type = 0x05; break; 3082 case TXQ_FILTER : type = 0x10; break; 3083 case TXQ_LOD : type = 0x12; break; 3084 case TXQ_WRAP : type = 0x14; break; 3085 case TXQ_BORDER_COLOUR : type = 0x16; break; 3086 default: 3087 assert(!"invalid txq query"); 3088 break; 3089 } 3090 3091 if (insn->tex.rIndirectSrc >= 0) { 3092 emitInsn (0xdf500000); 3093 } else { 3094 emitInsn (0xdf480000); 3095 emitField(0x24, 13, insn->tex.r); 3096 } 3097 3098 emitField(0x31, 1, insn->tex.liveOnly); 3099 emitField(0x1f, 4, insn->tex.mask); 3100 emitField(0x16, 6, type); 3101 emitGPR (0x08, insn->src(0)); 3102 emitGPR (0x00, insn->def(0)); 3103} 3104 3105void 3106CodeEmitterGM107::emitDEPBAR() 3107{ 3108 emitInsn (0xf0f00000); 3109 emitField(0x1d, 1, 1); /* le */ 3110 emitField(0x1a, 3, 5); 3111 emitField(0x14, 6, insn->subOp); 3112 emitField(0x00, 6, insn->subOp); 3113} 3114 
3115/******************************************************************************* 3116 * misc 3117 ******************************************************************************/ 3118 3119void 3120CodeEmitterGM107::emitNOP() 3121{ 3122 emitInsn(0x50b00000); 3123} 3124 3125void 3126CodeEmitterGM107::emitKIL() 3127{ 3128 emitInsn (0xe3300000); 3129 emitCond5(0x00, CC_TR); 3130} 3131 3132void 3133CodeEmitterGM107::emitOUT() 3134{ 3135 const int cut = insn->op == OP_RESTART || insn->subOp; 3136 const int emit = insn->op == OP_EMIT; 3137 3138 switch (insn->src(1).getFile()) { 3139 case FILE_GPR: 3140 emitInsn(0xfbe00000); 3141 emitGPR (0x14, insn->src(1)); 3142 break; 3143 case FILE_IMMEDIATE: 3144 emitInsn(0xf6e00000); 3145 emitIMMD(0x14, 19, insn->src(1)); 3146 break; 3147 case FILE_MEMORY_CONST: 3148 emitInsn(0xebe00000); 3149 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 3150 break; 3151 default: 3152 assert(!"bad src1 file"); 3153 break; 3154 } 3155 3156 emitField(0x27, 2, (cut << 1) | emit); 3157 emitGPR (0x08, insn->src(0)); 3158 emitGPR (0x00, insn->def(0)); 3159} 3160 3161void 3162CodeEmitterGM107::emitBAR() 3163{ 3164 uint8_t subop; 3165 3166 emitInsn (0xf0a80000); 3167 3168 switch (insn->subOp) { 3169 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break; 3170 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break; 3171 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break; 3172 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break; 3173 default: 3174 subop = 0x80; 3175 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC); 3176 break; 3177 } 3178 3179 emitField(0x20, 8, subop); 3180 3181 // barrier id 3182 if (insn->src(0).getFile() == FILE_GPR) { 3183 emitGPR(0x08, insn->src(0)); 3184 } else { 3185 ImmediateValue *imm = insn->getSrc(0)->asImm(); 3186 assert(imm); 3187 emitField(0x08, 8, imm->reg.data.u32); 3188 emitField(0x2b, 1, 1); 3189 } 3190 3191 // thread count 3192 if (insn->src(1).getFile() == FILE_GPR) { 3193 emitGPR(0x14, insn->src(1)); 3194 } else { 3195 
ImmediateValue *imm = insn->getSrc(0)->asImm(); 3196 assert(imm); 3197 emitField(0x14, 12, imm->reg.data.u32); 3198 emitField(0x2c, 1, 1); 3199 } 3200 3201 if (insn->srcExists(2) && (insn->predSrc != 2)) { 3202 emitPRED (0x27, insn->src(2)); 3203 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT)); 3204 } else { 3205 emitField(0x27, 3, 7); 3206 } 3207} 3208 3209void 3210CodeEmitterGM107::emitMEMBAR() 3211{ 3212 emitInsn (0xef980000); 3213 emitField(0x08, 2, insn->subOp >> 2); 3214} 3215 3216void 3217CodeEmitterGM107::emitVOTE() 3218{ 3219 const ImmediateValue *imm; 3220 uint32_t u32; 3221 3222 int r = -1, p = -1; 3223 for (int i = 0; insn->defExists(i); i++) { 3224 if (insn->def(i).getFile() == FILE_GPR) 3225 r = i; 3226 else if (insn->def(i).getFile() == FILE_PREDICATE) 3227 p = i; 3228 } 3229 3230 emitInsn (0x50d80000); 3231 emitField(0x30, 2, insn->subOp); 3232 if (r >= 0) 3233 emitGPR (0x00, insn->def(r)); 3234 else 3235 emitGPR (0x00); 3236 if (p >= 0) 3237 emitPRED (0x2d, insn->def(p)); 3238 else 3239 emitPRED (0x2d); 3240 3241 switch (insn->src(0).getFile()) { 3242 case FILE_PREDICATE: 3243 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); 3244 emitPRED (0x27, insn->src(0)); 3245 break; 3246 case FILE_IMMEDIATE: 3247 imm = insn->getSrc(0)->asImm(); 3248 assert(imm); 3249 u32 = imm->reg.data.u32; 3250 assert(u32 == 0 || u32 == 1); 3251 emitPRED(0x27); 3252 emitField(0x2a, 1, u32 == 0); 3253 break; 3254 default: 3255 assert(!"Unhandled src"); 3256 break; 3257 } 3258} 3259 3260void 3261CodeEmitterGM107::emitSUTarget() 3262{ 3263 const TexInstruction *insn = this->insn->asTex(); 3264 int target = 0; 3265 3266 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); 3267 3268 if (insn->tex.target == TEX_TARGET_BUFFER) { 3269 target = 2; 3270 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { 3271 target = 4; 3272 } else if (insn->tex.target == TEX_TARGET_2D || 3273 insn->tex.target == TEX_TARGET_RECT) { 3274 target = 6; 3275 } else 
if (insn->tex.target == TEX_TARGET_2D_ARRAY || 3276 insn->tex.target == TEX_TARGET_CUBE || 3277 insn->tex.target == TEX_TARGET_CUBE_ARRAY) { 3278 target = 8; 3279 } else if (insn->tex.target == TEX_TARGET_3D) { 3280 target = 10; 3281 } else { 3282 assert(insn->tex.target == TEX_TARGET_1D); 3283 } 3284 emitField(0x20, 4, target); 3285} 3286 3287void 3288CodeEmitterGM107::emitSUHandle(const int s) 3289{ 3290 const TexInstruction *insn = this->insn->asTex(); 3291 3292 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); 3293 3294 if (insn->src(s).getFile() == FILE_GPR) { 3295 emitGPR(0x27, insn->src(s)); 3296 } else { 3297 ImmediateValue *imm = insn->getSrc(s)->asImm(); 3298 assert(imm); 3299 emitField(0x33, 1, 1); 3300 emitField(0x24, 13, imm->reg.data.u32); 3301 } 3302} 3303 3304void 3305CodeEmitterGM107::emitSUSTx() 3306{ 3307 const TexInstruction *insn = this->insn->asTex(); 3308 3309 emitInsn(0xeb200000); 3310 if (insn->op == OP_SUSTB) 3311 emitField(0x34, 1, 1); 3312 emitSUTarget(); 3313 3314 emitLDSTc(0x18); 3315 emitField(0x14, 4, 0xf); // rgba 3316 emitGPR (0x08, insn->src(0)); 3317 emitGPR (0x00, insn->src(1)); 3318 3319 emitSUHandle(2); 3320} 3321 3322void 3323CodeEmitterGM107::emitSULDx() 3324{ 3325 const TexInstruction *insn = this->insn->asTex(); 3326 int type = 0; 3327 3328 emitInsn(0xeb000000); 3329 if (insn->op == OP_SULDB) 3330 emitField(0x34, 1, 1); 3331 emitSUTarget(); 3332 3333 switch (insn->dType) { 3334 case TYPE_S8: type = 1; break; 3335 case TYPE_U16: type = 2; break; 3336 case TYPE_S16: type = 3; break; 3337 case TYPE_U32: type = 4; break; 3338 case TYPE_U64: type = 5; break; 3339 case TYPE_B128: type = 6; break; 3340 default: 3341 assert(insn->dType == TYPE_U8); 3342 break; 3343 } 3344 emitLDSTc(0x18); 3345 emitField(0x14, 3, type); 3346 emitGPR (0x00, insn->def(0)); 3347 emitGPR (0x08, insn->src(0)); 3348 3349 emitSUHandle(1); 3350} 3351 3352void 3353CodeEmitterGM107::emitSUREDx() 3354{ 3355 const TexInstruction *insn = 
this->insn->asTex(); 3356 uint8_t type = 0, subOp; 3357 3358 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) 3359 emitInsn(0xeac00000); 3360 else 3361 emitInsn(0xea600000); 3362 3363 if (insn->op == OP_SUREDB) 3364 emitField(0x34, 1, 1); 3365 emitSUTarget(); 3366 3367 // destination type 3368 switch (insn->dType) { 3369 case TYPE_S32: type = 1; break; 3370 case TYPE_U64: type = 2; break; 3371 case TYPE_F32: type = 3; break; 3372 case TYPE_S64: type = 5; break; 3373 default: 3374 assert(insn->dType == TYPE_U32); 3375 break; 3376 } 3377 3378 // atomic operation 3379 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 3380 subOp = 0; 3381 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 3382 subOp = 8; 3383 } else { 3384 subOp = insn->subOp; 3385 } 3386 3387 emitField(0x24, 3, type); 3388 emitField(0x1d, 4, subOp); 3389 emitGPR (0x14, insn->src(1)); 3390 emitGPR (0x08, insn->src(0)); 3391 emitGPR (0x00, insn->def(0)); 3392 3393 emitSUHandle(2); 3394} 3395 3396/******************************************************************************* 3397 * assembler front-end 3398 ******************************************************************************/ 3399 3400bool 3401CodeEmitterGM107::emitInstruction(Instruction *i) 3402{ 3403 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 
16 : 8; 3404 bool ret = true; 3405 3406 insn = i; 3407 3408 if (insn->encSize != 8) { 3409 ERROR("skipping undecodable instruction: "); insn->print(); 3410 return false; 3411 } else 3412 if (codeSize + size > codeSizeLimit) { 3413 ERROR("code emitter output buffer too small\n"); 3414 return false; 3415 } 3416 3417 if (writeIssueDelays) { 3418 int n = ((codeSize & 0x1f) / 8) - 1; 3419 if (n < 0) { 3420 data = code; 3421 data[0] = 0x00000000; 3422 data[1] = 0x00000000; 3423 code += 2; 3424 codeSize += 8; 3425 n++; 3426 } 3427 3428 emitField(data, n * 21, 21, insn->sched); 3429 } 3430 3431 switch (insn->op) { 3432 case OP_EXIT: 3433 emitEXIT(); 3434 break; 3435 case OP_BRA: 3436 emitBRA(); 3437 break; 3438 case OP_CALL: 3439 emitCAL(); 3440 break; 3441 case OP_PRECONT: 3442 emitPCNT(); 3443 break; 3444 case OP_CONT: 3445 emitCONT(); 3446 break; 3447 case OP_PREBREAK: 3448 emitPBK(); 3449 break; 3450 case OP_BREAK: 3451 emitBRK(); 3452 break; 3453 case OP_PRERET: 3454 emitPRET(); 3455 break; 3456 case OP_RET: 3457 emitRET(); 3458 break; 3459 case OP_JOINAT: 3460 emitSSY(); 3461 break; 3462 case OP_JOIN: 3463 emitSYNC(); 3464 break; 3465 case OP_QUADON: 3466 emitSAM(); 3467 break; 3468 case OP_QUADPOP: 3469 emitRAM(); 3470 break; 3471 case OP_MOV: 3472 emitMOV(); 3473 break; 3474 case OP_RDSV: 3475 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) 3476 emitCS2R(); 3477 else 3478 emitS2R(); 3479 break; 3480 case OP_ABS: 3481 case OP_NEG: 3482 case OP_SAT: 3483 case OP_FLOOR: 3484 case OP_CEIL: 3485 case OP_TRUNC: 3486 case OP_CVT: 3487 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || 3488 insn->src(0).getFile() == FILE_PREDICATE)) { 3489 emitMOV(); 3490 } else if (isFloatType(insn->dType)) { 3491 if (isFloatType(insn->sType)) 3492 emitF2F(); 3493 else 3494 emitI2F(); 3495 } else { 3496 if (isFloatType(insn->sType)) 3497 emitF2I(); 3498 else 3499 emitI2I(); 3500 } 3501 break; 3502 case OP_SHFL: 3503 emitSHFL(); 3504 break; 3505 case OP_ADD: 
3506 case OP_SUB: 3507 if (isFloatType(insn->dType)) { 3508 if (insn->dType == TYPE_F64) 3509 emitDADD(); 3510 else 3511 emitFADD(); 3512 } else { 3513 emitIADD(); 3514 } 3515 break; 3516 case OP_MUL: 3517 if (isFloatType(insn->dType)) { 3518 if (insn->dType == TYPE_F64) 3519 emitDMUL(); 3520 else 3521 emitFMUL(); 3522 } else { 3523 emitIMUL(); 3524 } 3525 break; 3526 case OP_MAD: 3527 case OP_FMA: 3528 if (isFloatType(insn->dType)) { 3529 if (insn->dType == TYPE_F64) 3530 emitDFMA(); 3531 else 3532 emitFFMA(); 3533 } else { 3534 emitIMAD(); 3535 } 3536 break; 3537 case OP_SHLADD: 3538 emitISCADD(); 3539 break; 3540 case OP_XMAD: 3541 emitXMAD(); 3542 break; 3543 case OP_MIN: 3544 case OP_MAX: 3545 if (isFloatType(insn->dType)) { 3546 if (insn->dType == TYPE_F64) 3547 emitDMNMX(); 3548 else 3549 emitFMNMX(); 3550 } else { 3551 emitIMNMX(); 3552 } 3553 break; 3554 case OP_SHL: 3555 if (typeSizeof(insn->sType) == 8) 3556 emitSHF(); 3557 else 3558 emitSHL(); 3559 break; 3560 case OP_SHR: 3561 if (typeSizeof(insn->sType) == 8) 3562 emitSHF(); 3563 else 3564 emitSHR(); 3565 break; 3566 case OP_POPCNT: 3567 emitPOPC(); 3568 break; 3569 case OP_INSBF: 3570 emitBFI(); 3571 break; 3572 case OP_EXTBF: 3573 emitBFE(); 3574 break; 3575 case OP_BFIND: 3576 emitFLO(); 3577 break; 3578 case OP_PERMT: 3579 emitPRMT(); 3580 break; 3581 case OP_SLCT: 3582 if (isFloatType(insn->dType)) 3583 emitFCMP(); 3584 else 3585 emitICMP(); 3586 break; 3587 case OP_SET: 3588 case OP_SET_AND: 3589 case OP_SET_OR: 3590 case OP_SET_XOR: 3591 if (insn->def(0).getFile() != FILE_PREDICATE) { 3592 if (isFloatType(insn->sType)) 3593 if (insn->sType == TYPE_F64) 3594 emitDSET(); 3595 else 3596 emitFSET(); 3597 else 3598 emitISET(); 3599 } else { 3600 if (isFloatType(insn->sType)) 3601 if (insn->sType == TYPE_F64) 3602 emitDSETP(); 3603 else 3604 emitFSETP(); 3605 else 3606 emitISETP(); 3607 } 3608 break; 3609 case OP_SELP: 3610 emitSEL(); 3611 break; 3612 case OP_PRESIN: 3613 case OP_PREEX2: 3614 
emitRRO(); 3615 break; 3616 case OP_COS: 3617 case OP_SIN: 3618 case OP_EX2: 3619 case OP_LG2: 3620 case OP_RCP: 3621 case OP_RSQ: 3622 case OP_SQRT: 3623 emitMUFU(); 3624 break; 3625 case OP_AND: 3626 case OP_OR: 3627 case OP_XOR: 3628 switch (insn->def(0).getFile()) { 3629 case FILE_GPR: emitLOP(); break; 3630 case FILE_PREDICATE: emitPSETP(); break; 3631 default: 3632 assert(!"invalid bool op"); 3633 } 3634 break; 3635 case OP_NOT: 3636 emitNOT(); 3637 break; 3638 case OP_LOAD: 3639 switch (insn->src(0).getFile()) { 3640 case FILE_MEMORY_CONST : emitLDC(); break; 3641 case FILE_MEMORY_LOCAL : emitLDL(); break; 3642 case FILE_MEMORY_SHARED: emitLDS(); break; 3643 case FILE_MEMORY_GLOBAL: emitLD(); break; 3644 default: 3645 assert(!"invalid load"); 3646 emitNOP(); 3647 break; 3648 } 3649 break; 3650 case OP_STORE: 3651 switch (insn->src(0).getFile()) { 3652 case FILE_MEMORY_LOCAL : emitSTL(); break; 3653 case FILE_MEMORY_SHARED: emitSTS(); break; 3654 case FILE_MEMORY_GLOBAL: emitST(); break; 3655 default: 3656 assert(!"invalid store"); 3657 emitNOP(); 3658 break; 3659 } 3660 break; 3661 case OP_ATOM: 3662 if (insn->src(0).getFile() == FILE_MEMORY_SHARED) 3663 emitATOMS(); 3664 else 3665 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) 3666 emitRED(); 3667 else 3668 emitATOM(); 3669 break; 3670 case OP_CCTL: 3671 emitCCTL(); 3672 break; 3673 case OP_VFETCH: 3674 emitALD(); 3675 break; 3676 case OP_EXPORT: 3677 emitAST(); 3678 break; 3679 case OP_PFETCH: 3680 emitISBERD(); 3681 break; 3682 case OP_AFETCH: 3683 emitAL2P(); 3684 break; 3685 case OP_LINTERP: 3686 case OP_PINTERP: 3687 emitIPA(); 3688 break; 3689 case OP_PIXLD: 3690 emitPIXLD(); 3691 break; 3692 case OP_TEX: 3693 case OP_TXL: 3694 if (insn->asTex()->tex.scalar) 3695 emitTEXS(); 3696 else 3697 emitTEX(); 3698 break; 3699 case OP_TXB: 3700 emitTEX(); 3701 break; 3702 case OP_TXF: 3703 if (insn->asTex()->tex.scalar) 3704 emitTEXS(); 3705 else 3706 emitTLD(); 3707 break; 3708 case OP_TXG: 
3709 if (insn->asTex()->tex.scalar) 3710 emitTEXS(); 3711 else 3712 emitTLD4(); 3713 break; 3714 case OP_TXD: 3715 emitTXD(); 3716 break; 3717 case OP_TXQ: 3718 emitTXQ(); 3719 break; 3720 case OP_TXLQ: 3721 emitTMML(); 3722 break; 3723 case OP_TEXBAR: 3724 emitDEPBAR(); 3725 break; 3726 case OP_QUADOP: 3727 emitFSWZADD(); 3728 break; 3729 case OP_NOP: 3730 emitNOP(); 3731 break; 3732 case OP_DISCARD: 3733 emitKIL(); 3734 break; 3735 case OP_EMIT: 3736 case OP_RESTART: 3737 emitOUT(); 3738 break; 3739 case OP_BAR: 3740 emitBAR(); 3741 break; 3742 case OP_MEMBAR: 3743 emitMEMBAR(); 3744 break; 3745 case OP_VOTE: 3746 emitVOTE(); 3747 break; 3748 case OP_SUSTB: 3749 case OP_SUSTP: 3750 emitSUSTx(); 3751 break; 3752 case OP_SULDB: 3753 case OP_SULDP: 3754 emitSULDx(); 3755 break; 3756 case OP_SUREDB: 3757 case OP_SUREDP: 3758 emitSUREDx(); 3759 break; 3760 default: 3761 assert(!"invalid opcode"); 3762 emitNOP(); 3763 ret = false; 3764 break; 3765 } 3766 3767 if (insn->join) { 3768 /*XXX*/ 3769 } 3770 3771 code += 2; 3772 codeSize += 8; 3773 return ret; 3774} 3775 3776uint32_t 3777CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const 3778{ 3779 return 8; 3780} 3781 3782/******************************************************************************* 3783 * sched data calculator 3784 ******************************************************************************/ 3785 3786inline void 3787SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt) 3788{ 3789 assert(cnt < 16); 3790 insn->sched |= cnt; 3791} 3792 3793inline void 3794SchedDataCalculatorGM107::emitYield(Instruction *insn) 3795{ 3796 insn->sched |= 1 << 4; 3797} 3798 3799inline void 3800SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id) 3801{ 3802 assert(id < 6); 3803 if ((insn->sched & 0xe0) == 0xe0) 3804 insn->sched ^= 0xe0; 3805 insn->sched |= id << 5; 3806} 3807 3808inline void 3809SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id) 3810{ 3811 
assert(id < 6); 3812 if ((insn->sched & 0x700) == 0x700) 3813 insn->sched ^= 0x700; 3814 insn->sched |= id << 8; 3815} 3816 3817inline void 3818SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id) 3819{ 3820 assert(id < 6); 3821 insn->sched |= 1 << (11 + id); 3822} 3823 3824inline void 3825SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id) 3826{ 3827 assert(id < 4); 3828 insn->sched |= 1 << (17 + id); 3829} 3830 3831inline void 3832SchedDataCalculatorGM107::printSchedInfo(int cycle, 3833 const Instruction *insn) const 3834{ 3835 uint8_t st, yl, wr, rd, wt, ru; 3836 3837 st = (insn->sched & 0x00000f) >> 0; 3838 yl = (insn->sched & 0x000010) >> 4; 3839 wr = (insn->sched & 0x0000e0) >> 5; 3840 rd = (insn->sched & 0x000700) >> 8; 3841 wt = (insn->sched & 0x01f800) >> 11; 3842 ru = (insn->sched & 0x1e0000) >> 17; 3843 3844 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n", 3845 cycle, st, yl, wr, rd, wt, ru); 3846} 3847 3848inline int 3849SchedDataCalculatorGM107::getStall(const Instruction *insn) const 3850{ 3851 return insn->sched & 0xf; 3852} 3853 3854inline int 3855SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const 3856{ 3857 return (insn->sched & 0x0000e0) >> 5; 3858} 3859 3860inline int 3861SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const 3862{ 3863 return (insn->sched & 0x000700) >> 8; 3864} 3865 3866inline int 3867SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const 3868{ 3869 return (insn->sched & 0x01f800) >> 11; 3870} 3871 3872// Emit the reuse flag which allows to make use of the new memory hierarchy 3873// introduced since Maxwell, the operand reuse cache. 3874// 3875// It allows to reduce bank conflicts by caching operands. Each time you issue 3876// an instruction, that flag can tell the hw which operands are going to be 3877// re-used by the next instruction. 
Note that the next instruction has to use 3878// the same GPR id in the same operand slot. 3879void 3880SchedDataCalculatorGM107::setReuseFlag(Instruction *insn) 3881{ 3882 Instruction *next = insn->next; 3883 BitSet defs(255, true); 3884 3885 if (!targ->isReuseSupported(insn)) 3886 return; 3887 3888 for (int d = 0; insn->defExists(d); ++d) { 3889 const Value *def = insn->def(d).rep(); 3890 if (insn->def(d).getFile() != FILE_GPR) 3891 continue; 3892 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255) 3893 continue; 3894 defs.set(def->reg.data.id); 3895 } 3896 3897 for (int s = 0; insn->srcExists(s); s++) { 3898 const Value *src = insn->src(s).rep(); 3899 if (insn->src(s).getFile() != FILE_GPR) 3900 continue; 3901 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255) 3902 continue; 3903 if (defs.test(src->reg.data.id)) 3904 continue; 3905 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR) 3906 continue; 3907 if (src->reg.data.id != next->getSrc(s)->reg.data.id) 3908 continue; 3909 assert(s < 4); 3910 emitReuse(insn, s); 3911 } 3912} 3913 3914void 3915SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready) 3916{ 3917 int a = v->reg.data.id, b; 3918 3919 switch (v->reg.file) { 3920 case FILE_GPR: 3921 b = a + v->reg.size / 4; 3922 for (int r = a; r < b; ++r) 3923 score->rd.r[r] = ready; 3924 break; 3925 case FILE_PREDICATE: 3926 // To immediately use a predicate set by any instructions, the minimum 3927 // number of stall counts is 13. 
3928 score->rd.p[a] = cycle + 13; 3929 break; 3930 case FILE_FLAGS: 3931 score->rd.c = ready; 3932 break; 3933 default: 3934 break; 3935 } 3936} 3937 3938void 3939SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const 3940{ 3941 int a = v->reg.data.id, b; 3942 int ready = cycle; 3943 3944 switch (v->reg.file) { 3945 case FILE_GPR: 3946 b = a + v->reg.size / 4; 3947 for (int r = a; r < b; ++r) 3948 ready = MAX2(ready, score->rd.r[r]); 3949 break; 3950 case FILE_PREDICATE: 3951 ready = MAX2(ready, score->rd.p[a]); 3952 break; 3953 case FILE_FLAGS: 3954 ready = MAX2(ready, score->rd.c); 3955 break; 3956 default: 3957 break; 3958 } 3959 if (cycle < ready) 3960 delay = MAX2(delay, ready - cycle); 3961} 3962 3963void 3964SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle) 3965{ 3966 const int ready = cycle + targ->getLatency(insn); 3967 3968 for (int d = 0; insn->defExists(d); ++d) 3969 recordWr(insn->getDef(d), cycle, ready); 3970 3971#ifdef GM107_DEBUG_SCHED_DATA 3972 score->print(cycle); 3973#endif 3974} 3975 3976#define GM107_MIN_ISSUE_DELAY 0x1 3977#define GM107_MAX_ISSUE_DELAY 0xf 3978 3979int 3980SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const 3981{ 3982 int delay = 0, ready = cycle; 3983 3984 for (int s = 0; insn->srcExists(s); ++s) 3985 checkRd(insn->getSrc(s), cycle, delay); 3986 3987 // TODO: make use of getReadLatency()! 
3988 3989 return MAX2(delay, ready - cycle); 3990} 3991 3992void 3993SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay, 3994 const Instruction *next) 3995{ 3996 const OpClass cl = targ->getOpClass(insn->op); 3997 int wr, rd; 3998 3999 if (insn->op == OP_EXIT || 4000 insn->op == OP_BAR || 4001 insn->op == OP_MEMBAR) { 4002 delay = GM107_MAX_ISSUE_DELAY; 4003 } else 4004 if (insn->op == OP_QUADON || 4005 insn->op == OP_QUADPOP) { 4006 delay = 0xd; 4007 } else 4008 if (cl == OPCLASS_FLOW || insn->join) { 4009 delay = 0xd; 4010 } 4011 4012 if (!next || !targ->canDualIssue(insn, next)) { 4013 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY); 4014 } else { 4015 delay = 0x0; // dual-issue 4016 } 4017 4018 wr = getWrDepBar(insn); 4019 rd = getRdDepBar(insn); 4020 4021 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) { 4022 // Barriers take one additional clock cycle to become active on top of 4023 // the clock consumed by the instruction producing it. 4024 if (!next || insn->bb != next->bb) { 4025 delay = 0x2; 4026 } else { 4027 int wt = getWtDepBar(next); 4028 if ((wt & (1 << wr)) | (wt & (1 << rd))) 4029 delay = 0x2; 4030 } 4031 } 4032 4033 emitStall(insn, delay); 4034} 4035 4036 4037// Return true when the given instruction needs to emit a read dependency 4038// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and 4039// setting the maximum number of stall counts is not enough. 4040bool 4041SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const 4042{ 4043 BitSet srcs(255, true), defs(255, true); 4044 int a, b; 4045 4046 if (!targ->isBarrierRequired(insn)) 4047 return false; 4048 4049 // Do not emit a read dependency barrier when the instruction doesn't use 4050 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary. 
4051 for (int s = 0; insn->srcExists(s); ++s) { 4052 const Value *src = insn->src(s).rep(); 4053 if (insn->src(s).getFile() != FILE_GPR) 4054 continue; 4055 if (src->reg.data.id == 255) 4056 continue; 4057 4058 a = src->reg.data.id; 4059 b = a + src->reg.size / 4; 4060 for (int r = a; r < b; ++r) 4061 srcs.set(r); 4062 } 4063 4064 if (!srcs.popCount()) 4065 return false; 4066 4067 // Do not emit a read dependency barrier when the output GPRs are equal to 4068 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will 4069 // be produced and WaR hazards are prevented. 4070 for (int d = 0; insn->defExists(d); ++d) { 4071 const Value *def = insn->def(d).rep(); 4072 if (insn->def(d).getFile() != FILE_GPR) 4073 continue; 4074 if (def->reg.data.id == 255) 4075 continue; 4076 4077 a = def->reg.data.id; 4078 b = a + def->reg.size / 4; 4079 for (int r = a; r < b; ++r) 4080 defs.set(r); 4081 } 4082 4083 srcs.andNot(defs); 4084 if (!srcs.popCount()) 4085 return false; 4086 4087 return true; 4088} 4089 4090// Return true when the given instruction needs to emit a write dependency 4091// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and 4092// setting the maximum number of stall counts is not enough. This is only legal 4093// if the instruction output something. 
4094bool 4095SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const 4096{ 4097 if (!targ->isBarrierRequired(insn)) 4098 return false; 4099 4100 for (int d = 0; insn->defExists(d); ++d) { 4101 if (insn->def(d).getFile() == FILE_GPR || 4102 insn->def(d).getFile() == FILE_FLAGS || 4103 insn->def(d).getFile() == FILE_PREDICATE) 4104 return true; 4105 } 4106 return false; 4107} 4108 4109// Helper function for findFirstUse() and findFirstDef() 4110bool 4111SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn, 4112 const Value *val) const 4113{ 4114 if (val->reg.file != FILE_GPR && 4115 val->reg.file != FILE_PREDICATE && 4116 val->reg.file != FILE_FLAGS) 4117 return false; 4118 4119 for (int d = 0; insn->defExists(d); ++d) { 4120 const Value* def = insn->getDef(d); 4121 int minGPR = def->reg.data.id; 4122 int maxGPR = minGPR + def->reg.size / 4 - 1; 4123 4124 if (def->reg.file != val->reg.file) 4125 continue; 4126 4127 if (def->reg.file == FILE_GPR) { 4128 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR || 4129 val->reg.data.id > maxGPR) 4130 continue; 4131 return true; 4132 } else 4133 if (def->reg.file == FILE_PREDICATE) { 4134 if (val->reg.data.id != minGPR) 4135 continue; 4136 return true; 4137 } else 4138 if (def->reg.file == FILE_FLAGS) { 4139 if (val->reg.data.id != minGPR) 4140 continue; 4141 return true; 4142 } 4143 } 4144 4145 return false; 4146} 4147 4148// Find the next instruction inside the same basic block which uses (reads or 4149// writes from) the output of the given instruction in order to avoid RaW and 4150// WaW hazards. 
Instruction *
SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
{
   Instruction *insn, *next;

   // Nothing can depend on an instruction without any definition.
   if (!bari->defExists(0))
      return NULL;

   // Walk the instructions following bari and stop at the first one that
   // reads or overwrites something bari writes. NULL when there is none.
   for (insn = bari->next; insn != NULL; insn = next) {
      next = insn->next;

      for (int s = 0; insn->srcExists(s); ++s)
         if (doesInsnWriteTo(bari, insn->getSrc(s)))
            return insn;

      for (int d = 0; insn->defExists(d); ++d)
         if (doesInsnWriteTo(bari, insn->getDef(d)))
            return insn;
   }
   return NULL;
}

// Find the next instruction inside the same basic block which overwrites, at
// least, one source of the given instruction in order to avoid WaR hazards.
// Returns NULL when bari has no source or when no such instruction follows.
Instruction *
SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
{
   Instruction *insn, *next;

   if (!bari->srcExists(0))
      return NULL;

   for (insn = bari->next; insn != NULL; insn = next) {
      next = insn->next;

      for (int s = 0; bari->srcExists(s); ++s)
         if (doesInsnWriteTo(insn, bari->getSrc(s)))
            return insn;
   }
   return NULL;
}

// Dependency barriers:
// This pass is a bit ugly and could probably be improved by performing a
// better allocation.
//
// The main idea is to avoid WaR and RaW hazards by emitting read/write
// dependency barriers using the control codes.
//
// The pass works on a single basic block, in two steps: barriers are first
// allocated and the matching waits emitted, then waits on barriers that are
// not pending anymore are removed. It always returns true.
bool
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
{
   // Barriers set by an instruction, paired with the later instruction that
   // has to wait on them.
   std::list<LiveBarUse> live_uses;
   std::list<LiveBarDef> live_defs;
   Instruction *insn, *next;
   // One bit per barrier; a set bit means the barrier is currently allocated.
   BitSet bars(6, true);
   int bar_id;

   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      Instruction *usei = NULL, *defi = NULL;
      bool need_wr_bar, need_rd_bar;

      next = insn->next;

      // Expire old barrier uses: once the recorded user has been reached,
      // make the current instruction wait on the barrier and release it.
      for (std::list<LiveBarUse>::iterator it = live_uses.begin();
           it != live_uses.end();) {
         if (insn->serial >= it->usei->serial) {
            int wr = getWrDepBar(it->insn);
            emitWtDepBar(insn, wr);
            bars.clr(wr); // free barrier
            it = live_uses.erase(it);
            continue;
         }
         ++it;
      }

      // Expire old barrier defs, same scheme as above for read barriers.
      for (std::list<LiveBarDef>::iterator it = live_defs.begin();
           it != live_defs.end();) {
         if (insn->serial >= it->defi->serial) {
            int rd = getRdDepBar(it->insn);
            emitWtDepBar(insn, rd);
            bars.clr(rd); // free barrier
            it = live_defs.erase(it);
            continue;
         }
         ++it;
      }

      need_wr_bar = needWrDepBar(insn);
      need_rd_bar = needRdDepBar(insn);

      if (need_wr_bar) {
         // When the instruction requires to emit a write dependency barrier
         // (all which write something at a variable latency), find the next
         // instruction which reads the outputs (or writes to them, potentially
         // completing before this insn).
         usei = findFirstUse(insn);

         // Allocate and emit a new barrier.
         // NOTE(review): when no barrier is free, barrier 5 is reused even if
         // it is still tracked in one of the live lists; confirm this is the
         // intended fallback.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitWrDepBar(insn, bar_id);
         if (usei)
            live_uses.push_back(LiveBarUse(insn, usei));
      }

      if (need_rd_bar) {
         // When the instruction requires to emit a read dependency barrier
         // (all which read something at a variable latency), find the next
         // instruction which will write the inputs.
         defi = findFirstDef(insn);

         // No read barrier when the first user of the outputs comes no later
         // than the first overwrite of the inputs (presumably because the
         // wait on the write barrier already orders the two; to be confirmed).
         if (usei && defi && usei->serial <= defi->serial)
            continue;

         // Allocate and emit a new barrier.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitRdDepBar(insn, bar_id);
         if (defi)
            live_defs.push_back(LiveBarDef(insn, defi));
      }
   }

   // Remove unnecessary barrier waits.
   // alive_bars holds the barriers set by a previous instruction of this
   // block that nobody has waited on yet.
   BitSet alive_bars(6, true);
   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      int wr, rd, wt;

      next = insn->next;

      wr = getWrDepBar(insn);
      rd = getRdDepBar(insn);
      wt = getWtDepBar(insn);

      for (int idx = 0; idx < 6; ++idx) {
         if (!(wt & (1 << idx)))
            continue;
         if (!alive_bars.test(idx)) {
            // Waiting on a barrier that is not pending: drop the wait bit.
            insn->sched &= ~(1 << (11 + idx));
         } else {
            // The wait consumes the barrier.
            alive_bars.clr(idx);
         }
      }

      // Values of 6 and above do not name a barrier and are ignored here.
      if (wr < 6)
         alive_bars.set(wr);
      if (rd < 6)
         alive_bars.set(rd);
   }

   return true;
}

// Per-function setup: order the instructions of func and reset one score
// board per node of its control flow graph. Always returns true.
bool
SchedDataCalculatorGM107::visit(Function *func)
{
   // NOTE(review): the list itself is not used afterwards; the call is
   // presumably made for the ordering/serial numbers it produces - confirm.
   ArrayList insns;

   func->orderInstructions(insns);

   scoreBoards.resize(func->cfg.getSize());
   for (size_t i = 0; i < scoreBoards.size(); ++i)
      scoreBoards[i].wipe();
   return true;
}

// Compute the scheduling data (barriers, stall counts, reuse flags) of every
// instruction of bb. Always returns true.
bool
SchedDataCalculatorGM107::visit(BasicBlock *bb)
{
   Instruction *insn, *next = NULL;
   int cycle = 0;

   // Default sched word: 0x7e0 is 0x0e0 | 0x700, i.e. both the write and the
   // read barrier fields are set to 7 and every other field is zero.
   for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
      /*XXX*/
      insn->sched = 0x7e0;
   }

   // Scheduling can be switched off with the NV50_PROG_SCHED option, in which
   // case only the default sched words above are kept.
   if (!debug_get_bool_option("NV50_PROG_SCHED", true))
      return true;

   // Insert read/write dependency barriers for instructions which don't
   // operate at a fixed latency.
   insertBarriers(bb);

   score = &scoreBoards.at(bb->getId());

   // Start from the merged score boards of all the (non back-edge)
   // predecessors.
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      // back branches will wait until all target dependencies are satisfied
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
         continue;
      BasicBlock *in = BasicBlock::get(ei.getNode());
      score->setMax(&scoreBoards.at(in->getId()));
   }

#ifdef GM107_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   // Because barriers are allocated locally (intra-BB), we have to make sure
   // that all produced barriers have been consumed before entering inside a
   // new basic block. The best way is to do a global allocation pre RA but
   // it's really more difficult, especially because of the phi nodes. Anyways,
   // it seems like that waiting on a barrier which has already been consumed
   // doesn't add any additional cost, it's just not elegant!
   Instruction *start = bb->getEntry();
   if (start && bb->cfg.incidentCount() > 0) {
      for (int b = 0; b < 6; b++)
         emitWtDepBar(start, b);
   }

   // Every instruction but the last one: its stall count depends on what the
   // following instruction of the same block reads.
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getStall(insn);

      setReuseFlag(insn);

      // XXX: The yield flag seems to destroy a bunch of things when it is
      // set on every instruction, need investigation.
      //emitYield(insn);

#ifdef GM107_DEBUG_SCHED_DATA
      printSchedInfo(cycle, insn);
      insn->print();
      next->print();
#endif
   }

   // Empty block: nothing left to schedule.
   if (!insn)
      return true;
   commitInsn(insn, cycle);

   // Last instruction of the block: its delay is derived from the successor
   // blocks.
   int bbDelay = -1;

#ifdef GM107_DEBUG_SCHED_DATA
   fprintf(stderr, "last instruction is : ");
   insn->print();
   fprintf(stderr, "cycle=%d\n", cycle);
#endif

   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // Only test the first instruction of the outgoing block.
         next = out->getEntry();
         if (next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
         } else {
            // When the outgoing BB is empty, make sure to set the number of
            // stall counts needed by the instruction because we don't know the
            // next instruction.
            bbDelay = MAX2(bbDelay, targ->getLatency(insn));
         }
      } else {
         // Wait until all dependencies are satisfied.
         const int regsFree = score->getLatest();
         next = out->getFirst();
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
            c += getStall(next);
         }
         next = NULL;
      }
   }
   // The next instruction is only passed on when there is exactly one
   // outgoing edge.
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getStall(insn);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}

/*******************************************************************************
 * main
 ******************************************************************************/

// Prepare func for emission: run the generic preparation of the base class,
// then compute the scheduling data of its instructions.
void
CodeEmitterGM107::prepareEmission(Function *func)
{
   SchedDataCalculatorGM107 sched(targGM107);
   CodeEmitter::prepareEmission(func);
   sched.run(func, true, true);
}

// Number of 24-byte groups needed to hold `size` bytes, rounded up.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return (size + 23) / 24;
}

// Prepare every function of prog for emission and lay them out one after the
// other, accumulating the total binary size in prog->binSize.
void
CodeEmitterGM107::prepareEmission(Program *prog)
{
   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
        !fi.end(); fi.next()) {
      Function *func = reinterpret_cast<Function *>(fi.get());
      func->binPos = prog->binSize;
      prepareEmission(func);

      // adjust sizes & positions for scheduling info:
      if (prog->getTarget()->hasSWSched) {
         uint32_t adjPos = func->binPos;
         BasicBlock *bb = NULL;
         for (int i = 0; i < func->bbCount; ++i) {
            bb = func->bbArray[i];
            int32_t adjSize = bb->binSize;
            // When the block does not start on a 32-byte boundary, the bytes
            // up to the next boundary are left out of the group count.
            if (adjPos % 32) {
               adjSize -= 32 - adjPos % 32;
               if (adjSize < 0)
                  adjSize = 0;
            }
            // 8 extra bytes per 24-byte group (presumably one scheduling
            // word for every three instructions - to be confirmed).
            adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
            bb->binPos = adjPos;
            bb->binSize = adjSize;
            adjPos += adjSize;
         }
         // Only update the function size if it has at least one block.
         if (bb)
            func->binSize = adjPos - func->binPos;
      }

      prog->binSize += func->binSize;
   }
}

// Build an emitter for the given target. The program type defaults to
// TYPE_VERTEX until setProgramType() is called, and no code buffer is
// attached yet.
CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
   : CodeEmitter(target),
     targGM107(target),
     progType(Program::TYPE_VERTEX),
     insn(NULL),
     writeIssueDelays(target->hasSWSched),
     data(NULL)
{
   code = NULL;
   codeSize = codeSizeLimit = 0;
   relocInfo = NULL;
}

// Allocate a GM107 code emitter for a program of the given type. The emitter
// is created with new and returned as a raw pointer.
CodeEmitter *
TargetGM107::createCodeEmitterGM107(Program::Type type)
{
   CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
   emit->setProgramType(type);
   return emit;
}

} // namespace nv50_ir