1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 12bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci#include "nv50_ir.h" 24bf215546Sopenharmony_ci#include "nv50_ir_target.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_cinamespace nv50_ir { 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ciconst uint8_t Target::operationSrcNr[] = 29bf215546Sopenharmony_ci{ 30bf215546Sopenharmony_ci 0, 0, // NOP, PHI 31bf215546Sopenharmony_ci 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 32bf215546Sopenharmony_ci 1, 1, 2, // MOV, LOAD, STORE 33bf215546Sopenharmony_ci 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 34bf215546Sopenharmony_ci 3, 3, // SHLADD, XMAD 35bf215546Sopenharmony_ci 1, 1, 1, // ABS, NEG, NOT 36bf215546Sopenharmony_ci 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF 37bf215546Sopenharmony_ci 2, 2, 1, // MAX, MIN, SAT 38bf215546Sopenharmony_ci 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 39bf215546Sopenharmony_ci 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT 40bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 41bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW 42bf215546Sopenharmony_ci 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 43bf215546Sopenharmony_ci 0, 0, 0, // PRERET,CONT,BREAK 44bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 45bf215546Sopenharmony_ci 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP 46bf215546Sopenharmony_ci 1, 1, 1, // EMIT, RESTART, FINAL 47bf215546Sopenharmony_ci 1, 1, 1, // TEX, TXB, TXL, 48bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 49bf215546Sopenharmony_ci 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA 50bf215546Sopenharmony_ci 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 51bf215546Sopenharmony_ci 0, // TEXBAR 52bf215546Sopenharmony_ci 1, 1, // DFDX, DFDY 53bf215546Sopenharmony_ci 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 54bf215546Sopenharmony_ci 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT 55bf215546Sopenharmony_ci 2, // SGXT 56bf215546Sopenharmony_ci 3, 2, // ATOM, BAR 57bf215546Sopenharmony_ci 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 58bf215546Sopenharmony_ci 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 59bf215546Sopenharmony_ci 3, // SHFL 60bf215546Sopenharmony_ci 1, // VOTE 61bf215546Sopenharmony_ci 1, // BUFQ 62bf215546Sopenharmony_ci 1, // WARPSYNC 63bf215546Sopenharmony_ci 0 64bf215546Sopenharmony_ci}; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ciconst OpClass Target::operationClass[] = 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT 69bf215546Sopenharmony_ci OPCLASS_OTHER, 70bf215546Sopenharmony_ci OPCLASS_PSEUDO, 71bf215546Sopenharmony_ci OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, 72bf215546Sopenharmony_ci // MOV; LOAD; STORE 73bf215546Sopenharmony_ci OPCLASS_MOVE, 74bf215546Sopenharmony_ci OPCLASS_LOAD, 75bf215546Sopenharmony_ci OPCLASS_STORE, 76bf215546Sopenharmony_ci // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD 77bf215546Sopenharmony_ci OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 78bf215546Sopenharmony_ci OPCLASS_ARITH, OPCLASS_ARITH, 79bf215546Sopenharmony_ci OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 80bf215546Sopenharmony_ci // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF 81bf215546Sopenharmony_ci OPCLASS_CONVERT, OPCLASS_CONVERT, 82bf215546Sopenharmony_ci OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, 83bf215546Sopenharmony_ci OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT, 84bf215546Sopenharmony_ci // MAX, MIN 85bf215546Sopenharmony_ci OPCLASS_COMPARE, OPCLASS_COMPARE, 86bf215546Sopenharmony_ci // SAT, CEIL, FLOOR, TRUNC; CVT 87bf215546Sopenharmony_ci OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, 88bf215546Sopenharmony_ci OPCLASS_CONVERT, 89bf215546Sopenharmony_ci // SET(AND,OR,XOR); SELP, SLCT 90bf215546Sopenharmony_ci OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, 91bf215546Sopenharmony_ci OPCLASS_COMPARE, OPCLASS_COMPARE, 92bf215546Sopenharmony_ci // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW 93bf215546Sopenharmony_ci OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 94bf215546Sopenharmony_ci OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 95bf215546Sopenharmony_ci OPCLASS_SFU, OPCLASS_SFU, 96bf215546Sopenharmony_ci // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN 97bf215546Sopenharmony_ci OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 98bf215546Sopenharmony_ci OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 99bf215546Sopenharmony_ci OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 100bf215546Sopenharmony_ci // DISCARD, EXIT 101bf215546Sopenharmony_ci OPCLASS_FLOW, OPCLASS_FLOW, 102bf215546Sopenharmony_ci // MEMBAR 103bf215546Sopenharmony_ci OPCLASS_CONTROL, 104bf215546Sopenharmony_ci // VFETCH, PFETCH, AFETCH, EXPORT 105bf215546Sopenharmony_ci OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, 106bf215546Sopenharmony_ci // LINTERP, PINTERP 107bf215546Sopenharmony_ci OPCLASS_SFU, OPCLASS_SFU, 108bf215546Sopenharmony_ci // EMIT, RESTART, FINAL 109bf215546Sopenharmony_ci OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL, 110bf215546Sopenharmony_ci // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP 111bf215546Sopenharmony_ci OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 112bf215546Sopenharmony_ci OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 113bf215546Sopenharmony_ci OPCLASS_TEXTURE, OPCLASS_TEXTURE, 114bf215546Sopenharmony_ci // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA 115bf215546Sopenharmony_ci OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, 116bf215546Sopenharmony_ci OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, 117bf215546Sopenharmony_ci // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 118bf215546Sopenharmony_ci OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, 119bf215546Sopenharmony_ci // TEXBAR 120bf215546Sopenharmony_ci OPCLASS_OTHER, 121bf215546Sopenharmony_ci // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP 122bf215546Sopenharmony_ci OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 123bf215546Sopenharmony_ci OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, 124bf215546Sopenharmony_ci // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT 125bf215546Sopenharmony_ci OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 126bf215546Sopenharmony_ci OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 127bf215546Sopenharmony_ci // ATOM, BAR 128bf215546Sopenharmony_ci OPCLASS_ATOMIC, OPCLASS_CONTROL, 129bf215546Sopenharmony_ci // VADD, VAVG, VMIN, VMAX 130bf215546Sopenharmony_ci OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 131bf215546Sopenharmony_ci // VSAD, VSET, VSHR, VSHL 132bf215546Sopenharmony_ci OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 133bf215546Sopenharmony_ci // VSEL, CCTL 134bf215546Sopenharmony_ci OPCLASS_VECTOR, OPCLASS_CONTROL, 135bf215546Sopenharmony_ci // SHFL 136bf215546Sopenharmony_ci OPCLASS_OTHER, 137bf215546Sopenharmony_ci // VOTE 138bf215546Sopenharmony_ci OPCLASS_OTHER, 139bf215546Sopenharmony_ci // BUFQ 140bf215546Sopenharmony_ci OPCLASS_OTHER, 141bf215546Sopenharmony_ci // WARPSYNC 142bf215546Sopenharmony_ci OPCLASS_OTHER, 143bf215546Sopenharmony_ci OPCLASS_PSEUDO // LAST 144bf215546Sopenharmony_ci}; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ciextern Target *getTargetGV100(unsigned int chipset); 148bf215546Sopenharmony_ciextern Target *getTargetGM107(unsigned int chipset); 149bf215546Sopenharmony_ciextern Target *getTargetNVC0(unsigned int chipset); 150bf215546Sopenharmony_ciextern Target *getTargetNV50(unsigned int chipset); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ciTarget *Target::create(unsigned int chipset) 153bf215546Sopenharmony_ci{ 154bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); 155bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); 156bf215546Sopenharmony_ci switch (chipset & ~0xf) { 157bf215546Sopenharmony_ci case 0x170: 158bf215546Sopenharmony_ci case 0x160: 159bf215546Sopenharmony_ci case 0x140: 160bf215546Sopenharmony_ci return getTargetGV100(chipset); 161bf215546Sopenharmony_ci case 0x110: 162bf215546Sopenharmony_ci case 0x120: 163bf215546Sopenharmony_ci case 0x130: 164bf215546Sopenharmony_ci return getTargetGM107(chipset); 165bf215546Sopenharmony_ci case 0xc0: 166bf215546Sopenharmony_ci case 0xd0: 167bf215546Sopenharmony_ci case 0xe0: 168bf215546Sopenharmony_ci case 0xf0: 169bf215546Sopenharmony_ci case 0x100: 170bf215546Sopenharmony_ci return getTargetNVC0(chipset); 171bf215546Sopenharmony_ci case 0x50: 172bf215546Sopenharmony_ci case 0x80: 173bf215546Sopenharmony_ci case 0x90: 174bf215546Sopenharmony_ci case 0xa0: 175bf215546Sopenharmony_ci return getTargetNV50(chipset); 176bf215546Sopenharmony_ci default: 177bf215546Sopenharmony_ci ERROR("unsupported target: NV%x\n", chipset); 178bf215546Sopenharmony_ci return 0; 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci} 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_civoid Target::destroy(Target *targ) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci delete targ; 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ciCodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL), 188bf215546Sopenharmony_ci codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL) 189bf215546Sopenharmony_ci{ 190bf215546Sopenharmony_ci} 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_civoid 193bf215546Sopenharmony_ciCodeEmitter::setCodeLocation(void *ptr, uint32_t size) 194bf215546Sopenharmony_ci{ 195bf215546Sopenharmony_ci code = reinterpret_cast<uint32_t *>(ptr); 196bf215546Sopenharmony_ci codeSize = 0; 197bf215546Sopenharmony_ci codeSizeLimit = size; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_civoid 201bf215546Sopenharmony_ciCodeEmitter::printBinary() const 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci uint32_t *bin = code - codeSize / 4; 204bf215546Sopenharmony_ci INFO("program binary (%u bytes)", codeSize); 205bf215546Sopenharmony_ci for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { 206bf215546Sopenharmony_ci if ((pos % 8) == 0) 207bf215546Sopenharmony_ci INFO("\n"); 208bf215546Sopenharmony_ci INFO("%08x ", bin[pos]); 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci INFO("\n"); 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_cistatic inline uint32_t sizeToBundlesNVE4(uint32_t size) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci return (size + 55) / 56; 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_civoid 219bf215546Sopenharmony_ciCodeEmitter::prepareEmission(Program *prog) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 222bf215546Sopenharmony_ci !fi.end(); fi.next()) { 223bf215546Sopenharmony_ci Function *func = reinterpret_cast<Function *>(fi.get()); 224bf215546Sopenharmony_ci func->binPos = prog->binSize; 225bf215546Sopenharmony_ci prepareEmission(func); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci // adjust sizes & positions for scheduling info: 228bf215546Sopenharmony_ci if (prog->getTarget()->hasSWSched) { 229bf215546Sopenharmony_ci uint32_t adjPos = func->binPos; 230bf215546Sopenharmony_ci BasicBlock *bb = NULL; 231bf215546Sopenharmony_ci for (int i = 0; i < func->bbCount; ++i) { 232bf215546Sopenharmony_ci bb = func->bbArray[i]; 233bf215546Sopenharmony_ci int32_t adjSize = bb->binSize; 234bf215546Sopenharmony_ci if (adjPos % 64) { 235bf215546Sopenharmony_ci adjSize -= 64 - adjPos % 64; 236bf215546Sopenharmony_ci if (adjSize < 0) 237bf215546Sopenharmony_ci adjSize = 0; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; 240bf215546Sopenharmony_ci bb->binPos = adjPos; 241bf215546Sopenharmony_ci bb->binSize = adjSize; 242bf215546Sopenharmony_ci adjPos += adjSize; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci if (bb) 245bf215546Sopenharmony_ci func->binSize = adjPos - func->binPos; 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci prog->binSize += func->binSize; 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci} 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_civoid 253bf215546Sopenharmony_ciCodeEmitter::prepareEmission(Function *func) 254bf215546Sopenharmony_ci{ 255bf215546Sopenharmony_ci func->bbCount = 0; 256bf215546Sopenharmony_ci func->bbArray = new BasicBlock * [func->cfg.getSize()]; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) 261bf215546Sopenharmony_ci prepareEmission(BasicBlock::get(*it)); 262bf215546Sopenharmony_ci} 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_civoid 265bf215546Sopenharmony_ciCodeEmitter::prepareEmission(BasicBlock *bb) 266bf215546Sopenharmony_ci{ 267bf215546Sopenharmony_ci Instruction *i, *next; 268bf215546Sopenharmony_ci Function *func = bb->getFunction(); 269bf215546Sopenharmony_ci int j; 270bf215546Sopenharmony_ci unsigned int nShort; 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci for (; j >= 0; --j) { 275bf215546Sopenharmony_ci BasicBlock *in = func->bbArray[j]; 276bf215546Sopenharmony_ci Instruction *exit = in->getExit(); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { 279bf215546Sopenharmony_ci in->binSize -= 8; 280bf215546Sopenharmony_ci func->binSize -= 8; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci for (++j; j < func->bbCount; ++j) 283bf215546Sopenharmony_ci func->bbArray[j]->binPos -= 8; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci in->remove(exit); 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci bb->binPos = in->binPos + in->binSize; 288bf215546Sopenharmony_ci if (in->binSize) // no more no-op branches to bb 289bf215546Sopenharmony_ci break; 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci func->bbArray[func->bbCount++] = bb; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci if (!bb->getExit()) 294bf215546Sopenharmony_ci return; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci // determine encoding size, try to group short instructions 297bf215546Sopenharmony_ci nShort = 0; 298bf215546Sopenharmony_ci for (i = bb->getEntry(); i; i = next) { 299bf215546Sopenharmony_ci next = i->next; 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci i->encSize = getMinEncodingSize(i); 302bf215546Sopenharmony_ci if (next && i->encSize < 8) 303bf215546Sopenharmony_ci ++nShort; 304bf215546Sopenharmony_ci else 305bf215546Sopenharmony_ci if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { 306bf215546Sopenharmony_ci if (i->isCommutationLegal(i->next)) { 307bf215546Sopenharmony_ci bb->permuteAdjacent(i, next); 308bf215546Sopenharmony_ci next->encSize = 4; 309bf215546Sopenharmony_ci next = i; 310bf215546Sopenharmony_ci i = i->prev; 311bf215546Sopenharmony_ci ++nShort; 312bf215546Sopenharmony_ci } else 313bf215546Sopenharmony_ci if (i->isCommutationLegal(i->prev) && next->next) { 314bf215546Sopenharmony_ci bb->permuteAdjacent(i->prev, i); 315bf215546Sopenharmony_ci next->encSize = 4; 316bf215546Sopenharmony_ci next = next->next; 317bf215546Sopenharmony_ci bb->binSize += 4; 318bf215546Sopenharmony_ci ++nShort; 319bf215546Sopenharmony_ci } else { 320bf215546Sopenharmony_ci i->encSize = 8; 321bf215546Sopenharmony_ci i->prev->encSize = 8; 322bf215546Sopenharmony_ci bb->binSize += 4; 323bf215546Sopenharmony_ci nShort = 0; 324bf215546Sopenharmony_ci } 325bf215546Sopenharmony_ci } else { 326bf215546Sopenharmony_ci i->encSize = 8; 327bf215546Sopenharmony_ci if (nShort & 1) { 328bf215546Sopenharmony_ci i->prev->encSize = 8; 329bf215546Sopenharmony_ci bb->binSize += 4; 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci nShort = 0; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci bb->binSize += i->encSize; 334bf215546Sopenharmony_ci } 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci if (bb->getExit()->encSize == 4) { 337bf215546Sopenharmony_ci assert(nShort); 338bf215546Sopenharmony_ci bb->getExit()->encSize = 8; 339bf215546Sopenharmony_ci bb->binSize += 4; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { 342bf215546Sopenharmony_ci bb->binSize += 8; 343bf215546Sopenharmony_ci bb->getExit()->prev->encSize = 8; 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci func->binSize += bb->binSize; 349bf215546Sopenharmony_ci} 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_cibool 352bf215546Sopenharmony_ciProgram::emitBinary(struct nv50_ir_prog_info_out *info) 353bf215546Sopenharmony_ci{ 354bf215546Sopenharmony_ci CodeEmitter *emit = target->getCodeEmitter(progType); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci emit->prepareEmission(this); 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci if (dbgFlags & NV50_IR_DEBUG_BASIC) 359bf215546Sopenharmony_ci this->print(); 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci if (!binSize) { 362bf215546Sopenharmony_ci code = NULL; 363bf215546Sopenharmony_ci return false; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); 366bf215546Sopenharmony_ci if (!code) 367bf215546Sopenharmony_ci return false; 368bf215546Sopenharmony_ci emit->setCodeLocation(code, binSize); 369bf215546Sopenharmony_ci info->bin.instructions = 0; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { 372bf215546Sopenharmony_ci Function *fn = reinterpret_cast<Function *>(fi.get()); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci assert(emit->getCodeSize() == fn->binPos); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci for (int b = 0; b < fn->bbCount; ++b) { 377bf215546Sopenharmony_ci for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) { 378bf215546Sopenharmony_ci emit->emitInstruction(i); 379bf215546Sopenharmony_ci info->bin.instructions++; 380bf215546Sopenharmony_ci if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) && 381bf215546Sopenharmony_ci (isFloatType(i->sType) || isFloatType(i->dType))) 382bf215546Sopenharmony_ci info->io.fp64 = true; 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci } 385bf215546Sopenharmony_ci } 386bf215546Sopenharmony_ci info->io.fp64 |= fp64; 387bf215546Sopenharmony_ci info->bin.relocData = emit->getRelocInfo(); 388bf215546Sopenharmony_ci info->bin.fixupData = emit->getFixupInfo(); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci // the nvc0 driver will print the binary itself together with the header 391bf215546Sopenharmony_ci if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) 392bf215546Sopenharmony_ci emit->printBinary(); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci delete emit; 395bf215546Sopenharmony_ci return true; 396bf215546Sopenharmony_ci} 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci#define RELOC_ALLOC_INCREMENT 8 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cibool 401bf215546Sopenharmony_ciCodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, 402bf215546Sopenharmony_ci int s) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci unsigned int n = relocInfo ? relocInfo->count : 0; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci if (!(n % RELOC_ALLOC_INCREMENT)) { 407bf215546Sopenharmony_ci size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); 408bf215546Sopenharmony_ci relocInfo = reinterpret_cast<RelocInfo *>( 409bf215546Sopenharmony_ci REALLOC(relocInfo, n ? size : 0, 410bf215546Sopenharmony_ci size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); 411bf215546Sopenharmony_ci if (!relocInfo) 412bf215546Sopenharmony_ci return false; 413bf215546Sopenharmony_ci if (n == 0) 414bf215546Sopenharmony_ci memset(relocInfo, 0, sizeof(RelocInfo)); 415bf215546Sopenharmony_ci } 416bf215546Sopenharmony_ci ++relocInfo->count; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci relocInfo->entry[n].data = data; 419bf215546Sopenharmony_ci relocInfo->entry[n].mask = m; 420bf215546Sopenharmony_ci relocInfo->entry[n].offset = codeSize + w * 4; 421bf215546Sopenharmony_ci relocInfo->entry[n].bitPos = s; 422bf215546Sopenharmony_ci relocInfo->entry[n].type = ty; 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci return true; 425bf215546Sopenharmony_ci} 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cibool 428bf215546Sopenharmony_ciCodeEmitter::addInterp(int ipa, int reg, FixupApply apply) 429bf215546Sopenharmony_ci{ 430bf215546Sopenharmony_ci unsigned int n = fixupInfo ? fixupInfo->count : 0; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci if (!(n % RELOC_ALLOC_INCREMENT)) { 433bf215546Sopenharmony_ci size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry); 434bf215546Sopenharmony_ci fixupInfo = reinterpret_cast<FixupInfo *>( 435bf215546Sopenharmony_ci REALLOC(fixupInfo, n ? size : 0, 436bf215546Sopenharmony_ci size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry))); 437bf215546Sopenharmony_ci if (!fixupInfo) 438bf215546Sopenharmony_ci return false; 439bf215546Sopenharmony_ci if (n == 0) 440bf215546Sopenharmony_ci fixupInfo->count = 0; 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci ++fixupInfo->count; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci return true; 447bf215546Sopenharmony_ci} 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_civoid 450bf215546Sopenharmony_ciRelocEntry::apply(uint32_t *binary, const RelocInfo *info) const 451bf215546Sopenharmony_ci{ 452bf215546Sopenharmony_ci uint32_t value = 0; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci switch (type) { 455bf215546Sopenharmony_ci case TYPE_CODE: value = info->codePos; break; 456bf215546Sopenharmony_ci case TYPE_BUILTIN: value = info->libPos; break; 457bf215546Sopenharmony_ci case TYPE_DATA: value = info->dataPos; break; 458bf215546Sopenharmony_ci default: 459bf215546Sopenharmony_ci assert(0); 460bf215546Sopenharmony_ci break; 461bf215546Sopenharmony_ci } 462bf215546Sopenharmony_ci value += data; 463bf215546Sopenharmony_ci value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci binary[offset / 4] &= ~mask; 466bf215546Sopenharmony_ci binary[offset / 4] |= value & mask; 467bf215546Sopenharmony_ci} 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci} // namespace nv50_ir 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci#include "nv50_ir_driver.h" 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ciextern "C" { 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_civoid 477bf215546Sopenharmony_cinv50_ir_relocate_code(void *relocData, uint32_t *code, 478bf215546Sopenharmony_ci uint32_t codePos, 479bf215546Sopenharmony_ci uint32_t libPos, 480bf215546Sopenharmony_ci uint32_t dataPos) 481bf215546Sopenharmony_ci{ 482bf215546Sopenharmony_ci nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci info->codePos = codePos; 485bf215546Sopenharmony_ci info->libPos = libPos; 486bf215546Sopenharmony_ci info->dataPos = dataPos; 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci for (unsigned int i = 0; i < info->count; ++i) 489bf215546Sopenharmony_ci info->entry[i].apply(code, info); 490bf215546Sopenharmony_ci} 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_civoid 493bf215546Sopenharmony_cinv50_ir_apply_fixups(void *fixupData, uint32_t *code, 494bf215546Sopenharmony_ci bool force_persample_interp, bool flatshade, 495bf215546Sopenharmony_ci uint8_t alphatest, bool msaa) 496bf215546Sopenharmony_ci{ 497bf215546Sopenharmony_ci nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>( 498bf215546Sopenharmony_ci fixupData); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci // force_persample_interp: all non-flat -> per-sample 501bf215546Sopenharmony_ci // flatshade: all color -> flat 502bf215546Sopenharmony_ci // alphatest: PIPE_FUNC_* to use with alphatest 503bf215546Sopenharmony_ci // msaa: false = sample id -> 0 for interpolateAtSample 504bf215546Sopenharmony_ci nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa); 505bf215546Sopenharmony_ci for (unsigned i = 0; i < info->count; ++i) 506bf215546Sopenharmony_ci info->entry[i].apply(&info->entry[i], code, data); 507bf215546Sopenharmony_ci} 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_civoid 510bf215546Sopenharmony_cinv50_ir_get_target_library(uint32_t chipset, 511bf215546Sopenharmony_ci const uint32_t **code, uint32_t *size) 512bf215546Sopenharmony_ci{ 513bf215546Sopenharmony_ci nv50_ir::Target *targ = nv50_ir::Target::create(chipset); 514bf215546Sopenharmony_ci targ->getBuiltinCode(code, size); 515bf215546Sopenharmony_ci nv50_ir::Target::destroy(targ); 516bf215546Sopenharmony_ci} 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci} 519